From 9a671b6536d419da8ea6c57e647da4312ec072b1 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Sat, 6 May 2017 09:32:33 +0100 Subject: Initial commit --- README.md | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 README.md diff --git a/README.md b/README.md new file mode 100644 index 000000000..5834629ef --- /dev/null +++ b/README.md @@ -0,0 +1,2 @@ +# jwtf +JSON Web Token Functions -- cgit v1.2.1 From 2c3f9685f0f04b7dc1e1ae6242fa62eca010c9c6 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Sat, 6 May 2017 09:36:34 +0100 Subject: Initial commit Test does not pass yet. --- .gitignore | 4 ++ README.md | 8 +++ src/jwtf.app.src | 30 ++++++++++ src/jwtf.erl | 179 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 221 insertions(+) create mode 100644 .gitignore create mode 100644 src/jwtf.app.src create mode 100644 src/jwtf.erl diff --git a/.gitignore b/.gitignore new file mode 100644 index 000000000..5eadeac89 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +*~ +_build/ +doc/ +rebar.lock diff --git a/README.md b/README.md index 5834629ef..84e196ad8 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,10 @@ # jwtf + JSON Web Token Functions + +This library provides JWT parsing and validation functions + +Supports; + +# Verify +# RS256 diff --git a/src/jwtf.app.src b/src/jwtf.app.src new file mode 100644 index 000000000..1eec6ef4d --- /dev/null +++ b/src/jwtf.app.src @@ -0,0 +1,30 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +{application, jwtf, [ + {description, "JSON Web Token Functions"}, + {vsn, git}, + {registered, []}, + {applications, [ + kernel, + stdlib, + b64url, + config, + crypto, + jiffy + ]}, + {env,[]}, + {modules, []}, + {maintainers, []}, + {licenses, []}, + {links, []} +]}. diff --git a/src/jwtf.erl b/src/jwtf.erl new file mode 100644 index 000000000..be930aea8 --- /dev/null +++ b/src/jwtf.erl @@ -0,0 +1,179 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(jwtf). + +-export([decode/1]). + +-spec decode(EncodedToken :: binary()) -> + {ok, DecodedToken :: term()} | {error, Reason :: term()}. +decode(EncodedToken) -> + try + [Header, Payload, Signature] = split(EncodedToken), + validate(Header, Payload, Signature), + {ok, decode_json(Payload)} + catch + throw:Error -> + Error + end. + + +validate(Header0, Payload0, Signature) -> + Header1 = props(decode_json(Header0)), + validate_header(Header1), + + Payload1 = props(decode_json(Payload0)), + validate_payload(Payload1), + + PublicKey = public_key(Payload1), + rs256_verify(Header0, Payload0, Signature, PublicKey). + + +validate_header(Props) -> + case proplists:get_value(<<"typ">>, Props) of + <<"JWT">> -> + ok; + _ -> + throw({error, invalid_type}) + end, + case proplists:get_value(<<"alg">>, Props) of + <<"RS256">> -> + ok; + _ -> + throw({error, invalid_alg}) + end. + + +validate_payload(Props) -> + validate_iss(Props), + validate_iat(Props), + validate_exp(Props). + + +validate_iss(Props) -> + ExpectedISS = list_to_binary(config:get("iam", "iss")), + case proplists:get_value(<<"iss">>, Props) of + undefined -> + throw({error, missing_iss}); + ExpectedISS -> + ok; + _ -> + throw({error, invalid_iss}) + end. + + +validate_iat(Props) -> + case proplists:get_value(<<"iat">>, Props) of + undefined -> + throw({error, missing_iat}); + IAT -> + assert_past(iat, IAT) + end. + + +validate_exp(Props) -> + case proplists:get_value(<<"exp">>, Props) of + undefined -> + throw({error, missing_exp}); + EXP -> + assert_future(exp, EXP) + end. + + +public_key(Props) -> + KID = case proplists:get_value(<<"kid">>, Props) of + undefined -> + throw({error, missing_kid}); + List -> + binary_to_list(List) + end, + case config:get("iam_rsa_public_keys", KID) of + undefined -> + throw({error, public_key_not_found}); + ExpMod -> + [Exp, Mod] = re:split(ExpMod, ",", [{return, binary}]), + [ + crypto:bytes_to_integer(base64:decode(Exp)), + crypto:bytes_to_integer(base64:decode(Mod)) + ] + end. + + +rs256_verify(Header, Payload, Signature, PublicKey) -> + Message = <
>, + case crypto:verify(rsa, sha256, Message, Signature, PublicKey) of + true -> + ok; + false -> + throw({error, bad_signature}) + end. + + +split(EncodedToken) -> + case binary:split(EncodedToken, <<$.>>, [global]) of + [_, _, _] = Split -> Split; + _ -> throw({error, malformed_token}) + end. + + +decode_json(Encoded) -> + case b64url:decode(Encoded) of + {error, Reason} -> + throw({error, Reason}); + Decoded -> + jiffy:decode(Decoded) + end. + +props({Props}) -> + Props; + +props(_) -> + throw({error, not_object}). + + +assert_past(Name, Time) -> + case Time < now_seconds() of + true -> + ok; + false -> + throw({error, {Name, not_in_past}}) + end. + +assert_future(Name, Time) -> + case Time > now_seconds() of + true -> + ok; + false -> + throw({error, {Name, not_in_future}}) + end. + + +now_seconds() -> + {MegaSecs, Secs, _MicroSecs} = os:timestamp(), + MegaSecs * 1000000 + Secs. + +-ifdef(TEST). +-include_lib("eunit/include/eunit.hrl"). + +decode_test() -> + ok = application:start(config), + + EncodedToken = <<"eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJodHRwczovL2Zvby5jb20iLCJpYXQiOjAsImV4cCI6MTAwMDAwMDAwMDAwMDAsImtpZCI6ImJhciJ9.bi87-lkEeOblTb_5ZEh6FkmOSg3mC_kqu2xcYJpJb3So29agyJkkidu3NF8R20x-Xi1wD6E8ACgfODsbdu5dbNRc-HUaFUnvyBr-M94PXhSOvLduoXT2mg1tgD1s_n0QgmH0pP-aAINgotDiUBuQ-pMD5hDIX2EYqAjwRcnVrno">>, + + PublicKey = "AQAB,3ZWrUY0Y6IKN1qI4BhxR2C7oHVFgGPYkd38uGq1jQNSqEvJFcN93CYm16/G78FAFKWqwsJb3Wx+nbxDn6LtP4AhULB1H0K0g7/jLklDAHvI8yhOKlvoyvsUFPWtNxlJyh5JJXvkNKV/4Oo12e69f8QCuQ6NpEPl+cSvXIqUYBCs=", + + config:set("iam", "iss", "https://foo.com"), + config:set("iam_rsa_public_keys", "bar", PublicKey), + + ?assertEqual(nope, decode(EncodedToken)). + +-endif. -- cgit v1.2.1 From f2e1085805ef81a649233965c378eed12faad653 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Sat, 6 May 2017 09:52:13 +0100 Subject: validate nbf --- src/jwtf.erl | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/jwtf.erl b/src/jwtf.erl index be930aea8..566dd0e92 100644 --- a/src/jwtf.erl +++ b/src/jwtf.erl @@ -56,6 +56,7 @@ validate_header(Props) -> validate_payload(Props) -> validate_iss(Props), validate_iat(Props), + validate_nbf(Props), validate_exp(Props). @@ -80,6 +81,15 @@ validate_iat(Props) -> end. +validate_nbf(Props) -> + case proplists:get_value(<<"nbf">>, Props) of + undefined -> + throw({error, missing_nbf}); + IAT -> + assert_past(iat, IAT) + end. + + validate_exp(Props) -> case proplists:get_value(<<"exp">>, Props) of undefined -> -- cgit v1.2.1 From 3888d182a474fcc65d749a950e5f8f38648073dd Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Sat, 6 May 2017 12:03:07 +0100 Subject: Moar Functional * remove dependency on config * make checks optional * support HS256 --- src/jwtf.app.src | 1 - src/jwtf.erl | 169 ++++++++++++++++++++++++++++++++++++------------------- 2 files changed, 112 insertions(+), 58 deletions(-) diff --git a/src/jwtf.app.src b/src/jwtf.app.src index 1eec6ef4d..d210f4c43 100644 --- a/src/jwtf.app.src +++ b/src/jwtf.app.src @@ -18,7 +18,6 @@ kernel, stdlib, b64url, - config, crypto, jiffy ]}, diff --git a/src/jwtf.erl b/src/jwtf.erl index 566dd0e92..61f141d82 100644 --- a/src/jwtf.erl +++ b/src/jwtf.erl @@ -12,14 +12,12 @@ -module(jwtf). --export([decode/1]). +-export([decode/3]). --spec decode(EncodedToken :: binary()) -> - {ok, DecodedToken :: term()} | {error, Reason :: term()}. -decode(EncodedToken) -> +decode(EncodedToken, Checks, KS) -> try [Header, Payload, Signature] = split(EncodedToken), - validate(Header, Payload, Signature), + validate(Header, Payload, Signature, Checks, KS), {ok, decode_json(Payload)} catch throw:Error -> @@ -27,99 +25,125 @@ decode(EncodedToken) -> end. -validate(Header0, Payload0, Signature) -> +validate(Header0, Payload0, Signature, Checks, KS) -> Header1 = props(decode_json(Header0)), validate_header(Header1), Payload1 = props(decode_json(Payload0)), - validate_payload(Payload1), + validate_payload(Payload1, Checks), - PublicKey = public_key(Payload1), - rs256_verify(Header0, Payload0, Signature, PublicKey). + Alg = prop(<<"alg">>, Header1), + Key = key(Payload1, Checks, KS), + verify(Alg, Header0, Payload0, Signature, Key). validate_header(Props) -> - case proplists:get_value(<<"typ">>, Props) of + case prop(<<"typ">>, Props) of <<"JWT">> -> ok; _ -> throw({error, invalid_type}) end, - case proplists:get_value(<<"alg">>, Props) of + case prop(<<"alg">>, Props) of <<"RS256">> -> ok; + <<"HS256">> -> + ok; _ -> throw({error, invalid_alg}) end. -validate_payload(Props) -> - validate_iss(Props), - validate_iat(Props), - validate_nbf(Props), - validate_exp(Props). +%% Not all these fields have to be present, but if they _are_ present +%% they must be valid. +validate_payload(Props, Checks) -> + validate_iss(Props, Checks), + validate_iat(Props, Checks), + validate_nbf(Props, Checks), + validate_exp(Props, Checks). + +validate_iss(Props, Checks) -> + ExpectedISS = prop(iss, Checks), + ActualISS = prop(<<"iss">>, Props), -validate_iss(Props) -> - ExpectedISS = list_to_binary(config:get("iam", "iss")), - case proplists:get_value(<<"iss">>, Props) of - undefined -> + case {ExpectedISS, ActualISS} of + {ISS, undefined} when ISS /= undefined -> throw({error, missing_iss}); - ExpectedISS -> + {ISS, ISS} -> ok; - _ -> + {_, _} -> throw({error, invalid_iss}) end. -validate_iat(Props) -> - case proplists:get_value(<<"iat">>, Props) of - undefined -> +validate_iat(Props, Checks) -> + Required = prop(iat, Checks), + IAT = prop(<<"iat">>, Props), + + case {Required, IAT} of + {undefined, undefined} -> + ok; + {true, undefined} -> throw({error, missing_iat}); - IAT -> + {true, IAT} -> assert_past(iat, IAT) end. -validate_nbf(Props) -> - case proplists:get_value(<<"nbf">>, Props) of - undefined -> +validate_nbf(Props, Checks) -> + Required = prop(nbf, Checks), + NBF = prop(<<"nbf">>, Props), + + case {Required, NBF} of + {undefined, undefined} -> + ok; + {true, undefined} -> throw({error, missing_nbf}); - IAT -> + {true, IAT} -> assert_past(iat, IAT) end. -validate_exp(Props) -> - case proplists:get_value(<<"exp">>, Props) of - undefined -> +validate_exp(Props, Checks) -> + Required = prop(exp, Checks), + EXP = prop(<<"exp">>, Props), + + case {Required, EXP} of + {undefined, undefined} -> + ok; + {true, undefined} -> throw({error, missing_exp}); - EXP -> + {true, EXP} -> assert_future(exp, EXP) end. -public_key(Props) -> - KID = case proplists:get_value(<<"kid">>, Props) of - undefined -> +key(Props, Checks, KS) -> + Required = prop(kid, Checks), + KID = prop(<<"kid">>, Props), + case {Required, KID} of + {undefined, undefined} -> + KS(undefined); + {true, undefined} -> throw({error, missing_kid}); - List -> - binary_to_list(List) - end, - case config:get("iam_rsa_public_keys", KID) of - undefined -> - throw({error, public_key_not_found}); - ExpMod -> - [Exp, Mod] = re:split(ExpMod, ",", [{return, binary}]), - [ - crypto:bytes_to_integer(base64:decode(Exp)), - crypto:bytes_to_integer(base64:decode(Mod)) - ] + {true, KID} -> + KS(KID) end. -rs256_verify(Header, Payload, Signature, PublicKey) -> +verify(Alg, Header, Payload, Signature0, Key) -> Message = <
>, + Signature1 = b64url:decode(Signature0), + case Alg of + <<"RS256">> -> + rs256_verify(Message, Signature1, Key); + <<"HS256">> -> + hs256_verify(Message, Signature1, Key) + end. + + +rs256_verify(Message, Signature, PublicKey) -> case crypto:verify(rsa, sha256, Message, Signature, PublicKey) of true -> ok; @@ -128,6 +152,15 @@ rs256_verify(Header, Payload, Signature, PublicKey) -> end. +hs256_verify(Message, HMAC, SecretKey) -> + case crypto:hmac(sha256, SecretKey, Message) of + HMAC -> + ok; + E -> + throw({error, bad_hmac}) + end. + + split(EncodedToken) -> case binary:split(EncodedToken, <<$.>>, [global]) of [_, _, _] = Split -> Split; @@ -171,19 +204,41 @@ now_seconds() -> {MegaSecs, Secs, _MicroSecs} = os:timestamp(), MegaSecs * 1000000 + Secs. + +prop(Prop, Props) -> + proplists:get_value(Prop, Props). + + -ifdef(TEST). -include_lib("eunit/include/eunit.hrl"). -decode_test() -> - ok = application:start(config), +hs256_test() -> + EncodedToken = <<"eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJodHRwc" + "zovL2Zvby5jb20iLCJpYXQiOjAsImV4cCI6MTAwMDAwMDAwMDAwMDA" + "sImtpZCI6ImJhciJ9.lpOvEnYLdcujwo9RbhzXme6J-eQ1yfl782qq" + "crR6QYE">>, + KS = fun(_) -> <<"secret">> end, + Checks = [{iss, <<"https://foo.com">>}, iat, exp, kid], + ?assertMatch({ok, _}, decode(EncodedToken, Checks, KS)). - EncodedToken = <<"eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJodHRwczovL2Zvby5jb20iLCJpYXQiOjAsImV4cCI6MTAwMDAwMDAwMDAwMDAsImtpZCI6ImJhciJ9.bi87-lkEeOblTb_5ZEh6FkmOSg3mC_kqu2xcYJpJb3So29agyJkkidu3NF8R20x-Xi1wD6E8ACgfODsbdu5dbNRc-HUaFUnvyBr-M94PXhSOvLduoXT2mg1tgD1s_n0QgmH0pP-aAINgotDiUBuQ-pMD5hDIX2EYqAjwRcnVrno">>, +rs256_test() -> + EncodedToken = <<"eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJodHRwc" + "zovL2Zvby5jb20iLCJpYXQiOjAsImV4cCI6MTAwMDAwMDAwMDAwMDA" + "sImtpZCI6ImJhciJ9.bi87-lkEeOblTb_5ZEh6FkmOSg3mC_kqu2xc" + "YJpJb3So29agyJkkidu3NF8R20x-Xi1wD6E8ACgfODsbdu5dbNRc-H" + "UaFUnvyBr-M94PXhSOvLduoXT2mg1tgD1s_n0QgmH0pP-aAINgotDi" + "UBuQ-pMD5hDIX2EYqAjwRcnVrno">>, - PublicKey = "AQAB,3ZWrUY0Y6IKN1qI4BhxR2C7oHVFgGPYkd38uGq1jQNSqEvJFcN93CYm16/G78FAFKWqwsJb3Wx+nbxDn6LtP4AhULB1H0K0g7/jLklDAHvI8yhOKlvoyvsUFPWtNxlJyh5JJXvkNKV/4Oo12e69f8QCuQ6NpEPl+cSvXIqUYBCs=", + PublicKey = <<"AQAB,3ZWrUY0Y6IKN1qI4BhxR2C7oHVFgGPYkd38uGq1jQNSqEvJFcN93CY" + "m16/G78FAFKWqwsJb3Wx+nbxDn6LtP4AhULB1H0K0g7/jLklDAHvI8yhOKl" + "voyvsUFPWtNxlJyh5JJXvkNKV/4Oo12e69f8QCuQ6NpEPl+cSvXIqUYBCs=">>, - config:set("iam", "iss", "https://foo.com"), - config:set("iam_rsa_public_keys", "bar", PublicKey), + Checks = [{iss, <<"https://foo.com">>}, iat, exp, kid], + KS = fun(<<"bar">>) -> PublicKey end, - ?assertEqual(nope, decode(EncodedToken)). + ?assertMatch({ok, _}, decode(EncodedToken, Checks, KS)). -endif. + + + -- cgit v1.2.1 From 5f93661ba16a48a521c70d621392cda8ad385548 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Sat, 6 May 2017 12:26:57 +0100 Subject: unused var --- src/jwtf.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/jwtf.erl b/src/jwtf.erl index 61f141d82..d7f9bdee9 100644 --- a/src/jwtf.erl +++ b/src/jwtf.erl @@ -156,7 +156,7 @@ hs256_verify(Message, HMAC, SecretKey) -> case crypto:hmac(sha256, SecretKey, Message) of HMAC -> ok; - E -> + _ -> throw({error, bad_hmac}) end. -- cgit v1.2.1 From 02ecf5b76321f6fc4a4b218543da0752df5f798a Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Sun, 7 May 2017 20:09:51 +0100 Subject: add more tests --- src/jwtf.app.src | 3 +- src/jwtf.erl | 132 +++++++++++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 115 insertions(+), 20 deletions(-) diff --git a/src/jwtf.app.src b/src/jwtf.app.src index d210f4c43..304bb9e0a 100644 --- a/src/jwtf.app.src +++ b/src/jwtf.app.src @@ -19,7 +19,8 @@ stdlib, b64url, crypto, - jiffy + jiffy, + public_key ]}, {env,[]}, {modules, []}, diff --git a/src/jwtf.erl b/src/jwtf.erl index d7f9bdee9..e63a25823 100644 --- a/src/jwtf.erl +++ b/src/jwtf.erl @@ -42,7 +42,7 @@ validate_header(Props) -> <<"JWT">> -> ok; _ -> - throw({error, invalid_type}) + throw({error, invalid_typ}) end, case prop(<<"alg">>, Props) of <<"RS256">> -> @@ -101,7 +101,7 @@ validate_nbf(Props, Checks) -> {true, undefined} -> throw({error, missing_nbf}); {true, IAT} -> - assert_past(iat, IAT) + assert_past(nbf, IAT) end. @@ -144,7 +144,7 @@ verify(Alg, Header, Payload, Signature0, Key) -> rs256_verify(Message, Signature, PublicKey) -> - case crypto:verify(rsa, sha256, Message, Signature, PublicKey) of + case public_key:verify(Message, sha256, Signature, PublicKey) of true -> ok; false -> @@ -212,31 +212,125 @@ prop(Prop, Props) -> -ifdef(TEST). -include_lib("eunit/include/eunit.hrl"). +encode(Header0, Payload0) -> + Header1 = b64url:encode(jiffy:encode(Header0)), + Payload1 = b64url:encode(jiffy:encode(Payload0)), + Sig = b64url:encode(<<"bad">>), + <>. + +valid_header() -> + {[{<<"typ">>, <<"JWT">>}, {<<"alg">>, <<"RS256">>}]}. + +jwt_io_pubkey() -> + PublicKeyPEM = <<"-----BEGIN PUBLIC KEY-----\n" + "MIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQDdlatRjRjogo3WojgGH" + "FHYLugdUWAY9iR3fy4arWNA1KoS8kVw33cJibXr8bvwUAUparCwlvdbH6" + "dvEOfou0/gCFQsHUfQrSDv+MuSUMAe8jzKE4qW+jK+xQU9a03GUnKHkkl" + "e+Q0pX/g6jXZ7r1/xAK5Do2kQ+X5xK9cipRgEKwIDAQAB\n" + "-----END PUBLIC KEY-----\n">>, + [PEMEntry] = public_key:pem_decode(PublicKeyPEM), + public_key:pem_entry_decode(PEMEntry). + + +invalid_typ_test() -> + Encoded = encode({[{<<"typ">>, <<"NOPE">>}]}, []), + ?assertEqual({error, invalid_typ}, decode(Encoded, [typ], nil)). + + +invalid_alg_test() -> + Encoded = encode({[{<<"typ">>, <<"JWT">>}, {<<"alg">>, <<"NOPE">>}]}, []), + ?assertEqual({error, invalid_alg}, decode(Encoded, [alg], nil)). + + +missing_iss_test() -> + Encoded = encode(valid_header(), {[]}), + ?assertEqual({error, missing_iss}, decode(Encoded, [{iss, right}], nil)). + + +invalid_iss_test() -> + Encoded = encode(valid_header(), {[{<<"iss">>, <<"wrong">>}]}), + ?assertEqual({error, invalid_iss}, decode(Encoded, [{iss, right}], nil)). + + +missing_iat_test() -> + Encoded = encode(valid_header(), {[]}), + ?assertEqual({error, missing_iat}, decode(Encoded, [iat], nil)). + + +invalid_iat_test() -> + Encoded = encode(valid_header(), {[{<<"iat">>, 32503680000}]}), + ?assertEqual({error, {iat,not_in_past}}, decode(Encoded, [iat], nil)). + + +missing_nbf_test() -> + Encoded = encode(valid_header(), {[]}), + ?assertEqual({error, missing_nbf}, decode(Encoded, [nbf], nil)). + + +invalid_nbf_test() -> + Encoded = encode(valid_header(), {[{<<"nbf">>, 32503680000}]}), + ?assertEqual({error, {nbf,not_in_past}}, decode(Encoded, [nbf], nil)). + + +missing_exp_test() -> + Encoded = encode(valid_header(), {[]}), + ?assertEqual({error, missing_exp}, decode(Encoded, [exp], nil)). + + +invalid_exp_test() -> + Encoded = encode(valid_header(), {[{<<"exp">>, 0}]}), + ?assertEqual({error, {exp,not_in_future}}, decode(Encoded, [exp], nil)). + + +bad_rs256_sig_test() -> + Encoded = encode( + {[{<<"typ">>, <<"JWT">>}, {<<"alg">>, <<"RS256">>}]}, + {[]}), + KS = fun(undefined) -> jwt_io_pubkey() end, + ?assertEqual({error, bad_signature}, decode(Encoded, [], KS)). + + +bad_hs256_sig_test() -> + Encoded = encode( + {[{<<"typ">>, <<"JWT">>}, {<<"alg">>, <<"HS256">>}]}, + {[]}), + KS = fun(undefined) -> <<"bad">> end, + ?assertEqual({error, bad_hmac}, decode(Encoded, [], KS)). + + +malformed_token_test() -> + ?assertEqual({error, malformed_token}, decode(<<"a.b.c.d">>, [], nil)). + + hs256_test() -> EncodedToken = <<"eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJodHRwc" "zovL2Zvby5jb20iLCJpYXQiOjAsImV4cCI6MTAwMDAwMDAwMDAwMDA" "sImtpZCI6ImJhciJ9.lpOvEnYLdcujwo9RbhzXme6J-eQ1yfl782qq" "crR6QYE">>, KS = fun(_) -> <<"secret">> end, - Checks = [{iss, <<"https://foo.com">>}, iat, exp, kid], + Checks = [{iss, <<"https://foo.com">>}, iat, exp, kid, sig], ?assertMatch({ok, _}, decode(EncodedToken, Checks, KS)). -rs256_test() -> - EncodedToken = <<"eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJodHRwc" - "zovL2Zvby5jb20iLCJpYXQiOjAsImV4cCI6MTAwMDAwMDAwMDAwMDA" - "sImtpZCI6ImJhciJ9.bi87-lkEeOblTb_5ZEh6FkmOSg3mC_kqu2xc" - "YJpJb3So29agyJkkidu3NF8R20x-Xi1wD6E8ACgfODsbdu5dbNRc-H" - "UaFUnvyBr-M94PXhSOvLduoXT2mg1tgD1s_n0QgmH0pP-aAINgotDi" - "UBuQ-pMD5hDIX2EYqAjwRcnVrno">>, - PublicKey = <<"AQAB,3ZWrUY0Y6IKN1qI4BhxR2C7oHVFgGPYkd38uGq1jQNSqEvJFcN93CY" - "m16/G78FAFKWqwsJb3Wx+nbxDn6LtP4AhULB1H0K0g7/jLklDAHvI8yhOKl" - "voyvsUFPWtNxlJyh5JJXvkNKV/4Oo12e69f8QCuQ6NpEPl+cSvXIqUYBCs=">>, - - Checks = [{iss, <<"https://foo.com">>}, iat, exp, kid], - KS = fun(<<"bar">>) -> PublicKey end, - - ?assertMatch({ok, _}, decode(EncodedToken, Checks, KS)). +%% jwt.io example +rs256_test() -> + EncodedToken = <<"eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0N" + "TY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiYWRtaW4iOnRydWV9.Ek" + "N-DOsnsuRjRO6BxXemmJDm3HbxrbRzXglbN2S4sOkopdU4IsDxTI8j" + "O19W_A4K8ZPJijNLis4EZsHeY559a4DFOd50_OqgHGuERTqYZyuhtF" + "39yxJPAjUESwxk2J5k_4zM3O-vtd1Ghyo4IbqKKSy6J9mTniYJPenn" + "5-HIirE">>, + + Checks = [sig], + KS = fun(undefined) -> jwt_io_pubkey() end, + + ExpectedPayload = {[ + {<<"sub">>, <<"1234567890">>}, + {<<"name">>, <<"John Doe">>}, + {<<"admin">>, true} + ]}, + + ?assertMatch({ok, ExpectedPayload}, decode(EncodedToken, Checks, KS)). -endif. -- cgit v1.2.1 From 5b9dad72f40750abb52184d925a15667b29abe1e Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Mon, 8 May 2017 15:45:53 +0100 Subject: Add JKWS cache --- src/jwks.erl | 141 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/jwtf.erl | 3 -- 2 files changed, 141 insertions(+), 3 deletions(-) create mode 100644 src/jwks.erl diff --git a/src/jwks.erl b/src/jwks.erl new file mode 100644 index 000000000..62bf3ca1d --- /dev/null +++ b/src/jwks.erl @@ -0,0 +1,141 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(jwks). +-behaviour(gen_server). + +-export([ + start_link/1, + get_key/2 +]). + +-export([ + init/1, + handle_call/3, + handle_cast/2, + handle_info/2, + code_change/3, + terminate/2 +]). + +start_link(Url) -> + gen_server:start_link({local, ?MODULE}, ?MODULE, Url, []). + + +get_key(Pid, Kid) -> + case lookup(Kid) of + {ok, Key} -> + %% couch_stats:increment_counter([jkws, hit]), + {ok, Key}; + {error, not_found} -> + %% couch_stats:increment_counter([jkws, miss]), + Url = gen_server:call(Pid, get_url), + KeySet = get_keyset(Url), + ok = gen_server:call(Pid, {replace_keyset, KeySet}), + lookup(Kid) + end. + + +lookup(Kid) -> + case ets:lookup(?MODULE, Kid) of + [{Kid, Key}] -> + {ok, Key}; + [] -> + {error, not_found} + end. + + + +%% gen_server functions + +init(Url) -> + ?MODULE = ets:new(?MODULE, [protected, named_table, {read_concurrency, true}]), + KeySet = get_keyset(Url), + set_keyset(KeySet), + {ok, Url}. + + +handle_call({replace_keyset, KeySet}, _From, State) -> + set_keyset(KeySet), + {reply, ok, State}; + +handle_call(get_url, _From, State) -> + {reply, State, State}; + +handle_call(_Msg, _From, State) -> + {noreply, State}. + + +handle_cast(_Msg, State) -> + {noreply, State}. + + +handle_info(_Msg, State) -> + {noreply, State}. + + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + + +terminate(_Reason, _State) -> + ok. + +%% private functions + +get_keyset(Url) -> + ReqHeaders = [], + %% T0 = os:timestamp(), + case ibrowse:send_req(Url, ReqHeaders, get) of + {ok, "200", _RespHeaders, RespBody} -> + %% Latency = timer:now_diff(os:timestamp(), T0) / 1000, + %% couch_stats:update_histogram([jkws, latency], Latency), + parse_keyset(RespBody); + Else -> + io:format("~p", [Else]), + [] + end. + + +set_keyset(KeySet) -> + true = ets:delete_all_objects(?MODULE), + true = ets:insert(?MODULE, KeySet). + + +parse_keyset(Body) -> + {Props} = jiffy:decode(Body), + Keys = proplists:get_value(<<"keys">>, Props), + [parse_key(Key) || Key <- Keys]. + + +parse_key({Props}) -> + <<"RS256">> = proplists:get_value(<<"alg">>, Props), + <<"RSA">> = proplists:get_value(<<"kty">>, Props), + Kid = proplists:get_value(<<"kid">>, Props), + E = proplists:get_value(<<"e">>, Props), + N = proplists:get_value(<<"n">>, Props), + {Kid, {'RSAPublicKey', decode_number(N), decode_number(E)}}. + + +decode_number(Base64) -> + crypto:bytes_to_integer(b64url:decode(Base64)). + + +-ifdef(TEST). +-include_lib("eunit/include/eunit.hrl"). + +jwks_test() -> + application:start(ibrowse), + jwks:start_link("https://iam.stage1.eu-gb.bluemix.net/oidc/keys"), + ?assertMatch({ok, _}, jwks:get_key(?MODULE, <<"20170401-00:00:00">>)). + +-endif. diff --git a/src/jwtf.erl b/src/jwtf.erl index e63a25823..ec4a19ac8 100644 --- a/src/jwtf.erl +++ b/src/jwtf.erl @@ -333,6 +333,3 @@ rs256_test() -> ?assertMatch({ok, ExpectedPayload}, decode(EncodedToken, Checks, KS)). -endif. - - - -- cgit v1.2.1 From d7bd8d16f560d3884a7da68e03b3b4eb62544b26 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Mon, 8 May 2017 19:20:01 +0100 Subject: Make typ and alg optional and make everything truly optional. --- src/jwtf.erl | 50 ++++++++++++++++++++++++++++++++++---------------- 1 file changed, 34 insertions(+), 16 deletions(-) diff --git a/src/jwtf.erl b/src/jwtf.erl index ec4a19ac8..f3f41a686 100644 --- a/src/jwtf.erl +++ b/src/jwtf.erl @@ -27,7 +27,7 @@ decode(EncodedToken, Checks, KS) -> validate(Header0, Payload0, Signature, Checks, KS) -> Header1 = props(decode_json(Header0)), - validate_header(Header1), + validate_header(Header1, Checks), Payload1 = props(decode_json(Payload0)), validate_payload(Payload1, Checks), @@ -37,17 +37,37 @@ validate(Header0, Payload0, Signature, Checks, KS) -> verify(Alg, Header0, Payload0, Signature, Key). -validate_header(Props) -> - case prop(<<"typ">>, Props) of - <<"JWT">> -> +validate_header(Props, Checks) -> + validate_typ(Props, Checks), + validate_alg(Props, Checks). + + +validate_typ(Props, Checks) -> + Required = prop(typ, Checks), + TYP = prop(<<"typ">>, Props), + case {Required, TYP} of + {undefined, _} -> ok; - _ -> + {true, undefined} -> + throw({error, missing_typ}); + {true, <<"JWT">>} -> + ok; + {true, _} -> throw({error, invalid_typ}) - end, - case prop(<<"alg">>, Props) of - <<"RS256">> -> + end. + + +validate_alg(Props, Checks) -> + Required = prop(alg, Checks), + Alg = prop(<<"alg">>, Props), + case {Required, Alg} of + {undefined, _} -> ok; - <<"HS256">> -> + {true, undefined} -> + throw({error, missing_alg}); + {true, <<"RS256">>} -> + ok; + {true, <<"HS256">>} -> ok; _ -> throw({error, invalid_alg}) @@ -82,7 +102,7 @@ validate_iat(Props, Checks) -> IAT = prop(<<"iat">>, Props), case {Required, IAT} of - {undefined, undefined} -> + {undefined, _} -> ok; {true, undefined} -> throw({error, missing_iat}); @@ -96,7 +116,7 @@ validate_nbf(Props, Checks) -> NBF = prop(<<"nbf">>, Props), case {Required, NBF} of - {undefined, undefined} -> + {undefined, _} -> ok; {true, undefined} -> throw({error, missing_nbf}); @@ -110,7 +130,7 @@ validate_exp(Props, Checks) -> EXP = prop(<<"exp">>, Props), case {Required, EXP} of - {undefined, undefined} -> + {undefined, _} -> ok; {true, undefined} -> throw({error, missing_exp}); @@ -123,11 +143,9 @@ key(Props, Checks, KS) -> Required = prop(kid, Checks), KID = prop(<<"kid">>, Props), case {Required, KID} of - {undefined, undefined} -> - KS(undefined); {true, undefined} -> throw({error, missing_kid}); - {true, KID} -> + {_, KID} -> KS(KID) end. @@ -308,7 +326,7 @@ hs256_test() -> "sImtpZCI6ImJhciJ9.lpOvEnYLdcujwo9RbhzXme6J-eQ1yfl782qq" "crR6QYE">>, KS = fun(_) -> <<"secret">> end, - Checks = [{iss, <<"https://foo.com">>}, iat, exp, kid, sig], + Checks = [{iss, <<"https://foo.com">>}, iat, exp, kid, sig, typ, alg], ?assertMatch({ok, _}, decode(EncodedToken, Checks, KS)). -- cgit v1.2.1 From 8077258826f6c53359df22d97a42a323e7d12a6e Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Mon, 8 May 2017 20:13:35 +0100 Subject: use public url --- src/jwks.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/jwks.erl b/src/jwks.erl index 62bf3ca1d..edd695964 100644 --- a/src/jwks.erl +++ b/src/jwks.erl @@ -135,7 +135,7 @@ decode_number(Base64) -> jwks_test() -> application:start(ibrowse), - jwks:start_link("https://iam.stage1.eu-gb.bluemix.net/oidc/keys"), - ?assertMatch({ok, _}, jwks:get_key(?MODULE, <<"20170401-00:00:00">>)). + jwks:start_link("https://iam.eu-gb.bluemix.net/oidc/keys"), + ?assertMatch({ok, _}, jwks:get_key(?MODULE, <<"20170402-00:00:00">>)). -endif. -- cgit v1.2.1 From 3cb8b7d42475bb9c0f96d075aaa7dffab64a1f7c Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Mon, 8 May 2017 20:30:09 +0100 Subject: 98% coverage --- src/jwtf.erl | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/src/jwtf.erl b/src/jwtf.erl index f3f41a686..e7157f1f4 100644 --- a/src/jwtf.erl +++ b/src/jwtf.erl @@ -250,11 +250,21 @@ jwt_io_pubkey() -> public_key:pem_entry_decode(PEMEntry). +missing_typ_test() -> + Encoded = encode({[]}, []), + ?assertEqual({error, missing_typ}, decode(Encoded, [typ], nil)). + + invalid_typ_test() -> Encoded = encode({[{<<"typ">>, <<"NOPE">>}]}, []), ?assertEqual({error, invalid_typ}, decode(Encoded, [typ], nil)). +missing_alg_test() -> + Encoded = encode({[{<<"typ">>, <<"NOPE">>}]}, []), + ?assertEqual({error, missing_alg}, decode(Encoded, [alg], nil)). + + invalid_alg_test() -> Encoded = encode({[{<<"typ">>, <<"JWT">>}, {<<"alg">>, <<"NOPE">>}]}, []), ?assertEqual({error, invalid_alg}, decode(Encoded, [alg], nil)). @@ -300,6 +310,11 @@ invalid_exp_test() -> ?assertEqual({error, {exp,not_in_future}}, decode(Encoded, [exp], nil)). +missing_kid_test() -> + Encoded = encode(valid_header(), {[]}), + ?assertEqual({error, missing_kid}, decode(Encoded, [kid], nil)). + + bad_rs256_sig_test() -> Encoded = encode( {[{<<"typ">>, <<"JWT">>}, {<<"alg">>, <<"RS256">>}]}, @@ -339,7 +354,7 @@ rs256_test() -> "39yxJPAjUESwxk2J5k_4zM3O-vtd1Ghyo4IbqKKSy6J9mTniYJPenn" "5-HIirE">>, - Checks = [sig], + Checks = [sig, alg], KS = fun(undefined) -> jwt_io_pubkey() end, ExpectedPayload = {[ -- cgit v1.2.1 From e60fa5015b5b0debf8be7d95e70c731638d7f2bd Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Tue, 9 May 2017 12:35:29 +0100 Subject: kid belongs in the header --- src/jwtf.erl | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/jwtf.erl b/src/jwtf.erl index e7157f1f4..1a1877c03 100644 --- a/src/jwtf.erl +++ b/src/jwtf.erl @@ -33,7 +33,7 @@ validate(Header0, Payload0, Signature, Checks, KS) -> validate_payload(Payload1, Checks), Alg = prop(<<"alg">>, Header1), - Key = key(Payload1, Checks, KS), + Key = key(Header1, Checks, KS), verify(Alg, Header0, Payload0, Signature, Key). @@ -311,7 +311,7 @@ invalid_exp_test() -> missing_kid_test() -> - Encoded = encode(valid_header(), {[]}), + Encoded = encode({[]}, {[]}), ?assertEqual({error, missing_kid}, decode(Encoded, [kid], nil)). @@ -336,13 +336,13 @@ malformed_token_test() -> hs256_test() -> - EncodedToken = <<"eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJodHRwc" - "zovL2Zvby5jb20iLCJpYXQiOjAsImV4cCI6MTAwMDAwMDAwMDAwMDA" - "sImtpZCI6ImJhciJ9.lpOvEnYLdcujwo9RbhzXme6J-eQ1yfl782qq" - "crR6QYE">>, - KS = fun(_) -> <<"secret">> end, - Checks = [{iss, <<"https://foo.com">>}, iat, exp, kid, sig, typ, alg], - ?assertMatch({ok, _}, decode(EncodedToken, Checks, KS)). + EncodedToken = <<"eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCIsImtpZCI6IjEyMzQ1Ni" + "J9.eyJpc3MiOiJodHRwczovL2Zvby5jb20iLCJpYXQiOjAsImV4cCI" + "6MTAwMDAwMDAwMDAwMDAsImtpZCI6ImJhciJ9.iS8AH11QHHlczkBn" + "Hl9X119BYLOZyZPllOVhSBZ4RZs">>, + KS = fun(<<"123456">>) -> <<"secret">> end, + Checks = [{iss, <<"https://foo.com">>}, iat, exp, sig, typ, alg, kid], + ?assertMatch({ok, _}, catch decode(EncodedToken, Checks, KS)). %% jwt.io example -- cgit v1.2.1 From a18a2e5e5c40bb406f67f27b00bb3d206778aefd Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Tue, 9 May 2017 13:50:33 +0100 Subject: some documentation --- src/jwks.erl | 5 +++++ src/jwtf.erl | 10 ++++++++++ 2 files changed, 15 insertions(+) diff --git a/src/jwks.erl b/src/jwks.erl index edd695964..748c162d8 100644 --- a/src/jwks.erl +++ b/src/jwks.erl @@ -10,6 +10,11 @@ % License for the specific language governing permissions and limitations under % the License. +% @doc +% This module parses JSON Web Key Sets (JWKS) and caches them for +% performance reasons. To use the module, include it in your +% supervision tree. + -module(jwks). -behaviour(gen_server). diff --git a/src/jwtf.erl b/src/jwtf.erl index 1a1877c03..6ec832f73 100644 --- a/src/jwtf.erl +++ b/src/jwtf.erl @@ -10,10 +10,20 @@ % License for the specific language governing permissions and limitations under % the License. +% @doc +% This module decodes and validates JWT tokens. Almost all property +% checks are optional. If not checked, the presence or validity of the +% field is not verified. Signature check is mandatory, though. + -module(jwtf). -export([decode/3]). +% @doc decode +% Decodes the supplied encoded token, checking +% for the attributes defined in Checks and calling +% the key store function to retrieve the key needed +% to verify the signature decode(EncodedToken, Checks, KS) -> try [Header, Payload, Signature] = split(EncodedToken), -- cgit v1.2.1 From 69e1ce2b3e92f87c4b2ca19c182256d8f9ac1c92 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Tue, 9 May 2017 15:14:11 +0100 Subject: Add stats, don't wipe cache on error --- priv/stats_descriptions.cfg | 12 ++++++++++++ src/jwks.erl | 31 +++++++++++++++++-------------- src/jwtf.app.src | 1 + 3 files changed, 30 insertions(+), 14 deletions(-) create mode 100644 priv/stats_descriptions.cfg diff --git a/priv/stats_descriptions.cfg b/priv/stats_descriptions.cfg new file mode 100644 index 000000000..7aa5cab5d --- /dev/null +++ b/priv/stats_descriptions.cfg @@ -0,0 +1,12 @@ +{[jkws, hit], [ + {type, counter}, + {desc, <<"cache hit for JKWS key lookup">>} +]}. +{[jkws, miss], [ + {type, counter}, + {desc, <<"cache miss for JKWS key lookup">>} +]}. +{[jkws, latency], [ + {type, histogram}, + {desc, <<"distribution of latencies for calls to retrieve JKWS keys">>} +]}. diff --git a/src/jwks.erl b/src/jwks.erl index 748c162d8..1c416dced 100644 --- a/src/jwks.erl +++ b/src/jwks.erl @@ -32,21 +32,25 @@ terminate/2 ]). -start_link(Url) -> - gen_server:start_link({local, ?MODULE}, ?MODULE, Url, []). +start_link(JWKSUrl) -> + gen_server:start_link({local, ?MODULE}, ?MODULE, JWKSUrl, []). get_key(Pid, Kid) -> case lookup(Kid) of {ok, Key} -> - %% couch_stats:increment_counter([jkws, hit]), + couch_stats:increment_counter([jkws, hit]), {ok, Key}; {error, not_found} -> - %% couch_stats:increment_counter([jkws, miss]), + couch_stats:increment_counter([jkws, miss]), Url = gen_server:call(Pid, get_url), - KeySet = get_keyset(Url), - ok = gen_server:call(Pid, {replace_keyset, KeySet}), - lookup(Kid) + case get_keyset(Url) of + {ok, KeySet} -> + ok = gen_server:call(Pid, {replace_keyset, KeySet}), + lookup(Kid); + {error, Reason} -> + {error, Reason} + end end. @@ -99,15 +103,14 @@ terminate(_Reason, _State) -> get_keyset(Url) -> ReqHeaders = [], - %% T0 = os:timestamp(), + T0 = os:timestamp(), case ibrowse:send_req(Url, ReqHeaders, get) of {ok, "200", _RespHeaders, RespBody} -> - %% Latency = timer:now_diff(os:timestamp(), T0) / 1000, - %% couch_stats:update_histogram([jkws, latency], Latency), - parse_keyset(RespBody); - Else -> - io:format("~p", [Else]), - [] + Latency = timer:now_diff(os:timestamp(), T0) / 1000, + couch_stats:update_histogram([jkws, latency], Latency), + {ok, parse_keyset(RespBody)}; + _Else -> + {error, get_keyset_failed} end. diff --git a/src/jwtf.app.src b/src/jwtf.app.src index 304bb9e0a..87d9aafba 100644 --- a/src/jwtf.app.src +++ b/src/jwtf.app.src @@ -18,6 +18,7 @@ kernel, stdlib, b64url, + couch_stats, crypto, jiffy, public_key -- cgit v1.2.1 From 25bfdc3c9a4262d64bed2e11d53997ad0c838551 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Tue, 9 May 2017 17:26:59 +0100 Subject: make jwks simpler, caching can happen elsewhere --- priv/stats_descriptions.cfg | 12 ------ src/jwks.erl | 99 ++------------------------------------------- src/jwtf.app.src | 1 - 3 files changed, 4 insertions(+), 108 deletions(-) delete mode 100644 priv/stats_descriptions.cfg diff --git a/priv/stats_descriptions.cfg b/priv/stats_descriptions.cfg deleted file mode 100644 index 7aa5cab5d..000000000 --- a/priv/stats_descriptions.cfg +++ /dev/null @@ -1,12 +0,0 @@ -{[jkws, hit], [ - {type, counter}, - {desc, <<"cache hit for JKWS key lookup">>} -]}. -{[jkws, miss], [ - {type, counter}, - {desc, <<"cache miss for JKWS key lookup">>} -]}. -{[jkws, latency], [ - {type, histogram}, - {desc, <<"distribution of latencies for calls to retrieve JKWS keys">>} -]}. diff --git a/src/jwks.erl b/src/jwks.erl index 1c416dced..1820ab669 100644 --- a/src/jwks.erl +++ b/src/jwks.erl @@ -11,114 +11,24 @@ % the License. % @doc -% This module parses JSON Web Key Sets (JWKS) and caches them for -% performance reasons. To use the module, include it in your -% supervision tree. +% This module fetches and parses JSON Web Key Sets (JWKS). -module(jwks). --behaviour(gen_server). -export([ - start_link/1, - get_key/2 + get_keyset/1 ]). --export([ - init/1, - handle_call/3, - handle_cast/2, - handle_info/2, - code_change/3, - terminate/2 -]). - -start_link(JWKSUrl) -> - gen_server:start_link({local, ?MODULE}, ?MODULE, JWKSUrl, []). - - -get_key(Pid, Kid) -> - case lookup(Kid) of - {ok, Key} -> - couch_stats:increment_counter([jkws, hit]), - {ok, Key}; - {error, not_found} -> - couch_stats:increment_counter([jkws, miss]), - Url = gen_server:call(Pid, get_url), - case get_keyset(Url) of - {ok, KeySet} -> - ok = gen_server:call(Pid, {replace_keyset, KeySet}), - lookup(Kid); - {error, Reason} -> - {error, Reason} - end - end. - - -lookup(Kid) -> - case ets:lookup(?MODULE, Kid) of - [{Kid, Key}] -> - {ok, Key}; - [] -> - {error, not_found} - end. - - - -%% gen_server functions - -init(Url) -> - ?MODULE = ets:new(?MODULE, [protected, named_table, {read_concurrency, true}]), - KeySet = get_keyset(Url), - set_keyset(KeySet), - {ok, Url}. - - -handle_call({replace_keyset, KeySet}, _From, State) -> - set_keyset(KeySet), - {reply, ok, State}; - -handle_call(get_url, _From, State) -> - {reply, State, State}; - -handle_call(_Msg, _From, State) -> - {noreply, State}. - - -handle_cast(_Msg, State) -> - {noreply, State}. - - -handle_info(_Msg, State) -> - {noreply, State}. - - -code_change(_OldVsn, State, _Extra) -> - {ok, State}. - - -terminate(_Reason, _State) -> - ok. - -%% private functions - get_keyset(Url) -> ReqHeaders = [], - T0 = os:timestamp(), case ibrowse:send_req(Url, ReqHeaders, get) of {ok, "200", _RespHeaders, RespBody} -> - Latency = timer:now_diff(os:timestamp(), T0) / 1000, - couch_stats:update_histogram([jkws, latency], Latency), {ok, parse_keyset(RespBody)}; _Else -> {error, get_keyset_failed} end. -set_keyset(KeySet) -> - true = ets:delete_all_objects(?MODULE), - true = ets:insert(?MODULE, KeySet). - - parse_keyset(Body) -> {Props} = jiffy:decode(Body), Keys = proplists:get_value(<<"keys">>, Props), @@ -142,8 +52,7 @@ decode_number(Base64) -> -include_lib("eunit/include/eunit.hrl"). jwks_test() -> - application:start(ibrowse), - jwks:start_link("https://iam.eu-gb.bluemix.net/oidc/keys"), - ?assertMatch({ok, _}, jwks:get_key(?MODULE, <<"20170402-00:00:00">>)). + application:ensure_all_started(ibrowse), + ?assertMatch({ok, _}, get_keyset("https://iam.eu-gb.bluemix.net/oidc/keys")). -endif. diff --git a/src/jwtf.app.src b/src/jwtf.app.src index 87d9aafba..304bb9e0a 100644 --- a/src/jwtf.app.src +++ b/src/jwtf.app.src @@ -18,7 +18,6 @@ kernel, stdlib, b64url, - couch_stats, crypto, jiffy, public_key -- cgit v1.2.1 From 31999f40e1c4acecab3a317dcdb9e08783d9b0d2 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Tue, 9 May 2017 20:07:15 +0100 Subject: allow iss to be optional --- src/jwtf.erl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/jwtf.erl b/src/jwtf.erl index 6ec832f73..b03fa91c4 100644 --- a/src/jwtf.erl +++ b/src/jwtf.erl @@ -98,7 +98,9 @@ validate_iss(Props, Checks) -> ActualISS = prop(<<"iss">>, Props), case {ExpectedISS, ActualISS} of - {ISS, undefined} when ISS /= undefined -> + {undefined, _} -> + ok; + {_ISS, undefined} -> throw({error, missing_iss}); {ISS, ISS} -> ok; -- cgit v1.2.1 From acbaa3731b7a1131b1116df5cb1cd3d86ddc2534 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Tue, 9 May 2017 22:36:02 +0100 Subject: slightly improve readme --- README.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 84e196ad8..27e1e788e 100644 --- a/README.md +++ b/README.md @@ -6,5 +6,7 @@ This library provides JWT parsing and validation functions Supports; -# Verify -# RS256 +* Verify +* RS256 +* HS256 + -- cgit v1.2.1 From bf7a2edac9024696f6ba4d0092e45cf071815e71 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Wed, 10 May 2017 18:06:13 +0100 Subject: expand algorithm support --- src/jwks.erl | 50 ++++++++++++++++++++++++++++++++++++++++++++------ src/jwtf.erl | 54 ++++++++++++++++++++++++++++++++++++++++++------------ 2 files changed, 86 insertions(+), 18 deletions(-) diff --git a/src/jwks.erl b/src/jwks.erl index 1820ab669..8b72ac85c 100644 --- a/src/jwks.erl +++ b/src/jwks.erl @@ -19,6 +19,8 @@ get_keyset/1 ]). +-include_lib("public_key/include/public_key.hrl"). + get_keyset(Url) -> ReqHeaders = [], case ibrowse:send_req(Url, ReqHeaders, get) of @@ -32,16 +34,23 @@ get_keyset(Url) -> parse_keyset(Body) -> {Props} = jiffy:decode(Body), Keys = proplists:get_value(<<"keys">>, Props), - [parse_key(Key) || Key <- Keys]. + lists:flatmap(fun parse_key/1, Keys). parse_key({Props}) -> - <<"RS256">> = proplists:get_value(<<"alg">>, Props), - <<"RSA">> = proplists:get_value(<<"kty">>, Props), + Alg = proplists:get_value(<<"alg">>, Props), + Kty = proplists:get_value(<<"kty">>, Props), Kid = proplists:get_value(<<"kid">>, Props), - E = proplists:get_value(<<"e">>, Props), - N = proplists:get_value(<<"n">>, Props), - {Kid, {'RSAPublicKey', decode_number(N), decode_number(E)}}. + case {Alg, Kty} of + {<<"RS256">>, <<"RSA">>} -> + E = proplists:get_value(<<"e">>, Props), + N = proplists:get_value(<<"n">>, Props), + [{{Kty, Kid}, #'RSAPublicKey'{ + modulus = decode_number(N), + publicExponent = decode_number(E)}}]; + _ -> + [] + end. decode_number(Base64) -> @@ -55,4 +64,33 @@ jwks_test() -> application:ensure_all_started(ibrowse), ?assertMatch({ok, _}, get_keyset("https://iam.eu-gb.bluemix.net/oidc/keys")). +rs_test() -> + Ejson = {[ + {<<"kty">>, <<"RSA">>}, + {<<"n">>, <<"0vx7agoebGcQSuuPiLJXZptN9nndrQmbXEps2aiAFbWhM78LhWx" + "4cbbfAAtVT86zwu1RK7aPFFxuhDR1L6tSoc_BJECPebWKRXjBZCiFV4n3oknjhMs" + "tn64tZ_2W-5JsGY4Hc5n9yBXArwl93lqt7_RN5w6Cf0h4QyQ5v-65YGjQR0_FDW2" + "QvzqY368QQMicAtaSqzs8KJZgnYb9c7d0zgdAZHzu6qMQvRL5hajrn1n91CbOpbI" + "SD08qNLyrdkt-bFTWhAI4vMQFh6WeZu0fM4lFd2NcRwr3XPksINHaQ-G_xBniIqb" + "w0Ls1jF44-csFCur-kEgU8awapJzKnqDKgw">>}, + {<<"e">>, <<"AQAB">>}, + {<<"alg">>, <<"RS256">>}, + {<<"kid">>, <<"2011-04-29">>} + ]}, + ?assertMatch([{{<<"RSA">>, <<"2011-04-29">>}, {'RSAPublicKey', _, 65537}}], + parse_key(Ejson)). + + +ec_test() -> + Ejson = {[ + {<<"kty">>, <<"EC">>}, + {<<"crv">>, <<"P-256">>}, + {<<"x">>, <<"MKBCTNIcKUSDii11ySs3526iDZ8AiTo7Tu6KPAqv7D4">>}, + {<<"y">>, <<"4Etl6SRW2YiLUrN5vfvVHuhp7x8PxltmWWlbbM4IFyM">>}, + {<<"alg">>, <<"ES256">>}, + {<<"kid">>, <<"1">>} + ]}, + %% TODO figure out how to convert x,y to an ECPoint. + ?assertMatch([], parse_key(Ejson)). + -endif. diff --git a/src/jwtf.erl b/src/jwtf.erl index b03fa91c4..18f84deb7 100644 --- a/src/jwtf.erl +++ b/src/jwtf.erl @@ -70,17 +70,31 @@ validate_typ(Props, Checks) -> validate_alg(Props, Checks) -> Required = prop(alg, Checks), Alg = prop(<<"alg">>, Props), + Valid = [ + <<"RS256">>, + <<"RS384">>, + <<"RS512">>, + + <<"HS256">>, + <<"HS384">>, + <<"HS512">>, + + <<"ES384">>, + <<"ES512">>, + <<"ES512">> + ], case {Required, Alg} of {undefined, _} -> ok; {true, undefined} -> throw({error, missing_alg}); - {true, <<"RS256">>} -> - ok; - {true, <<"HS256">>} -> - ok; - _ -> - throw({error, invalid_alg}) + {true, Alg} -> + case lists:member(Alg, Valid) of + true -> + ok; + false -> + throw({error, invalid_alg}) + end end. @@ -167,14 +181,30 @@ verify(Alg, Header, Payload, Signature0, Key) -> Signature1 = b64url:decode(Signature0), case Alg of <<"RS256">> -> - rs256_verify(Message, Signature1, Key); + public_key_verify(sha256, Message, Signature1, Key); + <<"RS384">> -> + public_key_verify(sha384, Message, Signature1, Key); + <<"RS512">> -> + public_key_verify(sha512, Message, Signature1, Key); + + <<"ES256">> -> + public_key_verify(sha256, Message, Signature1, Key); + <<"ES384">> -> + public_key_verify(sha384, Message, Signature1, Key); + <<"ES512">> -> + public_key_verify(sha512, Message, Signature1, Key); + <<"HS256">> -> - hs256_verify(Message, Signature1, Key) + hmac_verify(sha256, Message, Signature1, Key); + <<"HS384">> -> + hmac_verify(sha384, Message, Signature1, Key); + <<"HS512">> -> + hmac_verify(sha512, Message, Signature1, Key) end. -rs256_verify(Message, Signature, PublicKey) -> - case public_key:verify(Message, sha256, Signature, PublicKey) of +public_key_verify(Alg, Message, Signature, PublicKey) -> + case public_key:verify(Message, Alg, Signature, PublicKey) of true -> ok; false -> @@ -182,8 +212,8 @@ rs256_verify(Message, Signature, PublicKey) -> end. -hs256_verify(Message, HMAC, SecretKey) -> - case crypto:hmac(sha256, SecretKey, Message) of +hmac_verify(Alg, Message, HMAC, SecretKey) -> + case crypto:hmac(Alg, SecretKey, Message) of HMAC -> ok; _ -> -- cgit v1.2.1 From 61f47b34cb764f9e392c3f3f18651e7cb01ef9ab Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Wed, 10 May 2017 18:50:22 +0100 Subject: support P-256 in JWKS --- src/jwks.erl | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/jwks.erl b/src/jwks.erl index 8b72ac85c..d1863303c 100644 --- a/src/jwks.erl +++ b/src/jwks.erl @@ -48,6 +48,18 @@ parse_key({Props}) -> [{{Kty, Kid}, #'RSAPublicKey'{ modulus = decode_number(N), publicExponent = decode_number(E)}}]; + {<<"ES256">>, <<"EC">>} -> + Crv = proplists:get_value(<<"crv">>, Props), + case Crv of + <<"P-256">> -> + X = proplists:get_value(<<"x">>, Props), + Y = proplists:get_value(<<"y">>, Props), + Point = <<4:8, X/binary, Y/binary>>, + [{{Kty, Kid}, #'ECPoint'{ + point = Point}}]; + _ -> + [] + end; _ -> [] end. @@ -91,6 +103,6 @@ ec_test() -> {<<"kid">>, <<"1">>} ]}, %% TODO figure out how to convert x,y to an ECPoint. - ?assertMatch([], parse_key(Ejson)). + ?assertMatch([{{<<"EC">>, <<"1">>}, {'ECPoint', _}}], parse_key(Ejson)). -endif. -- cgit v1.2.1 From 373a3671fa576d762e4dab89a655b9536885a15f Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Wed, 10 May 2017 18:54:17 +0100 Subject: update alg list --- README.md | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 27e1e788e..e6038fbc0 100644 --- a/README.md +++ b/README.md @@ -8,5 +8,11 @@ Supports; * Verify * RS256 +* RS384 +* RS512 * HS256 - +* HS384 +* HS512 +* ES256 +* ES384 +* ES512 -- cgit v1.2.1 From ae0e0f495db22069e6c811462cd974fea7ae7ad8 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Wed, 10 May 2017 19:51:17 +0100 Subject: return a public key tuple --- src/jwks.erl | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/jwks.erl b/src/jwks.erl index d1863303c..a2231b2f4 100644 --- a/src/jwks.erl +++ b/src/jwks.erl @@ -55,8 +55,10 @@ parse_key({Props}) -> X = proplists:get_value(<<"x">>, Props), Y = proplists:get_value(<<"y">>, Props), Point = <<4:8, X/binary, Y/binary>>, - [{{Kty, Kid}, #'ECPoint'{ - point = Point}}]; + [{{Kty, Kid}, { + #'ECPoint'{point = Point}, + {namedCurve, secp256r1} + }}]; _ -> [] end; @@ -103,6 +105,7 @@ ec_test() -> {<<"kid">>, <<"1">>} ]}, %% TODO figure out how to convert x,y to an ECPoint. - ?assertMatch([{{<<"EC">>, <<"1">>}, {'ECPoint', _}}], parse_key(Ejson)). + ?assertMatch([{{<<"EC">>, <<"1">>}, {{'ECPoint', _}, + {namedCurve, secp256r1}}}], parse_key(Ejson)). -endif. -- cgit v1.2.1 From e0d61d06651b576b9b0a36600529028aae334e68 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Wed, 10 May 2017 21:54:21 +0100 Subject: test EC --- src/jwks.erl | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/src/jwks.erl b/src/jwks.erl index a2231b2f4..b88c59068 100644 --- a/src/jwks.erl +++ b/src/jwks.erl @@ -54,10 +54,12 @@ parse_key({Props}) -> <<"P-256">> -> X = proplists:get_value(<<"x">>, Props), Y = proplists:get_value(<<"y">>, Props), - Point = <<4:8, X/binary, Y/binary>>, + Point = <<4:8, + (b64url:decode(X))/binary, + (b64url:decode(Y))/binary>>, [{{Kty, Kid}, { #'ECPoint'{point = Point}, - {namedCurve, secp256r1} + {namedCurve,{1,2,840,10045,3,1,7}} }}]; _ -> [] @@ -96,6 +98,13 @@ rs_test() -> ec_test() -> + PrivateKey = #'ECPrivateKey'{ + version = 1, + parameters = {namedCurve,{1,2,840,10045,3,1,7}}, + privateKey = b64url:decode("870MB6gfuTJ4HtUnUvYMyJpr5eUZNP4Bk43bVdj3eAE"), + publicKey = <<4:8, + (b64url:decode("MKBCTNIcKUSDii11ySs3526iDZ8AiTo7Tu6KPAqv7D4"))/binary, + (b64url:decode("4Etl6SRW2YiLUrN5vfvVHuhp7x8PxltmWWlbbM4IFyM"))/binary>>}, Ejson = {[ {<<"kty">>, <<"EC">>}, {<<"crv">>, <<"P-256">>}, @@ -104,8 +113,10 @@ ec_test() -> {<<"alg">>, <<"ES256">>}, {<<"kid">>, <<"1">>} ]}, - %% TODO figure out how to convert x,y to an ECPoint. - ?assertMatch([{{<<"EC">>, <<"1">>}, {{'ECPoint', _}, - {namedCurve, secp256r1}}}], parse_key(Ejson)). + ?assertMatch([{_Key, _Value}], parse_key(Ejson)), + {_, ECPublicKey} = parse_key(Ejson), + Msg = <<"foo">>, + Sig = public_key:sign(Msg, sha256, PrivateKey), + ?assert(public_key:verify(Msg, sha256, Sig, ECPublicKey)). -endif. -- cgit v1.2.1 From e180555734f84612b3a6df8addf59aa6cfc89f63 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Wed, 10 May 2017 22:04:03 +0100 Subject: fix test --- src/jwks.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/jwks.erl b/src/jwks.erl index b88c59068..d694d2e7b 100644 --- a/src/jwks.erl +++ b/src/jwks.erl @@ -114,7 +114,7 @@ ec_test() -> {<<"kid">>, <<"1">>} ]}, ?assertMatch([{_Key, _Value}], parse_key(Ejson)), - {_, ECPublicKey} = parse_key(Ejson), + [{_, ECPublicKey}] = parse_key(Ejson), Msg = <<"foo">>, Sig = public_key:sign(Msg, sha256, PrivateKey), ?assert(public_key:verify(Msg, sha256, Sig, ECPublicKey)). -- cgit v1.2.1 From e80c3d168c835adea87469ca53dec0d54bab7023 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Thu, 11 May 2017 09:28:40 +0100 Subject: add tests for HS384 and HS512 --- src/jwtf.erl | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/src/jwtf.erl b/src/jwtf.erl index 18f84deb7..ae8239a9a 100644 --- a/src/jwtf.erl +++ b/src/jwtf.erl @@ -377,17 +377,40 @@ malformed_token_test() -> ?assertEqual({error, malformed_token}, decode(<<"a.b.c.d">>, [], nil)). +%% jwt.io generated hs256_test() -> EncodedToken = <<"eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCIsImtpZCI6IjEyMzQ1Ni" "J9.eyJpc3MiOiJodHRwczovL2Zvby5jb20iLCJpYXQiOjAsImV4cCI" "6MTAwMDAwMDAwMDAwMDAsImtpZCI6ImJhciJ9.iS8AH11QHHlczkBn" "Hl9X119BYLOZyZPllOVhSBZ4RZs">>, KS = fun(<<"123456">>) -> <<"secret">> end, - Checks = [{iss, <<"https://foo.com">>}, iat, exp, sig, typ, alg, kid], + Checks = [{iss, <<"https://foo.com">>}, iat, exp, typ, alg, kid], ?assertMatch({ok, _}, catch decode(EncodedToken, Checks, KS)). -%% jwt.io example +%% pip install PyJWT +%% > import jwt +%% > jwt.encode({'foo':'bar'}, 'secret', algorithm='HS384') +hs384_test() -> + EncodedToken = <<"eyJhbGciOiJIUzM4NCIsInR5cCI6IkpXVCJ9.eyJmb28iOiJiYXIif" + "Q.2quwghs6I56GM3j7ZQbn-ASZ53xdBqzPzTDHm_CtVec32LUy-Ezy" + "L3JjIe7WjL93">>, + KS = fun(_) -> <<"secret">> end, + ?assertMatch({ok, {[{<<"foo">>,<<"bar">>}]}}, catch decode(EncodedToken, [], KS)). + + +%% pip install PyJWT +%% > import jwt +%% > jwt.encode({'foo':'bar'}, 'secret', algorithm='HS512') +hs512_test() -> + EncodedToken = <<"eyJhbGciOiJIUzUxMiIsInR5cCI6IkpXVCJ9.eyJmb28iOiJiYX" + "IifQ.WePl7achkd0oGNB8XRF_LJwxlyiPZqpdNgdKpDboAjSTsW" + "q-aOGNynTp8TOv8KjonFym8vwFwppXOLoLXbkIaQ">>, + KS = fun(_) -> <<"secret">> end, + ?assertMatch({ok, {[{<<"foo">>,<<"bar">>}]}}, catch decode(EncodedToken, [], KS)). + + +%% jwt.io generated rs256_test() -> EncodedToken = <<"eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0N" "TY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiYWRtaW4iOnRydWV9.Ek" @@ -407,4 +430,5 @@ rs256_test() -> ?assertMatch({ok, ExpectedPayload}, decode(EncodedToken, Checks, KS)). + -endif. -- cgit v1.2.1 From 6cc182d5bd009c0bfee036651714a3294bfa2254 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Thu, 11 May 2017 09:33:14 +0100 Subject: IAT validation requires it to be a number, any number --- src/jwtf.erl | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/jwtf.erl b/src/jwtf.erl index ae8239a9a..cffe88b00 100644 --- a/src/jwtf.erl +++ b/src/jwtf.erl @@ -132,8 +132,10 @@ validate_iat(Props, Checks) -> ok; {true, undefined} -> throw({error, missing_iat}); - {true, IAT} -> - assert_past(iat, IAT) + {true, IAT} when is_integer(IAT) -> + ok; + {true, _} -> + throw({error, invalid_iat}) end. @@ -328,8 +330,8 @@ missing_iat_test() -> invalid_iat_test() -> - Encoded = encode(valid_header(), {[{<<"iat">>, 32503680000}]}), - ?assertEqual({error, {iat,not_in_past}}, decode(Encoded, [iat], nil)). + Encoded = encode(valid_header(), {[{<<"iat">>, <<"hello">>}]}), + ?assertEqual({error, invalid_iat}, decode(Encoded, [iat], nil)). missing_nbf_test() -> -- cgit v1.2.1 From e083b22e2a66fc8ce965c09757a4fd42f333a982 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Thu, 11 May 2017 10:40:12 +0100 Subject: provide caching of JWKS keys --- src/jwks.erl | 31 +++++++++++++++++++++++++++++ src/jwtf.app.src | 2 ++ src/jwtf_app.erl | 26 ++++++++++++++++++++++++ src/jwtf_sup.erl | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 119 insertions(+) create mode 100644 src/jwtf_app.erl create mode 100644 src/jwtf_sup.erl diff --git a/src/jwks.erl b/src/jwks.erl index d694d2e7b..d6c44deb4 100644 --- a/src/jwks.erl +++ b/src/jwks.erl @@ -16,11 +16,42 @@ -module(jwks). -export([ + get_key/3, get_keyset/1 ]). -include_lib("public_key/include/public_key.hrl"). +get_key(Url, Kty, Kid) -> + case lookup(Url, Kty, Kid) of + {ok, Key} -> + {ok, Key}; + {error, not_found} -> + update_cache(Url), + lookup(Url, Kty, Kid) + end. + + +lookup(Url, Kty, Kid) -> + case ets_lru:lookup_d(jwks_cache_lru, {Url, Kty, Kid}) of + {ok, Key} -> + {ok, Key}; + not_found -> + {error, not_found} + end. + + +update_cache(Url) -> + case get_keyset(Url) of + {ok, KeySet} -> + [ets_lru:insert(jwks_cache_lru, {Url, Kty, Kid}, Key) + || {{Kty, Kid}, Key} <- KeySet], + ok; + {error, Reason} -> + {error, Reason} + end. + + get_keyset(Url) -> ReqHeaders = [], case ibrowse:send_req(Url, ReqHeaders, get) of diff --git a/src/jwtf.app.src b/src/jwtf.app.src index 304bb9e0a..5fd9c2562 100644 --- a/src/jwtf.app.src +++ b/src/jwtf.app.src @@ -14,11 +14,13 @@ {description, "JSON Web Token Functions"}, {vsn, git}, {registered, []}, + {mod, { jwtf_app, []}}, {applications, [ kernel, stdlib, b64url, crypto, + ets_lru, jiffy, public_key ]}, diff --git a/src/jwtf_app.erl b/src/jwtf_app.erl new file mode 100644 index 000000000..92a26d558 --- /dev/null +++ b/src/jwtf_app.erl @@ -0,0 +1,26 @@ +%%%------------------------------------------------------------------- +%% @doc jwtf public API +%% @end +%%%------------------------------------------------------------------- + +-module(jwtf_app). + +-behaviour(application). + +%% Application callbacks +-export([start/2, stop/1]). + +%%==================================================================== +%% API +%%==================================================================== + +start(_StartType, _StartArgs) -> + jwtf_sup:start_link(). + +%%-------------------------------------------------------------------- +stop(_State) -> + ok. + +%%==================================================================== +%% Internal functions +%%==================================================================== diff --git a/src/jwtf_sup.erl b/src/jwtf_sup.erl new file mode 100644 index 000000000..2256ac53a --- /dev/null +++ b/src/jwtf_sup.erl @@ -0,0 +1,60 @@ +%%%------------------------------------------------------------------- +%% @doc epep top level supervisor. +%% @end +%%%------------------------------------------------------------------- + +-module(jwtf_sup). + +-behaviour(supervisor). + +%% API +-export([start_link/0]). + +%% Supervisor callbacks +-export([init/1]). + +-define(SERVER, ?MODULE). + +%%==================================================================== +%% API functions +%%==================================================================== + +start_link() -> + supervisor:start_link({local, ?SERVER}, ?MODULE, []). + +%%==================================================================== +%% Supervisor callbacks +%%==================================================================== + +%% Child :: {Id,StartFunc,Restart,Shutdown,Type,Modules} +init([]) -> + Children = [ + {jwks_cache_lru, + {ets_lru, start_link, [jwks_cache_lru, lru_opts()]}, + permanent, 5000, worker, [ets_lru]} + ], + {ok, { {one_for_all, 0, 1}, Children} }. + +%%==================================================================== +%% Internal functions +%%==================================================================== + +lru_opts() -> + case config:get_integer("jwtf_cache", "max_objects", 50) of + MxObjs when MxObjs > 0 -> + [{max_objects, MxObjs}]; + _ -> + [] + end ++ + case config:get_integer("jwtf_cache", "max_size", 0) of + MxSize when MxSize > 0 -> + [{max_size, MxSize}]; + _ -> + [] + end ++ + case config:get_integer("jwtf_cache", "max_lifetime", 0) of + MxLT when MxLT > 0 -> + [{max_lifetime, MxLT}]; + _ -> + [] + end. -- cgit v1.2.1 From 9d60fa25bec69621de6aa9df786e9c739783c754 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Fri, 12 May 2017 10:01:47 +0100 Subject: add ibrowse as dep --- src/jwtf.app.src | 1 + 1 file changed, 1 insertion(+) diff --git a/src/jwtf.app.src b/src/jwtf.app.src index 5fd9c2562..2ff221309 100644 --- a/src/jwtf.app.src +++ b/src/jwtf.app.src @@ -21,6 +21,7 @@ b64url, crypto, ets_lru, + ibrowse, jiffy, public_key ]}, -- cgit v1.2.1 From ceeb019ebbc1d6aadb44b7f55d112e806403ce53 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Fri, 12 May 2017 10:57:02 +0100 Subject: require alg+kid for key lookup --- src/jwtf.erl | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/jwtf.erl b/src/jwtf.erl index cffe88b00..ae1b95a2b 100644 --- a/src/jwtf.erl +++ b/src/jwtf.erl @@ -168,13 +168,14 @@ validate_exp(Props, Checks) -> key(Props, Checks, KS) -> + Alg = prop(<<"alg">>, Props), Required = prop(kid, Checks), KID = prop(<<"kid">>, Props), case {Required, KID} of {true, undefined} -> throw({error, missing_kid}); {_, KID} -> - KS(KID) + KS(Alg, KID) end. @@ -363,7 +364,7 @@ bad_rs256_sig_test() -> Encoded = encode( {[{<<"typ">>, <<"JWT">>}, {<<"alg">>, <<"RS256">>}]}, {[]}), - KS = fun(undefined) -> jwt_io_pubkey() end, + KS = fun(<<"RS256">>, undefined) -> jwt_io_pubkey() end, ?assertEqual({error, bad_signature}, decode(Encoded, [], KS)). @@ -371,7 +372,7 @@ bad_hs256_sig_test() -> Encoded = encode( {[{<<"typ">>, <<"JWT">>}, {<<"alg">>, <<"HS256">>}]}, {[]}), - KS = fun(undefined) -> <<"bad">> end, + KS = fun(<<"HS256">>, undefined) -> <<"bad">> end, ?assertEqual({error, bad_hmac}, decode(Encoded, [], KS)). @@ -385,7 +386,7 @@ hs256_test() -> "J9.eyJpc3MiOiJodHRwczovL2Zvby5jb20iLCJpYXQiOjAsImV4cCI" "6MTAwMDAwMDAwMDAwMDAsImtpZCI6ImJhciJ9.iS8AH11QHHlczkBn" "Hl9X119BYLOZyZPllOVhSBZ4RZs">>, - KS = fun(<<"123456">>) -> <<"secret">> end, + KS = fun(<<"HS256">>, <<"123456">>) -> <<"secret">> end, Checks = [{iss, <<"https://foo.com">>}, iat, exp, typ, alg, kid], ?assertMatch({ok, _}, catch decode(EncodedToken, Checks, KS)). @@ -397,7 +398,7 @@ hs384_test() -> EncodedToken = <<"eyJhbGciOiJIUzM4NCIsInR5cCI6IkpXVCJ9.eyJmb28iOiJiYXIif" "Q.2quwghs6I56GM3j7ZQbn-ASZ53xdBqzPzTDHm_CtVec32LUy-Ezy" "L3JjIe7WjL93">>, - KS = fun(_) -> <<"secret">> end, + KS = fun(<<"HS384">>, _) -> <<"secret">> end, ?assertMatch({ok, {[{<<"foo">>,<<"bar">>}]}}, catch decode(EncodedToken, [], KS)). @@ -408,7 +409,7 @@ hs512_test() -> EncodedToken = <<"eyJhbGciOiJIUzUxMiIsInR5cCI6IkpXVCJ9.eyJmb28iOiJiYX" "IifQ.WePl7achkd0oGNB8XRF_LJwxlyiPZqpdNgdKpDboAjSTsW" "q-aOGNynTp8TOv8KjonFym8vwFwppXOLoLXbkIaQ">>, - KS = fun(_) -> <<"secret">> end, + KS = fun(<<"HS512">>, _) -> <<"secret">> end, ?assertMatch({ok, {[{<<"foo">>,<<"bar">>}]}}, catch decode(EncodedToken, [], KS)). @@ -422,7 +423,7 @@ rs256_test() -> "5-HIirE">>, Checks = [sig, alg], - KS = fun(undefined) -> jwt_io_pubkey() end, + KS = fun(<<"RS256">>, undefined) -> jwt_io_pubkey() end, ExpectedPayload = {[ {<<"sub">>, <<"1234567890">>}, -- cgit v1.2.1 From 5b31b0d79aa2c0fefefb0b35e2e3fab9822eca94 Mon Sep 17 00:00:00 2001 From: Jay Doane Date: Wed, 24 May 2017 09:37:01 -0700 Subject: Improve pubkey not found error handling (#4) * Improve pubkey not found error handling When the public key identified by the {Alg, KID} tuple is not found on the IAM keystore server, it's possible to see errors like: (node1@127.0.0.1)140> epep:jwt_decode(SampleJWT). ** exception error: no function clause matching public_key:do_verify(<<"eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiIsImtpZCI6IjIwMTcwNTIwLTAwOjAwOjAwIn0.eyJpc3MiOiJodHRwOi8vbG9jYWxob3N0OjEyMzIx"...>>, sha256, <<229,188,162,247,201,233,118,32,115,206,156, 169,17,221,78,157,161,147,46,179,42,219,66, 15,139,91,...>>, {error,not_found}) (public_key.erl, line 782) in function jwtf:public_key_verify/4 (src/jwtf.erl, line 212) in call from jwtf:decode/3 (src/jwtf.erl, line 30) Modify key/1 and public_key_not_found_test/0 to account for keystore changing from returning an error tuple to throwing one. --- src/jwtf.erl | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/jwtf.erl b/src/jwtf.erl index ae1b95a2b..78b36a9c3 100644 --- a/src/jwtf.erl +++ b/src/jwtf.erl @@ -360,6 +360,15 @@ missing_kid_test() -> ?assertEqual({error, missing_kid}, decode(Encoded, [kid], nil)). +public_key_not_found_test() -> + Encoded = encode( + {[{<<"alg">>, <<"RS256">>}, {<<"kid">>, <<"1">>}]}, + {[]}), + KS = fun(_, _) -> throw({error, not_found}) end, + Expected = {error, not_found}, + ?assertEqual(Expected, decode(Encoded, [], KS)). + + bad_rs256_sig_test() -> Encoded = encode( {[{<<"typ">>, <<"JWT">>}, {<<"alg">>, <<"RS256">>}]}, -- cgit v1.2.1 From 80d4a643d47ae2f522feceed0be308809518112e Mon Sep 17 00:00:00 2001 From: Jay Doane Date: Mon, 29 May 2017 21:13:48 -0700 Subject: Improve restart strategy Tolerate 5 crashes per 10 seconds --- src/jwtf_sup.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/jwtf_sup.erl b/src/jwtf_sup.erl index 2256ac53a..7cf56e84f 100644 --- a/src/jwtf_sup.erl +++ b/src/jwtf_sup.erl @@ -33,7 +33,7 @@ init([]) -> {ets_lru, start_link, [jwks_cache_lru, lru_opts()]}, permanent, 5000, worker, [ets_lru]} ], - {ok, { {one_for_all, 0, 1}, Children} }. + {ok, { {one_for_all, 5, 10}, Children} }. %%==================================================================== %% Internal functions -- cgit v1.2.1 From b396a1d1bc818c5138d78e74668ac94be1ef8dd1 Mon Sep 17 00:00:00 2001 From: Jay Doane Date: Thu, 8 Jun 2017 13:39:02 -0700 Subject: Generate rsa private keys and keypairs --- src/jwtf_test_util.erl | 82 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 82 insertions(+) create mode 100644 src/jwtf_test_util.erl diff --git a/src/jwtf_test_util.erl b/src/jwtf_test_util.erl new file mode 100644 index 000000000..c32ea1cb9 --- /dev/null +++ b/src/jwtf_test_util.erl @@ -0,0 +1,82 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(jwtf_test_util). + +-export([ + create_private_key/0, + create_keypair/0, + to_public_key/1 +]). + +-include_lib("public_key/include/public_key.hrl"). + +-spec create_private_key() -> + #'RSAPrivateKey'{} | no_return(). +create_private_key() -> + create_private_key("/tmp"). + + +-spec create_keypair() -> + {#'RSAPrivateKey'{}, #'RSAPublicKey'{}} | no_return(). +create_keypair() -> + PrivateKey = create_private_key(), + {PrivateKey, to_public_key(PrivateKey)}. + + +-spec to_public_key(#'RSAPrivateKey'{}) -> + #'RSAPublicKey'{}. +to_public_key(#'RSAPrivateKey'{} = PrivateKey) -> + #'RSAPublicKey'{ + modulus = PrivateKey#'RSAPrivateKey'.modulus, + publicExponent = PrivateKey#'RSAPrivateKey'.publicExponent}. + + +create_private_key(TmpDir) -> + ok = verify_openssl(), + Path = filename:join(TmpDir, timestamp() ++ "-rsa.key.der"), + Bin = create_rsa_key(Path), + public_key:der_decode('RSAPrivateKey', Bin). + + +verify_openssl() -> + case os:cmd("openssl version") of + "OpenSSL 1." ++ _Rest -> + ok; + _ -> + throw({error, openssl_required}) + end. + + +timestamp() -> + lists:concat([integer_to_list(N) || N <- tuple_to_list(os:timestamp())]). + + +create_rsa_key(Path) -> + Cmd = "openssl genpkey -algorithm RSA -outform DER -out " ++ Path, + Out = os:cmd(Cmd), + %% Since os:cmd doesn't indicate if the command fails, we go to + %% some length to ensure the output looks correct. + ok = validate_genpkey_output(Out), + {ok, Bin} = file:read_file(Path), + ok = file:delete(Path), + Bin. + + +validate_genpkey_output(Out) when is_list(Out) -> + Length = length(Out), + case re:run(Out, "[.+\n]+") of % should only contain period, plus, or nl + {match, [{0, Length}]} -> + ok; + _ -> + throw({error, {openssl_genpkey_failed, Out}}) + end. -- cgit v1.2.1 From d9a718b8cbb68259b3611b44e1eeac9f4b15e0e1 Mon Sep 17 00:00:00 2001 From: Jay Doane Date: Thu, 8 Jun 2017 13:41:12 -0700 Subject: Support JWT encoding Implement jwtf:encode/3 for encoding JSON Web Tokens. Test encode/decode round trip for each supported alg. --- src/jwtf.erl | 159 +++++++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 117 insertions(+), 42 deletions(-) diff --git a/src/jwtf.erl b/src/jwtf.erl index 78b36a9c3..a461da98d 100644 --- a/src/jwtf.erl +++ b/src/jwtf.erl @@ -17,7 +17,53 @@ -module(jwtf). --export([decode/3]). +-export([ + encode/3, + decode/3 +]). + +-define(ALGS, [ + {<<"RS256">>, {public_key, sha256}}, % RSA PKCS#1 signature with SHA-256 + {<<"RS384">>, {public_key, sha384}}, + {<<"RS512">>, {public_key, sha512}}, + {<<"ES256">>, {public_key, sha256}}, + {<<"ES384">>, {public_key, sha384}}, + {<<"ES512">>, {public_key, sha512}}, + {<<"HS256">>, {hmac, sha256}}, + {<<"HS384">>, {hmac, sha384}}, + {<<"HS512">>, {hmac, sha512}}]). + +-define(VALID_ALGS, proplists:get_keys(?ALGS)). + + +% @doc encode +% Encode the JSON Header and Claims using Key and Alg obtained from Header +-spec encode(term(), term(), term()) -> + {ok, binary()} | no_return(). +encode(Header = {HeaderProps}, Claims, Key) -> + try + Alg = case prop(<<"alg">>, HeaderProps) of + undefined -> + throw(missing_alg); + Val -> + Val + end, + EncodedHeader = b64url:encode(jiffy:encode(Header)), + EncodedClaims = b64url:encode(jiffy:encode(Claims)), + Message = <>, + SignatureOrMac = case verification_algorithm(Alg) of + {public_key, Algorithm} -> + public_key:sign(Message, Algorithm, Key); + {hmac, Algorithm} -> + crypto:hmac(Algorithm, Key, Message) + end, + EncodedSignatureOrMac = b64url:encode(SignatureOrMac), + {ok, <>} + catch + throw:Error -> + {error, Error} + end. + % @doc decode % Decodes the supplied encoded token, checking @@ -35,6 +81,19 @@ decode(EncodedToken, Checks, KS) -> end. +% @doc verification_algorithm +% Return {VerificationMethod, Algorithm} tuple for the specified Alg +-spec verification_algorithm(binary()) -> + {atom(), atom()} | no_return(). +verification_algorithm(Alg) -> + case lists:keyfind(Alg, 1, ?ALGS) of + {Alg, Val} -> + Val; + false -> + throw(invalid_alg) + end. + + validate(Header0, Payload0, Signature, Checks, KS) -> Header1 = props(decode_json(Header0)), validate_header(Header1, Checks), @@ -70,26 +129,13 @@ validate_typ(Props, Checks) -> validate_alg(Props, Checks) -> Required = prop(alg, Checks), Alg = prop(<<"alg">>, Props), - Valid = [ - <<"RS256">>, - <<"RS384">>, - <<"RS512">>, - - <<"HS256">>, - <<"HS384">>, - <<"HS512">>, - - <<"ES384">>, - <<"ES512">>, - <<"ES512">> - ], case {Required, Alg} of {undefined, _} -> ok; {true, undefined} -> throw({error, missing_alg}); {true, Alg} -> - case lists:member(Alg, Valid) of + case lists:member(Alg, ?VALID_ALGS) of true -> ok; false -> @@ -179,35 +225,20 @@ key(Props, Checks, KS) -> end. -verify(Alg, Header, Payload, Signature0, Key) -> +verify(Alg, Header, Payload, SignatureOrMac0, Key) -> Message = <
>, - Signature1 = b64url:decode(Signature0), - case Alg of - <<"RS256">> -> - public_key_verify(sha256, Message, Signature1, Key); - <<"RS384">> -> - public_key_verify(sha384, Message, Signature1, Key); - <<"RS512">> -> - public_key_verify(sha512, Message, Signature1, Key); - - <<"ES256">> -> - public_key_verify(sha256, Message, Signature1, Key); - <<"ES384">> -> - public_key_verify(sha384, Message, Signature1, Key); - <<"ES512">> -> - public_key_verify(sha512, Message, Signature1, Key); - - <<"HS256">> -> - hmac_verify(sha256, Message, Signature1, Key); - <<"HS384">> -> - hmac_verify(sha384, Message, Signature1, Key); - <<"HS512">> -> - hmac_verify(sha512, Message, Signature1, Key) + SignatureOrMac1 = b64url:decode(SignatureOrMac0), + {VerificationMethod, Algorithm} = verification_algorithm(Alg), + case VerificationMethod of + public_key -> + public_key_verify(Algorithm, Message, SignatureOrMac1, Key); + hmac -> + hmac_verify(Algorithm, Message, SignatureOrMac1, Key) end. -public_key_verify(Alg, Message, Signature, PublicKey) -> - case public_key:verify(Message, Alg, Signature, PublicKey) of +public_key_verify(Algorithm, Message, Signature, PublicKey) -> + case public_key:verify(Message, Algorithm, Signature, PublicKey) of true -> ok; false -> @@ -215,8 +246,8 @@ public_key_verify(Alg, Message, Signature, PublicKey) -> end. -hmac_verify(Alg, Message, HMAC, SecretKey) -> - case crypto:hmac(Alg, SecretKey, Message) of +hmac_verify(Algorithm, Message, HMAC, SecretKey) -> + case crypto:hmac(Algorithm, SecretKey, Message) of HMAC -> ok; _ -> @@ -443,4 +474,48 @@ rs256_test() -> ?assertMatch({ok, ExpectedPayload}, decode(EncodedToken, Checks, KS)). +encode_missing_alg_test() -> + ?assertEqual({error, missing_alg}, + encode({[]}, {[]}, <<"foo">>)). + + +encode_invalid_alg_test() -> + ?assertEqual({error, invalid_alg}, + encode({[{<<"alg">>, <<"BOGUS">>}]}, {[]}, <<"foo">>)). + + +encode_decode_test_() -> + [{Alg, encode_decode(Alg)} || Alg <- ?VALID_ALGS]. + + +encode_decode(Alg) -> + {EncodeKey, DecodeKey} = case verification_algorithm(Alg) of + {public_key, Algorithm} -> + jwtf_test_util:create_keypair(); + {hmac, Algorithm} -> + Key = <<"a-super-secret-key">>, + {Key, Key} + end, + Claims = claims(), + {ok, Encoded} = encode(header(Alg), Claims, EncodeKey), + KS = fun(_, _) -> DecodeKey end, + {ok, Decoded} = decode(Encoded, [], KS), + ?_assertMatch(Claims, Decoded). + + +header(Alg) -> + {[ + {<<"typ">>, <<"JWT">>}, + {<<"alg">>, Alg}, + {<<"kid">>, <<"20170520-00:00:00">>} + ]}. + + +claims() -> + EpochSeconds = 1496205841, + {[ + {<<"iat">>, EpochSeconds}, + {<<"exp">>, EpochSeconds + 3600} + ]}. + -endif. -- cgit v1.2.1 From 382229e7cb7fb36461d53fb1f858b674a6c2c193 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Fri, 9 Jun 2017 19:37:15 +0100 Subject: Ensure error reason is convertable to JSON --- src/jwtf.erl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/jwtf.erl b/src/jwtf.erl index a461da98d..28cab6cd3 100644 --- a/src/jwtf.erl +++ b/src/jwtf.erl @@ -195,7 +195,7 @@ validate_nbf(Props, Checks) -> {true, undefined} -> throw({error, missing_nbf}); {true, IAT} -> - assert_past(nbf, IAT) + assert_past(<<"nbf">>, IAT) end. @@ -209,7 +209,7 @@ validate_exp(Props, Checks) -> {true, undefined} -> throw({error, missing_exp}); {true, EXP} -> - assert_future(exp, EXP) + assert_future(<<"exp">>, EXP) end. @@ -282,7 +282,7 @@ assert_past(Name, Time) -> true -> ok; false -> - throw({error, {Name, not_in_past}}) + throw({error, <>}) end. assert_future(Name, Time) -> @@ -290,7 +290,7 @@ assert_future(Name, Time) -> true -> ok; false -> - throw({error, {Name, not_in_future}}) + throw({error, <>}) end. @@ -373,7 +373,7 @@ missing_nbf_test() -> invalid_nbf_test() -> Encoded = encode(valid_header(), {[{<<"nbf">>, 32503680000}]}), - ?assertEqual({error, {nbf,not_in_past}}, decode(Encoded, [nbf], nil)). + ?assertEqual({error, <<"nbf not in past">>}, decode(Encoded, [nbf], nil)). missing_exp_test() -> @@ -383,7 +383,7 @@ missing_exp_test() -> invalid_exp_test() -> Encoded = encode(valid_header(), {[{<<"exp">>, 0}]}), - ?assertEqual({error, {exp,not_in_future}}, decode(Encoded, [exp], nil)). + ?assertEqual({error, <<"exp not in future">>}, decode(Encoded, [exp], nil)). missing_kid_test() -> -- cgit v1.2.1 From 768732af8209405738da6875c9474c0b0b99345b Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Thu, 15 Jun 2017 10:42:02 +0100 Subject: Return error from update_cache --- src/jwks.erl | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/jwks.erl b/src/jwks.erl index d6c44deb4..87fc4abdd 100644 --- a/src/jwks.erl +++ b/src/jwks.erl @@ -27,8 +27,12 @@ get_key(Url, Kty, Kid) -> {ok, Key} -> {ok, Key}; {error, not_found} -> - update_cache(Url), - lookup(Url, Kty, Kid) + case update_cache(Url) of + ok -> + lookup(Url, Kty, Kid); + {error, Reason} -> + {error, Reason} + end end. -- cgit v1.2.1 From a3b6661d50337ad50e065a660006ab7afd0125ea Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Thu, 15 Jun 2017 10:43:02 +0100 Subject: move error wrapping to decode function --- src/jwtf.erl | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/src/jwtf.erl b/src/jwtf.erl index 28cab6cd3..ed0ce92f6 100644 --- a/src/jwtf.erl +++ b/src/jwtf.erl @@ -77,7 +77,7 @@ decode(EncodedToken, Checks, KS) -> {ok, decode_json(Payload)} catch throw:Error -> - Error + {error, Error} end. @@ -118,11 +118,11 @@ validate_typ(Props, Checks) -> {undefined, _} -> ok; {true, undefined} -> - throw({error, missing_typ}); + throw(missing_typ); {true, <<"JWT">>} -> ok; {true, _} -> - throw({error, invalid_typ}) + throw(invalid_typ) end. @@ -133,13 +133,13 @@ validate_alg(Props, Checks) -> {undefined, _} -> ok; {true, undefined} -> - throw({error, missing_alg}); + throw(missing_alg); {true, Alg} -> case lists:member(Alg, ?VALID_ALGS) of true -> ok; false -> - throw({error, invalid_alg}) + throw(invalid_alg) end end. @@ -161,11 +161,11 @@ validate_iss(Props, Checks) -> {undefined, _} -> ok; {_ISS, undefined} -> - throw({error, missing_iss}); + throw(missing_iss); {ISS, ISS} -> ok; {_, _} -> - throw({error, invalid_iss}) + throw(invalid_iss) end. @@ -177,11 +177,11 @@ validate_iat(Props, Checks) -> {undefined, _} -> ok; {true, undefined} -> - throw({error, missing_iat}); + throw(missing_iat); {true, IAT} when is_integer(IAT) -> ok; {true, _} -> - throw({error, invalid_iat}) + throw(invalid_iat) end. @@ -193,7 +193,7 @@ validate_nbf(Props, Checks) -> {undefined, _} -> ok; {true, undefined} -> - throw({error, missing_nbf}); + throw(missing_nbf); {true, IAT} -> assert_past(<<"nbf">>, IAT) end. @@ -207,7 +207,7 @@ validate_exp(Props, Checks) -> {undefined, _} -> ok; {true, undefined} -> - throw({error, missing_exp}); + throw(missing_exp); {true, EXP} -> assert_future(<<"exp">>, EXP) end. @@ -219,7 +219,7 @@ key(Props, Checks, KS) -> KID = prop(<<"kid">>, Props), case {Required, KID} of {true, undefined} -> - throw({error, missing_kid}); + throw(missing_kid); {_, KID} -> KS(Alg, KID) end. @@ -242,7 +242,7 @@ public_key_verify(Algorithm, Message, Signature, PublicKey) -> true -> ok; false -> - throw({error, bad_signature}) + throw(bad_signature) end. @@ -251,21 +251,21 @@ hmac_verify(Algorithm, Message, HMAC, SecretKey) -> HMAC -> ok; _ -> - throw({error, bad_hmac}) + throw(bad_hmac) end. split(EncodedToken) -> case binary:split(EncodedToken, <<$.>>, [global]) of [_, _, _] = Split -> Split; - _ -> throw({error, malformed_token}) + _ -> throw(malformed_token) end. decode_json(Encoded) -> case b64url:decode(Encoded) of {error, Reason} -> - throw({error, Reason}); + throw(Reason); Decoded -> jiffy:decode(Decoded) end. @@ -274,7 +274,7 @@ props({Props}) -> Props; props(_) -> - throw({error, not_object}). + throw(not_object). assert_past(Name, Time) -> @@ -282,7 +282,7 @@ assert_past(Name, Time) -> true -> ok; false -> - throw({error, <>}) + throw(<>) end. assert_future(Name, Time) -> @@ -290,7 +290,7 @@ assert_future(Name, Time) -> true -> ok; false -> - throw({error, <>}) + throw(<>) end. @@ -395,7 +395,7 @@ public_key_not_found_test() -> Encoded = encode( {[{<<"alg">>, <<"RS256">>}, {<<"kid">>, <<"1">>}]}, {[]}), - KS = fun(_, _) -> throw({error, not_found}) end, + KS = fun(_, _) -> throw(not_found) end, Expected = {error, not_found}, ?assertEqual(Expected, decode(Encoded, [], KS)). -- cgit v1.2.1 From f9c1f336974ae2d2b923065f92f35126ecb14313 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Thu, 15 Jun 2017 11:05:46 +0100 Subject: throw errors that chttpd:error_info can understand --- src/jwks.erl | 8 +++++-- src/jwtf.erl | 76 ++++++++++++++++++++++++++++++------------------------------ 2 files changed, 44 insertions(+), 40 deletions(-) diff --git a/src/jwks.erl b/src/jwks.erl index 87fc4abdd..4022e4184 100644 --- a/src/jwks.erl +++ b/src/jwks.erl @@ -61,8 +61,12 @@ get_keyset(Url) -> case ibrowse:send_req(Url, ReqHeaders, get) of {ok, "200", _RespHeaders, RespBody} -> {ok, parse_keyset(RespBody)}; - _Else -> - {error, get_keyset_failed} + {ok, Code, _RespHeaders, _RespBody} -> + couch_log:warning("get_keyset failed with code ~p", [Code]), + {error, {service_unavailable, <<"JWKS service unavailable">>}}; + {error, Reason} -> + couch_log:warning("get_keyset failed with reason ~p", [Reason]), + {error, {service_unavailable, <<"JWKS service unavailable">>}} end. diff --git a/src/jwtf.erl b/src/jwtf.erl index ed0ce92f6..bfecaccf4 100644 --- a/src/jwtf.erl +++ b/src/jwtf.erl @@ -44,7 +44,7 @@ encode(Header = {HeaderProps}, Claims, Key) -> try Alg = case prop(<<"alg">>, HeaderProps) of undefined -> - throw(missing_alg); + throw({bad_request, <<"Missing alg header parameter">>}); Val -> Val end, @@ -90,7 +90,7 @@ verification_algorithm(Alg) -> {Alg, Val} -> Val; false -> - throw(invalid_alg) + throw({bad_request, <<"Invalid alg header parameter">>}) end. @@ -118,11 +118,11 @@ validate_typ(Props, Checks) -> {undefined, _} -> ok; {true, undefined} -> - throw(missing_typ); + throw({bad_request, <<"Missing typ header parameter">>}); {true, <<"JWT">>} -> ok; {true, _} -> - throw(invalid_typ) + throw({bad_request, <<"Invalid typ header parameter">>}) end. @@ -133,13 +133,13 @@ validate_alg(Props, Checks) -> {undefined, _} -> ok; {true, undefined} -> - throw(missing_alg); + throw({bad_request, <<"Missing alg header parameter">>}); {true, Alg} -> case lists:member(Alg, ?VALID_ALGS) of true -> ok; false -> - throw(invalid_alg) + throw({bad_request, <<"Invalid alg header parameter">>}) end end. @@ -161,11 +161,11 @@ validate_iss(Props, Checks) -> {undefined, _} -> ok; {_ISS, undefined} -> - throw(missing_iss); + throw({bad_request, <<"Missing iss claim">>}); {ISS, ISS} -> ok; {_, _} -> - throw(invalid_iss) + throw({bad_request, <<"Invalid iss claim">>}) end. @@ -177,11 +177,11 @@ validate_iat(Props, Checks) -> {undefined, _} -> ok; {true, undefined} -> - throw(missing_iat); + throw({bad_request, <<"Missing iat claim">>}); {true, IAT} when is_integer(IAT) -> ok; {true, _} -> - throw(invalid_iat) + throw({bad_request, <<"Invalid iat claim">>}) end. @@ -193,7 +193,7 @@ validate_nbf(Props, Checks) -> {undefined, _} -> ok; {true, undefined} -> - throw(missing_nbf); + throw({bad_request, <<"Missing nbf claim">>}); {true, IAT} -> assert_past(<<"nbf">>, IAT) end. @@ -207,7 +207,7 @@ validate_exp(Props, Checks) -> {undefined, _} -> ok; {true, undefined} -> - throw(missing_exp); + throw({bad_request, <<"Missing exp claim">>}); {true, EXP} -> assert_future(<<"exp">>, EXP) end. @@ -219,7 +219,7 @@ key(Props, Checks, KS) -> KID = prop(<<"kid">>, Props), case {Required, KID} of {true, undefined} -> - throw(missing_kid); + throw({bad_request, <<"Missing kid claim">>}); {_, KID} -> KS(Alg, KID) end. @@ -242,7 +242,7 @@ public_key_verify(Algorithm, Message, Signature, PublicKey) -> true -> ok; false -> - throw(bad_signature) + throw({bad_request, <<"Bad signature">>}) end. @@ -251,21 +251,21 @@ hmac_verify(Algorithm, Message, HMAC, SecretKey) -> HMAC -> ok; _ -> - throw(bad_hmac) + throw({bad_request, <<"Bad HMAC">>}) end. split(EncodedToken) -> case binary:split(EncodedToken, <<$.>>, [global]) of [_, _, _] = Split -> Split; - _ -> throw(malformed_token) + _ -> throw({bad_request, <<"Malformed token">>}) end. decode_json(Encoded) -> case b64url:decode(Encoded) of {error, Reason} -> - throw(Reason); + throw({bad_request, Reason}); Decoded -> jiffy:decode(Decoded) end. @@ -274,7 +274,7 @@ props({Props}) -> Props; props(_) -> - throw(not_object). + throw({bad_request, <<"Not an object">>}). assert_past(Name, Time) -> @@ -282,7 +282,7 @@ assert_past(Name, Time) -> true -> ok; false -> - throw(<>) + throw({unauthorized, <>}) end. assert_future(Name, Time) -> @@ -290,7 +290,7 @@ assert_future(Name, Time) -> true -> ok; false -> - throw(<>) + throw({unauthorized, <>}) end. @@ -328,67 +328,67 @@ jwt_io_pubkey() -> missing_typ_test() -> Encoded = encode({[]}, []), - ?assertEqual({error, missing_typ}, decode(Encoded, [typ], nil)). + ?assertEqual({error, {bad_request,<<"Missing typ header parameter">>}}, decode(Encoded, [typ], nil)). invalid_typ_test() -> Encoded = encode({[{<<"typ">>, <<"NOPE">>}]}, []), - ?assertEqual({error, invalid_typ}, decode(Encoded, [typ], nil)). + ?assertEqual({error, {bad_request,<<"Invalid typ header parameter">>}}, decode(Encoded, [typ], nil)). missing_alg_test() -> Encoded = encode({[{<<"typ">>, <<"NOPE">>}]}, []), - ?assertEqual({error, missing_alg}, decode(Encoded, [alg], nil)). + ?assertEqual({error, {bad_request,<<"Missing alg header parameter">>}}, decode(Encoded, [alg], nil)). invalid_alg_test() -> Encoded = encode({[{<<"typ">>, <<"JWT">>}, {<<"alg">>, <<"NOPE">>}]}, []), - ?assertEqual({error, invalid_alg}, decode(Encoded, [alg], nil)). + ?assertEqual({error, {bad_request,<<"Invalid alg header parameter">>}}, decode(Encoded, [alg], nil)). missing_iss_test() -> Encoded = encode(valid_header(), {[]}), - ?assertEqual({error, missing_iss}, decode(Encoded, [{iss, right}], nil)). + ?assertEqual({error, {bad_request,<<"Missing iss claim">>}}, decode(Encoded, [{iss, right}], nil)). invalid_iss_test() -> Encoded = encode(valid_header(), {[{<<"iss">>, <<"wrong">>}]}), - ?assertEqual({error, invalid_iss}, decode(Encoded, [{iss, right}], nil)). + ?assertEqual({error, {bad_request,<<"Invalid iss claim">>}}, decode(Encoded, [{iss, right}], nil)). missing_iat_test() -> Encoded = encode(valid_header(), {[]}), - ?assertEqual({error, missing_iat}, decode(Encoded, [iat], nil)). + ?assertEqual({error, {bad_request,<<"Missing iat claim">>}}, decode(Encoded, [iat], nil)). invalid_iat_test() -> Encoded = encode(valid_header(), {[{<<"iat">>, <<"hello">>}]}), - ?assertEqual({error, invalid_iat}, decode(Encoded, [iat], nil)). + ?assertEqual({error, {bad_request,<<"Invalid iat claim">>}}, decode(Encoded, [iat], nil)). missing_nbf_test() -> Encoded = encode(valid_header(), {[]}), - ?assertEqual({error, missing_nbf}, decode(Encoded, [nbf], nil)). + ?assertEqual({error, {bad_request,<<"Missing nbf claim">>}}, decode(Encoded, [nbf], nil)). invalid_nbf_test() -> Encoded = encode(valid_header(), {[{<<"nbf">>, 32503680000}]}), - ?assertEqual({error, <<"nbf not in past">>}, decode(Encoded, [nbf], nil)). + ?assertEqual({error, {unauthorized, <<"nbf not in past">>}}, decode(Encoded, [nbf], nil)). missing_exp_test() -> Encoded = encode(valid_header(), {[]}), - ?assertEqual({error, missing_exp}, decode(Encoded, [exp], nil)). + ?assertEqual({error, {bad_request, <<"Missing exp claim">>}}, decode(Encoded, [exp], nil)). invalid_exp_test() -> Encoded = encode(valid_header(), {[{<<"exp">>, 0}]}), - ?assertEqual({error, <<"exp not in future">>}, decode(Encoded, [exp], nil)). + ?assertEqual({error, {unauthorized, <<"exp not in future">>}}, decode(Encoded, [exp], nil)). missing_kid_test() -> Encoded = encode({[]}, {[]}), - ?assertEqual({error, missing_kid}, decode(Encoded, [kid], nil)). + ?assertEqual({error, {bad_request, <<"Missing kid claim">>}}, decode(Encoded, [kid], nil)). public_key_not_found_test() -> @@ -405,7 +405,7 @@ bad_rs256_sig_test() -> {[{<<"typ">>, <<"JWT">>}, {<<"alg">>, <<"RS256">>}]}, {[]}), KS = fun(<<"RS256">>, undefined) -> jwt_io_pubkey() end, - ?assertEqual({error, bad_signature}, decode(Encoded, [], KS)). + ?assertEqual({error, {bad_request, <<"Bad signature">>}}, decode(Encoded, [], KS)). bad_hs256_sig_test() -> @@ -413,11 +413,11 @@ bad_hs256_sig_test() -> {[{<<"typ">>, <<"JWT">>}, {<<"alg">>, <<"HS256">>}]}, {[]}), KS = fun(<<"HS256">>, undefined) -> <<"bad">> end, - ?assertEqual({error, bad_hmac}, decode(Encoded, [], KS)). + ?assertEqual({error, {bad_request, <<"Bad HMAC">>}}, decode(Encoded, [], KS)). malformed_token_test() -> - ?assertEqual({error, malformed_token}, decode(<<"a.b.c.d">>, [], nil)). + ?assertEqual({error, {bad_request, <<"Malformed token">>}}, decode(<<"a.b.c.d">>, [], nil)). %% jwt.io generated @@ -475,12 +475,12 @@ rs256_test() -> encode_missing_alg_test() -> - ?assertEqual({error, missing_alg}, + ?assertEqual({error, {bad_request, <<"Missing alg header parameter">>}}, encode({[]}, {[]}, <<"foo">>)). encode_invalid_alg_test() -> - ?assertEqual({error, invalid_alg}, + ?assertEqual({error, {bad_request, <<"Invalid alg header parameter">>}}, encode({[{<<"alg">>, <<"BOGUS">>}]}, {[]}, <<"foo">>)). -- cgit v1.2.1 From 8100be3d61ebf028d89a063c92de9a19816c64f9 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Thu, 15 Jun 2017 19:17:54 +0100 Subject: remove dependency on openssl commands --- src/jwtf.erl | 33 +++++++++++++++++++- src/jwtf_test_util.erl | 82 -------------------------------------------------- 2 files changed, 32 insertions(+), 83 deletions(-) delete mode 100644 src/jwtf_test_util.erl diff --git a/src/jwtf.erl b/src/jwtf.erl index bfecaccf4..809f3f391 100644 --- a/src/jwtf.erl +++ b/src/jwtf.erl @@ -305,6 +305,7 @@ prop(Prop, Props) -> -ifdef(TEST). -include_lib("eunit/include/eunit.hrl"). +-include_lib("public_key/include/public_key.hrl"). encode(Header0, Payload0) -> Header1 = b64url:encode(jiffy:encode(Header0)), @@ -491,7 +492,7 @@ encode_decode_test_() -> encode_decode(Alg) -> {EncodeKey, DecodeKey} = case verification_algorithm(Alg) of {public_key, Algorithm} -> - jwtf_test_util:create_keypair(); + create_keypair(); {hmac, Algorithm} -> Key = <<"a-super-secret-key">>, {Key, Key} @@ -518,4 +519,34 @@ claims() -> {<<"exp">>, EpochSeconds + 3600} ]}. +create_keypair() -> + %% https://tools.ietf.org/html/rfc7517#appendix-C + N = decode(<<"t6Q8PWSi1dkJj9hTP8hNYFlvadM7DflW9mWepOJhJ66w7nyoK1gPNqFMSQRy" + "O125Gp-TEkodhWr0iujjHVx7BcV0llS4w5ACGgPrcAd6ZcSR0-Iqom-QFcNP" + "8Sjg086MwoqQU_LYywlAGZ21WSdS_PERyGFiNnj3QQlO8Yns5jCtLCRwLHL0" + "Pb1fEv45AuRIuUfVcPySBWYnDyGxvjYGDSM-AqWS9zIQ2ZilgT-GqUmipg0X" + "OC0Cc20rgLe2ymLHjpHciCKVAbY5-L32-lSeZO-Os6U15_aXrk9Gw8cPUaX1" + "_I8sLGuSiVdt3C_Fn2PZ3Z8i744FPFGGcG1qs2Wz-Q">>), + E = decode(<<"AQAB">>), + D = decode(<<"GRtbIQmhOZtyszfgKdg4u_N-R_mZGU_9k7JQ_jn1DnfTuMdSNprTeaSTyWfS" + "NkuaAwnOEbIQVy1IQbWVV25NY3ybc_IhUJtfri7bAXYEReWaCl3hdlPKXy9U" + "vqPYGR0kIXTQRqns-dVJ7jahlI7LyckrpTmrM8dWBo4_PMaenNnPiQgO0xnu" + "ToxutRZJfJvG4Ox4ka3GORQd9CsCZ2vsUDmsXOfUENOyMqADC6p1M3h33tsu" + "rY15k9qMSpG9OX_IJAXmxzAh_tWiZOwk2K4yxH9tS3Lq1yX8C1EWmeRDkK2a" + "hecG85-oLKQt5VEpWHKmjOi_gJSdSgqcN96X52esAQ">>), + RSAPrivateKey = #'RSAPrivateKey'{ + modulus = N, + publicExponent = E, + privateExponent = D + }, + RSAPublicKey = #'RSAPublicKey'{ + modulus = N, + publicExponent = E + }, + {RSAPrivateKey, RSAPublicKey}. + + +decode(Goop) -> + crypto:bytes_to_integer(b64url:decode(Goop)). + -endif. diff --git a/src/jwtf_test_util.erl b/src/jwtf_test_util.erl deleted file mode 100644 index c32ea1cb9..000000000 --- a/src/jwtf_test_util.erl +++ /dev/null @@ -1,82 +0,0 @@ -% Licensed under the Apache License, Version 2.0 (the "License"); you may not -% use this file except in compliance with the License. You may obtain a copy of -% the License at -% -% http://www.apache.org/licenses/LICENSE-2.0 -% -% Unless required by applicable law or agreed to in writing, software -% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -% License for the specific language governing permissions and limitations under -% the License. - --module(jwtf_test_util). - --export([ - create_private_key/0, - create_keypair/0, - to_public_key/1 -]). - --include_lib("public_key/include/public_key.hrl"). - --spec create_private_key() -> - #'RSAPrivateKey'{} | no_return(). -create_private_key() -> - create_private_key("/tmp"). - - --spec create_keypair() -> - {#'RSAPrivateKey'{}, #'RSAPublicKey'{}} | no_return(). -create_keypair() -> - PrivateKey = create_private_key(), - {PrivateKey, to_public_key(PrivateKey)}. - - --spec to_public_key(#'RSAPrivateKey'{}) -> - #'RSAPublicKey'{}. -to_public_key(#'RSAPrivateKey'{} = PrivateKey) -> - #'RSAPublicKey'{ - modulus = PrivateKey#'RSAPrivateKey'.modulus, - publicExponent = PrivateKey#'RSAPrivateKey'.publicExponent}. - - -create_private_key(TmpDir) -> - ok = verify_openssl(), - Path = filename:join(TmpDir, timestamp() ++ "-rsa.key.der"), - Bin = create_rsa_key(Path), - public_key:der_decode('RSAPrivateKey', Bin). - - -verify_openssl() -> - case os:cmd("openssl version") of - "OpenSSL 1." ++ _Rest -> - ok; - _ -> - throw({error, openssl_required}) - end. - - -timestamp() -> - lists:concat([integer_to_list(N) || N <- tuple_to_list(os:timestamp())]). - - -create_rsa_key(Path) -> - Cmd = "openssl genpkey -algorithm RSA -outform DER -out " ++ Path, - Out = os:cmd(Cmd), - %% Since os:cmd doesn't indicate if the command fails, we go to - %% some length to ensure the output looks correct. - ok = validate_genpkey_output(Out), - {ok, Bin} = file:read_file(Path), - ok = file:delete(Path), - Bin. - - -validate_genpkey_output(Out) when is_list(Out) -> - Length = length(Out), - case re:run(Out, "[.+\n]+") of % should only contain period, plus, or nl - {match, [{0, Length}]} -> - ok; - _ -> - throw({error, {openssl_genpkey_failed, Out}}) - end. -- cgit v1.2.1 From c6e58c4edf2747379f8c7627a7c30b26ed5493d4 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Fri, 16 Jun 2017 11:49:09 +0100 Subject: get_keyset needs ssl started --- src/jwks.erl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/jwks.erl b/src/jwks.erl index 4022e4184..458a4cf3e 100644 --- a/src/jwks.erl +++ b/src/jwks.erl @@ -117,6 +117,7 @@ decode_number(Base64) -> jwks_test() -> application:ensure_all_started(ibrowse), + application:ensure_all_started(ssl), ?assertMatch({ok, _}, get_keyset("https://iam.eu-gb.bluemix.net/oidc/keys")). rs_test() -> -- cgit v1.2.1 From 53c254f826d10c267f5c91cd519b4fdd3807b129 Mon Sep 17 00:00:00 2001 From: Jay Doane Date: Thu, 11 May 2017 16:53:47 -0700 Subject: Remove unnecessary props --- src/jwtf.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/jwtf.erl b/src/jwtf.erl index 809f3f391..dcf83fb94 100644 --- a/src/jwtf.erl +++ b/src/jwtf.erl @@ -338,12 +338,12 @@ invalid_typ_test() -> missing_alg_test() -> - Encoded = encode({[{<<"typ">>, <<"NOPE">>}]}, []), + Encoded = encode({[]}, []), ?assertEqual({error, {bad_request,<<"Missing alg header parameter">>}}, decode(Encoded, [alg], nil)). invalid_alg_test() -> - Encoded = encode({[{<<"typ">>, <<"JWT">>}, {<<"alg">>, <<"NOPE">>}]}, []), + Encoded = encode({[{<<"alg">>, <<"NOPE">>}]}, []), ?assertEqual({error, {bad_request,<<"Invalid alg header parameter">>}}, decode(Encoded, [alg], nil)). -- cgit v1.2.1 From a01cb0ff314dc62598190bacf315443a85e76510 Mon Sep 17 00:00:00 2001 From: Jay Doane Date: Thu, 11 May 2017 16:54:49 -0700 Subject: Make time explicitly in future --- src/jwtf.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/jwtf.erl b/src/jwtf.erl index dcf83fb94..1f7a64266 100644 --- a/src/jwtf.erl +++ b/src/jwtf.erl @@ -373,7 +373,7 @@ missing_nbf_test() -> invalid_nbf_test() -> - Encoded = encode(valid_header(), {[{<<"nbf">>, 32503680000}]}), + Encoded = encode(valid_header(), {[{<<"nbf">>, 2 * now_seconds()}]}), ?assertEqual({error, {unauthorized, <<"nbf not in past">>}}, decode(Encoded, [nbf], nil)). -- cgit v1.2.1 From bb1744ea78b36059f9291921a77490774b2fdd55 Mon Sep 17 00:00:00 2001 From: Jay Doane Date: Tue, 20 Jun 2017 17:28:11 -0700 Subject: Suppress compiler warnings --- src/jwtf.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/jwtf.erl b/src/jwtf.erl index 1f7a64266..3bf8be616 100644 --- a/src/jwtf.erl +++ b/src/jwtf.erl @@ -491,9 +491,9 @@ encode_decode_test_() -> encode_decode(Alg) -> {EncodeKey, DecodeKey} = case verification_algorithm(Alg) of - {public_key, Algorithm} -> + {public_key, _Algorithm} -> create_keypair(); - {hmac, Algorithm} -> + {hmac, _Algorithm} -> Key = <<"a-super-secret-key">>, {Key, Key} end, -- cgit v1.2.1 From 3d6c294eec8363575ac82c256a9a6b82d31d1673 Mon Sep 17 00:00:00 2001 From: Jay Doane Date: Mon, 7 Aug 2017 14:49:57 -0700 Subject: Move key cache to epep application --- src/jwks.erl | 162 ------------------------------------------------------- src/jwtf.app.src | 3 -- src/jwtf_app.erl | 26 --------- src/jwtf_sup.erl | 60 --------------------- 4 files changed, 251 deletions(-) delete mode 100644 src/jwks.erl delete mode 100644 src/jwtf_app.erl delete mode 100644 src/jwtf_sup.erl diff --git a/src/jwks.erl b/src/jwks.erl deleted file mode 100644 index 458a4cf3e..000000000 --- a/src/jwks.erl +++ /dev/null @@ -1,162 +0,0 @@ -% Licensed under the Apache License, Version 2.0 (the "License"); you may not -% use this file except in compliance with the License. You may obtain a copy of -% the License at -% -% http://www.apache.org/licenses/LICENSE-2.0 -% -% Unless required by applicable law or agreed to in writing, software -% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -% License for the specific language governing permissions and limitations under -% the License. - -% @doc -% This module fetches and parses JSON Web Key Sets (JWKS). - --module(jwks). - --export([ - get_key/3, - get_keyset/1 -]). - --include_lib("public_key/include/public_key.hrl"). - -get_key(Url, Kty, Kid) -> - case lookup(Url, Kty, Kid) of - {ok, Key} -> - {ok, Key}; - {error, not_found} -> - case update_cache(Url) of - ok -> - lookup(Url, Kty, Kid); - {error, Reason} -> - {error, Reason} - end - end. - - -lookup(Url, Kty, Kid) -> - case ets_lru:lookup_d(jwks_cache_lru, {Url, Kty, Kid}) of - {ok, Key} -> - {ok, Key}; - not_found -> - {error, not_found} - end. - - -update_cache(Url) -> - case get_keyset(Url) of - {ok, KeySet} -> - [ets_lru:insert(jwks_cache_lru, {Url, Kty, Kid}, Key) - || {{Kty, Kid}, Key} <- KeySet], - ok; - {error, Reason} -> - {error, Reason} - end. - - -get_keyset(Url) -> - ReqHeaders = [], - case ibrowse:send_req(Url, ReqHeaders, get) of - {ok, "200", _RespHeaders, RespBody} -> - {ok, parse_keyset(RespBody)}; - {ok, Code, _RespHeaders, _RespBody} -> - couch_log:warning("get_keyset failed with code ~p", [Code]), - {error, {service_unavailable, <<"JWKS service unavailable">>}}; - {error, Reason} -> - couch_log:warning("get_keyset failed with reason ~p", [Reason]), - {error, {service_unavailable, <<"JWKS service unavailable">>}} - end. - - -parse_keyset(Body) -> - {Props} = jiffy:decode(Body), - Keys = proplists:get_value(<<"keys">>, Props), - lists:flatmap(fun parse_key/1, Keys). - - -parse_key({Props}) -> - Alg = proplists:get_value(<<"alg">>, Props), - Kty = proplists:get_value(<<"kty">>, Props), - Kid = proplists:get_value(<<"kid">>, Props), - case {Alg, Kty} of - {<<"RS256">>, <<"RSA">>} -> - E = proplists:get_value(<<"e">>, Props), - N = proplists:get_value(<<"n">>, Props), - [{{Kty, Kid}, #'RSAPublicKey'{ - modulus = decode_number(N), - publicExponent = decode_number(E)}}]; - {<<"ES256">>, <<"EC">>} -> - Crv = proplists:get_value(<<"crv">>, Props), - case Crv of - <<"P-256">> -> - X = proplists:get_value(<<"x">>, Props), - Y = proplists:get_value(<<"y">>, Props), - Point = <<4:8, - (b64url:decode(X))/binary, - (b64url:decode(Y))/binary>>, - [{{Kty, Kid}, { - #'ECPoint'{point = Point}, - {namedCurve,{1,2,840,10045,3,1,7}} - }}]; - _ -> - [] - end; - _ -> - [] - end. - - -decode_number(Base64) -> - crypto:bytes_to_integer(b64url:decode(Base64)). - - --ifdef(TEST). --include_lib("eunit/include/eunit.hrl"). - -jwks_test() -> - application:ensure_all_started(ibrowse), - application:ensure_all_started(ssl), - ?assertMatch({ok, _}, get_keyset("https://iam.eu-gb.bluemix.net/oidc/keys")). - -rs_test() -> - Ejson = {[ - {<<"kty">>, <<"RSA">>}, - {<<"n">>, <<"0vx7agoebGcQSuuPiLJXZptN9nndrQmbXEps2aiAFbWhM78LhWx" - "4cbbfAAtVT86zwu1RK7aPFFxuhDR1L6tSoc_BJECPebWKRXjBZCiFV4n3oknjhMs" - "tn64tZ_2W-5JsGY4Hc5n9yBXArwl93lqt7_RN5w6Cf0h4QyQ5v-65YGjQR0_FDW2" - "QvzqY368QQMicAtaSqzs8KJZgnYb9c7d0zgdAZHzu6qMQvRL5hajrn1n91CbOpbI" - "SD08qNLyrdkt-bFTWhAI4vMQFh6WeZu0fM4lFd2NcRwr3XPksINHaQ-G_xBniIqb" - "w0Ls1jF44-csFCur-kEgU8awapJzKnqDKgw">>}, - {<<"e">>, <<"AQAB">>}, - {<<"alg">>, <<"RS256">>}, - {<<"kid">>, <<"2011-04-29">>} - ]}, - ?assertMatch([{{<<"RSA">>, <<"2011-04-29">>}, {'RSAPublicKey', _, 65537}}], - parse_key(Ejson)). - - -ec_test() -> - PrivateKey = #'ECPrivateKey'{ - version = 1, - parameters = {namedCurve,{1,2,840,10045,3,1,7}}, - privateKey = b64url:decode("870MB6gfuTJ4HtUnUvYMyJpr5eUZNP4Bk43bVdj3eAE"), - publicKey = <<4:8, - (b64url:decode("MKBCTNIcKUSDii11ySs3526iDZ8AiTo7Tu6KPAqv7D4"))/binary, - (b64url:decode("4Etl6SRW2YiLUrN5vfvVHuhp7x8PxltmWWlbbM4IFyM"))/binary>>}, - Ejson = {[ - {<<"kty">>, <<"EC">>}, - {<<"crv">>, <<"P-256">>}, - {<<"x">>, <<"MKBCTNIcKUSDii11ySs3526iDZ8AiTo7Tu6KPAqv7D4">>}, - {<<"y">>, <<"4Etl6SRW2YiLUrN5vfvVHuhp7x8PxltmWWlbbM4IFyM">>}, - {<<"alg">>, <<"ES256">>}, - {<<"kid">>, <<"1">>} - ]}, - ?assertMatch([{_Key, _Value}], parse_key(Ejson)), - [{_, ECPublicKey}] = parse_key(Ejson), - Msg = <<"foo">>, - Sig = public_key:sign(Msg, sha256, PrivateKey), - ?assert(public_key:verify(Msg, sha256, Sig, ECPublicKey)). - --endif. diff --git a/src/jwtf.app.src b/src/jwtf.app.src index 2ff221309..304bb9e0a 100644 --- a/src/jwtf.app.src +++ b/src/jwtf.app.src @@ -14,14 +14,11 @@ {description, "JSON Web Token Functions"}, {vsn, git}, {registered, []}, - {mod, { jwtf_app, []}}, {applications, [ kernel, stdlib, b64url, crypto, - ets_lru, - ibrowse, jiffy, public_key ]}, diff --git a/src/jwtf_app.erl b/src/jwtf_app.erl deleted file mode 100644 index 92a26d558..000000000 --- a/src/jwtf_app.erl +++ /dev/null @@ -1,26 +0,0 @@ -%%%------------------------------------------------------------------- -%% @doc jwtf public API -%% @end -%%%------------------------------------------------------------------- - --module(jwtf_app). - --behaviour(application). - -%% Application callbacks --export([start/2, stop/1]). - -%%==================================================================== -%% API -%%==================================================================== - -start(_StartType, _StartArgs) -> - jwtf_sup:start_link(). - -%%-------------------------------------------------------------------- -stop(_State) -> - ok. - -%%==================================================================== -%% Internal functions -%%==================================================================== diff --git a/src/jwtf_sup.erl b/src/jwtf_sup.erl deleted file mode 100644 index 7cf56e84f..000000000 --- a/src/jwtf_sup.erl +++ /dev/null @@ -1,60 +0,0 @@ -%%%------------------------------------------------------------------- -%% @doc epep top level supervisor. -%% @end -%%%------------------------------------------------------------------- - --module(jwtf_sup). - --behaviour(supervisor). - -%% API --export([start_link/0]). - -%% Supervisor callbacks --export([init/1]). - --define(SERVER, ?MODULE). - -%%==================================================================== -%% API functions -%%==================================================================== - -start_link() -> - supervisor:start_link({local, ?SERVER}, ?MODULE, []). - -%%==================================================================== -%% Supervisor callbacks -%%==================================================================== - -%% Child :: {Id,StartFunc,Restart,Shutdown,Type,Modules} -init([]) -> - Children = [ - {jwks_cache_lru, - {ets_lru, start_link, [jwks_cache_lru, lru_opts()]}, - permanent, 5000, worker, [ets_lru]} - ], - {ok, { {one_for_all, 5, 10}, Children} }. - -%%==================================================================== -%% Internal functions -%%==================================================================== - -lru_opts() -> - case config:get_integer("jwtf_cache", "max_objects", 50) of - MxObjs when MxObjs > 0 -> - [{max_objects, MxObjs}]; - _ -> - [] - end ++ - case config:get_integer("jwtf_cache", "max_size", 0) of - MxSize when MxSize > 0 -> - [{max_size, MxSize}]; - _ -> - [] - end ++ - case config:get_integer("jwtf_cache", "max_lifetime", 0) of - MxLT when MxLT > 0 -> - [{max_lifetime, MxLT}]; - _ -> - [] - end. -- cgit v1.2.1 From 8e937f2d5b67ad83fc1e8e5e7317c4ba53b43f36 Mon Sep 17 00:00:00 2001 From: Jay Doane Date: Fri, 11 Aug 2017 16:10:21 -0700 Subject: Separate tests into dedicated module Currently jwtf tests don't run in a continuous integration environment, presumably due to dependency rules. This splits the tests into their own module, but requires exposing a couple new functions in jwtf to support them. Some long lines were also broken into smaller lengths. --- src/jwtf.erl | 264 ++---------------------------------------------- test/jwtf_tests.erl | 281 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 292 insertions(+), 253 deletions(-) create mode 100644 test/jwtf_tests.erl diff --git a/src/jwtf.erl b/src/jwtf.erl index 3bf8be616..c6cc78433 100644 --- a/src/jwtf.erl +++ b/src/jwtf.erl @@ -19,7 +19,9 @@ -export([ encode/3, - decode/3 + decode/3, + valid_algorithms/0, + verification_algorithm/1 ]). -define(ALGS, [ @@ -33,8 +35,6 @@ {<<"HS384">>, {hmac, sha384}}, {<<"HS512">>, {hmac, sha512}}]). --define(VALID_ALGS, proplists:get_keys(?ALGS)). - % @doc encode % Encode the JSON Header and Claims using Key and Alg obtained from Header @@ -81,6 +81,13 @@ decode(EncodedToken, Checks, KS) -> end. +% @doc valid_algorithms +% Return a list of supported algorithms +-spec valid_algorithms() -> [binary()]. +valid_algorithms() -> + proplists:get_keys(?ALGS). + + % @doc verification_algorithm % Return {VerificationMethod, Algorithm} tuple for the specified Alg -spec verification_algorithm(binary()) -> @@ -135,7 +142,7 @@ validate_alg(Props, Checks) -> {true, undefined} -> throw({bad_request, <<"Missing alg header parameter">>}); {true, Alg} -> - case lists:member(Alg, ?VALID_ALGS) of + case lists:member(Alg, valid_algorithms()) of true -> ok; false -> @@ -301,252 +308,3 @@ now_seconds() -> prop(Prop, Props) -> proplists:get_value(Prop, Props). - - --ifdef(TEST). --include_lib("eunit/include/eunit.hrl"). --include_lib("public_key/include/public_key.hrl"). - -encode(Header0, Payload0) -> - Header1 = b64url:encode(jiffy:encode(Header0)), - Payload1 = b64url:encode(jiffy:encode(Payload0)), - Sig = b64url:encode(<<"bad">>), - <>. - -valid_header() -> - {[{<<"typ">>, <<"JWT">>}, {<<"alg">>, <<"RS256">>}]}. - -jwt_io_pubkey() -> - PublicKeyPEM = <<"-----BEGIN PUBLIC KEY-----\n" - "MIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQDdlatRjRjogo3WojgGH" - "FHYLugdUWAY9iR3fy4arWNA1KoS8kVw33cJibXr8bvwUAUparCwlvdbH6" - "dvEOfou0/gCFQsHUfQrSDv+MuSUMAe8jzKE4qW+jK+xQU9a03GUnKHkkl" - "e+Q0pX/g6jXZ7r1/xAK5Do2kQ+X5xK9cipRgEKwIDAQAB\n" - "-----END PUBLIC KEY-----\n">>, - [PEMEntry] = public_key:pem_decode(PublicKeyPEM), - public_key:pem_entry_decode(PEMEntry). - - -missing_typ_test() -> - Encoded = encode({[]}, []), - ?assertEqual({error, {bad_request,<<"Missing typ header parameter">>}}, decode(Encoded, [typ], nil)). - - -invalid_typ_test() -> - Encoded = encode({[{<<"typ">>, <<"NOPE">>}]}, []), - ?assertEqual({error, {bad_request,<<"Invalid typ header parameter">>}}, decode(Encoded, [typ], nil)). - - -missing_alg_test() -> - Encoded = encode({[]}, []), - ?assertEqual({error, {bad_request,<<"Missing alg header parameter">>}}, decode(Encoded, [alg], nil)). - - -invalid_alg_test() -> - Encoded = encode({[{<<"alg">>, <<"NOPE">>}]}, []), - ?assertEqual({error, {bad_request,<<"Invalid alg header parameter">>}}, decode(Encoded, [alg], nil)). - - -missing_iss_test() -> - Encoded = encode(valid_header(), {[]}), - ?assertEqual({error, {bad_request,<<"Missing iss claim">>}}, decode(Encoded, [{iss, right}], nil)). - - -invalid_iss_test() -> - Encoded = encode(valid_header(), {[{<<"iss">>, <<"wrong">>}]}), - ?assertEqual({error, {bad_request,<<"Invalid iss claim">>}}, decode(Encoded, [{iss, right}], nil)). - - -missing_iat_test() -> - Encoded = encode(valid_header(), {[]}), - ?assertEqual({error, {bad_request,<<"Missing iat claim">>}}, decode(Encoded, [iat], nil)). - - -invalid_iat_test() -> - Encoded = encode(valid_header(), {[{<<"iat">>, <<"hello">>}]}), - ?assertEqual({error, {bad_request,<<"Invalid iat claim">>}}, decode(Encoded, [iat], nil)). - - -missing_nbf_test() -> - Encoded = encode(valid_header(), {[]}), - ?assertEqual({error, {bad_request,<<"Missing nbf claim">>}}, decode(Encoded, [nbf], nil)). - - -invalid_nbf_test() -> - Encoded = encode(valid_header(), {[{<<"nbf">>, 2 * now_seconds()}]}), - ?assertEqual({error, {unauthorized, <<"nbf not in past">>}}, decode(Encoded, [nbf], nil)). - - -missing_exp_test() -> - Encoded = encode(valid_header(), {[]}), - ?assertEqual({error, {bad_request, <<"Missing exp claim">>}}, decode(Encoded, [exp], nil)). - - -invalid_exp_test() -> - Encoded = encode(valid_header(), {[{<<"exp">>, 0}]}), - ?assertEqual({error, {unauthorized, <<"exp not in future">>}}, decode(Encoded, [exp], nil)). - - -missing_kid_test() -> - Encoded = encode({[]}, {[]}), - ?assertEqual({error, {bad_request, <<"Missing kid claim">>}}, decode(Encoded, [kid], nil)). - - -public_key_not_found_test() -> - Encoded = encode( - {[{<<"alg">>, <<"RS256">>}, {<<"kid">>, <<"1">>}]}, - {[]}), - KS = fun(_, _) -> throw(not_found) end, - Expected = {error, not_found}, - ?assertEqual(Expected, decode(Encoded, [], KS)). - - -bad_rs256_sig_test() -> - Encoded = encode( - {[{<<"typ">>, <<"JWT">>}, {<<"alg">>, <<"RS256">>}]}, - {[]}), - KS = fun(<<"RS256">>, undefined) -> jwt_io_pubkey() end, - ?assertEqual({error, {bad_request, <<"Bad signature">>}}, decode(Encoded, [], KS)). - - -bad_hs256_sig_test() -> - Encoded = encode( - {[{<<"typ">>, <<"JWT">>}, {<<"alg">>, <<"HS256">>}]}, - {[]}), - KS = fun(<<"HS256">>, undefined) -> <<"bad">> end, - ?assertEqual({error, {bad_request, <<"Bad HMAC">>}}, decode(Encoded, [], KS)). - - -malformed_token_test() -> - ?assertEqual({error, {bad_request, <<"Malformed token">>}}, decode(<<"a.b.c.d">>, [], nil)). - - -%% jwt.io generated -hs256_test() -> - EncodedToken = <<"eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCIsImtpZCI6IjEyMzQ1Ni" - "J9.eyJpc3MiOiJodHRwczovL2Zvby5jb20iLCJpYXQiOjAsImV4cCI" - "6MTAwMDAwMDAwMDAwMDAsImtpZCI6ImJhciJ9.iS8AH11QHHlczkBn" - "Hl9X119BYLOZyZPllOVhSBZ4RZs">>, - KS = fun(<<"HS256">>, <<"123456">>) -> <<"secret">> end, - Checks = [{iss, <<"https://foo.com">>}, iat, exp, typ, alg, kid], - ?assertMatch({ok, _}, catch decode(EncodedToken, Checks, KS)). - - -%% pip install PyJWT -%% > import jwt -%% > jwt.encode({'foo':'bar'}, 'secret', algorithm='HS384') -hs384_test() -> - EncodedToken = <<"eyJhbGciOiJIUzM4NCIsInR5cCI6IkpXVCJ9.eyJmb28iOiJiYXIif" - "Q.2quwghs6I56GM3j7ZQbn-ASZ53xdBqzPzTDHm_CtVec32LUy-Ezy" - "L3JjIe7WjL93">>, - KS = fun(<<"HS384">>, _) -> <<"secret">> end, - ?assertMatch({ok, {[{<<"foo">>,<<"bar">>}]}}, catch decode(EncodedToken, [], KS)). - - -%% pip install PyJWT -%% > import jwt -%% > jwt.encode({'foo':'bar'}, 'secret', algorithm='HS512') -hs512_test() -> - EncodedToken = <<"eyJhbGciOiJIUzUxMiIsInR5cCI6IkpXVCJ9.eyJmb28iOiJiYX" - "IifQ.WePl7achkd0oGNB8XRF_LJwxlyiPZqpdNgdKpDboAjSTsW" - "q-aOGNynTp8TOv8KjonFym8vwFwppXOLoLXbkIaQ">>, - KS = fun(<<"HS512">>, _) -> <<"secret">> end, - ?assertMatch({ok, {[{<<"foo">>,<<"bar">>}]}}, catch decode(EncodedToken, [], KS)). - - -%% jwt.io generated -rs256_test() -> - EncodedToken = <<"eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0N" - "TY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiYWRtaW4iOnRydWV9.Ek" - "N-DOsnsuRjRO6BxXemmJDm3HbxrbRzXglbN2S4sOkopdU4IsDxTI8j" - "O19W_A4K8ZPJijNLis4EZsHeY559a4DFOd50_OqgHGuERTqYZyuhtF" - "39yxJPAjUESwxk2J5k_4zM3O-vtd1Ghyo4IbqKKSy6J9mTniYJPenn" - "5-HIirE">>, - - Checks = [sig, alg], - KS = fun(<<"RS256">>, undefined) -> jwt_io_pubkey() end, - - ExpectedPayload = {[ - {<<"sub">>, <<"1234567890">>}, - {<<"name">>, <<"John Doe">>}, - {<<"admin">>, true} - ]}, - - ?assertMatch({ok, ExpectedPayload}, decode(EncodedToken, Checks, KS)). - - -encode_missing_alg_test() -> - ?assertEqual({error, {bad_request, <<"Missing alg header parameter">>}}, - encode({[]}, {[]}, <<"foo">>)). - - -encode_invalid_alg_test() -> - ?assertEqual({error, {bad_request, <<"Invalid alg header parameter">>}}, - encode({[{<<"alg">>, <<"BOGUS">>}]}, {[]}, <<"foo">>)). - - -encode_decode_test_() -> - [{Alg, encode_decode(Alg)} || Alg <- ?VALID_ALGS]. - - -encode_decode(Alg) -> - {EncodeKey, DecodeKey} = case verification_algorithm(Alg) of - {public_key, _Algorithm} -> - create_keypair(); - {hmac, _Algorithm} -> - Key = <<"a-super-secret-key">>, - {Key, Key} - end, - Claims = claims(), - {ok, Encoded} = encode(header(Alg), Claims, EncodeKey), - KS = fun(_, _) -> DecodeKey end, - {ok, Decoded} = decode(Encoded, [], KS), - ?_assertMatch(Claims, Decoded). - - -header(Alg) -> - {[ - {<<"typ">>, <<"JWT">>}, - {<<"alg">>, Alg}, - {<<"kid">>, <<"20170520-00:00:00">>} - ]}. - - -claims() -> - EpochSeconds = 1496205841, - {[ - {<<"iat">>, EpochSeconds}, - {<<"exp">>, EpochSeconds + 3600} - ]}. - -create_keypair() -> - %% https://tools.ietf.org/html/rfc7517#appendix-C - N = decode(<<"t6Q8PWSi1dkJj9hTP8hNYFlvadM7DflW9mWepOJhJ66w7nyoK1gPNqFMSQRy" - "O125Gp-TEkodhWr0iujjHVx7BcV0llS4w5ACGgPrcAd6ZcSR0-Iqom-QFcNP" - "8Sjg086MwoqQU_LYywlAGZ21WSdS_PERyGFiNnj3QQlO8Yns5jCtLCRwLHL0" - "Pb1fEv45AuRIuUfVcPySBWYnDyGxvjYGDSM-AqWS9zIQ2ZilgT-GqUmipg0X" - "OC0Cc20rgLe2ymLHjpHciCKVAbY5-L32-lSeZO-Os6U15_aXrk9Gw8cPUaX1" - "_I8sLGuSiVdt3C_Fn2PZ3Z8i744FPFGGcG1qs2Wz-Q">>), - E = decode(<<"AQAB">>), - D = decode(<<"GRtbIQmhOZtyszfgKdg4u_N-R_mZGU_9k7JQ_jn1DnfTuMdSNprTeaSTyWfS" - "NkuaAwnOEbIQVy1IQbWVV25NY3ybc_IhUJtfri7bAXYEReWaCl3hdlPKXy9U" - "vqPYGR0kIXTQRqns-dVJ7jahlI7LyckrpTmrM8dWBo4_PMaenNnPiQgO0xnu" - "ToxutRZJfJvG4Ox4ka3GORQd9CsCZ2vsUDmsXOfUENOyMqADC6p1M3h33tsu" - "rY15k9qMSpG9OX_IJAXmxzAh_tWiZOwk2K4yxH9tS3Lq1yX8C1EWmeRDkK2a" - "hecG85-oLKQt5VEpWHKmjOi_gJSdSgqcN96X52esAQ">>), - RSAPrivateKey = #'RSAPrivateKey'{ - modulus = N, - publicExponent = E, - privateExponent = D - }, - RSAPublicKey = #'RSAPublicKey'{ - modulus = N, - publicExponent = E - }, - {RSAPrivateKey, RSAPublicKey}. - - -decode(Goop) -> - crypto:bytes_to_integer(b64url:decode(Goop)). - --endif. diff --git a/test/jwtf_tests.erl b/test/jwtf_tests.erl new file mode 100644 index 000000000..527bc327f --- /dev/null +++ b/test/jwtf_tests.erl @@ -0,0 +1,281 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(jwtf_tests). + +-include_lib("eunit/include/eunit.hrl"). +-include_lib("public_key/include/public_key.hrl"). + +encode(Header0, Payload0) -> + Header1 = b64url:encode(jiffy:encode(Header0)), + Payload1 = b64url:encode(jiffy:encode(Payload0)), + Sig = b64url:encode(<<"bad">>), + <>. + +valid_header() -> + {[{<<"typ">>, <<"JWT">>}, {<<"alg">>, <<"RS256">>}]}. + +jwt_io_pubkey() -> + PublicKeyPEM = <<"-----BEGIN PUBLIC KEY-----\n" + "MIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQDdlatRjRjogo3WojgGH" + "FHYLugdUWAY9iR3fy4arWNA1KoS8kVw33cJibXr8bvwUAUparCwlvdbH6" + "dvEOfou0/gCFQsHUfQrSDv+MuSUMAe8jzKE4qW+jK+xQU9a03GUnKHkkl" + "e+Q0pX/g6jXZ7r1/xAK5Do2kQ+X5xK9cipRgEKwIDAQAB\n" + "-----END PUBLIC KEY-----\n">>, + [PEMEntry] = public_key:pem_decode(PublicKeyPEM), + public_key:pem_entry_decode(PEMEntry). + + +missing_typ_test() -> + Encoded = encode({[]}, []), + ?assertEqual({error, {bad_request,<<"Missing typ header parameter">>}}, + jwtf:decode(Encoded, [typ], nil)). + + +invalid_typ_test() -> + Encoded = encode({[{<<"typ">>, <<"NOPE">>}]}, []), + ?assertEqual({error, {bad_request,<<"Invalid typ header parameter">>}}, + jwtf:decode(Encoded, [typ], nil)). + + +missing_alg_test() -> + Encoded = encode({[]}, []), + ?assertEqual({error, {bad_request,<<"Missing alg header parameter">>}}, + jwtf:decode(Encoded, [alg], nil)). + + +invalid_alg_test() -> + Encoded = encode({[{<<"alg">>, <<"NOPE">>}]}, []), + ?assertEqual({error, {bad_request,<<"Invalid alg header parameter">>}}, + jwtf:decode(Encoded, [alg], nil)). + + +missing_iss_test() -> + Encoded = encode(valid_header(), {[]}), + ?assertEqual({error, {bad_request,<<"Missing iss claim">>}}, + jwtf:decode(Encoded, [{iss, right}], nil)). + + +invalid_iss_test() -> + Encoded = encode(valid_header(), {[{<<"iss">>, <<"wrong">>}]}), + ?assertEqual({error, {bad_request,<<"Invalid iss claim">>}}, + jwtf:decode(Encoded, [{iss, right}], nil)). + + +missing_iat_test() -> + Encoded = encode(valid_header(), {[]}), + ?assertEqual({error, {bad_request,<<"Missing iat claim">>}}, + jwtf:decode(Encoded, [iat], nil)). + + +invalid_iat_test() -> + Encoded = encode(valid_header(), {[{<<"iat">>, <<"hello">>}]}), + ?assertEqual({error, {bad_request,<<"Invalid iat claim">>}}, + jwtf:decode(Encoded, [iat], nil)). + + +missing_nbf_test() -> + Encoded = encode(valid_header(), {[]}), + ?assertEqual({error, {bad_request,<<"Missing nbf claim">>}}, + jwtf:decode(Encoded, [nbf], nil)). + + +invalid_nbf_test() -> + Encoded = encode(valid_header(), {[{<<"nbf">>, 2 * now_seconds()}]}), + ?assertEqual({error, {unauthorized, <<"nbf not in past">>}}, + jwtf:decode(Encoded, [nbf], nil)). + + +missing_exp_test() -> + Encoded = encode(valid_header(), {[]}), + ?assertEqual({error, {bad_request, <<"Missing exp claim">>}}, + jwtf:decode(Encoded, [exp], nil)). + + +invalid_exp_test() -> + Encoded = encode(valid_header(), {[{<<"exp">>, 0}]}), + ?assertEqual({error, {unauthorized, <<"exp not in future">>}}, + jwtf:decode(Encoded, [exp], nil)). + + +missing_kid_test() -> + Encoded = encode({[]}, {[]}), + ?assertEqual({error, {bad_request, <<"Missing kid claim">>}}, + jwtf:decode(Encoded, [kid], nil)). + + +public_key_not_found_test() -> + Encoded = encode( + {[{<<"alg">>, <<"RS256">>}, {<<"kid">>, <<"1">>}]}, + {[]}), + KS = fun(_, _) -> throw(not_found) end, + Expected = {error, not_found}, + ?assertEqual(Expected, jwtf:decode(Encoded, [], KS)). + + +bad_rs256_sig_test() -> + Encoded = encode( + {[{<<"typ">>, <<"JWT">>}, {<<"alg">>, <<"RS256">>}]}, + {[]}), + KS = fun(<<"RS256">>, undefined) -> jwt_io_pubkey() end, + ?assertEqual({error, {bad_request, <<"Bad signature">>}}, + jwtf:decode(Encoded, [], KS)). + + +bad_hs256_sig_test() -> + Encoded = encode( + {[{<<"typ">>, <<"JWT">>}, {<<"alg">>, <<"HS256">>}]}, + {[]}), + KS = fun(<<"HS256">>, undefined) -> <<"bad">> end, + ?assertEqual({error, {bad_request, <<"Bad HMAC">>}}, + jwtf:decode(Encoded, [], KS)). + + +malformed_token_test() -> + ?assertEqual({error, {bad_request, <<"Malformed token">>}}, + jwtf:decode(<<"a.b.c.d">>, [], nil)). + + +%% jwt.io generated +hs256_test() -> + EncodedToken = <<"eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCIsImtpZCI6IjEyMzQ1Ni" + "J9.eyJpc3MiOiJodHRwczovL2Zvby5jb20iLCJpYXQiOjAsImV4cCI" + "6MTAwMDAwMDAwMDAwMDAsImtpZCI6ImJhciJ9.iS8AH11QHHlczkBn" + "Hl9X119BYLOZyZPllOVhSBZ4RZs">>, + KS = fun(<<"HS256">>, <<"123456">>) -> <<"secret">> end, + Checks = [{iss, <<"https://foo.com">>}, iat, exp, typ, alg, kid], + ?assertMatch({ok, _}, catch jwtf:decode(EncodedToken, Checks, KS)). + + +%% pip install PyJWT +%% > import jwt +%% > jwt.encode({'foo':'bar'}, 'secret', algorithm='HS384') +hs384_test() -> + EncodedToken = <<"eyJhbGciOiJIUzM4NCIsInR5cCI6IkpXVCJ9.eyJmb28iOiJiYXIif" + "Q.2quwghs6I56GM3j7ZQbn-ASZ53xdBqzPzTDHm_CtVec32LUy-Ezy" + "L3JjIe7WjL93">>, + KS = fun(<<"HS384">>, _) -> <<"secret">> end, + ?assertMatch({ok, {[{<<"foo">>,<<"bar">>}]}}, + catch jwtf:decode(EncodedToken, [], KS)). + + +%% pip install PyJWT +%% > import jwt +%% > jwt.encode({'foo':'bar'}, 'secret', algorithm='HS512') +hs512_test() -> + EncodedToken = <<"eyJhbGciOiJIUzUxMiIsInR5cCI6IkpXVCJ9.eyJmb28iOiJiYX" + "IifQ.WePl7achkd0oGNB8XRF_LJwxlyiPZqpdNgdKpDboAjSTsW" + "q-aOGNynTp8TOv8KjonFym8vwFwppXOLoLXbkIaQ">>, + KS = fun(<<"HS512">>, _) -> <<"secret">> end, + ?assertMatch({ok, {[{<<"foo">>,<<"bar">>}]}}, + catch jwtf:decode(EncodedToken, [], KS)). + + +%% jwt.io generated +rs256_test() -> + EncodedToken = <<"eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0N" + "TY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiYWRtaW4iOnRydWV9.Ek" + "N-DOsnsuRjRO6BxXemmJDm3HbxrbRzXglbN2S4sOkopdU4IsDxTI8j" + "O19W_A4K8ZPJijNLis4EZsHeY559a4DFOd50_OqgHGuERTqYZyuhtF" + "39yxJPAjUESwxk2J5k_4zM3O-vtd1Ghyo4IbqKKSy6J9mTniYJPenn" + "5-HIirE">>, + + Checks = [sig, alg], + KS = fun(<<"RS256">>, undefined) -> jwt_io_pubkey() end, + + ExpectedPayload = {[ + {<<"sub">>, <<"1234567890">>}, + {<<"name">>, <<"John Doe">>}, + {<<"admin">>, true} + ]}, + + ?assertMatch({ok, ExpectedPayload}, jwtf:decode(EncodedToken, Checks, KS)). + + +encode_missing_alg_test() -> + ?assertEqual({error, {bad_request, <<"Missing alg header parameter">>}}, + jwtf:encode({[]}, {[]}, <<"foo">>)). + + +encode_invalid_alg_test() -> + ?assertEqual({error, {bad_request, <<"Invalid alg header parameter">>}}, + jwtf:encode({[{<<"alg">>, <<"BOGUS">>}]}, {[]}, <<"foo">>)). + + +encode_decode_test_() -> + [{Alg, encode_decode(Alg)} || Alg <- jwtf:valid_algorithms()]. + + +encode_decode(Alg) -> + {EncodeKey, DecodeKey} = case jwtf:verification_algorithm(Alg) of + {public_key, _Algorithm} -> + create_keypair(); + {hmac, _Algorithm} -> + Key = <<"a-super-secret-key">>, + {Key, Key} + end, + Claims = claims(), + {ok, Encoded} = jwtf:encode(header(Alg), Claims, EncodeKey), + KS = fun(_, _) -> DecodeKey end, + {ok, Decoded} = jwtf:decode(Encoded, [], KS), + ?_assertMatch(Claims, Decoded). + + +header(Alg) -> + {[ + {<<"typ">>, <<"JWT">>}, + {<<"alg">>, Alg}, + {<<"kid">>, <<"20170520-00:00:00">>} + ]}. + + +claims() -> + EpochSeconds = 1496205841, + {[ + {<<"iat">>, EpochSeconds}, + {<<"exp">>, EpochSeconds + 3600} + ]}. + +create_keypair() -> + %% https://tools.ietf.org/html/rfc7517#appendix-C + N = decode(<<"t6Q8PWSi1dkJj9hTP8hNYFlvadM7DflW9mWepOJhJ66w7nyoK1gPNqFMSQRy" + "O125Gp-TEkodhWr0iujjHVx7BcV0llS4w5ACGgPrcAd6ZcSR0-Iqom-QFcNP" + "8Sjg086MwoqQU_LYywlAGZ21WSdS_PERyGFiNnj3QQlO8Yns5jCtLCRwLHL0" + "Pb1fEv45AuRIuUfVcPySBWYnDyGxvjYGDSM-AqWS9zIQ2ZilgT-GqUmipg0X" + "OC0Cc20rgLe2ymLHjpHciCKVAbY5-L32-lSeZO-Os6U15_aXrk9Gw8cPUaX1" + "_I8sLGuSiVdt3C_Fn2PZ3Z8i744FPFGGcG1qs2Wz-Q">>), + E = decode(<<"AQAB">>), + D = decode(<<"GRtbIQmhOZtyszfgKdg4u_N-R_mZGU_9k7JQ_jn1DnfTuMdSNprTeaSTyWfS" + "NkuaAwnOEbIQVy1IQbWVV25NY3ybc_IhUJtfri7bAXYEReWaCl3hdlPKXy9U" + "vqPYGR0kIXTQRqns-dVJ7jahlI7LyckrpTmrM8dWBo4_PMaenNnPiQgO0xnu" + "ToxutRZJfJvG4Ox4ka3GORQd9CsCZ2vsUDmsXOfUENOyMqADC6p1M3h33tsu" + "rY15k9qMSpG9OX_IJAXmxzAh_tWiZOwk2K4yxH9tS3Lq1yX8C1EWmeRDkK2a" + "hecG85-oLKQt5VEpWHKmjOi_gJSdSgqcN96X52esAQ">>), + RSAPrivateKey = #'RSAPrivateKey'{ + modulus = N, + publicExponent = E, + privateExponent = D + }, + RSAPublicKey = #'RSAPublicKey'{ + modulus = N, + publicExponent = E + }, + {RSAPrivateKey, RSAPublicKey}. + + +decode(Goop) -> + crypto:bytes_to_integer(b64url:decode(Goop)). + + +now_seconds() -> + {MegaSecs, Secs, _MicroSecs} = os:timestamp(), + MegaSecs * 1000000 + Secs. -- cgit v1.2.1 From 99f94e634760b67303f0179f257b20b171484cf5 Mon Sep 17 00:00:00 2001 From: Leonardo Pires Date: Thu, 13 Feb 2020 07:13:23 -0300 Subject: Port reduce_false.js and reduce_builtin.js to Elixir (#2541) Port reduce_false.js and reduce_builtin.js to Elixir --- test/elixir/README.md | 4 +- test/elixir/test/reduce_builtin_test.exs | 282 +++++++++++++++++++++++++++++++ test/elixir/test/reduce_false_test.exs | 50 ++++++ 3 files changed, 334 insertions(+), 2 deletions(-) create mode 100644 test/elixir/test/reduce_builtin_test.exs create mode 100644 test/elixir/test/reduce_false_test.exs diff --git a/test/elixir/README.md b/test/elixir/README.md index 90b2fd601..0a3ce63d5 100644 --- a/test/elixir/README.md +++ b/test/elixir/README.md @@ -63,8 +63,8 @@ X means done, - means partially - [ ] Port purge.js - [ ] Port reader_acl.js - [ ] Port recreate_doc.js - - [ ] Port reduce_builtin.js - - [ ] Port reduce_false.js + - [X] Port reduce_builtin.js + - [X] Port reduce_false.js - [ ] Port reduce_false_temp.js - [X] Port reduce.js - [X] Port replication.js diff --git a/test/elixir/test/reduce_builtin_test.exs b/test/elixir/test/reduce_builtin_test.exs new file mode 100644 index 000000000..d13ada1b3 --- /dev/null +++ b/test/elixir/test/reduce_builtin_test.exs @@ -0,0 +1,282 @@ +defmodule ReduceBuiltinTest do + use CouchTestCase + + @moduletag :views + + @moduledoc """ + Test CouchDB view builtin reduce functions + This is a port of the reduce_builtin.js suite + """ + + def random_ddoc(db_name) do + "/#{db_name}/_design/#{:erlang.monotonic_time()}" + end + + def summate(n) do + (n + 1) * n / 2 + end + + def sumsqr(n) do + 1..n |> Enum.reduce(0, fn i, acc -> acc + i * i end) + end + + def check_approx_distinct(expected, estimated) do + # see https://en.wikipedia.org/wiki/HyperLogLog + err = 1.04 / :math.sqrt(:math.pow(2, 11 - 1)) + abs(expected - estimated) < expected * err + end + + def query_rows(ddoc_url, builtin_fun, query \\ nil) do + http_opts = if query, do: [query: query], else: [] + Couch.get("#{ddoc_url}/_view/builtin#{builtin_fun}", http_opts).body["rows"] + end + + def query_value(ddoc_url, builtin_fun, query \\ nil) do + hd(query_rows(ddoc_url, builtin_fun, query))["value"] + end + + @tag :with_db + test "Builtin reduce functions", context do + db_name = context[:db_name] + num_docs = 500 + + docs = make_docs(1..num_docs) + + resp = Couch.post("/#{db_name}/_bulk_docs", body: %{:docs => docs}, query: %{w: 3}) + assert resp.status_code in [201, 202] + + ddoc_url = random_ddoc(db_name) + + map = ~s""" + function (doc) { + emit(doc.integer, doc.integer); + emit(doc.integer, doc.integer); + }; + """ + + design_doc = %{ + :views => %{ + :builtin_sum => %{:map => map, :reduce => "_sum"}, + :builtin_count => %{:map => map, :reduce => "_count"}, + :builtin_stats => %{:map => map, :reduce => "_stats"}, + :builtin_approx_count_distinct => %{ + :map => map, + :reduce => "_approx_count_distinct" + } + } + } + + assert Couch.put(ddoc_url, body: design_doc).body["ok"] + + value = ddoc_url |> query_value("_sum") + assert value == 2 * summate(num_docs) + value = ddoc_url |> query_value("_count") + assert value == 1000 + value = ddoc_url |> query_value("_stats") + assert value["sum"] == 2 * summate(num_docs) + assert value["count"] == 1000 + assert value["min"] == 1 + assert value["max"] == 500 + assert value["sumsqr"] == 2 * sumsqr(num_docs) + value = ddoc_url |> query_value("_approx_count_distinct") + assert check_approx_distinct(num_docs, value) + + value = ddoc_url |> query_value("_sum", %{startkey: 4, endkey: 4}) + assert value == 8 + value = ddoc_url |> query_value("_count", %{startkey: 4, endkey: 4}) + assert value == 2 + value = ddoc_url |> query_value("_approx_count_distinct", %{startkey: 4, endkey: 4}) + assert check_approx_distinct(1, value) + + value = ddoc_url |> query_value("_sum", %{startkey: 4, endkey: 5}) + assert value == 18 + value = ddoc_url |> query_value("_count", %{startkey: 4, endkey: 5}) + assert value == 4 + value = ddoc_url |> query_value("_approx_count_distinct", %{startkey: 4, endkey: 5}) + assert check_approx_distinct(2, value) + + value = ddoc_url |> query_value("_sum", %{startkey: 4, endkey: 6}) + assert value == 30 + value = ddoc_url |> query_value("_count", %{startkey: 4, endkey: 6}) + assert value == 6 + value = ddoc_url |> query_value("_approx_count_distinct", %{startkey: 4, endkey: 6}) + assert check_approx_distinct(3, value) + + assert [row0, row1, row2] = ddoc_url |> query_rows("_sum", %{group: true, limit: 3}) + assert row0["value"] == 2 + assert row1["value"] == 4 + assert row2["value"] == 6 + + assert [row0, row1, row2] = + ddoc_url |> query_rows("_approx_count_distinct", %{group: true, limit: 3}) + + assert check_approx_distinct(1, row0["value"]) + assert check_approx_distinct(1, row1["value"]) + assert check_approx_distinct(1, row2["value"]) + + 1..div(500, 2) + |> Enum.take_every(30) + |> Enum.each(fn i -> + value = ddoc_url |> query_value("_sum", %{startkey: i, endkey: num_docs - i}) + assert value == 2 * (summate(num_docs - i) - summate(i - 1)) + end) + end + + @tag :with_db + test "Builtin reduce functions with trailings", context do + db_name = context[:db_name] + num_docs = 500 + + docs = make_docs(1..num_docs) + + resp = Couch.post("/#{db_name}/_bulk_docs", body: %{:docs => docs}, query: %{w: 3}) + assert resp.status_code in [201, 202] + + # test for trailing characters after builtin functions, desired behaviour + # is to disregard any trailing characters + # I think the behavior should be a prefix test, so that even "_statsorama" + # or "_stats\nare\awesome" should work just as "_stats" does. - JChris + ["\n", "orama", "\nare\nawesome", " ", " \n "] + |> Enum.each(fn trailing -> + ddoc_url = random_ddoc(db_name) + + map = ~s""" + function (doc) { + emit(doc.integer, doc.integer); + emit(doc.integer, doc.integer); + }; + """ + + design_doc = %{ + :views => %{ + :builtin_sum => %{:map => map, :reduce => "_sum#{trailing}"}, + :builtin_count => %{:map => map, :reduce => "_count#{trailing}"}, + :builtin_stats => %{:map => map, :reduce => "_stats#{trailing}"}, + :builtin_approx_count_distinct => %{ + :map => map, + :reduce => "_approx_count_distinct#{trailing}" + } + } + } + + assert Couch.put(ddoc_url, body: design_doc).body["ok"] + + value = ddoc_url |> query_value("_sum") + assert value == 2 * summate(num_docs) + value = ddoc_url |> query_value("_count") + assert value == 1000 + value = ddoc_url |> query_value("_stats") + assert value["sum"] == 2 * summate(num_docs) + assert value["count"] == 1000 + assert value["min"] == 1 + assert value["max"] == 500 + assert value["sumsqr"] == 2 * sumsqr(num_docs) + end) + end + + @tag :with_db + test "Builtin count and sum reduce for key as array", context do + db_name = context[:db_name] + + ddoc_url = random_ddoc(db_name) + + map_one = ~s""" + function (doc) { + emit(doc.keys, 1); + }; + """ + + map_ones_array = ~s""" + function (doc) { + emit(doc.keys, [1, 1]); + }; + """ + + design_doc = %{ + :views => %{ + :builtin_one_sum => %{:map => map_one, :reduce => "_sum"}, + :builtin_one_count => %{:map => map_one, :reduce => "_count"}, + :builtin_ones_array_sum => %{:map => map_ones_array, :reduce => "_sum"} + } + } + + assert Couch.put(ddoc_url, body: design_doc).body["ok"] + + for i <- 1..5 do + for j <- 0..9 do + docs = [ + %{keys: ["a"]}, + %{keys: ["a"]}, + %{keys: ["a", "b"]}, + %{keys: ["a", "b"]}, + %{keys: ["a", "b", "c"]}, + %{keys: ["a", "b", "d"]}, + %{keys: ["a", "c", "d"]}, + %{keys: ["d"]}, + %{keys: ["d", "a"]}, + %{keys: ["d", "b"]}, + %{keys: ["d", "c"]} + ] + + resp = Couch.post("/#{db_name}/_bulk_docs", body: %{docs: docs}, query: %{w: 3}) + assert resp.status_code in [201, 202] + + total_docs = 1 + (i - 1) * 10 * 11 + (j + 1) * 11 + assert Couch.get("/#{db_name}").body["doc_count"] == total_docs + end + + ["_sum", "_count"] + |> Enum.each(fn builtin -> + builtin = "_one#{builtin}" + + # group by exact key match + rows = query_rows(ddoc_url, builtin, %{group: true}) + assert Enum.at(rows, 0) == %{"key" => ["a"], "value" => 20 * i} + assert Enum.at(rows, 1) == %{"key" => ["a", "b"], "value" => 20 * i} + assert Enum.at(rows, 2) == %{"key" => ["a", "b", "c"], "value" => 10 * i} + assert Enum.at(rows, 3) == %{"key" => ["a", "b", "d"], "value" => 10 * i} + + # make sure group reduce and limit params provide valid json + assert [row0, _] = query_rows(ddoc_url, builtin, %{group: true, limit: 2}) + assert row0 == %{"key" => ["a"], "value" => 20 * i} + + # group by the first element in the key array + rows = query_rows(ddoc_url, builtin, %{group_level: 1}) + assert Enum.at(rows, 0) == %{"key" => ["a"], "value" => 70 * i} + assert Enum.at(rows, 1) == %{"key" => ["d"], "value" => 40 * i} + + # group by the first 2 elements in the key array + rows = query_rows(ddoc_url, builtin, %{group_level: 2}) + assert Enum.at(rows, 0) == %{"key" => ["a"], "value" => 20 * i} + assert Enum.at(rows, 1) == %{"key" => ["a", "b"], "value" => 40 * i} + assert Enum.at(rows, 2) == %{"key" => ["a", "c"], "value" => 10 * i} + assert Enum.at(rows, 3) == %{"key" => ["d"], "value" => 10 * i} + assert Enum.at(rows, 4) == %{"key" => ["d", "a"], "value" => 10 * i} + assert Enum.at(rows, 5) == %{"key" => ["d", "b"], "value" => 10 * i} + assert Enum.at(rows, 6) == %{"key" => ["d", "c"], "value" => 10 * i} + end) + + rows = query_rows(ddoc_url, "_ones_array_sum", %{group: true}) + assert Enum.at(rows, 0) == %{"key" => ["a"], "value" => [20 * i, 20 * i]} + assert Enum.at(rows, 1) == %{"key" => ["a", "b"], "value" => [20 * i, 20 * i]} + assert Enum.at(rows, 2) == %{"key" => ["a", "b", "c"], "value" => [10 * i, 10 * i]} + assert Enum.at(rows, 3) == %{"key" => ["a", "b", "d"], "value" => [10 * i, 10 * i]} + + assert [row0, _] = query_rows(ddoc_url, "_ones_array_sum", %{group: true, limit: 2}) + assert row0 == %{"key" => ["a"], "value" => [20 * i, 20 * i]} + + rows = query_rows(ddoc_url, "_ones_array_sum", %{group_level: 1}) + assert Enum.at(rows, 0) == %{"key" => ["a"], "value" => [70 * i, 70 * i]} + assert Enum.at(rows, 1) == %{"key" => ["d"], "value" => [40 * i, 40 * i]} + + rows = query_rows(ddoc_url, "_ones_array_sum", %{group_level: 2}) + assert Enum.at(rows, 0) == %{"key" => ["a"], "value" => [20 * i, 20 * i]} + assert Enum.at(rows, 1) == %{"key" => ["a", "b"], "value" => [40 * i, 40 * i]} + assert Enum.at(rows, 2) == %{"key" => ["a", "c"], "value" => [10 * i, 10 * i]} + assert Enum.at(rows, 3) == %{"key" => ["d"], "value" => [10 * i, 10 * i]} + assert Enum.at(rows, 4) == %{"key" => ["d", "a"], "value" => [10 * i, 10 * i]} + assert Enum.at(rows, 5) == %{"key" => ["d", "b"], "value" => [10 * i, 10 * i]} + assert Enum.at(rows, 6) == %{"key" => ["d", "c"], "value" => [10 * i, 10 * i]} + end + end +end diff --git a/test/elixir/test/reduce_false_test.exs b/test/elixir/test/reduce_false_test.exs new file mode 100644 index 000000000..675c11dbd --- /dev/null +++ b/test/elixir/test/reduce_false_test.exs @@ -0,0 +1,50 @@ +defmodule ReduceFalseTest do + use CouchTestCase + + @moduletag :views + + @moduledoc """ + Test CouchDB view without reduces + This is a port of the reduce_false.js suite + """ + + def summate(n) do + (n + 1) * n / 2 + end + + @tag :with_db + test "Basic reduce functions", context do + db_name = context[:db_name] + view_url = "/#{db_name}/_design/foo/_view/summate" + num_docs = 5 + + map = ~s""" + function (doc) { + emit(doc.integer, doc.integer); + }; + """ + + reduce = "function (keys, values) { return sum(values); };" + red_doc = %{:views => %{:summate => %{:map => map, :reduce => reduce}}} + assert Couch.put("/#{db_name}/_design/foo", body: red_doc).body["ok"] + + docs = make_docs(1..num_docs) + resp = Couch.post("/#{db_name}/_bulk_docs", body: %{:docs => docs}, query: %{w: 3}) + assert resp.status_code in [201, 202] + + # Test that the reduce works + rows = Couch.get(view_url).body["rows"] + assert length(rows) == 1 + assert hd(rows)["value"] == summate(num_docs) + + # Test that we got our docs back + rows = Couch.get(view_url, query: %{reduce: false}).body["rows"] + assert length(rows) == 5 + + rows + |> Enum.with_index(1) + |> Enum.each(fn {row, i} -> + assert i == row["value"] + end) + end +end -- cgit v1.2.1 From 874831059bf914f9ab836b29480cd718c009b194 Mon Sep 17 00:00:00 2001 From: Joan Touzet Date: Thu, 13 Feb 2020 16:11:45 -0500 Subject: Bump SM to 60 on Centos 8 (#2544) --- build-aux/Jenkinsfile.full | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build-aux/Jenkinsfile.full b/build-aux/Jenkinsfile.full index b1d46e846..181e38871 100644 --- a/build-aux/Jenkinsfile.full +++ b/build-aux/Jenkinsfile.full @@ -258,7 +258,7 @@ pipeline { } environment { platform = 'centos8' - sm_ver = '1.8.5' + sm_ver = '60' } stages { stage('Build from tarball & test') { -- cgit v1.2.1 From ca178f0e7fe9683da51d0c3bc322fb2b99826311 Mon Sep 17 00:00:00 2001 From: Jay Doane Date: Fri, 7 Feb 2020 14:44:41 -0800 Subject: Expose `couch_util:decode/2` to support jiffy options It can be desirable in some cases for decoded JSON to e.g. return maps instead of the default data structure, which is not currently possible. This exposes a new function `couch_util:decode/2`, the second parameter being a list of options passed to `jiffy:decode/2`. --- src/couch/src/couch_util.erl | 7 +++++-- src/couch/test/eunit/couch_util_tests.erl | 7 +++++++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/src/couch/src/couch_util.erl b/src/couch/src/couch_util.erl index a785e2e44..dffb68152 100644 --- a/src/couch/src/couch_util.erl +++ b/src/couch/src/couch_util.erl @@ -21,7 +21,7 @@ -export([get_nested_json_value/2, json_user_ctx/1]). -export([proplist_apply_field/2, json_apply_field/2]). -export([to_binary/1, to_integer/1, to_list/1, url_encode/1]). --export([json_encode/1, json_decode/1]). +-export([json_encode/1, json_decode/1, json_decode/2]). -export([verify/2,simple_call/2,shutdown_sync/1]). -export([get_value/2, get_value/3]). -export([reorder_results/2]). @@ -498,8 +498,11 @@ json_encode(V) -> jiffy:encode(V, [force_utf8]). json_decode(V) -> + json_decode(V, []). + +json_decode(V, Opts) -> try - jiffy:decode(V, [dedupe_keys]) + jiffy:decode(V, [dedupe_keys | Opts]) catch error:Error -> throw({invalid_json, Error}) diff --git a/src/couch/test/eunit/couch_util_tests.erl b/src/couch/test/eunit/couch_util_tests.erl index 3e145c4f6..012c961a4 100644 --- a/src/couch/test/eunit/couch_util_tests.erl +++ b/src/couch/test/eunit/couch_util_tests.erl @@ -168,3 +168,10 @@ to_hex_test_() -> ?_assertEqual("", couch_util:to_hex(<<>>)), ?_assertEqual("010203faff", couch_util:to_hex(<<1, 2, 3, 250, 255>>)) ]. + +json_decode_test_() -> + [ + ?_assertEqual({[]}, couch_util:json_decode(<<"{}">>)), + ?_assertEqual({[]}, couch_util:json_decode(<<"{}">>, [])), + ?_assertEqual(#{}, couch_util:json_decode(<<"{}">>, [return_maps])) + ]. -- cgit v1.2.1 From 09ac7208e6078bbbf56c569a62cddabc973932db Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Sun, 16 Feb 2020 12:05:29 +0000 Subject: Reset if we don't get a view header I found a .view file with a db_header in production (cause unknown but I'm hoping it's manual intervention). This patch means we'll reset the index if we find something other than a view header when looking for one. --- src/couch_mrview/src/couch_mrview_index.erl | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/couch_mrview/src/couch_mrview_index.erl b/src/couch_mrview/src/couch_mrview_index.erl index c96d87173..cc013c5bd 100644 --- a/src/couch_mrview/src/couch_mrview_index.erl +++ b/src/couch_mrview/src/couch_mrview_index.erl @@ -133,6 +133,12 @@ open(Db, State0) -> NewSt = couch_mrview_util:reset_index(Db, Fd, State), ensure_local_purge_doc(Db, NewSt), {ok, NewSt}; + {ok, Else} -> + couch_log:error("~s has a bad header: got ~p", + [IndexFName, Else]), + NewSt = couch_mrview_util:reset_index(Db, Fd, State), + ensure_local_purge_doc(Db, NewSt), + {ok, NewSt}; no_valid_header -> NewSt = couch_mrview_util:reset_index(Db, Fd, State), ensure_local_purge_doc(Db, NewSt), -- cgit v1.2.1 From 91ecf6777cc5fff93483b8e92c8daadd7ff33fdc Mon Sep 17 00:00:00 2001 From: Jan Lehnardt Date: Wed, 19 Feb 2020 18:39:24 +0100 Subject: fix: single node state (#2575) --- src/setup/src/setup.erl | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/setup/src/setup.erl b/src/setup/src/setup.erl index 3d23229b8..cc64ae438 100644 --- a/src/setup/src/setup.erl +++ b/src/setup/src/setup.erl @@ -65,13 +65,15 @@ is_cluster_enabled() -> end. is_single_node_enabled(Dbs) -> - % admins != empty AND dbs exist + % admins != empty AND dbs exist OR `[couchdb] single_node` is set to true Admins = config:get("admins"), HasDbs = has_cluster_system_dbs(Dbs), - case {Admins, HasDbs} of - {[], _} -> false; - {_, false} -> false; - {_,_} -> true + SingleNodeConfig = config:get_boolean("couchdb", "single_node", false), + case {Admins, HasDbs, SingleNodeConfig} of + {_, _, true} -> true; + {[], _, _} -> false; + {_, false, _} -> false; + {_,_,_} -> true end. cluster_system_dbs() -> -- cgit v1.2.1 From 1e37457de4786973558773118e518566760b4720 Mon Sep 17 00:00:00 2001 From: Jan Lehnardt Date: Wed, 19 Feb 2020 20:33:58 +0100 Subject: feat(breaking): make _all_dbs admin-only by default (#2577) --- rel/overlay/etc/default.ini | 2 +- src/chttpd/src/chttpd_auth_request.erl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/rel/overlay/etc/default.ini b/rel/overlay/etc/default.ini index 1829d0d74..246c17307 100644 --- a/rel/overlay/etc/default.ini +++ b/rel/overlay/etc/default.ini @@ -136,7 +136,7 @@ max_db_number_for_dbs_info_req = 100 ; authentication_handlers = {chttpd_auth, proxy_authentication_handler}, {chttpd_auth, cookie_authentication_handler}, {chttpd_auth, default_authentication_handler} ; prevent non-admins from accessing /_all_dbs -;admin_only_all_dbs = false +; admin_only_all_dbs = true [couch_peruser] ; If enabled, couch_peruser ensures that a private per-user database diff --git a/src/chttpd/src/chttpd_auth_request.erl b/src/chttpd/src/chttpd_auth_request.erl index fa47f5bfa..8040f91fd 100644 --- a/src/chttpd/src/chttpd_auth_request.erl +++ b/src/chttpd/src/chttpd_auth_request.erl @@ -34,7 +34,7 @@ authorize_request_int(#httpd{path_parts=[]}=Req) -> authorize_request_int(#httpd{path_parts=[<<"favicon.ico">>|_]}=Req) -> Req; authorize_request_int(#httpd{path_parts=[<<"_all_dbs">>|_]}=Req) -> - case config:get_boolean("chttpd", "admin_only_all_dbs", false) of + case config:get_boolean("chttpd", "admin_only_all_dbs", true) of true -> require_admin(Req); false -> Req end; -- cgit v1.2.1 From e0cff2f85ec43dbee203e17a3b45d3bd912a8da9 Mon Sep 17 00:00:00 2001 From: Jan Lehnardt Date: Wed, 19 Feb 2020 21:51:52 +0100 Subject: Revert "fix: single node state (#2575)" This reverts commit 91ecf6777cc5fff93483b8e92c8daadd7ff33fdc. --- src/setup/src/setup.erl | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/setup/src/setup.erl b/src/setup/src/setup.erl index cc64ae438..3d23229b8 100644 --- a/src/setup/src/setup.erl +++ b/src/setup/src/setup.erl @@ -65,15 +65,13 @@ is_cluster_enabled() -> end. is_single_node_enabled(Dbs) -> - % admins != empty AND dbs exist OR `[couchdb] single_node` is set to true + % admins != empty AND dbs exist Admins = config:get("admins"), HasDbs = has_cluster_system_dbs(Dbs), - SingleNodeConfig = config:get_boolean("couchdb", "single_node", false), - case {Admins, HasDbs, SingleNodeConfig} of - {_, _, true} -> true; - {[], _, _} -> false; - {_, false, _} -> false; - {_,_,_} -> true + case {Admins, HasDbs} of + {[], _} -> false; + {_, false} -> false; + {_,_} -> true end. cluster_system_dbs() -> -- cgit v1.2.1 From 26f93667c9f65f5f977ab7eabd5a65fbe93d07b6 Mon Sep 17 00:00:00 2001 From: Jan Lehnardt Date: Wed, 19 Feb 2020 21:55:15 +0100 Subject: fix: show single node on setup status with single_node=true --- src/setup/src/setup_httpd.erl | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/src/setup/src/setup_httpd.erl b/src/setup/src/setup_httpd.erl index f4e05ce09..949675b6a 100644 --- a/src/setup/src/setup_httpd.erl +++ b/src/setup/src/setup_httpd.erl @@ -31,24 +31,30 @@ handle_setup_req(#httpd{method='GET'}=Req) -> ok = chttpd:verify_is_server_admin(Req), Dbs = chttpd:qs_json_value(Req, "ensure_dbs_exist", setup:cluster_system_dbs()), couch_log:notice("Dbs: ~p~n", [Dbs]), - case erlang:list_to_integer(config:get("cluster", "n", undefined)) of - 1 -> - case setup:is_single_node_enabled(Dbs) of - false -> - chttpd:send_json(Req, 200, {[{state, single_node_disabled}]}); - true -> - chttpd:send_json(Req, 200, {[{state, single_node_enabled}]}) - end; + SingleNodeConfig = config:get_boolean("couchdb", "single_node", false), + case SingleNodeConfig of + true -> + chttpd:send_json(Req, 200, {[{state, single_node_enabled}]}); _ -> - case setup:is_cluster_enabled() of - false -> - chttpd:send_json(Req, 200, {[{state, cluster_disabled}]}); - true -> - case setup:has_cluster_system_dbs(Dbs) of + case config:get("cluster", "n", undefined) of + "1" -> + case setup:is_single_node_enabled(Dbs) of false -> - chttpd:send_json(Req, 200, {[{state, cluster_enabled}]}); + chttpd:send_json(Req, 200, {[{state, single_node_disabled}]}); true -> - chttpd:send_json(Req, 200, {[{state, cluster_finished}]}) + chttpd:send_json(Req, 200, {[{state, single_node_enabled}]}) + end; + _ -> + case setup:is_cluster_enabled() of + false -> + chttpd:send_json(Req, 200, {[{state, cluster_disabled}]}); + true -> + case setup:has_cluster_system_dbs(Dbs) of + false -> + chttpd:send_json(Req, 200, {[{state, cluster_enabled}]}); + true -> + chttpd:send_json(Req, 200, {[{state, cluster_finished}]}) + end end end end; -- cgit v1.2.1 From 2ef656efa1dc33b40894a125b5f57f9ac4bbc87a Mon Sep 17 00:00:00 2001 From: Juanjo Rodriguez Date: Thu, 20 Feb 2020 23:53:51 +0100 Subject: Port changes.js test suite into elixir --- test/elixir/test/changes_async_test.exs | 545 ++++++++++++++++++++++++++++++++ test/elixir/test/changes_test.exs | 440 +++++++++++++++++++++++++- test/javascript/tests/changes.js | 7 +- 3 files changed, 973 insertions(+), 19 deletions(-) create mode 100644 test/elixir/test/changes_async_test.exs diff --git a/test/elixir/test/changes_async_test.exs b/test/elixir/test/changes_async_test.exs new file mode 100644 index 000000000..07afcdc7c --- /dev/null +++ b/test/elixir/test/changes_async_test.exs @@ -0,0 +1,545 @@ +defmodule ChangesAsyncTest do + use CouchTestCase + + @moduletag :changes + + @moduledoc """ + Test CouchDB /{db}/_changes + """ + + @tag :with_db + test "live changes", context do + db_name = context[:db_name] + test_changes(db_name, "live") + end + + @tag :with_db + test "continuous changes", context do + db_name = context[:db_name] + test_changes(db_name, "continuous") + end + + @tag :with_db + test "longpoll changes", context do + db_name = context[:db_name] + + check_empty_db(db_name) + + create_doc(db_name, sample_doc_foo()) + + req_id = + Couch.get("/#{db_name}/_changes?feed=longpoll", + stream_to: self() + ) + + changes = process_response(req_id.id, &parse_chunk/1) + {changes_length, last_seq_prefix} = parse_changes_response(changes) + assert changes_length == 1, "db should not be empty" + assert last_seq_prefix == "1-", "seq must start with 1-" + + last_seq = changes["last_seq"] + {:ok, worker_pid} = HTTPotion.spawn_link_worker_process(Couch.process_url("")) + + req_id = + Couch.get("/#{db_name}/_changes?feed=longpoll&since=#{last_seq}", + stream_to: self(), + direct: worker_pid + ) + + :ok = wait_for_headers(req_id.id, 200) + + create_doc_bar(db_name, "bar") + + {changes_length, last_seq_prefix} = + req_id.id + |> process_response(&parse_chunk/1) + |> parse_changes_response() + + assert changes_length == 1, "should return one change" + assert last_seq_prefix == "2-", "seq must start with 2-" + + req_id = + Couch.get("/#{db_name}/_changes?feed=longpoll&since=now", + stream_to: self(), + direct: worker_pid + ) + + :ok = wait_for_headers(req_id.id, 200) + + create_doc_bar(db_name, "barzzzz") + + changes = process_response(req_id.id, &parse_chunk/1) + {changes_length, last_seq_prefix} = parse_changes_response(changes) + assert changes_length == 1, "should return one change" + assert Enum.at(changes["results"], 0)["id"] == "barzzzz" + assert last_seq_prefix == "3-", "seq must start with 3-" + end + + @tag :with_db + test "eventsource changes", context do + db_name = context[:db_name] + + check_empty_db(db_name) + + create_doc(db_name, sample_doc_foo()) + {:ok, worker_pid} = HTTPotion.spawn_link_worker_process(Couch.process_url("")) + + req_id = + Rawresp.get("/#{db_name}/_changes?feed=eventsource&timeout=500", + stream_to: self(), + direct: worker_pid + ) + + :ok = wait_for_headers(req_id.id, 200) + + create_doc_bar(db_name, "bar") + + changes = process_response(req_id.id, &parse_event/1) + + assert length(changes) == 2 + assert Enum.at(changes, 0)["id"] == "foo" + assert Enum.at(changes, 1)["id"] == "bar" + + HTTPotion.stop_worker_process(worker_pid) + end + + @tag :with_db + test "eventsource heartbeat", context do + db_name = context[:db_name] + + {:ok, worker_pid} = HTTPotion.spawn_link_worker_process(Couch.process_url("")) + + req_id = + Rawresp.get("/#{db_name}/_changes?feed=eventsource&heartbeat=10", + stream_to: {self(), :once}, + direct: worker_pid + ) + + :ok = wait_for_headers(req_id.id, 200) + beats = wait_for_heartbeats(req_id.id, 0, 3) + assert beats == 3 + HTTPotion.stop_worker_process(worker_pid) + end + + @tag :with_db + test "longpoll filtered changes", context do + db_name = context[:db_name] + create_filters_view(db_name) + + create_doc(db_name, %{bop: "foom"}) + create_doc(db_name, %{bop: false}) + + req_id = + Couch.get("/#{db_name}/_changes?feed=longpoll&filter=changes_filter/bop", + stream_to: self() + ) + + changes = process_response(req_id.id, &parse_chunk/1) + {changes_length, last_seq_prefix} = parse_changes_response(changes) + assert changes_length == 1, "db should not be empty" + assert last_seq_prefix == "3-", "seq must start with 3-" + + last_seq = changes["last_seq"] + # longpoll waits until a matching change before returning + {:ok, worker_pid} = HTTPotion.spawn_link_worker_process(Couch.process_url("")) + + req_id = + Couch.get( + "/#{db_name}/_changes?feed=longpoll&filter=changes_filter/bop&since=#{last_seq}", + stream_to: self(), + direct: worker_pid + ) + + :ok = wait_for_headers(req_id.id, 200) + create_doc(db_name, %{_id: "falsy", bop: ""}) + # Doc doesn't match the filter + changes = process_response(req_id.id, &parse_chunk/1) + assert changes == :timeout + + # Doc matches the filter + create_doc(db_name, %{_id: "bingo", bop: "bingo"}) + changes = process_response(req_id.id, &parse_chunk/1) + {changes_length, last_seq_prefix} = parse_changes_response(changes) + assert changes_length == 1, "db should not be empty" + assert last_seq_prefix == "5-", "seq must start with 5-" + assert Enum.at(changes["results"], 0)["id"] == "bingo" + end + + @tag :with_db + test "continuous filtered changes", context do + db_name = context[:db_name] + create_filters_view(db_name) + + create_doc(db_name, %{bop: false}) + create_doc(db_name, %{_id: "bingo", bop: "bingo"}) + + {:ok, worker_pid} = HTTPotion.spawn_link_worker_process(Couch.process_url("")) + + req_id = + Rawresp.get( + "/#{db_name}/_changes?feed=continuous&filter=changes_filter/bop&timeout=500", + stream_to: self(), + direct: worker_pid + ) + + :ok = wait_for_headers(req_id.id, 200) + create_doc(db_name, %{_id: "rusty", bop: "plankton"}) + + changes = process_response(req_id.id, &parse_changes_line_chunk/1) + + changes_ids = + changes + |> Enum.filter(fn p -> Map.has_key?(p, "id") end) + |> Enum.map(fn p -> p["id"] end) + + assert Enum.member?(changes_ids, "bingo") + assert Enum.member?(changes_ids, "rusty") + assert length(changes_ids) == 2 + end + + @tag :with_db + test "continuous filtered changes with doc ids", context do + db_name = context[:db_name] + doc_ids = %{doc_ids: ["doc1", "doc3", "doc4"]} + + create_doc(db_name, %{_id: "doc1", value: 1}) + create_doc(db_name, %{_id: "doc2", value: 2}) + + {:ok, worker_pid} = HTTPotion.spawn_link_worker_process(Couch.process_url("")) + + req_id = + Rawresp.post( + "/#{db_name}/_changes?feed=continuous&timeout=500&filter=_doc_ids", + body: doc_ids, + headers: ["Content-Type": "application/json"], + stream_to: self(), + direct: worker_pid + ) + + :ok = wait_for_headers(req_id.id, 200) + create_doc(db_name, %{_id: "doc3", value: 3}) + + changes = process_response(req_id.id, &parse_changes_line_chunk/1) + + changes_ids = + changes + |> Enum.filter(fn p -> Map.has_key?(p, "id") end) + |> Enum.map(fn p -> p["id"] end) + + assert Enum.member?(changes_ids, "doc1") + assert Enum.member?(changes_ids, "doc3") + assert length(changes_ids) == 2 + end + + @tag :with_db + test "COUCHDB-1852", context do + db_name = context[:db_name] + + create_doc(db_name, %{bop: "foom"}) + create_doc(db_name, %{bop: "foom"}) + create_doc(db_name, %{bop: "foom"}) + create_doc(db_name, %{bop: "foom"}) + + resp = Couch.get("/#{db_name}/_changes") + assert length(resp.body["results"]) == 4 + seq = Enum.at(resp.body["results"], 1)["seq"] + + {:ok, worker_pid} = HTTPotion.spawn_link_worker_process(Couch.process_url("")) + + # simulate an EventSource request with a Last-Event-ID header + req_id = + Rawresp.get( + "/#{db_name}/_changes?feed=eventsource&timeout=100&since=0", + headers: [Accept: "text/event-stream", "Last-Event-ID": seq], + stream_to: self(), + direct: worker_pid + ) + + changes = process_response(req_id.id, &parse_event/1) + assert length(changes) == 2 + end + + defp wait_for_heartbeats(id, beats, expexted_beats) do + if beats < expexted_beats do + :ibrowse.stream_next(id) + is_heartbeat = process_response(id, &parse_heartbeat/1) + + case is_heartbeat do + :heartbeat -> wait_for_heartbeats(id, beats + 1, expexted_beats) + :timeout -> beats + _ -> wait_for_heartbeats(id, beats, expexted_beats) + end + else + beats + end + end + + defp wait_for_headers(id, status, timeout \\ 1000) do + receive do + %HTTPotion.AsyncHeaders{id: ^id, status_code: ^status} -> + :ok + + _ -> + wait_for_headers(id, status, timeout) + after + timeout -> :timeout + end + end + + defp process_response(id, chunk_parser, timeout \\ 1000) do + receive do + %HTTPotion.AsyncChunk{id: ^id} = msg -> + chunk_parser.(msg) + + _ -> + process_response(id, chunk_parser, timeout) + after + timeout -> :timeout + end + end + + defp parse_chunk(msg) do + msg.chunk |> IO.iodata_to_binary() |> :jiffy.decode([:return_maps]) + end + + defp parse_event(msg) do + captures = Regex.scan(~r/data: (.*)/, msg.chunk) + + captures + |> Enum.map(fn p -> Enum.at(p, 1) end) + |> Enum.filter(fn p -> String.trim(p) != "" end) + |> Enum.map(fn p -> + p + |> IO.iodata_to_binary() + |> :jiffy.decode([:return_maps]) + end) + end + + defp parse_heartbeat(msg) do + is_heartbeat = Regex.match?(~r/event: heartbeat/, msg.chunk) + + if is_heartbeat do + :heartbeat + else + :other + end + end + + defp parse_changes_response(changes) do + {length(changes["results"]), String.slice(changes["last_seq"], 0..1)} + end + + defp check_empty_db(db_name) do + resp = Couch.get("/#{db_name}/_changes") + assert resp.body["results"] == [], "db must be empty" + assert String.at(resp.body["last_seq"], 0) == "0", "seq must start with 0" + end + + defp test_changes(db_name, feed) do + check_empty_db(db_name) + {_, resp} = create_doc(db_name, sample_doc_foo()) + rev = resp.body["rev"] + + # TODO: retry_part + resp = Couch.get("/#{db_name}/_changes") + assert length(resp.body["results"]) == 1, "db must not be empty" + assert String.at(resp.body["last_seq"], 0) == "1", "seq must start with 1" + + # increase timeout to 100 to have enough time 2 assemble + # (seems like too little timeouts kill + resp = Rawresp.get("/#{db_name}/_changes?feed=#{feed}&timeout=100") + changes = parse_changes_line(resp.body) + + change = Enum.at(changes, 0) + assert Enum.at(change["changes"], 0)["rev"] == rev + + # the sequence is not fully ordered and a complex structure now + change = Enum.at(changes, 1) + assert String.at(change["last_seq"], 0) == "1" + + # create_doc_bar(db_name,"bar") + {:ok, worker_pid} = HTTPotion.spawn_worker_process(Couch.process_url("")) + + %HTTPotion.AsyncResponse{id: req_id} = + Rawresp.get("/#{db_name}/_changes?feed=#{feed}&timeout=500", + stream_to: self(), + direct: worker_pid + ) + + :ok = wait_for_headers(req_id, 200) + create_doc_bar(db_name, "bar") + + changes = process_response(req_id, &parse_changes_line_chunk/1) + assert length(changes) == 3 + + HTTPotion.stop_worker_process(worker_pid) + end + + def create_doc_bar(db_name, id) do + create_doc(db_name, %{:_id => id, :bar => 1}) + end + + defp parse_changes_line_chunk(msg) do + parse_changes_line(msg.chunk) + end + + defp parse_changes_line(body) do + body_lines = String.split(body, "\n") + + body_lines + |> Enum.filter(fn line -> line != "" end) + |> Enum.map(fn line -> + line |> IO.iodata_to_binary() |> :jiffy.decode([:return_maps]) + end) + end + + defp create_filters_view(db_name) do + dynamic_fun = """ + function(doc, req) { + var field = req.query.field; + return doc[field]; + } + """ + + userctx_fun = """ + function(doc, req) { + var field = req.query.field; + return doc[field]; + } + """ + + blah_fun = """ + function(doc) { + if (doc._id == "blah") { + emit(null, null); + } + } + """ + + ddoc = %{ + _id: "_design/changes_filter", + filters: %{ + bop: "function(doc, req) { return (doc.bop);}", + dynamic: dynamic_fun, + userCtx: userctx_fun, + conflicted: "function(doc, req) { return (doc._conflicts);}" + }, + options: %{ + local_seq: true + }, + views: %{ + local_seq: %{ + map: "function(doc) {emit(doc._local_seq, null)}" + }, + blah: %{ + map: blah_fun + } + } + } + + create_doc(db_name, ddoc) + end +end + +defmodule Rawresp do + use HTTPotion.Base + + @request_timeout 60_000 + @inactivity_timeout 55_000 + + def process_url("http://" <> _ = url) do + url + end + + def process_url(url) do + base_url = System.get_env("EX_COUCH_URL") || "http://127.0.0.1:15984" + base_url <> url + end + + def process_request_headers(headers, _body, options) do + headers = + headers + |> Keyword.put(:"User-Agent", "couch-potion") + + headers = + if headers[:"Content-Type"] do + headers + else + Keyword.put(headers, :"Content-Type", "application/json") + end + + case Keyword.get(options, :cookie) do + nil -> + headers + + cookie -> + Keyword.put(headers, :Cookie, cookie) + end + end + + def process_options(options) do + options + |> set_auth_options() + |> set_inactivity_timeout() + |> set_request_timeout() + end + + def process_request_body(body) do + if is_map(body) do + :jiffy.encode(body) + else + body + end + end + + def set_auth_options(options) do + if Keyword.get(options, :cookie) == nil do + headers = Keyword.get(options, :headers, []) + + if headers[:basic_auth] != nil or headers[:authorization] != nil do + options + else + username = System.get_env("EX_USERNAME") || "adm" + password = System.get_env("EX_PASSWORD") || "pass" + Keyword.put(options, :basic_auth, {username, password}) + end + else + options + end + end + + def set_inactivity_timeout(options) do + Keyword.update( + options, + :ibrowse, + [{:inactivity_timeout, @inactivity_timeout}], + fn ibrowse -> + Keyword.put_new(ibrowse, :inactivity_timeout, @inactivity_timeout) + end + ) + end + + def set_request_timeout(options) do + timeout = Application.get_env(:httpotion, :default_timeout, @request_timeout) + Keyword.put_new(options, :timeout, timeout) + end + + def login(userinfo) do + [user, pass] = String.split(userinfo, ":", parts: 2) + login(user, pass) + end + + def login(user, pass, expect \\ :success) do + resp = Couch.post("/_session", body: %{:username => user, :password => pass}) + + if expect == :success do + true = resp.body["ok"] + cookie = resp.headers[:"set-cookie"] + [token | _] = String.split(cookie, ";") + %Couch.Session{cookie: token} + else + true = Map.has_key?(resp.body, "error") + %Couch.Session{error: resp.body["error"]} + end + end +end diff --git a/test/elixir/test/changes_test.exs b/test/elixir/test/changes_test.exs index b5545087b..5bb376b9c 100644 --- a/test/elixir/test/changes_test.exs +++ b/test/elixir/test/changes_test.exs @@ -11,33 +11,441 @@ defmodule ChangesTest do test "Changes feed negative heartbeat", context do db_name = context[:db_name] - resp = Couch.get( - "/#{db_name}/_changes", - query: %{ - :feed => "continuous", - :heartbeat => -1000 - } - ) + resp = + Couch.get( + "/#{db_name}/_changes", + query: %{ + :feed => "continuous", + :heartbeat => -1000 + } + ) assert resp.status_code == 400 assert resp.body["error"] == "bad_request" - assert resp.body["reason"] == "The heartbeat value should be a positive integer (in milliseconds)." + + assert resp.body["reason"] == + "The heartbeat value should be a positive integer (in milliseconds)." end @tag :with_db test "Changes feed non-integer heartbeat", context do db_name = context[:db_name] - resp = Couch.get( - "/#{db_name}/_changes", - query: %{ - :feed => "continuous", - :heartbeat => "a1000" - } - ) + resp = + Couch.get( + "/#{db_name}/_changes", + query: %{ + :feed => "continuous", + :heartbeat => "a1000" + } + ) assert resp.status_code == 400 assert resp.body["error"] == "bad_request" - assert resp.body["reason"] == "Invalid heartbeat value. Expecting a positive integer value (in milliseconds)." + + assert resp.body["reason"] == + "Invalid heartbeat value. Expecting a positive integer value (in milliseconds)." + end + + @tag :with_db + test "function filtered changes", context do + db_name = context[:db_name] + create_filters_view(db_name) + + resp = Couch.get("/#{db_name}/_changes?filter=changes_filter/bop") + assert Enum.empty?(resp.body["results"]), "db must be empty" + + {:ok, doc_resp} = create_doc(db_name, %{bop: "foom"}) + rev = doc_resp.body["rev"] + id = doc_resp.body["id"] + create_doc(db_name, %{bop: false}) + + resp = Couch.get("/#{db_name}/_changes?filter=changes_filter/bop") + assert length(resp.body["results"]) == 1 + change_rev = get_change_rev_at(resp.body["results"], 0) + assert change_rev == rev + + doc = open_doc(db_name, id) + doc = Map.put(doc, "newattr", "a") + + doc = save_doc(db_name, doc) + + resp = Couch.get("/#{db_name}/_changes?filter=changes_filter/bop") + assert length(resp.body["results"]) == 1 + new_change_rev = get_change_rev_at(resp.body["results"], 0) + assert new_change_rev == doc["_rev"] + assert new_change_rev != change_rev + + resp = Couch.get("/#{db_name}/_changes?filter=changes_filter/dynamic&field=woox") + assert Enum.empty?(resp.body["results"]), "db must be empty" + + resp = Couch.get("/#{db_name}/_changes?filter=changes_filter/dynamic&field=bop") + assert length(resp.body["results"]) == 1, "db must have one change" + new_change_rev = get_change_rev_at(resp.body["results"], 0) + assert new_change_rev == doc["_rev"] + end + + @tag :with_db + test "non-existing desing doc for filtered changes", context do + db_name = context[:db_name] + resp = Couch.get("/#{db_name}/_changes?filter=nothingtosee/bop") + assert resp.status_code == 404 + end + + @tag :with_db + test "non-existing function for filtered changes", context do + db_name = context[:db_name] + create_filters_view(db_name) + resp = Couch.get("/#{db_name}/_changes?filter=changes_filter/movealong") + assert resp.status_code == 404 + end + + @tag :with_db + test "non-existing desing doc and funcion for filtered changes", context do + db_name = context[:db_name] + resp = Couch.get("/#{db_name}/_changes?filter=nothingtosee/movealong") + assert resp.status_code == 404 + end + + @tag :with_db + test "map function filtered changes", context do + db_name = context[:db_name] + create_filters_view(db_name) + create_doc(db_name, %{_id: "blah", bop: "plankton"}) + resp = Couch.get("/#{db_name}/_changes?filter=_view&view=changes_filter/blah") + assert length(resp.body["results"]) == 1 + assert Enum.at(resp.body["results"], 0)["id"] == "blah" + end + + @tag :with_db + test "changes limit", context do + db_name = context[:db_name] + + create_doc(db_name, %{_id: "blah", bop: "plankton"}) + create_doc(db_name, %{_id: "blah2", bop: "plankton"}) + create_doc(db_name, %{_id: "blah3", bop: "plankton"}) + + resp = Couch.get("/#{db_name}/_changes?limit=1") + assert length(resp.body["results"]) == 1 + + resp = Couch.get("/#{db_name}/_changes?limit=2") + assert length(resp.body["results"]) == 2 + end + + @tag :with_db + test "erlang function filtered changes", context do + db_name = context[:db_name] + create_erlang_filters_view(db_name) + + resp = Couch.get("/#{db_name}/_changes?filter=erlang/foo") + assert Enum.empty?(resp.body["results"]) + + create_doc(db_name, %{_id: "doc1", value: 1}) + create_doc(db_name, %{_id: "doc2", value: 2}) + create_doc(db_name, %{_id: "doc3", value: 3}) + create_doc(db_name, %{_id: "doc4", value: 4}) + + resp = Couch.get("/#{db_name}/_changes?filter=erlang/foo") + + changes_ids = + resp.body["results"] + |> Enum.map(fn p -> p["id"] end) + + assert Enum.member?(changes_ids, "doc2") + assert Enum.member?(changes_ids, "doc4") + assert length(resp.body["results"]) == 2 + end + + @tag :with_db + test "changes filtering on docids", context do + db_name = context[:db_name] + doc_ids = %{doc_ids: ["doc1", "doc3", "doc4"]} + + resp = + Couch.post("/#{db_name}/_changes?filter=_doc_ids", + body: doc_ids, + headers: ["Content-Type": "application/json"] + ) + + assert Enum.empty?(resp.body["results"]) + + create_doc(db_name, %{_id: "doc1", value: 1}) + create_doc(db_name, %{_id: "doc2", value: 2}) + + resp = + Couch.post("/#{db_name}/_changes?filter=_doc_ids", + body: doc_ids, + headers: ["Content-Type": "application/json"] + ) + + assert length(resp.body["results"]) == 1 + assert Enum.at(resp.body["results"], 0)["id"] == "doc1" + + create_doc(db_name, %{_id: "doc3", value: 3}) + + resp = + Couch.post("/#{db_name}/_changes?filter=_doc_ids", + body: doc_ids, + headers: ["Content-Type": "application/json"] + ) + + assert length(resp.body["results"]) == 2 + + changes_ids = + resp.body["results"] + |> Enum.map(fn p -> p["id"] end) + + assert Enum.member?(changes_ids, "doc1") + assert Enum.member?(changes_ids, "doc3") + + encoded_doc_ids = doc_ids.doc_ids |> :jiffy.encode() + + resp = + Couch.get("/#{db_name}/_changes", + query: %{filter: "_doc_ids", doc_ids: encoded_doc_ids} + ) + + assert length(resp.body["results"]) == 2 + + changes_ids = + resp.body["results"] + |> Enum.map(fn p -> p["id"] end) + + assert Enum.member?(changes_ids, "doc1") + assert Enum.member?(changes_ids, "doc3") + end + + @tag :with_db + test "changes filtering on design docs", context do + db_name = context[:db_name] + + create_erlang_filters_view(db_name) + create_doc(db_name, %{_id: "doc1", value: 1}) + + resp = Couch.get("/#{db_name}/_changes?filter=_design") + assert length(resp.body["results"]) == 1 + assert Enum.at(resp.body["results"], 0)["id"] == "_design/erlang" + end + + @tag :with_db + test "COUCHDB-1037-empty result for ?limit=1&filter=foo/bar in some cases", + context do + db_name = context[:db_name] + + filter_fun = """ + function(doc, req) { + return (typeof doc.integer === "number"); + } + """ + + ddoc = %{ + _id: "_design/testdocs", + language: "javascript", + filters: %{ + testdocsonly: filter_fun + } + } + + create_doc(db_name, ddoc) + + ddoc = %{ + _id: "_design/foobar", + foo: "bar" + } + + create_doc(db_name, ddoc) + bulk_save(db_name, make_docs(0..4)) + + resp = Couch.get("/#{db_name}/_changes") + assert length(resp.body["results"]) == 7 + + resp = Couch.get("/#{db_name}/_changes?limit=1&filter=testdocs/testdocsonly") + assert length(resp.body["results"]) == 1 + # we can't guarantee ordering + assert Regex.match?(~r/[0-4]/, Enum.at(resp.body["results"], 0)["id"]) + + resp = Couch.get("/#{db_name}/_changes?limit=2&filter=testdocs/testdocsonly") + assert length(resp.body["results"]) == 2 + # we can't guarantee ordering + assert Regex.match?(~r/[0-4]/, Enum.at(resp.body["results"], 0)["id"]) + assert Regex.match?(~r/[0-4]/, Enum.at(resp.body["results"], 1)["id"]) + end + + @tag :with_db + test "COUCHDB-1256", context do + db_name = context[:db_name] + {:ok, resp} = create_doc(db_name, %{_id: "foo", a: 123}) + create_doc(db_name, %{_id: "bar", a: 456}) + foo_rev = resp.body["rev"] + + Couch.put("/#{db_name}/foo?new_edits=false", + headers: ["Content-Type": "application/json"], + body: %{_rev: foo_rev, a: 456} + ) + + resp = Couch.get("/#{db_name}/_changes?style=all_docs") + assert length(resp.body["results"]) == 2 + + resp = + Couch.get("/#{db_name}/_changes", + query: %{style: "all_docs", since: Enum.at(resp.body["results"], 0)["seq"]} + ) + + assert length(resp.body["results"]) == 1 + end + + @tag :with_db + test "COUCHDB-1923", context do + db_name = context[:db_name] + attachment_data = "VGhpcyBpcyBhIGJhc2U2NCBlbmNvZGVkIHRleHQ=" + + docs = + make_docs(20..29, %{ + _attachments: %{ + "foo.txt": %{ + content_type: "text/plain", + data: attachment_data + }, + "bar.txt": %{ + content_type: "text/plain", + data: attachment_data + } + } + }) + + bulk_save(db_name, docs) + + resp = Couch.get("/#{db_name}/_changes?include_docs=true") + assert length(resp.body["results"]) == 10 + + first_doc = Enum.at(resp.body["results"], 0)["doc"] + + assert first_doc["_attachments"]["foo.txt"]["stub"] + assert not Enum.member?(first_doc["_attachments"]["foo.txt"], "data") + assert not Enum.member?(first_doc["_attachments"]["foo.txt"], "encoding") + assert not Enum.member?(first_doc["_attachments"]["foo.txt"], "encoded_length") + assert first_doc["_attachments"]["bar.txt"]["stub"] + assert not Enum.member?(first_doc["_attachments"]["bar.txt"], "data") + assert not Enum.member?(first_doc["_attachments"]["bar.txt"], "encoding") + assert not Enum.member?(first_doc["_attachments"]["bar.txt"], "encoded_length") + + resp = Couch.get("/#{db_name}/_changes?include_docs=true&attachments=true") + assert length(resp.body["results"]) == 10 + + first_doc = Enum.at(resp.body["results"], 0)["doc"] + + assert not Enum.member?(first_doc["_attachments"]["foo.txt"], "stub") + assert first_doc["_attachments"]["foo.txt"]["data"] == attachment_data + assert not Enum.member?(first_doc["_attachments"]["foo.txt"], "encoding") + assert not Enum.member?(first_doc["_attachments"]["foo.txt"], "encoded_length") + + assert not Enum.member?(first_doc["_attachments"]["bar.txt"], "stub") + assert first_doc["_attachments"]["bar.txt"]["data"] == attachment_data + assert not Enum.member?(first_doc["_attachments"]["bar.txt"], "encoding") + assert not Enum.member?(first_doc["_attachments"]["bar.txt"], "encoded_length") + + resp = Couch.get("/#{db_name}/_changes?include_docs=true&att_encoding_info=true") + assert length(resp.body["results"]) == 10 + + first_doc = Enum.at(resp.body["results"], 0)["doc"] + + assert first_doc["_attachments"]["foo.txt"]["stub"] + assert not Enum.member?(first_doc["_attachments"]["foo.txt"], "data") + assert first_doc["_attachments"]["foo.txt"]["encoding"] == "gzip" + assert first_doc["_attachments"]["foo.txt"]["encoded_length"] == 47 + assert first_doc["_attachments"]["bar.txt"]["stub"] + assert not Enum.member?(first_doc["_attachments"]["bar.txt"], "data") + assert first_doc["_attachments"]["bar.txt"]["encoding"] == "gzip" + assert first_doc["_attachments"]["bar.txt"]["encoded_length"] == 47 + end + + defp create_erlang_filters_view(db_name) do + erlang_fun = """ + fun({Doc}, Req) -> + case couch_util:get_value(<<"value">>, Doc) of + undefined -> false; + Value -> (Value rem 2) =:= 0; + _ -> false + end + end. + """ + + ddoc = %{ + _id: "_design/erlang", + language: "erlang", + filters: %{ + foo: erlang_fun + } + } + + create_doc(db_name, ddoc) + end + + defp create_filters_view(db_name) do + dynamic_fun = """ + function(doc, req) { + var field = req.query.field; + return doc[field]; + } + """ + + userctx_fun = """ + function(doc, req) { + var field = req.query.field; + return doc[field]; + } + """ + + blah_fun = """ + function(doc) { + if (doc._id == "blah") { + emit(null, null); + } + } + """ + + ddoc = %{ + _id: "_design/changes_filter", + filters: %{ + bop: "function(doc, req) { return (doc.bop);}", + dynamic: dynamic_fun, + userCtx: userctx_fun, + conflicted: "function(doc, req) { return (doc._conflicts);}" + }, + options: %{ + local_seq: true + }, + views: %{ + local_seq: %{ + map: "function(doc) {emit(doc._local_seq, null)}" + }, + blah: %{ + map: blah_fun + } + } + } + + create_doc(db_name, ddoc) + end + + defp get_change_rev_at(results, idx) do + results + |> Enum.at(idx) + |> Map.fetch!("changes") + |> Enum.at(0) + |> Map.fetch!("rev") + end + + defp open_doc(db_name, id) do + resp = Couch.get("/#{db_name}/#{id}") + assert resp.status_code == 200 + resp.body + end + + defp save_doc(db_name, body) do + resp = Couch.put("/#{db_name}/#{body["_id"]}", body: body) + assert resp.status_code in [201, 202] + assert resp.body["ok"] + Map.put(body, "_rev", resp.body["rev"]) end end diff --git a/test/javascript/tests/changes.js b/test/javascript/tests/changes.js index d312edc41..d98e37cc8 100644 --- a/test/javascript/tests/changes.js +++ b/test/javascript/tests/changes.js @@ -11,6 +11,7 @@ // the License. function jsonp(obj) { + return console.log('done in test/elixir/test/changes_test.exs and changes_async_test.exs'); T(jsonp_flag == 0); T(obj.results.length == 1 && obj.last_seq == 1, "jsonp"); jsonp_flag = 1; @@ -359,7 +360,7 @@ couchTests.changes = function(debug) { resp = JSON.parse(req.responseText); T(resp.results.length == 1, "changes_filter/dynamic&field=bop"); T(resp.results[0].changes[0].rev == docres1.rev, "filtered/dynamic&field=bop rev"); - + // these will NEVER run as we're always in navigator == undefined if (!is_safari && xhr) { // full test requires parallel connections // filter with longpoll @@ -708,7 +709,7 @@ couchTests.changes = function(debug) { db = new CouchDB(db_name, {"X-Couch-Full-Commit":"true"}, {"w": 3}); T(db.createDb()); - // create 4 documents... this assumes the update sequnce will start from 0 and then do sth in the cluster + // create 4 documents... this assumes the update sequnce will start from 0 and then do sth in the cluster db.save({"bop" : "foom"}); db.save({"bop" : "foom"}); db.save({"bop" : "foom"}); @@ -717,7 +718,7 @@ couchTests.changes = function(debug) { req = CouchDB.request("GET", "/" + db_name + "/_changes"); // simulate an EventSource request with a Last-Event-ID header - // increase timeout to 100 to have enough time 2 assemble (seems like too little timeouts kill + // increase timeout to 100 to have enough time 2 assemble (seems like too little timeouts kill req = CouchDB.request("GET", "/" + db_name + "/_changes?feed=eventsource&timeout=100&since=0", {"headers": {"Accept": "text/event-stream", "Last-Event-ID": JSON.parse(req.responseText).results[1].seq}}); -- cgit v1.2.1 From fb30ac5e77e2aa2554d4bd0cd8d0cdd7dc1af537 Mon Sep 17 00:00:00 2001 From: Alessio Biancalana Date: Tue, 4 Feb 2020 17:49:23 +0100 Subject: Upgrade Credo to 1.2.2 --- mix.exs | 2 +- mix.lock | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/mix.exs b/mix.exs index d717e4b4a..29c81fa49 100644 --- a/mix.exs +++ b/mix.exs @@ -68,7 +68,7 @@ defmodule CouchDBTest.Mixfile do {:jiffy, path: Path.expand("src/jiffy", __DIR__)}, {:ibrowse, path: Path.expand("src/ibrowse", __DIR__), override: true, compile: false}, - {:credo, "~> 1.0.0", only: [:dev, :test, :integration], runtime: false} + {:credo, "~> 1.2.0", only: [:dev, :test, :integration], runtime: false} ] end diff --git a/mix.lock b/mix.lock index 30134f20f..c03e11f64 100644 --- a/mix.lock +++ b/mix.lock @@ -1,7 +1,7 @@ %{ "bunt": {:hex, :bunt, "0.2.0", "951c6e801e8b1d2cbe58ebbd3e616a869061ddadcc4863d0a2182541acae9a38", [:mix], [], "hexpm", "7af5c7e09fe1d40f76c8e4f9dd2be7cebd83909f31fee7cd0e9eadc567da8353"}, "certifi": {:hex, :certifi, "2.5.1", "867ce347f7c7d78563450a18a6a28a8090331e77fa02380b4a21962a65d36ee5", [:rebar3], [{:parse_trans, "~>3.3", [hex: :parse_trans, repo: "hexpm", optional: false]}], "hexpm", "805abd97539caf89ec6d4732c91e62ba9da0cda51ac462380bbd28ee697a8c42"}, - "credo": {:hex, :credo, "1.0.5", "fdea745579f8845315fe6a3b43e2f9f8866839cfbc8562bb72778e9fdaa94214", [:mix], [{:bunt, "~> 0.2.0", [hex: :bunt, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}], "hexpm", "16105fac37c5c4b3f6e1f70ba0784511fec4275cd8bb979386e3c739cf4e6455"}, + "credo": {:hex, :credo, "1.2.2", "f57faf60e0a12b0ba9fd4bad07966057fde162b33496c509b95b027993494aab", [:mix], [{:bunt, "~> 0.2.0", [hex: :bunt, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}], "hexpm", "8f2623cd8c895a6f4a55ef10f3fdf6a55a9ca7bef09676bd835551687bf8a740"}, "excoveralls": {:hex, :excoveralls, "0.12.1", "a553c59f6850d0aff3770e4729515762ba7c8e41eedde03208182a8dc9d0ce07", [:mix], [{:hackney, "~> 1.0", [hex: :hackney, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}], "hexpm", "5c1f717066a299b1b732249e736c5da96bb4120d1e55dc2e6f442d251e18a812"}, "hackney": {:hex, :hackney, "1.15.2", "07e33c794f8f8964ee86cebec1a8ed88db5070e52e904b8f12209773c1036085", [:rebar3], [{:certifi, "2.5.1", [hex: :certifi, repo: "hexpm", optional: false]}, {:idna, "6.0.0", [hex: :idna, repo: "hexpm", optional: false]}, {:metrics, "1.0.1", [hex: :metrics, repo: "hexpm", optional: false]}, {:mimerl, "~>1.1", [hex: :mimerl, repo: "hexpm", optional: false]}, {:ssl_verify_fun, "1.1.5", [hex: :ssl_verify_fun, repo: "hexpm", optional: false]}], "hexpm", "e0100f8ef7d1124222c11ad362c857d3df7cb5f4204054f9f0f4a728666591fc"}, "httpotion": {:hex, :httpotion, "3.1.3", "fdaf1e16b9318dcb722de57e75ac368c93d4c6e3c9125f93e960f953a750fb77", [:mix], [{:ibrowse, "== 4.4.0", [hex: :ibrowse, repo: "hexpm", optional: false]}], "hexpm", "e420172ef697a0f1f4dc40f89a319d5a3aad90ec51fa424f08c115f04192ae43"}, -- cgit v1.2.1 From 6a44b3252e75a298766ab0d1db32521ccdd3fabf Mon Sep 17 00:00:00 2001 From: Alessio Biancalana Date: Tue, 4 Feb 2020 17:49:46 +0100 Subject: Disable legacy Credo checks incompatible with Elixir >= 1.9 --- .credo.exs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.credo.exs b/.credo.exs index c2ffd19d0..bd26f407c 100644 --- a/.credo.exs +++ b/.credo.exs @@ -119,7 +119,7 @@ {Credo.Check.Refactor.CyclomaticComplexity, false}, {Credo.Check.Refactor.FunctionArity, []}, {Credo.Check.Refactor.LongQuoteBlocks, false}, - {Credo.Check.Refactor.MapInto, []}, + {Credo.Check.Refactor.MapInto, false}, # Disabled since not compatible with Elixir > 1.9 {Credo.Check.Refactor.MatchInCondition, []}, {Credo.Check.Refactor.NegatedConditionsInUnless, []}, {Credo.Check.Refactor.NegatedConditionsWithElse, []}, @@ -138,7 +138,7 @@ {Credo.Check.Warning.ExpensiveEmptyEnumCheck, []}, {Credo.Check.Warning.IExPry, []}, {Credo.Check.Warning.IoInspect, []}, - {Credo.Check.Warning.LazyLogging, []}, + {Credo.Check.Warning.LazyLogging, false}, # Disabled since not compatible with Elixir > 1.9 {Credo.Check.Warning.OperationOnSameValues, []}, {Credo.Check.Warning.OperationWithConstantResult, []}, {Credo.Check.Warning.RaiseInsideRescue, []}, -- cgit v1.2.1 From 65bc5b0eab5d4a3e902e63cd768c564c7a704082 Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Thu, 27 Feb 2020 12:02:58 -0600 Subject: Bump to jiffy 1.0.4 --- rebar.config.script | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rebar.config.script b/rebar.config.script index 2f7de3dc2..1dcad566c 100644 --- a/rebar.config.script +++ b/rebar.config.script @@ -157,7 +157,7 @@ DepDescs = [ {folsom, "folsom", {tag, "CouchDB-0.8.3"}}, {hyper, "hyper", {tag, "CouchDB-2.2.0-6"}}, {ibrowse, "ibrowse", {tag, "CouchDB-4.0.1-1"}}, -{jiffy, "jiffy", {tag, "CouchDB-1.0.3-1"}}, +{jiffy, "jiffy", {tag, "CouchDB-1.0.4-1"}}, {mochiweb, "mochiweb", {tag, "v2.20.0"}}, {meck, "meck", {tag, "0.8.8"}}, {recon, "recon", {tag, "2.5.0"}} -- cgit v1.2.1 From 7a33ca09e15b3a995afab373dbc9162ec9272d4a Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Thu, 27 Feb 2020 14:02:51 -0600 Subject: Fix mem3_sync_event_listener test There's a race between the meck:wait call in setup and killing the config_event process. Its possible that we could kill and restart the config_event process after meck:wait returns, but before gen_event:add_sup_handler is called. More likely, we could end up killing the config_event gen_event process before its fully handled the add_sup_handler message and linked the notifier pid. This avoids the race by waiting for config_event to return that it has processed the add_sup_handler message instead of relying on meck:wait for the subscription call. --- src/mem3/src/mem3_sync_event_listener.erl | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/mem3/src/mem3_sync_event_listener.erl b/src/mem3/src/mem3_sync_event_listener.erl index b6fbe3279..cad34225d 100644 --- a/src/mem3/src/mem3_sync_event_listener.erl +++ b/src/mem3/src/mem3_sync_event_listener.erl @@ -236,7 +236,7 @@ teardown_all(_) -> setup() -> {ok, Pid} = ?MODULE:start_link(), erlang:unlink(Pid), - meck:wait(config_notifier, subscribe, '_', 1000), + wait_config_subscribed(Pid), Pid. teardown(Pid) -> @@ -338,4 +338,16 @@ wait_state(Pid, Field, Val) when is_pid(Pid), is_integer(Field) -> end, test_util:wait(WaitFun). + +wait_config_subscribed(Pid) -> + WaitFun = fun() -> + Handlers = gen_event:which_handlers(config_event), + Pids = [Id || {config_notifier, Id} <- Handlers], + case lists:member(Pid, Pids) of + true -> true; + false -> wait + end + end, + test_util:wait(WaitFun). + -endif. -- cgit v1.2.1 From d163648521f74f5b9d3085e8eab68b8339c4fc2a Mon Sep 17 00:00:00 2001 From: Alessio Biancalana Date: Thu, 27 Feb 2020 20:33:01 +0100 Subject: Port form_submit.js test to Elixir --- test/elixir/test/form_submit_test.exs | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 test/elixir/test/form_submit_test.exs diff --git a/test/elixir/test/form_submit_test.exs b/test/elixir/test/form_submit_test.exs new file mode 100644 index 000000000..1baf947ac --- /dev/null +++ b/test/elixir/test/form_submit_test.exs @@ -0,0 +1,29 @@ +defmodule FormSubmitTest do + use CouchTestCase + + @moduletag :form_submit + + @moduledoc """ + Test that form submission is invalid + This is a port of form_submit.js + """ + + @tag :with_db + test "form submission gives back invalid content-type", context do + headers = [ + Referer: "http://127.0.0.1:15984", + "Content-Type": "application/x-www-form-urlencoded" + ] + + body = %{} + + %{:body => response_body, :status_code => status_code} = + Couch.post("/#{context[:db_name]}/baz", headers: headers, body: body) + + %{"error" => error, "reason" => reason} = response_body + + assert status_code == 415 + assert error == "bad_content_type" + assert reason == "Content-Type must be multipart/form-data" + end +end -- cgit v1.2.1 From 3f76c9f807ff16b5c2dcbaea7b85eb51350dde80 Mon Sep 17 00:00:00 2001 From: Alessio Biancalana Date: Thu, 27 Feb 2020 22:44:55 +0100 Subject: Mark form_submit JS test as ported in README and inside the test itself --- test/elixir/README.md | 2 +- test/javascript/tests/form_submit.js | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/test/elixir/README.md b/test/elixir/README.md index 0a3ce63d5..ee087c0b8 100644 --- a/test/elixir/README.md +++ b/test/elixir/README.md @@ -48,7 +48,7 @@ X means done, - means partially - [X] Port erlang_views.js - [X] Port etags_head.js - [ ] ~~Port etags_views.js~~ (skipped in js test suite) - - [ ] Port form_submit.js + - [X] Port form_submit.js - [ ] Port http.js - [X] Port invalid_docids.js - [ ] Port jsonp.js diff --git a/test/javascript/tests/form_submit.js b/test/javascript/tests/form_submit.js index 356182e8d..617686543 100644 --- a/test/javascript/tests/form_submit.js +++ b/test/javascript/tests/form_submit.js @@ -12,6 +12,8 @@ // Do some basic tests. couchTests.form_submit = function(debug) { + return console.log('done in test/elixir/test/form_summit_test.exs'); + var db_name = get_random_db_name(); var db = new CouchDB(db_name, {"X-Couch-Full-Commit":"false"}); db.createDb(); -- cgit v1.2.1 From c6b54d6d1a7cbbacf877f7516547976b98c1c4c6 Mon Sep 17 00:00:00 2001 From: Jan Lehnardt Date: Sat, 29 Feb 2020 16:30:56 +0100 Subject: doc: link README-DEV in README --- README.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 47ce32e19..aaf4e17d3 100644 --- a/README.rst +++ b/README.rst @@ -60,7 +60,9 @@ Run a basic test suite for CouchDB by browsing here: Getting started with developing ------------------------------- -For more detail, read the README-DEV.rst file in this directory. +For more detail, read the README-DEV.rst_ file in this directory. + +.. _README-DEV.rst: https://github.com/apache/couchdb/blob/master/README-DEV.rst Basically you just have to install the needed dependencies which are documented in the install docs and then run ``./configure && make``. -- cgit v1.2.1 From 93d52635f2410e264d1a75c0b4d290f491fb492d Mon Sep 17 00:00:00 2001 From: Jan Lehnardt Date: Mon, 2 Mar 2020 17:54:07 +0100 Subject: feat: add mac ci (#2622) --- build-aux/Jenkinsfile.full | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/build-aux/Jenkinsfile.full b/build-aux/Jenkinsfile.full index 181e38871..d88525415 100644 --- a/build-aux/Jenkinsfile.full +++ b/build-aux/Jenkinsfile.full @@ -158,6 +158,43 @@ pipeline { } // post } // stage FreeBSD + stage('macOS') { + agent { + label 'macos' + } + steps { + // deleteDir is OK here because we're not inside of a Docker container! + deleteDir() + unstash 'tarball' + withEnv(['HOME='+pwd()]) { + sh ''' + PATH=/usr/local/bin:$PATH + export PATH + mkdir -p $COUCHDB_IO_LOG_DIR + + # Build CouchDB from tarball & test + mkdir build + cd build + tar -xzf $WORKSPACE/apache-couchdb-*.tar.gz + cd apache-couchdb-* + ./configure --with-curl --spidermonkey-version 60 + make check || (build-aux/logfile-uploader.py && false) + + # No package build for macOS at this time + ''' + } // withEnv + } // steps + post { + always { + junit '**/.eunit/*.xml, **/_build/*/lib/couchdbtest/*.xml, **/src/mango/nosetests.xml, **/test/javascript/junit.xml' + } + cleanup { + sh 'killall -9 beam.smp || true' + sh 'rm -rf ${WORKSPACE}/* ${COUCHDB_IO_LOG_DIR} || true' + } + } // post + } // stage macOS + stage('CentOS 6') { agent { docker { -- cgit v1.2.1 From fff07ba136cfa7433647e974355068b43e63199c Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Wed, 5 Jun 2019 13:04:56 -0500 Subject: Update build system for FoundationDB --- .gitignore | 3 +++ Makefile | 6 +++++- dev/run | 9 ++++++++- rebar.config.script | 7 ++++++- rel/files/eunit.config | 3 ++- test/elixir/run-only | 3 +++ 6 files changed, 27 insertions(+), 4 deletions(-) create mode 100755 test/elixir/run-only diff --git a/.gitignore b/.gitignore index 60e6d145a..d1c106821 100644 --- a/.gitignore +++ b/.gitignore @@ -11,6 +11,7 @@ .DS_Store .vscode .rebar/ +.erlfdb/ .eunit/ cover/ log @@ -45,6 +46,7 @@ src/couch/priv/couch_js/**/*.d src/couch/priv/icu_driver/couch_icu_driver.d src/mango/src/mango_cursor_text.nocompile src/docs/ +src/erlfdb/ src/ets_lru/ src/excoveralls/ src/fauxton/ @@ -55,6 +57,7 @@ src/hyper/ src/ibrowse/ src/idna/ src/ioq/ +src/hqueue/ src/jiffy/ src/ken/ src/khash/ diff --git a/Makefile b/Makefile index e229ee55b..b5f6441ee 100644 --- a/Makefile +++ b/Makefile @@ -223,7 +223,11 @@ python-black-update: .venv/bin/black elixir: export MIX_ENV=integration elixir: export COUCHDB_TEST_ADMIN_PARTY_OVERRIDE=1 elixir: elixir-init elixir-check-formatted elixir-credo devclean - @dev/run "$(TEST_OPTS)" -a adm:pass -n 1 --enable-erlang-views --no-eval 'mix test --trace --exclude without_quorum_test --exclude with_quorum_test $(EXUNIT_OPTS)' + @dev/run "$(TEST_OPTS)" -a adm:pass -n 1 --enable-erlang-views --no-eval --erlang-config=rel/files/eunit.config 'mix test --trace --exclude without_quorum_test --exclude with_quorum_test $(EXUNIT_OPTS)' + +.PHONY: elixir-only +elixir-only: devclean + @dev/run "$(TEST_OPTS)" -a adm:pass -n 1 --enable-erlang-views --no-eval --erlang-config=rel/files/eunit.config 'mix test --trace --exclude without_quorum_test --exclude with_quorum_test $(EXUNIT_OPTS)' .PHONY: elixir-init elixir-init: MIX_ENV=test diff --git a/dev/run b/dev/run index a96817d83..f125c049f 100755 --- a/dev/run +++ b/dev/run @@ -185,6 +185,12 @@ def get_args_parser(): default=[], help="Optional key=val config overrides. Can be repeated", ) + parser.add_option( + "--erlang-config", + dest="erlang_config", + default="rel/files/sys.config", + help="Specify an alternative Erlang application configuration" + ) parser.add_option( "--degrade-cluster", dest="degrade_cluster", @@ -233,6 +239,7 @@ def setup_context(opts, args): "haproxy": opts.haproxy, "haproxy_port": opts.haproxy_port, "config_overrides": opts.config_overrides, + "erlang_config": opts.erlang_config, "no_eval": opts.no_eval, "extra_args": opts.extra_args, "reset_logs": True, @@ -576,7 +583,7 @@ def boot_node(ctx, node): "-args_file", os.path.join(node_etcdir, "vm.args"), "-config", - os.path.join(reldir, "files", "sys"), + os.path.join(ctx["rootdir"], ctx["erlang_config"]), "-couch_ini", os.path.join(node_etcdir, "default.ini"), os.path.join(node_etcdir, "local.ini"), diff --git a/rebar.config.script b/rebar.config.script index 1dcad566c..1d11e7d36 100644 --- a/rebar.config.script +++ b/rebar.config.script @@ -160,7 +160,10 @@ DepDescs = [ {jiffy, "jiffy", {tag, "CouchDB-1.0.4-1"}}, {mochiweb, "mochiweb", {tag, "v2.20.0"}}, {meck, "meck", {tag, "0.8.8"}}, -{recon, "recon", {tag, "2.5.0"}} +{recon, "recon", {tag, "2.5.0"}}, + +%% TMP - Until this is moved to a proper Apache repo +{erlfdb, "erlfdb", {branch, "master"}} ], WithProper = lists:keyfind(with_proper, 1, CouchConfig) == {with_proper, true}, @@ -175,6 +178,8 @@ end, BaseUrl = "https://github.com/apache/", MakeDep = fun + ({erlfdb, _, Version}) -> + {erlfdb, ".*", {git, "https://github.com/cloudant-labs/couchdb-erlfdb", {branch, "master"}}}; ({AppName, {url, Url}, Version}) -> {AppName, ".*", {git, Url, Version}}; ({AppName, {url, Url}, Version, Options}) -> diff --git a/rel/files/eunit.config b/rel/files/eunit.config index 3c7457d3a..5e96fae9e 100644 --- a/rel/files/eunit.config +++ b/rel/files/eunit.config @@ -12,5 +12,6 @@ [ {kernel, [{error_logger, silent}]}, - {sasl, [{sasl_error_logger, false}]} + {sasl, [{sasl_error_logger, false}]}, + {fabric, [{eunit_run, true}]} ]. diff --git a/test/elixir/run-only b/test/elixir/run-only new file mode 100755 index 000000000..7c2a4aeea --- /dev/null +++ b/test/elixir/run-only @@ -0,0 +1,3 @@ +#!/bin/bash -e +cd "$(dirname "$0")" +mix test --trace "$@" -- cgit v1.2.1 From 95cb955b7c97cac34ab4f76afccadd029f9906a8 Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Wed, 5 Jun 2019 13:09:50 -0500 Subject: Disable eunit test suite in fabric Most of these tests are for quorum and clustered response handling which will no longer exist with FoundationDB. Eventually we'll want to go through these and pick out anything that is still applicable and ensure that we re-add them to the new test suite. --- src/fabric/src/fabric.erl | 100 +-- src/fabric/src/fabric_db_create.erl | 82 +- src/fabric/src/fabric_db_info.erl | 62 +- src/fabric/src/fabric_doc_open.erl | 852 ++++++++++---------- src/fabric/src/fabric_doc_open_revs.erl | 968 +++++++++++------------ src/fabric/src/fabric_doc_purge.erl | 690 ++++++++-------- src/fabric/src/fabric_doc_update.erl | 310 ++++---- src/fabric/src/fabric_rpc.erl | 38 +- src/fabric/src/fabric_streams.erl | 157 ++-- src/fabric/src/fabric_util.erl | 48 +- src/fabric/src/fabric_view.erl | 126 +-- src/fabric/src/fabric_view_changes.erl | 362 ++++----- src/fabric/test/eunit/fabric_rpc_purge_tests.erl | 307 ------- 13 files changed, 1898 insertions(+), 2204 deletions(-) delete mode 100644 src/fabric/test/eunit/fabric_rpc_purge_tests.erl diff --git a/src/fabric/src/fabric.erl b/src/fabric/src/fabric.erl index 27fa8c045..bb538e2db 100644 --- a/src/fabric/src/fabric.erl +++ b/src/fabric/src/fabric.erl @@ -668,53 +668,53 @@ set_namespace(NS, #mrargs{extra = Extra} = Args) -> get_view_sig_from_filename(FilePath) -> filename:basename(filename:basename(FilePath, ".view"), ".compact"). --ifdef(TEST). --include_lib("eunit/include/eunit.hrl"). - -update_doc_test_() -> - { - "Update doc tests", { - setup, fun setup/0, fun teardown/1, - fun(Ctx) -> [ - should_throw_conflict(Ctx) - ] end - } - }. - -should_throw_conflict(Doc) -> - ?_test(begin - ?assertThrow(conflict, update_doc(<<"test-db">>, Doc, [])) - end). - - -setup() -> - Doc = #doc{ - id = <<"test_doc">>, - revs = {3, [<<5,68,252,180,43,161,216,223,26,119,71,219,212,229, - 159,113>>]}, - body = {[{<<"foo">>,<<"asdf">>},{<<"author">>,<<"tom">>}]}, - atts = [], deleted = false, meta = [] - }, - ok = application:ensure_started(config), - ok = meck:expect(mem3, shards, fun(_, _) -> [] end), - ok = meck:expect(mem3, quorum, fun(_) -> 1 end), - ok = meck:expect(rexi, cast, fun(_, _) -> ok end), - ok = meck:expect(rexi_utils, recv, - fun(_, _, _, _, _, _) -> - {ok, {error, [{Doc, conflict}]}} - end), - ok = meck:expect(couch_util, reorder_results, - fun(_, [{_, Res}]) -> - [Res] - end), - ok = meck:expect(fabric_util, create_monitors, fun(_) -> ok end), - ok = meck:expect(rexi_monitor, stop, fun(_) -> ok end), - Doc. - - -teardown(_) -> - meck:unload(), - ok = application:stop(config). - - --endif. +%% -ifdef(TEST). +%% -include_lib("eunit/include/eunit.hrl"). +%% +%% update_doc_test_() -> +%% { +%% "Update doc tests", { +%% setup, fun setup/0, fun teardown/1, +%% fun(Ctx) -> [ +%% should_throw_conflict(Ctx) +%% ] end +%% } +%% }. +%% +%% should_throw_conflict(Doc) -> +%% ?_test(begin +%% ?assertThrow(conflict, update_doc(<<"test-db">>, Doc, [])) +%% end). +%% +%% +%% setup() -> +%% Doc = #doc{ +%% id = <<"test_doc">>, +%% revs = {3, [<<5,68,252,180,43,161,216,223,26,119,71,219,212,229, +%% 159,113>>]}, +%% body = {[{<<"foo">>,<<"asdf">>},{<<"author">>,<<"tom">>}]}, +%% atts = [], deleted = false, meta = [] +%% }, +%% ok = application:ensure_started(config), +%% ok = meck:expect(mem3, shards, fun(_, _) -> [] end), +%% ok = meck:expect(mem3, quorum, fun(_) -> 1 end), +%% ok = meck:expect(rexi, cast, fun(_, _) -> ok end), +%% ok = meck:expect(rexi_utils, recv, +%% fun(_, _, _, _, _, _) -> +%% {ok, {error, [{Doc, conflict}]}} +%% end), +%% ok = meck:expect(couch_util, reorder_results, +%% fun(_, [{_, Res}]) -> +%% [Res] +%% end), +%% ok = meck:expect(fabric_util, create_monitors, fun(_) -> ok end), +%% ok = meck:expect(rexi_monitor, stop, fun(_) -> ok end), +%% Doc. +%% +%% +%% teardown(_) -> +%% meck:unload(), +%% ok = application:stop(config). +%% +%% +%% -endif. diff --git a/src/fabric/src/fabric_db_create.erl b/src/fabric/src/fabric_db_create.erl index 03fabb4ea..a2833e6aa 100644 --- a/src/fabric/src/fabric_db_create.erl +++ b/src/fabric/src/fabric_db_create.erl @@ -185,44 +185,44 @@ make_document([#shard{dbname=DbName}|_] = Shards, Suffix, Options) -> db_exists(DbName) -> is_list(catch mem3:shards(DbName)). --ifdef(TEST). --include_lib("eunit/include/eunit.hrl"). - -db_exists_test_() -> - { - setup, - fun setup_all/0, - fun teardown_all/1, - [ - fun db_exists_for_existing_db/0, - fun db_exists_for_missing_db/0 - ] - }. - - -setup_all() -> - meck:new(mem3). - - -teardown_all(_) -> - meck:unload(). - - -db_exists_for_existing_db() -> - Mock = fun(DbName) when is_binary(DbName) -> - [#shard{dbname = DbName, range = [0,100]}] - end, - ok = meck:expect(mem3, shards, Mock), - ?assertEqual(true, db_exists(<<"foobar">>)), - ?assertEqual(true, meck:validate(mem3)). - - -db_exists_for_missing_db() -> - Mock = fun(DbName) -> - erlang:error(database_does_not_exist, DbName) - end, - ok = meck:expect(mem3, shards, Mock), - ?assertEqual(false, db_exists(<<"foobar">>)), - ?assertEqual(false, meck:validate(mem3)). - --endif. +%% -ifdef(TEST). +%% -include_lib("eunit/include/eunit.hrl"). +%% +%% db_exists_test_() -> +%% { +%% setup, +%% fun setup_all/0, +%% fun teardown_all/1, +%% [ +%% fun db_exists_for_existing_db/0, +%% fun db_exists_for_missing_db/0 +%% ] +%% }. +%% +%% +%% setup_all() -> +%% meck:new(mem3). +%% +%% +%% teardown_all(_) -> +%% meck:unload(). +%% +%% +%% db_exists_for_existing_db() -> +%% Mock = fun(DbName) when is_binary(DbName) -> +%% [#shard{dbname = DbName, range = [0,100]}] +%% end, +%% ok = meck:expect(mem3, shards, Mock), +%% ?assertEqual(true, db_exists(<<"foobar">>)), +%% ?assertEqual(true, meck:validate(mem3)). +%% +%% +%% db_exists_for_missing_db() -> +%% Mock = fun(DbName) -> +%% erlang:error(database_does_not_exist, DbName) +%% end, +%% ok = meck:expect(mem3, shards, Mock), +%% ?assertEqual(false, db_exists(<<"foobar">>)), +%% ?assertEqual(false, meck:validate(mem3)). +%% +%% -endif. diff --git a/src/fabric/src/fabric_db_info.erl b/src/fabric/src/fabric_db_info.erl index 40da678e5..6c7d2d177 100644 --- a/src/fabric/src/fabric_db_info.erl +++ b/src/fabric/src/fabric_db_info.erl @@ -138,34 +138,34 @@ get_cluster_info(Shards) -> {ok, [{q, Q}, {n, N}, {w, WR}, {r, WR}]}. --ifdef(TEST). --include_lib("eunit/include/eunit.hrl"). - -get_cluster_info_test_() -> - { - setup, - fun setup/0, - fun get_cluster_info_test_generator/1 - }. - - -setup() -> - Quorums = [1, 2, 3], - Shards = [1, 3, 5, 8, 12, 24], - [{N, Q} || N <- Quorums, Q <- Shards]. - -get_cluster_info_test_generator([]) -> - []; -get_cluster_info_test_generator([{N, Q} | Rest]) -> - {generator, - fun() -> - Nodes = lists:seq(1, 8), - Shards = mem3_util:create_partition_map(<<"foo">>, N, Q, Nodes), - {ok, Info} = get_cluster_info(Shards), - [ - ?_assertEqual(N, couch_util:get_value(n, Info)), - ?_assertEqual(Q, couch_util:get_value(q, Info)) - ] ++ get_cluster_info_test_generator(Rest) - end}. - --endif. +%% -ifdef(TEST). +%% -include_lib("eunit/include/eunit.hrl"). +%% +%% get_cluster_info_test_() -> +%% { +%% setup, +%% fun setup/0, +%% fun get_cluster_info_test_generator/1 +%% }. +%% +%% +%% setup() -> +%% Quorums = [1, 2, 3], +%% Shards = [1, 3, 5, 8, 12, 24], +%% [{N, Q} || N <- Quorums, Q <- Shards]. +%% +%% get_cluster_info_test_generator([]) -> +%% []; +%% get_cluster_info_test_generator([{N, Q} | Rest]) -> +%% {generator, +%% fun() -> +%% Nodes = lists:seq(1, 8), +%% Shards = mem3_util:create_partition_map(<<"foo">>, N, Q, Nodes), +%% {ok, Info} = get_cluster_info(Shards), +%% [ +%% ?_assertEqual(N, couch_util:get_value(n, Info)), +%% ?_assertEqual(Q, couch_util:get_value(q, Info)) +%% ] ++ get_cluster_info_test_generator(Rest) +%% end}. +%% +%% -endif. diff --git a/src/fabric/src/fabric_doc_open.erl b/src/fabric/src/fabric_doc_open.erl index 8ef604b60..fe3a79a1f 100644 --- a/src/fabric/src/fabric_doc_open.erl +++ b/src/fabric/src/fabric_doc_open.erl @@ -182,429 +182,429 @@ format_reply(Else, _) -> Else. --ifdef(TEST). --include_lib("eunit/include/eunit.hrl"). - --define(MECK_MODS, [ - couch_log, - couch_stats, - fabric, - fabric_util, - mem3, - rexi, - rexi_monitor -]). - - -setup_all() -> - meck:new(?MECK_MODS, [passthrough]). - - -teardown_all(_) -> - meck:unload(). - - -setup() -> - meck:reset(?MECK_MODS). - - -teardown(_) -> - ok. - - -open_doc_test_() -> - { - setup, - fun setup_all/0, - fun teardown_all/1, - { - foreach, - fun setup/0, - fun teardown/1, - [ - t_is_r_met(), - t_handle_message_down(), - t_handle_message_exit(), - t_handle_message_reply(), - t_store_node_revs(), - t_read_repair(), - t_handle_response_quorum_met(), - t_get_doc_info() - ] - } - }. - - -t_is_r_met() -> - ?_test(begin - Workers0 = [], - Workers1 = [nil], - Workers2 = [nil, nil], - - SuccessCases = [ - {{true, foo}, [fabric_util:kv(foo, 2)], 2}, - {{true, foo}, [fabric_util:kv(foo, 3)], 2}, - {{true, foo}, [fabric_util:kv(foo, 1)], 1}, - {{true, foo}, [fabric_util:kv(foo, 2), fabric_util:kv(bar, 1)], 2}, - {{true, bar}, [fabric_util:kv(bar, 1), fabric_util:kv(bar, 2)], 2}, - {{true, bar}, [fabric_util:kv(bar, 2), fabric_util:kv(foo, 1)], 2} - ], - lists:foreach(fun({Expect, Replies, Q}) -> - ?assertEqual(Expect, is_r_met(Workers0, Replies, Q)) - end, SuccessCases), - - WaitForMoreCases = [ - {[fabric_util:kv(foo, 1)], 2}, - {[fabric_util:kv(foo, 2)], 3}, - {[fabric_util:kv(foo, 1), fabric_util:kv(bar, 1)], 2} - ], - lists:foreach(fun({Replies, Q}) -> - ?assertEqual(wait_for_more, is_r_met(Workers2, Replies, Q)) - end, WaitForMoreCases), - - FailureCases = [ - {Workers0, [fabric_util:kv(foo, 1)], 2}, - {Workers1, [fabric_util:kv(foo, 1)], 2}, - {Workers1, [fabric_util:kv(foo, 1), fabric_util:kv(bar, 1)], 2}, - {Workers1, [fabric_util:kv(foo, 2)], 3} - ], - lists:foreach(fun({Workers, Replies, Q}) -> - ?assertEqual(no_more_workers, is_r_met(Workers, Replies, Q)) - end, FailureCases) - end). - - -t_handle_message_down() -> - Node0 = 'foo@localhost', - Node1 = 'bar@localhost', - Down0 = {rexi_DOWN, nil, {nil, Node0}, nil}, - Down1 = {rexi_DOWN, nil, {nil, Node1}, nil}, - Workers0 = [#shard{node=Node0} || _ <- [a, b]], - Worker1 = #shard{node=Node1}, - Workers1 = Workers0 ++ [Worker1], - - ?_test(begin - % Stop when no more workers are left - ?assertEqual( - {stop, #acc{workers=[]}}, - handle_message(Down0, nil, #acc{workers=Workers0}) - ), - - % Continue when we have more workers - ?assertEqual( - {ok, #acc{workers=[Worker1]}}, - handle_message(Down0, nil, #acc{workers=Workers1}) - ), - - % A second DOWN removes the remaining workers - ?assertEqual( - {stop, #acc{workers=[]}}, - handle_message(Down1, nil, #acc{workers=[Worker1]}) - ) - end). - - -t_handle_message_exit() -> - Exit = {rexi_EXIT, nil}, - Worker0 = #shard{ref=erlang:make_ref()}, - Worker1 = #shard{ref=erlang:make_ref()}, - - ?_test(begin - % Only removes the specified worker - ?assertEqual( - {ok, #acc{workers=[Worker1]}}, - handle_message(Exit, Worker0, #acc{workers=[Worker0, Worker1]}) - ), - - ?assertEqual( - {ok, #acc{workers=[Worker0]}}, - handle_message(Exit, Worker1, #acc{workers=[Worker0, Worker1]}) - ), - - % We bail if it was the last worker - ?assertEqual( - {stop, #acc{workers=[]}}, - handle_message(Exit, Worker0, #acc{workers=[Worker0]}) - ) - end). - - -t_handle_message_reply() -> - Worker0 = #shard{ref=erlang:make_ref()}, - Worker1 = #shard{ref=erlang:make_ref()}, - Worker2 = #shard{ref=erlang:make_ref()}, - Workers = [Worker0, Worker1, Worker2], - Acc0 = #acc{workers=Workers, r=2, replies=[]}, - - ?_test(begin - meck:expect(rexi, kill_all, fun(_) -> ok end), - - % Test that we continue when we haven't met R yet - ?assertMatch( - {ok, #acc{ - workers=[Worker0, Worker1], - replies=[{foo, {foo, 1}}] - }}, - handle_message(foo, Worker2, Acc0) - ), - - ?assertMatch( - {ok, #acc{ - workers=[Worker0, Worker1], - replies=[{bar, {bar, 1}}, {foo, {foo, 1}}] - }}, - handle_message(bar, Worker2, Acc0#acc{ - replies=[{foo, {foo, 1}}] - }) - ), - - % Test that we don't get a quorum when R isn't met. q_reply - % isn't set and state remains unchanged and {stop, NewAcc} - % is returned. Bit subtle on the assertions here. - - ?assertMatch( - {stop, #acc{workers=[], replies=[{foo, {foo, 1}}]}}, - handle_message(foo, Worker0, Acc0#acc{workers=[Worker0]}) - ), - - ?assertMatch( - {stop, #acc{ - workers=[], - replies=[{bar, {bar, 1}}, {foo, {foo, 1}}] - }}, - handle_message(bar, Worker0, Acc0#acc{ - workers=[Worker0], - replies=[{foo, {foo, 1}}] - }) - ), - - % Check that when R is met we stop with a new state and - % a q_reply. - - ?assertMatch( - {stop, #acc{ - workers=[], - replies=[{foo, {foo, 2}}], - state=r_met, - q_reply=foo - }}, - handle_message(foo, Worker1, Acc0#acc{ - workers=[Worker0, Worker1], - replies=[{foo, {foo, 1}}] - }) - ), - - ?assertEqual( - {stop, #acc{ - workers=[], - r=1, - replies=[{foo, {foo, 1}}], - state=r_met, - q_reply=foo - }}, - handle_message(foo, Worker0, Acc0#acc{r=1}) - ), - - ?assertMatch( - {stop, #acc{ - workers=[], - replies=[{bar, {bar, 1}}, {foo, {foo, 2}}], - state=r_met, - q_reply=foo - }}, - handle_message(foo, Worker0, Acc0#acc{ - workers=[Worker0], - replies=[{bar, {bar, 1}}, {foo, {foo, 1}}] - }) - ) - end). - - -t_store_node_revs() -> - W1 = #shard{node = w1, ref = erlang:make_ref()}, - W2 = #shard{node = w2, ref = erlang:make_ref()}, - W3 = #shard{node = w3, ref = erlang:make_ref()}, - Foo1 = {ok, #doc{id = <<"bar">>, revs = {1, [<<"foo">>]}}}, - Foo2 = {ok, #doc{id = <<"bar">>, revs = {2, [<<"foo2">>, <<"foo">>]}}}, - NFM = {not_found, missing}, - - InitAcc = #acc{workers = [W1, W2, W3], replies = [], r = 2}, - - ?_test(begin - meck:expect(rexi, kill_all, fun(_) -> ok end), - - % Simple case - {ok, #acc{node_revs = NodeRevs1}} = handle_message(Foo1, W1, InitAcc), - ?assertEqual([{w1, [{1, <<"foo">>}]}], NodeRevs1), - - % Make sure we only hold the head rev - {ok, #acc{node_revs = NodeRevs2}} = handle_message(Foo2, W1, InitAcc), - ?assertEqual([{w1, [{2, <<"foo2">>}]}], NodeRevs2), - - % Make sure we don't capture anything on error - {ok, #acc{node_revs = NodeRevs3}} = handle_message(NFM, W1, InitAcc), - ?assertEqual([], NodeRevs3), - - % Make sure we accumulate node revs - Acc1 = InitAcc#acc{node_revs = [{w1, [{1, <<"foo">>}]}]}, - {ok, #acc{node_revs = NodeRevs4}} = handle_message(Foo2, W2, Acc1), - ?assertEqual( - [{w2, [{2, <<"foo2">>}]}, {w1, [{1, <<"foo">>}]}], - NodeRevs4 - ), - - % Make sure rexi_DOWN doesn't modify node_revs - Down = {rexi_DOWN, nil, {nil, w1}, nil}, - {ok, #acc{node_revs = NodeRevs5}} = handle_message(Down, W2, Acc1), - ?assertEqual([{w1, [{1, <<"foo">>}]}], NodeRevs5), - - % Make sure rexi_EXIT doesn't modify node_revs - Exit = {rexi_EXIT, reason}, - {ok, #acc{node_revs = NodeRevs6}} = handle_message(Exit, W2, Acc1), - ?assertEqual([{w1, [{1, <<"foo">>}]}], NodeRevs6), - - % Make sure an error doesn't remove any node revs - {ok, #acc{node_revs = NodeRevs7}} = handle_message(NFM, W2, Acc1), - ?assertEqual([{w1, [{1, <<"foo">>}]}], NodeRevs7), - - % Make sure we have all of our node_revs when meeting - % quorum - {ok, Acc2} = handle_message(Foo1, W1, InitAcc), - {ok, Acc3} = handle_message(Foo2, W2, Acc2), - {stop, Acc4} = handle_message(NFM, W3, Acc3), - ?assertEqual( - [{w2, [{2, <<"foo2">>}]}, {w1, [{1, <<"foo">>}]}], - Acc4#acc.node_revs - ) - end). - - -t_read_repair() -> - Foo1 = {ok, #doc{revs = {1,[<<"foo">>]}}}, - Foo2 = {ok, #doc{revs = {2,[<<"foo2">>,<<"foo">>]}}}, - NFM = {not_found, missing}, - - ?_test(begin - meck:expect(couch_log, notice, fun(_, _) -> ok end), - meck:expect(couch_stats, increment_counter, fun(_) -> ok end), - - % Test when we have actual doc data to repair - meck:expect(fabric, update_docs, fun(_, [_], _) -> {ok, []} end), - Acc0 = #acc{ - dbname = <<"name">>, - replies = [fabric_util:kv(Foo1,1)] - }, - ?assertEqual(Foo1, read_repair(Acc0)), - - meck:expect(fabric, update_docs, fun(_, [_, _], _) -> {ok, []} end), - Acc1 = #acc{ - dbname = <<"name">>, - replies = [fabric_util:kv(Foo1,1), fabric_util:kv(Foo2,1)] - }, - ?assertEqual(Foo2, read_repair(Acc1)), - - % Test when we have nothing but errors - Acc2 = #acc{replies=[fabric_util:kv(NFM, 1)]}, - ?assertEqual(NFM, read_repair(Acc2)), - - Acc3 = #acc{replies=[fabric_util:kv(NFM,1), fabric_util:kv(foo,2)]}, - ?assertEqual(NFM, read_repair(Acc3)), - - Acc4 = #acc{replies=[fabric_util:kv(foo,1), fabric_util:kv(bar,1)]}, - ?assertEqual(bar, read_repair(Acc4)) - end). - - -t_handle_response_quorum_met() -> - Foo1 = {ok, #doc{revs = {1,[<<"foo">>]}}}, - Foo2 = {ok, #doc{revs = {2,[<<"foo2">>,<<"foo">>]}}}, - Bar1 = {ok, #doc{revs = {1,[<<"bar">>]}}}, - - ?_test(begin - meck:expect(couch_log, notice, fun(_, _) -> ok end), - meck:expect(fabric, update_docs, fun(_, _, _) -> {ok, []} end), - meck:expect(couch_stats, increment_counter, fun(_) -> ok end), - - BasicOkAcc = #acc{ - state=r_met, - replies=[fabric_util:kv(Foo1,2)], - q_reply=Foo1 - }, - ?assertEqual(Foo1, handle_response(BasicOkAcc)), - - WithAncestorsAcc = #acc{ - state=r_met, - replies=[fabric_util:kv(Foo1,1), fabric_util:kv(Foo2,2)], - q_reply=Foo2 - }, - ?assertEqual(Foo2, handle_response(WithAncestorsAcc)), - - % This also checks when the quorum isn't the most recent - % revision. - DeeperWinsAcc = #acc{ - state=r_met, - replies=[fabric_util:kv(Foo1,2), fabric_util:kv(Foo2,1)], - q_reply=Foo1 - }, - ?assertEqual(Foo2, handle_response(DeeperWinsAcc)), - - % Check that we return the proper doc based on rev - % (ie, pos is equal) - BiggerRevWinsAcc = #acc{ - state=r_met, - replies=[fabric_util:kv(Foo1,1), fabric_util:kv(Bar1,2)], - q_reply=Bar1 - }, - ?assertEqual(Foo1, handle_response(BiggerRevWinsAcc)) - - % r_not_met is a proxy to read_repair so we rely on - % read_repair_test for those conditions. - end). - - -t_get_doc_info() -> - ?_test(begin - meck:expect(fabric, update_docs, fun(_, _, _) -> {ok, []} end), - meck:expect(couch_stats, increment_counter, fun(_) -> ok end), - meck:expect(fabric_util, submit_jobs, fun(_, _, _) -> ok end), - meck:expect(fabric_util, create_monitors, fun(_) -> ok end), - meck:expect(rexi_monitor, stop, fun(_) -> ok end), - meck:expect(mem3, shards, fun(_, _) -> ok end), - meck:expect(mem3, n, fun(_) -> 3 end), - meck:expect(mem3, quorum, fun(_) -> 2 end), - - meck:expect(fabric_util, recv, fun(_, _, _, _) -> - {ok, #acc{state = r_not_met}} - end), - Rsp1 = fabric_doc_open:go("test", "one", [doc_info]), - ?assertEqual({error, quorum_not_met}, Rsp1), - - Rsp2 = fabric_doc_open:go("test", "one", [{doc_info, full}]), - ?assertEqual({error, quorum_not_met}, Rsp2), - - meck:expect(fabric_util, recv, fun(_, _, _, _) -> - {ok, #acc{state = r_met, q_reply = not_found}} - end), - MissingRsp1 = fabric_doc_open:go("test", "one", [doc_info]), - ?assertEqual({not_found, missing}, MissingRsp1), - MissingRsp2 = fabric_doc_open:go("test", "one", [{doc_info, full}]), - ?assertEqual({not_found, missing}, MissingRsp2), - - meck:expect(fabric_util, recv, fun(_, _, _, _) -> - A = #doc_info{}, - {ok, #acc{state = r_met, q_reply = {ok, A}}} - end), - {ok, Rec1} = fabric_doc_open:go("test", "one", [doc_info]), - ?assert(is_record(Rec1, doc_info)), - - meck:expect(fabric_util, recv, fun(_, _, _, _) -> - A = #full_doc_info{deleted = true}, - {ok, #acc{state = r_met, q_reply = {ok, A}}} - end), - Rsp3 = fabric_doc_open:go("test", "one", [{doc_info, full}]), - ?assertEqual({not_found, deleted}, Rsp3), - {ok, Rec2} = fabric_doc_open:go("test", "one", [{doc_info, full},deleted]), - ?assert(is_record(Rec2, full_doc_info)) - end). - --endif. +%% -ifdef(TEST). +%% -include_lib("eunit/include/eunit.hrl"). +%% +%% -define(MECK_MODS, [ +%% couch_log, +%% couch_stats, +%% fabric, +%% fabric_util, +%% mem3, +%% rexi, +%% rexi_monitor +%% ]). +%% +%% +%% setup_all() -> +%% meck:new(?MECK_MODS, [passthrough]). +%% +%% +%% teardown_all(_) -> +%% meck:unload(). +%% +%% +%% setup() -> +%% meck:reset(?MECK_MODS). +%% +%% +%% teardown(_) -> +%% ok. +%% +%% +%% open_doc_test_() -> +%% { +%% setup, +%% fun setup_all/0, +%% fun teardown_all/1, +%% { +%% foreach, +%% fun setup/0, +%% fun teardown/1, +%% [ +%% t_is_r_met(), +%% t_handle_message_down(), +%% t_handle_message_exit(), +%% t_handle_message_reply(), +%% t_store_node_revs(), +%% t_read_repair(), +%% t_handle_response_quorum_met(), +%% t_get_doc_info() +%% ] +%% } +%% }. +%% +%% +%% t_is_r_met() -> +%% ?_test(begin +%% Workers0 = [], +%% Workers1 = [nil], +%% Workers2 = [nil, nil], +%% +%% SuccessCases = [ +%% {{true, foo}, [fabric_util:kv(foo, 2)], 2}, +%% {{true, foo}, [fabric_util:kv(foo, 3)], 2}, +%% {{true, foo}, [fabric_util:kv(foo, 1)], 1}, +%% {{true, foo}, [fabric_util:kv(foo, 2), fabric_util:kv(bar, 1)], 2}, +%% {{true, bar}, [fabric_util:kv(bar, 1), fabric_util:kv(bar, 2)], 2}, +%% {{true, bar}, [fabric_util:kv(bar, 2), fabric_util:kv(foo, 1)], 2} +%% ], +%% lists:foreach(fun({Expect, Replies, Q}) -> +%% ?assertEqual(Expect, is_r_met(Workers0, Replies, Q)) +%% end, SuccessCases), +%% +%% WaitForMoreCases = [ +%% {[fabric_util:kv(foo, 1)], 2}, +%% {[fabric_util:kv(foo, 2)], 3}, +%% {[fabric_util:kv(foo, 1), fabric_util:kv(bar, 1)], 2} +%% ], +%% lists:foreach(fun({Replies, Q}) -> +%% ?assertEqual(wait_for_more, is_r_met(Workers2, Replies, Q)) +%% end, WaitForMoreCases), +%% +%% FailureCases = [ +%% {Workers0, [fabric_util:kv(foo, 1)], 2}, +%% {Workers1, [fabric_util:kv(foo, 1)], 2}, +%% {Workers1, [fabric_util:kv(foo, 1), fabric_util:kv(bar, 1)], 2}, +%% {Workers1, [fabric_util:kv(foo, 2)], 3} +%% ], +%% lists:foreach(fun({Workers, Replies, Q}) -> +%% ?assertEqual(no_more_workers, is_r_met(Workers, Replies, Q)) +%% end, FailureCases) +%% end). +%% +%% +%% t_handle_message_down() -> +%% Node0 = 'foo@localhost', +%% Node1 = 'bar@localhost', +%% Down0 = {rexi_DOWN, nil, {nil, Node0}, nil}, +%% Down1 = {rexi_DOWN, nil, {nil, Node1}, nil}, +%% Workers0 = [#shard{node=Node0} || _ <- [a, b]], +%% Worker1 = #shard{node=Node1}, +%% Workers1 = Workers0 ++ [Worker1], +%% +%% ?_test(begin +%% % Stop when no more workers are left +%% ?assertEqual( +%% {stop, #acc{workers=[]}}, +%% handle_message(Down0, nil, #acc{workers=Workers0}) +%% ), +%% +%% % Continue when we have more workers +%% ?assertEqual( +%% {ok, #acc{workers=[Worker1]}}, +%% handle_message(Down0, nil, #acc{workers=Workers1}) +%% ), +%% +%% % A second DOWN removes the remaining workers +%% ?assertEqual( +%% {stop, #acc{workers=[]}}, +%% handle_message(Down1, nil, #acc{workers=[Worker1]}) +%% ) +%% end). +%% +%% +%% t_handle_message_exit() -> +%% Exit = {rexi_EXIT, nil}, +%% Worker0 = #shard{ref=erlang:make_ref()}, +%% Worker1 = #shard{ref=erlang:make_ref()}, +%% +%% ?_test(begin +%% % Only removes the specified worker +%% ?assertEqual( +%% {ok, #acc{workers=[Worker1]}}, +%% handle_message(Exit, Worker0, #acc{workers=[Worker0, Worker1]}) +%% ), +%% +%% ?assertEqual( +%% {ok, #acc{workers=[Worker0]}}, +%% handle_message(Exit, Worker1, #acc{workers=[Worker0, Worker1]}) +%% ), +%% +%% % We bail if it was the last worker +%% ?assertEqual( +%% {stop, #acc{workers=[]}}, +%% handle_message(Exit, Worker0, #acc{workers=[Worker0]}) +%% ) +%% end). +%% +%% +%% t_handle_message_reply() -> +%% Worker0 = #shard{ref=erlang:make_ref()}, +%% Worker1 = #shard{ref=erlang:make_ref()}, +%% Worker2 = #shard{ref=erlang:make_ref()}, +%% Workers = [Worker0, Worker1, Worker2], +%% Acc0 = #acc{workers=Workers, r=2, replies=[]}, +%% +%% ?_test(begin +%% meck:expect(rexi, kill_all, fun(_) -> ok end), +%% +%% % Test that we continue when we haven't met R yet +%% ?assertMatch( +%% {ok, #acc{ +%% workers=[Worker0, Worker1], +%% replies=[{foo, {foo, 1}}] +%% }}, +%% handle_message(foo, Worker2, Acc0) +%% ), +%% +%% ?assertMatch( +%% {ok, #acc{ +%% workers=[Worker0, Worker1], +%% replies=[{bar, {bar, 1}}, {foo, {foo, 1}}] +%% }}, +%% handle_message(bar, Worker2, Acc0#acc{ +%% replies=[{foo, {foo, 1}}] +%% }) +%% ), +%% +%% % Test that we don't get a quorum when R isn't met. q_reply +%% % isn't set and state remains unchanged and {stop, NewAcc} +%% % is returned. Bit subtle on the assertions here. +%% +%% ?assertMatch( +%% {stop, #acc{workers=[], replies=[{foo, {foo, 1}}]}}, +%% handle_message(foo, Worker0, Acc0#acc{workers=[Worker0]}) +%% ), +%% +%% ?assertMatch( +%% {stop, #acc{ +%% workers=[], +%% replies=[{bar, {bar, 1}}, {foo, {foo, 1}}] +%% }}, +%% handle_message(bar, Worker0, Acc0#acc{ +%% workers=[Worker0], +%% replies=[{foo, {foo, 1}}] +%% }) +%% ), +%% +%% % Check that when R is met we stop with a new state and +%% % a q_reply. +%% +%% ?assertMatch( +%% {stop, #acc{ +%% workers=[], +%% replies=[{foo, {foo, 2}}], +%% state=r_met, +%% q_reply=foo +%% }}, +%% handle_message(foo, Worker1, Acc0#acc{ +%% workers=[Worker0, Worker1], +%% replies=[{foo, {foo, 1}}] +%% }) +%% ), +%% +%% ?assertEqual( +%% {stop, #acc{ +%% workers=[], +%% r=1, +%% replies=[{foo, {foo, 1}}], +%% state=r_met, +%% q_reply=foo +%% }}, +%% handle_message(foo, Worker0, Acc0#acc{r=1}) +%% ), +%% +%% ?assertMatch( +%% {stop, #acc{ +%% workers=[], +%% replies=[{bar, {bar, 1}}, {foo, {foo, 2}}], +%% state=r_met, +%% q_reply=foo +%% }}, +%% handle_message(foo, Worker0, Acc0#acc{ +%% workers=[Worker0], +%% replies=[{bar, {bar, 1}}, {foo, {foo, 1}}] +%% }) +%% ) +%% end). +%% +%% +%% t_store_node_revs() -> +%% W1 = #shard{node = w1, ref = erlang:make_ref()}, +%% W2 = #shard{node = w2, ref = erlang:make_ref()}, +%% W3 = #shard{node = w3, ref = erlang:make_ref()}, +%% Foo1 = {ok, #doc{id = <<"bar">>, revs = {1, [<<"foo">>]}}}, +%% Foo2 = {ok, #doc{id = <<"bar">>, revs = {2, [<<"foo2">>, <<"foo">>]}}}, +%% NFM = {not_found, missing}, +%% +%% InitAcc = #acc{workers = [W1, W2, W3], replies = [], r = 2}, +%% +%% ?_test(begin +%% meck:expect(rexi, kill_all, fun(_) -> ok end), +%% +%% % Simple case +%% {ok, #acc{node_revs = NodeRevs1}} = handle_message(Foo1, W1, InitAcc), +%% ?assertEqual([{w1, [{1, <<"foo">>}]}], NodeRevs1), +%% +%% % Make sure we only hold the head rev +%% {ok, #acc{node_revs = NodeRevs2}} = handle_message(Foo2, W1, InitAcc), +%% ?assertEqual([{w1, [{2, <<"foo2">>}]}], NodeRevs2), +%% +%% % Make sure we don't capture anything on error +%% {ok, #acc{node_revs = NodeRevs3}} = handle_message(NFM, W1, InitAcc), +%% ?assertEqual([], NodeRevs3), +%% +%% % Make sure we accumulate node revs +%% Acc1 = InitAcc#acc{node_revs = [{w1, [{1, <<"foo">>}]}]}, +%% {ok, #acc{node_revs = NodeRevs4}} = handle_message(Foo2, W2, Acc1), +%% ?assertEqual( +%% [{w2, [{2, <<"foo2">>}]}, {w1, [{1, <<"foo">>}]}], +%% NodeRevs4 +%% ), +%% +%% % Make sure rexi_DOWN doesn't modify node_revs +%% Down = {rexi_DOWN, nil, {nil, w1}, nil}, +%% {ok, #acc{node_revs = NodeRevs5}} = handle_message(Down, W2, Acc1), +%% ?assertEqual([{w1, [{1, <<"foo">>}]}], NodeRevs5), +%% +%% % Make sure rexi_EXIT doesn't modify node_revs +%% Exit = {rexi_EXIT, reason}, +%% {ok, #acc{node_revs = NodeRevs6}} = handle_message(Exit, W2, Acc1), +%% ?assertEqual([{w1, [{1, <<"foo">>}]}], NodeRevs6), +%% +%% % Make sure an error doesn't remove any node revs +%% {ok, #acc{node_revs = NodeRevs7}} = handle_message(NFM, W2, Acc1), +%% ?assertEqual([{w1, [{1, <<"foo">>}]}], NodeRevs7), +%% +%% % Make sure we have all of our node_revs when meeting +%% % quorum +%% {ok, Acc2} = handle_message(Foo1, W1, InitAcc), +%% {ok, Acc3} = handle_message(Foo2, W2, Acc2), +%% {stop, Acc4} = handle_message(NFM, W3, Acc3), +%% ?assertEqual( +%% [{w2, [{2, <<"foo2">>}]}, {w1, [{1, <<"foo">>}]}], +%% Acc4#acc.node_revs +%% ) +%% end). +%% +%% +%% t_read_repair() -> +%% Foo1 = {ok, #doc{revs = {1,[<<"foo">>]}}}, +%% Foo2 = {ok, #doc{revs = {2,[<<"foo2">>,<<"foo">>]}}}, +%% NFM = {not_found, missing}, +%% +%% ?_test(begin +%% meck:expect(couch_log, notice, fun(_, _) -> ok end), +%% meck:expect(couch_stats, increment_counter, fun(_) -> ok end), +%% +%% % Test when we have actual doc data to repair +%% meck:expect(fabric, update_docs, fun(_, [_], _) -> {ok, []} end), +%% Acc0 = #acc{ +%% dbname = <<"name">>, +%% replies = [fabric_util:kv(Foo1,1)] +%% }, +%% ?assertEqual(Foo1, read_repair(Acc0)), +%% +%% meck:expect(fabric, update_docs, fun(_, [_, _], _) -> {ok, []} end), +%% Acc1 = #acc{ +%% dbname = <<"name">>, +%% replies = [fabric_util:kv(Foo1,1), fabric_util:kv(Foo2,1)] +%% }, +%% ?assertEqual(Foo2, read_repair(Acc1)), +%% +%% % Test when we have nothing but errors +%% Acc2 = #acc{replies=[fabric_util:kv(NFM, 1)]}, +%% ?assertEqual(NFM, read_repair(Acc2)), +%% +%% Acc3 = #acc{replies=[fabric_util:kv(NFM,1), fabric_util:kv(foo,2)]}, +%% ?assertEqual(NFM, read_repair(Acc3)), +%% +%% Acc4 = #acc{replies=[fabric_util:kv(foo,1), fabric_util:kv(bar,1)]}, +%% ?assertEqual(bar, read_repair(Acc4)) +%% end). +%% +%% +%% t_handle_response_quorum_met() -> +%% Foo1 = {ok, #doc{revs = {1,[<<"foo">>]}}}, +%% Foo2 = {ok, #doc{revs = {2,[<<"foo2">>,<<"foo">>]}}}, +%% Bar1 = {ok, #doc{revs = {1,[<<"bar">>]}}}, +%% +%% ?_test(begin +%% meck:expect(couch_log, notice, fun(_, _) -> ok end), +%% meck:expect(fabric, update_docs, fun(_, _, _) -> {ok, []} end), +%% meck:expect(couch_stats, increment_counter, fun(_) -> ok end), +%% +%% BasicOkAcc = #acc{ +%% state=r_met, +%% replies=[fabric_util:kv(Foo1,2)], +%% q_reply=Foo1 +%% }, +%% ?assertEqual(Foo1, handle_response(BasicOkAcc)), +%% +%% WithAncestorsAcc = #acc{ +%% state=r_met, +%% replies=[fabric_util:kv(Foo1,1), fabric_util:kv(Foo2,2)], +%% q_reply=Foo2 +%% }, +%% ?assertEqual(Foo2, handle_response(WithAncestorsAcc)), +%% +%% % This also checks when the quorum isn't the most recent +%% % revision. +%% DeeperWinsAcc = #acc{ +%% state=r_met, +%% replies=[fabric_util:kv(Foo1,2), fabric_util:kv(Foo2,1)], +%% q_reply=Foo1 +%% }, +%% ?assertEqual(Foo2, handle_response(DeeperWinsAcc)), +%% +%% % Check that we return the proper doc based on rev +%% % (ie, pos is equal) +%% BiggerRevWinsAcc = #acc{ +%% state=r_met, +%% replies=[fabric_util:kv(Foo1,1), fabric_util:kv(Bar1,2)], +%% q_reply=Bar1 +%% }, +%% ?assertEqual(Foo1, handle_response(BiggerRevWinsAcc)) +%% +%% % r_not_met is a proxy to read_repair so we rely on +%% % read_repair_test for those conditions. +%% end). +%% +%% +%% t_get_doc_info() -> +%% ?_test(begin +%% meck:expect(fabric, update_docs, fun(_, _, _) -> {ok, []} end), +%% meck:expect(couch_stats, increment_counter, fun(_) -> ok end), +%% meck:expect(fabric_util, submit_jobs, fun(_, _, _) -> ok end), +%% meck:expect(fabric_util, create_monitors, fun(_) -> ok end), +%% meck:expect(rexi_monitor, stop, fun(_) -> ok end), +%% meck:expect(mem3, shards, fun(_, _) -> ok end), +%% meck:expect(mem3, n, fun(_) -> 3 end), +%% meck:expect(mem3, quorum, fun(_) -> 2 end), +%% +%% meck:expect(fabric_util, recv, fun(_, _, _, _) -> +%% {ok, #acc{state = r_not_met}} +%% end), +%% Rsp1 = fabric_doc_open:go("test", "one", [doc_info]), +%% ?assertEqual({error, quorum_not_met}, Rsp1), +%% +%% Rsp2 = fabric_doc_open:go("test", "one", [{doc_info, full}]), +%% ?assertEqual({error, quorum_not_met}, Rsp2), +%% +%% meck:expect(fabric_util, recv, fun(_, _, _, _) -> +%% {ok, #acc{state = r_met, q_reply = not_found}} +%% end), +%% MissingRsp1 = fabric_doc_open:go("test", "one", [doc_info]), +%% ?assertEqual({not_found, missing}, MissingRsp1), +%% MissingRsp2 = fabric_doc_open:go("test", "one", [{doc_info, full}]), +%% ?assertEqual({not_found, missing}, MissingRsp2), +%% +%% meck:expect(fabric_util, recv, fun(_, _, _, _) -> +%% A = #doc_info{}, +%% {ok, #acc{state = r_met, q_reply = {ok, A}}} +%% end), +%% {ok, Rec1} = fabric_doc_open:go("test", "one", [doc_info]), +%% ?assert(is_record(Rec1, doc_info)), +%% +%% meck:expect(fabric_util, recv, fun(_, _, _, _) -> +%% A = #full_doc_info{deleted = true}, +%% {ok, #acc{state = r_met, q_reply = {ok, A}}} +%% end), +%% Rsp3 = fabric_doc_open:go("test", "one", [{doc_info, full}]), +%% ?assertEqual({not_found, deleted}, Rsp3), +%% {ok, Rec2} = fabric_doc_open:go("test", "one", [{doc_info, full},deleted]), +%% ?assert(is_record(Rec2, full_doc_info)) +%% end). +%% +%% -endif. diff --git a/src/fabric/src/fabric_doc_open_revs.erl b/src/fabric/src/fabric_doc_open_revs.erl index 3d7b9dc3c..aa7f53e9b 100644 --- a/src/fabric/src/fabric_doc_open_revs.erl +++ b/src/fabric/src/fabric_doc_open_revs.erl @@ -313,487 +313,487 @@ collapse_duplicate_revs_int([Reply | Rest]) -> [Reply | collapse_duplicate_revs(Rest)]. --ifdef(TEST). --include_lib("eunit/include/eunit.hrl"). - - -setup_all() -> - config:start_link([]), - meck:new([fabric, couch_stats, couch_log]), - meck:new(fabric_util, [passthrough]), - meck:expect(fabric, update_docs, fun(_, _, _) -> {ok, nil} end), - meck:expect(couch_stats, increment_counter, fun(_) -> ok end), - meck:expect(couch_log, notice, fun(_, _) -> ok end), - meck:expect(fabric_util, cleanup, fun(_) -> ok end). - - - -teardown_all(_) -> - meck:unload(), - config:stop(). - - -setup() -> - meck:reset([ - couch_log, - couch_stats, - fabric, - fabric_util - ]). - - -teardown(_) -> - ok. - - -state0(Revs, Latest) -> - #state{ - worker_count = 3, - workers = - [#shard{node='node1'}, #shard{node='node2'}, #shard{node='node3'}], - r = 2, - revs = Revs, - latest = Latest - }. - - -revs() -> [{1,<<"foo">>}, {1,<<"bar">>}, {1,<<"baz">>}]. - - -foo1() -> {ok, #doc{revs = {1, [<<"foo">>]}}}. -foo2() -> {ok, #doc{revs = {2, [<<"foo2">>, <<"foo">>]}}}. -foo2stemmed() -> {ok, #doc{revs = {2, [<<"foo2">>]}}}. -fooNF() -> {{not_found, missing}, {1,<<"foo">>}}. -foo2NF() -> {{not_found, missing}, {2, <<"foo2">>}}. -bar1() -> {ok, #doc{revs = {1, [<<"bar">>]}}}. -barNF() -> {{not_found, missing}, {1,<<"bar">>}}. -bazNF() -> {{not_found, missing}, {1,<<"baz">>}}. -baz1() -> {ok, #doc{revs = {1, [<<"baz">>]}}}. - - - -open_doc_revs_test_() -> - { - setup, - fun setup_all/0, - fun teardown_all/1, - { - foreach, - fun setup/0, - fun teardown/1, - [ - check_empty_response_not_quorum(), - check_basic_response(), - check_finish_quorum(), - check_finish_quorum_newer(), - check_no_quorum_on_second(), - check_done_on_third(), - check_specific_revs_first_msg(), - check_revs_done_on_agreement(), - check_latest_true(), - check_ancestor_counted_in_quorum(), - check_not_found_counts_for_descendant(), - check_worker_error_skipped(), - check_quorum_only_counts_valid_responses(), - check_empty_list_when_no_workers_reply(), - check_node_rev_stored(), - check_node_rev_store_head_only(), - check_node_rev_store_multiple(), - check_node_rev_dont_store_errors(), - check_node_rev_store_non_errors(), - check_node_rev_store_concatenate(), - check_node_rev_store_concantenate_multiple(), - check_node_rev_unmodified_on_down_or_exit(), - check_not_found_replies_are_removed_when_doc_found(), - check_not_found_returned_when_one_of_docs_not_found(), - check_not_found_returned_when_doc_not_found(), - check_longer_rev_list_returned(), - check_longer_rev_list_not_combined(), - check_not_found_removed_and_longer_rev_list() - ] - } - }. - - -% Tests for revs=all - - -check_empty_response_not_quorum() -> - % Simple smoke test that we don't think we're - % done with a first empty response - W1 = #shard{node='node1'}, - W2 = #shard{node='node2'}, - W3 = #shard{node='node3'}, - ?_assertMatch( - {ok, #state{workers = [W2, W3]}}, - handle_message({ok, []}, W1, state0(all, false)) - ). - - -check_basic_response() -> - % Check that we've handle a response - W1 = #shard{node='node1'}, - W2 = #shard{node='node2'}, - W3 = #shard{node='node3'}, - ?_assertMatch( - {ok, #state{reply_count = 1, workers = [W2, W3]}}, - handle_message({ok, [foo1(), bar1()]}, W1, state0(all, false)) - ). - - -check_finish_quorum() -> - % Two messages with the same revisions means we're done - ?_test(begin - W1 = #shard{node='node1'}, - W2 = #shard{node='node2'}, - S0 = state0(all, false), - {ok, S1} = handle_message({ok, [foo1(), bar1()]}, W1, S0), - Expect = {stop, [bar1(), foo1()]}, - ?assertEqual(Expect, handle_message({ok, [foo1(), bar1()]}, W2, S1)) - end). - - -check_finish_quorum_newer() -> - % We count a descendant of a revision for quorum so - % foo1 should count for foo2 which means we're finished. - % We also validate that read_repair was triggered. - ?_test(begin - W1 = #shard{node='node1'}, - W2 = #shard{node='node2'}, - S0 = state0(all, false), - {ok, S1} = handle_message({ok, [foo1(), bar1()]}, W1, S0), - Expect = {stop, [bar1(), foo2()]}, - ok = meck:reset(fabric), - ?assertEqual(Expect, handle_message({ok, [foo2(), bar1()]}, W2, S1)), - ok = meck:wait(fabric, update_docs, '_', 5000), - ?assertMatch( - [{_, {fabric, update_docs, [_, _, _]}, _}], - meck:history(fabric) - ) - end). - - -check_no_quorum_on_second() -> - % Quorum not yet met for the foo revision so we - % would wait for w3 - ?_test(begin - W1 = #shard{node='node1'}, - W2 = #shard{node='node2'}, - W3 = #shard{node='node3'}, - S0 = state0(all, false), - {ok, S1} = handle_message({ok, [foo1(), bar1()]}, W1, S0), - ?assertMatch( - {ok, #state{workers = [W3]}}, - handle_message({ok, [bar1()]}, W2, S1) - ) - end). - - -check_done_on_third() -> - % The third message of three means we're done no matter - % what. Every revision seen in this pattern should be - % included. - ?_test(begin - W1 = #shard{node='node1'}, - W2 = #shard{node='node2'}, - W3 = #shard{node='node3'}, - S0 = state0(all, false), - {ok, S1} = handle_message({ok, [foo1(), bar1()]}, W1, S0), - {ok, S2} = handle_message({ok, [bar1()]}, W2, S1), - Expect = {stop, [bar1(), foo1()]}, - ?assertEqual(Expect, handle_message({ok, [bar1()]}, W3, S2)) - end). - - -% Tests for a specific list of revs - - -check_specific_revs_first_msg() -> - ?_test(begin - W1 = #shard{node='node1'}, - W2 = #shard{node='node2'}, - W3 = #shard{node='node3'}, - S0 = state0(revs(), false), - ?assertMatch( - {ok, #state{reply_count = 1, workers = [W2, W3]}}, - handle_message({ok, [foo1(), bar1(), bazNF()]}, W1, S0) - ) - end). - - -check_revs_done_on_agreement() -> - ?_test(begin - W1 = #shard{node='node1'}, - W2 = #shard{node='node2'}, - S0 = state0(revs(), false), - Msg = {ok, [foo1(), bar1(), bazNF()]}, - {ok, S1} = handle_message(Msg, W1, S0), - Expect = {stop, [bar1(), foo1(), bazNF()]}, - ?assertEqual(Expect, handle_message(Msg, W2, S1)) - end). - - -check_latest_true() -> - ?_test(begin - W1 = #shard{node='node1'}, - W2 = #shard{node='node2'}, - S0 = state0(revs(), true), - Msg1 = {ok, [foo2(), bar1(), bazNF()]}, - Msg2 = {ok, [foo2(), bar1(), bazNF()]}, - {ok, S1} = handle_message(Msg1, W1, S0), - Expect = {stop, [bar1(), foo2(), bazNF()]}, - ?assertEqual(Expect, handle_message(Msg2, W2, S1)) - end). - - -check_ancestor_counted_in_quorum() -> - ?_test(begin - W1 = #shard{node='node1'}, - W2 = #shard{node='node2'}, - S0 = state0(revs(), true), - Msg1 = {ok, [foo1(), bar1(), bazNF()]}, - Msg2 = {ok, [foo2(), bar1(), bazNF()]}, - Expect = {stop, [bar1(), foo2(), bazNF()]}, - - % Older first - {ok, S1} = handle_message(Msg1, W1, S0), - ?assertEqual(Expect, handle_message(Msg2, W2, S1)), - - % Newer first - {ok, S2} = handle_message(Msg2, W2, S0), - ?assertEqual(Expect, handle_message(Msg1, W1, S2)) - end). - - -check_not_found_counts_for_descendant() -> - ?_test(begin - W1 = #shard{node='node1'}, - W2 = #shard{node='node2'}, - S0 = state0(revs(), true), - Msg1 = {ok, [foo1(), bar1(), bazNF()]}, - Msg2 = {ok, [foo1(), bar1(), baz1()]}, - Expect = {stop, [bar1(), baz1(), foo1()]}, - - % not_found first - {ok, S1} = handle_message(Msg1, W1, S0), - ?assertEqual(Expect, handle_message(Msg2, W2, S1)), - - % not_found second - {ok, S2} = handle_message(Msg2, W2, S0), - ?assertEqual(Expect, handle_message(Msg1, W1, S2)) - end). - - -check_worker_error_skipped() -> - ?_test(begin - W1 = #shard{node='node1'}, - W2 = #shard{node='node2'}, - W3 = #shard{node='node3'}, - S0 = state0(revs(), true), - Msg1 = {ok, [foo1(), bar1(), baz1()]}, - Msg2 = {rexi_EXIT, reason}, - Msg3 = {ok, [foo1(), bar1(), baz1()]}, - Expect = {stop, [bar1(), baz1(), foo1()]}, - - {ok, S1} = handle_message(Msg1, W1, S0), - {ok, S2} = handle_message(Msg2, W2, S1), - ?assertEqual(Expect, handle_message(Msg3, W3, S2)) - end). - - -check_quorum_only_counts_valid_responses() -> - ?_test(begin - W1 = #shard{node='node1'}, - W2 = #shard{node='node2'}, - W3 = #shard{node='node3'}, - S0 = state0(revs(), true), - Msg1 = {rexi_EXIT, reason}, - Msg2 = {rexi_EXIT, reason}, - Msg3 = {ok, [foo1(), bar1(), baz1()]}, - Expect = {stop, [bar1(), baz1(), foo1()]}, - - {ok, S1} = handle_message(Msg1, W1, S0), - {ok, S2} = handle_message(Msg2, W2, S1), - ?assertEqual(Expect, handle_message(Msg3, W3, S2)) - end). - - -check_empty_list_when_no_workers_reply() -> - ?_test(begin - W1 = #shard{node='node1'}, - W2 = #shard{node='node2'}, - W3 = #shard{node='node3'}, - S0 = state0(revs(), true), - Msg1 = {rexi_EXIT, reason}, - Msg2 = {rexi_EXIT, reason}, - Msg3 = {rexi_DOWN, nodedown, {nil, node()}, nil}, - Expect = {stop, all_workers_died}, - - {ok, S1} = handle_message(Msg1, W1, S0), - {ok, S2} = handle_message(Msg2, W2, S1), - ?assertEqual(Expect, handle_message(Msg3, W3, S2)) - end). - - -check_node_rev_stored() -> - ?_test(begin - W1 = #shard{node = node1}, - S0 = state0([], true), - - {ok, S1} = handle_message({ok, [foo1()]}, W1, S0), - ?assertEqual([{node1, [{1, <<"foo">>}]}], S1#state.node_revs) - end). - - -check_node_rev_store_head_only() -> - ?_test(begin - W1 = #shard{node = node1}, - S0 = state0([], true), - - {ok, S1} = handle_message({ok, [foo2()]}, W1, S0), - ?assertEqual([{node1, [{2, <<"foo2">>}]}], S1#state.node_revs) - end). - - -check_node_rev_store_multiple() -> - ?_test(begin - W1 = #shard{node = node1}, - S0 = state0([], true), - - {ok, S1} = handle_message({ok, [foo1(), foo2()]}, W1, S0), - ?assertEqual( - [{node1, [{2, <<"foo2">>}, {1, <<"foo">>}]}], - S1#state.node_revs - ) - end). - - -check_node_rev_dont_store_errors() -> - ?_test(begin - W1 = #shard{node = node1}, - S0 = state0([], true), - - {ok, S1} = handle_message({ok, [barNF()]}, W1, S0), - ?assertEqual([], S1#state.node_revs) - end). - - -check_node_rev_store_non_errors() -> - ?_test(begin - W1 = #shard{node = node1}, - S0 = state0([], true), - - {ok, S1} = handle_message({ok, [foo1(), barNF()]}, W1, S0), - ?assertEqual([{node1, [{1, <<"foo">>}]}], S1#state.node_revs) - end). - - -check_node_rev_store_concatenate() -> - ?_test(begin - W2 = #shard{node = node2}, - S0 = state0([], true), - S1 = S0#state{node_revs = [{node1, [{1, <<"foo">>}]}]}, - - {ok, S2} = handle_message({ok, [foo2()]}, W2, S1), - ?assertEqual( - [{node2, [{2, <<"foo2">>}]}, {node1, [{1, <<"foo">>}]}], - S2#state.node_revs - ) - end). - - -check_node_rev_store_concantenate_multiple() -> - ?_test(begin - W2 = #shard{node = node2}, - S0 = state0([], true), - S1 = S0#state{node_revs = [{node1, [{1, <<"foo">>}]}]}, - - {ok, S2} = handle_message({ok, [foo2(), bar1()]}, W2, S1), - ?assertEqual( - [ - {node2, [{1, <<"bar">>}, {2, <<"foo2">>}]}, - {node1, [{1, <<"foo">>}]} - ], - S2#state.node_revs - ) - end). - - -check_node_rev_unmodified_on_down_or_exit() -> - ?_test(begin - W2 = #shard{node = node2}, - S0 = state0([], true), - S1 = S0#state{node_revs = [{node1, [{1, <<"foo">>}]}]}, - - Down = {rexi_DOWN, nodedown, {nil, node()}, nil}, - {ok, S2} = handle_message(Down, W2, S1), - ?assertEqual( - [{node1, [{1, <<"foo">>}]}], - S2#state.node_revs - ), - - Exit = {rexi_EXIT, reason}, - {ok, S3} = handle_message(Exit, W2, S1), - ?assertEqual( - [{node1, [{1, <<"foo">>}]}], - S3#state.node_revs - ) - end). - - -check_not_found_replies_are_removed_when_doc_found() -> - ?_test(begin - Replies = replies_to_dict([foo1(), bar1(), fooNF()]), - Expect = [bar1(), foo1()], - ?assertEqual(Expect, dict_format_replies(Replies)) - end). - -check_not_found_returned_when_one_of_docs_not_found() -> - ?_test(begin - Replies = replies_to_dict([foo1(), foo2(), barNF()]), - Expect = [foo1(), foo2(), barNF()], - ?assertEqual(Expect, dict_format_replies(Replies)) - end). - -check_not_found_returned_when_doc_not_found() -> - ?_test(begin - Replies = replies_to_dict([fooNF(), barNF(), bazNF()]), - Expect = [barNF(), bazNF(), fooNF()], - ?assertEqual(Expect, dict_format_replies(Replies)) - end). - -check_longer_rev_list_returned() -> - ?_test(begin - Replies = replies_to_dict([foo2(), foo2stemmed()]), - Expect = [foo2()], - ?assertEqual(2, length(Replies)), - ?assertEqual(Expect, dict_format_replies(Replies)) - end). - -check_longer_rev_list_not_combined() -> - ?_test(begin - Replies = replies_to_dict([foo2(), foo2stemmed(), bar1()]), - Expect = [bar1(), foo2()], - ?assertEqual(3, length(Replies)), - ?assertEqual(Expect, dict_format_replies(Replies)) - end). - -check_not_found_removed_and_longer_rev_list() -> - ?_test(begin - Replies = replies_to_dict([foo2(), foo2stemmed(), foo2NF()]), - Expect = [foo2()], - ?assertEqual(3, length(Replies)), - ?assertEqual(Expect, dict_format_replies(Replies)) - end). - - -replies_to_dict(Replies) -> - [reply_to_element(R) || R <- Replies]. - -reply_to_element({ok, #doc{revs = Revs}} = Reply) -> - {_, [Rev | _]} = Revs, - {{Rev, Revs}, {Reply, 1}}; -reply_to_element(Reply) -> - {Reply, {Reply, 1}}. - --endif. +%% -ifdef(TEST). +%% -include_lib("eunit/include/eunit.hrl"). +%% +%% +%% setup_all() -> +%% config:start_link([]), +%% meck:new([fabric, couch_stats, couch_log]), +%% meck:new(fabric_util, [passthrough]), +%% meck:expect(fabric, update_docs, fun(_, _, _) -> {ok, nil} end), +%% meck:expect(couch_stats, increment_counter, fun(_) -> ok end), +%% meck:expect(couch_log, notice, fun(_, _) -> ok end), +%% meck:expect(fabric_util, cleanup, fun(_) -> ok end). +%% +%% +%% +%% teardown_all(_) -> +%% meck:unload(), +%% config:stop(). +%% +%% +%% setup() -> +%% meck:reset([ +%% couch_log, +%% couch_stats, +%% fabric, +%% fabric_util +%% ]). +%% +%% +%% teardown(_) -> +%% ok. +%% +%% +%% state0(Revs, Latest) -> +%% #state{ +%% worker_count = 3, +%% workers = +%% [#shard{node='node1'}, #shard{node='node2'}, #shard{node='node3'}], +%% r = 2, +%% revs = Revs, +%% latest = Latest +%% }. +%% +%% +%% revs() -> [{1,<<"foo">>}, {1,<<"bar">>}, {1,<<"baz">>}]. +%% +%% +%% foo1() -> {ok, #doc{revs = {1, [<<"foo">>]}}}. +%% foo2() -> {ok, #doc{revs = {2, [<<"foo2">>, <<"foo">>]}}}. +%% foo2stemmed() -> {ok, #doc{revs = {2, [<<"foo2">>]}}}. +%% fooNF() -> {{not_found, missing}, {1,<<"foo">>}}. +%% foo2NF() -> {{not_found, missing}, {2, <<"foo2">>}}. +%% bar1() -> {ok, #doc{revs = {1, [<<"bar">>]}}}. +%% barNF() -> {{not_found, missing}, {1,<<"bar">>}}. +%% bazNF() -> {{not_found, missing}, {1,<<"baz">>}}. +%% baz1() -> {ok, #doc{revs = {1, [<<"baz">>]}}}. +%% +%% +%% +%% open_doc_revs_test_() -> +%% { +%% setup, +%% fun setup_all/0, +%% fun teardown_all/1, +%% { +%% foreach, +%% fun setup/0, +%% fun teardown/1, +%% [ +%% check_empty_response_not_quorum(), +%% check_basic_response(), +%% check_finish_quorum(), +%% check_finish_quorum_newer(), +%% check_no_quorum_on_second(), +%% check_done_on_third(), +%% check_specific_revs_first_msg(), +%% check_revs_done_on_agreement(), +%% check_latest_true(), +%% check_ancestor_counted_in_quorum(), +%% check_not_found_counts_for_descendant(), +%% check_worker_error_skipped(), +%% check_quorum_only_counts_valid_responses(), +%% check_empty_list_when_no_workers_reply(), +%% check_node_rev_stored(), +%% check_node_rev_store_head_only(), +%% check_node_rev_store_multiple(), +%% check_node_rev_dont_store_errors(), +%% check_node_rev_store_non_errors(), +%% check_node_rev_store_concatenate(), +%% check_node_rev_store_concantenate_multiple(), +%% check_node_rev_unmodified_on_down_or_exit(), +%% check_not_found_replies_are_removed_when_doc_found(), +%% check_not_found_returned_when_one_of_docs_not_found(), +%% check_not_found_returned_when_doc_not_found(), +%% check_longer_rev_list_returned(), +%% check_longer_rev_list_not_combined(), +%% check_not_found_removed_and_longer_rev_list() +%% ] +%% } +%% }. +%% +%% +%% % Tests for revs=all +%% +%% +%% check_empty_response_not_quorum() -> +%% % Simple smoke test that we don't think we're +%% % done with a first empty response +%% W1 = #shard{node='node1'}, +%% W2 = #shard{node='node2'}, +%% W3 = #shard{node='node3'}, +%% ?_assertMatch( +%% {ok, #state{workers = [W2, W3]}}, +%% handle_message({ok, []}, W1, state0(all, false)) +%% ). +%% +%% +%% check_basic_response() -> +%% % Check that we've handle a response +%% W1 = #shard{node='node1'}, +%% W2 = #shard{node='node2'}, +%% W3 = #shard{node='node3'}, +%% ?_assertMatch( +%% {ok, #state{reply_count = 1, workers = [W2, W3]}}, +%% handle_message({ok, [foo1(), bar1()]}, W1, state0(all, false)) +%% ). +%% +%% +%% check_finish_quorum() -> +%% % Two messages with the same revisions means we're done +%% ?_test(begin +%% W1 = #shard{node='node1'}, +%% W2 = #shard{node='node2'}, +%% S0 = state0(all, false), +%% {ok, S1} = handle_message({ok, [foo1(), bar1()]}, W1, S0), +%% Expect = {stop, [bar1(), foo1()]}, +%% ?assertEqual(Expect, handle_message({ok, [foo1(), bar1()]}, W2, S1)) +%% end). +%% +%% +%% check_finish_quorum_newer() -> +%% % We count a descendant of a revision for quorum so +%% % foo1 should count for foo2 which means we're finished. +%% % We also validate that read_repair was triggered. +%% ?_test(begin +%% W1 = #shard{node='node1'}, +%% W2 = #shard{node='node2'}, +%% S0 = state0(all, false), +%% {ok, S1} = handle_message({ok, [foo1(), bar1()]}, W1, S0), +%% Expect = {stop, [bar1(), foo2()]}, +%% ok = meck:reset(fabric), +%% ?assertEqual(Expect, handle_message({ok, [foo2(), bar1()]}, W2, S1)), +%% ok = meck:wait(fabric, update_docs, '_', 5000), +%% ?assertMatch( +%% [{_, {fabric, update_docs, [_, _, _]}, _}], +%% meck:history(fabric) +%% ) +%% end). +%% +%% +%% check_no_quorum_on_second() -> +%% % Quorum not yet met for the foo revision so we +%% % would wait for w3 +%% ?_test(begin +%% W1 = #shard{node='node1'}, +%% W2 = #shard{node='node2'}, +%% W3 = #shard{node='node3'}, +%% S0 = state0(all, false), +%% {ok, S1} = handle_message({ok, [foo1(), bar1()]}, W1, S0), +%% ?assertMatch( +%% {ok, #state{workers = [W3]}}, +%% handle_message({ok, [bar1()]}, W2, S1) +%% ) +%% end). +%% +%% +%% check_done_on_third() -> +%% % The third message of three means we're done no matter +%% % what. Every revision seen in this pattern should be +%% % included. +%% ?_test(begin +%% W1 = #shard{node='node1'}, +%% W2 = #shard{node='node2'}, +%% W3 = #shard{node='node3'}, +%% S0 = state0(all, false), +%% {ok, S1} = handle_message({ok, [foo1(), bar1()]}, W1, S0), +%% {ok, S2} = handle_message({ok, [bar1()]}, W2, S1), +%% Expect = {stop, [bar1(), foo1()]}, +%% ?assertEqual(Expect, handle_message({ok, [bar1()]}, W3, S2)) +%% end). +%% +%% +%% % Tests for a specific list of revs +%% +%% +%% check_specific_revs_first_msg() -> +%% ?_test(begin +%% W1 = #shard{node='node1'}, +%% W2 = #shard{node='node2'}, +%% W3 = #shard{node='node3'}, +%% S0 = state0(revs(), false), +%% ?assertMatch( +%% {ok, #state{reply_count = 1, workers = [W2, W3]}}, +%% handle_message({ok, [foo1(), bar1(), bazNF()]}, W1, S0) +%% ) +%% end). +%% +%% +%% check_revs_done_on_agreement() -> +%% ?_test(begin +%% W1 = #shard{node='node1'}, +%% W2 = #shard{node='node2'}, +%% S0 = state0(revs(), false), +%% Msg = {ok, [foo1(), bar1(), bazNF()]}, +%% {ok, S1} = handle_message(Msg, W1, S0), +%% Expect = {stop, [bar1(), foo1(), bazNF()]}, +%% ?assertEqual(Expect, handle_message(Msg, W2, S1)) +%% end). +%% +%% +%% check_latest_true() -> +%% ?_test(begin +%% W1 = #shard{node='node1'}, +%% W2 = #shard{node='node2'}, +%% S0 = state0(revs(), true), +%% Msg1 = {ok, [foo2(), bar1(), bazNF()]}, +%% Msg2 = {ok, [foo2(), bar1(), bazNF()]}, +%% {ok, S1} = handle_message(Msg1, W1, S0), +%% Expect = {stop, [bar1(), foo2(), bazNF()]}, +%% ?assertEqual(Expect, handle_message(Msg2, W2, S1)) +%% end). +%% +%% +%% check_ancestor_counted_in_quorum() -> +%% ?_test(begin +%% W1 = #shard{node='node1'}, +%% W2 = #shard{node='node2'}, +%% S0 = state0(revs(), true), +%% Msg1 = {ok, [foo1(), bar1(), bazNF()]}, +%% Msg2 = {ok, [foo2(), bar1(), bazNF()]}, +%% Expect = {stop, [bar1(), foo2(), bazNF()]}, +%% +%% % Older first +%% {ok, S1} = handle_message(Msg1, W1, S0), +%% ?assertEqual(Expect, handle_message(Msg2, W2, S1)), +%% +%% % Newer first +%% {ok, S2} = handle_message(Msg2, W2, S0), +%% ?assertEqual(Expect, handle_message(Msg1, W1, S2)) +%% end). +%% +%% +%% check_not_found_counts_for_descendant() -> +%% ?_test(begin +%% W1 = #shard{node='node1'}, +%% W2 = #shard{node='node2'}, +%% S0 = state0(revs(), true), +%% Msg1 = {ok, [foo1(), bar1(), bazNF()]}, +%% Msg2 = {ok, [foo1(), bar1(), baz1()]}, +%% Expect = {stop, [bar1(), baz1(), foo1()]}, +%% +%% % not_found first +%% {ok, S1} = handle_message(Msg1, W1, S0), +%% ?assertEqual(Expect, handle_message(Msg2, W2, S1)), +%% +%% % not_found second +%% {ok, S2} = handle_message(Msg2, W2, S0), +%% ?assertEqual(Expect, handle_message(Msg1, W1, S2)) +%% end). +%% +%% +%% check_worker_error_skipped() -> +%% ?_test(begin +%% W1 = #shard{node='node1'}, +%% W2 = #shard{node='node2'}, +%% W3 = #shard{node='node3'}, +%% S0 = state0(revs(), true), +%% Msg1 = {ok, [foo1(), bar1(), baz1()]}, +%% Msg2 = {rexi_EXIT, reason}, +%% Msg3 = {ok, [foo1(), bar1(), baz1()]}, +%% Expect = {stop, [bar1(), baz1(), foo1()]}, +%% +%% {ok, S1} = handle_message(Msg1, W1, S0), +%% {ok, S2} = handle_message(Msg2, W2, S1), +%% ?assertEqual(Expect, handle_message(Msg3, W3, S2)) +%% end). +%% +%% +%% check_quorum_only_counts_valid_responses() -> +%% ?_test(begin +%% W1 = #shard{node='node1'}, +%% W2 = #shard{node='node2'}, +%% W3 = #shard{node='node3'}, +%% S0 = state0(revs(), true), +%% Msg1 = {rexi_EXIT, reason}, +%% Msg2 = {rexi_EXIT, reason}, +%% Msg3 = {ok, [foo1(), bar1(), baz1()]}, +%% Expect = {stop, [bar1(), baz1(), foo1()]}, +%% +%% {ok, S1} = handle_message(Msg1, W1, S0), +%% {ok, S2} = handle_message(Msg2, W2, S1), +%% ?assertEqual(Expect, handle_message(Msg3, W3, S2)) +%% end). +%% +%% +%% check_empty_list_when_no_workers_reply() -> +%% ?_test(begin +%% W1 = #shard{node='node1'}, +%% W2 = #shard{node='node2'}, +%% W3 = #shard{node='node3'}, +%% S0 = state0(revs(), true), +%% Msg1 = {rexi_EXIT, reason}, +%% Msg2 = {rexi_EXIT, reason}, +%% Msg3 = {rexi_DOWN, nodedown, {nil, node()}, nil}, +%% Expect = {stop, all_workers_died}, +%% +%% {ok, S1} = handle_message(Msg1, W1, S0), +%% {ok, S2} = handle_message(Msg2, W2, S1), +%% ?assertEqual(Expect, handle_message(Msg3, W3, S2)) +%% end). +%% +%% +%% check_node_rev_stored() -> +%% ?_test(begin +%% W1 = #shard{node = node1}, +%% S0 = state0([], true), +%% +%% {ok, S1} = handle_message({ok, [foo1()]}, W1, S0), +%% ?assertEqual([{node1, [{1, <<"foo">>}]}], S1#state.node_revs) +%% end). +%% +%% +%% check_node_rev_store_head_only() -> +%% ?_test(begin +%% W1 = #shard{node = node1}, +%% S0 = state0([], true), +%% +%% {ok, S1} = handle_message({ok, [foo2()]}, W1, S0), +%% ?assertEqual([{node1, [{2, <<"foo2">>}]}], S1#state.node_revs) +%% end). +%% +%% +%% check_node_rev_store_multiple() -> +%% ?_test(begin +%% W1 = #shard{node = node1}, +%% S0 = state0([], true), +%% +%% {ok, S1} = handle_message({ok, [foo1(), foo2()]}, W1, S0), +%% ?assertEqual( +%% [{node1, [{2, <<"foo2">>}, {1, <<"foo">>}]}], +%% S1#state.node_revs +%% ) +%% end). +%% +%% +%% check_node_rev_dont_store_errors() -> +%% ?_test(begin +%% W1 = #shard{node = node1}, +%% S0 = state0([], true), +%% +%% {ok, S1} = handle_message({ok, [barNF()]}, W1, S0), +%% ?assertEqual([], S1#state.node_revs) +%% end). +%% +%% +%% check_node_rev_store_non_errors() -> +%% ?_test(begin +%% W1 = #shard{node = node1}, +%% S0 = state0([], true), +%% +%% {ok, S1} = handle_message({ok, [foo1(), barNF()]}, W1, S0), +%% ?assertEqual([{node1, [{1, <<"foo">>}]}], S1#state.node_revs) +%% end). +%% +%% +%% check_node_rev_store_concatenate() -> +%% ?_test(begin +%% W2 = #shard{node = node2}, +%% S0 = state0([], true), +%% S1 = S0#state{node_revs = [{node1, [{1, <<"foo">>}]}]}, +%% +%% {ok, S2} = handle_message({ok, [foo2()]}, W2, S1), +%% ?assertEqual( +%% [{node2, [{2, <<"foo2">>}]}, {node1, [{1, <<"foo">>}]}], +%% S2#state.node_revs +%% ) +%% end). +%% +%% +%% check_node_rev_store_concantenate_multiple() -> +%% ?_test(begin +%% W2 = #shard{node = node2}, +%% S0 = state0([], true), +%% S1 = S0#state{node_revs = [{node1, [{1, <<"foo">>}]}]}, +%% +%% {ok, S2} = handle_message({ok, [foo2(), bar1()]}, W2, S1), +%% ?assertEqual( +%% [ +%% {node2, [{1, <<"bar">>}, {2, <<"foo2">>}]}, +%% {node1, [{1, <<"foo">>}]} +%% ], +%% S2#state.node_revs +%% ) +%% end). +%% +%% +%% check_node_rev_unmodified_on_down_or_exit() -> +%% ?_test(begin +%% W2 = #shard{node = node2}, +%% S0 = state0([], true), +%% S1 = S0#state{node_revs = [{node1, [{1, <<"foo">>}]}]}, +%% +%% Down = {rexi_DOWN, nodedown, {nil, node()}, nil}, +%% {ok, S2} = handle_message(Down, W2, S1), +%% ?assertEqual( +%% [{node1, [{1, <<"foo">>}]}], +%% S2#state.node_revs +%% ), +%% +%% Exit = {rexi_EXIT, reason}, +%% {ok, S3} = handle_message(Exit, W2, S1), +%% ?assertEqual( +%% [{node1, [{1, <<"foo">>}]}], +%% S3#state.node_revs +%% ) +%% end). +%% +%% +%% check_not_found_replies_are_removed_when_doc_found() -> +%% ?_test(begin +%% Replies = replies_to_dict([foo1(), bar1(), fooNF()]), +%% Expect = [bar1(), foo1()], +%% ?assertEqual(Expect, dict_format_replies(Replies)) +%% end). +%% +%% check_not_found_returned_when_one_of_docs_not_found() -> +%% ?_test(begin +%% Replies = replies_to_dict([foo1(), foo2(), barNF()]), +%% Expect = [foo1(), foo2(), barNF()], +%% ?assertEqual(Expect, dict_format_replies(Replies)) +%% end). +%% +%% check_not_found_returned_when_doc_not_found() -> +%% ?_test(begin +%% Replies = replies_to_dict([fooNF(), barNF(), bazNF()]), +%% Expect = [barNF(), bazNF(), fooNF()], +%% ?assertEqual(Expect, dict_format_replies(Replies)) +%% end). +%% +%% check_longer_rev_list_returned() -> +%% ?_test(begin +%% Replies = replies_to_dict([foo2(), foo2stemmed()]), +%% Expect = [foo2()], +%% ?assertEqual(2, length(Replies)), +%% ?assertEqual(Expect, dict_format_replies(Replies)) +%% end). +%% +%% check_longer_rev_list_not_combined() -> +%% ?_test(begin +%% Replies = replies_to_dict([foo2(), foo2stemmed(), bar1()]), +%% Expect = [bar1(), foo2()], +%% ?assertEqual(3, length(Replies)), +%% ?assertEqual(Expect, dict_format_replies(Replies)) +%% end). +%% +%% check_not_found_removed_and_longer_rev_list() -> +%% ?_test(begin +%% Replies = replies_to_dict([foo2(), foo2stemmed(), foo2NF()]), +%% Expect = [foo2()], +%% ?assertEqual(3, length(Replies)), +%% ?assertEqual(Expect, dict_format_replies(Replies)) +%% end). +%% +%% +%% replies_to_dict(Replies) -> +%% [reply_to_element(R) || R <- Replies]. +%% +%% reply_to_element({ok, #doc{revs = Revs}} = Reply) -> +%% {_, [Rev | _]} = Revs, +%% {{Rev, Revs}, {Reply, 1}}; +%% reply_to_element(Reply) -> +%% {Reply, {Reply, 1}}. +%% +%% -endif. diff --git a/src/fabric/src/fabric_doc_purge.erl b/src/fabric/src/fabric_doc_purge.erl index 3492f88c5..bda9039ba 100644 --- a/src/fabric/src/fabric_doc_purge.erl +++ b/src/fabric/src/fabric_doc_purge.erl @@ -224,348 +224,348 @@ has_quorum(Resps, Count, W) -> end. --ifdef(TEST). --include_lib("eunit/include/eunit.hrl"). - -purge_test_() -> - { - setup, - fun setup/0, - fun teardown/1, - [ - t_w2_ok(), - t_w3_ok(), - - t_w2_mixed_accepted(), - t_w3_mixed_accepted(), - - t_w2_exit1_ok(), - t_w2_exit2_accepted(), - t_w2_exit3_error(), - - t_w4_accepted(), - - t_mixed_ok_accepted(), - t_mixed_errors() - ] - }. - - -setup() -> - meck:new(couch_log), - meck:expect(couch_log, warning, fun(_, _) -> ok end), - meck:expect(couch_log, notice, fun(_, _) -> ok end). - - -teardown(_) -> - meck:unload(). - - -t_w2_ok() -> - ?_test(begin - Acc0 = create_init_acc(2), - Msg = {ok, [{ok, [{1, <<"foo">>}]}, {ok, [{2, <<"bar">>}]}]}, - - {ok, Acc1} = handle_message(Msg, worker(1, Acc0), Acc0), - ?assertEqual(2, length(Acc1#acc.worker_uuids)), - check_quorum(Acc1, false), - - {stop, Acc2} = handle_message(Msg, worker(2, Acc0), Acc1), - ?assertEqual(1, length(Acc2#acc.worker_uuids)), - check_quorum(Acc2, true), - - Expect = [{ok, [{1, <<"foo">>}]}, {ok, [{2, <<"bar">>}]}], - Resps = format_resps([<<"uuid1">>, <<"uuid2">>], Acc2), - ?assertEqual(Expect, Resps), - ?assertEqual(ok, resp_health(Resps)) - end). - - -t_w3_ok() -> - ?_test(begin - Acc0 = create_init_acc(3), - Msg = {ok, [{ok, [{1, <<"foo">>}]}, {ok, [{2, <<"bar">>}]}]}, - - {ok, Acc1} = handle_message(Msg, worker(1, Acc0), Acc0), - check_quorum(Acc1, false), - - {ok, Acc2} = handle_message(Msg, worker(2, Acc0), Acc1), - ?assertEqual(1, length(Acc2#acc.worker_uuids)), - check_quorum(Acc2, false), - - {stop, Acc3} = handle_message(Msg, worker(3, Acc0), Acc2), - ?assertEqual(0, length(Acc3#acc.worker_uuids)), - check_quorum(Acc3, true), - - Expect = [{ok, [{1, <<"foo">>}]}, {ok, [{2, <<"bar">>}]}], - Resps = format_resps([<<"uuid1">>, <<"uuid2">>], Acc3), - ?assertEqual(Expect, Resps), - ?assertEqual(ok, resp_health(Resps)) - end). - - -t_w2_mixed_accepted() -> - ?_test(begin - Acc0 = create_init_acc(2), - Msg1 = {ok, [{ok, [{1, <<"foo1">>}]}, {ok, [{2, <<"bar1">>}]}]}, - Msg2 = {ok, [{ok, [{1, <<"foo2">>}]}, {ok, [{2, <<"bar2">>}]}]}, - - {ok, Acc1} = handle_message(Msg1, worker(1, Acc0), Acc0), - ?assertEqual(2, length(Acc1#acc.worker_uuids)), - check_quorum(Acc1, false), - - {ok, Acc2} = handle_message(Msg2, worker(2, Acc0), Acc1), - ?assertEqual(1, length(Acc2#acc.worker_uuids)), - check_quorum(Acc2, false), - - {stop, Acc3} = handle_message(Msg1, worker(3, Acc0), Acc2), - ?assertEqual(0, length(Acc3#acc.worker_uuids)), - check_quorum(Acc3, true), - - Expect = [ - {accepted, [{1, <<"foo1">>}, {1, <<"foo2">>}]}, - {accepted, [{2, <<"bar1">>}, {2, <<"bar2">>}]} - ], - Resps = format_resps([<<"uuid1">>, <<"uuid2">>], Acc2), - ?assertEqual(Expect, Resps), - ?assertEqual(accepted, resp_health(Resps)) - end). - - -t_w3_mixed_accepted() -> - ?_test(begin - Acc0 = create_init_acc(3), - Msg1 = {ok, [{ok, [{1, <<"foo1">>}]}, {ok, [{2, <<"bar1">>}]}]}, - Msg2 = {ok, [{ok, [{1, <<"foo2">>}]}, {ok, [{2, <<"bar2">>}]}]}, - - {ok, Acc1} = handle_message(Msg1, worker(1, Acc0), Acc0), - ?assertEqual(2, length(Acc1#acc.worker_uuids)), - check_quorum(Acc1, false), - - {ok, Acc2} = handle_message(Msg2, worker(2, Acc0), Acc1), - ?assertEqual(1, length(Acc2#acc.worker_uuids)), - check_quorum(Acc2, false), - - {stop, Acc3} = handle_message(Msg2, worker(3, Acc0), Acc2), - ?assertEqual(0, length(Acc3#acc.worker_uuids)), - check_quorum(Acc3, true), - - Expect = [ - {accepted, [{1, <<"foo1">>}, {1, <<"foo2">>}]}, - {accepted, [{2, <<"bar1">>}, {2, <<"bar2">>}]} - ], - Resps = format_resps([<<"uuid1">>, <<"uuid2">>], Acc2), - ?assertEqual(Expect, Resps), - ?assertEqual(accepted, resp_health(Resps)) - end). - - -t_w2_exit1_ok() -> - ?_test(begin - Acc0 = create_init_acc(2), - Msg = {ok, [{ok, [{1, <<"foo">>}]}, {ok, [{2, <<"bar">>}]}]}, - ExitMsg = {rexi_EXIT, blargh}, - - {ok, Acc1} = handle_message(Msg, worker(1, Acc0), Acc0), - ?assertEqual(2, length(Acc1#acc.worker_uuids)), - check_quorum(Acc1, false), - - {ok, Acc2} = handle_message(ExitMsg, worker(2, Acc0), Acc1), - ?assertEqual(1, length(Acc2#acc.worker_uuids)), - check_quorum(Acc2, false), - - {stop, Acc3} = handle_message(Msg, worker(3, Acc0), Acc2), - ?assertEqual(0, length(Acc3#acc.worker_uuids)), - check_quorum(Acc3, true), - - Expect = [{ok, [{1, <<"foo">>}]}, {ok, [{2, <<"bar">>}]}], - Resps = format_resps([<<"uuid1">>, <<"uuid2">>], Acc3), - ?assertEqual(Expect, Resps), - ?assertEqual(ok, resp_health(Resps)) - end). - - -t_w2_exit2_accepted() -> - ?_test(begin - Acc0 = create_init_acc(2), - Msg = {ok, [{ok, [{1, <<"foo">>}]}, {ok, [{2, <<"bar">>}]}]}, - ExitMsg = {rexi_EXIT, blargh}, - - {ok, Acc1} = handle_message(Msg, worker(1, Acc0), Acc0), - ?assertEqual(2, length(Acc1#acc.worker_uuids)), - check_quorum(Acc1, false), - - {ok, Acc2} = handle_message(ExitMsg, worker(2, Acc0), Acc1), - ?assertEqual(1, length(Acc2#acc.worker_uuids)), - check_quorum(Acc2, false), - - {stop, Acc3} = handle_message(ExitMsg, worker(3, Acc0), Acc2), - ?assertEqual(0, length(Acc3#acc.worker_uuids)), - check_quorum(Acc3, true), - - Expect = [{accepted, [{1, <<"foo">>}]}, {accepted, [{2, <<"bar">>}]}], - Resps = format_resps([<<"uuid1">>, <<"uuid2">>], Acc3), - ?assertEqual(Expect, Resps), - ?assertEqual(accepted, resp_health(Resps)) - end). - - -t_w2_exit3_error() -> - ?_test(begin - Acc0 = create_init_acc(2), - ExitMsg = {rexi_EXIT, blargh}, - - {ok, Acc1} = handle_message(ExitMsg, worker(1, Acc0), Acc0), - ?assertEqual(2, length(Acc1#acc.worker_uuids)), - check_quorum(Acc1, false), - - {ok, Acc2} = handle_message(ExitMsg, worker(2, Acc0), Acc1), - ?assertEqual(1, length(Acc2#acc.worker_uuids)), - check_quorum(Acc2, false), - - {stop, Acc3} = handle_message(ExitMsg, worker(3, Acc0), Acc2), - ?assertEqual(0, length(Acc3#acc.worker_uuids)), - check_quorum(Acc3, true), - - Expect = [ - {error, internal_server_error}, - {error, internal_server_error} - ], - Resps = format_resps([<<"uuid1">>, <<"uuid2">>], Acc3), - ?assertEqual(Expect, Resps), - ?assertEqual(error, resp_health(Resps)) - end). - - -t_w4_accepted() -> - % Make sure we return when all workers have responded - % rather than wait around for a timeout if a user asks - % for a qourum with more than the available number of - % shards. - ?_test(begin - Acc0 = create_init_acc(4), - Msg = {ok, [{ok, [{1, <<"foo">>}]}, {ok, [{2, <<"bar">>}]}]}, - - {ok, Acc1} = handle_message(Msg, worker(1, Acc0), Acc0), - ?assertEqual(2, length(Acc1#acc.worker_uuids)), - check_quorum(Acc1, false), - - {ok, Acc2} = handle_message(Msg, worker(2, Acc0), Acc1), - ?assertEqual(1, length(Acc2#acc.worker_uuids)), - check_quorum(Acc2, false), - - {stop, Acc3} = handle_message(Msg, worker(3, Acc0), Acc2), - ?assertEqual(0, length(Acc3#acc.worker_uuids)), - check_quorum(Acc3, true), - - Expect = [{accepted, [{1, <<"foo">>}]}, {accepted, [{2, <<"bar">>}]}], - Resps = format_resps([<<"uuid1">>, <<"uuid2">>], Acc3), - ?assertEqual(Expect, Resps), - ?assertEqual(accepted, resp_health(Resps)) - end). - - -t_mixed_ok_accepted() -> - ?_test(begin - WorkerUUIDs = [ - {#shard{node = a, range = [1, 2]}, [<<"uuid1">>]}, - {#shard{node = b, range = [1, 2]}, [<<"uuid1">>]}, - {#shard{node = c, range = [1, 2]}, [<<"uuid1">>]}, - - {#shard{node = a, range = [3, 4]}, [<<"uuid2">>]}, - {#shard{node = b, range = [3, 4]}, [<<"uuid2">>]}, - {#shard{node = c, range = [3, 4]}, [<<"uuid2">>]} - ], - - Acc0 = #acc{ - worker_uuids = WorkerUUIDs, - resps = dict:from_list([{<<"uuid1">>, []}, {<<"uuid2">>, []}]), - uuid_counts = dict:from_list([{<<"uuid1">>, 3}, {<<"uuid2">>, 3}]), - w = 2 - }, - - Msg1 = {ok, [{ok, [{1, <<"foo">>}]}]}, - Msg2 = {ok, [{ok, [{2, <<"bar">>}]}]}, - ExitMsg = {rexi_EXIT, blargh}, - - {ok, Acc1} = handle_message(Msg1, worker(1, Acc0), Acc0), - {ok, Acc2} = handle_message(Msg1, worker(2, Acc0), Acc1), - {ok, Acc3} = handle_message(ExitMsg, worker(4, Acc0), Acc2), - {ok, Acc4} = handle_message(ExitMsg, worker(5, Acc0), Acc3), - {stop, Acc5} = handle_message(Msg2, worker(6, Acc0), Acc4), - - Expect = [{ok, [{1, <<"foo">>}]}, {accepted, [{2, <<"bar">>}]}], - Resps = format_resps([<<"uuid1">>, <<"uuid2">>], Acc5), - ?assertEqual(Expect, Resps), - ?assertEqual(accepted, resp_health(Resps)) - end). - - -t_mixed_errors() -> - ?_test(begin - WorkerUUIDs = [ - {#shard{node = a, range = [1, 2]}, [<<"uuid1">>]}, - {#shard{node = b, range = [1, 2]}, [<<"uuid1">>]}, - {#shard{node = c, range = [1, 2]}, [<<"uuid1">>]}, - - {#shard{node = a, range = [3, 4]}, [<<"uuid2">>]}, - {#shard{node = b, range = [3, 4]}, [<<"uuid2">>]}, - {#shard{node = c, range = [3, 4]}, [<<"uuid2">>]} - ], - - Acc0 = #acc{ - worker_uuids = WorkerUUIDs, - resps = dict:from_list([{<<"uuid1">>, []}, {<<"uuid2">>, []}]), - uuid_counts = dict:from_list([{<<"uuid1">>, 3}, {<<"uuid2">>, 3}]), - w = 2 - }, - - Msg = {ok, [{ok, [{1, <<"foo">>}]}]}, - ExitMsg = {rexi_EXIT, blargh}, - - {ok, Acc1} = handle_message(Msg, worker(1, Acc0), Acc0), - {ok, Acc2} = handle_message(Msg, worker(2, Acc0), Acc1), - {ok, Acc3} = handle_message(ExitMsg, worker(4, Acc0), Acc2), - {ok, Acc4} = handle_message(ExitMsg, worker(5, Acc0), Acc3), - {stop, Acc5} = handle_message(ExitMsg, worker(6, Acc0), Acc4), - - Expect = [{ok, [{1, <<"foo">>}]}, {error, internal_server_error}], - Resps = format_resps([<<"uuid1">>, <<"uuid2">>], Acc5), - ?assertEqual(Expect, Resps), - ?assertEqual(error, resp_health(Resps)) - end). - - -create_init_acc(W) -> - UUID1 = <<"uuid1">>, - UUID2 = <<"uuid2">>, - - Nodes = [node1, node2, node3], - Shards = mem3_util:create_partition_map(<<"foo">>, 3, 1, Nodes), - - % Create our worker_uuids. We're relying on the fact that - % we're using a fake Q=1 db so we don't have to worry - % about any hashing here. - WorkerUUIDs = lists:map(fun(Shard) -> - {Shard#shard{ref = erlang:make_ref()}, [UUID1, UUID2]} - end, Shards), - - #acc{ - worker_uuids = WorkerUUIDs, - resps = dict:from_list([{UUID1, []}, {UUID2, []}]), - uuid_counts = dict:from_list([{UUID1, 3}, {UUID2, 3}]), - w = W - }. - - -worker(N, #acc{worker_uuids = WorkerUUIDs}) -> - {Worker, _} = lists:nth(N, WorkerUUIDs), - Worker. - - -check_quorum(Acc, Expect) -> - dict:fold(fun(_Shard, Resps, _) -> - ?assertEqual(Expect, has_quorum(Resps, 3, Acc#acc.w)) - end, nil, Acc#acc.resps). - --endif. +%% -ifdef(TEST). +%% -include_lib("eunit/include/eunit.hrl"). +%% +%% purge_test_() -> +%% { +%% setup, +%% fun setup/0, +%% fun teardown/1, +%% [ +%% t_w2_ok(), +%% t_w3_ok(), +%% +%% t_w2_mixed_accepted(), +%% t_w3_mixed_accepted(), +%% +%% t_w2_exit1_ok(), +%% t_w2_exit2_accepted(), +%% t_w2_exit3_error(), +%% +%% t_w4_accepted(), +%% +%% t_mixed_ok_accepted(), +%% t_mixed_errors() +%% ] +%% }. +%% +%% +%% setup() -> +%% meck:new(couch_log), +%% meck:expect(couch_log, warning, fun(_, _) -> ok end), +%% meck:expect(couch_log, notice, fun(_, _) -> ok end). +%% +%% +%% teardown(_) -> +%% meck:unload(). +%% +%% +%% t_w2_ok() -> +%% ?_test(begin +%% Acc0 = create_init_acc(2), +%% Msg = {ok, [{ok, [{1, <<"foo">>}]}, {ok, [{2, <<"bar">>}]}]}, +%% +%% {ok, Acc1} = handle_message(Msg, worker(1, Acc0), Acc0), +%% ?assertEqual(2, length(Acc1#acc.worker_uuids)), +%% check_quorum(Acc1, false), +%% +%% {stop, Acc2} = handle_message(Msg, worker(2, Acc0), Acc1), +%% ?assertEqual(1, length(Acc2#acc.worker_uuids)), +%% check_quorum(Acc2, true), +%% +%% Expect = [{ok, [{1, <<"foo">>}]}, {ok, [{2, <<"bar">>}]}], +%% Resps = format_resps([<<"uuid1">>, <<"uuid2">>], Acc2), +%% ?assertEqual(Expect, Resps), +%% ?assertEqual(ok, resp_health(Resps)) +%% end). +%% +%% +%% t_w3_ok() -> +%% ?_test(begin +%% Acc0 = create_init_acc(3), +%% Msg = {ok, [{ok, [{1, <<"foo">>}]}, {ok, [{2, <<"bar">>}]}]}, +%% +%% {ok, Acc1} = handle_message(Msg, worker(1, Acc0), Acc0), +%% check_quorum(Acc1, false), +%% +%% {ok, Acc2} = handle_message(Msg, worker(2, Acc0), Acc1), +%% ?assertEqual(1, length(Acc2#acc.worker_uuids)), +%% check_quorum(Acc2, false), +%% +%% {stop, Acc3} = handle_message(Msg, worker(3, Acc0), Acc2), +%% ?assertEqual(0, length(Acc3#acc.worker_uuids)), +%% check_quorum(Acc3, true), +%% +%% Expect = [{ok, [{1, <<"foo">>}]}, {ok, [{2, <<"bar">>}]}], +%% Resps = format_resps([<<"uuid1">>, <<"uuid2">>], Acc3), +%% ?assertEqual(Expect, Resps), +%% ?assertEqual(ok, resp_health(Resps)) +%% end). +%% +%% +%% t_w2_mixed_accepted() -> +%% ?_test(begin +%% Acc0 = create_init_acc(2), +%% Msg1 = {ok, [{ok, [{1, <<"foo1">>}]}, {ok, [{2, <<"bar1">>}]}]}, +%% Msg2 = {ok, [{ok, [{1, <<"foo2">>}]}, {ok, [{2, <<"bar2">>}]}]}, +%% +%% {ok, Acc1} = handle_message(Msg1, worker(1, Acc0), Acc0), +%% ?assertEqual(2, length(Acc1#acc.worker_uuids)), +%% check_quorum(Acc1, false), +%% +%% {ok, Acc2} = handle_message(Msg2, worker(2, Acc0), Acc1), +%% ?assertEqual(1, length(Acc2#acc.worker_uuids)), +%% check_quorum(Acc2, false), +%% +%% {stop, Acc3} = handle_message(Msg1, worker(3, Acc0), Acc2), +%% ?assertEqual(0, length(Acc3#acc.worker_uuids)), +%% check_quorum(Acc3, true), +%% +%% Expect = [ +%% {accepted, [{1, <<"foo1">>}, {1, <<"foo2">>}]}, +%% {accepted, [{2, <<"bar1">>}, {2, <<"bar2">>}]} +%% ], +%% Resps = format_resps([<<"uuid1">>, <<"uuid2">>], Acc2), +%% ?assertEqual(Expect, Resps), +%% ?assertEqual(accepted, resp_health(Resps)) +%% end). +%% +%% +%% t_w3_mixed_accepted() -> +%% ?_test(begin +%% Acc0 = create_init_acc(3), +%% Msg1 = {ok, [{ok, [{1, <<"foo1">>}]}, {ok, [{2, <<"bar1">>}]}]}, +%% Msg2 = {ok, [{ok, [{1, <<"foo2">>}]}, {ok, [{2, <<"bar2">>}]}]}, +%% +%% {ok, Acc1} = handle_message(Msg1, worker(1, Acc0), Acc0), +%% ?assertEqual(2, length(Acc1#acc.worker_uuids)), +%% check_quorum(Acc1, false), +%% +%% {ok, Acc2} = handle_message(Msg2, worker(2, Acc0), Acc1), +%% ?assertEqual(1, length(Acc2#acc.worker_uuids)), +%% check_quorum(Acc2, false), +%% +%% {stop, Acc3} = handle_message(Msg2, worker(3, Acc0), Acc2), +%% ?assertEqual(0, length(Acc3#acc.worker_uuids)), +%% check_quorum(Acc3, true), +%% +%% Expect = [ +%% {accepted, [{1, <<"foo1">>}, {1, <<"foo2">>}]}, +%% {accepted, [{2, <<"bar1">>}, {2, <<"bar2">>}]} +%% ], +%% Resps = format_resps([<<"uuid1">>, <<"uuid2">>], Acc2), +%% ?assertEqual(Expect, Resps), +%% ?assertEqual(accepted, resp_health(Resps)) +%% end). +%% +%% +%% t_w2_exit1_ok() -> +%% ?_test(begin +%% Acc0 = create_init_acc(2), +%% Msg = {ok, [{ok, [{1, <<"foo">>}]}, {ok, [{2, <<"bar">>}]}]}, +%% ExitMsg = {rexi_EXIT, blargh}, +%% +%% {ok, Acc1} = handle_message(Msg, worker(1, Acc0), Acc0), +%% ?assertEqual(2, length(Acc1#acc.worker_uuids)), +%% check_quorum(Acc1, false), +%% +%% {ok, Acc2} = handle_message(ExitMsg, worker(2, Acc0), Acc1), +%% ?assertEqual(1, length(Acc2#acc.worker_uuids)), +%% check_quorum(Acc2, false), +%% +%% {stop, Acc3} = handle_message(Msg, worker(3, Acc0), Acc2), +%% ?assertEqual(0, length(Acc3#acc.worker_uuids)), +%% check_quorum(Acc3, true), +%% +%% Expect = [{ok, [{1, <<"foo">>}]}, {ok, [{2, <<"bar">>}]}], +%% Resps = format_resps([<<"uuid1">>, <<"uuid2">>], Acc3), +%% ?assertEqual(Expect, Resps), +%% ?assertEqual(ok, resp_health(Resps)) +%% end). +%% +%% +%% t_w2_exit2_accepted() -> +%% ?_test(begin +%% Acc0 = create_init_acc(2), +%% Msg = {ok, [{ok, [{1, <<"foo">>}]}, {ok, [{2, <<"bar">>}]}]}, +%% ExitMsg = {rexi_EXIT, blargh}, +%% +%% {ok, Acc1} = handle_message(Msg, worker(1, Acc0), Acc0), +%% ?assertEqual(2, length(Acc1#acc.worker_uuids)), +%% check_quorum(Acc1, false), +%% +%% {ok, Acc2} = handle_message(ExitMsg, worker(2, Acc0), Acc1), +%% ?assertEqual(1, length(Acc2#acc.worker_uuids)), +%% check_quorum(Acc2, false), +%% +%% {stop, Acc3} = handle_message(ExitMsg, worker(3, Acc0), Acc2), +%% ?assertEqual(0, length(Acc3#acc.worker_uuids)), +%% check_quorum(Acc3, true), +%% +%% Expect = [{accepted, [{1, <<"foo">>}]}, {accepted, [{2, <<"bar">>}]}], +%% Resps = format_resps([<<"uuid1">>, <<"uuid2">>], Acc3), +%% ?assertEqual(Expect, Resps), +%% ?assertEqual(accepted, resp_health(Resps)) +%% end). +%% +%% +%% t_w2_exit3_error() -> +%% ?_test(begin +%% Acc0 = create_init_acc(2), +%% ExitMsg = {rexi_EXIT, blargh}, +%% +%% {ok, Acc1} = handle_message(ExitMsg, worker(1, Acc0), Acc0), +%% ?assertEqual(2, length(Acc1#acc.worker_uuids)), +%% check_quorum(Acc1, false), +%% +%% {ok, Acc2} = handle_message(ExitMsg, worker(2, Acc0), Acc1), +%% ?assertEqual(1, length(Acc2#acc.worker_uuids)), +%% check_quorum(Acc2, false), +%% +%% {stop, Acc3} = handle_message(ExitMsg, worker(3, Acc0), Acc2), +%% ?assertEqual(0, length(Acc3#acc.worker_uuids)), +%% check_quorum(Acc3, true), +%% +%% Expect = [ +%% {error, internal_server_error}, +%% {error, internal_server_error} +%% ], +%% Resps = format_resps([<<"uuid1">>, <<"uuid2">>], Acc3), +%% ?assertEqual(Expect, Resps), +%% ?assertEqual(error, resp_health(Resps)) +%% end). +%% +%% +%% t_w4_accepted() -> +%% % Make sure we return when all workers have responded +%% % rather than wait around for a timeout if a user asks +%% % for a qourum with more than the available number of +%% % shards. +%% ?_test(begin +%% Acc0 = create_init_acc(4), +%% Msg = {ok, [{ok, [{1, <<"foo">>}]}, {ok, [{2, <<"bar">>}]}]}, +%% +%% {ok, Acc1} = handle_message(Msg, worker(1, Acc0), Acc0), +%% ?assertEqual(2, length(Acc1#acc.worker_uuids)), +%% check_quorum(Acc1, false), +%% +%% {ok, Acc2} = handle_message(Msg, worker(2, Acc0), Acc1), +%% ?assertEqual(1, length(Acc2#acc.worker_uuids)), +%% check_quorum(Acc2, false), +%% +%% {stop, Acc3} = handle_message(Msg, worker(3, Acc0), Acc2), +%% ?assertEqual(0, length(Acc3#acc.worker_uuids)), +%% check_quorum(Acc3, true), +%% +%% Expect = [{accepted, [{1, <<"foo">>}]}, {accepted, [{2, <<"bar">>}]}], +%% Resps = format_resps([<<"uuid1">>, <<"uuid2">>], Acc3), +%% ?assertEqual(Expect, Resps), +%% ?assertEqual(accepted, resp_health(Resps)) +%% end). +%% +%% +%% t_mixed_ok_accepted() -> +%% ?_test(begin +%% WorkerUUIDs = [ +%% {#shard{node = a, range = [1, 2]}, [<<"uuid1">>]}, +%% {#shard{node = b, range = [1, 2]}, [<<"uuid1">>]}, +%% {#shard{node = c, range = [1, 2]}, [<<"uuid1">>]}, +%% +%% {#shard{node = a, range = [3, 4]}, [<<"uuid2">>]}, +%% {#shard{node = b, range = [3, 4]}, [<<"uuid2">>]}, +%% {#shard{node = c, range = [3, 4]}, [<<"uuid2">>]} +%% ], +%% +%% Acc0 = #acc{ +%% worker_uuids = WorkerUUIDs, +%% resps = dict:from_list([{<<"uuid1">>, []}, {<<"uuid2">>, []}]), +%% uuid_counts = dict:from_list([{<<"uuid1">>, 3}, {<<"uuid2">>, 3}]), +%% w = 2 +%% }, +%% +%% Msg1 = {ok, [{ok, [{1, <<"foo">>}]}]}, +%% Msg2 = {ok, [{ok, [{2, <<"bar">>}]}]}, +%% ExitMsg = {rexi_EXIT, blargh}, +%% +%% {ok, Acc1} = handle_message(Msg1, worker(1, Acc0), Acc0), +%% {ok, Acc2} = handle_message(Msg1, worker(2, Acc0), Acc1), +%% {ok, Acc3} = handle_message(ExitMsg, worker(4, Acc0), Acc2), +%% {ok, Acc4} = handle_message(ExitMsg, worker(5, Acc0), Acc3), +%% {stop, Acc5} = handle_message(Msg2, worker(6, Acc0), Acc4), +%% +%% Expect = [{ok, [{1, <<"foo">>}]}, {accepted, [{2, <<"bar">>}]}], +%% Resps = format_resps([<<"uuid1">>, <<"uuid2">>], Acc5), +%% ?assertEqual(Expect, Resps), +%% ?assertEqual(accepted, resp_health(Resps)) +%% end). +%% +%% +%% t_mixed_errors() -> +%% ?_test(begin +%% WorkerUUIDs = [ +%% {#shard{node = a, range = [1, 2]}, [<<"uuid1">>]}, +%% {#shard{node = b, range = [1, 2]}, [<<"uuid1">>]}, +%% {#shard{node = c, range = [1, 2]}, [<<"uuid1">>]}, +%% +%% {#shard{node = a, range = [3, 4]}, [<<"uuid2">>]}, +%% {#shard{node = b, range = [3, 4]}, [<<"uuid2">>]}, +%% {#shard{node = c, range = [3, 4]}, [<<"uuid2">>]} +%% ], +%% +%% Acc0 = #acc{ +%% worker_uuids = WorkerUUIDs, +%% resps = dict:from_list([{<<"uuid1">>, []}, {<<"uuid2">>, []}]), +%% uuid_counts = dict:from_list([{<<"uuid1">>, 3}, {<<"uuid2">>, 3}]), +%% w = 2 +%% }, +%% +%% Msg = {ok, [{ok, [{1, <<"foo">>}]}]}, +%% ExitMsg = {rexi_EXIT, blargh}, +%% +%% {ok, Acc1} = handle_message(Msg, worker(1, Acc0), Acc0), +%% {ok, Acc2} = handle_message(Msg, worker(2, Acc0), Acc1), +%% {ok, Acc3} = handle_message(ExitMsg, worker(4, Acc0), Acc2), +%% {ok, Acc4} = handle_message(ExitMsg, worker(5, Acc0), Acc3), +%% {stop, Acc5} = handle_message(ExitMsg, worker(6, Acc0), Acc4), +%% +%% Expect = [{ok, [{1, <<"foo">>}]}, {error, internal_server_error}], +%% Resps = format_resps([<<"uuid1">>, <<"uuid2">>], Acc5), +%% ?assertEqual(Expect, Resps), +%% ?assertEqual(error, resp_health(Resps)) +%% end). +%% +%% +%% create_init_acc(W) -> +%% UUID1 = <<"uuid1">>, +%% UUID2 = <<"uuid2">>, +%% +%% Nodes = [node1, node2, node3], +%% Shards = mem3_util:create_partition_map(<<"foo">>, 3, 1, Nodes), +%% +%% % Create our worker_uuids. We're relying on the fact that +%% % we're using a fake Q=1 db so we don't have to worry +%% % about any hashing here. +%% WorkerUUIDs = lists:map(fun(Shard) -> +%% {Shard#shard{ref = erlang:make_ref()}, [UUID1, UUID2]} +%% end, Shards), +%% +%% #acc{ +%% worker_uuids = WorkerUUIDs, +%% resps = dict:from_list([{UUID1, []}, {UUID2, []}]), +%% uuid_counts = dict:from_list([{UUID1, 3}, {UUID2, 3}]), +%% w = W +%% }. +%% +%% +%% worker(N, #acc{worker_uuids = WorkerUUIDs}) -> +%% {Worker, _} = lists:nth(N, WorkerUUIDs), +%% Worker. +%% +%% +%% check_quorum(Acc, Expect) -> +%% dict:fold(fun(_Shard, Resps, _) -> +%% ?assertEqual(Expect, has_quorum(Resps, 3, Acc#acc.w)) +%% end, nil, Acc#acc.resps). +%% +%% -endif. diff --git a/src/fabric/src/fabric_doc_update.erl b/src/fabric/src/fabric_doc_update.erl index 69babc14b..d670e3ccf 100644 --- a/src/fabric/src/fabric_doc_update.erl +++ b/src/fabric/src/fabric_doc_update.erl @@ -220,158 +220,158 @@ validate_atomic_update(_DbName, AllDocs, true) -> throw({aborted, PreCommitFailures}). --ifdef(TEST). --include_lib("eunit/include/eunit.hrl"). - - -setup_all() -> - meck:new([couch_log, couch_stats]), - meck:expect(couch_log, warning, fun(_,_) -> ok end), - meck:expect(couch_stats, increment_counter, fun(_) -> ok end). - - -teardown_all(_) -> - meck:unload(). - - -doc_update_test_() -> - { - setup, - fun setup_all/0, - fun teardown_all/1, - [ - fun doc_update1/0, - fun doc_update2/0, - fun doc_update3/0 - ] - }. - - -% eunits -doc_update1() -> - Doc1 = #doc{revs = {1,[<<"foo">>]}}, - Doc2 = #doc{revs = {1,[<<"bar">>]}}, - Docs = [Doc1], - Docs2 = [Doc2, Doc1], - Dict = dict:from_list([{Doc,[]} || Doc <- Docs]), - Dict2 = dict:from_list([{Doc,[]} || Doc <- Docs2]), - - Shards = - mem3_util:create_partition_map("foo",3,1,["node1","node2","node3"]), - GroupedDocs = group_docs_by_shard_hack(<<"foo">>,Shards,Docs), - - - % test for W = 2 - AccW2 = {length(Shards), length(Docs), list_to_integer("2"), GroupedDocs, - Dict}, - - {ok,{WaitingCountW2_1,_,_,_,_}=AccW2_1} = - handle_message({ok, [{ok, Doc1}]},hd(Shards),AccW2), - ?assertEqual(WaitingCountW2_1,2), - {stop, FinalReplyW2 } = - handle_message({ok, [{ok, Doc1}]},lists:nth(2,Shards),AccW2_1), - ?assertEqual({ok, [{Doc1, {ok,Doc1}}]},FinalReplyW2), - - % test for W = 3 - AccW3 = {length(Shards), length(Docs), list_to_integer("3"), GroupedDocs, - Dict}, - - {ok,{WaitingCountW3_1,_,_,_,_}=AccW3_1} = - handle_message({ok, [{ok, Doc1}]},hd(Shards),AccW3), - ?assertEqual(WaitingCountW3_1,2), - - {ok,{WaitingCountW3_2,_,_,_,_}=AccW3_2} = - handle_message({ok, [{ok, Doc1}]},lists:nth(2,Shards),AccW3_1), - ?assertEqual(WaitingCountW3_2,1), - - {stop, FinalReplyW3 } = - handle_message({ok, [{ok, Doc1}]},lists:nth(3,Shards),AccW3_2), - ?assertEqual({ok, [{Doc1, {ok,Doc1}}]},FinalReplyW3), - - % test w quorum > # shards, which should fail immediately - - Shards2 = mem3_util:create_partition_map("foo",1,1,["node1"]), - GroupedDocs2 = group_docs_by_shard_hack(<<"foo">>,Shards2,Docs), - - AccW4 = - {length(Shards2), length(Docs), list_to_integer("2"), GroupedDocs2, Dict}, - Bool = - case handle_message({ok, [{ok, Doc1}]},hd(Shards2),AccW4) of - {stop, _Reply} -> - true; - _ -> false - end, - ?assertEqual(Bool,true), - - % Docs with no replies should end up as {error, internal_server_error} - SA1 = #shard{node=a, range=1}, - SB1 = #shard{node=b, range=1}, - SA2 = #shard{node=a, range=2}, - SB2 = #shard{node=b, range=2}, - GroupedDocs3 = [{SA1,[Doc1]}, {SB1,[Doc1]}, {SA2,[Doc2]}, {SB2,[Doc2]}], - StW5_0 = {length(GroupedDocs3), length(Docs2), 2, GroupedDocs3, Dict2}, - {ok, StW5_1} = handle_message({ok, [{ok, "A"}]}, SA1, StW5_0), - {ok, StW5_2} = handle_message({rexi_EXIT, nil}, SB1, StW5_1), - {ok, StW5_3} = handle_message({rexi_EXIT, nil}, SA2, StW5_2), - {stop, ReplyW5} = handle_message({rexi_EXIT, nil}, SB2, StW5_3), - ?assertEqual( - {error, [{Doc1,{accepted,"A"}},{Doc2,{error,internal_server_error}}]}, - ReplyW5 - ). - -doc_update2() -> - Doc1 = #doc{revs = {1,[<<"foo">>]}}, - Doc2 = #doc{revs = {1,[<<"bar">>]}}, - Docs = [Doc2, Doc1], - Shards = - mem3_util:create_partition_map("foo",3,1,["node1","node2","node3"]), - GroupedDocs = group_docs_by_shard_hack(<<"foo">>,Shards,Docs), - Acc0 = {length(Shards), length(Docs), list_to_integer("2"), GroupedDocs, - dict:from_list([{Doc,[]} || Doc <- Docs])}, - - {ok,{WaitingCount1,_,_,_,_}=Acc1} = - handle_message({ok, [{ok, Doc1},{ok, Doc2}]},hd(Shards),Acc0), - ?assertEqual(WaitingCount1,2), - - {ok,{WaitingCount2,_,_,_,_}=Acc2} = - handle_message({rexi_EXIT, 1},lists:nth(2,Shards),Acc1), - ?assertEqual(WaitingCount2,1), - - {stop, Reply} = - handle_message({rexi_EXIT, 1},lists:nth(3,Shards),Acc2), - - ?assertEqual({accepted, [{Doc1,{accepted,Doc2}}, {Doc2,{accepted,Doc1}}]}, - Reply). - -doc_update3() -> - Doc1 = #doc{revs = {1,[<<"foo">>]}}, - Doc2 = #doc{revs = {1,[<<"bar">>]}}, - Docs = [Doc2, Doc1], - Shards = - mem3_util:create_partition_map("foo",3,1,["node1","node2","node3"]), - GroupedDocs = group_docs_by_shard_hack(<<"foo">>,Shards,Docs), - Acc0 = {length(Shards), length(Docs), list_to_integer("2"), GroupedDocs, - dict:from_list([{Doc,[]} || Doc <- Docs])}, - - {ok,{WaitingCount1,_,_,_,_}=Acc1} = - handle_message({ok, [{ok, Doc1},{ok, Doc2}]},hd(Shards),Acc0), - ?assertEqual(WaitingCount1,2), - - {ok,{WaitingCount2,_,_,_,_}=Acc2} = - handle_message({rexi_EXIT, 1},lists:nth(2,Shards),Acc1), - ?assertEqual(WaitingCount2,1), - - {stop, Reply} = - handle_message({ok, [{ok, Doc1},{ok, Doc2}]},lists:nth(3,Shards),Acc2), - - ?assertEqual({ok, [{Doc1, {ok, Doc2}},{Doc2, {ok,Doc1}}]},Reply). - -% needed for testing to avoid having to start the mem3 application -group_docs_by_shard_hack(_DbName, Shards, Docs) -> - dict:to_list(lists:foldl(fun(#doc{id=_Id} = Doc, D0) -> - lists:foldl(fun(Shard, D1) -> - dict:append(Shard, Doc, D1) - end, D0, Shards) - end, dict:new(), Docs)). - --endif. +%% -ifdef(TEST). +%% -include_lib("eunit/include/eunit.hrl"). +%% +%% +%% setup_all() -> +%% meck:new([couch_log, couch_stats]), +%% meck:expect(couch_log, warning, fun(_,_) -> ok end), +%% meck:expect(couch_stats, increment_counter, fun(_) -> ok end). +%% +%% +%% teardown_all(_) -> +%% meck:unload(). +%% +%% +%% doc_update_test_() -> +%% { +%% setup, +%% fun setup_all/0, +%% fun teardown_all/1, +%% [ +%% fun doc_update1/0, +%% fun doc_update2/0, +%% fun doc_update3/0 +%% ] +%% }. +%% +%% +%% % eunits +%% doc_update1() -> +%% Doc1 = #doc{revs = {1,[<<"foo">>]}}, +%% Doc2 = #doc{revs = {1,[<<"bar">>]}}, +%% Docs = [Doc1], +%% Docs2 = [Doc2, Doc1], +%% Dict = dict:from_list([{Doc,[]} || Doc <- Docs]), +%% Dict2 = dict:from_list([{Doc,[]} || Doc <- Docs2]), +%% +%% Shards = +%% mem3_util:create_partition_map("foo",3,1,["node1","node2","node3"]), +%% GroupedDocs = group_docs_by_shard_hack(<<"foo">>,Shards,Docs), +%% +%% +%% % test for W = 2 +%% AccW2 = {length(Shards), length(Docs), list_to_integer("2"), GroupedDocs, +%% Dict}, +%% +%% {ok,{WaitingCountW2_1,_,_,_,_}=AccW2_1} = +%% handle_message({ok, [{ok, Doc1}]},hd(Shards),AccW2), +%% ?assertEqual(WaitingCountW2_1,2), +%% {stop, FinalReplyW2 } = +%% handle_message({ok, [{ok, Doc1}]},lists:nth(2,Shards),AccW2_1), +%% ?assertEqual({ok, [{Doc1, {ok,Doc1}}]},FinalReplyW2), +%% +%% % test for W = 3 +%% AccW3 = {length(Shards), length(Docs), list_to_integer("3"), GroupedDocs, +%% Dict}, +%% +%% {ok,{WaitingCountW3_1,_,_,_,_}=AccW3_1} = +%% handle_message({ok, [{ok, Doc1}]},hd(Shards),AccW3), +%% ?assertEqual(WaitingCountW3_1,2), +%% +%% {ok,{WaitingCountW3_2,_,_,_,_}=AccW3_2} = +%% handle_message({ok, [{ok, Doc1}]},lists:nth(2,Shards),AccW3_1), +%% ?assertEqual(WaitingCountW3_2,1), +%% +%% {stop, FinalReplyW3 } = +%% handle_message({ok, [{ok, Doc1}]},lists:nth(3,Shards),AccW3_2), +%% ?assertEqual({ok, [{Doc1, {ok,Doc1}}]},FinalReplyW3), +%% +%% % test w quorum > # shards, which should fail immediately +%% +%% Shards2 = mem3_util:create_partition_map("foo",1,1,["node1"]), +%% GroupedDocs2 = group_docs_by_shard_hack(<<"foo">>,Shards2,Docs), +%% +%% AccW4 = +%% {length(Shards2), length(Docs), list_to_integer("2"), GroupedDocs2, Dict}, +%% Bool = +%% case handle_message({ok, [{ok, Doc1}]},hd(Shards2),AccW4) of +%% {stop, _Reply} -> +%% true; +%% _ -> false +%% end, +%% ?assertEqual(Bool,true), +%% +%% % Docs with no replies should end up as {error, internal_server_error} +%% SA1 = #shard{node=a, range=1}, +%% SB1 = #shard{node=b, range=1}, +%% SA2 = #shard{node=a, range=2}, +%% SB2 = #shard{node=b, range=2}, +%% GroupedDocs3 = [{SA1,[Doc1]}, {SB1,[Doc1]}, {SA2,[Doc2]}, {SB2,[Doc2]}], +%% StW5_0 = {length(GroupedDocs3), length(Docs2), 2, GroupedDocs3, Dict2}, +%% {ok, StW5_1} = handle_message({ok, [{ok, "A"}]}, SA1, StW5_0), +%% {ok, StW5_2} = handle_message({rexi_EXIT, nil}, SB1, StW5_1), +%% {ok, StW5_3} = handle_message({rexi_EXIT, nil}, SA2, StW5_2), +%% {stop, ReplyW5} = handle_message({rexi_EXIT, nil}, SB2, StW5_3), +%% ?assertEqual( +%% {error, [{Doc1,{accepted,"A"}},{Doc2,{error,internal_server_error}}]}, +%% ReplyW5 +%% ). +%% +%% doc_update2() -> +%% Doc1 = #doc{revs = {1,[<<"foo">>]}}, +%% Doc2 = #doc{revs = {1,[<<"bar">>]}}, +%% Docs = [Doc2, Doc1], +%% Shards = +%% mem3_util:create_partition_map("foo",3,1,["node1","node2","node3"]), +%% GroupedDocs = group_docs_by_shard_hack(<<"foo">>,Shards,Docs), +%% Acc0 = {length(Shards), length(Docs), list_to_integer("2"), GroupedDocs, +%% dict:from_list([{Doc,[]} || Doc <- Docs])}, +%% +%% {ok,{WaitingCount1,_,_,_,_}=Acc1} = +%% handle_message({ok, [{ok, Doc1},{ok, Doc2}]},hd(Shards),Acc0), +%% ?assertEqual(WaitingCount1,2), +%% +%% {ok,{WaitingCount2,_,_,_,_}=Acc2} = +%% handle_message({rexi_EXIT, 1},lists:nth(2,Shards),Acc1), +%% ?assertEqual(WaitingCount2,1), +%% +%% {stop, Reply} = +%% handle_message({rexi_EXIT, 1},lists:nth(3,Shards),Acc2), +%% +%% ?assertEqual({accepted, [{Doc1,{accepted,Doc2}}, {Doc2,{accepted,Doc1}}]}, +%% Reply). +%% +%% doc_update3() -> +%% Doc1 = #doc{revs = {1,[<<"foo">>]}}, +%% Doc2 = #doc{revs = {1,[<<"bar">>]}}, +%% Docs = [Doc2, Doc1], +%% Shards = +%% mem3_util:create_partition_map("foo",3,1,["node1","node2","node3"]), +%% GroupedDocs = group_docs_by_shard_hack(<<"foo">>,Shards,Docs), +%% Acc0 = {length(Shards), length(Docs), list_to_integer("2"), GroupedDocs, +%% dict:from_list([{Doc,[]} || Doc <- Docs])}, +%% +%% {ok,{WaitingCount1,_,_,_,_}=Acc1} = +%% handle_message({ok, [{ok, Doc1},{ok, Doc2}]},hd(Shards),Acc0), +%% ?assertEqual(WaitingCount1,2), +%% +%% {ok,{WaitingCount2,_,_,_,_}=Acc2} = +%% handle_message({rexi_EXIT, 1},lists:nth(2,Shards),Acc1), +%% ?assertEqual(WaitingCount2,1), +%% +%% {stop, Reply} = +%% handle_message({ok, [{ok, Doc1},{ok, Doc2}]},lists:nth(3,Shards),Acc2), +%% +%% ?assertEqual({ok, [{Doc1, {ok, Doc2}},{Doc2, {ok,Doc1}}]},Reply). +%% +%% % needed for testing to avoid having to start the mem3 application +%% group_docs_by_shard_hack(_DbName, Shards, Docs) -> +%% dict:to_list(lists:foldl(fun(#doc{id=_Id} = Doc, D0) -> +%% lists:foldl(fun(Shard, D1) -> +%% dict:append(Shard, Doc, D1) +%% end, D0, Shards) +%% end, dict:new(), Docs)). +%% +%% -endif. diff --git a/src/fabric/src/fabric_rpc.erl b/src/fabric/src/fabric_rpc.erl index 7b688b2b9..b537c2317 100644 --- a/src/fabric/src/fabric_rpc.erl +++ b/src/fabric/src/fabric_rpc.erl @@ -642,22 +642,22 @@ uuid(Db) -> uuid_prefix_len() -> list_to_integer(config:get("fabric", "uuid_prefix_len", "7")). --ifdef(TEST). --include_lib("eunit/include/eunit.hrl"). - -maybe_filtered_json_doc_no_filter_test() -> - Body = {[{<<"a">>, 1}]}, - Doc = #doc{id = <<"1">>, revs = {1, [<<"r1">>]}, body = Body}, - {JDocProps} = maybe_filtered_json_doc(Doc, [], x), - ExpectedProps = [{<<"_id">>, <<"1">>}, {<<"_rev">>, <<"1-r1">>}, {<<"a">>, 1}], - ?assertEqual(lists:keysort(1, JDocProps), ExpectedProps). - -maybe_filtered_json_doc_with_filter_test() -> - Body = {[{<<"a">>, 1}]}, - Doc = #doc{id = <<"1">>, revs = {1, [<<"r1">>]}, body = Body}, - Fields = [<<"a">>, <<"nonexistent">>], - Filter = {selector, main_only, {some_selector, Fields}}, - {JDocProps} = maybe_filtered_json_doc(Doc, [], Filter), - ?assertEqual(JDocProps, [{<<"a">>, 1}]). - --endif. +%% -ifdef(TEST). +%% -include_lib("eunit/include/eunit.hrl"). +%% +%% maybe_filtered_json_doc_no_filter_test() -> +%% Body = {[{<<"a">>, 1}]}, +%% Doc = #doc{id = <<"1">>, revs = {1, [<<"r1">>]}, body = Body}, +%% {JDocProps} = maybe_filtered_json_doc(Doc, [], x), +%% ExpectedProps = [{<<"_id">>, <<"1">>}, {<<"_rev">>, <<"1-r1">>}, {<<"a">>, 1}], +%% ?assertEqual(lists:keysort(1, JDocProps), ExpectedProps). +%% +%% maybe_filtered_json_doc_with_filter_test() -> +%% Body = {[{<<"a">>, 1}]}, +%% Doc = #doc{id = <<"1">>, revs = {1, [<<"r1">>]}, body = Body}, +%% Fields = [<<"a">>, <<"nonexistent">>], +%% Filter = {selector, main_only, {some_selector, Fields}}, +%% {JDocProps} = maybe_filtered_json_doc(Doc, [], Filter), +%% ?assertEqual(JDocProps, [{<<"a">>, 1}]). +%% +%% -endif. diff --git a/src/fabric/src/fabric_streams.erl b/src/fabric/src/fabric_streams.erl index 59c8b8a6b..98e285081 100644 --- a/src/fabric/src/fabric_streams.erl +++ b/src/fabric/src/fabric_streams.erl @@ -192,82 +192,83 @@ add_worker_to_cleaner(CoordinatorPid, Worker) -> --ifdef(TEST). --include_lib("eunit/include/eunit.hrl"). - -worker_cleaner_test_() -> - { - "Fabric spawn_worker_cleaner test", { - setup, fun setup/0, fun teardown/1, - fun(_) -> [ - should_clean_workers(), - does_not_fire_if_cleanup_called(), - should_clean_additional_worker_too() - ] end - } - }. - - -should_clean_workers() -> - ?_test(begin - meck:reset(rexi), - erase(?WORKER_CLEANER), - Workers = [ - #shard{node = 'n1', ref = make_ref()}, - #shard{node = 'n2', ref = make_ref()} - ], - {Coord, _} = spawn_monitor(fun() -> receive die -> ok end end), - Cleaner = spawn_worker_cleaner(Coord, Workers), - Ref = erlang:monitor(process, Cleaner), - Coord ! die, - receive {'DOWN', Ref, _, Cleaner, _} -> ok end, - ?assertEqual(1, meck:num_calls(rexi, kill_all, 1)) - end). - - -does_not_fire_if_cleanup_called() -> - ?_test(begin - meck:reset(rexi), - erase(?WORKER_CLEANER), - Workers = [ - #shard{node = 'n1', ref = make_ref()}, - #shard{node = 'n2', ref = make_ref()} - ], - {Coord, _} = spawn_monitor(fun() -> receive die -> ok end end), - Cleaner = spawn_worker_cleaner(Coord, Workers), - Ref = erlang:monitor(process, Cleaner), - cleanup(Workers), - Coord ! die, - receive {'DOWN', Ref, _, _, _} -> ok end, - % 2 calls would be from cleanup/1 function. If cleanup process fired - % too it would have been 4 calls total. - ?assertEqual(1, meck:num_calls(rexi, kill_all, 1)) - end). - - -should_clean_additional_worker_too() -> - ?_test(begin - meck:reset(rexi), - erase(?WORKER_CLEANER), - Workers = [ - #shard{node = 'n1', ref = make_ref()} - ], - {Coord, _} = spawn_monitor(fun() -> receive die -> ok end end), - Cleaner = spawn_worker_cleaner(Coord, Workers), - add_worker_to_cleaner(Coord, #shard{node = 'n2', ref = make_ref()}), - Ref = erlang:monitor(process, Cleaner), - Coord ! die, - receive {'DOWN', Ref, _, Cleaner, _} -> ok end, - ?assertEqual(1, meck:num_calls(rexi, kill_all, 1)) - end). - - -setup() -> - ok = meck:expect(rexi, kill_all, fun(_) -> ok end). - - -teardown(_) -> - meck:unload(). - --endif. +%% -ifdef(TEST). +%% +%% -include_lib("eunit/include/eunit.hrl"). +%% +%% worker_cleaner_test_() -> +%% { +%% "Fabric spawn_worker_cleaner test", { +%% setup, fun setup/0, fun teardown/1, +%% fun(_) -> [ +%% should_clean_workers(), +%% does_not_fire_if_cleanup_called(), +%% should_clean_additional_worker_too() +%% ] end +%% } +%% }. +%% +%% +%% should_clean_workers() -> +%% ?_test(begin +%% meck:reset(rexi), +%% erase(?WORKER_CLEANER), +%% Workers = [ +%% #shard{node = 'n1', ref = make_ref()}, +%% #shard{node = 'n2', ref = make_ref()} +%% ], +%% {Coord, _} = spawn_monitor(fun() -> receive die -> ok end end), +%% Cleaner = spawn_worker_cleaner(Coord, Workers), +%% Ref = erlang:monitor(process, Cleaner), +%% Coord ! die, +%% receive {'DOWN', Ref, _, Cleaner, _} -> ok end, +%% ?assertEqual(1, meck:num_calls(rexi, kill_all, 1)) +%% end). +%% +%% +%% does_not_fire_if_cleanup_called() -> +%% ?_test(begin +%% meck:reset(rexi), +%% erase(?WORKER_CLEANER), +%% Workers = [ +%% #shard{node = 'n1', ref = make_ref()}, +%% #shard{node = 'n2', ref = make_ref()} +%% ], +%% {Coord, _} = spawn_monitor(fun() -> receive die -> ok end end), +%% Cleaner = spawn_worker_cleaner(Coord, Workers), +%% Ref = erlang:monitor(process, Cleaner), +%% cleanup(Workers), +%% Coord ! die, +%% receive {'DOWN', Ref, _, _, _} -> ok end, +%% % 2 calls would be from cleanup/1 function. If cleanup process fired +%% % too it would have been 4 calls total. +%% ?assertEqual(1, meck:num_calls(rexi, kill_all, 1)) +%% end). +%% +%% +%% should_clean_additional_worker_too() -> +%% ?_test(begin +%% meck:reset(rexi), +%% erase(?WORKER_CLEANER), +%% Workers = [ +%% #shard{node = 'n1', ref = make_ref()} +%% ], +%% {Coord, _} = spawn_monitor(fun() -> receive die -> ok end end), +%% Cleaner = spawn_worker_cleaner(Coord, Workers), +%% add_worker_to_cleaner(Coord, #shard{node = 'n2', ref = make_ref()}), +%% Ref = erlang:monitor(process, Cleaner), +%% Coord ! die, +%% receive {'DOWN', Ref, _, Cleaner, _} -> ok end, +%% ?assertEqual(1, meck:num_calls(rexi, kill_all, 1)) +%% end). +%% +%% +%% setup() -> +%% ok = meck:expect(rexi, kill_all, fun(_) -> ok end). +%% +%% +%% teardown(_) -> +%% meck:unload(). +%% +%% -endif. diff --git a/src/fabric/src/fabric_util.erl b/src/fabric/src/fabric_util.erl index 8aa14e73a..1c1ee80b7 100644 --- a/src/fabric/src/fabric_util.erl +++ b/src/fabric/src/fabric_util.erl @@ -192,30 +192,30 @@ create_monitors(Shards) -> ]), rexi_monitor:start(MonRefs). -%% verify only id and rev are used in key. -update_counter_test() -> - Reply = {ok, #doc{id = <<"id">>, revs = <<"rev">>, - body = <<"body">>, atts = <<"atts">>}}, - ?assertEqual([{{<<"id">>,<<"rev">>}, {Reply, 1}}], - update_counter(Reply, 1, [])). - -remove_ancestors_test() -> - Foo1 = {ok, #doc{revs = {1, [<<"foo">>]}}}, - Foo2 = {ok, #doc{revs = {2, [<<"foo2">>, <<"foo">>]}}}, - Bar1 = {ok, #doc{revs = {1, [<<"bar">>]}}}, - Bar2 = {not_found, {1,<<"bar">>}}, - ?assertEqual( - [kv(Bar1,1), kv(Foo1,1)], - remove_ancestors([kv(Bar1,1), kv(Foo1,1)], []) - ), - ?assertEqual( - [kv(Bar1,1), kv(Foo2,2)], - remove_ancestors([kv(Bar1,1), kv(Foo1,1), kv(Foo2,1)], []) - ), - ?assertEqual( - [kv(Bar1,2)], - remove_ancestors([kv(Bar2,1), kv(Bar1,1)], []) - ). +%% %% verify only id and rev are used in key. +%% update_counter_test() -> +%% Reply = {ok, #doc{id = <<"id">>, revs = <<"rev">>, +%% body = <<"body">>, atts = <<"atts">>}}, +%% ?assertEqual([{{<<"id">>,<<"rev">>}, {Reply, 1}}], +%% update_counter(Reply, 1, [])). +%% +%% remove_ancestors_test() -> +%% Foo1 = {ok, #doc{revs = {1, [<<"foo">>]}}}, +%% Foo2 = {ok, #doc{revs = {2, [<<"foo2">>, <<"foo">>]}}}, +%% Bar1 = {ok, #doc{revs = {1, [<<"bar">>]}}}, +%% Bar2 = {not_found, {1,<<"bar">>}}, +%% ?assertEqual( +%% [kv(Bar1,1), kv(Foo1,1)], +%% remove_ancestors([kv(Bar1,1), kv(Foo1,1)], []) +%% ), +%% ?assertEqual( +%% [kv(Bar1,1), kv(Foo2,2)], +%% remove_ancestors([kv(Bar1,1), kv(Foo1,1), kv(Foo2,1)], []) +%% ), +%% ?assertEqual( +%% [kv(Bar1,2)], +%% remove_ancestors([kv(Bar2,1), kv(Bar1,1)], []) +%% ). is_replicator_db(DbName) -> path_ends_with(DbName, <<"_replicator">>). diff --git a/src/fabric/src/fabric_view.erl b/src/fabric/src/fabric_view.erl index 425f864c4..6c33e1e32 100644 --- a/src/fabric/src/fabric_view.erl +++ b/src/fabric/src/fabric_view.erl @@ -413,66 +413,66 @@ remove_finalizer(Args) -> couch_mrview_util:set_extra(Args, finalizer, null). -remove_overlapping_shards_test() -> - Cb = undefined, - - Shards = mk_cnts([[0, 10], [11, 20], [21, ?RING_END]], 3), - - % Simple (exact) overlap - Shard1 = mk_shard("node-3", [11, 20]), - Shards1 = fabric_dict:store(Shard1, nil, Shards), - R1 = remove_overlapping_shards(Shard1, Shards1, Cb), - ?assertEqual([{0, 10}, {11, 20}, {21, ?RING_END}], - fabric_util:worker_ranges(R1)), - ?assert(fabric_dict:is_key(Shard1, R1)), - - % Split overlap (shard overlap multiple workers) - Shard2 = mk_shard("node-3", [0, 20]), - Shards2 = fabric_dict:store(Shard2, nil, Shards), - R2 = remove_overlapping_shards(Shard2, Shards2, Cb), - ?assertEqual([{0, 20}, {21, ?RING_END}], - fabric_util:worker_ranges(R2)), - ?assert(fabric_dict:is_key(Shard2, R2)). - - -get_shard_replacements_test() -> - Unused = [mk_shard(N, [B, E]) || {N, B, E} <- [ - {"n1", 11, 20}, {"n1", 21, ?RING_END}, - {"n2", 0, 4}, {"n2", 5, 10}, {"n2", 11, 20}, - {"n3", 0, 21, ?RING_END} - ]], - Used = [mk_shard(N, [B, E]) || {N, B, E} <- [ - {"n2", 21, ?RING_END}, - {"n3", 0, 10}, {"n3", 11, 20} - ]], - Res = lists:sort(get_shard_replacements_int(Unused, Used)), - % Notice that [0, 10] range can be replaced by spawning the [0, 4] and [5, - % 10] workers on n1 - Expect = [ - {[0, 10], [mk_shard("n2", [0, 4]), mk_shard("n2", [5, 10])]}, - {[11, 20], [mk_shard("n1", [11, 20]), mk_shard("n2", [11, 20])]}, - {[21, ?RING_END], [mk_shard("n1", [21, ?RING_END])]} - ], - ?assertEqual(Expect, Res). - - -mk_cnts(Ranges, NoNodes) -> - orddict:from_list([{Shard,nil} - || Shard <- - lists:flatten(lists:map( - fun(Range) -> - mk_shards(NoNodes,Range,[]) - end, Ranges))] - ). - -mk_shards(0,_Range,Shards) -> - Shards; -mk_shards(NoNodes,Range,Shards) -> - Name ="node-" ++ integer_to_list(NoNodes), - mk_shards(NoNodes-1,Range, [mk_shard(Name, Range) | Shards]). - - -mk_shard(Name, Range) -> - Node = list_to_atom(Name), - BName = list_to_binary(Name), - #shard{name = BName, node = Node, range = Range}. +%% remove_overlapping_shards_test() -> +%% Cb = undefined, +%% +%% Shards = mk_cnts([[0, 10], [11, 20], [21, ?RING_END]], 3), +%% +%% % Simple (exact) overlap +%% Shard1 = mk_shard("node-3", [11, 20]), +%% Shards1 = fabric_dict:store(Shard1, nil, Shards), +%% R1 = remove_overlapping_shards(Shard1, Shards1, Cb), +%% ?assertEqual([{0, 10}, {11, 20}, {21, ?RING_END}], +%% fabric_util:worker_ranges(R1)), +%% ?assert(fabric_dict:is_key(Shard1, R1)), +%% +%% % Split overlap (shard overlap multiple workers) +%% Shard2 = mk_shard("node-3", [0, 20]), +%% Shards2 = fabric_dict:store(Shard2, nil, Shards), +%% R2 = remove_overlapping_shards(Shard2, Shards2, Cb), +%% ?assertEqual([{0, 20}, {21, ?RING_END}], +%% fabric_util:worker_ranges(R2)), +%% ?assert(fabric_dict:is_key(Shard2, R2)). +%% +%% +%% get_shard_replacements_test() -> +%% Unused = [mk_shard(N, [B, E]) || {N, B, E} <- [ +%% {"n1", 11, 20}, {"n1", 21, ?RING_END}, +%% {"n2", 0, 4}, {"n2", 5, 10}, {"n2", 11, 20}, +%% {"n3", 0, 21, ?RING_END} +%% ]], +%% Used = [mk_shard(N, [B, E]) || {N, B, E} <- [ +%% {"n2", 21, ?RING_END}, +%% {"n3", 0, 10}, {"n3", 11, 20} +%% ]], +%% Res = lists:sort(get_shard_replacements_int(Unused, Used)), +%% % Notice that [0, 10] range can be replaced by spawning the [0, 4] and [5, +%% % 10] workers on n1 +%% Expect = [ +%% {[0, 10], [mk_shard("n2", [0, 4]), mk_shard("n2", [5, 10])]}, +%% {[11, 20], [mk_shard("n1", [11, 20]), mk_shard("n2", [11, 20])]}, +%% {[21, ?RING_END], [mk_shard("n1", [21, ?RING_END])]} +%% ], +%% ?assertEqual(Expect, Res). +%% +%% +%% mk_cnts(Ranges, NoNodes) -> +%% orddict:from_list([{Shard,nil} +%% || Shard <- +%% lists:flatten(lists:map( +%% fun(Range) -> +%% mk_shards(NoNodes,Range,[]) +%% end, Ranges))] +%% ). +%% +%% mk_shards(0,_Range,Shards) -> +%% Shards; +%% mk_shards(NoNodes,Range,Shards) -> +%% Name ="node-" ++ integer_to_list(NoNodes), +%% mk_shards(NoNodes-1,Range, [mk_shard(Name, Range) | Shards]). +%% +%% +%% mk_shard(Name, Range) -> +%% Node = list_to_atom(Name), +%% BName = list_to_binary(Name), +%% #shard{name = BName, node = Node, range = Range}. diff --git a/src/fabric/src/fabric_view_changes.erl b/src/fabric/src/fabric_view_changes.erl index febbd3169..3f684a3cc 100644 --- a/src/fabric/src/fabric_view_changes.erl +++ b/src/fabric/src/fabric_view_changes.erl @@ -637,184 +637,184 @@ increment_changes_epoch() -> application:set_env(fabric, changes_epoch, os:timestamp()). -unpack_seq_setup() -> - meck:new(mem3), - meck:new(fabric_view), - meck:expect(mem3, get_shard, fun(_, _, _) -> {ok, #shard{}} end), - meck:expect(fabric_ring, is_progress_possible, fun(_) -> true end), - ok. - - -unpack_seqs_test_() -> - { - setup, - fun unpack_seq_setup/0, - fun (_) -> meck:unload() end, - [ - t_unpack_seqs() - ] - }. - - -t_unpack_seqs() -> - ?_test(begin - % BigCouch 0.3 style. - assert_shards("23423-g1AAAAE7eJzLYWBg4MhgTmHgS0ktM3QwND" - "LXMwBCwxygOFMiQ5L8____sxIZcKlIUgCSSfZgRUw4FTmAFMWDFTHiVJQAUlSPX1Ee" - "C5BkaABSQHXzsxKZ8StcAFG4H4_bIAoPQBTeJ2j1A4hCUJBkAQC7U1NA"), - - % BigCouch 0.4 style. - assert_shards([23423,<<"g1AAAAE7eJzLYWBg4MhgTmHgS0ktM3QwND" - "LXMwBCwxygOFMiQ5L8____sxIZcKlIUgCSSfZgRUw4FTmAFMWDFTHiVJQAUlSPX1Ee" - "C5BkaABSQHXzsxKZ8StcAFG4H4_bIAoPQBTeJ2j1A4hCUJBkAQC7U1NA">>]), - - % BigCouch 0.4 style (as string). - assert_shards("[23423,\"g1AAAAE7eJzLYWBg4MhgTmHgS0ktM3QwND" - "LXMwBCwxygOFMiQ5L8____sxIZcKlIUgCSSfZgRUw4FTmAFMWDFTHiVJQAUlSPX1Ee" - "C5BkaABSQHXzsxKZ8StcAFG4H4_bIAoPQBTeJ2j1A4hCUJBkAQC7U1NA\"]"), - assert_shards("[23423 ,\"g1AAAAE7eJzLYWBg4MhgTmHgS0ktM3QwND" - "LXMwBCwxygOFMiQ5L8____sxIZcKlIUgCSSfZgRUw4FTmAFMWDFTHiVJQAUlSPX1Ee" - "C5BkaABSQHXzsxKZ8StcAFG4H4_bIAoPQBTeJ2j1A4hCUJBkAQC7U1NA\"]"), - assert_shards("[23423, \"g1AAAAE7eJzLYWBg4MhgTmHgS0ktM3QwND" - "LXMwBCwxygOFMiQ5L8____sxIZcKlIUgCSSfZgRUw4FTmAFMWDFTHiVJQAUlSPX1Ee" - "C5BkaABSQHXzsxKZ8StcAFG4H4_bIAoPQBTeJ2j1A4hCUJBkAQC7U1NA\"]"), - assert_shards("[23423 , \"g1AAAAE7eJzLYWBg4MhgTmHgS0ktM3QwND" - "LXMwBCwxygOFMiQ5L8____sxIZcKlIUgCSSfZgRUw4FTmAFMWDFTHiVJQAUlSPX1Ee" - "C5BkaABSQHXzsxKZ8StcAFG4H4_bIAoPQBTeJ2j1A4hCUJBkAQC7U1NA\"]"), - - % with internal hypen - assert_shards("651-g1AAAAE7eJzLYWBg4MhgTmHgS0ktM3QwNDLXMwBCwxygOFMiQ" - "5L8____sxJTcalIUgCSSfZgReE4FTmAFMWDFYXgVJQAUlQPVuSKS1EeC5BkaABSQHXz8" - "VgJUbgAonB_VqIPfoUHIArvE7T6AUQh0I1-WQAzp1XB"), - assert_shards([651,"g1AAAAE7eJzLYWBg4MhgTmHgS0ktM3QwNDLXMwBCwxygOFMiQ" - "5L8____sxJTcalIUgCSSfZgReE4FTmAFMWDFYXgVJQAUlQPVuSKS1EeC5BkaABSQHXz8" - "VgJUbgAonB_VqIPfoUHIArvE7T6AUQh0I1-WQAzp1XB"]), - - % CouchDB 1.2 style - assert_shards("\"23423-g1AAAAE7eJzLYWBg4MhgTmHgS0ktM3QwND" - "LXMwBCwxygOFMiQ5L8____sxIZcKlIUgCSSfZgRUw4FTmAFMWDFTHiVJQAUlSPX1Ee" - "C5BkaABSQHXzsxKZ8StcAFG4H4_bIAoPQBTeJ2j1A4hCUJBkAQC7U1NA\"") - end). - - -assert_shards(Packed) -> - ?assertMatch([{#shard{},_}|_], unpack_seqs(Packed, <<"foo">>)). - - -find_replacements_test() -> - % None of the workers are in the live list of shard but there is a - % replacement on n3 for the full range. It should get picked instead of - % the two smaller one on n2. - Workers1 = mk_workers([{"n1", 0, 10}, {"n2", 11, ?RING_END}]), - AllShards1 = [ - mk_shard("n1", 11, ?RING_END), - mk_shard("n2", 0, 4), - mk_shard("n2", 5, 10), - mk_shard("n3", 0, ?RING_END) - ], - {WorkersRes1, Dead1, Reps1} = find_replacements(Workers1, AllShards1), - ?assertEqual([], WorkersRes1), - ?assertEqual(Workers1, Dead1), - ?assertEqual([mk_shard("n3", 0, ?RING_END)], Reps1), - - % None of the workers are in the live list of shards and there is a - % split replacement from n2 (range [0, 10] replaced with [0, 4], [5, 10]) - Workers2 = mk_workers([{"n1", 0, 10}, {"n2", 11, ?RING_END}]), - AllShards2 = [ - mk_shard("n1", 11, ?RING_END), - mk_shard("n2", 0, 4), - mk_shard("n2", 5, 10) - ], - {WorkersRes2, Dead2, Reps2} = find_replacements(Workers2, AllShards2), - ?assertEqual([], WorkersRes2), - ?assertEqual(Workers2, Dead2), - ?assertEqual([ - mk_shard("n1", 11, ?RING_END), - mk_shard("n2", 0, 4), - mk_shard("n2", 5, 10) - ], lists:sort(Reps2)), - - % One worker is available and one needs to be replaced. Replacement will be - % from two split shards - Workers3 = mk_workers([{"n1", 0, 10}, {"n2", 11, ?RING_END}]), - AllShards3 = [ - mk_shard("n1", 11, ?RING_END), - mk_shard("n2", 0, 4), - mk_shard("n2", 5, 10), - mk_shard("n2", 11, ?RING_END) - ], - {WorkersRes3, Dead3, Reps3} = find_replacements(Workers3, AllShards3), - ?assertEqual(mk_workers([{"n2", 11, ?RING_END}]), WorkersRes3), - ?assertEqual(mk_workers([{"n1", 0, 10}]), Dead3), - ?assertEqual([ - mk_shard("n2", 0, 4), - mk_shard("n2", 5, 10) - ], lists:sort(Reps3)), - - % All workers are available. Make sure they are not killed even if there is - % a longer (single) shard to replace them. - Workers4 = mk_workers([{"n1", 0, 10}, {"n1", 11, ?RING_END}]), - AllShards4 = [ - mk_shard("n1", 0, 10), - mk_shard("n1", 11, ?RING_END), - mk_shard("n2", 0, 4), - mk_shard("n2", 5, 10), - mk_shard("n3", 0, ?RING_END) - ], - {WorkersRes4, Dead4, Reps4} = find_replacements(Workers4, AllShards4), - ?assertEqual(Workers4, WorkersRes4), - ?assertEqual([], Dead4), - ?assertEqual([], Reps4). - - -mk_workers(NodesRanges) -> - mk_workers(NodesRanges, nil). - -mk_workers(NodesRanges, Val) -> - orddict:from_list([{mk_shard(N, B, E), Val} || {N, B, E} <- NodesRanges]). - - -mk_shard(Name, B, E) -> - Node = list_to_atom(Name), - BName = list_to_binary(Name), - #shard{name = BName, node = Node, range = [B, E]}. - - -find_split_shard_replacements_test() -> - % One worker is can be replaced and one can't - Dead1 = mk_workers([{"n1", 0, 10}, {"n2", 11, ?RING_END}], 42), - Shards1 = [ - mk_shard("n1", 0, 4), - mk_shard("n1", 5, 10), - mk_shard("n3", 11, ?RING_END) - ], - {Workers1, ShardsLeft1} = find_split_shard_replacements(Dead1, Shards1), - ?assertEqual(mk_workers([{"n1", 0, 4}, {"n1", 5, 10}], 42), Workers1), - ?assertEqual([mk_shard("n3", 11, ?RING_END)], ShardsLeft1), - - % All workers can be replaced - one by 1 shard, another by 3 smaller shards - Dead2 = mk_workers([{"n1", 0, 10}, {"n2", 11, ?RING_END}], 42), - Shards2 = [ - mk_shard("n1", 0, 10), - mk_shard("n2", 11, 12), - mk_shard("n2", 13, 14), - mk_shard("n2", 15, ?RING_END) - ], - {Workers2, ShardsLeft2} = find_split_shard_replacements(Dead2, Shards2), - ?assertEqual(mk_workers([ - {"n1", 0, 10}, - {"n2", 11, 12}, - {"n2", 13, 14}, - {"n2", 15, ?RING_END} - ], 42), Workers2), - ?assertEqual([], ShardsLeft2), - - % No workers can be replaced. Ranges match but they are on different nodes - Dead3 = mk_workers([{"n1", 0, 10}, {"n2", 11, ?RING_END}], 42), - Shards3 = [ - mk_shard("n2", 0, 10), - mk_shard("n3", 11, ?RING_END) - ], - {Workers3, ShardsLeft3} = find_split_shard_replacements(Dead3, Shards3), - ?assertEqual([], Workers3), - ?assertEqual(Shards3, ShardsLeft3). +%% unpack_seq_setup() -> +%% meck:new(mem3), +%% meck:new(fabric_view), +%% meck:expect(mem3, get_shard, fun(_, _, _) -> {ok, #shard{}} end), +%% meck:expect(fabric_ring, is_progress_possible, fun(_) -> true end), +%% ok. +%% +%% +%% unpack_seqs_test_() -> +%% { +%% setup, +%% fun unpack_seq_setup/0, +%% fun (_) -> meck:unload() end, +%% [ +%% t_unpack_seqs() +%% ] +%% }. +%% +%% +%% t_unpack_seqs() -> +%% ?_test(begin +%% % BigCouch 0.3 style. +%% assert_shards("23423-g1AAAAE7eJzLYWBg4MhgTmHgS0ktM3QwND" +%% "LXMwBCwxygOFMiQ5L8____sxIZcKlIUgCSSfZgRUw4FTmAFMWDFTHiVJQAUlSPX1Ee" +%% "C5BkaABSQHXzsxKZ8StcAFG4H4_bIAoPQBTeJ2j1A4hCUJBkAQC7U1NA"), +%% +%% % BigCouch 0.4 style. +%% assert_shards([23423,<<"g1AAAAE7eJzLYWBg4MhgTmHgS0ktM3QwND" +%% "LXMwBCwxygOFMiQ5L8____sxIZcKlIUgCSSfZgRUw4FTmAFMWDFTHiVJQAUlSPX1Ee" +%% "C5BkaABSQHXzsxKZ8StcAFG4H4_bIAoPQBTeJ2j1A4hCUJBkAQC7U1NA">>]), +%% +%% % BigCouch 0.4 style (as string). +%% assert_shards("[23423,\"g1AAAAE7eJzLYWBg4MhgTmHgS0ktM3QwND" +%% "LXMwBCwxygOFMiQ5L8____sxIZcKlIUgCSSfZgRUw4FTmAFMWDFTHiVJQAUlSPX1Ee" +%% "C5BkaABSQHXzsxKZ8StcAFG4H4_bIAoPQBTeJ2j1A4hCUJBkAQC7U1NA\"]"), +%% assert_shards("[23423 ,\"g1AAAAE7eJzLYWBg4MhgTmHgS0ktM3QwND" +%% "LXMwBCwxygOFMiQ5L8____sxIZcKlIUgCSSfZgRUw4FTmAFMWDFTHiVJQAUlSPX1Ee" +%% "C5BkaABSQHXzsxKZ8StcAFG4H4_bIAoPQBTeJ2j1A4hCUJBkAQC7U1NA\"]"), +%% assert_shards("[23423, \"g1AAAAE7eJzLYWBg4MhgTmHgS0ktM3QwND" +%% "LXMwBCwxygOFMiQ5L8____sxIZcKlIUgCSSfZgRUw4FTmAFMWDFTHiVJQAUlSPX1Ee" +%% "C5BkaABSQHXzsxKZ8StcAFG4H4_bIAoPQBTeJ2j1A4hCUJBkAQC7U1NA\"]"), +%% assert_shards("[23423 , \"g1AAAAE7eJzLYWBg4MhgTmHgS0ktM3QwND" +%% "LXMwBCwxygOFMiQ5L8____sxIZcKlIUgCSSfZgRUw4FTmAFMWDFTHiVJQAUlSPX1Ee" +%% "C5BkaABSQHXzsxKZ8StcAFG4H4_bIAoPQBTeJ2j1A4hCUJBkAQC7U1NA\"]"), +%% +%% % with internal hypen +%% assert_shards("651-g1AAAAE7eJzLYWBg4MhgTmHgS0ktM3QwNDLXMwBCwxygOFMiQ" +%% "5L8____sxJTcalIUgCSSfZgReE4FTmAFMWDFYXgVJQAUlQPVuSKS1EeC5BkaABSQHXz8" +%% "VgJUbgAonB_VqIPfoUHIArvE7T6AUQh0I1-WQAzp1XB"), +%% assert_shards([651,"g1AAAAE7eJzLYWBg4MhgTmHgS0ktM3QwNDLXMwBCwxygOFMiQ" +%% "5L8____sxJTcalIUgCSSfZgReE4FTmAFMWDFYXgVJQAUlQPVuSKS1EeC5BkaABSQHXz8" +%% "VgJUbgAonB_VqIPfoUHIArvE7T6AUQh0I1-WQAzp1XB"]), +%% +%% % CouchDB 1.2 style +%% assert_shards("\"23423-g1AAAAE7eJzLYWBg4MhgTmHgS0ktM3QwND" +%% "LXMwBCwxygOFMiQ5L8____sxIZcKlIUgCSSfZgRUw4FTmAFMWDFTHiVJQAUlSPX1Ee" +%% "C5BkaABSQHXzsxKZ8StcAFG4H4_bIAoPQBTeJ2j1A4hCUJBkAQC7U1NA\"") +%% end). +%% +%% +%% assert_shards(Packed) -> +%% ?assertMatch([{#shard{},_}|_], unpack_seqs(Packed, <<"foo">>)). +%% +%% +%% find_replacements_test() -> +%% % None of the workers are in the live list of shard but there is a +%% % replacement on n3 for the full range. It should get picked instead of +%% % the two smaller one on n2. +%% Workers1 = mk_workers([{"n1", 0, 10}, {"n2", 11, ?RING_END}]), +%% AllShards1 = [ +%% mk_shard("n1", 11, ?RING_END), +%% mk_shard("n2", 0, 4), +%% mk_shard("n2", 5, 10), +%% mk_shard("n3", 0, ?RING_END) +%% ], +%% {WorkersRes1, Dead1, Reps1} = find_replacements(Workers1, AllShards1), +%% ?assertEqual([], WorkersRes1), +%% ?assertEqual(Workers1, Dead1), +%% ?assertEqual([mk_shard("n3", 0, ?RING_END)], Reps1), +%% +%% % None of the workers are in the live list of shards and there is a +%% % split replacement from n2 (range [0, 10] replaced with [0, 4], [5, 10]) +%% Workers2 = mk_workers([{"n1", 0, 10}, {"n2", 11, ?RING_END}]), +%% AllShards2 = [ +%% mk_shard("n1", 11, ?RING_END), +%% mk_shard("n2", 0, 4), +%% mk_shard("n2", 5, 10) +%% ], +%% {WorkersRes2, Dead2, Reps2} = find_replacements(Workers2, AllShards2), +%% ?assertEqual([], WorkersRes2), +%% ?assertEqual(Workers2, Dead2), +%% ?assertEqual([ +%% mk_shard("n1", 11, ?RING_END), +%% mk_shard("n2", 0, 4), +%% mk_shard("n2", 5, 10) +%% ], lists:sort(Reps2)), +%% +%% % One worker is available and one needs to be replaced. Replacement will be +%% % from two split shards +%% Workers3 = mk_workers([{"n1", 0, 10}, {"n2", 11, ?RING_END}]), +%% AllShards3 = [ +%% mk_shard("n1", 11, ?RING_END), +%% mk_shard("n2", 0, 4), +%% mk_shard("n2", 5, 10), +%% mk_shard("n2", 11, ?RING_END) +%% ], +%% {WorkersRes3, Dead3, Reps3} = find_replacements(Workers3, AllShards3), +%% ?assertEqual(mk_workers([{"n2", 11, ?RING_END}]), WorkersRes3), +%% ?assertEqual(mk_workers([{"n1", 0, 10}]), Dead3), +%% ?assertEqual([ +%% mk_shard("n2", 0, 4), +%% mk_shard("n2", 5, 10) +%% ], lists:sort(Reps3)), +%% +%% % All workers are available. Make sure they are not killed even if there is +%% % a longer (single) shard to replace them. +%% Workers4 = mk_workers([{"n1", 0, 10}, {"n1", 11, ?RING_END}]), +%% AllShards4 = [ +%% mk_shard("n1", 0, 10), +%% mk_shard("n1", 11, ?RING_END), +%% mk_shard("n2", 0, 4), +%% mk_shard("n2", 5, 10), +%% mk_shard("n3", 0, ?RING_END) +%% ], +%% {WorkersRes4, Dead4, Reps4} = find_replacements(Workers4, AllShards4), +%% ?assertEqual(Workers4, WorkersRes4), +%% ?assertEqual([], Dead4), +%% ?assertEqual([], Reps4). +%% +%% +%% mk_workers(NodesRanges) -> +%% mk_workers(NodesRanges, nil). +%% +%% mk_workers(NodesRanges, Val) -> +%% orddict:from_list([{mk_shard(N, B, E), Val} || {N, B, E} <- NodesRanges]). +%% +%% +%% mk_shard(Name, B, E) -> +%% Node = list_to_atom(Name), +%% BName = list_to_binary(Name), +%% #shard{name = BName, node = Node, range = [B, E]}. +%% +%% +%% find_split_shard_replacements_test() -> +%% % One worker is can be replaced and one can't +%% Dead1 = mk_workers([{"n1", 0, 10}, {"n2", 11, ?RING_END}], 42), +%% Shards1 = [ +%% mk_shard("n1", 0, 4), +%% mk_shard("n1", 5, 10), +%% mk_shard("n3", 11, ?RING_END) +%% ], +%% {Workers1, ShardsLeft1} = find_split_shard_replacements(Dead1, Shards1), +%% ?assertEqual(mk_workers([{"n1", 0, 4}, {"n1", 5, 10}], 42), Workers1), +%% ?assertEqual([mk_shard("n3", 11, ?RING_END)], ShardsLeft1), +%% +%% % All workers can be replaced - one by 1 shard, another by 3 smaller shards +%% Dead2 = mk_workers([{"n1", 0, 10}, {"n2", 11, ?RING_END}], 42), +%% Shards2 = [ +%% mk_shard("n1", 0, 10), +%% mk_shard("n2", 11, 12), +%% mk_shard("n2", 13, 14), +%% mk_shard("n2", 15, ?RING_END) +%% ], +%% {Workers2, ShardsLeft2} = find_split_shard_replacements(Dead2, Shards2), +%% ?assertEqual(mk_workers([ +%% {"n1", 0, 10}, +%% {"n2", 11, 12}, +%% {"n2", 13, 14}, +%% {"n2", 15, ?RING_END} +%% ], 42), Workers2), +%% ?assertEqual([], ShardsLeft2), +%% +%% % No workers can be replaced. Ranges match but they are on different nodes +%% Dead3 = mk_workers([{"n1", 0, 10}, {"n2", 11, ?RING_END}], 42), +%% Shards3 = [ +%% mk_shard("n2", 0, 10), +%% mk_shard("n3", 11, ?RING_END) +%% ], +%% {Workers3, ShardsLeft3} = find_split_shard_replacements(Dead3, Shards3), +%% ?assertEqual([], Workers3), +%% ?assertEqual(Shards3, ShardsLeft3). diff --git a/src/fabric/test/eunit/fabric_rpc_purge_tests.erl b/src/fabric/test/eunit/fabric_rpc_purge_tests.erl deleted file mode 100644 index 6db6a70aa..000000000 --- a/src/fabric/test/eunit/fabric_rpc_purge_tests.erl +++ /dev/null @@ -1,307 +0,0 @@ -% Licensed under the Apache License, Version 2.0 (the "License"); you may not -% use this file except in compliance with the License. You may obtain a copy of -% the License at -% -% http://www.apache.org/licenses/LICENSE-2.0 -% -% Unless required by applicable law or agreed to in writing, software -% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -% License for the specific language governing permissions and limitations under -% the License. - --module(fabric_rpc_purge_tests). - - --include_lib("couch/include/couch_eunit.hrl"). --include_lib("couch/include/couch_db.hrl"). - - --define(TDEF(A), {A, fun A/1}). - -% TODO: Add tests: -% - filter some updates -% - allow for an update that was filtered by a node -% - ignore lagging nodes - -main_test_() -> - { - setup, - spawn, - fun setup_all/0, - fun teardown_all/1, - [ - { - foreach, - fun setup_no_purge/0, - fun teardown_no_purge/1, - lists:map(fun wrap/1, [ - ?TDEF(t_no_purge_no_filter) - ]) - }, - { - foreach, - fun setup_single_purge/0, - fun teardown_single_purge/1, - lists:map(fun wrap/1, [ - ?TDEF(t_filter), - ?TDEF(t_filter_unknown_node), - ?TDEF(t_filter_local_node), - ?TDEF(t_no_filter_old_node), - ?TDEF(t_no_filter_different_node), - ?TDEF(t_no_filter_after_repl) - ]) - }, - { - foreach, - fun setup_multi_purge/0, - fun teardown_multi_purge/1, - lists:map(fun wrap/1, [ - ?TDEF(t_filter), - ?TDEF(t_filter_unknown_node), - ?TDEF(t_filter_local_node), - ?TDEF(t_no_filter_old_node), - ?TDEF(t_no_filter_different_node), - ?TDEF(t_no_filter_after_repl) - ]) - } - ] - }. - - -setup_all() -> - test_util:start_couch(). - - -teardown_all(Ctx) -> - test_util:stop_couch(Ctx). - - -setup_no_purge() -> - {ok, Db} = create_db(), - populate_db(Db), - couch_db:name(Db). - - -teardown_no_purge(DbName) -> - ok = couch_server:delete(DbName, []). - - -setup_single_purge() -> - DbName = setup_no_purge(), - DocId = <<"0003">>, - {ok, OldDoc} = open_doc(DbName, DocId), - purge_doc(DbName, DocId), - {DbName, DocId, OldDoc, 1}. - - -teardown_single_purge({DbName, _, _, _}) -> - teardown_no_purge(DbName). - - -setup_multi_purge() -> - DbName = setup_no_purge(), - DocId = <<"0003">>, - {ok, OldDoc} = open_doc(DbName, DocId), - lists:foreach(fun(I) -> - PDocId = iolist_to_binary(io_lib:format("~4..0b", [I])), - purge_doc(DbName, PDocId) - end, lists:seq(1, 5)), - {DbName, DocId, OldDoc, 3}. - - -teardown_multi_purge(Ctx) -> - teardown_single_purge(Ctx). - - -t_no_purge_no_filter(DbName) -> - DocId = <<"0003">>, - - {ok, OldDoc} = open_doc(DbName, DocId), - NewDoc = create_update(OldDoc, 2), - - rpc_update_doc(DbName, NewDoc), - - {ok, CurrDoc} = open_doc(DbName, DocId), - ?assert(CurrDoc /= OldDoc), - ?assert(CurrDoc == NewDoc). - - -t_filter({DbName, DocId, OldDoc, _PSeq}) -> - ?assertEqual({not_found, missing}, open_doc(DbName, DocId)), - create_purge_checkpoint(DbName, 0), - - rpc_update_doc(DbName, OldDoc), - - ?assertEqual({not_found, missing}, open_doc(DbName, DocId)). - - -t_filter_unknown_node({DbName, DocId, OldDoc, _PSeq}) -> - % Unknown nodes are assumed to start at PurgeSeq = 0 - ?assertEqual({not_found, missing}, open_doc(DbName, DocId)), - create_purge_checkpoint(DbName, 0), - - {Pos, [Rev | _]} = OldDoc#doc.revs, - RROpt = {read_repair, [{'blargh@127.0.0.1', [{Pos, Rev}]}]}, - rpc_update_doc(DbName, OldDoc, [RROpt]), - - ?assertEqual({not_found, missing}, open_doc(DbName, DocId)). - - -t_no_filter_old_node({DbName, DocId, OldDoc, PSeq}) -> - ?assertEqual({not_found, missing}, open_doc(DbName, DocId)), - create_purge_checkpoint(DbName, PSeq), - - % The random UUID is to generate a badarg exception when - % we try and convert it to an existing atom. - create_purge_checkpoint(DbName, 0, couch_uuids:random()), - - rpc_update_doc(DbName, OldDoc), - - ?assertEqual({ok, OldDoc}, open_doc(DbName, DocId)). - - -t_no_filter_different_node({DbName, DocId, OldDoc, PSeq}) -> - ?assertEqual({not_found, missing}, open_doc(DbName, DocId)), - create_purge_checkpoint(DbName, PSeq), - - % Create a valid purge for a different node - TgtNode = list_to_binary(atom_to_list('notfoo@127.0.0.1')), - create_purge_checkpoint(DbName, 0, TgtNode), - - rpc_update_doc(DbName, OldDoc), - - ?assertEqual({ok, OldDoc}, open_doc(DbName, DocId)). - - -t_filter_local_node({DbName, DocId, OldDoc, PSeq}) -> - ?assertEqual({not_found, missing}, open_doc(DbName, DocId)), - create_purge_checkpoint(DbName, PSeq), - - % Create a valid purge for a different node - TgtNode = list_to_binary(atom_to_list('notfoo@127.0.0.1')), - create_purge_checkpoint(DbName, 0, TgtNode), - - % Add a local node rev to the list of node revs. It should - % be filtered out - {Pos, [Rev | _]} = OldDoc#doc.revs, - RROpts = [{read_repair, [ - {tgt_node(), [{Pos, Rev}]}, - {node(), [{1, <<"123">>}]} - ]}], - rpc_update_doc(DbName, OldDoc, RROpts), - - ?assertEqual({ok, OldDoc}, open_doc(DbName, DocId)). - - -t_no_filter_after_repl({DbName, DocId, OldDoc, PSeq}) -> - ?assertEqual({not_found, missing}, open_doc(DbName, DocId)), - create_purge_checkpoint(DbName, PSeq), - - rpc_update_doc(DbName, OldDoc), - - ?assertEqual({ok, OldDoc}, open_doc(DbName, DocId)). - - -wrap({Name, Fun}) -> - fun(Arg) -> - {timeout, 60, {atom_to_list(Name), fun() -> - process_flag(trap_exit, true), - Fun(Arg) - end}} - end. - - -create_db() -> - DbName = ?tempdb(), - couch_db:create(DbName, [?ADMIN_CTX]). - - -populate_db(Db) -> - Docs = lists:map(fun(Idx) -> - DocId = lists:flatten(io_lib:format("~4..0b", [Idx])), - #doc{ - id = list_to_binary(DocId), - body = {[{<<"int">>, Idx}, {<<"vsn">>, 2}]} - } - end, lists:seq(1, 100)), - {ok, _} = couch_db:update_docs(Db, Docs). - - -open_doc(DbName, DocId) -> - couch_util:with_db(DbName, fun(Db) -> - couch_db:open_doc(Db, DocId, []) - end). - - -create_update(Doc, NewVsn) -> - #doc{ - id = DocId, - revs = {Pos, [Rev | _] = Revs}, - body = {Props} - } = Doc, - NewProps = lists:keyreplace(<<"vsn">>, 1, Props, {<<"vsn">>, NewVsn}), - NewRev = couch_hash:md5_hash(term_to_binary({DocId, Rev, {NewProps}})), - Doc#doc{ - revs = {Pos + 1, [NewRev | Revs]}, - body = {NewProps} - }. - - -purge_doc(DbName, DocId) -> - {ok, Doc} = open_doc(DbName, DocId), - {Pos, [Rev | _]} = Doc#doc.revs, - PInfo = {couch_uuids:random(), DocId, [{Pos, Rev}]}, - Resp = couch_util:with_db(DbName, fun(Db) -> - couch_db:purge_docs(Db, [PInfo], []) - end), - ?assertEqual({ok, [{ok, [{Pos, Rev}]}]}, Resp). - - -create_purge_checkpoint(DbName, PurgeSeq) -> - create_purge_checkpoint(DbName, PurgeSeq, tgt_node_bin()). - - -create_purge_checkpoint(DbName, PurgeSeq, TgtNode) when is_binary(TgtNode) -> - Resp = couch_util:with_db(DbName, fun(Db) -> - SrcUUID = couch_db:get_uuid(Db), - TgtUUID = couch_uuids:random(), - CPDoc = #doc{ - id = mem3_rep:make_purge_id(SrcUUID, TgtUUID), - body = {[ - {<<"target_node">>, TgtNode}, - {<<"purge_seq">>, PurgeSeq} - ]} - }, - couch_db:update_docs(Db, [CPDoc], []) - end), - ?assertMatch({ok, [_]}, Resp). - - -rpc_update_doc(DbName, Doc) -> - {Pos, [Rev | _]} = Doc#doc.revs, - RROpt = {read_repair, [{tgt_node(), [{Pos, Rev}]}]}, - rpc_update_doc(DbName, Doc, [RROpt]). - - -rpc_update_doc(DbName, Doc, Opts) -> - Ref = erlang:make_ref(), - put(rexi_from, {self(), Ref}), - fabric_rpc:update_docs(DbName, [Doc], Opts), - Reply = test_util:wait(fun() -> - receive - {Ref, Reply} -> - Reply - after 0 -> - wait - end - end), - ?assertEqual({ok, []}, Reply). - - -tgt_node() -> - 'foo@127.0.0.1'. - - -tgt_node_bin() -> - iolist_to_binary(atom_to_list(tgt_node())). -- cgit v1.2.1 From fb8f2a0aeecdeaf6e0841df18e51d81a2c257096 Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Wed, 5 Jun 2019 13:29:33 -0500 Subject: Initial fabric2 implementation on FoundationDB This provides a base implementation of a fabric API backed by FoundationDB. While a lot of functionality is provided there are a number of places that still require work. An incomplete list includes: 1. Document bodies are currently a single key/value 2. Attachments are stored as a range of key/value pairs 3. There is no support for indexing 4. Request size limits are not enforced directly 5. Auth is still backed by a legacy CouchDB database 6. No support for before_doc_update/after_doc_read 7. Various implementation shortcuts need to be expanded for full API support. --- FDB_NOTES.md | 57 ++ src/couch/src/couch_att.erl | 661 ++++++++----------- src/couch/src/couch_doc.erl | 11 + src/fabric/src/fabric.app.src | 8 +- src/fabric/src/fabric2.hrl | 66 ++ src/fabric/src/fabric2_app.erl | 32 + src/fabric/src/fabric2_db.erl | 1299 +++++++++++++++++++++++++++++++++++++ src/fabric/src/fabric2_events.erl | 84 +++ src/fabric/src/fabric2_fdb.erl | 1187 +++++++++++++++++++++++++++++++++ src/fabric/src/fabric2_server.erl | 104 +++ src/fabric/src/fabric2_sup.erl | 47 ++ src/fabric/src/fabric2_txids.erl | 144 ++++ src/fabric/src/fabric2_util.erl | 203 ++++++ 13 files changed, 3502 insertions(+), 401 deletions(-) create mode 100644 FDB_NOTES.md create mode 100644 src/fabric/src/fabric2.hrl create mode 100644 src/fabric/src/fabric2_app.erl create mode 100644 src/fabric/src/fabric2_db.erl create mode 100644 src/fabric/src/fabric2_events.erl create mode 100644 src/fabric/src/fabric2_fdb.erl create mode 100644 src/fabric/src/fabric2_server.erl create mode 100644 src/fabric/src/fabric2_sup.erl create mode 100644 src/fabric/src/fabric2_txids.erl create mode 100644 src/fabric/src/fabric2_util.erl diff --git a/FDB_NOTES.md b/FDB_NOTES.md new file mode 100644 index 000000000..c0cdc8cc2 --- /dev/null +++ b/FDB_NOTES.md @@ -0,0 +1,57 @@ +Things of Note +=== + + +1. If a replication sends us two revisions A and B where one is an + ancestor of the other, we likely have divergent behavior. However, + this should never happen In Theory. + +2. Multiple updates to the same document in a _bulk_docs (or if they + just happen to be in the same update batch in non-fdb CouchDB) + we likely have subtly different behavior. + +3. I'm relying on repeated reads in an fdb transaction to be "cheap" + in that the reads would be cached in the fdb_transaction object. + This needs to be checked for certainty but that appeared to + be how things behaved in testing. + +4. When attempting to create a doc from scratch in an interacitve_edit + update, with revisions specified *and* attachment stubs, the reported + error is now a conflict. Previously the missing_stubs error was + raised earlier. + +5. There may be a difference in behavior if a) there are no VDU functions + set on a db and no design documents in a batch. This is because in + this situation we don't run the prep_and_validate code on pre-fdb + CouchDB. The new code always checks stubs before merging revision trees. + I'm sure the old way would fail somehow, but it would fail further on + which means we may have failed with a different reason (conflict, etc) + before we got to the next place we check for missing stubs. + +6. For multi-doc updates we'll need to investigate user versions on + versionstamps within a transaction. Also this likely prevents the + ability to have multiple updates to the same doc in a single + _bulk_docs transaction + +7. Document body storage needs to be implemented beyond the single + key/value approach. + +8. We'll want to look at how we currently apply open options to individual + elements of an open_revs call. Might turn out that we have to grab a + full FDI even if we could look up a rev directly. (i.e., revs_info + would require us having the entire FDI, however it'd be wasteful to return + all of that in an open_revs call, but bug compatibility ftw!) + +9. Is it possible that a server_admin can delete a db without being able + to open it? If so that's probably changed behavior. + +10. All docs on large active databases might be a thing getting the doc + count. If we allow range requests up to 5s, and we continue to return + the doc count total we may have to play games with snapshot reads on + the doc count key or else it'll whack any _all_docs range requests + +11. Revision infos need to track their size f we want to maintain a database + size counter we'll want to store the size of a given doc body for each + revision so that we don't have to read the old body when updating the tree. + +12. Update sequences do not yet include an incarnation value. \ No newline at end of file diff --git a/src/couch/src/couch_att.erl b/src/couch/src/couch_att.erl index a24de21d6..0dc5fa56b 100644 --- a/src/couch/src/couch_att.erl +++ b/src/couch/src/couch_att.erl @@ -29,7 +29,7 @@ -export([ size_info/1, to_disk_term/1, - from_disk_term/2 + from_disk_term/3 ]). -export([ @@ -38,18 +38,13 @@ ]). -export([ - flush/2, + flush/3, foldl/3, range_foldl/5, foldl_decode/3, to_binary/1 ]). --export([ - upgrade/1, - downgrade/1 -]). - -export([ max_attachment_size/0, validate_attachment_size/3 @@ -58,137 +53,61 @@ -compile(nowarn_deprecated_type). -export_type([att/0]). --include_lib("couch/include/couch_db.hrl"). - - -%% Legacy attachment record. This is going to be phased out by the new proplist -%% based structure. It's needed for now to allow code to perform lazy upgrades -%% while the patch is rolled out to the cluster. Attachments passed as records -%% will remain so until they are required to be represented as property lists. -%% Once this has been widely deployed, this record will be removed entirely and -%% property lists will be the main format. --record(att, { - name :: binary(), - type :: binary(), - att_len :: non_neg_integer(), - - %% length of the attachment in its identity form - %% (that is, without a content encoding applied to it) - %% differs from att_len when encoding /= identity - disk_len :: non_neg_integer(), - - md5 = <<>> :: binary(), - revpos = 0 :: non_neg_integer(), - data :: stub | follows | binary() | {any(), any()} | - {follows, pid(), reference()} | fun(() -> binary()), - - %% Encoding of the attachment - %% currently supported values are: - %% identity, gzip - %% additional values to support in the future: - %% deflate, compress - encoding = identity :: identity | gzip -}). - - -%% Extensible Attachment Type -%% -%% The following types describe the known properties for attachment fields -%% encoded as property lists to allow easier upgrades. Values not in this list -%% should be accepted at runtime but should be treated as opaque data as might -%% be used by upgraded code. If you plan on operating on new data, please add -%% an entry here as documentation. - - -%% The name of the attachment is also used as the mime-part name for file -%% downloads. These must be unique per document. --type name_prop() :: {name, binary()}. - - -%% The mime type of the attachment. This does affect compression of certain -%% attachments if the type is found to be configured as a compressable type. -%% This is commonly reserved for text/* types but could include other custom -%% cases as well. See definition and use of couch_util:compressable_att_type/1. --type type_prop() :: {type, binary()}. - - -%% The attachment length is similar to disk-length but ignores additional -%% encoding that may have occurred. --type att_len_prop() :: {att_len, non_neg_integer()}. - - -%% The size of the attachment as stored in a disk stream. --type disk_len_prop() :: {disk_len, non_neg_integer()}. - - -%% This is a digest of the original attachment data as uploaded by the client. -%% it's useful for checking validity of contents against other attachment data -%% as well as quick digest computation of the enclosing document. --type md5_prop() :: {md5, binary()}. - --type revpos_prop() :: {revpos, 0}. +-include_lib("couch/include/couch_db.hrl"). -%% This field is currently overloaded with just about everything. The -%% {any(), any()} type is just there until I have time to check the actual -%% values expected. Over time this should be split into more than one property -%% to allow simpler handling. --type data_prop() :: { - data, stub | follows | binary() | {any(), any()} | - {follows, pid(), reference()} | fun(() -> binary()) -}. +-define(CURRENT_ATT_FORMAT, 0). -%% We will occasionally compress our data. See type_prop() for more information -%% on when this happens. --type encoding_prop() :: {encoding, identity | gzip}. +-type prop_name() :: + name | + type | + att_len | + disk_len | + md5 | + revpos | + data | + encoding. --type attachment() :: [ - name_prop() | type_prop() | - att_len_prop() | disk_len_prop() | - md5_prop() | revpos_prop() | - data_prop() | encoding_prop() -]. +-type data_prop_type() :: + {loc, #{}, binary(), binary()} | + stub | + follows | + binary() | + {follows, pid(), reference()} | + fun(() -> binary()). --type disk_att_v1() :: { - Name :: binary(), - Type :: binary(), - Sp :: any(), - AttLen :: non_neg_integer(), - RevPos :: non_neg_integer(), - Md5 :: binary() -}. --type disk_att_v2() :: { - Name :: binary(), - Type :: binary(), - Sp :: any(), - AttLen :: non_neg_integer(), - DiskLen :: non_neg_integer(), - RevPos :: non_neg_integer(), - Md5 :: binary(), - Enc :: identity | gzip +-type att() :: #{ + name := binary(), + type := binary(), + att_len := non_neg_integer() | undefined, + disk_len := non_neg_integer() | undefined, + md5 := binary() | undefined, + revpos := non_neg_integer(), + data := data_prop_type(), + encoding := identity | gzip | undefined, + headers := [{binary(), binary()}] | undefined }. --type disk_att_v3() :: {Base :: tuple(), Extended :: list()}. - --type disk_att() :: disk_att_v1() | disk_att_v2() | disk_att_v3(). - --type att() :: #att{} | attachment() | disk_att(). new() -> - %% We construct a record by default for compatability. This will be - %% upgraded on demand. A subtle effect this has on all attachments - %% constructed via new is that it will pick up the proper defaults - %% from the #att record definition given above. Newer properties do - %% not support special default values and will all be treated as - %% undefined. - #att{}. + #{ + name => <<>>, + type => <<>>, + att_len => undefined, + disk_len => undefined, + md5 => undefined, + revpos => 0, + data => undefined, + encoding => undefined, + headers => undefined + }. --spec new([{atom(), any()}]) -> att(). +-spec new([{prop_name(), any()}]) -> att(). new(Props) -> store(Props, new()). @@ -197,71 +116,28 @@ new(Props) -> (atom(), att()) -> any(). fetch(Fields, Att) when is_list(Fields) -> [fetch(Field, Att) || Field <- Fields]; -fetch(Field, Att) when is_list(Att) -> - case lists:keyfind(Field, 1, Att) of - {Field, Value} -> Value; - false -> undefined - end; -fetch(name, #att{name = Name}) -> - Name; -fetch(type, #att{type = Type}) -> - Type; -fetch(att_len, #att{att_len = AttLen}) -> - AttLen; -fetch(disk_len, #att{disk_len = DiskLen}) -> - DiskLen; -fetch(md5, #att{md5 = Digest}) -> - Digest; -fetch(revpos, #att{revpos = RevPos}) -> - RevPos; -fetch(data, #att{data = Data}) -> - Data; -fetch(encoding, #att{encoding = Encoding}) -> - Encoding; -fetch(_, _) -> - undefined. +fetch(Field, Att) -> + maps:get(Field, Att). -spec store([{atom(), any()}], att()) -> att(). store(Props, Att0) -> lists:foldl(fun({Field, Value}, Att) -> - store(Field, Value, Att) + maps:update(Field, Value, Att) end, Att0, Props). --spec store(atom(), any(), att()) -> att(). -store(Field, undefined, Att) when is_list(Att) -> - lists:keydelete(Field, 1, Att); -store(Field, Value, Att) when is_list(Att) -> - lists:keystore(Field, 1, Att, {Field, Value}); -store(name, Name, Att) -> - Att#att{name = Name}; -store(type, Type, Att) -> - Att#att{type = Type}; -store(att_len, AttLen, Att) -> - Att#att{att_len = AttLen}; -store(disk_len, DiskLen, Att) -> - Att#att{disk_len = DiskLen}; -store(md5, Digest, Att) -> - Att#att{md5 = Digest}; -store(revpos, RevPos, Att) -> - Att#att{revpos = RevPos}; -store(data, Data, Att) -> - Att#att{data = Data}; -store(encoding, Encoding, Att) -> - Att#att{encoding = Encoding}; store(Field, Value, Att) -> - store(Field, Value, upgrade(Att)). + maps:update(Field, Value, Att). -spec transform(atom(), fun(), att()) -> att(). transform(Field, Fun, Att) -> - NewValue = Fun(fetch(Field, Att)), - store(Field, NewValue, Att). + maps:update_with(Field, Fun, Att). -is_stub(Att) -> - stub == fetch(data, Att). +is_stub(#{data := stub}) -> true; +is_stub(#{}) -> false. %% merge_stubs takes all stub attachments and replaces them with on disk @@ -275,8 +151,7 @@ merge_stubs(MemAtts, DiskAtts) -> merge_stubs(MemAtts, OnDisk, []). -%% restore spec when R14 support is dropped -%% -spec merge_stubs([att()], dict:dict(), [att()]) -> [att()]. +-spec merge_stubs([att()], dict:dict(), [att()]) -> [att()]. merge_stubs([Att | Rest], OnDisk, Merged) -> case fetch(data, Att) of stub -> @@ -308,14 +183,8 @@ size_info([]) -> {ok, []}; size_info(Atts) -> Info = lists:map(fun(Att) -> - AttLen = fetch(att_len, Att), - case fetch(data, Att) of - {stream, StreamEngine} -> - {ok, SPos} = couch_stream:to_disk_term(StreamEngine), - {SPos, AttLen}; - {_, SPos} -> - {SPos, AttLen} - end + [{loc, _Db, _DocId, AttId}, AttLen] = fetch([data, att_len], Att), + {AttId, AttLen} end, Atts), {ok, lists:usort(Info)}. @@ -324,89 +193,44 @@ size_info(Atts) -> %% old format when possible. This should help make the attachment lazy upgrade %% as safe as possible, avoiding the need for complicated disk versioning %% schemes. -to_disk_term(#att{} = Att) -> - {stream, StreamEngine} = fetch(data, Att), - {ok, Sp} = couch_stream:to_disk_term(StreamEngine), - { +to_disk_term(Att) -> + {loc, #{}, _DocId, AttId} = fetch(data, Att), + {?CURRENT_ATT_FORMAT, { fetch(name, Att), fetch(type, Att), - Sp, + AttId, fetch(att_len, Att), fetch(disk_len, Att), fetch(revpos, Att), fetch(md5, Att), - fetch(encoding, Att) - }; -to_disk_term(Att) -> - BaseProps = [name, type, data, att_len, disk_len, revpos, md5, encoding], - {Extended, Base} = lists:foldl( - fun - (data, {Props, Values}) -> - case lists:keytake(data, 1, Props) of - {value, {_, {stream, StreamEngine}}, Other} -> - {ok, Sp} = couch_stream:to_disk_term(StreamEngine), - {Other, [Sp | Values]}; - {value, {_, Value}, Other} -> - {Other, [Value | Values]}; - false -> - {Props, [undefined | Values]} - end; - (Key, {Props, Values}) -> - case lists:keytake(Key, 1, Props) of - {value, {_, Value}, Other} -> {Other, [Value | Values]}; - false -> {Props, [undefined | Values]} - end - end, - {Att, []}, - BaseProps - ), - {list_to_tuple(lists:reverse(Base)), Extended}. - - -%% The new disk term format is a simple wrapper around the legacy format. Base -%% properties will remain in a tuple while the new fields and possibly data from -%% future extensions will be stored in a list of atom/value pairs. While this is -%% slightly less efficient, future work should be able to make use of -%% compression to remove these sorts of common bits (block level compression -%% with something like a shared dictionary that is checkpointed every now and -%% then). -from_disk_term(StreamSrc, {Base, Extended}) - when is_tuple(Base), is_list(Extended) -> - store(Extended, from_disk_term(StreamSrc, Base)); -from_disk_term(StreamSrc, {Name,Type,Sp,AttLen,DiskLen,RevPos,Md5,Enc}) -> - {ok, Stream} = open_stream(StreamSrc, Sp), - #att{ - name=Name, - type=Type, - att_len=AttLen, - disk_len=DiskLen, - md5=Md5, - revpos=RevPos, - data={stream, Stream}, - encoding=upgrade_encoding(Enc) - }; -from_disk_term(StreamSrc, {Name,Type,Sp,AttLen,RevPos,Md5}) -> - {ok, Stream} = open_stream(StreamSrc, Sp), - #att{ - name=Name, - type=Type, - att_len=AttLen, - disk_len=AttLen, - md5=Md5, - revpos=RevPos, - data={stream, Stream} - }; -from_disk_term(StreamSrc, {Name,{Type,Sp,AttLen}}) -> - {ok, Stream} = open_stream(StreamSrc, Sp), - #att{ - name=Name, - type=Type, - att_len=AttLen, - disk_len=AttLen, - md5= <<>>, - revpos=0, - data={stream, Stream} - }. + fetch(encoding, Att), + fetch(headers, Att) + }}. + + +from_disk_term(#{} = Db, DocId, {?CURRENT_ATT_FORMAT, Props}) -> + { + Name, + Type, + AttId, + AttLen, + DiskLen, + RevPos, + Md5, + Encoding, + Headers + } = Props, + new([ + {name, Name}, + {type, Type}, + {data, {loc, Db#{tx := undefined}, DocId, AttId}}, + {att_len, AttLen}, + {disk_len, DiskLen}, + {revpos, RevPos}, + {md5, Md5}, + {encoding, Encoding}, + {headers, Headers} + ]). %% from_json reads in embedded JSON attachments and creates usable attachment @@ -433,8 +257,12 @@ stub_from_json(Att, Props) -> %% json object. See merge_stubs/3 for the stub check. RevPos = couch_util:get_value(<<"revpos">>, Props), store([ - {md5, Digest}, {revpos, RevPos}, {data, stub}, {disk_len, DiskLen}, - {att_len, EncodedLen}, {encoding, Encoding} + {data, stub}, + {disk_len, DiskLen}, + {att_len, EncodedLen}, + {revpos, RevPos}, + {md5, Digest}, + {encoding, Encoding} ], Att). @@ -443,8 +271,12 @@ follow_from_json(Att, Props) -> Digest = digest_from_json(Props), RevPos = couch_util:get_value(<<"revpos">>, Props, 0), store([ - {md5, Digest}, {revpos, RevPos}, {data, follows}, {disk_len, DiskLen}, - {att_len, EncodedLen}, {encoding, Encoding} + {data, follows}, + {disk_len, DiskLen}, + {att_len, EncodedLen}, + {revpos, RevPos}, + {md5, Digest}, + {encoding, Encoding} ], Att). @@ -455,8 +287,10 @@ inline_from_json(Att, Props) -> Length = size(Data), RevPos = couch_util:get_value(<<"revpos">>, Props, 0), store([ - {data, Data}, {revpos, RevPos}, {disk_len, Length}, - {att_len, Length} + {data, Data}, + {disk_len, Length}, + {att_len, Length}, + {revpos, RevPos} ], Att) catch _:_ -> @@ -466,7 +300,6 @@ inline_from_json(Att, Props) -> end. - encoded_lengths_from_json(Props) -> Len = couch_util:get_value(<<"length">>, Props), case couch_util:get_value(<<"encoding">>, Props) of @@ -488,9 +321,17 @@ digest_from_json(Props) -> to_json(Att, OutputData, DataToFollow, ShowEncoding) -> - [Name, Data, DiskLen, AttLen, Enc, Type, RevPos, Md5] = fetch( - [name, data, disk_len, att_len, encoding, type, revpos, md5], Att - ), + #{ + name := Name, + type := Type, + data := Data, + disk_len := DiskLen, + att_len := AttLen, + revpos := RevPos, + md5 := Md5, + encoding := Encoding, + headers := Headers + } = Att, Props = [ {<<"content_type">>, Type}, {<<"revpos">>, RevPos} @@ -505,71 +346,74 @@ to_json(Att, OutputData, DataToFollow, ShowEncoding) -> DataToFollow -> [{<<"length">>, DiskLen}, {<<"follows">>, true}]; true -> - AttData = case Enc of + AttData = case Encoding of gzip -> zlib:gunzip(to_binary(Att)); identity -> to_binary(Att) end, [{<<"data">>, base64:encode(AttData)}] end, EncodingProps = if - ShowEncoding andalso Enc /= identity -> + ShowEncoding andalso Encoding /= identity -> [ - {<<"encoding">>, couch_util:to_binary(Enc)}, + {<<"encoding">>, couch_util:to_binary(Encoding)}, {<<"encoded_length">>, AttLen} ]; true -> [] end, - HeadersProp = case fetch(headers, Att) of + HeadersProp = case Headers of undefined -> []; Headers -> [{<<"headers">>, Headers}] end, {Name, {Props ++ DigestProp ++ DataProps ++ EncodingProps ++ HeadersProp}}. -flush(Db, Att) -> - flush_data(Db, fetch(data, Att), Att). +flush(Db, DocId, Att1) -> + Att2 = read_data(fetch(data, Att1), Att1), + [ + Data, + AttLen, + DiskLen, + ReqMd5, + Encoding + ] = fetch([data, att_len, disk_len, md5, encoding], Att2), + + % Eventually, we'll check if we can compress this + % attachment here and do so if possible. + + % If we were sent a gzip'ed attachment with no + % length data, we have to set it here. + Att3 = case AttLen of + undefined -> store(att_len, DiskLen, Att2); + _ -> Att2 + end, + + % If no encoding has been set, default to + % identity + Att4 = case Encoding of + undefined -> store(encoding, identity, Att3); + _ -> Att3 + end, + + case Data of + {loc, _, _, _} -> + % Already flushed + Att1; + _ when is_binary(Data) -> + IdentMd5 = get_identity_md5(Data, fetch(encoding, Att4)), + if ReqMd5 == undefined -> ok; true -> + couch_util:check_md5(IdentMd5, ReqMd5) + end, + Att5 = store(md5, IdentMd5, Att4), + fabric2_db:write_attachment(Db, DocId, Att5) + end. -flush_data(Db, Data, Att) when is_binary(Data) -> - couch_db:with_stream(Db, Att, fun(OutputStream) -> - couch_stream:write(OutputStream, Data) - end); -flush_data(Db, Fun, Att) when is_function(Fun) -> - AttName = fetch(name, Att), - MaxAttSize = max_attachment_size(), - case fetch(att_len, Att) of - undefined -> - couch_db:with_stream(Db, Att, fun(OutputStream) -> - % Fun(MaxChunkSize, WriterFun) must call WriterFun - % once for each chunk of the attachment, - Fun(4096, - % WriterFun({Length, Binary}, State) - % WriterFun({0, _Footers}, State) - % Called with Length == 0 on the last time. - % WriterFun returns NewState. - fun({0, Footers}, _Total) -> - F = mochiweb_headers:from_binary(Footers), - case mochiweb_headers:get_value("Content-MD5", F) of - undefined -> - ok; - Md5 -> - {md5, base64:decode(Md5)} - end; - ({Length, Chunk}, Total0) -> - Total = Total0 + Length, - validate_attachment_size(AttName, Total, MaxAttSize), - couch_stream:write(OutputStream, Chunk), - Total - end, 0) - end); - AttLen -> - validate_attachment_size(AttName, AttLen, MaxAttSize), - couch_db:with_stream(Db, Att, fun(OutputStream) -> - write_streamed_attachment(OutputStream, Fun, AttLen) - end) - end; -flush_data(Db, {follows, Parser, Ref}, Att) -> +read_data({loc, #{}, _DocId, _AttId}, Att) -> + % Attachment already written to fdb + Att; + +read_data({follows, Parser, Ref}, Att) -> ParserRef = erlang:monitor(process, Parser), Fun = fun() -> Parser ! {get_bytes, Ref, self()}, @@ -583,41 +427,72 @@ flush_data(Db, {follows, Parser, Ref}, Att) -> end end, try - flush_data(Db, Fun, store(data, Fun, Att)) + read_data(Fun, store(data, Fun, Att)) after erlang:demonitor(ParserRef, [flush]) end; -flush_data(Db, {stream, StreamEngine}, Att) -> - case couch_db:is_active_stream(Db, StreamEngine) of - true -> - % Already written - Att; - false -> - NewAtt = couch_db:with_stream(Db, Att, fun(OutputStream) -> - couch_stream:copy(StreamEngine, OutputStream) - end), - InMd5 = fetch(md5, Att), - OutMd5 = fetch(md5, NewAtt), - couch_util:check_md5(OutMd5, InMd5), - NewAtt + +read_data(Data, Att) when is_binary(Data) -> + Att; + +read_data(Fun, Att) when is_function(Fun) -> + [AttName, AttLen, InMd5] = fetch([name, att_len, md5], Att), + MaxAttSize = max_attachment_size(), + case AttLen of + undefined -> + % Fun(MaxChunkSize, WriterFun) must call WriterFun + % once for each chunk of the attachment, + WriterFun = fun + ({0, Footers}, {Len, Acc}) -> + F = mochiweb_headers:from_binary(Footers), + Md5 = case mochiweb_headers:get_value("Content-MD5", F) of + undefined -> undefined; + Value -> base64:decode(Value) + end, + Props0 = [ + {data, iolist_to_binary(lists:reverse(Acc))}, + {disk_len, Len} + ], + Props1 = if InMd5 /= md5_in_footer -> Props0; true -> + [{md5, Md5} | Props0] + end, + store(Props1, Att); + ({ChunkLen, Chunk}, {Len, Acc}) -> + NewLen = Len + ChunkLen, + validate_attachment_size(AttName, NewLen, MaxAttSize), + {NewLen, [Chunk | Acc]} + end, + Fun(8192, WriterFun, {0, []}); + AttLen -> + validate_attachment_size(AttName, AttLen, MaxAttSize), + read_streamed_attachment(Att, Fun, AttLen, []) end. -write_streamed_attachment(_Stream, _F, 0) -> - ok; -write_streamed_attachment(_Stream, _F, LenLeft) when LenLeft < 0 -> +read_streamed_attachment(Att, _F, 0, Acc) -> + Bin = iolist_to_binary(lists:reverse(Acc)), + store([ + {data, Bin}, + {disk_len, size(Bin)} + ], Att); + +read_streamed_attachment(_Att, _F, LenLeft, _Acc) when LenLeft < 0 -> throw({bad_request, <<"attachment longer than expected">>}); -write_streamed_attachment(Stream, F, LenLeft) when LenLeft > 0 -> - Bin = try read_next_chunk(F, LenLeft) + +read_streamed_attachment(Att, F, LenLeft, Acc) when LenLeft > 0 -> + Bin = try + read_next_chunk(F, LenLeft) catch {mp_parser_died, normal} -> throw({bad_request, <<"attachment shorter than expected">>}) end, - ok = couch_stream:write(Stream, Bin), - write_streamed_attachment(Stream, F, LenLeft - iolist_size(Bin)). + Size = iolist_size(Bin), + read_streamed_attachment(Att, F, LenLeft - Size, [Bin | Acc]). + read_next_chunk(F, _) when is_function(F, 0) -> F(); + read_next_chunk(F, LenLeft) when is_function(F, 1) -> F(lists:min([LenLeft, 16#2000])). @@ -626,14 +501,17 @@ foldl(Att, Fun, Acc) -> foldl(fetch(data, Att), Att, Fun, Acc). +foldl({loc, Db, DocId, AttId}, _Att, Fun, Acc) -> + Bin = fabric2_db:read_attachment(Db#{tx := undefined}, DocId, AttId), + Fun(Bin, Acc); + foldl(Bin, _Att, Fun, Acc) when is_binary(Bin) -> Fun(Bin, Acc); -foldl({stream, StreamEngine}, Att, Fun, Acc) -> - Md5 = fetch(md5, Att), - couch_stream:foldl(StreamEngine, Md5, Fun, Acc); + foldl(DataFun, Att, Fun, Acc) when is_function(DataFun) -> Len = fetch(att_len, Att), fold_streamed_data(DataFun, Len, Fun, Acc); + foldl({follows, Parser, Ref}, Att, Fun, Acc) -> ParserRef = erlang:monitor(process, Parser), DataFun = fun() -> @@ -654,19 +532,26 @@ foldl({follows, Parser, Ref}, Att, Fun, Acc) -> end. +range_foldl(Bin1, From, To, Fun, Acc) when is_binary(Bin1) -> + ReadLen = To - From, + Bin2 = case Bin1 of + _ when size(Bin1) < From -> <<>>; + <<_:From/binary, B2>> -> B2 + end, + Bin3 = case Bin2 of + _ when size(Bin2) < ReadLen -> Bin2; + <> -> B3 + end, + Fun(Bin3, Acc); + range_foldl(Att, From, To, Fun, Acc) -> - {stream, StreamEngine} = fetch(data, Att), - couch_stream:range_foldl(StreamEngine, From, To, Fun, Acc). + {loc, Db, DocId, AttId} = fetch(data, Att), + Bin = fabric2_db:read_attachment(Db, DocId, AttId), + range_foldl(Bin, From, To, Fun, Acc). -foldl_decode(Att, Fun, Acc) -> - case fetch([data, encoding], Att) of - [{stream, StreamEngine}, Enc] -> - couch_stream:foldl_decode( - StreamEngine, fetch(md5, Att), Enc, Fun, Acc); - [Fun2, identity] -> - fold_streamed_data(Fun2, fetch(att_len, Att), Fun, Acc) - end. +foldl_decode(_Att, _Fun, _Acc) -> + erlang:error(not_supported). to_binary(Att) -> @@ -677,10 +562,8 @@ to_binary(Bin, _Att) when is_binary(Bin) -> Bin; to_binary(Iolist, _Att) when is_list(Iolist) -> iolist_to_binary(Iolist); -to_binary({stream, _StreamEngine}, Att) -> - iolist_to_binary( - lists:reverse(foldl(Att, fun(Bin,Acc) -> [Bin|Acc] end, [])) - ); +to_binary({loc, Db, DocId, AttId}, _Att) -> + fabric2_db:read_attachmet(Db, DocId, AttId); to_binary(DataFun, Att) when is_function(DataFun)-> Len = fetch(att_len, Att), iolist_to_binary( @@ -695,46 +578,22 @@ to_binary(DataFun, Att) when is_function(DataFun)-> fold_streamed_data(_RcvFun, 0, _Fun, Acc) -> Acc; + fold_streamed_data(RcvFun, LenLeft, Fun, Acc) when LenLeft > 0-> Bin = RcvFun(), ResultAcc = Fun(Bin, Acc), fold_streamed_data(RcvFun, LenLeft - size(Bin), Fun, ResultAcc). -%% Upgrade an attachment record to a property list on demand. This is a one-way -%% operation as downgrading potentially truncates fields with important data. --spec upgrade(#att{}) -> attachment(). -upgrade(#att{} = Att) -> - Map = lists:zip( - record_info(fields, att), - lists:seq(2, record_info(size, att)) - ), - %% Don't store undefined elements since that is default - [{F, element(I, Att)} || {F, I} <- Map, element(I, Att) /= undefined]; -upgrade(Att) -> - Att. - - -%% Downgrade is exposed for interactive convenience. In practice, unless done -%% manually, upgrades are always one-way. -downgrade(#att{} = Att) -> - Att; -downgrade(Att) -> - #att{ - name = fetch(name, Att), - type = fetch(type, Att), - att_len = fetch(att_len, Att), - disk_len = fetch(disk_len, Att), - md5 = fetch(md5, Att), - revpos = fetch(revpos, Att), - data = fetch(data, Att), - encoding = fetch(encoding, Att) - }. - - -upgrade_encoding(true) -> gzip; -upgrade_encoding(false) -> identity; -upgrade_encoding(Encoding) -> Encoding. +get_identity_md5(Bin, gzip) -> + Z = zlib:open(), + ok = zlib:inflateInit(Z, 16 + 15), + Inflated = zlib:inflate(Z, Bin), + ok = zlib:inflateEnd(Z), + ok = zlib:close(Z), + couch_hash:md5_hash(Inflated); +get_identity_md5(Bin, _) -> + couch_hash:md5_hash(Bin). max_attachment_size() -> @@ -753,18 +612,22 @@ validate_attachment_size(_AttName, _AttSize, _MAxAttSize) -> ok. -open_stream(StreamSrc, Data) -> - case couch_db:is_db(StreamSrc) of - true -> - couch_db:open_read_stream(StreamSrc, Data); - false -> - case is_function(StreamSrc, 1) of - true -> - StreamSrc(Data); - false -> - erlang:error({invalid_stream_source, StreamSrc}) - end - end. +%% is_compressible(Type) when is_binary(Type) -> +%% is_compressible(binary_to_list(Type)); +%% is_compressible(Type) -> +%% TypeExpList = re:split( +%% config:get("attachments", "compressible_types", ""), +%% "\\s*,\\s*", +%% [{return, list}] +%% ), +%% lists:any( +%% fun(TypeExp) -> +%% Regexp = ["^\\s*", re:replace(TypeExp, "\\*", ".*"), +%% "(?:\\s*;.*?)?\\s*", $$], +%% re:run(Type, Regexp, [caseless]) =/= nomatch +%% end, +%% [T || T <- TypeExpList, T /= []] +%% ). -ifdef(TEST). diff --git a/src/couch/src/couch_doc.erl b/src/couch/src/couch_doc.erl index 33ad14f0b..073006c77 100644 --- a/src/couch/src/couch_doc.erl +++ b/src/couch/src/couch_doc.erl @@ -379,6 +379,17 @@ rev_info({#doc{} = Doc, {Pos, [RevId | _]}}) -> body_sp = undefined, seq = undefined, rev = {Pos, RevId} + }; +rev_info({#{} = RevInfo, {Pos, [RevId | _]}}) -> + #{ + deleted := Deleted, + sequence := Sequence + } = RevInfo, + #rev_info{ + deleted = Deleted, + body_sp = undefined, + seq = Sequence, + rev = {Pos, RevId} }. is_deleted(#full_doc_info{rev_tree=Tree}) -> diff --git a/src/fabric/src/fabric.app.src b/src/fabric/src/fabric.app.src index d7686ca1a..20fbb1e2a 100644 --- a/src/fabric/src/fabric.app.src +++ b/src/fabric/src/fabric.app.src @@ -13,7 +13,10 @@ {application, fabric, [ {description, "Routing and proxying layer for CouchDB cluster"}, {vsn, git}, - {registered, []}, + {mod, {fabric2_app, []}}, + {registered, [ + fabric_server + ]}, {applications, [ kernel, stdlib, @@ -22,6 +25,7 @@ rexi, mem3, couch_log, - couch_stats + couch_stats, + erlfdb ]} ]}. diff --git a/src/fabric/src/fabric2.hrl b/src/fabric/src/fabric2.hrl new file mode 100644 index 000000000..de1d3d177 --- /dev/null +++ b/src/fabric/src/fabric2.hrl @@ -0,0 +1,66 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + + +-define(uint2bin(I), binary:encode_unsigned(I, little)). +-define(bin2uint(I), binary:decode_unsigned(I, little)). + +% This will eventually be the `\xFFmetadataVersion` key that is +% currently only available in FoundationDB master. +% +% https://forums.foundationdb.org/t/a-new-tool-for-managing-layer-metadata/1191 +% +% Until then we'll fake the same behavior using a randomish +% key for tracking metadata changse. Once we get to the +% new feature this will be more performant by updating +% this define. +-define(METADATA_VERSION_KEY, <<"$metadata_version_key$">>). + + +% Prefix Definitions + +% Layer Level: (LayerPrefix, X, ...) + +-define(CLUSTER_CONFIG, 0). +-define(ALL_DBS, 1). +-define(DBS, 15). +-define(TX_IDS, 255). + +% Database Level: (LayerPrefix, ?DBS, DbPrefix, X, ...) + +-define(DB_VERSION, 0). +-define(DB_CONFIG, 16). +-define(DB_STATS, 17). +-define(DB_ALL_DOCS, 18). +-define(DB_CHANGES, 19). +-define(DB_REVS, 20). +-define(DB_DOCS, 21). +-define(DB_LOCAL_DOCS, 22). +-define(DB_ATTS, 23). + + +% Versions + +-define(CURR_REV_FORMAT, 0). + + +% Misc constants + +-define(PDICT_DB_KEY, '$fabric_db_handle'). +-define(PDICT_LAYER_CACHE, '$fabric_layer_id'). +-define(PDICT_CHECKED_DB_IS_CURRENT, '$fabric_checked_db_is_current'). +-define(PDICT_TX_ID_KEY, '$fabric_tx_id'). +-define(PDICT_TX_RES_KEY, '$fabric_tx_result'). +-define(COMMIT_UNKNOWN_RESULT, 1021). + + +-define(ATTACHMENT_CHUNK_SIZE, 100000). diff --git a/src/fabric/src/fabric2_app.erl b/src/fabric/src/fabric2_app.erl new file mode 100644 index 000000000..da95acb53 --- /dev/null +++ b/src/fabric/src/fabric2_app.erl @@ -0,0 +1,32 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(fabric2_app). +-behaviour(application). + + +-export([ + start/2, + stop/1 +]). + + +start(_Type, StartArgs) -> + fabric2_sup:start_link(StartArgs). + + +stop(_State) -> + case application:get_env(erlfdb, test_cluster_pid) of + {ok, Pid} -> Pid ! close; + _ -> ok + end, + ok. diff --git a/src/fabric/src/fabric2_db.erl b/src/fabric/src/fabric2_db.erl new file mode 100644 index 000000000..02a18fa23 --- /dev/null +++ b/src/fabric/src/fabric2_db.erl @@ -0,0 +1,1299 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(fabric2_db). + + +-export([ + create/2, + open/2, + delete/2, + + list_dbs/0, + list_dbs/1, + + is_admin/1, + check_is_admin/1, + check_is_member/1, + + name/1, + get_after_doc_read_fun/1, + get_before_doc_update_fun/1, + get_committed_update_seq/1, + get_compacted_seq/1, + get_compactor_pid/1, + get_db_info/1, + %% get_partition_info/2, + get_del_doc_count/1, + get_doc_count/1, + get_doc_count/2, + %% get_epochs/1, + %% get_filepath/1, + get_instance_start_time/1, + get_pid/1, + get_revs_limit/1, + get_security/1, + get_update_seq/1, + get_user_ctx/1, + get_uuid/1, + %% get_purge_seq/1, + %% get_oldest_purge_seq/1, + %% get_purge_infos_limit/1, + + is_clustered/1, + is_db/1, + is_partitioned/1, + is_system_db/1, + is_system_db_name/1, + + set_revs_limit/2, + %% set_purge_infos_limit/2, + set_security/2, + set_user_ctx/2, + + ensure_full_commit/1, + ensure_full_commit/2, + + %% load_validation_funs/1, + %% reload_validation_funs/1, + + open_doc/2, + open_doc/3, + open_doc_revs/4, + %% open_doc_int/3, + get_doc_info/2, + get_full_doc_info/2, + get_full_doc_infos/2, + get_missing_revs/2, + %% get_design_doc/2, + %% get_design_docs/1, + %% get_design_doc_count/1, + %% get_purge_infos/2, + + %% get_minimum_purge_seq/1, + %% purge_client_exists/3, + + %% validate_docid/2, + %% doc_from_json_obj_validate/2, + + update_doc/2, + update_doc/3, + update_docs/2, + update_docs/3, + %% delete_doc/3, + + %% purge_docs/2, + %% purge_docs/3, + + read_attachment/3, + write_attachment/3, + + fold_docs/3, + fold_docs/4, + %% fold_local_docs/4, + %% fold_design_docs/4, + fold_changes/4, + fold_changes/5, + %% count_changes_since/2, + %% fold_purge_infos/4, + %% fold_purge_infos/5, + + %% calculate_start_seq/3, + %% owner_of/2, + + %% start_compact/1, + %% cancel_compact/1, + %% wait_for_compaction/1, + %% wait_for_compaction/2, + + %% dbname_suffix/1, + %% normalize_dbname/1, + %% validate_dbname/1, + + %% make_doc/5, + new_revid/1 +]). + + +-include_lib("couch/include/couch_db.hrl"). +-include("fabric2.hrl"). + + +-define(DBNAME_REGEX, + "^[a-z][a-z0-9\\_\\$()\\+\\-\\/]*" % use the stock CouchDB regex + "(\\.[0-9]{10,})?$" % but allow an optional shard timestamp at the end +). + + +-define(RETURN(Term), throw({?MODULE, Term})). + + +create(DbName, Options) -> + Result = fabric2_fdb:transactional(DbName, Options, fun(TxDb) -> + case fabric2_fdb:exists(TxDb) of + true -> + {error, file_exists}; + false -> + fabric2_fdb:create(TxDb, Options) + end + end), + % We cache outside of the transaction so that we're sure + % that the transaction was committed. + case Result of + #{} = Db -> + ok = fabric2_server:store(Db), + {ok, Db#{tx := undefined}}; + Error -> + Error + end. + + +open(DbName, Options) -> + case fabric2_server:fetch(DbName) of + #{} = Db -> + {ok, maybe_set_user_ctx(Db, Options)}; + undefined -> + Result = fabric2_fdb:transactional(DbName, Options, fun(TxDb) -> + fabric2_fdb:open(TxDb, Options) + end), + % Cache outside the transaction retry loop + case Result of + #{} = Db -> + ok = fabric2_server:store(Db), + {ok, Db#{tx := undefined}}; + Error -> + Error + end + end. + + +delete(DbName, Options) -> + % This will throw if the db does not exist + {ok, Db} = open(DbName, Options), + Resp = fabric2_fdb:transactional(Db, fun(TxDb) -> + fabric2_fdb:delete(TxDb) + end), + if Resp /= ok -> Resp; true -> + fabric2_server:remove(DbName) + end. + + +list_dbs() -> + list_dbs([]). + + +list_dbs(Options) -> + fabric2_fdb:transactional(fun(Tx) -> + fabric2_fdb:list_dbs(Tx, Options) + end). + + +is_admin(Db) -> + % TODO: Need to re-consider couch_db_plugin:check_is_admin/1 + {SecProps} = get_security(Db), + UserCtx = get_user_ctx(Db), + {Admins} = get_admins(SecProps), + is_authorized(Admins, UserCtx). + + +check_is_admin(Db) -> + case is_admin(Db) of + true -> + ok; + false -> + UserCtx = get_user_ctx(Db), + Reason = <<"You are not a db or server admin.">>, + throw_security_error(UserCtx, Reason) + end. + + +check_is_member(Db) -> + case is_member(Db) of + true -> + ok; + false -> + UserCtx = get_user_ctx(Db), + throw_security_error(UserCtx) + end. + + +name(#{name := DbName}) -> + DbName. + + +get_after_doc_read_fun(#{after_doc_read := AfterDocRead}) -> + AfterDocRead. + + +get_before_doc_update_fun(#{before_doc_update := BeforeDocUpdate}) -> + BeforeDocUpdate. + +get_committed_update_seq(#{} = Db) -> + get_update_seq(Db). + + +get_compacted_seq(#{} = Db) -> + get_update_seq(Db). + + +get_compactor_pid(#{} = _Db) -> + nil. + + +get_db_info(#{} = Db) -> + DbProps = fabric2_fdb:transactional(Db, fun(TxDb) -> + fabric2_fdb:get_info(TxDb) + end), + + BaseProps = [ + {cluster, {[{n, 0}, {q, 0}, {r, 0}, {w, 0}]}}, + {compact_running, false}, + {data_size, 0}, + {db_name, name(Db)}, + {disk_format_version, 0}, + {disk_size, 0}, + {instance_start_time, <<"0">>}, + {purge_seq, 0} + ], + + {ok, lists:foldl(fun({Key, Val}, Acc) -> + lists:keystore(Key, 1, Acc, {Key, Val}) + end, BaseProps, DbProps)}. + + +get_del_doc_count(#{} = Db) -> + get_doc_count(Db, <<"doc_del_count">>). + + +get_doc_count(Db) -> + get_doc_count(Db, <<"doc_count">>). + + +get_doc_count(Db, <<"_all_docs">>) -> + get_doc_count(Db, <<"doc_count">>); + +get_doc_count(DbName, <<"_design">>) -> + get_doc_count(DbName, <<"doc_design_count">>); + +get_doc_count(DbName, <<"_local">>) -> + get_doc_count(DbName, <<"doc_local_count">>); + +get_doc_count(Db, Key) -> + fabric2_fdb:transactional(Db, fun(TxDb) -> + fabric2_fdb:get_stat(TxDb, Key) + end). + + +get_instance_start_time(#{}) -> + 0. + + +get_pid(#{}) -> + nil. + + +get_revs_limit(#{revs_limit := RevsLimit}) -> + RevsLimit. + + +get_security(#{security_doc := SecurityDoc}) -> + SecurityDoc. + + +get_update_seq(#{} = Db) -> + fabric2_fdb:transactional(Db, fun(TxDb) -> + fabric2_fdb:get_last_change(TxDb) + end). + + +get_user_ctx(#{user_ctx := UserCtx}) -> + UserCtx. + + +get_uuid(#{uuid := UUID}) -> + UUID. + + +is_clustered(#{}) -> + false. + + +is_db(#{name := _}) -> + true; +is_db(_) -> + false. + + +is_partitioned(#{}) -> + false. + + +is_system_db(#{name := DbName}) -> + is_system_db_name(DbName). + + +is_system_db_name(DbName) when is_list(DbName) -> + is_system_db_name(?l2b(DbName)); +is_system_db_name(DbName) when is_binary(DbName) -> + Suffix = filename:basename(DbName), + case {filename:dirname(DbName), lists:member(Suffix, ?SYSTEM_DATABASES)} of + {<<".">>, Result} -> Result; + {_Prefix, false} -> false; + {Prefix, true} -> + ReOpts = [{capture,none}, dollar_endonly], + re:run(Prefix, ?DBNAME_REGEX, ReOpts) == match + end. + + +set_revs_limit(#{} = Db, RevsLimit) -> + check_is_admin(Db), + RevsLimBin = ?uint2bin(RevsLimit), + Resp = fabric2_fdb:transactional(Db, fun(TxDb) -> + fabric2_fdb:set_config(TxDb, <<"revs_limit">>, RevsLimBin) + end), + if Resp /= ok -> Resp; true -> + fabric2_server:store(Db#{revs_limit := RevsLimit}) + end. + + +set_security(#{} = Db, Security) -> + check_is_admin(Db), + ok = fabric2_util:validate_security_object(Security), + SecBin = ?JSON_ENCODE(Security), + Resp = fabric2_fdb:transactional(Db, fun(TxDb) -> + fabric2_fdb:set_config(TxDb, <<"security_doc">>, SecBin) + end), + if Resp /= ok -> Resp; true -> + fabric2_server:store(Db#{security_doc := Security}) + end. + + +set_user_ctx(#{} = Db, UserCtx) -> + Db#{user_ctx := UserCtx}. + + +ensure_full_commit(#{}) -> + {ok, 0}. + + +ensure_full_commit(#{}, _Timeout) -> + {ok, 0}. + + +open_doc(#{} = Db, DocId) -> + open_doc(Db, DocId, []). + + +open_doc(#{} = Db, <> = DocId, _Options) -> + fabric2_fdb:transactional(Db, fun(TxDb) -> + case fabric2_fdb:get_local_doc(TxDb, DocId) of + #doc{} = Doc -> {ok, Doc}; + Else -> Else + end + end); + +open_doc(#{} = Db, DocId, Options) -> + NeedsTreeOpts = [revs_info, conflicts, deleted_conflicts], + NeedsTree = (Options -- NeedsTreeOpts /= Options), + fabric2_fdb:transactional(Db, fun(TxDb) -> + Revs = case NeedsTree of + true -> fabric2_fdb:get_all_revs(TxDb, DocId); + false -> fabric2_fdb:get_winning_revs(TxDb, DocId, 1) + end, + if Revs == [] -> {not_found, missing}; true -> + #{winner := true} = RI = lists:last(Revs), + case fabric2_fdb:get_doc_body(TxDb, DocId, RI) of + #doc{} = Doc -> + apply_open_doc_opts(Doc, Revs, Options); + Else -> + Else + end + end + end). + + +open_doc_revs(Db, DocId, Revs, Options) -> + Latest = lists:member(latest, Options), + fabric2_fdb:transactional(Db, fun(TxDb) -> + AllRevInfos = fabric2_fdb:get_all_revs(TxDb, DocId), + RevTree = lists:foldl(fun(RI, TreeAcc) -> + RIPath = fabric2_util:revinfo_to_path(RI), + {Merged, _} = couch_key_tree:merge(TreeAcc, RIPath), + Merged + end, [], AllRevInfos), + {Found, Missing} = case Revs of + all -> + {couch_key_tree:get_all_leafs(RevTree), []}; + _ when Latest -> + couch_key_tree:get_key_leafs(RevTree, Revs); + _ -> + couch_key_tree:get(RevTree, Revs) + end, + Docs = lists:map(fun({Value, {Pos, [Rev | RevPath]}}) -> + case Value of + ?REV_MISSING -> + % We have the rev in our list but know nothing about it + {{not_found, missing}, {Pos, Rev}}; + _ -> + RevInfo = #{ + rev_id => {Pos, Rev}, + rev_path => RevPath + }, + case fabric2_fdb:get_doc_body(TxDb, DocId, RevInfo) of + #doc{} = Doc -> {ok, Doc}; + Else -> {Else, {Pos, Rev}} + end + end + end, Found), + MissingDocs = [{{not_found, missing}, MRev} || MRev <- Missing], + {ok, Docs ++ MissingDocs} + end). + + +get_doc_info(Db, DocId) -> + case get_full_doc_info(Db, DocId) of + not_found -> not_found; + FDI -> couch_doc:to_doc_info(FDI) + end. + + +get_full_doc_info(Db, DocId) -> + RevInfos = fabric2_fdb:transactional(Db, fun(TxDb) -> + fabric2_fdb:get_all_revs(TxDb, DocId) + end), + if RevInfos == [] -> not_found; true -> + #{winner := true} = Winner = lists:last(RevInfos), + RevTree = lists:foldl(fun(RI, TreeAcc) -> + RIPath = fabric2_util:revinfo_to_path(RI), + {Merged, _} = couch_key_tree:merge(TreeAcc, RIPath), + Merged + end, [], RevInfos), + #full_doc_info{ + id = DocId, + update_seq = fabric2_fdb:vs_to_seq(maps:get(sequence, Winner)), + deleted = maps:get(deleted, Winner), + rev_tree = RevTree + } + end. + + +get_full_doc_infos(Db, DocIds) -> + fabric2_fdb:transactional(Db, fun(TxDb) -> + lists:map(fun(DocId) -> + get_full_doc_info(TxDb, DocId) + end, DocIds) + end). + + +get_missing_revs(Db, JsonIdRevs) -> + IdRevs = [idrevs(IdR) || IdR <- JsonIdRevs], + AllRevInfos = fabric2_fdb:transactional(Db, fun(TxDb) -> + lists:foldl(fun({Id, _Revs}, Acc) -> + case maps:is_key(Id, Acc) of + true -> + Acc; + false -> + RevInfos = fabric2_fdb:get_all_revs(TxDb, Id), + Acc#{Id => RevInfos} + end + end, #{}, IdRevs) + end), + AllMissing = lists:flatmap(fun({Id, Revs}) -> + #{Id := RevInfos} = AllRevInfos, + Missing = try + lists:foldl(fun(RevInfo, RevAcc) -> + if RevAcc /= [] -> ok; true -> + throw(all_found) + end, + filter_found_revs(RevInfo, RevAcc) + end, Revs, RevInfos) + catch throw:all_found -> + [] + end, + if Missing == [] -> []; true -> + PossibleAncestors = find_possible_ancestors(RevInfos, Missing), + [{Id, Missing, PossibleAncestors}] + end + end, IdRevs), + {ok, AllMissing}. + + +update_doc(Db, Doc) -> + update_doc(Db, Doc, []). + + +update_doc(Db, Doc, Options) -> + case update_docs(Db, [Doc], Options) of + {ok, [{ok, NewRev}]} -> + {ok, NewRev}; + {ok, [{{_Id, _Rev}, Error}]} -> + throw(Error); + {error, [{{_Id, _Rev}, Error}]} -> + throw(Error); + {error, [Error]} -> + throw(Error); + {ok, []} -> + % replication success + {Pos, [RevId | _]} = Doc#doc.revs, + {ok, {Pos, RevId}} + end. + + +update_docs(Db, Docs) -> + update_docs(Db, Docs, []). + + +update_docs(Db, Docs, Options) -> + Resps0 = case lists:member(replicated_changes, Options) of + false -> + fabric2_fdb:transactional(Db, fun(TxDb) -> + update_docs_interactive(TxDb, Docs, Options) + end); + true -> + lists:map(fun(Doc) -> + fabric2_fdb:transactional(Db, fun(TxDb) -> + update_doc_int(TxDb, Doc, Options) + end) + end, Docs) + end, + % Convert errors + Resps1 = lists:map(fun(Resp) -> + case Resp of + {#doc{} = Doc, Error} -> + #doc{ + id = DocId, + revs = Revs + } = Doc, + RevId = case Revs of + {RevPos, [Rev | _]} -> {RevPos, Rev}; + {0, []} -> {0, <<>>} + end, + {{DocId, RevId}, Error}; + Else -> + Else + end + end, Resps0), + case lists:member(replicated_changes, Options) of + true -> + {ok, [R || R <- Resps1, R /= {ok, []}]}; + false -> + Status = lists:foldl(fun(Resp, Acc) -> + case Resp of + {ok, _} -> Acc; + _ -> error + end + end, ok, Resps1), + {Status, Resps1} + end. + + +read_attachment(Db, DocId, AttId) -> + fabric2_fdb:transactional(Db, fun(TxDb) -> + fabric2_fdb:read_attachment(TxDb, DocId, AttId) + end). + + +write_attachment(Db, DocId, Att) -> + Data = couch_att:fetch(data, Att), + {ok, AttId} = fabric2_fdb:transactional(Db, fun(TxDb) -> + fabric2_fdb:write_attachment(TxDb, DocId, Data) + end), + couch_att:store(data, {loc, Db, DocId, AttId}, Att). + + +fold_docs(Db, UserFun, UserAcc) -> + fold_docs(Db, UserFun, UserAcc, []). + + +fold_docs(Db, UserFun, UserAcc, Options) -> + fabric2_fdb:transactional(Db, fun(TxDb) -> + fabric2_fdb:fold_docs(TxDb, UserFun, UserAcc, Options) + end). + + +fold_changes(Db, SinceSeq, UserFun, UserAcc) -> + fold_changes(Db, SinceSeq, UserFun, UserAcc, []). + + +fold_changes(Db, SinceSeq, UserFun, UserAcc, Options) -> + fabric2_fdb:transactional(Db, fun(TxDb) -> + fabric2_fdb:fold_changes(TxDb, SinceSeq, UserFun, UserAcc, Options) + end). + + +new_revid(Doc) -> + #doc{ + body = Body, + revs = {OldStart, OldRevs}, + atts = Atts, + deleted = Deleted + } = Doc, + + DigestedAtts = lists:foldl(fun(Att, Acc) -> + [N, T, M] = couch_att:fetch([name, type, md5], Att), + case M == <<>> of + true -> Acc; + false -> [{N, T, M} | Acc] + end + end, [], Atts), + + Rev = case DigestedAtts of + Atts2 when length(Atts) =/= length(Atts2) -> + % We must have old style non-md5 attachments + list_to_binary(integer_to_list(couch_util:rand32())); + Atts2 -> + OldRev = case OldRevs of [] -> 0; [OldRev0 | _] -> OldRev0 end, + SigTerm = [Deleted, OldStart, OldRev, Body, Atts2], + couch_hash:md5_hash(term_to_binary(SigTerm, [{minor_version, 1}])) + end, + + Doc#doc{revs = {OldStart + 1, [Rev | OldRevs]}}. + + +maybe_set_user_ctx(Db, Options) -> + case fabric2_util:get_value(user_ctx, Options) of + #user_ctx{} = UserCtx -> + set_user_ctx(Db, UserCtx); + undefined -> + Db + end. + + +is_member(Db) -> + {SecProps} = get_security(Db), + case is_admin(Db) of + true -> + true; + false -> + case is_public_db(SecProps) of + true -> + true; + false -> + {Members} = get_members(SecProps), + UserCtx = get_user_ctx(Db), + is_authorized(Members, UserCtx) + end + end. + + +is_authorized(Group, UserCtx) -> + #user_ctx{ + name = UserName, + roles = UserRoles + } = UserCtx, + Names = fabric2_util:get_value(<<"names">>, Group, []), + Roles = fabric2_util:get_value(<<"roles">>, Group, []), + case check_security(roles, UserRoles, [<<"_admin">> | Roles]) of + true -> + true; + false -> + check_security(names, UserName, Names) + end. + + +check_security(roles, [], _) -> + false; +check_security(roles, UserRoles, Roles) -> + UserRolesSet = ordsets:from_list(UserRoles), + RolesSet = ordsets:from_list(Roles), + not ordsets:is_disjoint(UserRolesSet, RolesSet); +check_security(names, _, []) -> + false; +check_security(names, null, _) -> + false; +check_security(names, UserName, Names) -> + lists:member(UserName, Names). + + +throw_security_error(#user_ctx{name = null} = UserCtx) -> + Reason = <<"You are not authorized to access this db.">>, + throw_security_error(UserCtx, Reason); +throw_security_error(#user_ctx{name = _} = UserCtx) -> + Reason = <<"You are not allowed to access this db.">>, + throw_security_error(UserCtx, Reason). + + +throw_security_error(#user_ctx{} = UserCtx, Reason) -> + Error = security_error_type(UserCtx), + throw({Error, Reason}). + + +security_error_type(#user_ctx{name = null}) -> + unauthorized; +security_error_type(#user_ctx{name = _}) -> + forbidden. + + +is_public_db(SecProps) -> + {Members} = get_members(SecProps), + Names = fabric2_util:get_value(<<"names">>, Members, []), + Roles = fabric2_util:get_value(<<"roles">>, Members, []), + Names =:= [] andalso Roles =:= []. + + +get_admins(SecProps) -> + fabric2_util:get_value(<<"admins">>, SecProps, {[]}). + + +get_members(SecProps) -> + % we fallback to readers here for backwards compatibility + case fabric2_util:get_value(<<"members">>, SecProps) of + undefined -> + fabric2_util:get_value(<<"readers">>, SecProps, {[]}); + Members -> + Members + end. + + +apply_open_doc_opts(Doc, Revs, Options) -> + IncludeRevsInfo = lists:member(revs_info, Options), + IncludeConflicts = lists:member(conflicts, Options), + IncludeDelConflicts = lists:member(deleted_conflicts, Options), + IncludeLocalSeq = lists:member(local_seq, Options), + ReturnDeleted = lists:member(deleted, Options), + + % This revs_info becomes fairly useless now that we're + % not keeping old document bodies around... + Meta1 = if not IncludeRevsInfo -> []; true -> + {Pos, [Rev | RevPath]} = Doc#doc.revs, + RevPathMissing = lists:map(fun(R) -> {R, missing} end, RevPath), + [{revs_info, Pos, [{Rev, available} | RevPathMissing]}] + end, + + Meta2 = if not IncludeConflicts -> []; true -> + Conflicts = [RI || RI = #{winner := false, deleted := false} <- Revs], + if Conflicts == [] -> []; true -> + ConflictRevs = [maps:get(rev_id, RI) || RI <- Conflicts], + [{conflicts, ConflictRevs}] + end + end, + + Meta3 = if not IncludeDelConflicts -> []; true -> + DelConflicts = [RI || RI = #{winner := false, deleted := true} <- Revs], + if DelConflicts == [] -> []; true -> + DelConflictRevs = [maps:get(rev_id, RI) || RI <- DelConflicts], + [{deleted_conflicts, DelConflictRevs}] + end + end, + + Meta4 = if not IncludeLocalSeq -> []; true -> + #{winner := true, sequence := SeqVS} = lists:last(Revs), + [{local_seq, fabric2_fdb:vs_to_seq(SeqVS)}] + end, + + case Doc#doc.deleted and not ReturnDeleted of + true -> + {not_found, deleted}; + false -> + {ok, Doc#doc{ + meta = Meta1 ++ Meta2 ++ Meta3 ++ Meta4 + }} + end. + + +filter_found_revs(RevInfo, Revs) -> + #{ + rev_id := {Pos, Rev}, + rev_path := RevPath + } = RevInfo, + FullRevPath = [Rev | RevPath], + lists:flatmap(fun({FindPos, FindRev} = RevIdToFind) -> + if FindPos > Pos -> [RevIdToFind]; true -> + % Add 1 because lists:nth is 1 based + Idx = Pos - FindPos + 1, + case Idx > length(FullRevPath) of + true -> + [RevIdToFind]; + false -> + case lists:nth(Idx, FullRevPath) == FindRev of + true -> []; + false -> [RevIdToFind] + end + end + end + end, Revs). + + +find_possible_ancestors(RevInfos, MissingRevs) -> + % Find any revinfos that are possible ancestors + % of the missing revs. A possible ancestor is + % any rev that has a start position less than + % any missing revision. Stated alternatively, + % find any revinfo that could theoretically + % extended to be one or more of the missing + % revisions. + % + % Since we are looking at any missing revision + % we can just compare against the maximum missing + % start position. + MaxMissingPos = case MissingRevs of + [] -> 0; + [_ | _] -> lists:max([Start || {Start, _Rev} <- MissingRevs]) + end, + lists:flatmap(fun(RevInfo) -> + #{rev_id := {RevPos, _} = RevId} = RevInfo, + case RevPos < MaxMissingPos of + true -> [RevId]; + false -> [] + end + end, RevInfos). + + +update_doc_int(#{} = Db, #doc{} = Doc, Options) -> + IsLocal = case Doc#doc.id of + <> -> true; + _ -> false + end, + IsReplicated = lists:member(replicated_changes, Options), + try + case {IsLocal, IsReplicated} of + {false, false} -> update_doc_interactive(Db, Doc, Options); + {false, true} -> update_doc_replicated(Db, Doc, Options); + {true, _} -> update_local_doc(Db, Doc, Options) + end + catch throw:{?MODULE, Return} -> + Return + end. + + +update_docs_interactive(Db, Docs0, Options) -> + Docs = tag_docs(Docs0), + Futures = get_winning_rev_futures(Db, Docs), + {Result, _} = lists:mapfoldl(fun(Doc, SeenIds) -> + try + update_docs_interactive(Db, Doc, Options, Futures, SeenIds) + catch throw:{?MODULE, Return} -> + {Return, SeenIds} + end + end, [], Docs), + Result. + + +update_docs_interactive(Db, #doc{id = <>} = Doc, + Options, _Futures, SeenIds) -> + {update_local_doc(Db, Doc, Options), SeenIds}; + +update_docs_interactive(Db, Doc, Options, Futures, SeenIds) -> + case lists:member(Doc#doc.id, SeenIds) of + true -> + {{error, conflict}, SeenIds}; + false -> + Future = maps:get(doc_tag(Doc), Futures), + case update_doc_interactive(Db, Doc, Future, Options) of + {ok, _} = Resp -> + {Resp, [Doc#doc.id | SeenIds]}; + _ = Resp -> + {Resp, SeenIds} + end + end. + + +update_doc_interactive(Db, Doc0, Options) -> + % Get the current winning revision. This is needed + % regardless of which branch we're updating. The extra + % revision we're grabbing is an optimization to + % save us a round trip if we end up deleting + % the winning revision branch. + NumRevs = if Doc0#doc.deleted -> 2; true -> 1 end, + Future = fabric2_fdb:get_winning_revs_future(Db, Doc0#doc.id, NumRevs), + update_doc_interactive(Db, Doc0, Future, Options). + + +update_doc_interactive(Db, Doc0, Future, _Options) -> + RevInfos = fabric2_fdb:get_winning_revs_wait(Db, Future), + {Winner, SecondPlace} = case RevInfos of + [] -> {not_found, not_found}; + [WRI] -> {WRI, not_found}; + [WRI, SPRI] -> {WRI, SPRI} + end, + WinnerRevId = case Winner of + not_found -> + {0, <<>>}; + _ -> + case maps:get(deleted, Winner) of + true -> {0, <<>>}; + false -> maps:get(rev_id, Winner) + end + end, + + % Check that a revision was specified if required + Doc0RevId = doc_to_revid(Doc0), + if Doc0RevId /= {0, <<>>} orelse WinnerRevId == {0, <<>>} -> ok; true -> + ?RETURN({error, conflict}) + end, + + % Check that we're not trying to create a deleted doc + if Doc0RevId /= {0, <<>>} orelse not Doc0#doc.deleted -> ok; true -> + ?RETURN({error, conflict}) + end, + + % Get the target revision to update + Target = case Doc0RevId == WinnerRevId of + true -> + Winner; + false -> + case fabric2_fdb:get_non_deleted_rev(Db, Doc0#doc.id, Doc0RevId) of + #{deleted := false} = Target0 -> + Target0; + not_found -> + % Either a missing revision or a deleted + % revision. Either way a conflict. Note + % that we get not_found for a deleted revision + % because we only check for the non-deleted + % key in fdb + ?RETURN({error, conflict}) + end + end, + + % When recreating a deleted document we want to extend + % the winning revision branch rather than create a + % new branch. If we did not do this we could be + % recreating into a state that previously existed. + Doc1 = case Winner of + #{deleted := true} when not Doc0#doc.deleted -> + {WinnerRevPos, WinnerRev} = maps:get(rev_id, Winner), + WinnerRevPath = maps:get(rev_path, Winner), + Doc0#doc{revs = {WinnerRevPos, [WinnerRev | WinnerRevPath]}}; + _ -> + Doc0 + end, + + % Validate the doc update and create the + % new revinfo map + Doc2 = prep_and_validate(Db, Doc1, Target), + #doc{ + deleted = NewDeleted, + revs = {NewRevPos, [NewRev | NewRevPath]} + } = Doc3 = new_revid(Doc2), + + Doc4 = update_attachment_revpos(Doc3), + + NewRevInfo = #{ + winner => undefined, + deleted => NewDeleted, + rev_id => {NewRevPos, NewRev}, + rev_path => NewRevPath, + sequence => undefined, + branch_count => undefined + }, + + % Gather the list of possible winnig revisions + Possible = case Target == Winner of + true when not Doc4#doc.deleted -> + [NewRevInfo]; + true when Doc4#doc.deleted -> + case SecondPlace of + #{} -> [NewRevInfo, SecondPlace]; + not_found -> [NewRevInfo] + end; + false -> + [NewRevInfo, Winner] + end, + + % Sort the rev infos such that the winner is first + {NewWinner0, NonWinner} = case fabric2_util:sort_revinfos(Possible) of + [W] -> {W, not_found}; + [W, NW] -> {W, NW} + end, + + BranchCount = case Winner of + not_found -> 1; + #{branch_count := BC} -> BC + end, + NewWinner = NewWinner0#{branch_count := BranchCount}, + ToUpdate = if NonWinner == not_found -> []; true -> [NonWinner] end, + ToRemove = if Target == not_found -> []; true -> [Target] end, + + ok = fabric2_fdb:write_doc( + Db, + Doc4, + NewWinner, + Winner, + ToUpdate, + ToRemove + ), + + {ok, {NewRevPos, NewRev}}. + + +update_doc_replicated(Db, Doc0, _Options) -> + #doc{ + id = DocId, + deleted = Deleted, + revs = {RevPos, [Rev | RevPath]} + } = Doc0, + + DocRevInfo0 = #{ + winner => undefined, + deleted => Deleted, + rev_id => {RevPos, Rev}, + rev_path => RevPath, + sequence => undefined, + branch_count => undefined + }, + + AllRevInfos = fabric2_fdb:get_all_revs(Db, DocId), + + RevTree = lists:foldl(fun(RI, TreeAcc) -> + RIPath = fabric2_util:revinfo_to_path(RI), + {Merged, _} = couch_key_tree:merge(TreeAcc, RIPath), + Merged + end, [], AllRevInfos), + + DocRevPath = fabric2_util:revinfo_to_path(DocRevInfo0), + {NewTree, Status} = couch_key_tree:merge(RevTree, DocRevPath), + if Status /= internal_node -> ok; true -> + % We already know this revision so nothing + % left to do. + ?RETURN({ok, []}) + end, + + % Its possible to have a replication with fewer than $revs_limit + % revisions which extends an existing branch. To avoid + % losing revision history we extract the new node from the + % tree and use the combined path after stemming. + {[{_, {RevPos, UnstemmedRevs}}], []} + = couch_key_tree:get(NewTree, [{RevPos, Rev}]), + RevsLimit = fabric2_db:get_revs_limit(Db), + Doc1 = Doc0#doc{ + revs = {RevPos, lists:sublist(UnstemmedRevs, RevsLimit)} + }, + {RevPos, [Rev | NewRevPath]} = Doc1#doc.revs, + DocRevInfo1 = DocRevInfo0#{rev_path := NewRevPath}, + + % Find any previous revision we knew about for + % validation and attachment handling. + AllLeafsFull = couch_key_tree:get_all_leafs_full(NewTree), + LeafPath = get_leaf_path(RevPos, Rev, AllLeafsFull), + PrevRevInfo = find_prev_revinfo(RevPos, LeafPath), + Doc2 = prep_and_validate(Db, Doc1, PrevRevInfo), + + % Possible winners are the previous winner and + % the new DocRevInfo + Winner = case fabric2_util:sort_revinfos(AllRevInfos) of + [#{winner := true} = WRI | _] -> WRI; + [] -> not_found + end, + {NewWinner0, NonWinner} = case Winner == PrevRevInfo of + true -> + {DocRevInfo1, not_found}; + false -> + [W, NW] = fabric2_util:sort_revinfos([Winner, DocRevInfo1]), + {W, NW} + end, + + NewWinner = NewWinner0#{branch_count := length(AllLeafsFull)}, + ToUpdate = if NonWinner == not_found -> []; true -> [NonWinner] end, + ToRemove = if PrevRevInfo == not_found -> []; true -> [PrevRevInfo] end, + + ok = fabric2_fdb:write_doc( + Db, + Doc2, + NewWinner, + Winner, + ToUpdate, + ToRemove + ), + + {ok, []}. + + +update_local_doc(Db, Doc0, _Options) -> + Doc1 = case increment_local_doc_rev(Doc0) of + {ok, Updated} -> Updated; + {error, _} = Error -> ?RETURN(Error) + end, + + ok = fabric2_fdb:write_local_doc(Db, Doc1), + + #doc{revs = {0, [Rev]}} = Doc1, + {ok, {0, integer_to_binary(Rev)}}. + + +update_attachment_revpos(#doc{revs = {RevPos, _Revs}, atts = Atts0} = Doc) -> + Atts = lists:map(fun(Att) -> + case couch_att:fetch(data, Att) of + {loc, _Db, _DocId, _AttId} -> + % Attachment was already on disk + Att; + _ -> + % We will write this attachment with this update + % so mark it with the RevPos that will be written + couch_att:store(revpos, RevPos, Att) + end + end, Atts0), + Doc#doc{atts = Atts}. + + +get_winning_rev_futures(Db, Docs) -> + lists:foldl(fun(Doc, Acc) -> + #doc{ + id = DocId, + deleted = Deleted + } = Doc, + IsLocal = case DocId of + <> -> true; + _ -> false + end, + if IsLocal -> Acc; true -> + NumRevs = if Deleted -> 2; true -> 1 end, + Future = fabric2_fdb:get_winning_revs_future(Db, DocId, NumRevs), + DocTag = doc_tag(Doc), + Acc#{DocTag => Future} + end + end, #{}, Docs). + + +prep_and_validate(Db, NewDoc, PrevRevInfo) -> + HasStubs = couch_doc:has_stubs(NewDoc), + HasVDUs = [] /= maps:get(validate_doc_update_funs, Db), + IsDDoc = case NewDoc#doc.id of + <> -> true; + _ -> false + end, + + PrevDoc = case HasStubs orelse (HasVDUs and not IsDDoc) of + true when PrevRevInfo /= not_found -> + case fabric2_fdb:get_doc_body(Db, NewDoc#doc.id, PrevRevInfo) of + #doc{} = PDoc -> PDoc; + {not_found, _} -> nil + end; + _ -> + nil + end, + + MergedDoc = if not HasStubs -> NewDoc; true -> + % This will throw an error if we have any + % attachment stubs missing data + couch_doc:merge_stubs(NewDoc, PrevDoc) + end, + check_duplicate_attachments(MergedDoc), + validate_doc_update(Db, MergedDoc, PrevDoc), + MergedDoc. + + +validate_doc_update(Db, #doc{id = <<"_design/", _/binary>>} = Doc, _) -> + case catch check_is_admin(Db) of + ok -> validate_ddoc(Db, Doc); + Error -> ?RETURN({Doc, Error}) + end; +validate_doc_update(Db, Doc, PrevDoc) -> + #{ + security_doc := Security, + validate_doc_update_funs := VDUs + } = Db, + Fun = fun() -> + JsonCtx = fabric2_util:user_ctx_to_json(Db), + lists:map(fun(VDU) -> + try + case VDU(Doc, PrevDoc, JsonCtx, Security) of + ok -> ok; + Error1 -> throw(Error1) + end + catch throw:Error2 -> + ?RETURN({Doc, Error2}) + end + end, VDUs) + end, + Stat = [couchdb, query_server, vdu_process_time], + if VDUs == [] -> ok; true -> + couch_stats:update_histogram(Stat, Fun) + end. + + +validate_ddoc(Db, DDoc) -> + try + ok = couch_index_server:validate(Db, couch_doc:with_ejson_body(DDoc)) + catch + throw:{invalid_design_doc, Reason} -> + throw({bad_request, invalid_design_doc, Reason}); + throw:{compilation_error, Reason} -> + throw({bad_request, compilation_error, Reason}); + throw:Error -> + ?RETURN({DDoc, Error}) + end. + + +check_duplicate_attachments(#doc{atts = Atts}) -> + lists:foldl(fun(Att, Names) -> + Name = couch_att:fetch(name, Att), + case ordsets:is_element(Name, Names) of + true -> throw({bad_request, <<"Duplicate attachments">>}); + false -> ordsets:add_element(Name, Names) + end + end, ordsets:new(), Atts). + + +get_leaf_path(Pos, Rev, [{Pos, [{Rev, _RevInfo} | LeafPath]} | _]) -> + LeafPath; +get_leaf_path(Pos, Rev, [_WrongLeaf | RestLeafs]) -> + get_leaf_path(Pos, Rev, RestLeafs). + + +find_prev_revinfo(_Pos, []) -> + not_found; +find_prev_revinfo(Pos, [{_Rev, ?REV_MISSING} | RestPath]) -> + find_prev_revinfo(Pos - 1, RestPath); +find_prev_revinfo(_Pos, [{_Rev, #{} = RevInfo} | _]) -> + RevInfo. + + +increment_local_doc_rev(#doc{deleted = true} = Doc) -> + {ok, Doc#doc{revs = {0, [0]}}}; +increment_local_doc_rev(#doc{revs = {0, []}} = Doc) -> + {ok, Doc#doc{revs = {0, [1]}}}; +increment_local_doc_rev(#doc{revs = {0, [RevStr | _]}} = Doc) -> + try + PrevRev = binary_to_integer(RevStr), + {ok, Doc#doc{revs = {0, [PrevRev + 1]}}} + catch error:badarg -> + {error, <<"Invalid rev format">>} + end; +increment_local_doc_rev(#doc{}) -> + {error, <<"Invalid rev format">>}. + + +doc_to_revid(#doc{revs = Revs}) -> + case Revs of + {0, []} -> {0, <<>>}; + {RevPos, [Rev | _]} -> {RevPos, Rev} + end. + + +tag_docs([]) -> + []; +tag_docs([#doc{meta = Meta} = Doc | Rest]) -> + NewDoc = Doc#doc{ + meta = [{ref, make_ref()} | Meta] + }, + [NewDoc | tag_docs(Rest)]. + + +doc_tag(#doc{meta = Meta}) -> + fabric2_util:get_value(ref, Meta). + + +idrevs({Id, Revs}) when is_list(Revs) -> + {docid(Id), [rev(R) || R <- Revs]}. + + +docid(DocId) when is_list(DocId) -> + list_to_binary(DocId); +docid(DocId) -> + DocId. + + +rev(Rev) when is_list(Rev); is_binary(Rev) -> + couch_doc:parse_rev(Rev); +rev({Seq, Hash} = Rev) when is_integer(Seq), is_binary(Hash) -> + Rev. + diff --git a/src/fabric/src/fabric2_events.erl b/src/fabric/src/fabric2_events.erl new file mode 100644 index 000000000..a5717147f --- /dev/null +++ b/src/fabric/src/fabric2_events.erl @@ -0,0 +1,84 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(fabric2_events). + + +-export([ + link_listener/4, + stop_listener/1 +]). + +-export([ + init/5, + poll/5 +]). + + +-include_lib("couch/include/couch_db.hrl"). + + +link_listener(Mod, Fun, St, Options) -> + DbName = fabric2_util:get_value(dbname, Options), + Pid = spawn_link(?MODULE, init, [self(), DbName, Mod, Fun, St]), + receive + {Pid, initialized} -> ok + end, + {ok, Pid}. + + +stop_listener(Pid) -> + Pid ! stop_listening. + + +init(Parent, DbName, Mod, Fun, St) -> + {ok, Db} = fabric2_db:open(DbName, [?ADMIN_CTX]), + Since = fabric2_db:get_update_seq(Db), + couch_log:error("XKCD: START LISTENER: ~s : ~p for ~p", [DbName, Since, Parent]), + erlang:monitor(process, Parent), + Parent ! {self(), initialized}, + poll(DbName, Since, Mod, Fun, St), + couch_log:error("XKCD: STOP LISTENER for ~p", [Parent]). + + +poll(DbName, Since, Mod, Fun, St) -> + {Resp, NewSince} = try + case fabric2_db:open(DbName, [?ADMIN_CTX]) of + {ok, Db} -> + case fabric2_db:get_update_seq(Db) of + Since -> + couch_log:error("XKCD: NO UPDATE: ~s :: ~p", [DbName, Since]), + {{ok, St}, Since}; + Other -> + couch_log:error("XKCD: UPDATED: ~s :: ~p -> ~p", [DbName, Since, Other]), + {Mod:Fun(DbName, updated, St), Other} + end; + Error -> + exit(Error) + end + catch error:database_does_not_exist -> + Mod:Fun(DbName, deleted, St) + end, + receive + stop_listening -> + ok; + {'DOWN', _, _, _, _} -> + ok + after 0 -> + case Resp of + {ok, NewSt} -> + timer:sleep(1000), + ?MODULE:poll(DbName, NewSince, Mod, Fun, NewSt); + {stop, _} -> + ok + end + end. diff --git a/src/fabric/src/fabric2_fdb.erl b/src/fabric/src/fabric2_fdb.erl new file mode 100644 index 000000000..0a4f2981b --- /dev/null +++ b/src/fabric/src/fabric2_fdb.erl @@ -0,0 +1,1187 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(fabric2_fdb). + + +-export([ + transactional/1, + transactional/3, + transactional/2, + + create/2, + open/2, + reopen/1, + delete/1, + exists/1, + + list_dbs/2, + + get_info/1, + get_config/1, + set_config/3, + + get_stat/2, + incr_stat/3, + + get_all_revs/2, + get_winning_revs/3, + get_winning_revs_future/3, + get_winning_revs_wait/2, + get_non_deleted_rev/3, + + get_doc_body/3, + get_doc_body_future/3, + get_doc_body_wait/4, + get_local_doc/2, + + write_doc/6, + write_local_doc/2, + + read_attachment/3, + write_attachment/3, + + fold_docs/4, + fold_changes/5, + get_last_change/1, + + vs_to_seq/1, + + debug_cluster/0, + debug_cluster/2 +]). + + +-include_lib("couch/include/couch_db.hrl"). +-include("fabric2.hrl"). + + +transactional(Fun) -> + do_transaction(Fun, undefined). + + +transactional(DbName, Options, Fun) when is_binary(DbName) -> + transactional(fun(Tx) -> + Fun(init_db(Tx, DbName, Options)) + end). + + +transactional(#{tx := undefined} = Db, Fun) -> + #{layer_prefix := LayerPrefix} = Db, + do_transaction(fun(Tx) -> + Fun(Db#{tx => Tx}) + end, LayerPrefix); + +transactional(#{tx := {erlfdb_transaction, _}} = Db, Fun) -> + Fun(Db). + + +do_transaction(Fun, LayerPrefix) when is_function(Fun, 1) -> + Db = get_db_handle(), + try + erlfdb:transactional(Db, fun(Tx) -> + case get(erlfdb_trace) of + Name when is_binary(Name) -> + erlfdb:set_option(Tx, transaction_logging_enable, Name); + _ -> + ok + end, + case is_transaction_applied(Tx) of + true -> + get_previous_transaction_result(); + false -> + execute_transaction(Tx, Fun, LayerPrefix) + end + end) + after + clear_transaction() + end. + + +create(#{} = Db0, Options) -> + #{ + name := DbName, + tx := Tx, + layer_prefix := LayerPrefix + } = Db = ensure_current(Db0, false), + + % Eventually DbPrefix will be HCA allocated. For now + % we're just using the DbName so that debugging is easier. + DbKey = erlfdb_tuple:pack({?ALL_DBS, DbName}, LayerPrefix), + DbPrefix = erlfdb_tuple:pack({?DBS, DbName}, LayerPrefix), + erlfdb:set(Tx, DbKey, DbPrefix), + + % This key is responsible for telling us when something in + % the database cache (i.e., fabric2_server's ets table) has + % changed and requires re-loading. This currently includes + % revs_limit and validate_doc_update functions. There's + % no order to versioning here. Its just a value that changes + % that is used in the ensure_current check. + DbVersionKey = erlfdb_tuple:pack({?DB_VERSION}, DbPrefix), + DbVersion = fabric2_util:uuid(), + erlfdb:set(Tx, DbVersionKey, DbVersion), + + UUID = fabric2_util:uuid(), + + Defaults = [ + {?DB_CONFIG, <<"uuid">>, UUID}, + {?DB_CONFIG, <<"revs_limit">>, ?uint2bin(1000)}, + {?DB_CONFIG, <<"security_doc">>, <<"{}">>}, + {?DB_STATS, <<"doc_count">>, ?uint2bin(0)}, + {?DB_STATS, <<"doc_del_count">>, ?uint2bin(0)}, + {?DB_STATS, <<"doc_design_count">>, ?uint2bin(0)}, + {?DB_STATS, <<"doc_local_count">>, ?uint2bin(0)}, + {?DB_STATS, <<"size">>, ?uint2bin(2)} + ], + lists:foreach(fun({P, K, V}) -> + Key = erlfdb_tuple:pack({P, K}, DbPrefix), + erlfdb:set(Tx, Key, V) + end, Defaults), + + UserCtx = fabric2_util:get_value(user_ctx, Options, #user_ctx{}), + + Db#{ + uuid => UUID, + db_prefix => DbPrefix, + db_version => DbVersion, + + revs_limit => 1000, + security_doc => {[]}, + user_ctx => UserCtx, + + validate_doc_update_funs => [], + before_doc_update => undefined, + after_doc_read => undefined, + % All other db things as we add features, + + db_options => Options + }. + + +open(#{} = Db0, Options) -> + #{ + name := DbName, + tx := Tx, + layer_prefix := LayerPrefix + } = Db1 = ensure_current(Db0, false), + + DbKey = erlfdb_tuple:pack({?ALL_DBS, DbName}, LayerPrefix), + DbPrefix = case erlfdb:wait(erlfdb:get(Tx, DbKey)) of + Bin when is_binary(Bin) -> Bin; + not_found -> erlang:error(database_does_not_exist) + end, + + DbVersionKey = erlfdb_tuple:pack({?DB_VERSION}, DbPrefix), + DbVersion = erlfdb:wait(erlfdb:get(Tx, DbVersionKey)), + + UserCtx = fabric2_util:get_value(user_ctx, Options, #user_ctx{}), + + Db2 = Db1#{ + db_prefix => DbPrefix, + db_version => DbVersion, + + revs_limit => 1000, + security_doc => {[]}, + user_ctx => UserCtx, + + % Place holders until we implement these + % bits. + validate_doc_update_funs => [], + before_doc_update => undefined, + after_doc_read => undefined, + + db_options => Options + }, + + Db3 = lists:foldl(fun({Key, Val}, DbAcc) -> + case Key of + <<"uuid">> -> + DbAcc#{uuid => Val}; + <<"revs_limit">> -> + DbAcc#{revs_limit => ?bin2uint(Val)}; + <<"security_doc">> -> + DbAcc#{security_doc => ?JSON_DECODE(Val)} + end + end, Db2, get_config(Db2)), + + load_validate_doc_funs(Db3). + + +reopen(#{} = OldDb) -> + require_transaction(OldDb), + #{ + tx := Tx, + name := DbName, + db_options := Options + } = OldDb, + open(init_db(Tx, DbName, Options), Options). + + +delete(#{} = Db) -> + #{ + name := DbName, + tx := Tx, + layer_prefix := LayerPrefix, + db_prefix := DbPrefix + } = ensure_current(Db), + + DbKey = erlfdb_tuple:pack({?ALL_DBS, DbName}, LayerPrefix), + erlfdb:clear(Tx, DbKey), + erlfdb:clear_range_startswith(Tx, DbPrefix), + bump_metadata_version(Tx), + ok. + + +exists(#{name := DbName} = Db) when is_binary(DbName) -> + #{ + tx := Tx, + layer_prefix := LayerPrefix + } = ensure_current(Db, false), + + DbKey = erlfdb_tuple:pack({?ALL_DBS, DbName}, LayerPrefix), + case erlfdb:wait(erlfdb:get(Tx, DbKey)) of + Bin when is_binary(Bin) -> true; + not_found -> false + end. + + +list_dbs(Tx, _Options) -> + Root = erlfdb_directory:root(), + CouchDB = erlfdb_directory:create_or_open(Tx, Root, [<<"couchdb">>]), + LayerPrefix = erlfdb_directory:get_name(CouchDB), + {Start, End} = erlfdb_tuple:range({?ALL_DBS}, LayerPrefix), + Future = erlfdb:get_range(Tx, Start, End), + lists:map(fun({K, _V}) -> + {?ALL_DBS, DbName} = erlfdb_tuple:unpack(K, LayerPrefix), + DbName + end, erlfdb:wait(Future)). + + +get_info(#{} = Db) -> + #{ + tx := Tx, + db_prefix := DbPrefix + } = ensure_current(Db), + + {CStart, CEnd} = erlfdb_tuple:range({?DB_CHANGES}, DbPrefix), + ChangesFuture = erlfdb:get_range(Tx, CStart, CEnd, [ + {streaming_mode, exact}, + {limit, 1}, + {reverse, true} + ]), + + StatsPrefix = erlfdb_tuple:pack({?DB_STATS}, DbPrefix), + MetaFuture = erlfdb:get_range_startswith(Tx, StatsPrefix), + + RawSeq = case erlfdb:wait(ChangesFuture) of + [] -> + vs_to_seq(fabric2_util:seq_zero_vs()); + [{SeqKey, _}] -> + {?DB_CHANGES, SeqVS} = erlfdb_tuple:unpack(SeqKey, DbPrefix), + vs_to_seq(SeqVS) + end, + CProp = {update_seq, RawSeq}, + + MProps = lists:flatmap(fun({K, V}) -> + case erlfdb_tuple:unpack(K, DbPrefix) of + {?DB_STATS, <<"doc_count">>} -> + [{doc_count, ?bin2uint(V)}]; + {?DB_STATS, <<"doc_del_count">>} -> + [{doc_del_count, ?bin2uint(V)}]; + {?DB_STATS, <<"size">>} -> + Val = ?bin2uint(V), + [ + {other, {[{data_size, Val}]}}, + {sizes, {[ + {active, 0}, + {external, Val}, + {file, 0} + ]}} + ]; + {?DB_STATS, _} -> + [] + end + end, erlfdb:wait(MetaFuture)), + + [CProp | MProps]. + + +get_config(#{} = Db) -> + #{ + tx := Tx, + db_prefix := DbPrefix + } = Db = ensure_current(Db), + + {Start, End} = erlfdb_tuple:range({?DB_CONFIG}, DbPrefix), + Future = erlfdb:get_range(Tx, Start, End), + + lists:map(fun({K, V}) -> + {?DB_CONFIG, Key} = erlfdb_tuple:unpack(K, DbPrefix), + {Key, V} + end, erlfdb:wait(Future)). + + +set_config(#{} = Db, ConfigKey, ConfigVal) -> + #{ + tx := Tx, + db_prefix := DbPrefix + } = ensure_current(Db), + + Key = erlfdb_tuple:pack({?DB_CONFIG, ConfigKey}, DbPrefix), + erlfdb:set(Tx, Key, ConfigVal), + bump_metadata_version(Tx). + + +get_stat(#{} = Db, StatKey) -> + #{ + tx := Tx, + db_prefix := DbPrefix + } = ensure_current(Db), + + Key = erlfdb_tuple:pack({?DB_STATS, StatKey}, DbPrefix), + + % Might need to figure out some sort of type + % system here. Uints are because stats are all + % atomic op adds for the moment. + ?bin2uint(erlfdb:wait(erlfdb:get(Tx, Key))). + + +incr_stat(_Db, _StatKey, 0) -> + ok; + +incr_stat(#{} = Db, StatKey, Increment) when is_integer(Increment) -> + #{ + tx := Tx, + db_prefix := DbPrefix + } = ensure_current(Db), + + Key = erlfdb_tuple:pack({?DB_STATS, StatKey}, DbPrefix), + erlfdb:add(Tx, Key, Increment). + + +get_all_revs(#{} = Db, DocId) -> + #{ + tx := Tx, + db_prefix := DbPrefix + } = ensure_current(Db), + + Prefix = erlfdb_tuple:pack({?DB_REVS, DocId}, DbPrefix), + Options = [{streaming_mode, want_all}], + Future = erlfdb:get_range_startswith(Tx, Prefix, Options), + lists:map(fun({K, V}) -> + Key = erlfdb_tuple:unpack(K, DbPrefix), + Val = erlfdb_tuple:unpack(V), + fdb_to_revinfo(Key, Val) + end, erlfdb:wait(Future)). + + +get_winning_revs(Db, DocId, NumRevs) -> + Future = get_winning_revs_future(Db, DocId, NumRevs), + get_winning_revs_wait(Db, Future). + + +get_winning_revs_future(#{} = Db, DocId, NumRevs) -> + #{ + tx := Tx, + db_prefix := DbPrefix + } = ensure_current(Db), + + {StartKey, EndKey} = erlfdb_tuple:range({?DB_REVS, DocId}, DbPrefix), + Options = [{reverse, true}, {limit, NumRevs}], + erlfdb:get_range_raw(Tx, StartKey, EndKey, Options). + + +get_winning_revs_wait(#{} = Db, Future) -> + #{ + db_prefix := DbPrefix + } = ensure_current(Db), + {Rows, _, _} = erlfdb:wait(Future), + lists:map(fun({K, V}) -> + Key = erlfdb_tuple:unpack(K, DbPrefix), + Val = erlfdb_tuple:unpack(V), + fdb_to_revinfo(Key, Val) + end, Rows). + + +get_non_deleted_rev(#{} = Db, DocId, RevId) -> + #{ + tx := Tx, + db_prefix := DbPrefix + } = ensure_current(Db), + + {RevPos, Rev} = RevId, + + BaseKey = {?DB_REVS, DocId, true, RevPos, Rev}, + Key = erlfdb_tuple:pack(BaseKey, DbPrefix), + case erlfdb:wait(erlfdb:get(Tx, Key)) of + not_found -> + not_found; + Val -> + fdb_to_revinfo(BaseKey, erlfdb_tuple:unpack(Val)) + end. + + +get_doc_body(Db, DocId, RevInfo) -> + Future = get_doc_body_future(Db, DocId, RevInfo), + get_doc_body_wait(Db, DocId, RevInfo, Future). + + +get_doc_body_future(#{} = Db, DocId, RevInfo) -> + #{ + tx := Tx, + db_prefix := DbPrefix + } = ensure_current(Db), + + #{ + rev_id := {RevPos, Rev} + } = RevInfo, + + Key = erlfdb_tuple:pack({?DB_DOCS, DocId, RevPos, Rev}, DbPrefix), + erlfdb:get(Tx, Key). + + +get_doc_body_wait(#{} = Db0, DocId, RevInfo, Future) -> + Db = ensure_current(Db0), + + #{ + rev_id := {RevPos, Rev}, + rev_path := RevPath + } = RevInfo, + + Val = erlfdb:wait(Future), + fdb_to_doc(Db, DocId, RevPos, [Rev | RevPath], Val). + + +get_local_doc(#{} = Db0, <> = DocId) -> + #{ + tx := Tx, + db_prefix := DbPrefix + } = Db = ensure_current(Db0), + + Key = erlfdb_tuple:pack({?DB_LOCAL_DOCS, DocId}, DbPrefix), + Val = erlfdb:wait(erlfdb:get(Tx, Key)), + fdb_to_local_doc(Db, DocId, Val). + + +write_doc(#{} = Db0, Doc, NewWinner0, OldWinner, ToUpdate, ToRemove) -> + #{ + tx := Tx, + db_prefix := DbPrefix + } = Db = ensure_current(Db0), + + #doc{ + id = DocId, + deleted = Deleted + } = Doc, + + % Revision tree + + NewWinner = NewWinner0#{winner := true}, + NewRevId = maps:get(rev_id, NewWinner), + + {WKey, WVal, WinnerVS} = revinfo_to_fdb(Tx, DbPrefix, DocId, NewWinner), + ok = erlfdb:set_versionstamped_value(Tx, WKey, WVal), + + lists:foreach(fun(RI0) -> + RI = RI0#{winner := false}, + {K, V, undefined} = revinfo_to_fdb(Tx, DbPrefix, DocId, RI), + ok = erlfdb:set(Tx, K, V) + end, ToUpdate), + + lists:foreach(fun(RI0) -> + RI = RI0#{winner := false}, + {K, _, undefined} = revinfo_to_fdb(Tx, DbPrefix, DocId, RI), + ok = erlfdb:clear(Tx, K) + end, ToRemove), + + % _all_docs + + UpdateStatus = case {OldWinner, NewWinner} of + {not_found, #{deleted := false}} -> + created; + {#{deleted := true}, #{deleted := false}} -> + recreated; + {#{deleted := false}, #{deleted := false}} -> + updated; + {#{deleted := false}, #{deleted := true}} -> + deleted + end, + + case UpdateStatus of + Status when Status == created orelse Status == recreated -> + ADKey = erlfdb_tuple:pack({?DB_ALL_DOCS, DocId}, DbPrefix), + ADVal = erlfdb_tuple:pack(NewRevId), + ok = erlfdb:set(Tx, ADKey, ADVal); + deleted -> + ADKey = erlfdb_tuple:pack({?DB_ALL_DOCS, DocId}, DbPrefix), + ok = erlfdb:clear(Tx, ADKey); + updated -> + ok + end, + + % _changes + + if OldWinner == not_found -> ok; true -> + OldSeq = maps:get(sequence, OldWinner), + OldSeqKey = erlfdb_tuple:pack({?DB_CHANGES, OldSeq}, DbPrefix), + erlfdb:clear(Tx, OldSeqKey) + end, + + NewSeqKey = erlfdb_tuple:pack_vs({?DB_CHANGES, WinnerVS}, DbPrefix), + NewSeqVal = erlfdb_tuple:pack({DocId, Deleted, NewRevId}), + erlfdb:set_versionstamped_key(Tx, NewSeqKey, NewSeqVal), + + % And all the rest... + + ok = write_doc_body(Db, Doc), + + IsDDoc = case Doc#doc.id of + <> -> true; + _ -> false + end, + + if not IsDDoc -> ok; true -> + bump_db_version(Db) + end, + + case UpdateStatus of + created -> + if not IsDDoc -> ok; true -> + incr_stat(Db, <<"doc_design_count">>, 1) + end, + incr_stat(Db, <<"doc_count">>, 1); + recreated -> + if not IsDDoc -> ok; true -> + incr_stat(Db, <<"doc_design_count">>, 1) + end, + incr_stat(Db, <<"doc_count">>, 1), + incr_stat(Db, <<"doc_del_count">>, -1); + deleted -> + if not IsDDoc -> ok; true -> + incr_stat(Db, <<"doc_design_count">>, -1) + end, + incr_stat(Db, <<"doc_count">>, -1), + incr_stat(Db, <<"doc_del_count">>, 1); + updated -> + ok + end, + + ok. + + +write_local_doc(#{} = Db0, Doc) -> + #{ + tx := Tx + } = Db = ensure_current(Db0), + + {LDocKey, LDocVal} = local_doc_to_fdb(Db, Doc), + + WasDeleted = case erlfdb:wait(erlfdb:get(Tx, LDocKey)) of + <<_/binary>> -> false; + not_found -> true + end, + + case Doc#doc.deleted of + true -> erlfdb:clear(Tx, LDocKey); + false -> erlfdb:set(Tx, LDocKey, LDocVal) + end, + + case {WasDeleted, Doc#doc.deleted} of + {true, false} -> + incr_stat(Db, <<"doc_local_count">>, 1); + {false, true} -> + incr_stat(Db, <<"doc_local_count">>, -1); + _ -> + ok + end, + + ok. + + +read_attachment(#{} = Db, DocId, AttId) -> + #{ + tx := Tx, + db_prefix := DbPrefix + } = ensure_current(Db), + + AttKey = erlfdb_tuple:pack({?DB_ATTS, DocId, AttId}, DbPrefix), + case erlfdb:wait(erlfdb:get_range_startswith(Tx, AttKey)) of + not_found -> + throw({not_found, missing}); + KVs -> + Vs = [V || {_K, V} <- KVs], + iolist_to_binary(Vs) + end. + + +write_attachment(#{} = Db, DocId, Data) when is_binary(Data) -> + #{ + tx := Tx, + db_prefix := DbPrefix + } = ensure_current(Db), + + AttId = fabric2_util:uuid(), + Chunks = chunkify_attachment(Data), + + lists:foldl(fun(Chunk, ChunkId) -> + AttKey = erlfdb_tuple:pack({?DB_ATTS, DocId, AttId, ChunkId}, DbPrefix), + ok = erlfdb:set(Tx, AttKey, Chunk), + ChunkId + 1 + end, 0, Chunks), + {ok, AttId}. + + +fold_docs(#{} = Db, UserFun, UserAcc0, Options) -> + #{ + tx := Tx, + db_prefix := DbPrefix + } = ensure_current(Db), + + {Reverse, Start, End} = get_dir_and_bounds(DbPrefix, Options), + + DocCountKey = erlfdb_tuple:pack({?DB_STATS, <<"doc_count">>}, DbPrefix), + DocCountBin = erlfdb:wait(erlfdb:get(Tx, DocCountKey)), + + try + UserAcc1 = maybe_stop(UserFun({meta, [ + {total, ?bin2uint(DocCountBin)}, + {offset, null} + ]}, UserAcc0)), + + UserAcc2 = erlfdb:fold_range(Tx, Start, End, fun({K, V}, UserAccIn) -> + {?DB_ALL_DOCS, DocId} = erlfdb_tuple:unpack(K, DbPrefix), + RevId = erlfdb_tuple:unpack(V), + maybe_stop(UserFun({row, [ + {id, DocId}, + {key, DocId}, + {value, couch_doc:rev_to_str(RevId)} + ]}, UserAccIn)) + end, UserAcc1, [{reverse, Reverse}] ++ Options), + + {ok, maybe_stop(UserFun(complete, UserAcc2))} + catch throw:{stop, FinalUserAcc} -> + {ok, FinalUserAcc} + end. + + +fold_changes(#{} = Db, SinceSeq0, UserFun, UserAcc0, Options) -> + #{ + tx := Tx, + db_prefix := DbPrefix + } = ensure_current(Db), + + SinceSeq1 = get_since_seq(SinceSeq0), + + Reverse = case fabric2_util:get_value(dir, Options, fwd) of + fwd -> false; + rev -> true + end, + + {Start0, End0} = case Reverse of + false -> {SinceSeq1, fabric2_util:seq_max_vs()}; + true -> {fabric2_util:seq_zero_vs(), SinceSeq1} + end, + + Start1 = erlfdb_tuple:pack({?DB_CHANGES, Start0}, DbPrefix), + End1 = erlfdb_tuple:pack({?DB_CHANGES, End0}, DbPrefix), + + {Start, End} = case Reverse of + false -> {erlfdb_key:first_greater_than(Start1), End1}; + true -> {Start1, erlfdb_key:first_greater_than(End1)} + end, + + try + {ok, erlfdb:fold_range(Tx, Start, End, fun({K, V}, UserAccIn) -> + {?DB_CHANGES, SeqVS} = erlfdb_tuple:unpack(K, DbPrefix), + {DocId, Deleted, RevId} = erlfdb_tuple:unpack(V), + + Change = #{ + id => DocId, + sequence => vs_to_seq(SeqVS), + rev_id => RevId, + deleted => Deleted + }, + + maybe_stop(UserFun(Change, UserAccIn)) + end, UserAcc0, [{reverse, Reverse}] ++ Options)} + catch throw:{stop, FinalUserAcc} -> + {ok, FinalUserAcc} + end. + + +get_last_change(#{} = Db) -> + #{ + tx := Tx, + db_prefix := DbPrefix + } = ensure_current(Db), + + {Start, End} = erlfdb_tuple:range({?DB_CHANGES}, DbPrefix), + Options = [{limit, 1}, {reverse, true}], + case erlfdb:get_range(Tx, Start, End, Options) of + [] -> + vs_to_seq(fabric2_util:seq_zero_vs()); + [{K, _V}] -> + {?DB_CHANGES, SeqVS} = erlfdb_tuple:unpack(K, DbPrefix), + vs_to_seq(SeqVS) + end. + + +maybe_stop({ok, Acc}) -> + Acc; +maybe_stop({stop, Acc}) -> + throw({stop, Acc}). + + +vs_to_seq(VS) -> + <<51:8, SeqBin:12/binary>> = erlfdb_tuple:pack({VS}), + fabric2_util:to_hex(SeqBin). + + +debug_cluster() -> + debug_cluster(<<>>, <<16#FE, 16#FF, 16#FF>>). + + +debug_cluster(Start, End) -> + transactional(fun(Tx) -> + lists:foreach(fun({Key, Val}) -> + io:format("~s => ~s~n", [ + string:pad(erlfdb_util:repr(Key), 60), + erlfdb_util:repr(Val) + ]) + end, erlfdb:get_range(Tx, Start, End)) + end). + + +init_db(Tx, DbName, Options) -> + Root = erlfdb_directory:root(), + CouchDB = erlfdb_directory:create_or_open(Tx, Root, [<<"couchdb">>]), + Prefix = erlfdb_directory:get_name(CouchDB), + Version = erlfdb:wait(erlfdb:get(Tx, ?METADATA_VERSION_KEY)), + #{ + name => DbName, + tx => Tx, + layer_prefix => Prefix, + md_version => Version, + + db_options => Options + }. + + +load_validate_doc_funs(#{} = Db) -> + FoldFun = fun + ({row, Row}, Acc) -> + DDocInfo = #{id => fabric2_util:get_value(id, Row)}, + {ok, [DDocInfo | Acc]}; + (_, Acc) -> + {ok, Acc} + end, + + Options = [ + {start_key, <<"_design/">>}, + {end_key, <<"_design0">>} + ], + + {ok, Infos1} = fold_docs(Db, FoldFun, [], Options), + + Infos2 = lists:map(fun(Info) -> + #{ + id := DDocId = <<"_design/", _/binary>> + } = Info, + Info#{ + rev_info => get_winning_revs_future(Db, DDocId, 1) + } + end, Infos1), + + Infos3 = lists:flatmap(fun(Info) -> + #{ + id := DDocId, + rev_info := RevInfoFuture + } = Info, + [RevInfo] = get_winning_revs_wait(Db, RevInfoFuture), + #{deleted := Deleted} = RevInfo, + if Deleted -> []; true -> + [Info#{ + rev_info := RevInfo, + body => get_doc_body_future(Db, DDocId, RevInfo) + }] + end + end, Infos2), + + VDUs = lists:flatmap(fun(Info) -> + #{ + id := DDocId, + rev_info := RevInfo, + body := BodyFuture + } = Info, + #doc{} = Doc = get_doc_body_wait(Db, DDocId, RevInfo, BodyFuture), + case couch_doc:get_validate_doc_fun(Doc) of + nil -> []; + Fun -> [Fun] + end + end, Infos3), + + Db#{ + validate_doc_update_funs := VDUs + }. + + +bump_metadata_version(Tx) -> + % The 14 zero bytes is pulled from the PR for adding the + % metadata version key. Not sure why 14 bytes when version + % stamps are only 80, but whatever for now. + erlfdb:set_versionstamped_value(Tx, ?METADATA_VERSION_KEY, <<0:112>>). + + +bump_db_version(#{} = Db) -> + #{ + tx := Tx, + db_prefix := DbPrefix + } = Db, + + DbVersionKey = erlfdb_tuple:pack({?DB_VERSION}, DbPrefix), + DbVersion = fabric2_util:uuid(), + ok = erlfdb:set(Tx, DbVersionKey, DbVersion). + + +write_doc_body(#{} = Db0, #doc{} = Doc) -> + #{ + tx := Tx + } = Db = ensure_current(Db0), + + {NewDocKey, NewDocVal} = doc_to_fdb(Db, Doc), + erlfdb:set(Tx, NewDocKey, NewDocVal). + + +revinfo_to_fdb(Tx, DbPrefix, DocId, #{winner := true} = RevId) -> + #{ + deleted := Deleted, + rev_id := {RevPos, Rev}, + rev_path := RevPath, + branch_count := BranchCount + } = RevId, + VS = new_versionstamp(Tx), + Key = {?DB_REVS, DocId, not Deleted, RevPos, Rev}, + Val = {?CURR_REV_FORMAT, VS, BranchCount, list_to_tuple(RevPath)}, + KBin = erlfdb_tuple:pack(Key, DbPrefix), + VBin = erlfdb_tuple:pack_vs(Val), + {KBin, VBin, VS}; + +revinfo_to_fdb(_Tx, DbPrefix, DocId, #{} = RevId) -> + #{ + deleted := Deleted, + rev_id := {RevPos, Rev}, + rev_path := RevPath + } = RevId, + Key = {?DB_REVS, DocId, not Deleted, RevPos, Rev}, + Val = {?CURR_REV_FORMAT, list_to_tuple(RevPath)}, + KBin = erlfdb_tuple:pack(Key, DbPrefix), + VBin = erlfdb_tuple:pack(Val), + {KBin, VBin, undefined}. + + +fdb_to_revinfo(Key, {?CURR_REV_FORMAT, _, _, _} = Val) -> + {?DB_REVS, _DocId, NotDeleted, RevPos, Rev} = Key, + {_RevFormat, Sequence, BranchCount, RevPath} = Val, + #{ + winner => true, + deleted => not NotDeleted, + rev_id => {RevPos, Rev}, + rev_path => tuple_to_list(RevPath), + sequence => Sequence, + branch_count => BranchCount + }; + +fdb_to_revinfo(Key, {?CURR_REV_FORMAT, _} = Val) -> + {?DB_REVS, _DocId, NotDeleted, RevPos, Rev} = Key, + {_RevFormat, RevPath} = Val, + #{ + winner => false, + deleted => not NotDeleted, + rev_id => {RevPos, Rev}, + rev_path => tuple_to_list(RevPath), + sequence => undefined, + branch_count => undefined + }. + + +doc_to_fdb(Db, #doc{} = Doc) -> + #{ + db_prefix := DbPrefix + } = Db, + + #doc{ + id = Id, + revs = {Start, [Rev | _]}, + body = Body, + atts = Atts, + deleted = Deleted + } = doc_flush_atts(Db, Doc), + + Key = erlfdb_tuple:pack({?DB_DOCS, Id, Start, Rev}, DbPrefix), + Val = {Body, Atts, Deleted}, + {Key, term_to_binary(Val, [{minor_version, 1}])}. + + +fdb_to_doc(_Db, DocId, Pos, Path, Bin) when is_binary(Bin) -> + {Body, Atts, Deleted} = binary_to_term(Bin, [safe]), + #doc{ + id = DocId, + revs = {Pos, Path}, + body = Body, + atts = Atts, + deleted = Deleted + }; +fdb_to_doc(_Db, _DocId, _Pos, _Path, not_found) -> + {not_found, missing}. + + +local_doc_to_fdb(Db, #doc{} = Doc) -> + #{ + db_prefix := DbPrefix + } = Db, + + #doc{ + id = Id, + revs = {0, [Rev]}, + body = Body + } = Doc, + + StoreRev = case Rev of + _ when is_integer(Rev) -> integer_to_binary(Rev); + _ when is_binary(Rev) -> Rev + end, + + Key = erlfdb_tuple:pack({?DB_LOCAL_DOCS, Id}, DbPrefix), + Val = {StoreRev, Body}, + {Key, term_to_binary(Val, [{minor_version, 1}])}. + + +fdb_to_local_doc(_Db, DocId, Bin) when is_binary(Bin) -> + {Rev, Body} = binary_to_term(Bin, [safe]), + #doc{ + id = DocId, + revs = {0, [Rev]}, + deleted = false, + body = Body + }; +fdb_to_local_doc(_Db, _DocId, not_found) -> + {not_found, missing}. + + +doc_flush_atts(Db, Doc) -> + Atts = lists:map(fun(Att) -> + couch_att:flush(Db, Doc#doc.id, Att) + end, Doc#doc.atts), + Doc#doc{atts = Atts}. + + +chunkify_attachment(Data) -> + case Data of + <<>> -> + []; + <> -> + [Head | chunkify_attachment(Rest)]; + <<_/binary>> when size(Data) < ?ATTACHMENT_CHUNK_SIZE -> + [Data] + end. + + +get_dir_and_bounds(DbPrefix, Options) -> + Reverse = case fabric2_util:get_value(dir, Options, fwd) of + fwd -> false; + rev -> true + end, + StartKey0 = fabric2_util:get_value(start_key, Options), + EndKeyGt = fabric2_util:get_value(end_key_gt, Options), + EndKey0 = fabric2_util:get_value(end_key, Options, EndKeyGt), + InclusiveEnd = EndKeyGt == undefined, + + % CouchDB swaps the key meanings based on the direction + % of the fold. FoundationDB does not so we have to + % swap back here. + {StartKey1, EndKey1} = case Reverse of + false -> {StartKey0, EndKey0}; + true -> {EndKey0, StartKey0} + end, + + % Set the maximum bounds for the start and endkey + StartKey2 = case StartKey1 of + undefined -> {?DB_ALL_DOCS}; + SK2 when is_binary(SK2) -> {?DB_ALL_DOCS, SK2} + end, + + EndKey2 = case EndKey1 of + undefined -> {?DB_ALL_DOCS, <<16#FF>>}; + EK2 when is_binary(EK2) -> {?DB_ALL_DOCS, EK2} + end, + + StartKey3 = erlfdb_tuple:pack(StartKey2, DbPrefix), + EndKey3 = erlfdb_tuple:pack(EndKey2, DbPrefix), + + % FoundationDB ranges are applied as SK <= key < EK + % By default, CouchDB is SK <= key <= EK with the + % optional inclusive_end=false option changing that + % to SK <= key < EK. Also, remember that CouchDB + % swaps the meaning of SK and EK based on direction. + % + % Thus we have this wonderful bit of logic to account + % for all of those combinations. + + StartKey4 = case {Reverse, InclusiveEnd} of + {true, false} -> + erlfdb_key:first_greater_than(StartKey3); + _ -> + StartKey3 + end, + + EndKey4 = case {Reverse, InclusiveEnd} of + {false, true} when EndKey0 /= undefined -> + erlfdb_key:first_greater_than(EndKey3); + {true, _} -> + erlfdb_key:first_greater_than(EndKey3); + _ -> + EndKey3 + end, + + {Reverse, StartKey4, EndKey4}. + + +get_since_seq(Seq) when Seq == <<>>; Seq == <<"0">>; Seq == 0-> + fabric2_util:seq_zero_vs(); + +get_since_seq(Seq) when Seq == now; Seq == <<"now">> -> + fabric2_util:seq_max_vs(); + +get_since_seq(Seq) when is_binary(Seq), size(Seq) == 24 -> + Seq1 = fabric2_util:from_hex(Seq), + Seq2 = <<51:8, Seq1/binary>>, + {SeqVS} = erlfdb_tuple:unpack(Seq2), + SeqVS; + +get_since_seq(List) when is_list(List) -> + get_since_seq(list_to_binary(List)); + +get_since_seq(Seq) -> + erlang:error({invalid_since_seq, Seq}). + + +get_db_handle() -> + case get(?PDICT_DB_KEY) of + undefined -> + {ok, Db} = application:get_env(fabric, db), + put(?PDICT_DB_KEY, Db), + Db; + Db -> + Db + end. + + +require_transaction(#{tx := {erlfdb_transaction, _}} = _Db) -> + ok; +require_transaction(#{} = _Db) -> + erlang:error(transaction_required). + + +ensure_current(Db) -> + ensure_current(Db, true). + + +ensure_current(#{} = Db, CheckDbVersion) -> + require_transaction(Db), + + #{ + tx := Tx, + md_version := MetaDataVersion + } = Db, + + case erlfdb:wait(erlfdb:get(Tx, ?METADATA_VERSION_KEY)) of + MetaDataVersion -> Db; + _NewVersion -> reopen(Db) + end, + + AlreadyChecked = get(?PDICT_CHECKED_DB_IS_CURRENT), + if not CheckDbVersion orelse AlreadyChecked == true -> Db; true -> + #{ + db_prefix := DbPrefix, + db_version := DbVersion + } = Db, + + DbVersionKey = erlfdb_tuple:pack({?DB_VERSION}, DbPrefix), + + case erlfdb:wait(erlfdb:get(Tx, DbVersionKey)) of + DbVersion -> + put(?PDICT_CHECKED_DB_IS_CURRENT, true), + Db; + _NewDBVersion -> + fabric2_server:remove(maps:get(name, Db)), + reopen(Db) + end + end. + + +is_transaction_applied(Tx) -> + is_commit_unknown_result() + andalso has_transaction_id() + andalso transaction_id_exists(Tx). + + +get_previous_transaction_result() -> + get(?PDICT_TX_RES_KEY). + + +execute_transaction(Tx, Fun, LayerPrefix) -> + put(?PDICT_CHECKED_DB_IS_CURRENT, false), + Result = Fun(Tx), + case erlfdb:is_read_only(Tx) of + true -> + ok; + false -> + erlfdb:set(Tx, get_transaction_id(Tx, LayerPrefix), <<>>), + put(?PDICT_TX_RES_KEY, Result) + end, + Result. + + +clear_transaction() -> + fabric2_txids:remove(get(?PDICT_TX_ID_KEY)), + erase(?PDICT_CHECKED_DB_IS_CURRENT), + erase(?PDICT_TX_ID_KEY), + erase(?PDICT_TX_RES_KEY). + + +is_commit_unknown_result() -> + erlfdb:get_last_error() == ?COMMIT_UNKNOWN_RESULT. + + +has_transaction_id() -> + is_binary(get(?PDICT_TX_ID_KEY)). + + +transaction_id_exists(Tx) -> + erlfdb:wait(erlfdb:get(Tx, get(?PDICT_TX_ID_KEY))) == <<>>. + + +get_transaction_id(Tx, LayerPrefix) -> + case get(?PDICT_TX_ID_KEY) of + undefined -> + TxId = fabric2_txids:create(Tx, LayerPrefix), + put(?PDICT_TX_ID_KEY, TxId), + TxId; + TxId when is_binary(TxId) -> + TxId + end. + + +new_versionstamp(Tx) -> + TxId = erlfdb:get_next_tx_id(Tx), + {versionstamp, 16#FFFFFFFFFFFFFFFF, 16#FFFF, TxId}. + diff --git a/src/fabric/src/fabric2_server.erl b/src/fabric/src/fabric2_server.erl new file mode 100644 index 000000000..5b826cd14 --- /dev/null +++ b/src/fabric/src/fabric2_server.erl @@ -0,0 +1,104 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(fabric2_server). +-behaviour(gen_server). +-vsn(1). + + +-export([ + start_link/0, + fetch/1, + store/1, + remove/1 +]). + + +-export([ + init/1, + terminate/2, + handle_call/3, + handle_cast/2, + handle_info/2, + code_change/3 +]). + + +-include_lib("couch/include/couch_db.hrl"). + + +-define(CLUSTER_FILE, "/usr/local/etc/foundationdb/fdb.cluster"). + + +start_link() -> + gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). + + +fetch(DbName) when is_binary(DbName) -> + case ets:lookup(?MODULE, DbName) of + [{DbName, #{} = Db}] -> Db; + [] -> undefined + end. + + +store(#{name := DbName} = Db0) when is_binary(DbName) -> + Db1 = Db0#{ + tx := undefined, + user_ctx := #user_ctx{} + }, + true = ets:insert(?MODULE, {DbName, Db1}), + ok. + + +remove(DbName) when is_binary(DbName) -> + true = ets:delete(?MODULE, DbName), + ok. + + +init(_) -> + ets:new(?MODULE, [ + public, + named_table, + {read_concurrency, true}, + {write_concurrency, true} + ]), + + Db = case application:get_env(fabric, eunit_run) of + {ok, true} -> + erlfdb_util:get_test_db([empty]); + undefined -> + ClusterStr = config:get("erlfdb", "cluster_file", ?CLUSTER_FILE), + erlfdb:open(iolist_to_binary(ClusterStr)) + end, + application:set_env(fabric, db, Db), + + {ok, nil}. + + +terminate(_, _St) -> + ok. + + +handle_call(Msg, _From, St) -> + {stop, {bad_call, Msg}, {bad_call, Msg}, St}. + + +handle_cast(Msg, St) -> + {stop, {bad_cast, Msg}, St}. + + +handle_info(Msg, St) -> + {stop, {bad_info, Msg}, St}. + + +code_change(_OldVsn, St, _Extra) -> + {ok, St}. diff --git a/src/fabric/src/fabric2_sup.erl b/src/fabric/src/fabric2_sup.erl new file mode 100644 index 000000000..73c6c1f4d --- /dev/null +++ b/src/fabric/src/fabric2_sup.erl @@ -0,0 +1,47 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(fabric2_sup). +-behaviour(supervisor). +-vsn(1). + + +-export([ + start_link/1 +]). + +-export([ + init/1 +]). + + +start_link(Args) -> + supervisor:start_link({local, ?MODULE}, ?MODULE, Args). + + +init([]) -> + Flags = #{ + strategy => one_for_one, + intensity => 1, + period => 5 + }, + Children = [ + #{ + id => fabric2_server, + start => {fabric2_server, start_link, []} + }, + #{ + id => fabric2_txids, + start => {fabric2_txids, start_link, []} + } + ], + {ok, {Flags, Children}}. diff --git a/src/fabric/src/fabric2_txids.erl b/src/fabric/src/fabric2_txids.erl new file mode 100644 index 000000000..bbb8bdf57 --- /dev/null +++ b/src/fabric/src/fabric2_txids.erl @@ -0,0 +1,144 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(fabric2_txids). +-behaviour(gen_server). +-vsn(1). + + +-export([ + start_link/0, + create/2, + remove/1 +]). + + +-export([ + init/1, + terminate/2, + handle_call/3, + handle_cast/2, + handle_info/2, + code_change/3 +]). + + +-include("fabric2.hrl"). + + +-define(ONE_HOUR, 3600000000). +-define(MAX_TX_IDS, 1000). + + +start_link() -> + gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). + + +create(Tx, undefined) -> + Root = erlfdb_directory:root(), + CouchDB = erlfdb_directory:create_or_open(Tx, Root, [<<"couchdb">>]), + Prefix = erlfdb_directory:get_name(CouchDB), + create(Tx, Prefix); + +create(_Tx, LayerPrefix) -> + {Mega, Secs, Micro} = os:timestamp(), + Key = {?TX_IDS, Mega, Secs, Micro, fabric2_util:uuid()}, + erlfdb_tuple:pack(Key, LayerPrefix). + + +remove(TxId) when is_binary(TxId) -> + gen_server:cast(?MODULE, {remove, TxId}); + +remove(undefined) -> + ok. + + + +init(_) -> + {ok, #{ + last_sweep => os:timestamp(), + txids => [] + }}. + + +terminate(_, #{txids := TxIds}) -> + if TxIds == [] -> ok; true -> + fabric2_fdb:transactional(fun(Tx) -> + lists:foreach(fun(TxId) -> + erlfdb:clear(Tx, TxId) + end) + end) + end, + ok. + + +handle_call(Msg, _From, St) -> + {stop, {bad_call, Msg}, {bad_call, Msg}, St}. + + +handle_cast({remove, TxId}, St) -> + #{ + last_sweep := LastSweep, + txids := TxIds + } = St, + + NewTxIds = [TxId | TxIds], + NewSt = St#{txids := NewTxIds}, + + NeedsSweep = timer:now_diff(os:timestamp(), LastSweep) > ?ONE_HOUR, + + case NeedsSweep orelse length(NewTxIds) >= ?MAX_TX_IDS of + true -> + {noreply, clean(NewSt, NeedsSweep)}; + false -> + {noreply, NewSt} + end. + + +handle_info(Msg, St) -> + {stop, {bad_info, Msg}, St}. + + +code_change(_OldVsn, St, _Extra) -> + {ok, St}. + + +clean(St, NeedsSweep) -> + #{ + last_sweep := LastSweep, + txids := TxIds + } = St, + fabric2_fdb:transactional(fun(Tx) -> + lists:foreach(fun(TxId) -> + erlfdb:clear(Tx, TxId) + end, TxIds), + case NeedsSweep of + true -> + sweep(Tx, LastSweep), + St#{ + last_sweep := os:timestamp(), + txids := [] + }; + false -> + St#{txids := []} + end + end). + + +sweep(Tx, {Mega, Secs, Micro}) -> + Root = erlfdb_directory:root(), + CouchDB = erlfdb_directory:create_or_open(Tx, Root, [<<"couchdb">>]), + Prefix = erlfdb_directory:get_name(CouchDB), + StartKey = erlfdb_tuple:pack({?TX_IDS}, Prefix), + EndKey = erlfdb_tuple:pack({?TX_IDS, Mega, Secs, Micro}, Prefix), + erlfdb:set_option(Tx, next_write_no_write_conflict_range), + erlfdb:clear_range(Tx, StartKey, EndKey). diff --git a/src/fabric/src/fabric2_util.erl b/src/fabric/src/fabric2_util.erl new file mode 100644 index 000000000..6e2df67c2 --- /dev/null +++ b/src/fabric/src/fabric2_util.erl @@ -0,0 +1,203 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(fabric2_util). + + +-export([ + revinfo_to_path/1, + sort_revinfos/1, + + seq_zero_vs/0, + seq_max_vs/0, + + user_ctx_to_json/1, + + validate_security_object/1, + + get_value/2, + get_value/3, + to_hex/1, + from_hex/1, + uuid/0 +]). + + +-include_lib("couch/include/couch_db.hrl"). + + +revinfo_to_path(RevInfo) -> + #{ + rev_id := {RevPos, Rev}, + rev_path := RevPath + } = RevInfo, + Revs = lists:reverse(RevPath, [Rev]), + Path = revinfo_to_path(RevInfo, Revs), + {RevPos - length(Revs) + 1, Path}. + + +revinfo_to_path(RevInfo, [Rev]) -> + {Rev, RevInfo, []}; + +revinfo_to_path(RevInfo, [Rev | Rest]) -> + {Rev, ?REV_MISSING, [revinfo_to_path(RevInfo, Rest)]}. + + +sort_revinfos(RevInfos) -> + CmpFun = fun(A, B) -> rev_sort_key(A) > rev_sort_key(B) end, + lists:sort(CmpFun, RevInfos). + + +rev_sort_key(#{} = RevInfo) -> + #{ + deleted := Deleted, + rev_id := {RevPos, Rev} + } = RevInfo, + {not Deleted, RevPos, Rev}. + + +seq_zero_vs() -> + {versionstamp, 0, 0, 0}. + + +seq_max_vs() -> + {versionstamp, 18446744073709551615, 65535, 65535}. + + +user_ctx_to_json(Db) -> + UserCtx = fabric2_db:get_user_ctx(Db), + {[ + {<<"db">>, fabric2_db:name(Db)}, + {<<"name">>, UserCtx#user_ctx.name}, + {<<"roles">>, UserCtx#user_ctx.roles} + ]}. + + +validate_security_object({SecProps}) -> + Admins = get_value(<<"admins">>, SecProps, {[]}), + ok = validate_names_and_roles(Admins), + + % we fallback to readers here for backwards compatibility + Readers = get_value(<<"readers">>, SecProps, {[]}), + Members = get_value(<<"members">>, SecProps, Readers), + ok = validate_names_and_roles(Members). + + +validate_names_and_roles({Props}) when is_list(Props) -> + validate_json_list_of_strings(<<"names">>, Props), + validate_json_list_of_strings(<<"roles">>, Props); +validate_names_and_roles(_) -> + throw("admins or members must be a JSON list of strings"). + + +validate_json_list_of_strings(Member, Props) -> + case get_value(Member, Props, []) of + Values when is_list(Values) -> + NonBinary = lists:filter(fun(V) -> not is_binary(V) end, Values), + if NonBinary == [] -> ok; true -> + MemberStr = binary_to_list(Member), + throw(MemberStr ++ " must be a JSON list of strings") + end; + _ -> + MemberStr = binary_to_list(Member), + throw(MemberStr ++ " must be a JSON list of strings") + end. + + +get_value(Key, List) -> + get_value(Key, List, undefined). + + +get_value(Key, List, Default) -> + case lists:keysearch(Key, 1, List) of + {value, {Key,Value}} -> + Value; + false -> + Default + end. + + +to_hex(Bin) -> + list_to_binary(to_hex_int(Bin)). + + +to_hex_int(<<>>) -> + []; +to_hex_int(<>) -> + [nibble_to_hex(Hi), nibble_to_hex(Lo) | to_hex(Rest)]. + + +nibble_to_hex(I) -> + case I of + 0 -> $0; + 1 -> $1; + 2 -> $2; + 3 -> $3; + 4 -> $4; + 5 -> $5; + 6 -> $6; + 7 -> $7; + 8 -> $8; + 9 -> $9; + 10 -> $a; + 11 -> $b; + 12 -> $c; + 13 -> $d; + 14 -> $e; + 15 -> $f + end. + + +from_hex(Bin) -> + iolist_to_binary(from_hex_int(Bin)). + + +from_hex_int(<<>>) -> + []; +from_hex_int(<>) -> + HiNib = hex_to_nibble(Hi), + LoNib = hex_to_nibble(Lo), + [<> | from_hex_int(RestBinary)]; +from_hex_int(<>) -> + erlang:error({invalid_hex, BadHex}). + + +hex_to_nibble(N) -> + case N of + $0 -> 0; + $1 -> 1; + $2 -> 2; + $3 -> 3; + $4 -> 4; + $5 -> 5; + $6 -> 6; + $7 -> 7; + $8 -> 8; + $9 -> 9; + $a -> 10; + $A -> 10; + $b -> 11; + $B -> 11; + $c -> 12; + $C -> 12; + $d -> 13; + $D -> 13; + $e -> 14; + $E -> 14; + $f -> 15; + $F -> 15; + _ -> erlang:error({invalid_hex, N}) + end. + + +uuid() -> + to_hex(crypto:strong_rand_bytes(16)). -- cgit v1.2.1 From b0814d07568cebd4da290550691f14af93331d11 Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Wed, 5 Jun 2019 13:33:55 -0500 Subject: Initial test suite for the fabric2 implementation This provides a good bit of code coverage for the new implementation. We'll want to expand this to include relevant tests from the previous fabric test suite along with reading through the various other tests and ensuring that we cover the API as deeply as is appropriate for this layer. --- src/fabric/test/fabric2_changes_fold_tests.erl | 114 +++ src/fabric/test/fabric2_db_crud_tests.erl | 88 +++ src/fabric/test/fabric2_db_misc_tests.erl | 113 +++ src/fabric/test/fabric2_db_security_tests.erl | 162 +++++ src/fabric/test/fabric2_doc_count_tests.erl | 251 +++++++ src/fabric/test/fabric2_doc_crud_tests.erl | 770 +++++++++++++++++++++ src/fabric/test/fabric2_doc_fold_tests.erl | 209 ++++++ src/fabric/test/fabric2_fdb_tx_retry_tests.erl | 178 +++++ src/fabric/test/fabric2_trace_db_create_tests.erl | 46 ++ src/fabric/test/fabric2_trace_db_delete_tests.erl | 49 ++ src/fabric/test/fabric2_trace_db_open_tests.erl | 50 ++ src/fabric/test/fabric2_trace_doc_create_tests.erl | 86 +++ 12 files changed, 2116 insertions(+) create mode 100644 src/fabric/test/fabric2_changes_fold_tests.erl create mode 100644 src/fabric/test/fabric2_db_crud_tests.erl create mode 100644 src/fabric/test/fabric2_db_misc_tests.erl create mode 100644 src/fabric/test/fabric2_db_security_tests.erl create mode 100644 src/fabric/test/fabric2_doc_count_tests.erl create mode 100644 src/fabric/test/fabric2_doc_crud_tests.erl create mode 100644 src/fabric/test/fabric2_doc_fold_tests.erl create mode 100644 src/fabric/test/fabric2_fdb_tx_retry_tests.erl create mode 100644 src/fabric/test/fabric2_trace_db_create_tests.erl create mode 100644 src/fabric/test/fabric2_trace_db_delete_tests.erl create mode 100644 src/fabric/test/fabric2_trace_db_open_tests.erl create mode 100644 src/fabric/test/fabric2_trace_doc_create_tests.erl diff --git a/src/fabric/test/fabric2_changes_fold_tests.erl b/src/fabric/test/fabric2_changes_fold_tests.erl new file mode 100644 index 000000000..892b448b4 --- /dev/null +++ b/src/fabric/test/fabric2_changes_fold_tests.erl @@ -0,0 +1,114 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(fabric2_changes_fold_tests). + + +-include_lib("couch/include/couch_db.hrl"). +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("eunit/include/eunit.hrl"). + + +-define(DOC_COUNT, 25). + + +changes_fold_test_() -> + { + "Test changes fold operations", + { + setup, + fun setup/0, + fun cleanup/1, + {with, [ + fun fold_changes_basic/1, + fun fold_changes_since_now/1, + fun fold_changes_since_seq/1, + fun fold_changes_basic_rev/1, + fun fold_changes_since_now_rev/1, + fun fold_changes_since_seq_rev/1 + ]} + } + }. + + +setup() -> + Ctx = test_util:start_couch([fabric]), + {ok, Db} = fabric2_db:create(?tempdb(), [{user_ctx, ?ADMIN_USER}]), + Rows = lists:map(fun(Val) -> + DocId = fabric2_util:uuid(), + Doc = #doc{ + id = DocId, + body = {[{<<"value">>, Val}]} + }, + {ok, RevId} = fabric2_db:update_doc(Db, Doc, []), + UpdateSeq = fabric2_db:get_update_seq(Db), + #{ + id => DocId, + sequence => UpdateSeq, + deleted => false, + rev_id => RevId + } + end, lists:seq(1, ?DOC_COUNT)), + {Db, Rows, Ctx}. + + +cleanup({Db, _DocIdRevs, Ctx}) -> + ok = fabric2_db:delete(fabric2_db:name(Db), []), + test_util:stop_couch(Ctx). + + +fold_changes_basic({Db, DocRows, _}) -> + {ok, Rows} = fabric2_db:fold_changes(Db, 0, fun fold_fun/2, []), + ?assertEqual(lists:reverse(DocRows), Rows). + + +fold_changes_since_now({Db, _, _}) -> + {ok, Rows} = fabric2_db:fold_changes(Db, now, fun fold_fun/2, []), + ?assertEqual([], Rows). + + +fold_changes_since_seq({_, [], _}) -> + ok; + +fold_changes_since_seq({Db, [Row | RestRows], _}) -> + #{sequence := Since} = Row, + {ok, Rows} = fabric2_db:fold_changes(Db, Since, fun fold_fun/2, []), + ?assertEqual(lists:reverse(RestRows), Rows), + fold_changes_since_seq({Db, RestRows, nil}). + + +fold_changes_basic_rev({Db, _, _}) -> + Opts = [{dir, rev}], + {ok, Rows} = fabric2_db:fold_changes(Db, 0, fun fold_fun/2, [], Opts), + ?assertEqual([], Rows). + + +fold_changes_since_now_rev({Db, DocRows, _}) -> + Opts = [{dir, rev}], + {ok, Rows} = fabric2_db:fold_changes(Db, now, fun fold_fun/2, [], Opts), + ?assertEqual(DocRows, Rows). + + +fold_changes_since_seq_rev({_, [], _}) -> + ok; + +fold_changes_since_seq_rev({Db, DocRows, _}) -> + #{sequence := Since} = lists:last(DocRows), + Opts = [{dir, rev}], + {ok, Rows} = fabric2_db:fold_changes(Db, Since, fun fold_fun/2, [], Opts), + ?assertEqual(DocRows, Rows), + RestRows = lists:sublist(DocRows, length(DocRows) - 1), + fold_changes_since_seq_rev({Db, RestRows, nil}). + + +fold_fun(#{} = Change, Acc) -> + {ok, [Change | Acc]}. diff --git a/src/fabric/test/fabric2_db_crud_tests.erl b/src/fabric/test/fabric2_db_crud_tests.erl new file mode 100644 index 000000000..24deeb2dc --- /dev/null +++ b/src/fabric/test/fabric2_db_crud_tests.erl @@ -0,0 +1,88 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(fabric2_db_crud_tests). + + +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("eunit/include/eunit.hrl"). + + +-define(TDEF(A), {atom_to_list(A), fun A/0}). + + +crud_test_() -> + { + "Test database CRUD operations", + { + setup, + fun() -> test_util:start_couch([fabric]) end, + fun test_util:stop_couch/1, + [ + ?TDEF(create_db), + ?TDEF(open_db), + ?TDEF(delete_db), + ?TDEF(list_dbs) + ] + } + }. + + +create_db() -> + DbName = ?tempdb(), + ?assertMatch({ok, _}, fabric2_db:create(DbName, [])), + ?assertEqual(true, ets:member(fabric2_server, DbName)), + ?assertEqual({error, file_exists}, fabric2_db:create(DbName, [])). + + +open_db() -> + DbName = ?tempdb(), + ?assertError(database_does_not_exist, fabric2_db:open(DbName, [])), + + ?assertMatch({ok, _}, fabric2_db:create(DbName, [])), + ?assertEqual(true, ets:member(fabric2_server, DbName)), + + % Opening the cached version + ?assertMatch({ok, _}, fabric2_db:open(DbName, [])), + + % Remove from cache and re-open + true = ets:delete(fabric2_server, DbName), + ?assertMatch({ok, _}, fabric2_db:open(DbName, [])). + + +delete_db() -> + DbName = ?tempdb(), + ?assertError(database_does_not_exist, fabric2_db:delete(DbName, [])), + + ?assertMatch({ok, _}, fabric2_db:create(DbName, [])), + ?assertEqual(true, ets:member(fabric2_server, DbName)), + + ?assertEqual(ok, fabric2_db:delete(DbName, [])), + ?assertEqual(false, ets:member(fabric2_server, DbName)), + + ?assertError(database_does_not_exist, fabric2_db:open(DbName, [])). + + +list_dbs() -> + DbName = ?tempdb(), + AllDbs1 = fabric2_db:list_dbs(), + + ?assert(is_list(AllDbs1)), + ?assert(not lists:member(DbName, AllDbs1)), + + ?assertMatch({ok, _}, fabric2_db:create(DbName, [])), + AllDbs2 = fabric2_db:list_dbs(), + ?assert(lists:member(DbName, AllDbs2)), + + ?assertEqual(ok, fabric2_db:delete(DbName, [])), + AllDbs3 = fabric2_db:list_dbs(), + ?assert(not lists:member(DbName, AllDbs3)). diff --git a/src/fabric/test/fabric2_db_misc_tests.erl b/src/fabric/test/fabric2_db_misc_tests.erl new file mode 100644 index 000000000..8e6405632 --- /dev/null +++ b/src/fabric/test/fabric2_db_misc_tests.erl @@ -0,0 +1,113 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(fabric2_db_misc_tests). + + +-include_lib("couch/include/couch_db.hrl"). +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("eunit/include/eunit.hrl"). + + +-define(TDEF(A), {atom_to_list(A), fun A/1}). + + +misc_test_() -> + { + "Test database miscellaney", + { + setup, + fun setup/0, + fun cleanup/1, + {with, [ + fun empty_db_info/1, + fun accessors/1, + fun set_revs_limit/1, + fun set_security/1, + fun is_system_db/1, + fun ensure_full_commit/1 + ]} + } + }. + + +setup() -> + Ctx = test_util:start_couch([fabric]), + DbName = ?tempdb(), + {ok, Db} = fabric2_db:create(DbName, [{user_ctx, ?ADMIN_USER}]), + {DbName, Db, Ctx}. + + +cleanup({_DbName, Db, Ctx}) -> + ok = fabric2_db:delete(fabric2_db:name(Db), []), + test_util:stop_couch(Ctx). + + +empty_db_info({DbName, Db, _}) -> + {ok, Info} = fabric2_db:get_db_info(Db), + ?assertEqual(DbName, fabric2_util:get_value(db_name, Info)), + ?assertEqual(0, fabric2_util:get_value(doc_count, Info)), + ?assertEqual(0, fabric2_util:get_value(doc_del_count, Info)), + ?assert(is_binary(fabric2_util:get_value(update_seq, Info))). + + +accessors({DbName, Db, _}) -> + SeqZero = fabric2_fdb:vs_to_seq(fabric2_util:seq_zero_vs()), + ?assertEqual(DbName, fabric2_db:name(Db)), + ?assertEqual(0, fabric2_db:get_instance_start_time(Db)), + ?assertEqual(nil, fabric2_db:get_pid(Db)), + ?assertEqual(undefined, fabric2_db:get_before_doc_update_fun(Db)), + ?assertEqual(undefined, fabric2_db:get_after_doc_read_fun(Db)), + ?assertEqual(SeqZero, fabric2_db:get_committed_update_seq(Db)), + ?assertEqual(SeqZero, fabric2_db:get_compacted_seq(Db)), + ?assertEqual(SeqZero, fabric2_db:get_update_seq(Db)), + ?assertEqual(nil, fabric2_db:get_compactor_pid(Db)), + ?assertEqual(1000, fabric2_db:get_revs_limit(Db)), + ?assertMatch(<<_:32/binary>>, fabric2_db:get_uuid(Db)), + ?assertEqual(true, fabric2_db:is_db(Db)), + ?assertEqual(false, fabric2_db:is_db(#{})), + ?assertEqual(false, fabric2_db:is_partitioned(Db)), + ?assertEqual(false, fabric2_db:is_clustered(Db)). + + +set_revs_limit({DbName, Db, _}) -> + ?assertEqual(ok, fabric2_db:set_revs_limit(Db, 500)), + {ok, Db2} = fabric2_db:open(DbName, []), + ?assertEqual(500, fabric2_db:get_revs_limit(Db2)). + + +set_security({DbName, Db, _}) -> + SecObj = {[ + {<<"admins">>, {[ + {<<"names">>, []}, + {<<"roles">>, []} + ]}} + ]}, + ?assertEqual(ok, fabric2_db:set_security(Db, SecObj)), + {ok, Db2} = fabric2_db:open(DbName, []), + ?assertEqual(SecObj, fabric2_db:get_security(Db2)). + + +is_system_db({DbName, Db, _}) -> + ?assertEqual(false, fabric2_db:is_system_db(Db)), + ?assertEqual(false, fabric2_db:is_system_db_name("foo")), + ?assertEqual(false, fabric2_db:is_system_db_name(DbName)), + ?assertEqual(true, fabric2_db:is_system_db_name(<<"_replicator">>)), + ?assertEqual(true, fabric2_db:is_system_db_name("_replicator")), + ?assertEqual(true, fabric2_db:is_system_db_name(<<"foo/_replicator">>)), + ?assertEqual(false, fabric2_db:is_system_db_name(<<"f.o/_replicator">>)), + ?assertEqual(false, fabric2_db:is_system_db_name(<<"foo/bar">>)). + + +ensure_full_commit({_, Db, _}) -> + ?assertEqual({ok, 0}, fabric2_db:ensure_full_commit(Db)), + ?assertEqual({ok, 0}, fabric2_db:ensure_full_commit(Db, 5)). diff --git a/src/fabric/test/fabric2_db_security_tests.erl b/src/fabric/test/fabric2_db_security_tests.erl new file mode 100644 index 000000000..979601167 --- /dev/null +++ b/src/fabric/test/fabric2_db_security_tests.erl @@ -0,0 +1,162 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(fabric2_db_security_tests). + + +-include_lib("couch/include/couch_db.hrl"). +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("eunit/include/eunit.hrl"). + + +security_test_() -> + { + "Test database security operations", + { + setup, + fun setup/0, + fun cleanup/1, + {with, [ + fun is_admin_name/1, + fun is_not_admin_name/1, + fun is_admin_role/1, + fun is_not_admin_role/1, + fun check_is_admin/1, + fun check_is_not_admin/1, + fun check_is_member_name/1, + fun check_is_not_member_name/1, + fun check_is_member_role/1, + fun check_is_not_member_role/1, + fun check_admin_is_member/1, + fun check_is_member_of_public_db/1, + fun check_set_user_ctx/1 + ]} + } + }. + + +setup() -> + Ctx = test_util:start_couch([fabric]), + DbName = ?tempdb(), + {ok, Db1} = fabric2_db:create(DbName, [{user_ctx, ?ADMIN_USER}]), + SecProps = {[ + {<<"admins">>, {[ + {<<"names">>, [<<"admin_name1">>, <<"admin_name2">>]}, + {<<"roles">>, [<<"admin_role1">>, <<"admin_role2">>]} + ]}}, + {<<"members">>, {[ + {<<"names">>, [<<"member_name1">>, <<"member_name2">>]}, + {<<"roles">>, [<<"member_role1">>, <<"member_role2">>]} + ]}} + ]}, + ok = fabric2_db:set_security(Db1, SecProps), + {ok, Db2} = fabric2_db:open(DbName, []), + {Db2, Ctx}. + + +cleanup({Db, Ctx}) -> + ok = fabric2_db:delete(fabric2_db:name(Db), []), + test_util:stop_couch(Ctx). + + +is_admin_name({Db, _}) -> + UserCtx = #user_ctx{name = <<"admin_name1">>}, + ?assertEqual(true, fabric2_db:is_admin(Db#{user_ctx := UserCtx})). + + +is_not_admin_name({Db, _}) -> + UserCtx = #user_ctx{name = <<"member1">>}, + ?assertEqual(false, fabric2_db:is_admin(Db#{user_ctx := UserCtx})). + + +is_admin_role({Db, _}) -> + UserCtx = #user_ctx{roles = [<<"admin_role1">>]}, + ?assertEqual(true, fabric2_db:is_admin(Db#{user_ctx := UserCtx})). + + +is_not_admin_role({Db, _}) -> + UserCtx = #user_ctx{roles = [<<"member_role1">>]}, + ?assertEqual(false, fabric2_db:is_admin(Db#{user_ctx := UserCtx})). + + +check_is_admin({Db, _}) -> + UserCtx = #user_ctx{name = <<"admin_name1">>}, + ?assertEqual(ok, fabric2_db:check_is_admin(Db#{user_ctx := UserCtx})). + + +check_is_not_admin({Db, _}) -> + UserCtx = #user_ctx{name = <<"member_name1">>}, + ?assertThrow( + {unauthorized, <<"You are not a db or server admin.">>}, + fabric2_db:check_is_admin(Db#{user_ctx := #user_ctx{}}) + ), + ?assertThrow( + {forbidden, <<"You are not a db or server admin.">>}, + fabric2_db:check_is_admin(Db#{user_ctx := UserCtx}) + ). + + +check_is_member_name({Db, _}) -> + UserCtx = #user_ctx{name = <<"member_name1">>}, + ?assertEqual(ok, fabric2_db:check_is_member(Db#{user_ctx := UserCtx})). + + +check_is_not_member_name({Db, _}) -> + UserCtx = #user_ctx{name = <<"foo">>}, + ?assertThrow( + {unauthorized, <<"You are not authorized", _/binary>>}, + fabric2_db:check_is_member(Db#{user_ctx := #user_ctx{}}) + ), + ?assertThrow( + {forbidden, <<"You are not allowed to access", _/binary>>}, + fabric2_db:check_is_member(Db#{user_ctx := UserCtx}) + ). + + +check_is_member_role({Db, _}) -> + UserCtx = #user_ctx{name = <<"foo">>, roles = [<<"member_role1">>]}, + ?assertEqual(ok, fabric2_db:check_is_member(Db#{user_ctx := UserCtx})). + + +check_is_not_member_role({Db, _}) -> + UserCtx = #user_ctx{name = <<"foo">>, roles = [<<"bar">>]}, + ?assertThrow( + {forbidden, <<"You are not allowed to access", _/binary>>}, + fabric2_db:check_is_member(Db#{user_ctx := UserCtx}) + ). + + +check_admin_is_member({Db, _}) -> + UserCtx = #user_ctx{name = <<"admin_name1">>}, + ?assertEqual(ok, fabric2_db:check_is_member(Db#{user_ctx := UserCtx})). + + +check_is_member_of_public_db({Db, _}) -> + PublicDb = Db#{security_doc := {[]}}, + UserCtx = #user_ctx{name = <<"foo">>, roles = [<<"bar">>]}, + ?assertEqual( + ok, + fabric2_db:check_is_member(PublicDb#{user_ctx := #user_ctx{}}) + ), + ?assertEqual( + ok, + fabric2_db:check_is_member(PublicDb#{user_ctx := UserCtx}) + ). + + +check_set_user_ctx({Db0, _}) -> + DbName = fabric2_db:name(Db0), + UserCtx = #user_ctx{name = <<"foo">>, roles = [<<"bar">>]}, + {ok, Db1} = fabric2_db:open(DbName, [{user_ctx, UserCtx}]), + ?assertEqual(UserCtx, fabric2_db:get_user_ctx(Db1)). + + diff --git a/src/fabric/test/fabric2_doc_count_tests.erl b/src/fabric/test/fabric2_doc_count_tests.erl new file mode 100644 index 000000000..37d08404d --- /dev/null +++ b/src/fabric/test/fabric2_doc_count_tests.erl @@ -0,0 +1,251 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(fabric2_doc_count_tests). + + +-include_lib("couch/include/couch_db.hrl"). +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("eunit/include/eunit.hrl"). + + +-define(DOC_COUNT, 10). + + +doc_count_test_() -> + { + "Test document counting operations", + { + setup, + fun setup/0, + fun cleanup/1, + {with, [ + fun normal_docs/1, + fun design_docs/1, + fun local_docs/1 + ]} + } + }. + + +setup() -> + Ctx = test_util:start_couch([fabric]), + {ok, Db} = fabric2_db:create(?tempdb(), [{user_ctx, ?ADMIN_USER}]), + {Db, Ctx}. + + +cleanup({Db, Ctx}) -> + ok = fabric2_db:delete(fabric2_db:name(Db), []), + test_util:stop_couch(Ctx). + + +normal_docs({Db, _}) -> + {DocCount, DelDocCount, DDocCount, LDocCount} = get_doc_counts(Db), + + Docs1 = lists:map(fun(Id) -> + Doc = #doc{ + id = integer_to_binary(Id), + body = {[{<<"value">>, Id}]} + }, + {ok, {RevPos, Rev}} = fabric2_db:update_doc(Db, Doc, []), + Doc#doc{revs = {RevPos, [Rev]}} + end, lists:seq(1, ?DOC_COUNT)), + + check_doc_counts( + Db, + DocCount + ?DOC_COUNT, + DelDocCount, + DDocCount, + LDocCount + ), + + Docs2 = lists:map(fun(Doc) -> + {[{<<"value">>, V}]} = Doc#doc.body, + NewDoc = case V rem 2 of + 0 -> Doc#doc{deleted = true}; + 1 -> Doc + end, + {ok, {RevPos, Rev}} = fabric2_db:update_doc(Db, NewDoc, []), + NewDoc#doc{revs = {RevPos, [Rev]}} + end, Docs1), + + check_doc_counts( + Db, + DocCount + ?DOC_COUNT div 2, + DelDocCount + ?DOC_COUNT div 2, + DDocCount, + LDocCount + ), + + lists:map(fun(Doc) -> + case Doc#doc.deleted of + true -> + Undeleted = Doc#doc{ + revs = {0, []}, + deleted = false + }, + {ok, {RevPos, Rev}} = fabric2_db:update_doc(Db, Undeleted, []), + Undeleted#doc{revs = {RevPos, [Rev]}}; + false -> + Doc + end + end, Docs2), + + check_doc_counts( + Db, + DocCount + ?DOC_COUNT, + DelDocCount, + DDocCount, + LDocCount + ). + + +design_docs({Db, _}) -> + {DocCount, DelDocCount, DDocCount, LDocCount} = get_doc_counts(Db), + + Docs1 = lists:map(fun(Id) -> + BinId = integer_to_binary(Id), + DDocId = <>, + Doc = #doc{ + id = DDocId, + body = {[{<<"value">>, Id}]} + }, + {ok, {RevPos, Rev}} = fabric2_db:update_doc(Db, Doc, []), + Doc#doc{revs = {RevPos, [Rev]}} + end, lists:seq(1, ?DOC_COUNT)), + + check_doc_counts( + Db, + DocCount + ?DOC_COUNT, + DelDocCount, + DDocCount + ?DOC_COUNT, + LDocCount + ), + + Docs2 = lists:map(fun(Doc) -> + {[{<<"value">>, V}]} = Doc#doc.body, + NewDoc = case V rem 2 of + 0 -> Doc#doc{deleted = true}; + 1 -> Doc + end, + {ok, {RevPos, Rev}} = fabric2_db:update_doc(Db, NewDoc, []), + NewDoc#doc{revs = {RevPos, [Rev]}} + end, Docs1), + + check_doc_counts( + Db, + DocCount + ?DOC_COUNT div 2, + DelDocCount + ?DOC_COUNT div 2, + DDocCount + ?DOC_COUNT div 2, + LDocCount + ), + + lists:map(fun(Doc) -> + case Doc#doc.deleted of + true -> + Undeleted = Doc#doc{ + revs = {0, []}, + deleted = false + }, + {ok, {RevPos, Rev}} = fabric2_db:update_doc(Db, Undeleted, []), + Undeleted#doc{revs = {RevPos, [Rev]}}; + false -> + Doc + end + end, Docs2), + + check_doc_counts( + Db, + DocCount + ?DOC_COUNT, + DelDocCount, + DDocCount + ?DOC_COUNT, + LDocCount + ). + + +local_docs({Db, _}) -> + {DocCount, DelDocCount, DDocCount, LDocCount} = get_doc_counts(Db), + + Docs1 = lists:map(fun(Id) -> + BinId = integer_to_binary(Id), + LDocId = <>, + Doc = #doc{ + id = LDocId, + body = {[{<<"value">>, Id}]} + }, + {ok, {RevPos, Rev}} = fabric2_db:update_doc(Db, Doc, []), + Doc#doc{revs = {RevPos, [Rev]}} + end, lists:seq(1, ?DOC_COUNT)), + + check_doc_counts( + Db, + DocCount, + DelDocCount, + DDocCount, + LDocCount + ?DOC_COUNT + ), + + Docs2 = lists:map(fun(Doc) -> + {[{<<"value">>, V}]} = Doc#doc.body, + NewDoc = case V rem 2 of + 0 -> Doc#doc{deleted = true}; + 1 -> Doc + end, + {ok, {RevPos, Rev}} = fabric2_db:update_doc(Db, NewDoc, []), + NewDoc#doc{revs = {RevPos, [Rev]}} + end, Docs1), + + check_doc_counts( + Db, + DocCount, + DelDocCount, + DDocCount, + LDocCount + ?DOC_COUNT div 2 + ), + + lists:map(fun(Doc) -> + case Doc#doc.deleted of + true -> + Undeleted = Doc#doc{ + revs = {0, []}, + deleted = false + }, + {ok, {RevPos, Rev}} = fabric2_db:update_doc(Db, Undeleted, []), + Undeleted#doc{revs = {RevPos, [Rev]}}; + false -> + Doc + end + end, Docs2), + + check_doc_counts( + Db, + DocCount, + DelDocCount, + DDocCount, + LDocCount + ?DOC_COUNT + ). + + +get_doc_counts(Db) -> + DocCount = fabric2_db:get_doc_count(Db), + DelDocCount = fabric2_db:get_del_doc_count(Db), + DDocCount = fabric2_db:get_doc_count(Db, <<"_design">>), + LDocCount = fabric2_db:get_doc_count(Db, <<"_local">>), + {DocCount, DelDocCount, DDocCount, LDocCount}. + + +check_doc_counts(Db, DocCount, DelDocCount, DDocCount, LDocCount) -> + ?assertEqual(DocCount, fabric2_db:get_doc_count(Db)), + ?assertEqual(DelDocCount, fabric2_db:get_del_doc_count(Db)), + ?assertEqual(DocCount, fabric2_db:get_doc_count(Db, <<"_all_docs">>)), + ?assertEqual(DDocCount, fabric2_db:get_doc_count(Db, <<"_design">>)), + ?assertEqual(LDocCount, fabric2_db:get_doc_count(Db, <<"_local">>)). diff --git a/src/fabric/test/fabric2_doc_crud_tests.erl b/src/fabric/test/fabric2_doc_crud_tests.erl new file mode 100644 index 000000000..85b276679 --- /dev/null +++ b/src/fabric/test/fabric2_doc_crud_tests.erl @@ -0,0 +1,770 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(fabric2_doc_crud_tests). + + +-include_lib("couch/include/couch_db.hrl"). +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("eunit/include/eunit.hrl"). + + +doc_crud_test_() -> + { + "Test document CRUD operations", + { + setup, + fun setup/0, + fun cleanup/1, + {with, [ + fun open_missing_doc/1, + fun create_new_doc/1, + fun create_ddoc_basic/1, + fun create_ddoc_requires_admin/1, + fun create_ddoc_requires_validation/1, + fun create_ddoc_requires_compilation/1, + fun update_doc_basic/1, + fun update_ddoc_basic/1, + fun update_doc_replicated/1, + fun update_doc_replicated_add_conflict/1, + fun update_doc_replicated_changes_winner/1, + fun update_doc_replicated_extension/1, + fun update_doc_replicate_existing_rev/1, + fun update_winning_conflict_branch/1, + fun update_non_winning_conflict_branch/1, + fun delete_doc_basic/1, + fun delete_changes_winner/1, + fun recreate_doc_basic/1, + fun conflict_on_create_new_with_rev/1, + fun conflict_on_update_with_no_rev/1, + fun conflict_on_create_as_deleted/1, + fun conflict_on_recreate_as_deleted/1, + fun conflict_on_extend_deleted/1, + fun open_doc_revs_basic/1, + fun open_doc_revs_all/1, + fun open_doc_revs_latest/1, + fun get_missing_revs_basic/1, + fun get_missing_revs_on_missing_doc/1, + fun open_missing_local_doc/1, + fun create_local_doc_basic/1, + fun update_local_doc_basic/1, + fun delete_local_doc_basic/1, + fun recreate_local_doc/1, + fun create_local_doc_bad_rev/1, + fun create_local_doc_random_rev/1 + ]} + } + }. + + +setup() -> + Ctx = test_util:start_couch([fabric]), + {ok, Db} = fabric2_db:create(?tempdb(), [{user_ctx, ?ADMIN_USER}]), + {Db, Ctx}. + + +cleanup({Db, Ctx}) -> + ok = fabric2_db:delete(fabric2_db:name(Db), []), + test_util:stop_couch(Ctx). + + +open_missing_doc({Db, _}) -> + ?assertEqual({not_found, missing}, fabric2_db:open_doc(Db, <<"foo">>)). + + +create_new_doc({Db, _}) -> + Doc = #doc{ + id = fabric2_util:uuid(), + body = {[{<<"foo">>, <<"bar">>}]} + }, + {ok, {RevPos, Rev}} = fabric2_db:update_doc(Db, Doc), + NewDoc = Doc#doc{revs = {RevPos, [Rev]}}, + ?assertEqual({ok, NewDoc}, fabric2_db:open_doc(Db, Doc#doc.id)). + + +create_ddoc_basic({Db, _}) -> + UUID = fabric2_util:uuid(), + DDocId = <<"_design/", UUID/binary>>, + Doc = #doc{ + id = DDocId, + body = {[{<<"foo">>, <<"bar">>}]} + }, + {ok, {RevPos, Rev}} = fabric2_db:update_doc(Db, Doc), + NewDoc = Doc#doc{revs = {RevPos, [Rev]}}, + ?assertEqual({ok, NewDoc}, fabric2_db:open_doc(Db, Doc#doc.id)). + + +create_ddoc_requires_admin({Db, _}) -> + Db2 = fabric2_db:set_user_ctx(Db, #user_ctx{}), + UUID = fabric2_util:uuid(), + DDocId = <<"_design/", UUID/binary>>, + Doc = #doc{ + id = DDocId, + body = {[{<<"foo">>, <<"bar">>}]} + }, + ?assertThrow({unauthorized, _}, fabric2_db:update_doc(Db2, Doc)). + + +create_ddoc_requires_validation({Db, _}) -> + UUID = fabric2_util:uuid(), + DDocId = <<"_design/", UUID/binary>>, + Doc = #doc{ + id = DDocId, + body = {[ + {<<"views">>, {[ + {<<"foo">>, {[ + {<<"map">>, <<"function(doc) {}">>}, + {<<"reduce">>, <<"_not_a_builtin_reduce">>} + ]}} + ]}} + ]} + }, + ?assertThrow( + {bad_request, invalid_design_doc, _}, + fabric2_db:update_doc(Db, Doc) + ). + + +create_ddoc_requires_compilation({Db, _}) -> + UUID = fabric2_util:uuid(), + DDocId = <<"_design/", UUID/binary>>, + Doc = #doc{ + id = DDocId, + body = {[ + {<<"language">>, <<"javascript">>}, + {<<"views">>, {[ + {<<"foo">>, {[ + {<<"map">>, <<"Hopefully this is invalid JavaScript">>} + ]}} + ]}} + ]} + }, + ?assertThrow( + {bad_request, compilation_error, _}, + fabric2_db:update_doc(Db, Doc) + ). + + +update_doc_basic({Db, _}) -> + Doc1 = #doc{ + id = fabric2_util:uuid(), + body = {[{<<"state">>, 1}]} + }, + {ok, {Pos1, Rev1}} = fabric2_db:update_doc(Db, Doc1), + Doc2 = Doc1#doc{ + revs = {Pos1, [Rev1]}, + body = {[{<<"state">>, 2}]} + }, + {ok, {Pos2, Rev2}} = fabric2_db:update_doc(Db, Doc2), + Doc3 = Doc2#doc{ + revs = {Pos2, [Rev2, Rev1]} + }, + ?assertEqual({ok, Doc3}, fabric2_db:open_doc(Db, Doc2#doc.id)). + + +update_ddoc_basic({Db, _}) -> + UUID = fabric2_util:uuid(), + DDocId = <<"_design/", UUID/binary>>, + Doc1 = #doc{ + id = DDocId, + body = {[{<<"state">>, 1}]} + }, + {ok, {Pos1, Rev1}} = fabric2_db:update_doc(Db, Doc1), + Doc2 = Doc1#doc{ + revs = {Pos1, [Rev1]}, + body = {[{<<"state">>, 2}]} + }, + {ok, {Pos2, Rev2}} = fabric2_db:update_doc(Db, Doc2), + Doc3 = Doc2#doc{ + revs = {Pos2, [Rev2, Rev1]} + }, + ?assertEqual({ok, Doc3}, fabric2_db:open_doc(Db, Doc2#doc.id)). + + +update_doc_replicated({Db, _}) -> + Doc = #doc{ + id = fabric2_util:uuid(), + revs = {2, [fabric2_util:uuid(), fabric2_util:uuid()]}, + body = {[{<<"foo">>, <<"bar">>}]} + }, + {ok, {2, _}} = fabric2_db:update_doc(Db, Doc, [replicated_changes]), + ?assertEqual({ok, Doc}, fabric2_db:open_doc(Db, Doc#doc.id)). + + +update_doc_replicated_add_conflict({Db, _}) -> + [Rev1, Rev2, Rev3] = lists:sort([ + fabric2_util:uuid(), + fabric2_util:uuid(), + fabric2_util:uuid() + ]), + Doc1 = #doc{ + id = fabric2_util:uuid(), + revs = {2, [Rev3, Rev1]}, + body = {[{<<"foo">>, <<"bar">>}]} + }, + {ok, {2, _}} = fabric2_db:update_doc(Db, Doc1, [replicated_changes]), + ?assertEqual({ok, Doc1}, fabric2_db:open_doc(Db, Doc1#doc.id)), + Doc2 = Doc1#doc{ + revs = {2, [Rev2, Rev1]}, + body = {[{<<"bar">>, <<"foo">>}]} + }, + {ok, {2, _}} = fabric2_db:update_doc(Db, Doc2, [replicated_changes]), + ?assertEqual({ok, Doc1}, fabric2_db:open_doc(Db, Doc2#doc.id)). + + +update_doc_replicated_changes_winner({Db, _}) -> + [Rev1, Rev2, Rev3] = lists:sort([ + fabric2_util:uuid(), + fabric2_util:uuid(), + fabric2_util:uuid() + ]), + Doc1 = #doc{ + id = fabric2_util:uuid(), + revs = {2, [Rev2, Rev1]}, + body = {[{<<"foo">>, <<"bar">>}]} + }, + {ok, {2, _}} = fabric2_db:update_doc(Db, Doc1, [replicated_changes]), + ?assertEqual({ok, Doc1}, fabric2_db:open_doc(Db, Doc1#doc.id)), + Doc2 = Doc1#doc{ + revs = {2, [Rev3, Rev1]}, + body = {[{<<"bar">>, <<"foo">>}]} + }, + {ok, {2, _}} = fabric2_db:update_doc(Db, Doc2, [replicated_changes]), + ?assertEqual({ok, Doc2}, fabric2_db:open_doc(Db, Doc2#doc.id)). + + +update_doc_replicated_extension({Db, _}) -> + % No sort necessary and avoided on purpose to + % demonstrate that this is not sort dependent + Rev1 = fabric2_util:uuid(), + Rev2 = fabric2_util:uuid(), + Rev3 = fabric2_util:uuid(), + Rev4 = fabric2_util:uuid(), + Doc1 = #doc{ + id = fabric2_util:uuid(), + revs = {2, [Rev2, Rev1]}, + body = {[{<<"foo">>, <<"bar">>}]} + }, + {ok, {2, _}} = fabric2_db:update_doc(Db, Doc1, [replicated_changes]), + Doc2 = Doc1#doc{ + revs = {4, [Rev4, Rev3, Rev2]}, + body = {[{<<"bar">>, <<"foo">>}]} + }, + {ok, {4, _}} = fabric2_db:update_doc(Db, Doc2, [replicated_changes]), + {ok, Doc3} = fabric2_db:open_doc(Db, Doc2#doc.id), + ?assertEqual({4, [Rev4, Rev3, Rev2, Rev1]}, Doc3#doc.revs), + ?assertEqual(Doc2#doc{revs = undefined}, Doc3#doc{revs = undefined}). + + +update_doc_replicate_existing_rev({Db, _}) -> + Rev1 = fabric2_util:uuid(), + Rev2 = fabric2_util:uuid(), + Doc1 = #doc{ + id = fabric2_util:uuid(), + revs = {2, [Rev2, Rev1]}, + body = {[{<<"foo">>, <<"bar">>}]} + }, + {ok, {2, _}} = fabric2_db:update_doc(Db, Doc1, [replicated_changes]), + {ok, []} = fabric2_db:update_docs(Db, [Doc1], [replicated_changes]), + ?assertEqual({ok, Doc1}, fabric2_db:open_doc(Db, Doc1#doc.id)). + + +update_winning_conflict_branch({Db, _}) -> + [Rev1, Rev2, Rev3] = lists:sort([ + fabric2_util:uuid(), + fabric2_util:uuid(), + fabric2_util:uuid() + ]), + Doc1 = #doc{ + id = fabric2_util:uuid(), + revs = {2, [Rev3, Rev1]}, + body = {[{<<"foo">>, <<"bar">>}]} + }, + {ok, {2, _}} = fabric2_db:update_doc(Db, Doc1, [replicated_changes]), + Doc2 = Doc1#doc{ + revs = {2, [Rev2, Rev1]}, + body = {[{<<"bar">>, <<"foo">>}]} + }, + {ok, {2, _}} = fabric2_db:update_doc(Db, Doc2, [replicated_changes]), + % Update the winning branch + Doc3 = Doc1#doc{ + revs = {2, [Rev3, Rev1]}, + body = {[{<<"baz">>, 2}]} + }, + {ok, {3, Rev4}} = fabric2_db:update_doc(Db, Doc3), + {ok, Doc4} = fabric2_db:open_doc(Db, Doc3#doc.id), + % Assert we've got the correct winner + ?assertEqual({3, [Rev4, Rev3, Rev1]}, Doc4#doc.revs), + ?assertEqual(Doc3#doc{revs = undefined}, Doc4#doc{revs = undefined}). + + +update_non_winning_conflict_branch({Db, _}) -> + [Rev1, Rev2, Rev3] = lists:sort([ + fabric2_util:uuid(), + fabric2_util:uuid(), + fabric2_util:uuid() + ]), + Doc1 = #doc{ + id = fabric2_util:uuid(), + revs = {2, [Rev3, Rev1]}, + body = {[{<<"foo">>, <<"bar">>}]} + }, + {ok, {2, _}} = fabric2_db:update_doc(Db, Doc1, [replicated_changes]), + Doc2 = Doc1#doc{ + revs = {2, [Rev2, Rev1]}, + body = {[{<<"bar">>, <<"foo">>}]} + }, + {ok, {2, _}} = fabric2_db:update_doc(Db, Doc2, [replicated_changes]), + % Update the non winning branch + Doc3 = Doc1#doc{ + revs = {2, [Rev2, Rev1]}, + body = {[{<<"baz">>, 2}]} + }, + {ok, {3, Rev4}} = fabric2_db:update_doc(Db, Doc3), + {ok, Doc4} = fabric2_db:open_doc(Db, Doc3#doc.id), + % Assert we've got the correct winner + ?assertEqual({3, [Rev4, Rev2, Rev1]}, Doc4#doc.revs), + ?assertEqual(Doc3#doc{revs = undefined}, Doc4#doc{revs = undefined}). + + +delete_doc_basic({Db, _}) -> + Doc1 = #doc{ + id = fabric2_util:uuid(), + body = {[{<<"state">>, 1}]} + }, + {ok, {Pos1, Rev1}} = fabric2_db:update_doc(Db, Doc1), + Doc2 = Doc1#doc{ + revs = {Pos1, [Rev1]}, + deleted = true, + body = {[{<<"state">>, 2}]} + }, + {ok, {Pos2, Rev2}} = fabric2_db:update_doc(Db, Doc2), + Doc3 = Doc2#doc{revs = {Pos2, [Rev2, Rev1]}}, + ?assertEqual({ok, Doc3}, fabric2_db:open_doc(Db, Doc2#doc.id, [deleted])). + + +delete_changes_winner({Db, _}) -> + [Rev1, Rev2, Rev3] = lists:sort([ + fabric2_util:uuid(), + fabric2_util:uuid(), + fabric2_util:uuid() + ]), + Doc1 = #doc{ + id = fabric2_util:uuid(), + revs = {2, [Rev3, Rev1]}, + body = {[{<<"foo">>, <<"bar">>}]} + }, + {ok, {2, _}} = fabric2_db:update_doc(Db, Doc1, [replicated_changes]), + Doc2 = Doc1#doc{ + revs = {2, [Rev2, Rev1]}, + body = {[{<<"bar">>, <<"foo">>}]} + }, + {ok, {2, _}} = fabric2_db:update_doc(Db, Doc2, [replicated_changes]), + % Delete the winning branch + Doc3 = Doc1#doc{ + revs = {2, [Rev3, Rev1]}, + deleted = true, + body = {[]} + }, + {ok, {3, _}} = fabric2_db:update_doc(Db, Doc3), + ?assertEqual({ok, Doc2}, fabric2_db:open_doc(Db, Doc3#doc.id)). + + +recreate_doc_basic({Db, _}) -> + Doc1 = #doc{ + id = fabric2_util:uuid(), + body = {[{<<"state">>, 1}]} + }, + {ok, {1, Rev1}} = fabric2_db:update_doc(Db, Doc1), + Doc2 = Doc1#doc{ + revs = {1, [Rev1]}, + deleted = true, + body = {[{<<"state">>, 2}]} + }, + {ok, {2, Rev2}} = fabric2_db:update_doc(Db, Doc2), + Doc3 = Doc1#doc{ + revs = {0, []}, + deleted = false, + body = {[{<<"state">>, 3}]} + }, + {ok, {3, Rev3}} = fabric2_db:update_doc(Db, Doc3), + {ok, Doc4} = fabric2_db:open_doc(Db, Doc3#doc.id), + ?assertEqual({3, [Rev3, Rev2, Rev1]}, Doc4#doc.revs), + ?assertEqual(Doc3#doc{revs = undefined}, Doc4#doc{revs = undefined}). + + +conflict_on_create_new_with_rev({Db, _}) -> + Doc = #doc{ + id = fabric2_util:uuid(), + revs = {1, [fabric2_util:uuid()]}, + body = {[{<<"foo">>, <<"bar">>}]} + }, + ?assertThrow({error, conflict}, fabric2_db:update_doc(Db, Doc)). + + +conflict_on_update_with_no_rev({Db, _}) -> + Doc1 = #doc{ + id = fabric2_util:uuid(), + body = {[{<<"state">>, 1}]} + }, + {ok, _} = fabric2_db:update_doc(Db, Doc1), + Doc2 = Doc1#doc{ + revs = {0, []}, + body = {[{<<"state">>, 2}]} + }, + ?assertThrow({error, conflict}, fabric2_db:update_doc(Db, Doc2)). + + +conflict_on_create_as_deleted({Db, _}) -> + Doc = #doc{ + id = fabric2_util:uuid(), + deleted = true, + body = {[{<<"foo">>, <<"bar">>}]} + }, + ?assertThrow({error, conflict}, fabric2_db:update_doc(Db, Doc)). + + +conflict_on_recreate_as_deleted({Db, _}) -> + Doc1 = #doc{ + id = fabric2_util:uuid(), + body = {[{<<"state">>, 1}]} + }, + {ok, {Pos1, Rev1}} = fabric2_db:update_doc(Db, Doc1), + Doc2 = Doc1#doc{ + revs = {Pos1, [Rev1]}, + deleted = true, + body = {[{<<"state">>, 2}]} + }, + {ok, _} = fabric2_db:update_doc(Db, Doc2), + Doc3 = Doc1#doc{ + revs = {0, []}, + deleted = true, + body = {[{<<"state">>, 3}]} + }, + ?assertThrow({error, conflict}, fabric2_db:update_doc(Db, Doc3)). + + +conflict_on_extend_deleted({Db, _}) -> + Doc1 = #doc{ + id = fabric2_util:uuid(), + body = {[{<<"state">>, 1}]} + }, + {ok, {Pos1, Rev1}} = fabric2_db:update_doc(Db, Doc1), + Doc2 = Doc1#doc{ + revs = {Pos1, [Rev1]}, + deleted = true, + body = {[{<<"state">>, 2}]} + }, + {ok, {Pos2, Rev2}} = fabric2_db:update_doc(Db, Doc2), + Doc3 = Doc1#doc{ + revs = {Pos2, [Rev2]}, + deleted = false, + body = {[{<<"state">>, 3}]} + }, + ?assertThrow({error, conflict}, fabric2_db:update_doc(Db, Doc3)). + + +open_doc_revs_basic({Db, _}) -> + [Rev1, Rev2, Rev3] = lists:sort([ + fabric2_util:uuid(), + fabric2_util:uuid(), + fabric2_util:uuid() + ]), + DocId = fabric2_util:uuid(), + Doc1 = #doc{ + id = DocId, + revs = {2, [Rev3, Rev1]}, + body = {[{<<"foo">>, <<"bar">>}]} + }, + {ok, {2, _}} = fabric2_db:update_doc(Db, Doc1, [replicated_changes]), + Doc2 = Doc1#doc{ + revs = {2, [Rev2, Rev1]}, + body = {[{<<"bar">>, <<"foo">>}]} + }, + {ok, {2, _}} = fabric2_db:update_doc(Db, Doc2, [replicated_changes]), + + {ok, [{ok, Doc3}]} = fabric2_db:open_doc_revs(Db, DocId, [{2, Rev3}], []), + ?assertEqual(Doc1, Doc3), + + {ok, [{ok, Doc4}]} = fabric2_db:open_doc_revs(Db, DocId, [{2, Rev2}], []), + ?assertEqual(Doc2, Doc4), + + Revs = [{2, Rev3}, {2, Rev2}, {1, Rev1}], + {ok, Docs} = fabric2_db:open_doc_revs(Db, DocId, Revs, []), + ?assert(length(Docs) == 3), + ?assert(lists:member({ok, Doc1}, Docs)), + ?assert(lists:member({ok, Doc2}, Docs)), + ?assert(lists:member({{not_found, missing}, {1, Rev1}}, Docs)), + + % Make sure crazy madeup revisions are accepted + MissingRevs = [{5, fabric2_util:uuid()}, {1, fabric2_util:uuid()}], + {ok, NFMissing} = fabric2_db:open_doc_revs(Db, DocId, MissingRevs, []), + ?assertEqual(2, length(NFMissing)), + lists:foreach(fun(MR) -> + ?assert(lists:member({{not_found, missing}, MR}, NFMissing)) + end, MissingRevs). + + +open_doc_revs_all({Db, _}) -> + [Rev1, Rev2, Rev3] = lists:sort([ + fabric2_util:uuid(), + fabric2_util:uuid(), + fabric2_util:uuid() + ]), + DocId = fabric2_util:uuid(), + Doc1 = #doc{ + id = DocId, + revs = {2, [Rev3, Rev1]}, + body = {[{<<"foo">>, <<"bar">>}]} + }, + {ok, {2, _}} = fabric2_db:update_doc(Db, Doc1, [replicated_changes]), + Doc2 = Doc1#doc{ + revs = {2, [Rev2, Rev1]}, + body = {[{<<"bar">>, <<"foo">>}]} + }, + {ok, {2, _}} = fabric2_db:update_doc(Db, Doc2, [replicated_changes]), + + {ok, Docs} = fabric2_db:open_doc_revs(Db, DocId, all, []), + ?assert(length(Docs) == 2), + ?assert(lists:member({ok, Doc1}, Docs)), + ?assert(lists:member({ok, Doc2}, Docs)). + + +open_doc_revs_latest({Db, _}) -> + [Rev1, Rev2, Rev3] = lists:sort([ + fabric2_util:uuid(), + fabric2_util:uuid(), + fabric2_util:uuid() + ]), + DocId = fabric2_util:uuid(), + Doc1 = #doc{ + id = DocId, + revs = {2, [Rev3, Rev1]}, + body = {[{<<"foo">>, <<"bar">>}]} + }, + {ok, {2, _}} = fabric2_db:update_doc(Db, Doc1, [replicated_changes]), + Doc2 = Doc1#doc{ + revs = {2, [Rev2, Rev1]}, + body = {[{<<"bar">>, <<"foo">>}]} + }, + {ok, {2, _}} = fabric2_db:update_doc(Db, Doc2, [replicated_changes]), + + Opts = [latest], + {ok, [{ok, Doc3}]} = fabric2_db:open_doc_revs(Db, DocId, [{2, Rev3}], Opts), + ?assertEqual(Doc1, Doc3), + + {ok, Docs} = fabric2_db:open_doc_revs(Db, DocId, [{1, Rev1}], Opts), + ?assert(length(Docs) == 2), + ?assert(lists:member({ok, Doc1}, Docs)), + ?assert(lists:member({ok, Doc2}, Docs)). + + +get_missing_revs_basic({Db, _}) -> + [Rev1, Rev2, Rev3] = lists:sort([ + fabric2_util:uuid(), + fabric2_util:uuid(), + fabric2_util:uuid() + ]), + DocId = fabric2_util:uuid(), + Doc1 = #doc{ + id = DocId, + revs = {2, [Rev3, Rev1]}, + body = {[{<<"foo">>, <<"bar">>}]} + }, + {ok, {2, _}} = fabric2_db:update_doc(Db, Doc1, [replicated_changes]), + Doc2 = Doc1#doc{ + revs = {2, [Rev2, Rev1]}, + body = {[{<<"bar">>, <<"foo">>}]} + }, + {ok, {2, _}} = fabric2_db:update_doc(Db, Doc2, [replicated_changes]), + + % Check that we can find all revisions + AllRevs = [{1, Rev1}, {2, Rev2}, {2, Rev3}], + ?assertEqual( + {ok, []}, + fabric2_db:get_missing_revs(Db, [{DocId, AllRevs}]) + ), + + % Check that a missing revision is found with no possible ancestors + MissingRev = {2, fabric2_util:uuid()}, + ?assertEqual( + {ok, [{DocId, [MissingRev], []}]}, + fabric2_db:get_missing_revs(Db, [{DocId, [MissingRev]}]) + ), + + % Check that only a missing rev is returned + ?assertEqual( + {ok, [{DocId, [MissingRev], []}]}, + fabric2_db:get_missing_revs(Db, [{DocId, [MissingRev | AllRevs]}]) + ), + + % Check that we can find possible ancestors + MissingWithAncestors = {4, fabric2_util:uuid()}, + PossibleAncestors = [{2, Rev2}, {2, Rev3}], + ?assertEqual( + {ok, [{DocId, [MissingWithAncestors], PossibleAncestors}]}, + fabric2_db:get_missing_revs(Db, [{DocId, [MissingWithAncestors]}]) + ). + + +get_missing_revs_on_missing_doc({Db, _}) -> + Revs = lists:sort([ + couch_doc:rev_to_str({1, fabric2_util:uuid()}), + couch_doc:rev_to_str({2, fabric2_util:uuid()}), + couch_doc:rev_to_str({800, fabric2_util:uuid()}) + ]), + DocId = fabric2_util:uuid(), + {ok, Resp} = fabric2_db:get_missing_revs(Db, [{DocId, Revs}]), + ?assertMatch([{DocId, [_ | _], []}], Resp), + [{DocId, Missing, _}] = Resp, + MissingStrs = [couch_doc:rev_to_str(Rev) || Rev <- Missing], + ?assertEqual(Revs, lists:sort(MissingStrs)). + + +open_missing_local_doc({Db, _}) -> + ?assertEqual( + {not_found, missing}, + fabric2_db:open_doc(Db, <<"_local/foo">>, []) + ). + + +create_local_doc_basic({Db, _}) -> + UUID = fabric2_util:uuid(), + LDocId = <>, + Doc1 = #doc{ + id = LDocId, + revs = {0, []}, + deleted = false, + body = {[{<<"ohai">>, <<"there">>}]} + }, + ?assertEqual({ok, {0, <<"1">>}}, fabric2_db:update_doc(Db, Doc1)), + {ok, Doc2} = fabric2_db:open_doc(Db, Doc1#doc.id, []), + ?assertEqual(Doc1#doc{revs = {0, [<<"1">>]}}, Doc2). + + +update_local_doc_basic({Db, _}) -> + UUID = fabric2_util:uuid(), + LDocId = <>, + Doc1 = #doc{ + id = LDocId, + revs = {0, []}, + deleted = false, + body = {[{<<"ohai">>, <<"there">>}]} + }, + ?assertEqual({ok, {0, <<"1">>}}, fabric2_db:update_doc(Db, Doc1)), + Doc2 = Doc1#doc{ + revs = {0, [<<"1">>]}, + body = {[{<<"whiz">>, <<"bang">>}]} + }, + ?assertEqual({ok, {0, <<"2">>}}, fabric2_db:update_doc(Db, Doc2)), + {ok, Doc3} = fabric2_db:open_doc(Db, Doc1#doc.id, []), + ?assertEqual(Doc2#doc{revs = {0, [<<"2">>]}}, Doc3). + + +delete_local_doc_basic({Db, _}) -> + UUID = fabric2_util:uuid(), + LDocId = <>, + Doc1 = #doc{ + id = LDocId, + revs = {0, []}, + deleted = false, + body = {[{<<"ohai">>, <<"there">>}]} + }, + ?assertEqual({ok, {0, <<"1">>}}, fabric2_db:update_doc(Db, Doc1)), + Doc2 = Doc1#doc{ + revs = {0, [<<"1">>]}, + deleted = true, + body = {[]} + }, + ?assertEqual({ok, {0, <<"0">>}}, fabric2_db:update_doc(Db, Doc2)), + ?assertEqual( + {not_found, missing}, + fabric2_db:open_doc(Db, LDocId) + ). + + +recreate_local_doc({Db, _}) -> + UUID = fabric2_util:uuid(), + LDocId = <>, + Doc1 = #doc{ + id = LDocId, + revs = {0, []}, + deleted = false, + body = {[{<<"ohai">>, <<"there">>}]} + }, + ?assertEqual({ok, {0, <<"1">>}}, fabric2_db:update_doc(Db, Doc1)), + Doc2 = Doc1#doc{ + revs = {0, [<<"1">>]}, + deleted = true, + body = {[]} + }, + ?assertEqual({ok, {0, <<"0">>}}, fabric2_db:update_doc(Db, Doc2)), + ?assertEqual( + {not_found, missing}, + fabric2_db:open_doc(Db, LDocId) + ), + + ?assertEqual({ok, {0, <<"1">>}}, fabric2_db:update_doc(Db, Doc1)), + {ok, Doc3} = fabric2_db:open_doc(Db, LDocId), + ?assertEqual(Doc1#doc{revs = {0, [<<"1">>]}}, Doc3). + + +create_local_doc_bad_rev({Db, _}) -> + UUID = fabric2_util:uuid(), + LDocId = <>, + Doc1 = #doc{ + id = LDocId, + revs = {0, [<<"not a number">>]} + }, + ?assertThrow( + {error, <<"Invalid rev format">>}, + fabric2_db:update_doc(Db, Doc1) + ), + + Doc2 = Doc1#doc{ + revs = bad_bad_rev_roy_brown + }, + ?assertThrow( + {error, <<"Invalid rev format">>}, + fabric2_db:update_doc(Db, Doc2) + ). + + +create_local_doc_random_rev({Db, _}) -> + % Local docs don't care what rev is passed as long + % as long as its a number. + UUID = fabric2_util:uuid(), + LDocId = <>, + Doc1 = #doc{ + id = LDocId, + revs = {0, [<<"42">>]}, + body = {[{<<"state">>, 1}]} + }, + ?assertEqual({ok, {0, <<"43">>}}, fabric2_db:update_doc(Db, Doc1)), + {ok, Doc2} = fabric2_db:open_doc(Db, LDocId, []), + ?assertEqual(Doc1#doc{revs = {0, [<<"43">>]}}, Doc2), + + Doc3 = Doc1#doc{ + revs = {0, [<<"1234567890">>]}, + body = {[{<<"state">>, 2}]} + }, + ?assertEqual({ok, {0, <<"1234567891">>}}, fabric2_db:update_doc(Db, Doc3)), + {ok, Doc4} = fabric2_db:open_doc(Db, LDocId, []), + ?assertEqual(Doc3#doc{revs = {0, [<<"1234567891">>]}}, Doc4), + + Doc5 = Doc1#doc{ + revs = {0, [<<"1">>]}, + body = {[{<<"state">>, 3}]} + }, + ?assertEqual({ok, {0, <<"2">>}}, fabric2_db:update_doc(Db, Doc5)), + {ok, Doc6} = fabric2_db:open_doc(Db, LDocId, []), + ?assertEqual(Doc5#doc{revs = {0, [<<"2">>]}}, Doc6). diff --git a/src/fabric/test/fabric2_doc_fold_tests.erl b/src/fabric/test/fabric2_doc_fold_tests.erl new file mode 100644 index 000000000..caa5f925a --- /dev/null +++ b/src/fabric/test/fabric2_doc_fold_tests.erl @@ -0,0 +1,209 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(fabric2_doc_fold_tests). + + +-include_lib("couch/include/couch_db.hrl"). +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("eunit/include/eunit.hrl"). + + +-define(DOC_COUNT, 50). + + +doc_fold_test_() -> + { + "Test document fold operations", + { + setup, + fun setup/0, + fun cleanup/1, + {with, [ + fun fold_docs_basic/1, + fun fold_docs_rev/1, + fun fold_docs_with_start_key/1, + fun fold_docs_with_end_key/1, + fun fold_docs_with_both_keys_the_same/1, + fun fold_docs_with_different_keys/1 + ]} + } + }. + + +setup() -> + Ctx = test_util:start_couch([fabric]), + {ok, Db} = fabric2_db:create(?tempdb(), [{user_ctx, ?ADMIN_USER}]), + DocIdRevs = lists:map(fun(Val) -> + DocId = fabric2_util:uuid(), + Doc = #doc{ + id = DocId, + body = {[{<<"value">>, Val}]} + }, + {ok, Rev} = fabric2_db:update_doc(Db, Doc, []), + {DocId, couch_doc:rev_to_str(Rev)} + end, lists:seq(1, ?DOC_COUNT)), + {Db, lists:sort(DocIdRevs), Ctx}. + + +cleanup({Db, _DocIdRevs, Ctx}) -> + ok = fabric2_db:delete(fabric2_db:name(Db), []), + test_util:stop_couch(Ctx). + + +fold_docs_basic({Db, DocIdRevs, _}) -> + {ok, {?DOC_COUNT, Rows}} = fabric2_db:fold_docs(Db, fun fold_fun/2, []), + ?assertEqual(DocIdRevs, lists:reverse(Rows)). + + +fold_docs_rev({Db, DocIdRevs, _}) -> + Opts = [{dir, rev}], + {ok, {?DOC_COUNT, Rows}} = + fabric2_db:fold_docs(Db, fun fold_fun/2, [], Opts), + ?assertEqual(DocIdRevs, Rows). + + +fold_docs_with_start_key({Db, DocIdRevs, _}) -> + {StartKey, _} = hd(DocIdRevs), + Opts = [{start_key, StartKey}], + {ok, {?DOC_COUNT, Rows}} + = fabric2_db:fold_docs(Db, fun fold_fun/2, [], Opts), + ?assertEqual(DocIdRevs, lists:reverse(Rows)), + if length(DocIdRevs) == 1 -> ok; true -> + fold_docs_with_start_key({Db, tl(DocIdRevs), nil}) + end. + + +fold_docs_with_end_key({Db, DocIdRevs, _}) -> + RevDocIdRevs = lists:reverse(DocIdRevs), + {EndKey, _} = hd(RevDocIdRevs), + Opts = [{end_key, EndKey}], + {ok, {?DOC_COUNT, Rows}} = + fabric2_db:fold_docs(Db, fun fold_fun/2, [], Opts), + ?assertEqual(RevDocIdRevs, Rows), + if length(DocIdRevs) == 1 -> ok; true -> + fold_docs_with_end_key({Db, lists:reverse(tl(RevDocIdRevs)), nil}) + end. + + +fold_docs_with_both_keys_the_same({Db, DocIdRevs, _}) -> + lists:foreach(fun({DocId, _} = Row) -> + check_all_combos(Db, DocId, DocId, [Row]) + end, DocIdRevs). + + +fold_docs_with_different_keys({Db, DocIdRevs, _}) -> + lists:foreach(fun(_) -> + {StartKey, EndKey, Rows} = pick_range(DocIdRevs), + check_all_combos(Db, StartKey, EndKey, Rows) + end, lists:seq(1, 500)). + + +check_all_combos(Db, StartKey, EndKey, Rows) -> + Opts1 = make_opts(fwd, StartKey, EndKey, true), + {ok, {?DOC_COUNT, Rows1}} = + fabric2_db:fold_docs(Db, fun fold_fun/2, [], Opts1), + ?assertEqual(lists:reverse(Rows), Rows1), + + Opts2 = make_opts(fwd, StartKey, EndKey, false), + {ok, {?DOC_COUNT, Rows2}} = + fabric2_db:fold_docs(Db, fun fold_fun/2, [], Opts2), + Expect2 = if EndKey == undefined -> lists:reverse(Rows); true -> + lists:reverse(all_but_last(Rows)) + end, + ?assertEqual(Expect2, Rows2), + + Opts3 = make_opts(rev, StartKey, EndKey, true), + {ok, {?DOC_COUNT, Rows3}} = + fabric2_db:fold_docs(Db, fun fold_fun/2, [], Opts3), + ?assertEqual(Rows, Rows3), + + Opts4 = make_opts(rev, StartKey, EndKey, false), + {ok, {?DOC_COUNT, Rows4}} = + fabric2_db:fold_docs(Db, fun fold_fun/2, [], Opts4), + Expect4 = if StartKey == undefined -> Rows; true -> + tl(Rows) + end, + ?assertEqual(Expect4, Rows4). + + + +make_opts(fwd, StartKey, EndKey, InclusiveEnd) -> + DirOpts = case rand:uniform() =< 0.50 of + true -> [{dir, fwd}]; + false -> [] + end, + StartOpts = case StartKey of + undefined -> []; + <<_/binary>> -> [{start_key, StartKey}] + end, + EndOpts = case EndKey of + undefined -> []; + <<_/binary>> when InclusiveEnd -> [{end_key, EndKey}]; + <<_/binary>> -> [{end_key_gt, EndKey}] + end, + DirOpts ++ StartOpts ++ EndOpts; +make_opts(rev, StartKey, EndKey, InclusiveEnd) -> + BaseOpts = make_opts(fwd, EndKey, StartKey, InclusiveEnd), + [{dir, rev}] ++ BaseOpts -- [{dir, fwd}]. + + +all_but_last([]) -> + []; +all_but_last([_]) -> + []; +all_but_last(Rows) -> + lists:sublist(Rows, length(Rows) - 1). + + +pick_range(DocIdRevs) -> + {StartKey, StartRow, RestRows} = pick_start_key(DocIdRevs), + {EndKey, EndRow, RowsBetween} = pick_end_key(RestRows), + {StartKey, EndKey, StartRow ++ RowsBetween ++ EndRow}. + + +pick_start_key(Rows) -> + case rand:uniform() =< 0.1 of + true -> + {undefined, [], Rows}; + false -> + Idx = rand:uniform(length(Rows)), + {DocId, _} = Row = lists:nth(Idx, Rows), + {DocId, [Row], lists:nthtail(Idx, Rows)} + end. + + +pick_end_key([]) -> + {undefined, [], []}; + +pick_end_key(Rows) -> + case rand:uniform() =< 0.1 of + true -> + {undefined, [], Rows}; + false -> + Idx = rand:uniform(length(Rows)), + {DocId, _} = Row = lists:nth(Idx, Rows), + Tail = lists:nthtail(Idx, Rows), + {DocId, [Row], Rows -- [Row | Tail]} + end. + + +fold_fun({meta, Meta}, _Acc) -> + Total = fabric2_util:get_value(total, Meta), + {ok, {Total, []}}; +fold_fun({row, Row}, {Total, Rows}) -> + RowId = fabric2_util:get_value(id, Row), + RowId = fabric2_util:get_value(key, Row), + RowRev = fabric2_util:get_value(value, Row), + {ok, {Total, [{RowId, RowRev} | Rows]}}; +fold_fun(complete, Acc) -> + {ok, Acc}. diff --git a/src/fabric/test/fabric2_fdb_tx_retry_tests.erl b/src/fabric/test/fabric2_fdb_tx_retry_tests.erl new file mode 100644 index 000000000..c924ce52a --- /dev/null +++ b/src/fabric/test/fabric2_fdb_tx_retry_tests.erl @@ -0,0 +1,178 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(fabric2_fdb_tx_retry_tests). + + +-include_lib("eunit/include/eunit.hrl"). + + +-define(TDEF(A), {atom_to_list(A), fun A/0}). + + +meck_setup() -> + meck:new(erlfdb), + meck:new(fabric2_txids), + EnvSt = case application:get_env(fabric, db) of + {ok, Db} -> {ok, Db}; + undefined -> undefined + end, + application:set_env(fabric, db, not_a_real_db), + EnvSt. + + +meck_cleanup(EnvSt) -> + case EnvSt of + {ok, Db} -> application:set_env(fabric, db, Db); + undefined -> application:unset_env(fabric, db) + end, + meck:unload(). + + +retry_test_() -> + { + foreach, + fun meck_setup/0, + fun meck_cleanup/1, + [ + ?TDEF(read_only_no_retry), + ?TDEF(read_only_commit_unknown_result), + ?TDEF(run_on_first_try), + ?TDEF(retry_when_commit_conflict), + ?TDEF(retry_when_txid_not_found), + ?TDEF(no_retry_when_txid_found) + ] + }. + + +read_only_no_retry() -> + meck:expect(erlfdb, transactional, fun(_Db, UserFun) -> + UserFun(not_a_real_transaction) + end), + meck:expect(erlfdb, get_last_error, fun() -> 0 end), + meck:expect(erlfdb, get, fun(_, _) -> foo end), + meck:expect(erlfdb, is_read_only, fun(_) -> true end), + meck:expect(fabric2_txids, remove, fun(undefined) -> ok end), + + Result = fabric2_fdb:transactional(fun(Tx) -> + ?assertEqual(foo, erlfdb:get(Tx, bar)), + did_run + end), + + ?assertEqual(did_run, Result), + ?assert(meck:validate([erlfdb, fabric2_txids])). + + +read_only_commit_unknown_result() -> + % Not 100% certain that this would ever actually + % happen in the wild but might as well test that + % we don't blow up if it does. + meck:expect(erlfdb, transactional, fun(_Db, UserFun) -> + UserFun(not_a_real_transaction) + end), + meck:expect(erlfdb, get_last_error, fun() -> 1021 end), + meck:expect(erlfdb, get, fun(_, _) -> foo end), + meck:expect(erlfdb, is_read_only, fun(_) -> true end), + meck:expect(fabric2_txids, remove, fun(undefined) -> ok end), + + Result = fabric2_fdb:transactional(fun(Tx) -> + ?assertEqual(foo, erlfdb:get(Tx, bar)), + did_run + end), + + ?assertEqual(did_run, Result), + ?assert(meck:validate([erlfdb, fabric2_txids])). + + +run_on_first_try() -> + meck:expect(erlfdb, transactional, fun(_Db, UserFun) -> + UserFun(not_a_real_transaction) + end), + meck:expect(erlfdb, get_last_error, fun() -> undefined end), + meck:expect(erlfdb, clear, fun(_, _) -> ok end), + meck:expect(erlfdb, is_read_only, fun(_) -> false end), + meck:expect(fabric2_txids, create, fun(_, _) -> <<"a txid">> end), + meck:expect(erlfdb, set, fun(_, <<"a txid">>, <<>>) -> ok end), + meck:expect(fabric2_txids, remove, fun(<<"a txid">>) -> ok end), + + Result = fabric2_fdb:transactional(fun(Tx) -> + ?assertEqual(ok, erlfdb:clear(Tx, bang)), + did_run + end), + + ?assertEqual(did_run, Result), + ?assert(meck:validate([erlfdb, fabric2_txids])). + + +retry_when_commit_conflict() -> + meck:expect(erlfdb, transactional, fun(_Db, UserFun) -> + UserFun(not_a_real_transaction) + end), + meck:expect(erlfdb, get_last_error, fun() -> 1020 end), + meck:expect(erlfdb, clear, fun(_, _) -> ok end), + meck:expect(erlfdb, is_read_only, fun(_) -> false end), + meck:expect(fabric2_txids, create, fun(_, _) -> <<"a txid">> end), + meck:expect(erlfdb, set, fun(_, <<"a txid">>, <<>>) -> ok end), + meck:expect(fabric2_txids, remove, fun(<<"a txid">>) -> ok end), + + Result = fabric2_fdb:transactional(fun(Tx) -> + ?assertEqual(ok, erlfdb:clear(Tx, <<"foo">>)), + did_run + end), + + ?assertEqual(did_run, Result), + ?assert(meck:validate([erlfdb, fabric2_txids])). + + +retry_when_txid_not_found() -> + meck:expect(erlfdb, transactional, fun(_Db, UserFun) -> + UserFun(not_a_real_transaction) + end), + meck:expect(erlfdb, get_last_error, fun() -> 1021 end), + meck:expect(erlfdb, get, fun(_, <<"a txid">>) -> future end), + meck:expect(erlfdb, wait, fun(future) -> not_found end), + meck:expect(erlfdb, clear, fun(_, _) -> ok end), + meck:expect(erlfdb, is_read_only, fun(_) -> false end), + meck:expect(erlfdb, set, fun(_, <<"a txid">>, <<>>) -> ok end), + meck:expect(fabric2_txids, remove, fun(<<"a txid">>) -> ok end), + + put('$fabric_tx_id', <<"a txid">>), + put('$fabric_tx_result', not_the_correct_result), + + Result = fabric2_fdb:transactional(fun(Tx) -> + ?assertEqual(ok, erlfdb:clear(Tx, <<"foo">>)), + yay_not_skipped + end), + + ?assertEqual(yay_not_skipped, Result), + ?assert(meck:validate([erlfdb, fabric2_txids])). + + +no_retry_when_txid_found() -> + meck:expect(erlfdb, transactional, fun(_Db, UserFun) -> + UserFun(not_a_real_transaction) + end), + meck:expect(erlfdb, get_last_error, fun() -> 1021 end), + meck:expect(erlfdb, get, fun(_, <<"a txid">>) -> future end), + meck:expect(erlfdb, wait, fun(future) -> <<>> end), + meck:expect(fabric2_txids, remove, fun(<<"a txid">>) -> ok end), + + put('$fabric_tx_id', <<"a txid">>), + put('$fabric_tx_result', did_not_run), + + Result = fabric2_fdb:transactional(fun(_Tx) -> + ?assert(false), + did_run + end), + + ?assertEqual(did_not_run, Result), + ?assert(meck:validate([erlfdb, fabric2_txids])). \ No newline at end of file diff --git a/src/fabric/test/fabric2_trace_db_create_tests.erl b/src/fabric/test/fabric2_trace_db_create_tests.erl new file mode 100644 index 000000000..09cc86375 --- /dev/null +++ b/src/fabric/test/fabric2_trace_db_create_tests.erl @@ -0,0 +1,46 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(fabric2_trace_db_create_tests). + + +-include_lib("couch/include/couch_db.hrl"). +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("eunit/include/eunit.hrl"). + + +trace_test_() -> + { + "Trace operation", + { + setup, + fun setup/0, + fun cleanup/1, + [ + fun create_db/0 + ] + } + }. + + +setup() -> + put(erlfdb_trace, "starting fabric"), + test_util:start_couch([fabric]). + + +cleanup(Ctx) -> + test_util:stop_couch(Ctx). + + +create_db() -> + put(erlfdb_trace, <<"create db">>), + {ok, _Db} = fabric2_db:create(?tempdb(), [{user_ctx, ?ADMIN_USER}]). diff --git a/src/fabric/test/fabric2_trace_db_delete_tests.erl b/src/fabric/test/fabric2_trace_db_delete_tests.erl new file mode 100644 index 000000000..ddbb2c858 --- /dev/null +++ b/src/fabric/test/fabric2_trace_db_delete_tests.erl @@ -0,0 +1,49 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(fabric2_trace_db_delete_tests). + + +-include_lib("couch/include/couch_db.hrl"). +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("eunit/include/eunit.hrl"). + + +trace_test_() -> + { + "Trace operation", + { + setup, + fun setup/0, + fun cleanup/1, + {with, [ + fun delete_db/1 + ]} + } + }. + + +setup() -> + put(erlfdb_trace, "starting fabric"), + Ctx = test_util:start_couch([fabric]), + {ok, Db} = fabric2_db:create(?tempdb(), [{user_ctx, ?ADMIN_USER}]), + {Db, Ctx}. + + +cleanup({_Db, Ctx}) -> + test_util:stop_couch(Ctx). + + +delete_db({Db, _}) -> + put(erlfdb_trace, <<"delete db">>), + fabric2_server:remove(fabric2_db:name(Db)), + ok = fabric2_db:delete(fabric2_db:name(Db), []). diff --git a/src/fabric/test/fabric2_trace_db_open_tests.erl b/src/fabric/test/fabric2_trace_db_open_tests.erl new file mode 100644 index 000000000..71e33019e --- /dev/null +++ b/src/fabric/test/fabric2_trace_db_open_tests.erl @@ -0,0 +1,50 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(fabric2_trace_db_open_tests). + + +-include_lib("couch/include/couch_db.hrl"). +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("eunit/include/eunit.hrl"). + + +trace_test_() -> + { + "Trace operation", + { + setup, + fun setup/0, + fun cleanup/1, + {with, [ + fun open_db/1 + ]} + } + }. + + +setup() -> + put(erlfdb_trace, "starting fabric"), + Ctx = test_util:start_couch([fabric]), + {ok, Db} = fabric2_db:create(?tempdb(), [{user_ctx, ?ADMIN_USER}]), + {Db, Ctx}. + + +cleanup({Db, Ctx}) -> + ok = fabric2_db:delete(fabric2_db:name(Db), []), + test_util:stop_couch(Ctx). + + +open_db({Db, _}) -> + put(erlfdb_trace, <<"open db">>), + fabric2_server:remove(fabric2_db:name(Db)), + {ok, _Db} = fabric2_db:open(fabric2_db:name(Db), [{user_ctx, ?ADMIN_USER}]). diff --git a/src/fabric/test/fabric2_trace_doc_create_tests.erl b/src/fabric/test/fabric2_trace_doc_create_tests.erl new file mode 100644 index 000000000..1e0b47cec --- /dev/null +++ b/src/fabric/test/fabric2_trace_doc_create_tests.erl @@ -0,0 +1,86 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(fabric2_trace_doc_create_tests). + + +-include_lib("couch/include/couch_db.hrl"). +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("eunit/include/eunit.hrl"). + + +doc_crud_test_() -> + { + "Test document CRUD operations", + { + setup, + fun setup/0, + fun cleanup/1, + {with, [ + fun create_new_doc/1, + fun create_two_docs/1, + fun create_50_docs/1 + ]} + } + }. + + +setup() -> + Ctx = test_util:start_couch([fabric]), + {ok, Db} = fabric2_db:create(?tempdb(), [{user_ctx, ?ADMIN_USER}]), + {Db, Ctx}. + + +cleanup({Db, Ctx}) -> + ok = fabric2_db:delete(fabric2_db:name(Db), []), + test_util:stop_couch(Ctx). + + +create_new_doc({Db, _}) -> + put(erlfdb_trace, <<"one doc">>), + Doc = #doc{ + id = fabric2_util:uuid(), + body = {[{<<"foo">>, <<"bar">>}]} + }, + {ok, _} = fabric2_db:update_doc(Db, Doc). + + +create_two_docs({Db, _}) -> + put(erlfdb_trace, <<"two docs">>), + Doc1 = #doc{ + id = fabric2_util:uuid(), + body = {[{<<"bam">>, <<"baz">>}]} + }, + Doc2 = #doc{ + id = fabric2_util:uuid(), + body = {[{<<"bang">>, <<"bargle">>}]} + }, + {ok, _} = fabric2_db:update_docs(Db, [Doc1, Doc2]). + + +create_50_docs({Db, _}) -> + lists:foreach(fun(_) -> + spawn_monitor(fun() -> + Name = io_lib:format("50 docs : ~w", [self()]), + put(erlfdb_trace, iolist_to_binary(Name)), + Docs = lists:map(fun(Val) -> + #doc{ + id = fabric2_util:uuid(), + body = {[{<<"value">>, Val}]} + } + end, lists:seq(1, 50)), + {ok, _} = fabric2_db:update_docs(Db, Docs) + end) + end, lists:seq(1, 5)), + lists:foreach(fun(_) -> + receive {'DOWN', _, _, _, _} -> ok end + end, lists:seq(1, 5)). -- cgit v1.2.1 From 14984e85b414306f1d6190c6c917dc7dd75ea132 Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Wed, 5 Jun 2019 13:36:02 -0500 Subject: Update ddoc_cache to use fabric2 --- src/ddoc_cache/src/ddoc_cache_entry_ddocid.erl | 2 +- src/ddoc_cache/src/ddoc_cache_entry_ddocid_rev.erl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ddoc_cache/src/ddoc_cache_entry_ddocid.erl b/src/ddoc_cache/src/ddoc_cache_entry_ddocid.erl index 5248469fb..7c3dc6787 100644 --- a/src/ddoc_cache/src/ddoc_cache_entry_ddocid.erl +++ b/src/ddoc_cache/src/ddoc_cache_entry_ddocid.erl @@ -33,7 +33,7 @@ ddocid({_, DDocId}) -> recover({DbName, DDocId}) -> - fabric:open_doc(DbName, DDocId, [ejson_body, ?ADMIN_CTX]). + fabric2_db:open_doc(DbName, DDocId, [ejson_body, ?ADMIN_CTX]). insert({DbName, DDocId}, {ok, #doc{revs = Revs} = DDoc}) -> diff --git a/src/ddoc_cache/src/ddoc_cache_entry_ddocid_rev.erl b/src/ddoc_cache/src/ddoc_cache_entry_ddocid_rev.erl index 868fa7789..38445af96 100644 --- a/src/ddoc_cache/src/ddoc_cache_entry_ddocid_rev.erl +++ b/src/ddoc_cache/src/ddoc_cache_entry_ddocid_rev.erl @@ -34,7 +34,7 @@ ddocid({_, DDocId, _}) -> recover({DbName, DDocId, Rev}) -> Opts = [ejson_body, ?ADMIN_CTX], - {ok, [Resp]} = fabric:open_revs(DbName, DDocId, [Rev], Opts), + {ok, [Resp]} = fabric2_db:open_doc_revs(DbName, DDocId, [Rev], Opts), Resp. -- cgit v1.2.1 From 3b8baaf2353b48b0ce0d6649b8a7b0886dde732e Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Wed, 5 Jun 2019 13:43:20 -0500 Subject: Start switching chttpd HTTP endpoints to fabric2 This is not an exhaustive port of the entire chttpd API. However, this is enough to support basic CRUD operations far enough that replication works. --- src/chttpd/src/chttpd.erl | 11 +- src/chttpd/src/chttpd_auth_request.erl | 7 +- src/chttpd/src/chttpd_changes.erl | 755 +++++++++++++++++++++++++++++++++ src/chttpd/src/chttpd_db.erl | 328 ++++++-------- src/chttpd/src/chttpd_external.erl | 35 +- src/chttpd/src/chttpd_misc.erl | 62 ++- src/chttpd/src/chttpd_show.erl | 5 +- src/couch_mrview/src/couch_mrview.erl | 16 +- test/elixir/test/basics_test.exs | 2 +- 9 files changed, 965 insertions(+), 256 deletions(-) create mode 100644 src/chttpd/src/chttpd_changes.erl diff --git a/src/chttpd/src/chttpd.erl b/src/chttpd/src/chttpd.erl index adde0730f..868937f6d 100644 --- a/src/chttpd/src/chttpd.erl +++ b/src/chttpd/src/chttpd.erl @@ -25,7 +25,7 @@ error_info/1, parse_form/1, json_body/1, json_body_obj/1, body/1, doc_etag/1, make_etag/1, etag_respond/3, etag_match/2, partition/1, serve_file/3, serve_file/4, - server_header/0, start_chunked_response/3,send_chunk/2, + server_header/0, start_chunked_response/3,send_chunk/2,last_chunk/1, start_response_length/4, send/2, start_json_response/2, start_json_response/3, end_json_response/1, send_response/4, send_response_no_cors/4, @@ -745,7 +745,14 @@ start_chunked_response(#httpd{mochi_req=MochiReq}=Req, Code, Headers0) -> send_chunk({remote, _Pid, _Ref} = Resp, Data) -> couch_httpd:send_chunk(Resp, Data); send_chunk(Resp, Data) -> - Resp:write_chunk(Data), + case iolist_size(Data) of + 0 -> ok; % do nothing + _ -> Resp:write_chunk(Data) + end, + {ok, Resp}. + +last_chunk(Resp) -> + Resp:write_chunk([]), {ok, Resp}. send_response(Req, Code, Headers0, Body) -> diff --git a/src/chttpd/src/chttpd_auth_request.erl b/src/chttpd/src/chttpd_auth_request.erl index 8040f91fd..4a9b4e9e6 100644 --- a/src/chttpd/src/chttpd_auth_request.erl +++ b/src/chttpd/src/chttpd_auth_request.erl @@ -107,7 +107,8 @@ server_authorization_check(#httpd{path_parts=[<<"_", _/binary>>|_]}=Req) -> require_admin(Req). db_authorization_check(#httpd{path_parts=[DbName|_],user_ctx=Ctx}=Req) -> - {_} = fabric:get_security(DbName, [{user_ctx, Ctx}]), + {ok, Db} = fabric2_db:open(DbName, [{user_ctx, Ctx}]), + fabric2_db:check_is_member(Db), Req. @@ -125,8 +126,8 @@ require_admin(Req) -> Req. require_db_admin(#httpd{path_parts=[DbName|_],user_ctx=Ctx}=Req) -> - Sec = fabric:get_security(DbName, [{user_ctx, Ctx}]), - + {ok, Db} = fabric2_db:open(DbName, [{user_ctx, Ctx}]), + Sec = fabric2_db:get_security(Db), case is_db_admin(Ctx,Sec) of true -> Req; false -> throw({unauthorized, <<"You are not a server or db admin.">>}) diff --git a/src/chttpd/src/chttpd_changes.erl b/src/chttpd/src/chttpd_changes.erl new file mode 100644 index 000000000..39e18d4f9 --- /dev/null +++ b/src/chttpd/src/chttpd_changes.erl @@ -0,0 +1,755 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(chttpd_changes). +-include_lib("couch/include/couch_db.hrl"). +-include_lib("couch_mrview/include/couch_mrview.hrl"). + +-export([ + handle_db_changes/3, + get_changes_timeout/2, + wait_updated/3, + get_rest_updated/1, + configure_filter/4, + filter/3, + handle_db_event/3, + handle_view_event/3, + send_changes_doc_ids/6, + send_changes_design_docs/6 +]). + +-export([changes_enumerator/2]). + +%% export so we can use fully qualified call to facilitate hot-code upgrade +-export([ + keep_sending_changes/3 +]). + +-record(changes_acc, { + db, + seq, + prepend, + filter, + callback, + user_acc, + resp_type, + limit, + include_docs, + doc_options, + conflicts, + timeout, + timeout_fun, + aggregation_kvs, + aggregation_results +}). + +handle_db_changes(Args0, Req, Db0) -> + #changes_args{ + style = Style, + filter = FilterName, + feed = Feed, + dir = Dir, + since = Since + } = Args0, + Filter = configure_filter(FilterName, Style, Req, Db0), + Args = Args0#changes_args{filter_fun = Filter}, + DbName = fabric2_db:name(Db0), + StartListenerFun = fun() -> + fabric2_events:link_listener( + ?MODULE, handle_db_event, self(), [{dbname, DbName}] + ) + end, + Start = fun() -> + StartSeq = case Dir of + rev -> + fabric2_fdb:get_update_seq(Db); + fwd -> + Since + end, + {Db0, StartSeq} + end, + % begin timer to deal with heartbeat when filter function fails + case Args#changes_args.heartbeat of + undefined -> + erlang:erase(last_changes_heartbeat); + Val when is_integer(Val); Val =:= true -> + put(last_changes_heartbeat, os:timestamp()) + end, + + case lists:member(Feed, ["continuous", "longpoll", "eventsource"]) of + true -> + fun(CallbackAcc) -> + {Callback, UserAcc} = get_callback_acc(CallbackAcc), + {ok, Listener} = StartListenerFun(), + + {Db, StartSeq} = Start(), + UserAcc2 = start_sending_changes(Callback, UserAcc), + {Timeout, TimeoutFun} = get_changes_timeout(Args, Callback), + Acc0 = build_acc(Args, Callback, UserAcc2, Db, StartSeq, + <<"">>, Timeout, TimeoutFun), + try + keep_sending_changes( + Args#changes_args{dir=fwd}, + Acc0, + true) + after + fabric2_events:stop_listener(Listener), + get_rest_updated(ok) % clean out any remaining update messages + end + end; + false -> + fun(CallbackAcc) -> + {Callback, UserAcc} = get_callback_acc(CallbackAcc), + UserAcc2 = start_sending_changes(Callback, UserAcc), + {Timeout, TimeoutFun} = get_changes_timeout(Args, Callback), + {Db, StartSeq} = Start(), + Acc0 = build_acc(Args#changes_args{feed="normal"}, Callback, + UserAcc2, Db, StartSeq, <<>>, + Timeout, TimeoutFun), + {ok, #changes_acc{seq = LastSeq, user_acc = UserAcc3}} = + send_changes( + Acc0, + Dir, + true), + end_sending_changes(Callback, UserAcc3, LastSeq) + end + end. + + +handle_db_event(_DbName, updated, Parent) -> + Parent ! updated, + {ok, Parent}; +handle_db_event(_DbName, deleted, Parent) -> + Parent ! deleted, + {ok, Parent}; +handle_db_event(_DbName, _Event, Parent) -> + {ok, Parent}. + + +handle_view_event(_DbName, Msg, {Parent, DDocId}) -> + case Msg of + {index_commit, DDocId} -> + Parent ! updated; + {index_delete, DDocId} -> + Parent ! deleted; + _ -> + ok + end, + {ok, {Parent, DDocId}}. + +get_callback_acc({Callback, _UserAcc} = Pair) when is_function(Callback, 2) -> + Pair; +get_callback_acc(Callback) when is_function(Callback, 1) -> + {fun(Ev, _) -> Callback(Ev) end, ok}. + + +configure_filter("_doc_ids", Style, Req, _Db) -> + {doc_ids, Style, get_doc_ids(Req)}; +configure_filter("_selector", Style, Req, _Db) -> + {selector, Style, get_selector_and_fields(Req)}; +configure_filter("_design", Style, _Req, _Db) -> + {design_docs, Style}; +configure_filter("_view", Style, Req, Db) -> + ViewName = get_view_qs(Req), + if ViewName /= "" -> ok; true -> + throw({bad_request, "`view` filter parameter is not provided."}) + end, + ViewNameParts = string:tokens(ViewName, "/"), + case [?l2b(couch_httpd:unquote(Part)) || Part <- ViewNameParts] of + [DName, VName] -> + {ok, DDoc} = open_ddoc(Db, <<"_design/", DName/binary>>), + check_member_exists(DDoc, [<<"views">>, VName]), + case couch_db:is_clustered(Db) of + true -> + DIR = fabric_util:doc_id_and_rev(DDoc), + {fetch, view, Style, DIR, VName}; + false -> + {view, Style, DDoc, VName} + end; + [] -> + Msg = "`view` must be of the form `designname/viewname`", + throw({bad_request, Msg}) + end; +configure_filter([$_ | _], _Style, _Req, _Db) -> + throw({bad_request, "unknown builtin filter name"}); +configure_filter("", main_only, _Req, _Db) -> + {default, main_only}; +configure_filter("", all_docs, _Req, _Db) -> + {default, all_docs}; +configure_filter(FilterName, Style, Req, Db) -> + FilterNameParts = string:tokens(FilterName, "/"), + case [?l2b(couch_httpd:unquote(Part)) || Part <- FilterNameParts] of + [DName, FName] -> + {ok, DDoc} = open_ddoc(Db, <<"_design/", DName/binary>>), + check_member_exists(DDoc, [<<"filters">>, FName]), + {custom, Style, Req, DDoc, FName}; + [] -> + {default, Style}; + _Else -> + Msg = "`filter` must be of the form `designname/filtername`", + throw({bad_request, Msg}) + end. + + +filter(Db, Change, {default, Style}) -> + apply_style(Db, Change, Style); +filter(Db, Change, {doc_ids, Style, DocIds}) -> + case lists:member(maps:get(id, Change), DocIds) of + true -> + apply_style(Db, Change, Style); + false -> + [] + end; +filter(Db, Change, {selector, Style, {Selector, _Fields}}) -> + Docs = open_revs(Db, Change, Style), + Passes = [mango_selector:match(Selector, couch_doc:to_json_obj(Doc, [])) + || Doc <- Docs], + filter_revs(Passes, Docs); +filter(Db, Change, {design_docs, Style}) -> + case maps:get(id, Change) of + <<"_design", _/binary>> -> + apply_style(Db, Change, Style); + _ -> + [] + end; +filter(Db, Change, {view, Style, DDoc, VName}) -> + Docs = open_revs(Db, Change, Style), + {ok, Passes} = couch_query_servers:filter_view(DDoc, VName, Docs), + filter_revs(Passes, Docs); +filter(Db, Change, {custom, Style, Req0, DDoc, FName}) -> + Req = case Req0 of + {json_req, _} -> Req0; + #httpd{} -> {json_req, chttpd_external:json_req_obj(Req0, Db)} + end, + Docs = open_revs(Db, Change, Style), + {ok, Passes} = couch_query_servers:filter_docs(Req, Db, DDoc, FName, Docs), + filter_revs(Passes, Docs); +filter(Db, Change, Filter) -> + erlang:error({filter_error, Db, Change, Filter}). + + +get_view_qs({json_req, {Props}}) -> + {Query} = couch_util:get_value(<<"query">>, Props, {[]}), + binary_to_list(couch_util:get_value(<<"view">>, Query, "")); +get_view_qs(Req) -> + couch_httpd:qs_value(Req, "view", ""). + +get_doc_ids({json_req, {Props}}) -> + check_docids(couch_util:get_value(<<"doc_ids">>, Props)); +get_doc_ids(#httpd{method='POST'}=Req) -> + couch_httpd:validate_ctype(Req, "application/json"), + {Props} = couch_httpd:json_body_obj(Req), + check_docids(couch_util:get_value(<<"doc_ids">>, Props)); +get_doc_ids(#httpd{method='GET'}=Req) -> + DocIds = ?JSON_DECODE(couch_httpd:qs_value(Req, "doc_ids", "null")), + check_docids(DocIds); +get_doc_ids(_) -> + throw({bad_request, no_doc_ids_provided}). + + +get_selector_and_fields({json_req, {Props}}) -> + Selector = check_selector(couch_util:get_value(<<"selector">>, Props)), + Fields = check_fields(couch_util:get_value(<<"fields">>, Props, nil)), + {Selector, Fields}; +get_selector_and_fields(#httpd{method='POST'}=Req) -> + couch_httpd:validate_ctype(Req, "application/json"), + get_selector_and_fields({json_req, couch_httpd:json_body_obj(Req)}); +get_selector_and_fields(_) -> + throw({bad_request, "Selector must be specified in POST payload"}). + + +check_docids(DocIds) when is_list(DocIds) -> + lists:foreach(fun + (DocId) when not is_binary(DocId) -> + Msg = "`doc_ids` filter parameter is not a list of doc ids.", + throw({bad_request, Msg}); + (_) -> ok + end, DocIds), + DocIds; +check_docids(_) -> + Msg = "`doc_ids` filter parameter is not a list of doc ids.", + throw({bad_request, Msg}). + + +check_selector(Selector={_}) -> + try + mango_selector:normalize(Selector) + catch + {mango_error, Mod, Reason0} -> + {_StatusCode, _Error, Reason} = mango_error:info(Mod, Reason0), + throw({bad_request, Reason}) + end; +check_selector(_Selector) -> + throw({bad_request, "Selector error: expected a JSON object"}). + + +check_fields(nil) -> + nil; +check_fields(Fields) when is_list(Fields) -> + try + {ok, Fields1} = mango_fields:new(Fields), + Fields1 + catch + {mango_error, Mod, Reason0} -> + {_StatusCode, _Error, Reason} = mango_error:info(Mod, Reason0), + throw({bad_request, Reason}) + end; +check_fields(_Fields) -> + throw({bad_request, "Selector error: fields must be JSON array"}). + + +open_ddoc(Db, DDocId) -> + case ddoc_cache:open_doc(Db, DDocId) of + {ok, _} = Resp -> Resp; + Else -> throw(Else) + end. + + +check_member_exists(#doc{body={Props}}, Path) -> + couch_util:get_nested_json_value({Props}, Path). + + +apply_style(_Db, Change, main_only) -> + #{rev_id := RevId} = Change, + [{[{<<"rev">>, couch_doc:rev_to_str(RevId)}]}]; +apply_style(Db, Change, all_docs) -> + % We have to fetch all revs for this row + #{id := DocId} = Change, + {ok, Resps} = fabric2_db:open_doc_revs(Db, DocId, all, [deleted]), + lists:flatmap(fun(Resp) -> + case Resp of + {ok, #doc{revs = {Pos, [Rev | _]}}} -> + [{[{<<"rev">>, couch_doc:rev_to_str({Pos, Rev})}]}]; + _ -> + [] + end + end, Resps); +apply_style(Db, Change, Style) -> + erlang:error({changes_apply_style, Db, Change, Style}). + + +open_revs(Db, Change, Style) -> + #{id := DocId} = Change, + Options = [deleted, conflicts], + try + case Style of + main_only -> + {ok, Doc} = fabric2_db:open_doc(Db, DocId, Options), + [Doc]; + all_docs -> + {ok, Docs} = fabric2_db:open_doc_revs(Db, DocId, all, Options), + [Doc || {ok, Doc} <- Docs] + end + catch _:_ -> + % We didn't log this before, should we now? + [] + end. + + +filter_revs(Passes, Docs) -> + lists:flatmap(fun + ({true, #doc{revs={RevPos, [RevId | _]}}}) -> + RevStr = couch_doc:rev_to_str({RevPos, RevId}), + Change = {[{<<"rev">>, RevStr}]}, + [Change]; + (_) -> + [] + end, lists:zip(Passes, Docs)). + + +get_changes_timeout(Args, Callback) -> + #changes_args{ + heartbeat = Heartbeat, + timeout = Timeout, + feed = ResponseType + } = Args, + DefaultTimeout = list_to_integer( + config:get("httpd", "changes_timeout", "60000") + ), + case Heartbeat of + undefined -> + case Timeout of + undefined -> + {DefaultTimeout, fun(UserAcc) -> {stop, UserAcc} end}; + infinity -> + {infinity, fun(UserAcc) -> {stop, UserAcc} end}; + _ -> + {lists:min([DefaultTimeout, Timeout]), + fun(UserAcc) -> {stop, UserAcc} end} + end; + true -> + {DefaultTimeout, + fun(UserAcc) -> {ok, Callback(timeout, ResponseType, UserAcc)} end}; + _ -> + {lists:min([DefaultTimeout, Heartbeat]), + fun(UserAcc) -> {ok, Callback(timeout, ResponseType, UserAcc)} end} + end. + +start_sending_changes(Callback, UserAcc) -> + {_, NewUserAcc} = Callback(start, UserAcc), + NewUserAcc. + +build_acc(Args, Callback, UserAcc, Db, StartSeq, Prepend, Timeout, TimeoutFun) -> + #changes_args{ + include_docs = IncludeDocs, + doc_options = DocOpts, + conflicts = Conflicts, + limit = Limit, + feed = ResponseType, + filter_fun = Filter + } = Args, + #changes_acc{ + db = Db, + seq = StartSeq, + prepend = Prepend, + filter = Filter, + callback = Callback, + user_acc = UserAcc, + resp_type = ResponseType, + limit = Limit, + include_docs = IncludeDocs, + doc_options = DocOpts, + conflicts = Conflicts, + timeout = Timeout, + timeout_fun = TimeoutFun, + aggregation_results=[], + aggregation_kvs=[] + }. + +send_changes(Acc, Dir, FirstRound) -> + #changes_acc{ + db = Db, + seq = StartSeq, + filter = Filter + } = maybe_upgrade_changes_acc(Acc), + DbEnumFun = fun changes_enumerator/2, + case can_optimize(FirstRound, Filter) of + {true, Fun} -> + Fun(Db, StartSeq, Dir, DbEnumFun, Acc, Filter); + _ -> + Opts = [{dir, Dir}], + fabric2_db:fold_changes(Db, StartSeq, DbEnumFun, Acc, Opts) + end. + + +can_optimize(true, {doc_ids, _Style, DocIds}) -> + MaxDocIds = config:get_integer("couchdb", + "changes_doc_ids_optimization_threshold", 100), + if length(DocIds) =< MaxDocIds -> + {true, fun send_changes_doc_ids/6}; + true -> + false + end; +can_optimize(true, {design_docs, _Style}) -> + {true, fun send_changes_design_docs/6}; +can_optimize(_, _) -> + false. + + +send_changes_doc_ids(Db, StartSeq, Dir, Fun, Acc0, {doc_ids, _Style, DocIds}) -> + Results = fabric2_db:get_full_doc_infos(Db, DocIds), + FullInfos = lists:foldl(fun + (#full_doc_info{}=FDI, Acc) -> [FDI | Acc]; + (not_found, Acc) -> Acc + end, [], Results), + send_lookup_changes(FullInfos, StartSeq, Dir, Db, Fun, Acc0). + + +send_changes_design_docs(Db, StartSeq, Dir, Fun, Acc0, {design_docs, _Style}) -> + FoldFun = fun(FDI, Acc) -> {ok, [FDI | Acc]} end, + Opts = [ + include_deleted, + {start_key, <<"_design/">>}, + {end_key_gt, <<"_design0">>} + ], + {ok, FullInfos} = fabric2_db:fold_docs(Db, FoldFun, [], Opts), + send_lookup_changes(FullInfos, StartSeq, Dir, Db, Fun, Acc0). + + +send_lookup_changes(FullDocInfos, StartSeq, Dir, Db, Fun, Acc0) -> + FoldFun = case Dir of + fwd -> fun lists:foldl/3; + rev -> fun lists:foldr/3 + end, + GreaterFun = case Dir of + fwd -> fun(A, B) -> A > B end; + rev -> fun(A, B) -> A =< B end + end, + DocInfos = lists:foldl(fun(FDI, Acc) -> + DI = couch_doc:to_doc_info(FDI), + case GreaterFun(DI#doc_info.high_seq, StartSeq) of + true -> [DI | Acc]; + false -> Acc + end + end, [], FullDocInfos), + SortedDocInfos = lists:keysort(#doc_info.high_seq, DocInfos), + FinalAcc = try + FoldFun(fun(DocInfo, Acc) -> + % Kinda gross that we're munging this back to a map + % that will then have to re-read and rebuild the FDI + % for all_docs style. But c'est la vie. + #doc_info{ + id = DocId, + high_seq = Seq, + revs = [#rev_info{rev = Rev, deleted = Deleted} | _] + } = DocInfo, + Change = #{ + id => DocId, + sequence => Seq, + rev_id => Rev, + deleted => Deleted + }, + case Fun(Change, Acc) of + {ok, NewAcc} -> + NewAcc; + {stop, NewAcc} -> + throw({stop, NewAcc}) + end + end, Acc0, SortedDocInfos) + catch + {stop, Acc} -> Acc + end, + case Dir of + fwd -> + FinalAcc0 = case element(1, FinalAcc) of + changes_acc -> % we came here via couch_http or internal call + FinalAcc#changes_acc{seq = fabric2_db:get_update_seq(Db)}; + fabric_changes_acc -> % we came here via chttpd / fabric / rexi + FinalAcc#fabric_changes_acc{seq = couch_db:get_update_seq(Db)} + end, + {ok, FinalAcc0}; + rev -> {ok, FinalAcc} + end. + + +keep_sending_changes(Args, Acc0, FirstRound) -> + #changes_args{ + feed = ResponseType, + limit = Limit, + db_open_options = DbOptions + } = Args, + + {ok, ChangesAcc} = send_changes(Acc0, fwd, FirstRound), + + #changes_acc{ + db = Db, callback = Callback, + timeout = Timeout, timeout_fun = TimeoutFun, seq = EndSeq, + prepend = Prepend2, user_acc = UserAcc2, limit = NewLimit + } = maybe_upgrade_changes_acc(ChangesAcc), + + if Limit > NewLimit, ResponseType == "longpoll" -> + end_sending_changes(Callback, UserAcc2, EndSeq); + true -> + {Go, UserAcc3} = notify_waiting_for_updates(Callback, UserAcc2), + if Go /= ok -> end_sending_changes(Callback, UserAcc3, EndSeq); true -> + case wait_updated(Timeout, TimeoutFun, UserAcc3) of + {updated, UserAcc4} -> + UserCtx = fabric2_db:get_user_ctx(Db), + DbOptions1 = [{user_ctx, UserCtx} | DbOptions], + case fabric2_db:open(fabric2_db:name(Db), DbOptions1) of + {ok, Db2} -> + ?MODULE:keep_sending_changes( + Args#changes_args{limit=NewLimit}, + ChangesAcc#changes_acc{ + db = Db2, + user_acc = UserAcc4, + seq = EndSeq, + prepend = Prepend2, + timeout = Timeout, + timeout_fun = TimeoutFun}, + false); + _Else -> + end_sending_changes(Callback, UserAcc3, EndSeq) + end; + {stop, UserAcc4} -> + end_sending_changes(Callback, UserAcc4, EndSeq) + end + end + end. + +notify_waiting_for_updates(Callback, UserAcc) -> + Callback(waiting_for_updates, UserAcc). + +end_sending_changes(Callback, UserAcc, EndSeq) -> + Callback({stop, EndSeq, null}, UserAcc). + +changes_enumerator(Change, Acc) -> + #changes_acc{ + filter = Filter, + callback = Callback, + user_acc = UserAcc, + limit = Limit, + db = Db, + timeout = Timeout, + timeout_fun = TimeoutFun + } = maybe_upgrade_changes_acc(Acc), + Results0 = filter(Db, Change, Filter), + Results = [Result || Result <- Results0, Result /= null], + Seq = maps:get(sequence, Change), + Go = if (Limit =< 1) andalso Results =/= [] -> stop; true -> ok end, + case Results of + [] -> + {Done, UserAcc2} = maybe_heartbeat(Timeout, TimeoutFun, UserAcc), + case Done of + stop -> + {stop, Acc#changes_acc{seq = Seq, user_acc = UserAcc2}}; + ok -> + {Go, Acc#changes_acc{seq = Seq, user_acc = UserAcc2}} + end; + _ -> + ChangesRow = changes_row(Results, Change, Acc), + {UserGo, UserAcc2} = Callback({change, ChangesRow}, UserAcc), + RealGo = case UserGo of + ok -> Go; + stop -> stop + end, + reset_heartbeat(), + {RealGo, Acc#changes_acc{ + seq = Seq, + user_acc = UserAcc2, + limit = Limit - 1 + }} + end. + + +changes_row(Results, Change, Acc) -> + #{ + id := Id, + sequence := Seq, + deleted := Del + } = Change, + {[ + {<<"seq">>, Seq}, + {<<"id">>, Id}, + {<<"changes">>, Results} + ] ++ deleted_item(Del) ++ maybe_get_changes_doc(Change, Acc)}. + +maybe_get_changes_doc(Value, #changes_acc{include_docs=true}=Acc) -> + #changes_acc{ + db = Db, + doc_options = DocOpts, + conflicts = Conflicts, + filter = Filter + } = Acc, + Opts = case Conflicts of + true -> [deleted, conflicts]; + false -> [deleted] + end, + load_doc(Db, Value, Opts, DocOpts, Filter); + +maybe_get_changes_doc(_Value, _Acc) -> + []. + + +load_doc(Db, Value, Opts, DocOpts, Filter) -> + case load_doc(Db, Value, Opts) of + null -> + [{doc, null}]; + Doc -> + [{doc, doc_to_json(Doc, DocOpts, Filter)}] + end. + + +load_doc(Db, Change, Opts) -> + #{ + id := Id, + rev_id := RevId + } = Change, + case fabric2_db:open_doc_revs(Db, Id, [RevId], Opts) of + {ok, [{ok, Doc}]} -> + Doc; + _ -> + null + end. + + +doc_to_json(Doc, DocOpts, {selector, _Style, {_Selector, Fields}}) + when Fields =/= nil -> + mango_fields:extract(couch_doc:to_json_obj(Doc, DocOpts), Fields); +doc_to_json(Doc, DocOpts, _Filter) -> + couch_doc:to_json_obj(Doc, DocOpts). + + +deleted_item(true) -> [{<<"deleted">>, true}]; +deleted_item(_) -> []. + +% waits for a updated msg, if there are multiple msgs, collects them. +wait_updated(Timeout, TimeoutFun, UserAcc) -> + receive + updated -> + get_rest_updated(UserAcc); + deleted -> + {stop, UserAcc} + after Timeout -> + {Go, UserAcc2} = TimeoutFun(UserAcc), + case Go of + ok -> + ?MODULE:wait_updated(Timeout, TimeoutFun, UserAcc2); + stop -> + {stop, UserAcc2} + end + end. + +get_rest_updated(UserAcc) -> + receive + updated -> + get_rest_updated(UserAcc) + after 0 -> + {updated, UserAcc} + end. + +reset_heartbeat() -> + case get(last_changes_heartbeat) of + undefined -> + ok; + _ -> + put(last_changes_heartbeat, os:timestamp()) + end. + +maybe_heartbeat(Timeout, TimeoutFun, Acc) -> + Before = get(last_changes_heartbeat), + case Before of + undefined -> + {ok, Acc}; + _ -> + Now = os:timestamp(), + case timer:now_diff(Now, Before) div 1000 >= Timeout of + true -> + Acc2 = TimeoutFun(Acc), + put(last_changes_heartbeat, Now), + Acc2; + false -> + {ok, Acc} + end + end. + + +maybe_upgrade_changes_acc(#changes_acc{} = Acc) -> + Acc; +maybe_upgrade_changes_acc(Acc) when tuple_size(Acc) == 19 -> + #changes_acc{ + db = element(2, Acc), + seq = element(6, Acc), + prepend = element(7, Acc), + filter = element(8, Acc), + callback = element(9, Acc), + user_acc = element(10, Acc), + resp_type = element(11, Acc), + limit = element(12, Acc), + include_docs = element(13, Acc), + doc_options = element(14, Acc), + conflicts = element(15, Acc), + timeout = element(16, Acc), + timeout_fun = element(17, Acc), + aggregation_kvs = element(18, Acc), + aggregation_results = element(19, Acc) + }. diff --git a/src/chttpd/src/chttpd_db.erl b/src/chttpd/src/chttpd_db.erl index 6a3df6def..fae90375e 100644 --- a/src/chttpd/src/chttpd_db.erl +++ b/src/chttpd/src/chttpd_db.erl @@ -94,18 +94,13 @@ handle_changes_req(#httpd{path_parts=[_,<<"_changes">>]}=Req, _Db) -> handle_changes_req1(#httpd{}=Req, Db) -> #changes_args{filter=Raw, style=Style} = Args0 = parse_changes_query(Req), ChangesArgs = Args0#changes_args{ - filter_fun = couch_changes:configure_filter(Raw, Style, Req, Db), - db_open_options = [{user_ctx, couch_db:get_user_ctx(Db)}] + db_open_options = [{user_ctx, fabric2_db:get_user_ctx(Db)}] }, + ChangesFun = chttpd_changes:handle_db_changes(ChangesArgs, Req, Db), Max = chttpd:chunked_response_buffer_size(), case ChangesArgs#changes_args.feed of "normal" -> - T0 = os:timestamp(), - {ok, Info} = fabric:get_db_info(Db), - Suffix = mem3:shard_suffix(Db), - Etag = chttpd:make_etag({Info, Suffix}), - DeltaT = timer:now_diff(os:timestamp(), T0) / 1000, - couch_stats:update_histogram([couchdb, dbinfo], DeltaT), + Etag = <<"foo">>, chttpd:etag_respond(Req, Etag, fun() -> Acc0 = #cacc{ feed = normal, @@ -113,7 +108,7 @@ handle_changes_req1(#httpd{}=Req, Db) -> mochi = Req, threshold = Max }, - fabric:changes(Db, fun changes_callback/2, Acc0, ChangesArgs) + ChangesFun({fun changes_callback/2, Acc0}) end); Feed when Feed =:= "continuous"; Feed =:= "longpoll"; Feed =:= "eventsource" -> couch_stats:increment_counter([couchdb, httpd, clients_requesting_changes]), @@ -123,7 +118,7 @@ handle_changes_req1(#httpd{}=Req, Db) -> threshold = Max }, try - fabric:changes(Db, fun changes_callback/2, Acc0, ChangesArgs) + ChangesFun({fun changes_callback/2, Acc0}) after couch_stats:decrement_counter([couchdb, httpd, clients_requesting_changes]) end; @@ -355,7 +350,7 @@ update_partition_stats(PathParts) -> handle_design_req(#httpd{ path_parts=[_DbName, _Design, Name, <<"_",_/binary>> = Action | _Rest] }=Req, Db) -> - DbName = mem3:dbname(couch_db:name(Db)), + DbName = fabric2_db:name(Db), case ddoc_cache:open(DbName, <<"_design/", Name/binary>>) of {ok, DDoc} -> Handler = chttpd_handlers:design_handler(Action, fun bad_action_req/3), @@ -383,56 +378,33 @@ handle_design_info_req(Req, _Db, _DDoc) -> create_db_req(#httpd{}=Req, DbName) -> couch_httpd:verify_is_server_admin(Req), - N = chttpd:qs_value(Req, "n", config:get("cluster", "n", "3")), - Q = chttpd:qs_value(Req, "q", config:get("cluster", "q", "8")), - P = chttpd:qs_value(Req, "placement", config:get("cluster", "placement")), - EngineOpt = parse_engine_opt(Req), - DbProps = parse_partitioned_opt(Req), - Options = [ - {n, N}, - {q, Q}, - {placement, P}, - {props, DbProps} - ] ++ EngineOpt, DocUrl = absolute_uri(Req, "/" ++ couch_util:url_encode(DbName)), - case fabric:create_db(DbName, Options) of - ok -> - send_json(Req, 201, [{"Location", DocUrl}], {[{ok, true}]}); - accepted -> - send_json(Req, 202, [{"Location", DocUrl}], {[{ok, true}]}); - {error, file_exists} -> - chttpd:send_error(Req, file_exists); - Error -> - throw(Error) + case fabric2_db:create(DbName, []) of + {ok, _} -> + send_json(Req, 201, [{"Location", DocUrl}], {[{ok, true}]}); + {error, file_exists} -> + chttpd:send_error(Req, file_exists); + Error -> + throw(Error) end. delete_db_req(#httpd{}=Req, DbName) -> couch_httpd:verify_is_server_admin(Req), - case fabric:delete_db(DbName, []) of - ok -> - send_json(Req, 200, {[{ok, true}]}); - accepted -> - send_json(Req, 202, {[{ok, true}]}); - Error -> - throw(Error) + case fabric2_db:delete(DbName, []) of + ok -> + send_json(Req, 200, {[{ok, true}]}); + Error -> + throw(Error) end. do_db_req(#httpd{path_parts=[DbName|_], user_ctx=Ctx}=Req, Fun) -> - Shard = hd(mem3:shards(DbName)), - Props = couch_util:get_value(props, Shard#shard.opts, []), - Opts = case Ctx of - undefined -> - [{props, Props}]; - #user_ctx{} -> - [{user_ctx, Ctx}, {props, Props}] - end, - {ok, Db} = couch_db:clustered_db(DbName, Opts), + {ok, Db} = fabric2_db:open(DbName, [{user_ctx, Ctx}]), Fun(Req, Db). -db_req(#httpd{method='GET',path_parts=[DbName]}=Req, _Db) -> +db_req(#httpd{method='GET',path_parts=[_DbName]}=Req, Db) -> % measure the time required to generate the etag, see if it's worth it T0 = os:timestamp(), - {ok, DbInfo} = fabric:get_db_info(DbName), + {ok, DbInfo} = fabric2_db:get_db_info(Db), DeltaT = timer:now_diff(os:timestamp(), T0) / 1000, couch_stats:update_histogram([couchdb, dbinfo], DeltaT), send_json(Req, {DbInfo}); @@ -440,22 +412,22 @@ db_req(#httpd{method='GET',path_parts=[DbName]}=Req, _Db) -> db_req(#httpd{method='POST', path_parts=[DbName], user_ctx=Ctx}=Req, Db) -> chttpd:validate_ctype(Req, "application/json"), - W = chttpd:qs_value(Req, "w", integer_to_list(mem3:quorum(Db))), - Options = [{user_ctx,Ctx}, {w,W}], + Options = [{user_ctx,Ctx}], - Doc = couch_db:doc_from_json_obj_validate(Db, chttpd:json_body(Req)), - Doc2 = case Doc#doc.id of + Doc0 = chttpd:json_body(Req), + Doc1 = couch_doc:from_json_obj_validate(Doc0, fabric2_db:name(Db)), + Doc2 = case Doc1#doc.id of <<"">> -> - Doc#doc{id=couch_uuids:new(), revs={0, []}}; + Doc1#doc{id=couch_uuids:new(), revs={0, []}}; _ -> - Doc + Doc1 end, DocId = Doc2#doc.id, case chttpd:qs_value(Req, "batch") of "ok" -> % async_batching spawn(fun() -> - case catch(fabric:update_doc(Db, Doc2, Options)) of + case catch(fabric2_db:update_doc(Db, Doc2, Options)) of {ok, _} -> chttpd_stats:incr_writes(), ok; @@ -475,7 +447,7 @@ db_req(#httpd{method='POST', path_parts=[DbName], user_ctx=Ctx}=Req, Db) -> % normal DocUrl = absolute_uri(Req, [$/, couch_util:url_encode(DbName), $/, couch_util:url_encode(DocId)]), - case fabric:update_doc(Db, Doc2, Options) of + case fabric2_db:update_doc(Db, Doc2, Options) of {ok, NewRev} -> chttpd_stats:incr_writes(), HttpCode = 201; @@ -493,13 +465,10 @@ db_req(#httpd{method='POST', path_parts=[DbName], user_ctx=Ctx}=Req, Db) -> db_req(#httpd{path_parts=[_DbName]}=Req, _Db) -> send_method_not_allowed(Req, "DELETE,GET,HEAD,POST"); -db_req(#httpd{method='POST', path_parts=[DbName, <<"_ensure_full_commit">>], - user_ctx=Ctx}=Req, _Db) -> +db_req(#httpd{method='POST', path_parts=[_DbName, <<"_ensure_full_commit">>], + user_ctx=Ctx}=Req, Db) -> chttpd:validate_ctype(Req, "application/json"), - %% use fabric call to trigger a database_does_not_exist exception - %% for missing databases that'd return error 404 from chttpd - %% get_security used to prefer shards on the same node over other nodes - fabric:get_security(DbName, [{user_ctx, Ctx}]), + #{db_prefix := <<_/binary>>} = Db, send_json(Req, 201, {[ {ok, true}, {instance_start_time, <<"0">>} @@ -521,22 +490,17 @@ db_req(#httpd{method='POST',path_parts=[_,<<"_bulk_docs">>], user_ctx=Ctx}=Req, DocsArray0 end, couch_stats:update_histogram([couchdb, httpd, bulk_docs], length(DocsArray)), - W = case couch_util:get_value(<<"w">>, JsonProps) of - Value when is_integer(Value) -> - integer_to_list(Value); - _ -> - chttpd:qs_value(Req, "w", integer_to_list(mem3:quorum(Db))) - end, case chttpd:header_value(Req, "X-Couch-Full-Commit") of "true" -> - Options = [full_commit, {user_ctx,Ctx}, {w,W}]; + Options = [full_commit, {user_ctx,Ctx}]; "false" -> - Options = [delay_commit, {user_ctx,Ctx}, {w,W}]; + Options = [delay_commit, {user_ctx,Ctx}]; _ -> - Options = [{user_ctx,Ctx}, {w,W}] + Options = [{user_ctx,Ctx}] end, + DbName = fabric2_db:name(Db), Docs = lists:map(fun(JsonObj) -> - Doc = couch_db:doc_from_json_obj_validate(Db, JsonObj), + Doc = couch_doc:from_json_obj_validate(JsonObj, DbName), validate_attachment_names(Doc), case Doc#doc.id of <<>> -> Doc#doc{id = couch_uuids:new()}; @@ -550,7 +514,7 @@ db_req(#httpd{method='POST',path_parts=[_,<<"_bulk_docs">>], user_ctx=Ctx}=Req, true -> [all_or_nothing|Options]; _ -> Options end, - case fabric:update_docs(Db, Docs, Options2) of + case fabric2_db:update_docs(Db, Docs, Options2) of {ok, Results} -> % output the results chttpd_stats:incr_writes(length(Results)), @@ -569,7 +533,7 @@ db_req(#httpd{method='POST',path_parts=[_,<<"_bulk_docs">>], user_ctx=Ctx}=Req, send_json(Req, 417, ErrorsJson) end; false -> - case fabric:update_docs(Db, Docs, [replicated_changes|Options]) of + case fabric2_db:update_docs(Db, Docs, [replicated_changes|Options]) of {ok, Errors} -> chttpd_stats:incr_writes(length(Docs)), ErrorsJson = lists:map(fun update_doc_result_to_json/1, Errors), @@ -665,8 +629,7 @@ db_req(#httpd{path_parts=[_, <<"_bulk_get">>]}=Req, _Db) -> db_req(#httpd{method='POST',path_parts=[_,<<"_purge">>]}=Req, Db) -> couch_stats:increment_counter([couchdb, httpd, purge_requests]), chttpd:validate_ctype(Req, "application/json"), - W = chttpd:qs_value(Req, "w", integer_to_list(mem3:quorum(Db))), - Options = [{user_ctx, Req#httpd.user_ctx}, {w, W}], + Options = [{user_ctx, Req#httpd.user_ctx}], {IdsRevs} = chttpd:json_body_obj(Req), IdsRevs2 = [{Id, couch_doc:parse_revs(Revs)} || {Id, Revs} <- IdsRevs], MaxIds = config:get_integer("purge", "max_document_id_number", 100), @@ -741,7 +704,7 @@ db_req(#httpd{path_parts=[_,OP]}=Req, _Db) when ?IS_ALL_DOCS(OP) -> db_req(#httpd{method='POST',path_parts=[_,<<"_missing_revs">>]}=Req, Db) -> chttpd:validate_ctype(Req, "application/json"), {JsonDocIdRevs} = chttpd:json_body_obj(Req), - case fabric:get_missing_revs(Db, JsonDocIdRevs) of + case fabric2_db:get_missing_revs(Db, JsonDocIdRevs) of {error, Reason} -> chttpd:send_error(Req, Reason); {ok, Results} -> @@ -758,7 +721,7 @@ db_req(#httpd{path_parts=[_,<<"_missing_revs">>]}=Req, _Db) -> db_req(#httpd{method='POST',path_parts=[_,<<"_revs_diff">>]}=Req, Db) -> chttpd:validate_ctype(Req, "application/json"), {JsonDocIdRevs} = chttpd:json_body_obj(Req), - case fabric:get_missing_revs(Db, JsonDocIdRevs) of + case fabric2_db:get_missing_revs(Db, JsonDocIdRevs) of {error, Reason} -> chttpd:send_error(Req, Reason); {ok, Results} -> @@ -876,22 +839,22 @@ multi_all_docs_view(Req, Db, OP, Queries) -> 200, [], FirstChunk), VAcc1 = VAcc0#vacc{resp=Resp0}, VAcc2 = lists:foldl(fun(Args, Acc0) -> - {ok, Acc1} = fabric:all_docs(Db, Options, + {ok, Acc1} = fabric2_db:fold_docs(Db, Options, fun view_cb/2, Acc0, Args), Acc1 end, VAcc1, ArgQueries), {ok, Resp1} = chttpd:send_delayed_chunk(VAcc2#vacc.resp, "\r\n]}"), chttpd:end_delayed_json_response(Resp1). -all_docs_view(Req, Db, Keys, OP) -> - Args0 = couch_mrview_http:parse_body_and_query(Req, Keys), - Args1 = Args0#mrargs{view_type=map}, - Args2 = fabric_util:validate_all_docs_args(Db, Args1), - Args3 = set_namespace(OP, Args2), +all_docs_view(Req, Db, _Keys, _OP) -> + % Args0 = couch_mrview_http:parse_body_and_query(Req, Keys), + % Args1 = Args0#mrargs{view_type=map}, + % Args2 = fabric_util:validate_all_docs_args(Db, Args1), + % Args3 = set_namespace(OP, Args2), Options = [{user_ctx, Req#httpd.user_ctx}], Max = chttpd:chunked_response_buffer_size(), VAcc = #vacc{db=Db, req=Req, threshold=Max}, - {ok, Resp} = fabric:all_docs(Db, Options, fun view_cb/2, VAcc, Args3), + {ok, Resp} = fabric2_db:fold_docs(Db, fun view_cb/2, VAcc, Options), {ok, Resp#vacc.resp}. view_cb({row, Row} = Msg, Acc) -> @@ -935,7 +898,7 @@ db_doc_req(#httpd{method='GET', mochi_req=MochiReq}=Req, Db, DocId) -> Doc = couch_doc_open(Db, DocId, Rev, Options2), send_doc(Req, Doc, Options2); _ -> - case fabric:open_revs(Db, DocId, Revs, Options) of + case fabric2_db:open_doc_revs(Db, DocId, Revs, Options) of {ok, []} when Revs == all -> chttpd:send_error(Req, {not_found, missing}); {ok, Results} -> @@ -976,8 +939,7 @@ db_doc_req(#httpd{method='POST', user_ctx=Ctx}=Req, Db, DocId) -> couch_db:validate_docid(Db, DocId), chttpd:validate_ctype(Req, "multipart/form-data"), - W = chttpd:qs_value(Req, "w", integer_to_list(mem3:quorum(Db))), - Options = [{user_ctx,Ctx}, {w,W}], + Options = [{user_ctx,Ctx}], Form = couch_httpd:parse_form(Req), case proplists:is_defined("_doc", Form) of @@ -986,7 +948,7 @@ db_doc_req(#httpd{method='POST', user_ctx=Ctx}=Req, Db, DocId) -> Doc = couch_doc_from_req(Req, Db, DocId, Json); false -> Rev = couch_doc:parse_rev(list_to_binary(couch_util:get_value("_rev", Form))), - Doc = case fabric:open_revs(Db, DocId, [Rev], []) of + Doc = case fabric2_db:open_doc_revs(Db, DocId, [Rev], []) of {ok, [{ok, Doc0}]} -> chttpd_stats:incr_reads(), Doc0; @@ -1015,7 +977,7 @@ db_doc_req(#httpd{method='POST', user_ctx=Ctx}=Req, Db, DocId) -> NewDoc = Doc#doc{ atts = UpdatedAtts ++ OldAtts2 }, - case fabric:update_doc(Db, NewDoc, Options) of + case fabric2_db:update_doc(Db, NewDoc, Options) of {ok, NewRev} -> chttpd_stats:incr_writes(), HttpCode = 201; @@ -1033,11 +995,10 @@ db_doc_req(#httpd{method='PUT', user_ctx=Ctx}=Req, Db, DocId) -> #doc_query_args{ update_type = UpdateType } = parse_doc_query(Req), - DbName = couch_db:name(Db), - couch_db:validate_docid(Db, DocId), + DbName = fabric2_db:name(Db), + couch_doc:validate_docid(DocId), - W = chttpd:qs_value(Req, "w", integer_to_list(mem3:quorum(Db))), - Options = [{user_ctx,Ctx}, {w,W}], + Options = [{user_ctx, Ctx}], Loc = absolute_uri(Req, [$/, couch_util:url_encode(DbName), $/, couch_util:url_encode(DocId)]), @@ -1045,7 +1006,7 @@ db_doc_req(#httpd{method='PUT', user_ctx=Ctx}=Req, Db, DocId) -> case couch_util:to_list(couch_httpd:header_value(Req, "Content-Type")) of ("multipart/related;" ++ _) = ContentType -> couch_httpd:check_max_request_length(Req), - couch_httpd_multipart:num_mp_writers(mem3:n(mem3:dbname(DbName), DocId)), + couch_httpd_multipart:num_mp_writers(1), {ok, Doc0, WaitFun, Parser} = couch_doc:doc_from_multi_part_stream(ContentType, fun() -> receive_request_data(Req) end), Doc = couch_doc_from_req(Req, Db, DocId, Doc0), @@ -1065,7 +1026,7 @@ db_doc_req(#httpd{method='PUT', user_ctx=Ctx}=Req, Db, DocId) -> Doc = couch_doc_from_req(Req, Db, DocId, chttpd:json_body(Req)), spawn(fun() -> - case catch(fabric:update_doc(Db, Doc, Options)) of + case catch(fabric2_db:update_doc(Db, Doc, Options)) of {ok, _} -> chttpd_stats:incr_writes(), ok; @@ -1099,7 +1060,7 @@ db_doc_req(#httpd{method='COPY', user_ctx=Ctx}=Req, Db, SourceDocId) -> % open old doc Doc = couch_doc_open(Db, SourceDocId, SourceRev, []), % save new doc - case fabric:update_doc(Db, + case fabric2_db:update_doc(Db, Doc#doc{id=TargetDocId, revs=TargetRevs}, [{user_ctx,Ctx}]) of {ok, NewTargetRev} -> chttpd_stats:incr_writes(), @@ -1200,7 +1161,7 @@ send_docs_multipart(Req, Results, Options1) -> CType = {"Content-Type", "multipart/mixed; boundary=\"" ++ ?b2l(OuterBoundary) ++ "\""}, {ok, Resp} = start_chunked_response(Req, 200, [CType]), - couch_httpd:send_chunk(Resp, <<"--", OuterBoundary/binary>>), + chttpd:send_chunk(Resp, <<"--", OuterBoundary/binary>>), lists:foreach( fun({ok, #doc{atts=Atts}=Doc}) -> Refs = monitor_attachments(Doc#doc.atts), @@ -1208,25 +1169,25 @@ send_docs_multipart(Req, Results, Options1) -> JsonBytes = ?JSON_ENCODE(couch_doc:to_json_obj(Doc, Options)), {ContentType, _Len} = couch_doc:len_doc_to_multi_part_stream( InnerBoundary, JsonBytes, Atts, true), - couch_httpd:send_chunk(Resp, <<"\r\nContent-Type: ", + chttpd:send_chunk(Resp, <<"\r\nContent-Type: ", ContentType/binary, "\r\n\r\n">>), couch_doc:doc_to_multi_part_stream(InnerBoundary, JsonBytes, Atts, - fun(Data) -> couch_httpd:send_chunk(Resp, Data) + fun(Data) -> chttpd:send_chunk(Resp, Data) end, true), - couch_httpd:send_chunk(Resp, <<"\r\n--", OuterBoundary/binary>>) + chttpd:send_chunk(Resp, <<"\r\n--", OuterBoundary/binary>>) after demonitor_refs(Refs) end; ({{not_found, missing}, RevId}) -> RevStr = couch_doc:rev_to_str(RevId), Json = ?JSON_ENCODE({[{<<"missing">>, RevStr}]}), - couch_httpd:send_chunk(Resp, + chttpd:send_chunk(Resp, [<<"\r\nContent-Type: application/json; error=\"true\"\r\n\r\n">>, Json, <<"\r\n--", OuterBoundary/binary>>]) end, Results), - couch_httpd:send_chunk(Resp, <<"--">>), - couch_httpd:last_chunk(Resp). + chttpd:send_chunk(Resp, <<"--">>), + chttpd:last_chunk(Resp). bulk_get_multipart_headers({0, []}, Id, Boundary) -> [ @@ -1296,15 +1257,14 @@ send_updated_doc(Req, Db, DocId, Doc, Headers) -> send_updated_doc(#httpd{user_ctx=Ctx} = Req, Db, DocId, #doc{deleted=Deleted}=Doc, Headers, UpdateType) -> - W = chttpd:qs_value(Req, "w", integer_to_list(mem3:quorum(Db))), Options = case couch_httpd:header_value(Req, "X-Couch-Full-Commit") of "true" -> - [full_commit, UpdateType, {user_ctx,Ctx}, {w,W}]; + [full_commit, UpdateType, {user_ctx,Ctx}]; "false" -> - [delay_commit, UpdateType, {user_ctx,Ctx}, {w,W}]; + [delay_commit, UpdateType, {user_ctx,Ctx}]; _ -> - [UpdateType, {user_ctx,Ctx}, {w,W}] + [UpdateType, {user_ctx,Ctx}] end, {Status, {etag, Etag}, Body} = update_doc(Db, DocId, #doc{deleted=Deleted}=Doc, Options), @@ -1323,31 +1283,7 @@ http_code_from_status(Status) -> end. update_doc(Db, DocId, #doc{deleted=Deleted, body=DocBody}=Doc, Options) -> - {_, Ref} = spawn_monitor(fun() -> - try fabric:update_doc(Db, Doc, Options) of - Resp -> - exit({exit_ok, Resp}) - catch - throw:Reason -> - exit({exit_throw, Reason}); - error:Reason -> - exit({exit_error, Reason}); - exit:Reason -> - exit({exit_exit, Reason}) - end - end), - Result = receive - {'DOWN', Ref, _, _, {exit_ok, Ret}} -> - Ret; - {'DOWN', Ref, _, _, {exit_throw, Reason}} -> - throw(Reason); - {'DOWN', Ref, _, _, {exit_error, Reason}} -> - erlang:error(Reason); - {'DOWN', Ref, _, _, {exit_exit, Reason}} -> - erlang:exit(Reason) - end, - - case Result of + case fabric2_db:update_doc(Db, Doc, Options) of {ok, NewRev} -> Accepted = false; {accepted, NewRev} -> @@ -1394,7 +1330,7 @@ couch_doc_from_req(Req, _Db, DocId, #doc{revs=Revs} = Doc) -> end, Doc#doc{id=DocId, revs=Revs2}; couch_doc_from_req(Req, Db, DocId, Json) -> - Doc = couch_db:doc_from_json_obj_validate(Db, Json), + Doc = couch_doc:from_json_obj_validate(Json, fabric2_db:name(Db)), couch_doc_from_req(Req, Db, DocId, Doc). @@ -1402,11 +1338,10 @@ couch_doc_from_req(Req, Db, DocId, Json) -> % couch_doc_open(Db, DocId) -> % couch_doc_open(Db, DocId, nil, []). -couch_doc_open(Db, DocId, Rev, Options0) -> - Options = [{user_ctx, couch_db:get_user_ctx(Db)} | Options0], +couch_doc_open(Db, DocId, Rev, Options) -> case Rev of nil -> % open most recent rev - case fabric:open_doc(Db, DocId, Options) of + case fabric2_db:open_doc(Db, DocId, Options) of {ok, Doc} -> chttpd_stats:incr_reads(), Doc; @@ -1414,7 +1349,7 @@ couch_doc_open(Db, DocId, Rev, Options0) -> throw(Error) end; _ -> % open a specific rev (deletions come back as stubs) - case fabric:open_revs(Db, DocId, [Rev], Options) of + case fabric2_db:open_doc_revs(Db, DocId, [Rev], Options) of {ok, [{ok, Doc}]} -> chttpd_stats:incr_reads(), Doc; @@ -1535,8 +1470,12 @@ db_attachment_req(#httpd{method='GET',mochi_req=MochiReq}=Req, Db, DocId, FileNa end; -db_attachment_req(#httpd{method=Method, user_ctx=Ctx}=Req, Db, DocId, FileNameParts) +db_attachment_req(#httpd{method=Method}=Req, Db, DocId, FileNameParts) when (Method == 'PUT') or (Method == 'DELETE') -> + #httpd{ + user_ctx = Ctx, + mochi_req = MochiReq + } = Req, FileName = validate_attachment_name( mochiweb_util:join( lists:map(fun binary_to_list/1, @@ -1546,16 +1485,45 @@ db_attachment_req(#httpd{method=Method, user_ctx=Ctx}=Req, Db, DocId, FileNamePa 'DELETE' -> []; _ -> - MimeType = case couch_httpd:header_value(Req,"Content-Type") of + MimeType = case chttpd:header_value(Req,"Content-Type") of % We could throw an error here or guess by the FileName. % Currently, just giving it a default. undefined -> <<"application/octet-stream">>; CType -> list_to_binary(CType) end, - Data = fabric:att_receiver(Req, chttpd:body_length(Req)), + Data = case chttpd:body_length(Req) of + undefined -> + <<"">>; + {unknown_transfer_encoding, Unknown} -> + exit({unknown_transfer_encoding, Unknown}); + chunked -> + fun(MaxChunkSize, ChunkFun, InitState) -> + chttpd:recv_chunked( + Req, MaxChunkSize, ChunkFun, InitState + ) + end; + 0 -> + <<"">>; + Length when is_integer(Length) -> + Expect = case chttpd:header_value(Req, "expect") of + undefined -> + undefined; + Value when is_list(Value) -> + string:to_lower(Value) + end, + case Expect of + "100-continue" -> + MochiReq:start_raw_response({100, gb_trees:empty()}); + _Else -> + ok + end, + fun() -> chttpd:recv(Req, 0) end; + Length -> + exit({length_not_integer, Length}) + end, ContentLen = case couch_httpd:header_value(Req,"Content-Length") of undefined -> undefined; - Length -> list_to_integer(Length) + CL -> list_to_integer(CL) end, ContentEnc = string:to_lower(string:strip( couch_httpd:header_value(Req, "Content-Encoding", "identity") @@ -1590,7 +1558,7 @@ db_attachment_req(#httpd{method=Method, user_ctx=Ctx}=Req, Db, DocId, FileNamePa couch_db:validate_docid(Db, DocId), #doc{id=DocId}; Rev -> - case fabric:open_revs(Db, DocId, [Rev], [{user_ctx,Ctx}]) of + case fabric2_db:open_doc_revs(Db, DocId, [Rev], [{user_ctx,Ctx}]) of {ok, [{ok, Doc0}]} -> chttpd_stats:incr_reads(), Doc0; @@ -1605,8 +1573,7 @@ db_attachment_req(#httpd{method=Method, user_ctx=Ctx}=Req, Db, DocId, FileNamePa DocEdited = Doc#doc{ atts = NewAtt ++ [A || A <- Atts, couch_att:fetch(name, A) /= FileName] }, - W = chttpd:qs_value(Req, "w", integer_to_list(mem3:quorum(Db))), - case fabric:update_doc(Db, DocEdited, [{user_ctx,Ctx}, {w,W}]) of + case fabric2_db:update_doc(Db, DocEdited, [{user_ctx,Ctx}]) of {ok, UpdatedRev} -> chttpd_stats:incr_writes(), HttpCode = 201; @@ -1615,7 +1582,7 @@ db_attachment_req(#httpd{method=Method, user_ctx=Ctx}=Req, Db, DocId, FileNamePa HttpCode = 202 end, erlang:put(mochiweb_request_recv, true), - DbName = couch_db:name(Db), + DbName = fabric2_db:name(Db), {Status, Headers} = case Method of 'DELETE' -> @@ -1702,46 +1669,6 @@ get_md5_header(Req) -> parse_doc_query(Req) -> lists:foldl(fun parse_doc_query/2, #doc_query_args{}, chttpd:qs(Req)). -parse_engine_opt(Req) -> - case chttpd:qs_value(Req, "engine") of - undefined -> - []; - Extension -> - Available = couch_server:get_engine_extensions(), - case lists:member(Extension, Available) of - true -> - [{engine, iolist_to_binary(Extension)}]; - false -> - throw({bad_request, invalid_engine_extension}) - end - end. - - -parse_partitioned_opt(Req) -> - case chttpd:qs_value(Req, "partitioned") of - undefined -> - []; - "false" -> - []; - "true" -> - ok = validate_partitioned_db_enabled(Req), - [ - {partitioned, true}, - {hash, [couch_partition, hash, []]} - ]; - _ -> - throw({bad_request, <<"Invalid `partitioned` parameter">>}) - end. - - -validate_partitioned_db_enabled(Req) -> - case couch_flags:is_enabled(partitioned, Req) of - true -> - ok; - false -> - throw({bad_request, <<"Partitioned feature is not enabled.">>}) - end. - parse_doc_query({Key, Value}, Args) -> case {Key, Value} of @@ -1811,7 +1738,7 @@ parse_changes_query(Req) -> {"descending", "true"} -> Args#changes_args{dir=rev}; {"since", _} -> - Args#changes_args{since=Value}; + Args#changes_args{since=parse_since_seq(Value)}; {"last-event-id", _} -> Args#changes_args{since=Value}; {"limit", _} -> @@ -1872,6 +1799,27 @@ parse_changes_query(Req) -> ChangesArgs end. + +parse_since_seq(Seq) when is_binary(Seq), size(Seq) > 30 -> + throw({bad_request, url_encoded_since_seq}); + +parse_since_seq(Seq) when is_binary(Seq), size(Seq) > 2 -> + % We have implicitly allowed the since seq to either be + % JSON encoded or a "raw" string. Here we just remove the + % surrounding quotes if they exist and are paired. + SeqSize = size(Seq) - 2, + case Seq of + <<"\"", S:SeqSize/binary, "\"">> -> S; + S -> S + end; + +parse_since_seq(Seq) when is_binary(Seq) -> + Seq; + +parse_since_seq(Seq) when is_list(Seq) -> + parse_since_seq(iolist_to_binary(Seq)). + + extract_header_rev(Req, ExplicitRev) when is_binary(ExplicitRev) or is_list(ExplicitRev)-> extract_header_rev(Req, couch_doc:parse_rev(ExplicitRev)); extract_header_rev(Req, ExplicitRev) -> @@ -1921,6 +1869,8 @@ monitor_attachments(Atts) when is_list(Atts) -> case couch_att:fetch(data, Att) of {Fd, _} -> [monitor(process, Fd) | Monitors]; + {loc, _, _, _} -> + Monitors; stub -> Monitors; Else -> @@ -2018,7 +1968,7 @@ bulk_get_open_doc_revs1(Db, Props, Options, {DocId, Revs}) -> bulk_get_open_doc_revs1(Db, Props, Options, {DocId, Revs, Options1}) end; bulk_get_open_doc_revs1(Db, Props, _, {DocId, Revs, Options}) -> - case fabric:open_revs(Db, DocId, Revs, Options) of + case fabric2_db:open_doc_revs(Db, DocId, Revs, Options) of {ok, []} -> RevStr = couch_util:get_value(<<"rev">>, Props), Error = {RevStr, <<"not_found">>, <<"missing">>}, diff --git a/src/chttpd/src/chttpd_external.erl b/src/chttpd/src/chttpd_external.erl index 451d87d2e..7317b7e4b 100644 --- a/src/chttpd/src/chttpd_external.erl +++ b/src/chttpd/src/chttpd_external.erl @@ -38,7 +38,7 @@ json_req_obj_fields() -> <<"peer">>, <<"form">>, <<"cookie">>, <<"userCtx">>, <<"secObj">>]. json_req_obj_field(<<"info">>, #httpd{}, Db, _DocId) -> - {ok, Info} = get_db_info(Db), + {ok, Info} = fabric2_db:get_db_info(Db), {Info}; json_req_obj_field(<<"uuid">>, #httpd{}, _Db, _DocId) -> couch_uuids:new(); @@ -81,27 +81,18 @@ json_req_obj_field(<<"form">>, #httpd{mochi_req=Req, method=Method}=HttpReq, Db, json_req_obj_field(<<"cookie">>, #httpd{mochi_req=Req}, _Db, _DocId) -> to_json_terms(Req:parse_cookie()); json_req_obj_field(<<"userCtx">>, #httpd{}, Db, _DocId) -> - couch_util:json_user_ctx(Db); -json_req_obj_field(<<"secObj">>, #httpd{user_ctx=UserCtx}, Db, _DocId) -> - get_db_security(Db, UserCtx). - - -get_db_info(Db) -> - case couch_db:is_clustered(Db) of - true -> - fabric:get_db_info(Db); - false -> - couch_db:get_db_info(Db) - end. - - -get_db_security(Db, #user_ctx{}) -> - case couch_db:is_clustered(Db) of - true -> - fabric:get_security(Db); - false -> - couch_db:get_security(Db) - end. + json_user_ctx(Db); +json_req_obj_field(<<"secObj">>, #httpd{user_ctx = #user_ctx{}}, Db, _DocId) -> + fabric2_db:get_security(Db). + + +json_user_ctx(Db) -> + Ctx = fabric2_db:get_user_ctx(Db), + {[ + {<<"db">>, fabric2_db:name(Db)}, + {<<"name">>, Ctx#user_ctx.name}, + {<<"roles">>, Ctx#user_ctx.roles} + ]}. to_json_terms(Data) -> diff --git a/src/chttpd/src/chttpd_misc.erl b/src/chttpd/src/chttpd_misc.erl index ffb5295b5..a39e31cd3 100644 --- a/src/chttpd/src/chttpd_misc.erl +++ b/src/chttpd/src/chttpd_misc.erl @@ -113,43 +113,39 @@ maybe_add_csp_headers(Headers, _) -> Headers. handle_all_dbs_req(#httpd{method='GET'}=Req) -> - Args = couch_mrview_http:parse_params(Req, undefined), - ShardDbName = config:get("mem3", "shards_db", "_dbs"), - %% shard_db is not sharded but mem3:shards treats it as an edge case - %% so it can be pushed thru fabric - {ok, Info} = fabric:get_db_info(ShardDbName), - Etag = couch_httpd:make_etag({Info}), - Options = [{user_ctx, Req#httpd.user_ctx}], + % TODO: Support args and options properly, transform + % this back into a fold call similar to the old + % version. + %% Args = couch_mrview_http:parse_params(Req, undefined), + % Eventually the Etag for this request will be derived + % from the \xFFmetadataVersion key in fdb + Etag = <<"foo">>, + %% Options = [{user_ctx, Req#httpd.user_ctx}], {ok, Resp} = chttpd:etag_respond(Req, Etag, fun() -> - {ok, Resp} = chttpd:start_delayed_json_response(Req, 200, [{"ETag",Etag}]), - VAcc = #vacc{req=Req,resp=Resp}, - fabric:all_docs(ShardDbName, Options, fun all_dbs_callback/2, VAcc, Args) - end), - case is_record(Resp, vacc) of - true -> {ok, Resp#vacc.resp}; - _ -> {ok, Resp} - end; + AllDbs = fabric2_db:list_dbs(), + chttpd:send_json(Req, AllDbs) + end); handle_all_dbs_req(Req) -> send_method_not_allowed(Req, "GET,HEAD"). -all_dbs_callback({meta, _Meta}, #vacc{resp=Resp0}=Acc) -> - {ok, Resp1} = chttpd:send_delayed_chunk(Resp0, "["), - {ok, Acc#vacc{resp=Resp1}}; -all_dbs_callback({row, Row}, #vacc{resp=Resp0}=Acc) -> - Prepend = couch_mrview_http:prepend_val(Acc), - case couch_util:get_value(id, Row) of <<"_design", _/binary>> -> - {ok, Acc}; - DbName -> - {ok, Resp1} = chttpd:send_delayed_chunk(Resp0, [Prepend, ?JSON_ENCODE(DbName)]), - {ok, Acc#vacc{prepend=",", resp=Resp1}} - end; -all_dbs_callback(complete, #vacc{resp=Resp0}=Acc) -> - {ok, Resp1} = chttpd:send_delayed_chunk(Resp0, "]"), - {ok, Resp2} = chttpd:end_delayed_json_response(Resp1), - {ok, Acc#vacc{resp=Resp2}}; -all_dbs_callback({error, Reason}, #vacc{resp=Resp0}=Acc) -> - {ok, Resp1} = chttpd:send_delayed_error(Resp0, Reason), - {ok, Acc#vacc{resp=Resp1}}. +%% all_dbs_callback({meta, _Meta}, #vacc{resp=Resp0}=Acc) -> +%% {ok, Resp1} = chttpd:send_delayed_chunk(Resp0, "["), +%% {ok, Acc#vacc{resp=Resp1}}; +%% all_dbs_callback({row, Row}, #vacc{resp=Resp0}=Acc) -> +%% Prepend = couch_mrview_http:prepend_val(Acc), +%% case couch_util:get_value(id, Row) of <<"_design", _/binary>> -> +%% {ok, Acc}; +%% DbName -> +%% {ok, Resp1} = chttpd:send_delayed_chunk(Resp0, [Prepend, ?JSON_ENCODE(DbName)]), +%% {ok, Acc#vacc{prepend=",", resp=Resp1}} +%% end; +%% all_dbs_callback(complete, #vacc{resp=Resp0}=Acc) -> +%% {ok, Resp1} = chttpd:send_delayed_chunk(Resp0, "]"), +%% {ok, Resp2} = chttpd:end_delayed_json_response(Resp1), +%% {ok, Acc#vacc{resp=Resp2}}; +%% all_dbs_callback({error, Reason}, #vacc{resp=Resp0}=Acc) -> +%% {ok, Resp1} = chttpd:send_delayed_error(Resp0, Reason), +%% {ok, Acc#vacc{resp=Resp1}}. handle_dbs_info_req(#httpd{method='POST'}=Req) -> chttpd:validate_ctype(Req, "application/json"), diff --git a/src/chttpd/src/chttpd_show.erl b/src/chttpd/src/chttpd_show.erl index a6d0368b9..8a15bdcbe 100644 --- a/src/chttpd/src/chttpd_show.erl +++ b/src/chttpd/src/chttpd_show.erl @@ -123,15 +123,14 @@ send_doc_update_response(Req, Db, DDoc, UpdateName, Doc, DocId) -> JsonReq = chttpd_external:json_req_obj(Req, Db, DocId), JsonDoc = couch_query_servers:json_doc(Doc), Cmd = [<<"updates">>, UpdateName], - W = chttpd:qs_value(Req, "w", integer_to_list(mem3:quorum(Db))), UpdateResp = couch_query_servers:ddoc_prompt(DDoc, Cmd, [JsonDoc, JsonReq]), JsonResp = case UpdateResp of [<<"up">>, {NewJsonDoc}, {JsonResp0}] -> case chttpd:header_value(Req, "X-Couch-Full-Commit", "false") of "true" -> - Options = [full_commit, {user_ctx, Req#httpd.user_ctx}, {w, W}]; + Options = [full_commit, {user_ctx, Req#httpd.user_ctx}]; _ -> - Options = [{user_ctx, Req#httpd.user_ctx}, {w, W}] + Options = [{user_ctx, Req#httpd.user_ctx}] end, NewDoc = couch_db:doc_from_json_obj_validate(Db, {NewJsonDoc}), couch_doc:validate_docid(NewDoc#doc.id), diff --git a/src/couch_mrview/src/couch_mrview.erl b/src/couch_mrview/src/couch_mrview.erl index 1cdc91809..02e11d031 100644 --- a/src/couch_mrview/src/couch_mrview.erl +++ b/src/couch_mrview/src/couch_mrview.erl @@ -170,8 +170,18 @@ join([H|[]], _, Acc) -> join([H|T], Sep, Acc) -> join(T, Sep, [Sep, H | Acc]). +validate(#{} = Db, DDoc) -> + DbName = fabric2_db:name(Db), + IsPartitioned = fabric2_db:is_partitioned(Db), + validate(DbName, IsPartitioned, DDoc); -validate(Db, DDoc) -> +validate(Db, DDoc) -> + DbName = couch_db:name(Db), + IsPartitioned = couch_db:is_partitioned(Db), + validate(DbName, IsPartitioned, DDoc). + + +validate(DbName, IsDbPartitioned, DDoc) -> ok = validate_ddoc_fields(DDoc#doc.body), GetName = fun (#mrview{map_names = [Name | _]}) -> Name; @@ -200,9 +210,9 @@ validate(Db, DDoc) -> language = Lang, views = Views, partitioned = Partitioned - }} = couch_mrview_util:ddoc_to_mrst(couch_db:name(Db), DDoc), + }} = couch_mrview_util:ddoc_to_mrst(DbName, DDoc), - case {couch_db:is_partitioned(Db), Partitioned} of + case {IsDbPartitioned, Partitioned} of {false, true} -> throw({invalid_design_doc, <<"partitioned option cannot be true in a " diff --git a/test/elixir/test/basics_test.exs b/test/elixir/test/basics_test.exs index a03fa2922..f280513fb 100644 --- a/test/elixir/test/basics_test.exs +++ b/test/elixir/test/basics_test.exs @@ -100,7 +100,7 @@ defmodule BasicsTest do db_name = context[:db_name] {:ok, _} = create_doc(db_name, sample_doc_foo()) resp = Couch.get("/#{db_name}/foo", query: %{:local_seq => true}) - assert resp.body["_local_seq"] == 1, "Local seq value == 1" + assert is_binary(resp.body["_local_seq"]), "Local seq value is a binary" end @tag :with_db -- cgit v1.2.1 From 814088c730aca3e2e262e19790026cfad370305f Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Thu, 6 Jun 2019 11:56:58 -0500 Subject: Remove debug logging --- src/fabric/src/fabric2_events.erl | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/fabric/src/fabric2_events.erl b/src/fabric/src/fabric2_events.erl index a5717147f..094ca2fdb 100644 --- a/src/fabric/src/fabric2_events.erl +++ b/src/fabric/src/fabric2_events.erl @@ -43,11 +43,9 @@ stop_listener(Pid) -> init(Parent, DbName, Mod, Fun, St) -> {ok, Db} = fabric2_db:open(DbName, [?ADMIN_CTX]), Since = fabric2_db:get_update_seq(Db), - couch_log:error("XKCD: START LISTENER: ~s : ~p for ~p", [DbName, Since, Parent]), erlang:monitor(process, Parent), Parent ! {self(), initialized}, - poll(DbName, Since, Mod, Fun, St), - couch_log:error("XKCD: STOP LISTENER for ~p", [Parent]). + poll(DbName, Since, Mod, Fun, St). poll(DbName, Since, Mod, Fun, St) -> @@ -56,10 +54,8 @@ poll(DbName, Since, Mod, Fun, St) -> {ok, Db} -> case fabric2_db:get_update_seq(Db) of Since -> - couch_log:error("XKCD: NO UPDATE: ~s :: ~p", [DbName, Since]), {{ok, St}, Since}; Other -> - couch_log:error("XKCD: UPDATED: ~s :: ~p -> ~p", [DbName, Since, Other]), {Mod:Fun(DbName, updated, St), Other} end; Error -> -- cgit v1.2.1 From 5dbd56686d74f3ecd6c18f2df4247148ff2b8eb1 Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Thu, 6 Jun 2019 13:30:01 -0500 Subject: Implement attachment compression This still holds all attachment data in RAM which we'll have to revisit at some point. --- src/couch/src/couch_att.erl | 109 +++++++++++++++++++++++----------- test/elixir/test/replication_test.exs | 7 ++- 2 files changed, 77 insertions(+), 39 deletions(-) diff --git a/src/couch/src/couch_att.erl b/src/couch/src/couch_att.erl index 0dc5fa56b..90d364441 100644 --- a/src/couch/src/couch_att.erl +++ b/src/couch/src/couch_att.erl @@ -383,8 +383,8 @@ flush(Db, DocId, Att1) -> % If we were sent a gzip'ed attachment with no % length data, we have to set it here. - Att3 = case AttLen of - undefined -> store(att_len, DiskLen, Att2); + Att3 = case DiskLen of + undefined -> store(disk_len, AttLen, Att2); _ -> Att2 end, @@ -400,12 +400,13 @@ flush(Db, DocId, Att1) -> % Already flushed Att1; _ when is_binary(Data) -> - IdentMd5 = get_identity_md5(Data, fetch(encoding, Att4)), + DataMd5 = couch_hash:md5_hash(Data), if ReqMd5 == undefined -> ok; true -> - couch_util:check_md5(IdentMd5, ReqMd5) + couch_util:check_md5(DataMd5, ReqMd5) end, - Att5 = store(md5, IdentMd5, Att4), - fabric2_db:write_attachment(Db, DocId, Att5) + Att5 = store(md5, DataMd5, Att4), + Att6 = maybe_compress(Att5), + fabric2_db:write_attachment(Db, DocId, Att6) end. @@ -451,7 +452,7 @@ read_data(Fun, Att) when is_function(Fun) -> end, Props0 = [ {data, iolist_to_binary(lists:reverse(Acc))}, - {disk_len, Len} + {att_len, Len} ], Props1 = if InMd5 /= md5_in_footer -> Props0; true -> [{md5, Md5} | Props0] @@ -473,7 +474,7 @@ read_streamed_attachment(Att, _F, 0, Acc) -> Bin = iolist_to_binary(lists:reverse(Acc)), store([ {data, Bin}, - {disk_len, size(Bin)} + {att_len, size(Bin)} ], Att); read_streamed_attachment(_Att, _F, LenLeft, _Acc) when LenLeft < 0 -> @@ -550,8 +551,23 @@ range_foldl(Att, From, To, Fun, Acc) -> range_foldl(Bin, From, To, Fun, Acc). -foldl_decode(_Att, _Fun, _Acc) -> - erlang:error(not_supported). +foldl_decode(Att, Fun, Acc) -> + [Encoding, Data] = fetch([encoding, data], Att), + case {Encoding, Data} of + {gzip, {loc, Db, DocId, AttId}} -> + NoTxDb = Db#{tx := undefined}, + Bin = fabric2_db:read_attachment(NoTxDb, DocId, AttId), + foldl_decode(store(data, Bin, Att), Fun, Acc); + {gzip, _} when is_binary(Data) -> + Z = zlib:open(), + ok = zlib:inflateInit(Z, 16 + 15), + Inflated = iolist_to_binary(zlib:inflate(Z, Data)), + ok = zlib:inflateEnd(Z), + ok = zlib:close(Z), + foldl(Inflated, Att, Fun, Acc); + _ -> + foldl(Att, Fun, Acc) + end. to_binary(Att) -> @@ -563,7 +579,8 @@ to_binary(Bin, _Att) when is_binary(Bin) -> to_binary(Iolist, _Att) when is_list(Iolist) -> iolist_to_binary(Iolist); to_binary({loc, Db, DocId, AttId}, _Att) -> - fabric2_db:read_attachmet(Db, DocId, AttId); + NoTxDb = Db#{tx := undefined}, + fabric2_db:read_attachment(NoTxDb, DocId, AttId); to_binary(DataFun, Att) when is_function(DataFun)-> Len = fetch(att_len, Att), iolist_to_binary( @@ -585,15 +602,53 @@ fold_streamed_data(RcvFun, LenLeft, Fun, Acc) when LenLeft > 0-> fold_streamed_data(RcvFun, LenLeft - size(Bin), Fun, ResultAcc). -get_identity_md5(Bin, gzip) -> +maybe_compress(Att) -> + [Encoding, Type] = fetch([encoding, type], Att), + IsCompressible = is_compressible(Type), + CompLevel = config:get_integer("attachments", "compression_level", 0), + case Encoding of + identity when IsCompressible, CompLevel >= 1, CompLevel =< 9 -> + compress(Att, CompLevel); + _ -> + Att + end. + + +compress(Att, Level) -> + Data = fetch(data, Att), + Z = zlib:open(), - ok = zlib:inflateInit(Z, 16 + 15), - Inflated = zlib:inflate(Z, Bin), - ok = zlib:inflateEnd(Z), + % 15 = ?MAX_WBITS (defined in the zlib module) + % the 16 + ?MAX_WBITS formula was obtained by inspecting zlib:gzip/1 + ok = zlib:deflateInit(Z, Level, deflated, 16 + 15, 8, default), + CompData = iolist_to_binary(zlib:deflate(Z, Data, finish)), + ok = zlib:deflateEnd(Z), ok = zlib:close(Z), - couch_hash:md5_hash(Inflated); -get_identity_md5(Bin, _) -> - couch_hash:md5_hash(Bin). + + store([ + {att_len, size(CompData)}, + {md5, couch_hash:md5_hash(CompData)}, + {data, CompData}, + {encoding, gzip} + ], Att). + + +is_compressible(Type) when is_binary(Type) -> + is_compressible(binary_to_list(Type)); +is_compressible(Type) -> + TypeExpList = re:split( + config:get("attachments", "compressible_types", ""), + "\\s*,\\s*", + [{return, list}] + ), + lists:any( + fun(TypeExp) -> + Regexp = ["^\\s*", re:replace(TypeExp, "\\*", ".*"), + "(?:\\s*;.*?)?\\s*", $$], + re:run(Type, Regexp, [caseless]) =/= nomatch + end, + [T || T <- TypeExpList, T /= []] + ). max_attachment_size() -> @@ -612,24 +667,6 @@ validate_attachment_size(_AttName, _AttSize, _MAxAttSize) -> ok. -%% is_compressible(Type) when is_binary(Type) -> -%% is_compressible(binary_to_list(Type)); -%% is_compressible(Type) -> -%% TypeExpList = re:split( -%% config:get("attachments", "compressible_types", ""), -%% "\\s*,\\s*", -%% [{return, list}] -%% ), -%% lists:any( -%% fun(TypeExp) -> -%% Regexp = ["^\\s*", re:replace(TypeExp, "\\*", ".*"), -%% "(?:\\s*;.*?)?\\s*", $$], -%% re:run(Type, Regexp, [caseless]) =/= nomatch -%% end, -%% [T || T <- TypeExpList, T /= []] -%% ). - - -ifdef(TEST). -include_lib("eunit/include/eunit.hrl"). diff --git a/test/elixir/test/replication_test.exs b/test/elixir/test/replication_test.exs index bdd683e97..e513ddd16 100644 --- a/test/elixir/test/replication_test.exs +++ b/test/elixir/test/replication_test.exs @@ -711,9 +711,10 @@ defmodule ReplicationTest do assert tgt_info["doc_count"] == src_info["doc_count"] - src_shards = seq_to_shards(src_info["update_seq"]) - tgt_shards = seq_to_shards(tgt_info["update_seq"]) - assert tgt_shards == src_shards + # This assertion is no longer valid + # src_shards = seq_to_shards(src_info["update_seq"]) + # tgt_shards = seq_to_shards(tgt_info["update_seq"]) + # assert tgt_shards == src_shards end) end -- cgit v1.2.1 From 2359dabdeacf8c04803c58e043325c0874f45d35 Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Fri, 7 Jun 2019 12:49:49 -0500 Subject: Fix fabric2_txids:terminate/2 --- src/fabric/src/fabric2_txids.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fabric/src/fabric2_txids.erl b/src/fabric/src/fabric2_txids.erl index bbb8bdf57..ba427415d 100644 --- a/src/fabric/src/fabric2_txids.erl +++ b/src/fabric/src/fabric2_txids.erl @@ -75,7 +75,7 @@ terminate(_, #{txids := TxIds}) -> fabric2_fdb:transactional(fun(Tx) -> lists:foreach(fun(TxId) -> erlfdb:clear(Tx, TxId) - end) + end, TxIds) end) end, ok. -- cgit v1.2.1 From 1ca83981a97dca78038a9b3400adb42bff93ad94 Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Fri, 7 Jun 2019 15:02:19 -0500 Subject: Fix revision generation on attachment upload When uploading an attachment we hadn't yet flushed data to FoundationDB which caused the md5 to be empty. The `new_revid` algorithm then declared that was because it was an old style attachment and thus our new revision would be a random number. This fix just flushes our attachments earlier in the process of updating a document. --- src/fabric/src/fabric2_db.erl | 103 +++++++++++++++++++++++++++-------------- src/fabric/src/fabric2_fdb.erl | 9 +--- 2 files changed, 70 insertions(+), 42 deletions(-) diff --git a/src/fabric/src/fabric2_db.erl b/src/fabric/src/fabric2_db.erl index 02a18fa23..acd473f12 100644 --- a/src/fabric/src/fabric2_db.erl +++ b/src/fabric/src/fabric2_db.erl @@ -120,7 +120,7 @@ %% validate_dbname/1, %% make_doc/5, - new_revid/1 + new_revid/2 ]). @@ -604,9 +604,7 @@ read_attachment(Db, DocId, AttId) -> write_attachment(Db, DocId, Att) -> Data = couch_att:fetch(data, Att), - {ok, AttId} = fabric2_fdb:transactional(Db, fun(TxDb) -> - fabric2_fdb:write_attachment(TxDb, DocId, Data) - end), + {ok, AttId} = fabric2_fdb:write_attachment(Db, DocId, Data), couch_att:store(data, {loc, Db, DocId, AttId}, Att). @@ -630,33 +628,69 @@ fold_changes(Db, SinceSeq, UserFun, UserAcc, Options) -> end). -new_revid(Doc) -> +maybe_add_sys_db_callbacks(Db) -> + IsReplicatorDb = fabric2_util:dbname_ends_with(Db, <<"_replicator">>), + + CfgUsersSuffix = config:get("couchdb", "users_db_suffix", "_users"), + IsCfgUsersDb = fabric2_util:dbname_ends_with(Db, ?l2b(CfgUsersSuffix)), + IsGlobalUsersDb = fabric2_util:dbname_ends_with(Db, <<"_users">>), + IsUsersDb = IsCfgUsersDb orelse IsGlobalUsersDb, + + {BDU, ADR} = if + IsReplicatorDb -> + { + fun couch_replicator_docs:before_doc_update/3, + fun couch_replicator_docs:after_doc_read/2 + }; + IsUsersDb -> + { + fun fabric2_users_db:before_doc_update/3, + fun fabric2_users_db:after_doc_read/2 + }; + true -> + {undefined, undefined} + end, + + Db#{ + before_doc_update := BDU, + after_doc_read := ADR + }. + + +new_revid(Db, Doc) -> #doc{ + id = DocId, body = Body, revs = {OldStart, OldRevs}, atts = Atts, deleted = Deleted } = Doc, - DigestedAtts = lists:foldl(fun(Att, Acc) -> - [N, T, M] = couch_att:fetch([name, type, md5], Att), - case M == <<>> of - true -> Acc; - false -> [{N, T, M} | Acc] + {NewAtts, AttSigInfo} = lists:mapfoldl(fun(Att, Acc) -> + [Name, Type, Data, Md5] = couch_att:fetch([name, type, data, md5], Att), + case Data of + {loc, _, _, _} -> + {Att, [{Name, Type, Md5} | Acc]}; + _ -> + Att1 = couch_att:flush(Db, DocId, Att), + Att2 = couch_att:store(revpos, OldStart + 1, Att1), + {Att2, [{Name, Type, couch_att:fetch(md5, Att2)} | Acc]} end end, [], Atts), - Rev = case DigestedAtts of - Atts2 when length(Atts) =/= length(Atts2) -> - % We must have old style non-md5 attachments - list_to_binary(integer_to_list(couch_util:rand32())); - Atts2 -> + Rev = case length(Atts) == length(AttSigInfo) of + true -> OldRev = case OldRevs of [] -> 0; [OldRev0 | _] -> OldRev0 end, - SigTerm = [Deleted, OldStart, OldRev, Body, Atts2], - couch_hash:md5_hash(term_to_binary(SigTerm, [{minor_version, 1}])) + SigTerm = [Deleted, OldStart, OldRev, Body, AttSigInfo], + couch_hash:md5_hash(term_to_binary(SigTerm, [{minor_version, 1}])); + false -> + erlang:error(missing_att_info) end, - Doc#doc{revs = {OldStart + 1, [Rev | OldRevs]}}. + Doc#doc{ + revs = {OldStart + 1, [Rev | OldRevs]}, + atts = NewAtts + }. maybe_set_user_ctx(Db, Options) -> @@ -970,12 +1004,11 @@ update_doc_interactive(Db, Doc0, Future, _Options) -> % Validate the doc update and create the % new revinfo map Doc2 = prep_and_validate(Db, Doc1, Target), + #doc{ deleted = NewDeleted, revs = {NewRevPos, [NewRev | NewRevPath]} - } = Doc3 = new_revid(Doc2), - - Doc4 = update_attachment_revpos(Doc3), + } = Doc3 = new_revid(Db, Doc2), NewRevInfo = #{ winner => undefined, @@ -988,9 +1021,9 @@ update_doc_interactive(Db, Doc0, Future, _Options) -> % Gather the list of possible winnig revisions Possible = case Target == Winner of - true when not Doc4#doc.deleted -> + true when not Doc3#doc.deleted -> [NewRevInfo]; - true when Doc4#doc.deleted -> + true when Doc3#doc.deleted -> case SecondPlace of #{} -> [NewRevInfo, SecondPlace]; not_found -> [NewRevInfo] @@ -1015,7 +1048,7 @@ update_doc_interactive(Db, Doc0, Future, _Options) -> ok = fabric2_fdb:write_doc( Db, - Doc4, + Doc3, NewWinner, Winner, ToUpdate, @@ -1076,6 +1109,7 @@ update_doc_replicated(Db, Doc0, _Options) -> LeafPath = get_leaf_path(RevPos, Rev, AllLeafsFull), PrevRevInfo = find_prev_revinfo(RevPos, LeafPath), Doc2 = prep_and_validate(Db, Doc1, PrevRevInfo), + Doc3 = flush_doc_atts(Db, Doc2), % Possible winners are the previous winner and % the new DocRevInfo @@ -1097,7 +1131,7 @@ update_doc_replicated(Db, Doc0, _Options) -> ok = fabric2_fdb:write_doc( Db, - Doc2, + Doc3, NewWinner, Winner, ToUpdate, @@ -1119,19 +1153,20 @@ update_local_doc(Db, Doc0, _Options) -> {ok, {0, integer_to_binary(Rev)}}. -update_attachment_revpos(#doc{revs = {RevPos, _Revs}, atts = Atts0} = Doc) -> - Atts = lists:map(fun(Att) -> +flush_doc_atts(Db, Doc) -> + #doc{ + id = DocId, + atts = Atts + } = Doc, + NewAtts = lists:map(fun(Att) -> case couch_att:fetch(data, Att) of - {loc, _Db, _DocId, _AttId} -> - % Attachment was already on disk + {loc, _, _, _} -> Att; _ -> - % We will write this attachment with this update - % so mark it with the RevPos that will be written - couch_att:store(revpos, RevPos, Att) + couch_att:flush(Db, DocId, Att) end - end, Atts0), - Doc#doc{atts = Atts}. + end, Atts), + Doc#doc{atts = NewAtts}. get_winning_rev_futures(Db, Docs) -> diff --git a/src/fabric/src/fabric2_fdb.erl b/src/fabric/src/fabric2_fdb.erl index 0a4f2981b..788bbc62a 100644 --- a/src/fabric/src/fabric2_fdb.erl +++ b/src/fabric/src/fabric2_fdb.erl @@ -924,7 +924,7 @@ doc_to_fdb(Db, #doc{} = Doc) -> body = Body, atts = Atts, deleted = Deleted - } = doc_flush_atts(Db, Doc), + } = Doc, Key = erlfdb_tuple:pack({?DB_DOCS, Id, Start, Rev}, DbPrefix), Val = {Body, Atts, Deleted}, @@ -977,13 +977,6 @@ fdb_to_local_doc(_Db, _DocId, not_found) -> {not_found, missing}. -doc_flush_atts(Db, Doc) -> - Atts = lists:map(fun(Att) -> - couch_att:flush(Db, Doc#doc.id, Att) - end, Doc#doc.atts), - Doc#doc{atts = Atts}. - - chunkify_attachment(Data) -> case Data of <<>> -> -- cgit v1.2.1 From ed567a22a67406ab4ab511d40b4b9b99839da6c7 Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Fri, 7 Jun 2019 16:12:28 -0500 Subject: Convert attachment info to disk terms correctly I was accidentally skipping this step around properly serializing/deserializing attachments. Note to self: If someon specifies attachment headers this will likely break when we attempt to pack the value tuple here. --- src/fabric/src/fabric2_fdb.erl | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/fabric/src/fabric2_fdb.erl b/src/fabric/src/fabric2_fdb.erl index 788bbc62a..4f08d971a 100644 --- a/src/fabric/src/fabric2_fdb.erl +++ b/src/fabric/src/fabric2_fdb.erl @@ -926,14 +926,19 @@ doc_to_fdb(Db, #doc{} = Doc) -> deleted = Deleted } = Doc, + DiskAtts = lists:map(fun couch_att:to_disk_term/1, Atts), + Key = erlfdb_tuple:pack({?DB_DOCS, Id, Start, Rev}, DbPrefix), - Val = {Body, Atts, Deleted}, + Val = {Body, DiskAtts, Deleted}, {Key, term_to_binary(Val, [{minor_version, 1}])}. -fdb_to_doc(_Db, DocId, Pos, Path, Bin) when is_binary(Bin) -> - {Body, Atts, Deleted} = binary_to_term(Bin, [safe]), - #doc{ +fdb_to_doc(Db, DocId, Pos, Path, Bin) when is_binary(Bin) -> + {Body, DiskAtts, Deleted} = binary_to_term(Bin, [safe]), + Atts = lists:map(fun(Att) -> + couch_att:from_disk_term(Db, DocId, Att) + end, DiskAtts), + Doc0 = #doc{ id = DocId, revs = {Pos, Path}, body = Body, -- cgit v1.2.1 From d15377cff743fb3540d40c67a673ed9d8532e182 Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Mon, 10 Jun 2019 14:33:12 -0500 Subject: Allow for previously configured filters The older chttpd/fabric split configured filters as one step in the coordinator instead of within each RPC worker. --- src/chttpd/src/chttpd_changes.erl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/chttpd/src/chttpd_changes.erl b/src/chttpd/src/chttpd_changes.erl index 39e18d4f9..3389aebc0 100644 --- a/src/chttpd/src/chttpd_changes.erl +++ b/src/chttpd/src/chttpd_changes.erl @@ -152,6 +152,9 @@ get_callback_acc(Callback) when is_function(Callback, 1) -> {fun(Ev, _) -> Callback(Ev) end, ok}. +configure_filter(Filter, _Style, _Req, _Db) when is_tuple(Filter) -> + % Filter has already been configured + Filter; configure_filter("_doc_ids", Style, Req, _Db) -> {doc_ids, Style, get_doc_ids(Req)}; configure_filter("_selector", Style, Req, _Db) -> -- cgit v1.2.1 From fab340e944a1fdb5652994ce66e5ece400d55b73 Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Mon, 10 Jun 2019 14:35:11 -0500 Subject: Fix validate_doc_update when recreating a document This fixes the behavior when validating a document update that is recreating a previously deleted document. Before this fix we were sending a document body with `"_deleted":true` as the existing document. However, CouchDB behavior expects the previous document passed to VDU's to be `null` in this case. --- src/fabric/src/fabric2_db.erl | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/fabric/src/fabric2_db.erl b/src/fabric/src/fabric2_db.erl index acd473f12..48e50f11c 100644 --- a/src/fabric/src/fabric2_db.erl +++ b/src/fabric/src/fabric2_db.erl @@ -1196,8 +1196,13 @@ prep_and_validate(Db, NewDoc, PrevRevInfo) -> _ -> false end, + WasDeleted = case PrevRevInfo of + not_found -> false; + #{deleted := D} -> D + end, + PrevDoc = case HasStubs orelse (HasVDUs and not IsDDoc) of - true when PrevRevInfo /= not_found -> + true when PrevRevInfo /= not_found, not WasDeleted -> case fabric2_fdb:get_doc_body(Db, NewDoc#doc.id, PrevRevInfo) of #doc{} = PDoc -> PDoc; {not_found, _} -> nil -- cgit v1.2.1 From eaec8ac47da136b398d7834b71aa58ac0219e2be Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Mon, 10 Jun 2019 14:36:55 -0500 Subject: Database config changes should bump the db version This was a remnant before we used a version per database. --- src/fabric/src/fabric2_fdb.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fabric/src/fabric2_fdb.erl b/src/fabric/src/fabric2_fdb.erl index 4f08d971a..d179387f6 100644 --- a/src/fabric/src/fabric2_fdb.erl +++ b/src/fabric/src/fabric2_fdb.erl @@ -338,7 +338,7 @@ set_config(#{} = Db, ConfigKey, ConfigVal) -> Key = erlfdb_tuple:pack({?DB_CONFIG, ConfigKey}, DbPrefix), erlfdb:set(Tx, Key, ConfigVal), - bump_metadata_version(Tx). + bump_db_version(Db). get_stat(#{} = Db, StatKey) -> -- cgit v1.2.1 From f07ed76017220a6db8176819c208ff7ddc2d5a74 Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Fri, 7 Jun 2019 12:46:06 -0500 Subject: Implement `_users` db authentication This changes `chttpd_auth_cache` to use FoundationDB to back the `_users` database including the `before_doc_update` and `after_doc_read` features. --- src/chttpd/src/chttpd_auth_cache.erl | 40 ++++++---- src/chttpd/src/chttpd_db.erl | 8 +- src/fabric/src/fabric2_db.erl | 34 ++++++--- src/fabric/src/fabric2_fdb.erl | 8 +- src/fabric/src/fabric2_users_db.erl | 144 +++++++++++++++++++++++++++++++++++ src/fabric/src/fabric2_util.erl | 7 ++ 6 files changed, 213 insertions(+), 28 deletions(-) create mode 100644 src/fabric/src/fabric2_users_db.erl diff --git a/src/chttpd/src/chttpd_auth_cache.erl b/src/chttpd/src/chttpd_auth_cache.erl index fdae27b79..81814d50f 100644 --- a/src/chttpd/src/chttpd_auth_cache.erl +++ b/src/chttpd/src/chttpd_auth_cache.erl @@ -52,7 +52,8 @@ get_user_creds(_Req, UserName) when is_binary(UserName) -> update_user_creds(_Req, UserDoc, _Ctx) -> {_, Ref} = spawn_monitor(fun() -> - case fabric:update_doc(dbname(), UserDoc, []) of + {ok, Db} = fabric2_db:open(dbname(), [?ADMIN_CTX]), + case fabric2_db:update_doc(Db, UserDoc) of {ok, _} -> exit(ok); Else -> @@ -100,6 +101,14 @@ maybe_increment_auth_cache_miss(UserName) -> %% gen_server callbacks init([]) -> + try + fabric2_db:open(dbname(), [?ADMIN_CTX]) + catch error:database_does_not_exist -> + case fabric2_db:create(dbname(), [?ADMIN_CTX]) of + {ok, _} -> ok; + {error, file_exists} -> ok + end + end, self() ! {start_listener, 0}, {ok, #state{}}. @@ -142,7 +151,8 @@ spawn_changes(Since) -> Pid. listen_for_changes(Since) -> - ensure_auth_ddoc_exists(dbname(), <<"_design/_auth">>), + {ok, Db} = fabric2_db:open(dbname(), [?ADMIN_CTX]), + ensure_auth_ddoc_exists(Db, <<"_design/_auth">>), CBFun = fun ?MODULE:changes_callback/2, Args = #changes_args{ feed = "continuous", @@ -150,7 +160,8 @@ listen_for_changes(Since) -> heartbeat = true, filter = {default, main_only} }, - fabric:changes(dbname(), CBFun, Since, Args). + ChangesFun = chttpd_changes:handle_db_changes(Args, nil, Db), + ChangesFun({CBFun, Since}). changes_callback(waiting_for_updates, Acc) -> {ok, Acc}; @@ -159,7 +170,7 @@ changes_callback(start, Since) -> changes_callback({stop, EndSeq, _Pending}, _) -> exit({seq, EndSeq}); changes_callback({change, {Change}}, _) -> - case couch_util:get_value(id, Change) of + case couch_util:get_value(<<"id">>, Change) of <<"_design/", _/binary>> -> ok; DocId -> @@ -174,7 +185,8 @@ changes_callback({error, _}, EndSeq) -> exit({seq, EndSeq}). load_user_from_db(UserName) -> - try fabric:open_doc(dbname(), docid(UserName), [?ADMIN_CTX, ejson_body, conflicts]) of + {ok, Db} = fabric2_db:open(dbname(), [?ADMIN_CTX]), + try fabric2_db:open_doc(Db, docid(UserName), [conflicts]) of {ok, Doc} -> {Props} = couch_doc:to_json_obj(Doc, []), Props; @@ -186,7 +198,8 @@ load_user_from_db(UserName) -> end. dbname() -> - config:get("chttpd_auth", "authentication_db", "_users"). + DbNameStr = config:get("chttpd_auth", "authentication_db", "_users"), + iolist_to_binary(DbNameStr). docid(UserName) -> <<"org.couchdb.user:", UserName/binary>>. @@ -194,11 +207,11 @@ docid(UserName) -> username(<<"org.couchdb.user:", UserName/binary>>) -> UserName. -ensure_auth_ddoc_exists(DbName, DDocId) -> - case fabric:open_doc(DbName, DDocId, [?ADMIN_CTX, ejson_body]) of +ensure_auth_ddoc_exists(Db, DDocId) -> + case fabric2_db:open_doc(Db, DDocId) of {not_found, _Reason} -> {ok, AuthDesign} = couch_auth_cache:auth_design_doc(DDocId), - update_doc_ignoring_conflict(DbName, AuthDesign, [?ADMIN_CTX]); + update_doc_ignoring_conflict(Db, AuthDesign); {ok, Doc} -> {Props} = couch_doc:to_json_obj(Doc, []), case couch_util:get_value(<<"validate_doc_update">>, Props, []) of @@ -208,17 +221,18 @@ ensure_auth_ddoc_exists(DbName, DDocId) -> Props1 = lists:keyreplace(<<"validate_doc_update">>, 1, Props, {<<"validate_doc_update">>, ?AUTH_DB_DOC_VALIDATE_FUNCTION}), - update_doc_ignoring_conflict(DbName, couch_doc:from_json_obj({Props1}), [?ADMIN_CTX]) + NewDoc = couch_doc:from_json_obj({Props1}), + update_doc_ignoring_conflict(Db, NewDoc) end; {error, Reason} -> - couch_log:notice("Failed to ensure auth ddoc ~s/~s exists for reason: ~p", [DbName, DDocId, Reason]), + couch_log:notice("Failed to ensure auth ddoc ~s/~s exists for reason: ~p", [dbname(), DDocId, Reason]), ok end, ok. -update_doc_ignoring_conflict(DbName, Doc, Options) -> +update_doc_ignoring_conflict(DbName, Doc) -> try - fabric:update_doc(DbName, Doc, Options) + fabric2_db:update_doc(DbName, Doc) catch throw:conflict -> ok diff --git a/src/chttpd/src/chttpd_db.erl b/src/chttpd/src/chttpd_db.erl index fae90375e..f123be004 100644 --- a/src/chttpd/src/chttpd_db.erl +++ b/src/chttpd/src/chttpd_db.erl @@ -742,12 +742,10 @@ db_req(#httpd{method='POST',path_parts=[_,<<"_revs_diff">>]}=Req, Db) -> db_req(#httpd{path_parts=[_,<<"_revs_diff">>]}=Req, _Db) -> send_method_not_allowed(Req, "POST"); -db_req(#httpd{method='PUT',path_parts=[_,<<"_security">>],user_ctx=Ctx}=Req, - Db) -> - DbName = ?b2l(couch_db:name(Db)), - validate_security_can_be_edited(DbName), +db_req(#httpd{method = 'PUT',path_parts = [_, <<"_security">>]} = Req, Db) -> + validate_security_can_be_edited(fabric2_db:name(Db)), SecObj = chttpd:json_body(Req), - case fabric:set_security(Db, SecObj, [{user_ctx, Ctx}]) of + case fabric2_db:set_security(Db, SecObj) of ok -> send_json(Req, {[{<<"ok">>, true}]}); Else -> diff --git a/src/fabric/src/fabric2_db.erl b/src/fabric/src/fabric2_db.erl index 48e50f11c..80028a645 100644 --- a/src/fabric/src/fabric2_db.erl +++ b/src/fabric/src/fabric2_db.erl @@ -149,9 +149,10 @@ create(DbName, Options) -> % We cache outside of the transaction so that we're sure % that the transaction was committed. case Result of - #{} = Db -> - ok = fabric2_server:store(Db), - {ok, Db#{tx := undefined}}; + #{} = Db0 -> + Db1 = maybe_add_sys_db_callbacks(Db0), + ok = fabric2_server:store(Db1), + {ok, Db1#{tx := undefined}}; Error -> Error end. @@ -167,9 +168,10 @@ open(DbName, Options) -> end), % Cache outside the transaction retry loop case Result of - #{} = Db -> - ok = fabric2_server:store(Db), - {ok, Db#{tx := undefined}}; + #{} = Db0 -> + Db1 = maybe_add_sys_db_callbacks(Db0), + ok = fabric2_server:store(Db1), + {ok, Db1#{tx := undefined}}; Error -> Error end @@ -552,18 +554,19 @@ update_docs(Db, Docs) -> update_docs(Db, Docs, []). -update_docs(Db, Docs, Options) -> +update_docs(Db, Docs0, Options) -> + Docs1 = apply_before_doc_update(Db, Docs0, Options), Resps0 = case lists:member(replicated_changes, Options) of false -> fabric2_fdb:transactional(Db, fun(TxDb) -> - update_docs_interactive(TxDb, Docs, Options) + update_docs_interactive(TxDb, Docs1, Options) end); true -> lists:map(fun(Doc) -> fabric2_fdb:transactional(Db, fun(TxDb) -> update_doc_int(TxDb, Doc, Options) end) - end, Docs) + end, Docs1) end, % Convert errors Resps1 = lists:map(fun(Resp) -> @@ -882,6 +885,19 @@ find_possible_ancestors(RevInfos, MissingRevs) -> end, RevInfos). +apply_before_doc_update(Db, Docs, Options) -> + #{before_doc_update := BDU} = Db, + UpdateType = case lists:member(replicated_changes, Options) of + true -> replicated_changes; + false -> interactive_edit + end, + if BDU == undefined -> Docs; true -> + lists:map(fun(Doc) -> + BDU(Doc, Db, UpdateType) + end, Docs) + end. + + update_doc_int(#{} = Db, #doc{} = Doc, Options) -> IsLocal = case Doc#doc.id of <> -> true; diff --git a/src/fabric/src/fabric2_fdb.erl b/src/fabric/src/fabric2_fdb.erl index d179387f6..4b0182646 100644 --- a/src/fabric/src/fabric2_fdb.erl +++ b/src/fabric/src/fabric2_fdb.erl @@ -944,7 +944,13 @@ fdb_to_doc(Db, DocId, Pos, Path, Bin) when is_binary(Bin) -> body = Body, atts = Atts, deleted = Deleted - }; + }, + + case Db of + #{after_doc_read := undefined} -> Doc0; + #{after_doc_read := ADR} -> ADR(Doc0, Db) + end; + fdb_to_doc(_Db, _DocId, _Pos, _Path, not_found) -> {not_found, missing}. diff --git a/src/fabric/src/fabric2_users_db.erl b/src/fabric/src/fabric2_users_db.erl new file mode 100644 index 000000000..9a8a462c3 --- /dev/null +++ b/src/fabric/src/fabric2_users_db.erl @@ -0,0 +1,144 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(fabric2_users_db). + +-export([ + before_doc_update/3, + after_doc_read/2, + strip_non_public_fields/1 +]). + +-include_lib("couch/include/couch_db.hrl"). + +-define(NAME, <<"name">>). +-define(PASSWORD, <<"password">>). +-define(DERIVED_KEY, <<"derived_key">>). +-define(PASSWORD_SCHEME, <<"password_scheme">>). +-define(SIMPLE, <<"simple">>). +-define(PASSWORD_SHA, <<"password_sha">>). +-define(PBKDF2, <<"pbkdf2">>). +-define(ITERATIONS, <<"iterations">>). +-define(SALT, <<"salt">>). +-define(replace(L, K, V), lists:keystore(K, 1, L, {K, V})). + +-define( + DDOCS_ADMIN_ONLY, + <<"Only administrators can view design docs in the users database.">> +). + +% If the request's userCtx identifies an admin +% -> save_doc (see below) +% +% If the request's userCtx.name is null: +% -> save_doc +% // this is an anonymous user registering a new document +% // in case a user doc with the same id already exists, the anonymous +% // user will get a regular doc update conflict. +% If the request's userCtx.name doesn't match the doc's name +% -> 404 // Not Found +% Else +% -> save_doc +before_doc_update(Doc, Db, _UpdateType) -> + #user_ctx{name = Name} = fabric2_db:get_user_ctx(Db), + DocName = get_doc_name(Doc), + case (catch fabric2_db:check_is_admin(Db)) of + ok -> + save_doc(Doc); + _ when Name =:= DocName orelse Name =:= null -> + save_doc(Doc); + _ -> + throw(not_found) + end. + +% If newDoc.password == null || newDoc.password == undefined: +% -> +% noop +% Else -> // calculate password hash server side +% newDoc.password_sha = hash_pw(newDoc.password + salt) +% newDoc.salt = salt +% newDoc.password = null +save_doc(#doc{body={Body}} = Doc) -> + %% Support both schemes to smooth migration from legacy scheme + Scheme = config:get("couch_httpd_auth", "password_scheme", "pbkdf2"), + case {fabric2_util:get_value(?PASSWORD, Body), Scheme} of + {null, _} -> % server admins don't have a user-db password entry + Doc; + {undefined, _} -> + Doc; + {ClearPassword, "simple"} -> % deprecated + Salt = couch_uuids:random(), + PasswordSha = couch_passwords:simple(ClearPassword, Salt), + Body0 = ?replace(Body, ?PASSWORD_SCHEME, ?SIMPLE), + Body1 = ?replace(Body0, ?SALT, Salt), + Body2 = ?replace(Body1, ?PASSWORD_SHA, PasswordSha), + Body3 = proplists:delete(?PASSWORD, Body2), + Doc#doc{body={Body3}}; + {ClearPassword, "pbkdf2"} -> + Iterations = list_to_integer(config:get("couch_httpd_auth", "iterations", "1000")), + Salt = couch_uuids:random(), + DerivedKey = couch_passwords:pbkdf2(ClearPassword, Salt, Iterations), + Body0 = ?replace(Body, ?PASSWORD_SCHEME, ?PBKDF2), + Body1 = ?replace(Body0, ?ITERATIONS, Iterations), + Body2 = ?replace(Body1, ?DERIVED_KEY, DerivedKey), + Body3 = ?replace(Body2, ?SALT, Salt), + Body4 = proplists:delete(?PASSWORD, Body3), + Doc#doc{body={Body4}}; + {_ClearPassword, Scheme} -> + couch_log:error("[couch_httpd_auth] password_scheme value of '~p' is invalid.", [Scheme]), + throw({forbidden, "Server cannot hash passwords at this time."}) + end. + + +% If the doc is a design doc +% If the request's userCtx identifies an admin +% -> return doc +% Else +% -> 403 // Forbidden +% If the request's userCtx identifies an admin +% -> return doc +% If the request's userCtx.name doesn't match the doc's name +% -> 404 // Not Found +% Else +% -> return doc +after_doc_read(#doc{id = <>} = Doc, Db) -> + case (catch fabric2_db:check_is_admin(Db)) of + ok -> Doc; + _ -> throw({forbidden, ?DDOCS_ADMIN_ONLY}) + end; +after_doc_read(Doc, Db) -> + #user_ctx{name = Name} = fabric2_db:get_user_ctx(Db), + DocName = get_doc_name(Doc), + case (catch fabric2_db:check_is_admin(Db)) of + ok -> + Doc; + _ when Name =:= DocName -> + Doc; + _ -> + Doc1 = strip_non_public_fields(Doc), + case Doc1 of + #doc{body={[]}} -> throw(not_found); + _ -> Doc1 + end + end. + + +get_doc_name(#doc{id= <<"org.couchdb.user:", Name/binary>>}) -> + Name; +get_doc_name(_) -> + undefined. + + +strip_non_public_fields(#doc{body={Props}}=Doc) -> + PublicFields = config:get("couch_httpd_auth", "public_fields", ""), + Public = re:split(PublicFields, "\\s*,\\s*", [{return, binary}]), + Doc#doc{body={[{K, V} || {K, V} <- Props, lists:member(K, Public)]}}. diff --git a/src/fabric/src/fabric2_util.erl b/src/fabric/src/fabric2_util.erl index 6e2df67c2..fb59d5923 100644 --- a/src/fabric/src/fabric2_util.erl +++ b/src/fabric/src/fabric2_util.erl @@ -24,6 +24,8 @@ validate_security_object/1, + dbname_ends_with/2, + get_value/2, get_value/3, to_hex/1, @@ -113,6 +115,11 @@ validate_json_list_of_strings(Member, Props) -> end. +dbname_ends_with(#{} = Db, Suffix) when is_binary(Suffix) -> + DbName = fabric2_db:name(Db), + Suffix == filename:basename(DbName). + + get_value(Key, List) -> get_value(Key, List, undefined). -- cgit v1.2.1 From e3725943eea5cbc102c138ab7e54b78b6301d702 Mon Sep 17 00:00:00 2001 From: Eric Avdey Date: Thu, 20 Jun 2019 10:41:30 -0300 Subject: Update get security to use fabric2 --- src/chttpd/src/chttpd_db.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/chttpd/src/chttpd_db.erl b/src/chttpd/src/chttpd_db.erl index f123be004..7cffc54f5 100644 --- a/src/chttpd/src/chttpd_db.erl +++ b/src/chttpd/src/chttpd_db.erl @@ -753,7 +753,7 @@ db_req(#httpd{method = 'PUT',path_parts = [_, <<"_security">>]} = Req, Db) -> end; db_req(#httpd{method='GET',path_parts=[_,<<"_security">>]}=Req, Db) -> - send_json(Req, fabric:get_security(Db)); + send_json(Req, fabric2_db:get_security(Db)); db_req(#httpd{path_parts=[_,<<"_security">>]}=Req, _Db) -> send_method_not_allowed(Req, "PUT,GET"); -- cgit v1.2.1 From 1797619af63b6a3b3b7d3cee74f025e7fdde2751 Mon Sep 17 00:00:00 2001 From: Eric Avdey Date: Thu, 20 Jun 2019 10:42:30 -0300 Subject: Fix arity in changes timeout callback --- src/chttpd/src/chttpd_auth_cache.erl | 2 +- src/chttpd/src/chttpd_changes.erl | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/chttpd/src/chttpd_auth_cache.erl b/src/chttpd/src/chttpd_auth_cache.erl index 81814d50f..da6bccd40 100644 --- a/src/chttpd/src/chttpd_auth_cache.erl +++ b/src/chttpd/src/chttpd_auth_cache.erl @@ -179,7 +179,7 @@ changes_callback({change, {Change}}, _) -> ets_lru:remove(?CACHE, UserName) end, {ok, couch_util:get_value(seq, Change)}; -changes_callback(timeout, Acc) -> +changes_callback({timeout, _ResponseType}, Acc) -> {ok, Acc}; changes_callback({error, _}, EndSeq) -> exit({seq, EndSeq}). diff --git a/src/chttpd/src/chttpd_changes.erl b/src/chttpd/src/chttpd_changes.erl index 3389aebc0..b73efa327 100644 --- a/src/chttpd/src/chttpd_changes.erl +++ b/src/chttpd/src/chttpd_changes.erl @@ -391,10 +391,10 @@ get_changes_timeout(Args, Callback) -> end; true -> {DefaultTimeout, - fun(UserAcc) -> {ok, Callback(timeout, ResponseType, UserAcc)} end}; + fun(UserAcc) -> {ok, Callback({timeout, ResponseType}, UserAcc)} end}; _ -> {lists:min([DefaultTimeout, Heartbeat]), - fun(UserAcc) -> {ok, Callback(timeout, ResponseType, UserAcc)} end} + fun(UserAcc) -> {ok, Callback({timeout, ResponseType}, UserAcc)} end} end. start_sending_changes(Callback, UserAcc) -> -- cgit v1.2.1 From 1269278bb5ddc77073e9ccd920a544bbb7dd8a1e Mon Sep 17 00:00:00 2001 From: Eric Avdey Date: Thu, 20 Jun 2019 10:44:07 -0300 Subject: Fix exception in cache auth doc update --- src/chttpd/src/chttpd_auth_cache.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/chttpd/src/chttpd_auth_cache.erl b/src/chttpd/src/chttpd_auth_cache.erl index da6bccd40..c72b12667 100644 --- a/src/chttpd/src/chttpd_auth_cache.erl +++ b/src/chttpd/src/chttpd_auth_cache.erl @@ -234,7 +234,7 @@ update_doc_ignoring_conflict(DbName, Doc) -> try fabric2_db:update_doc(DbName, Doc) catch - throw:conflict -> + error:conflict -> ok end. -- cgit v1.2.1 From f10b554b3e0e26eb2a33e0bb991499d180539720 Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Wed, 12 Jun 2019 16:11:56 -0400 Subject: CouchDB background jobs RFC: https://github.com/apache/couchdb-documentation/pull/409 Main API is in the `couch_jobs` module. Additional description of internals is in the README.md file. --- rebar.config.script | 1 + rel/overlay/etc/default.ini | 21 + rel/reltool.config | 2 + src/couch_jobs/.gitignore | 4 + src/couch_jobs/README.md | 62 ++ src/couch_jobs/rebar.config | 14 + src/couch_jobs/src/couch_jobs.app.src | 31 + src/couch_jobs/src/couch_jobs.erl | 378 ++++++++++++ src/couch_jobs/src/couch_jobs.hrl | 52 ++ src/couch_jobs/src/couch_jobs_activity_monitor.erl | 133 ++++ .../src/couch_jobs_activity_monitor_sup.erl | 64 ++ src/couch_jobs/src/couch_jobs_app.erl | 26 + src/couch_jobs/src/couch_jobs_fdb.erl | 679 +++++++++++++++++++++ src/couch_jobs/src/couch_jobs_notifier.erl | 285 +++++++++ src/couch_jobs/src/couch_jobs_notifier_sup.erl | 64 ++ src/couch_jobs/src/couch_jobs_pending.erl | 143 +++++ src/couch_jobs/src/couch_jobs_server.erl | 193 ++++++ src/couch_jobs/src/couch_jobs_sup.erl | 66 ++ src/couch_jobs/src/couch_jobs_type_monitor.erl | 84 +++ src/couch_jobs/test/couch_jobs_tests.erl | 606 ++++++++++++++++++ 20 files changed, 2908 insertions(+) create mode 100644 src/couch_jobs/.gitignore create mode 100644 src/couch_jobs/README.md create mode 100644 src/couch_jobs/rebar.config create mode 100644 src/couch_jobs/src/couch_jobs.app.src create mode 100644 src/couch_jobs/src/couch_jobs.erl create mode 100644 src/couch_jobs/src/couch_jobs.hrl create mode 100644 src/couch_jobs/src/couch_jobs_activity_monitor.erl create mode 100644 src/couch_jobs/src/couch_jobs_activity_monitor_sup.erl create mode 100644 src/couch_jobs/src/couch_jobs_app.erl create mode 100644 src/couch_jobs/src/couch_jobs_fdb.erl create mode 100644 src/couch_jobs/src/couch_jobs_notifier.erl create mode 100644 src/couch_jobs/src/couch_jobs_notifier_sup.erl create mode 100644 src/couch_jobs/src/couch_jobs_pending.erl create mode 100644 src/couch_jobs/src/couch_jobs_server.erl create mode 100644 src/couch_jobs/src/couch_jobs_sup.erl create mode 100644 src/couch_jobs/src/couch_jobs_type_monitor.erl create mode 100644 src/couch_jobs/test/couch_jobs_tests.erl diff --git a/rebar.config.script b/rebar.config.script index 1d11e7d36..f87be50a1 100644 --- a/rebar.config.script +++ b/rebar.config.script @@ -130,6 +130,7 @@ SubDirs = [ "src/ddoc_cache", "src/dreyfus", "src/fabric", + "src/couch_jobs", "src/global_changes", "src/ioq", "src/ken", diff --git a/rel/overlay/etc/default.ini b/rel/overlay/etc/default.ini index 246c17307..c115185a2 100644 --- a/rel/overlay/etc/default.ini +++ b/rel/overlay/etc/default.ini @@ -602,3 +602,24 @@ compaction = false ;source_close_timeout_sec = 600 ;require_node_param = false ;require_range_param = false + +[couch_jobs] +; +; Maximum jitter used when checking for active job timeouts +;activity_monitor_max_jitter_msec = 10000 +; +; Hold-off applied before notifying subscribers. Since active jobs can be +; queried more effiently using a range read, increasing this value should make +; notifications more performant, however, it would also increase notification +; latency. +;type_monitor_holdoff_msec = 50 +; +; Timeout used when waiting for the job type notification watches. The default +; value of "infinity" should work well in most cases. +;type_monitor_timeout_msec = infinity +; +; How often to check for the presense of new job types. +;type_check_period_msec = 15000 +; +; Jitter applied when checking for new job types. +;type_check_max_jitter_msec = 5000 diff --git a/rel/reltool.config b/rel/reltool.config index 5285504ba..8f153c8bc 100644 --- a/rel/reltool.config +++ b/rel/reltool.config @@ -33,6 +33,7 @@ config, couch, couch_epi, + couch_jobs, couch_index, couch_log, couch_mrview, @@ -92,6 +93,7 @@ {app, config, [{incl_cond, include}]}, {app, couch, [{incl_cond, include}]}, {app, couch_epi, [{incl_cond, include}]}, + {app, couch_jobs, [{incl_cond, include}]}, {app, couch_index, [{incl_cond, include}]}, {app, couch_log, [{incl_cond, include}]}, {app, couch_mrview, [{incl_cond, include}]}, diff --git a/src/couch_jobs/.gitignore b/src/couch_jobs/.gitignore new file mode 100644 index 000000000..6ef4c5212 --- /dev/null +++ b/src/couch_jobs/.gitignore @@ -0,0 +1,4 @@ +*.beam +.eunit +ebin/couch_jobs.app +.DS_Store \ No newline at end of file diff --git a/src/couch_jobs/README.md b/src/couch_jobs/README.md new file mode 100644 index 000000000..bc45d323c --- /dev/null +++ b/src/couch_jobs/README.md @@ -0,0 +1,62 @@ +CouchDB Jobs Application +======================== + +Run background jobs in CouchDB + +Design (RFC) discussion: https://github.com/apache/couchdb-documentation/pull/409/files + +This is a description of some of the modules: + + * `couch_jobs`: The main API module. It contains functions for creating, + accepting, executing, and monitoring jobs. A common pattern in this module + is to get a jobs transaction object (named `JTx` throughout the code), then + start a transaction and call a bunch of functions from `couch_jobs_fdb` in + that transaction. + + * `couch_jobs_fdb`: This is a layer that talks to FDB. There is a lot of tuple + packing and unpacking, reading ranges and also managing transaction objects. + + * `couch_jobs_pending`: This module implements the pending jobs queue. These + functions could all go in `couch_jobs_fdb` but the implemention was fairly + self-contained, with its own private helper functions, so it made sense to + move to a separate module. + + * `couch_jobs_activity_monitor`: Here is where the "activity monitor" + functionality is implemented. That's done with a `gen_server` instance + running for each type. This `gen_server` periodically check if there are + inactive jobs for its type, and if they are, it re-enqueues them. If the + timeout value changes, then it skips the pending check, until the new + timeout expires. + + * `couch_jobs_activity_monitor_sup` : This is a simple one-for-one supervisor + to spawn `couch_jobs_activity_monitor` instances for each type. + + * `couch_jobs_type_monitor` : This is a helper process meant to be + `spawn_link`-ed from a parent `gen_server`. It then monitors activity for a + particular job type. If any jobs of that type have an update it notifies the + parent process. + + * `couch_jobs_notifier`: Is responsible for subscriptions. Just like + with activity monitor there is a `gen_server` instance running per + each type. It uses a linked `couch_jobs_type_monitor` process to wait for + any job updates. When an update notification arrives, it can efficiently + find out if any active jobs have been updated, by reading the `(?JOBS, + ?ACTIVITY, Type, Sequence)` range. That should account for the bulk of + changes. The jobs that are not active anymore, are queried individually. + Subscriptions are managed in an ordered set ETS table. + + * `couch_jobs_notifier_sup`: A simple one-for-one supervisor to spawn + `couch_jobs_notifier` processes for each type. + + * `couch_jobs_server`: This is a `gen_server` which keeps track of job + types. It then starts or stops activity monitors and notifiers for each + type. To do that it queries the ` (?JOBS, ?ACTIVITY_TIMEOUT)` periodically. + + * `couch_jobs_sup`: This is the main application supervisor. The restart + strategy is `rest_for_one`, meaning that a when a child restarts, the + sibling following it will restart. One interesting entry there is the first + child which is used just to create an ETS table used by `couch_jobs_fdb` to + cache transaction object (`JTx` mentioned above). That child calls + `init_cache/0`, where it creates the ETS then returns with `ignore` so it + doesn't actually spawn a process. The ETS table will be owned by the + supervisor process. diff --git a/src/couch_jobs/rebar.config b/src/couch_jobs/rebar.config new file mode 100644 index 000000000..362c8785e --- /dev/null +++ b/src/couch_jobs/rebar.config @@ -0,0 +1,14 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +{cover_enabled, true}. +{cover_print_enabled, true}. diff --git a/src/couch_jobs/src/couch_jobs.app.src b/src/couch_jobs/src/couch_jobs.app.src new file mode 100644 index 000000000..8ded14c6f --- /dev/null +++ b/src/couch_jobs/src/couch_jobs.app.src @@ -0,0 +1,31 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +{application, couch_jobs, [ + {description, "CouchDB Jobs"}, + {vsn, git}, + {mod, {couch_jobs_app, []}}, + {registered, [ + couch_jobs_sup, + couch_jobs_activity_monitor_sup, + couch_jobs_notifier_sup, + couch_jobs_server + ]}, + {applications, [ + kernel, + stdlib, + erlfdb, + couch_log, + config, + fabric + ]} +]}. diff --git a/src/couch_jobs/src/couch_jobs.erl b/src/couch_jobs/src/couch_jobs.erl new file mode 100644 index 000000000..d469ed41a --- /dev/null +++ b/src/couch_jobs/src/couch_jobs.erl @@ -0,0 +1,378 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_jobs). + +-export([ + % Job creation + add/4, + add/5, + remove/3, + get_job_data/3, + get_job_state/3, + + % Job processing + accept/1, + accept/2, + finish/2, + finish/3, + resubmit/2, + resubmit/3, + is_resubmitted/1, + update/2, + update/3, + + % Subscriptions + subscribe/2, + subscribe/3, + unsubscribe/1, + wait/2, + wait/3, + + % Type timeouts + set_type_timeout/2, + clear_type_timeout/1, + get_type_timeout/1 +]). + + +-include("couch_jobs.hrl"). + + +-define(MIN_ACCEPT_WAIT_MSEC, 100). + + +%% Job Creation API + +-spec add(jtx(), job_type(), job_id(), job_data()) -> ok | {error, any()}. +add(Tx, Type, JobId, JobData) -> + add(Tx, Type, JobId, JobData, 0). + + +-spec add(jtx(), job_type(), job_id(), job_data(), scheduled_time()) -> + ok | {error, any()}. +add(Tx, Type, JobId, JobData, ScheduledTime) when is_binary(JobId), + is_map(JobData), is_integer(ScheduledTime) -> + couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(Tx), fun(JTx) -> + case couch_jobs_fdb:add(JTx, Type, JobId, JobData, ScheduledTime) of + {ok, _, _, _} -> ok; + {error, Error} -> {error, Error} + end + end). + + +-spec remove(jtx(), job_type(), job_id()) -> ok | {error, any()}. +remove(Tx, Type, JobId) when is_binary(JobId) -> + couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(Tx), fun(JTx) -> + couch_jobs_fdb:remove(JTx, job(Type, JobId)) + end). + + +-spec get_job_data(jtx(), job_type(), job_id()) -> {ok, job_data()} | {error, + any()}. +get_job_data(Tx, Type, JobId) when is_binary(JobId) -> + couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(Tx), fun(JTx) -> + case couch_jobs_fdb:get_job_state_and_data(JTx, job(Type, JobId)) of + {ok, _Seq, _State, Data} -> + {ok, couch_jobs_fdb:decode_data(Data)}; + {error, Error} -> + {error, Error} + end + end). + + +-spec get_job_state(jtx(), job_type(), job_id()) -> {ok, job_state()} | {error, + any()}. +get_job_state(Tx, Type, JobId) when is_binary(JobId) -> + couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(Tx), fun(JTx) -> + case couch_jobs_fdb:get_job_state_and_data(JTx, job(Type, JobId)) of + {ok, _Seq, State, _Data} -> + {ok, State}; + {error, Error} -> + {error, Error} + end + end). + + +%% Job processor API + +-spec accept(job_type()) -> {ok, job(), job_data()} | {error, any()}. +accept(Type) -> + accept(Type, #{}). + + +-spec accept(job_type(), job_accept_opts()) -> {ok, job()} | {error, any()}. +accept(Type, #{} = Opts) -> + NoSched = maps:get(no_schedule, Opts, false), + MaxSchedTimeDefault = case NoSched of + true -> 0; + false -> ?UNDEFINED_MAX_SCHEDULED_TIME + end, + MaxSchedTime = maps:get(max_sched_time, Opts, MaxSchedTimeDefault), + Timeout = maps:get(timeout, Opts, infinity), + case NoSched andalso MaxSchedTime =/= 0 of + true -> + {error, no_schedule_require_0_max_sched_time}; + false -> + accept_loop(Type, NoSched, MaxSchedTime, Timeout) + end. + + +-spec finish(jtx(), job()) -> ok | {error, any()}. +finish(Tx, Job) -> + finish(Tx, Job, undefined). + + +-spec finish(jtx(), job(), job_data()) -> ok | {error, any()}. +finish(Tx, #{jlock := <<_/binary>>} = Job, JobData) -> + couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(Tx), fun(JTx) -> + couch_jobs_fdb:finish(JTx, Job, JobData) + end). + + +-spec resubmit(jtx(), job()) -> {ok, job()} | {error, any()}. +resubmit(Tx, Job) -> + resubmit(Tx, Job, ?UNDEFINED_MAX_SCHEDULED_TIME). + + +-spec resubmit(jtx(), job(), scheduled_time()) -> {ok, job()} | {error, any()}. +resubmit(Tx, #{jlock := <<_/binary>>} = Job, SchedTime) -> + couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(Tx), fun(JTx) -> + couch_jobs_fdb:resubmit(JTx, Job, SchedTime) + end). + + +-spec is_resubmitted(job()) -> true | false. +is_resubmitted(#{job := true} = Job) -> + maps:get(resubmit, Job, false). + + +-spec update(jtx(), job()) -> {ok, job()} | {error, any()}. +update(Tx, Job) -> + update(Tx, Job, undefined). + + +-spec update(jtx(), job(), job_data()) -> {ok, job()} | {error, any()}. +update(Tx, #{jlock := <<_/binary>>} = Job, JobData) -> + couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(Tx), fun(JTx) -> + couch_jobs_fdb:update(JTx, Job, JobData) + end). + + +%% Subscription API + +% Receive events as messages. Wait for them using `wait/2,3` +% functions. +% + +-spec subscribe(job_type(), job_id()) -> {ok, job_subscription(), job_state(), + job_data()} | {ok, finished, job_data()} | {error, any()}. +subscribe(Type, JobId) -> + subscribe(undefined, Type, JobId). + + +-spec subscribe(jtx(), job_type(), job_id()) -> {ok, job_subscription(), + job_state(), job_data()} | {ok, finished, job_data()} | {error, any()}. +subscribe(Tx, Type, JobId) -> + StateData = couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(Tx), fun(JTx) -> + Job = #{job => true, type => Type, id => JobId}, + couch_jobs_fdb:get_job_state_and_data(JTx, Job) + end), + case StateData of + {ok, _Seq, finished, Data} -> + {ok, finished, couch_jobs_fdb:decode_data(Data)}; + {ok, Seq, State, Data} -> + case couch_jobs_notifier:subscribe(Type, JobId, State, Seq) of + {ok, SubRef} -> + Data1 = couch_jobs_fdb:decode_data(Data), + {ok, SubRef, State, Data1}; + {error, Error} -> + {error, Error} + end; + {error, Error} -> + {error, Error} + end. + + +% Unsubscribe from getting notifications based on a particular subscription. +% Each subscription should be followed by its own unsubscription call. However, +% subscriber processes are also monitored and auto-unsubscribed if they exit. +% If subscribing process is exiting, calling this function is optional. +% +-spec unsubscribe(job_subscription()) -> ok. +unsubscribe({Server, Ref}) when is_pid(Server), is_reference(Ref) -> + try + couch_jobs_notifier:unsubscribe(Server, Ref) + after + flush_notifications(Ref) + end. + + +% Wait to receive job state updates +% +-spec wait(job_subscription() | [job_subscription()], timeout()) -> + {job_type(), job_id(), job_state(), job_data()} | timeout. +wait({_, Ref}, Timeout) -> + receive + {?COUCH_JOBS_EVENT, Ref, Type, Id, State, Data} -> + {Type, Id, State, couch_jobs_fdb:decode_data(Data)} + after + Timeout -> timeout + end; + +wait(Subs, Timeout) when is_list(Subs) -> + {Result, ResendQ} = wait_any(Subs, Timeout, []), + lists:foreach(fun(Msg) -> self() ! Msg end, ResendQ), + Result. + + +-spec wait(job_subscription() | [job_subscription()], job_state(), timeout()) + -> {job_type(), job_id(), job_state(), job_data()} | timeout. +wait({_, Ref} = Sub, State, Timeout) when is_atom(State) -> + receive + {?COUCH_JOBS_EVENT, Ref, Type, Id, MsgState, Data0} -> + case MsgState =:= State of + true -> + Data = couch_jobs_fdb:decode_data(Data0), + {Type, Id, State, Data}; + false -> + wait(Sub, State, Timeout) + end + after + Timeout -> timeout + end; + +wait(Subs, State, Timeout) when is_list(Subs), + is_atom(State) -> + {Result, ResendQ} = wait_any(Subs, State, Timeout, []), + lists:foreach(fun(Msg) -> self() ! Msg end, ResendQ), + Result. + + +%% Job type timeout API + +% These functions manipulate the activity timeout for each job type. + +-spec set_type_timeout(job_type(), timeout()) -> ok. +set_type_timeout(Type, Timeout) -> + couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(), fun(JTx) -> + couch_jobs_fdb:set_type_timeout(JTx, Type, Timeout) + end). + + +-spec clear_type_timeout(job_type()) -> ok. +clear_type_timeout(Type) -> + couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(), fun(JTx) -> + couch_jobs_fdb:clear_type_timeout(JTx, Type) + end). + + +-spec get_type_timeout(job_type()) -> timeout(). +get_type_timeout(Type) -> + couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(), fun(JTx) -> + couch_jobs_fdb:get_type_timeout(JTx, Type) + end). + + +%% Private utilities + +accept_loop(Type, NoSched, MaxSchedTime, Timeout) -> + TxFun = fun(JTx) -> + couch_jobs_fdb:accept(JTx, Type, MaxSchedTime, NoSched) + end, + case couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(), TxFun) of + {ok, Job, Data} -> + {ok, Job, Data}; + {not_found, PendingWatch} -> + case wait_pending(PendingWatch, MaxSchedTime, Timeout) of + {error, not_found} -> + {error, not_found}; + ok -> + accept_loop(Type, NoSched, MaxSchedTime, Timeout) + end + end. + + +job(Type, JobId) -> + #{job => true, type => Type, id => JobId}. + + +wait_pending(PendingWatch, _MaxSTime, 0) -> + erlfdb:cancel(PendingWatch, [flush]), + {error, not_found}; + +wait_pending(PendingWatch, MaxSTime, UserTimeout) -> + NowMSec = erlang:system_time(millisecond), + Timeout0 = max(?MIN_ACCEPT_WAIT_MSEC, MaxSTime * 1000 - NowMSec), + Timeout = min(limit_timeout(Timeout0), UserTimeout), + try + erlfdb:wait(PendingWatch, [{timeout, Timeout}]), + ok + catch + error:{timeout, _} -> + erlfdb:cancel(PendingWatch, [flush]), + {error, not_found} + end. + + +wait_any(Subs, Timeout0, ResendQ) when is_list(Subs) -> + Timeout = limit_timeout(Timeout0), + receive + {?COUCH_JOBS_EVENT, Ref, Type, Id, State, Data0} = Msg -> + case lists:keyfind(Ref, 2, Subs) of + false -> + wait_any(Subs, Timeout, [Msg | ResendQ]); + {_, Ref} -> + Data = couch_jobs_fdb:decode_data(Data0), + {{Type, Id, State, Data}, ResendQ} + end + after + Timeout -> {timeout, ResendQ} + end. + + +wait_any(Subs, State, Timeout0, ResendQ) when + is_list(Subs) -> + Timeout = limit_timeout(Timeout0), + receive + {?COUCH_JOBS_EVENT, Ref, Type, Id, MsgState, Data0} = Msg -> + case lists:keyfind(Ref, 2, Subs) of + false -> + wait_any(Subs, Timeout, [Msg | ResendQ]); + {_, Ref} -> + case MsgState =:= State of + true -> + Data = couch_jobs_fdb:decode_data(Data0), + {{Type, Id, State, Data}, ResendQ}; + false -> + wait_any(Subs, Timeout, ResendQ) + end + end + after + Timeout -> {timeout, ResendQ} + end. + + +limit_timeout(Timeout) when is_integer(Timeout), Timeout < 16#FFFFFFFF -> + Timeout; + +limit_timeout(_Timeout) -> + infinity. + + +flush_notifications(Ref) -> + receive + {?COUCH_JOBS_EVENT, Ref, _, _, _} -> + flush_notifications(Ref) + after + 0 -> ok + end. diff --git a/src/couch_jobs/src/couch_jobs.hrl b/src/couch_jobs/src/couch_jobs.hrl new file mode 100644 index 000000000..2a02d760f --- /dev/null +++ b/src/couch_jobs/src/couch_jobs.hrl @@ -0,0 +1,52 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + + +% Job map/json field definitions +% +-define(OPT_PRIORITY, <<"priority">>). +-define(OPT_DATA, <<"data">>). +-define(OPT_CANCEL, <<"cancel">>). +-define(OPT_RESUBMIT, <<"resubmit">>). + +% These might be in a fabric public hrl eventually +% +-define(uint2bin(I), binary:encode_unsigned(I, little)). +-define(bin2uint(I), binary:decode_unsigned(I, little)). +-define(UNSET_VS, {versionstamp, 16#FFFFFFFFFFFFFFFF, 16#FFFF}). +-define(METADATA_VERSION_KEY, <<"$metadata_version_key$">>). + +% Data model definitions +% +-define(JOBS, 51). % coordinate with fabric2.hrl +-define(DATA, 1). +-define(PENDING, 2). +-define(WATCHES_PENDING, 3). +-define(WATCHES_ACTIVITY, 4). +-define(ACTIVITY_TIMEOUT, 5). +-define(ACTIVITY, 6). + + +-define(COUCH_JOBS_EVENT, '$couch_jobs_event'). +-define(COUCH_JOBS_CURRENT, '$couch_jobs_current'). +-define(UNDEFINED_MAX_SCHEDULED_TIME, 1 bsl 36). + + +-type jtx() :: map() | undefined | tuple(). +-type job_id() :: binary(). +-type job_type() :: tuple() | binary() | non_neg_integer(). +-type job() :: map(). +-type job_data() :: map() | undefined. +-type job_accept_opts() :: map(). +-type scheduled_time() :: non_neg_integer() | undefined. +-type job_state() :: running | pending | finished. +-type job_subscription() :: {pid(), reference()}. diff --git a/src/couch_jobs/src/couch_jobs_activity_monitor.erl b/src/couch_jobs/src/couch_jobs_activity_monitor.erl new file mode 100644 index 000000000..ef82e6bd9 --- /dev/null +++ b/src/couch_jobs/src/couch_jobs_activity_monitor.erl @@ -0,0 +1,133 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_jobs_activity_monitor). + +-behaviour(gen_server). + + +-export([ + start_link/1 +]). + +-export([ + init/1, + terminate/2, + handle_call/3, + handle_cast/2, + handle_info/2, + code_change/3 +]). + +-record(st, { + jtx, + type, + tref, + timeout = 0, + vs = not_found +}). + + +-define(MAX_JITTER_DEFAULT, 10000). +-define(MISSING_TIMEOUT_CHECK, 5000). + + +start_link(Type) -> + gen_server:start_link(?MODULE, [Type], []). + + +%% gen_server callbacks + +init([Type]) -> + St = #st{jtx = couch_jobs_fdb:get_jtx(), type = Type}, + {ok, schedule_check(St)}. + + +terminate(_, _St) -> + ok. + + +handle_call(Msg, _From, St) -> + {stop, {bad_call, Msg}, {bad_call, Msg}, St}. + + +handle_cast(Msg, St) -> + {stop, {bad_cast, Msg}, St}. + + +handle_info(check_activity, St) -> + St1 = check_activity(St), + St2 = schedule_check(St1), + {noreply, St2}; + +handle_info({Ref, ready}, St) when is_reference(Ref) -> + % Don't crash out couch_jobs_server and the whole application would need to + % eventually do proper cleanup in erlfdb:wait timeout code. + LogMsg = "~p : spurious erlfdb future ready message ~p", + couch_log:error(LogMsg, [?MODULE, Ref]), + {noreply, St}; + +handle_info(Msg, St) -> + {stop, {bad_info, Msg}, St}. + + +code_change(_OldVsn, St, _Extra) -> + {ok, St}. + + +% Private helper functions + +check_activity(#st{jtx = JTx, type = Type, vs = not_found} = St) -> + NewVS = couch_jobs_fdb:tx(JTx, fun(JTx1) -> + couch_jobs_fdb:get_activity_vs(JTx1, Type) + end), + St#st{vs = NewVS}; + +check_activity(#st{jtx = JTx, type = Type, vs = VS} = St) -> + NewVS = couch_jobs_fdb:tx(JTx, fun(JTx1) -> + NewVS = couch_jobs_fdb:get_activity_vs(JTx1, Type), + JobIds = couch_jobs_fdb:get_inactive_since(JTx1, Type, VS), + couch_jobs_fdb:re_enqueue_inactive(JTx1, Type, JobIds), + NewVS + end), + St#st{vs = NewVS}. + + +get_timeout_msec(JTx, Type) -> + TimeoutVal = couch_jobs_fdb:tx(JTx, fun(JTx1) -> + couch_jobs_fdb:get_type_timeout(JTx1, Type) + end), + case TimeoutVal of + not_found -> not_found; + ValSeconds -> timer:seconds(ValSeconds) + end. + + +schedule_check(#st{jtx = JTx, type = Type, timeout = OldTimeout} = St) -> + % Reset versionstamp if timeout changed. + St1 = case get_timeout_msec(JTx, Type) of + not_found -> + St#st{vs = not_found, timeout = ?MISSING_TIMEOUT_CHECK}; + OldTimeout -> + St; + NewTimeout -> + St#st{vs = not_found, timeout = NewTimeout} + end, + #st{timeout = Timeout} = St1, + MaxJitter = min(Timeout div 2, get_max_jitter_msec()), + Wait = Timeout + rand:uniform(max(1, MaxJitter)), + St1#st{tref = erlang:send_after(Wait, self(), check_activity)}. + + +get_max_jitter_msec()-> + config:get_integer("couch_jobs", "activity_monitor_max_jitter_msec", + ?MAX_JITTER_DEFAULT). diff --git a/src/couch_jobs/src/couch_jobs_activity_monitor_sup.erl b/src/couch_jobs/src/couch_jobs_activity_monitor_sup.erl new file mode 100644 index 000000000..b11161a24 --- /dev/null +++ b/src/couch_jobs/src/couch_jobs_activity_monitor_sup.erl @@ -0,0 +1,64 @@ +% +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_jobs_activity_monitor_sup). + + +-behaviour(supervisor). + + +-export([ + start_link/0, + + start_monitor/1, + stop_monitor/1, + get_child_pids/0 +]). + +-export([ + init/1 +]). + + +start_link() -> + supervisor:start_link({local, ?MODULE}, ?MODULE, []). + + +start_monitor(Type) -> + supervisor:start_child(?MODULE, [Type]). + + +stop_monitor(Pid) -> + supervisor:terminate_child(?MODULE, Pid). + + +get_child_pids() -> + lists:map(fun({_Id, Pid, _Type, _Mod}) -> + Pid + end, supervisor:which_children(?MODULE)). + + +init(_) -> + Flags = #{ + strategy => simple_one_for_one, + intensity => 10, + period => 3 + }, + Children = [ + #{ + id => couch_jobs_monitor, + restart => temporary, + start => {couch_jobs_activity_monitor, start_link, []} + } + ], + {ok, {Flags, Children}}. diff --git a/src/couch_jobs/src/couch_jobs_app.erl b/src/couch_jobs/src/couch_jobs_app.erl new file mode 100644 index 000000000..720b94891 --- /dev/null +++ b/src/couch_jobs/src/couch_jobs_app.erl @@ -0,0 +1,26 @@ +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_jobs_app). + + +-behaviour(application). + + +-export([ + start/2, + stop/1 +]). + + +start(_Type, []) -> + couch_jobs_sup:start_link(). + + +stop([]) -> + ok. diff --git a/src/couch_jobs/src/couch_jobs_fdb.erl b/src/couch_jobs/src/couch_jobs_fdb.erl new file mode 100644 index 000000000..1317d03df --- /dev/null +++ b/src/couch_jobs/src/couch_jobs_fdb.erl @@ -0,0 +1,679 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_jobs_fdb). + + +-export([ + add/5, + remove/2, + get_job_state_and_data/2, + get_jobs/2, + get_jobs/3, + + accept/4, + finish/3, + resubmit/3, + update/3, + + set_type_timeout/3, + clear_type_timeout/2, + get_type_timeout/2, + get_types/1, + + get_activity_vs/2, + get_activity_vs_and_watch/2, + get_active_since/3, + get_inactive_since/3, + re_enqueue_inactive/3, + + init_cache/0, + + encode_data/1, + decode_data/1, + + get_jtx/0, + get_jtx/1, + tx/2, + + get_job/2, + get_jobs/0 +]). + + +-include("couch_jobs.hrl"). + + +-record(jv, { + seq, + jlock, + stime, + resubmit, + data +}). + + +-define(JOBS_ETS_KEY, jobs). +-define(MD_TIMESTAMP_ETS_KEY, md_timestamp). +-define(MD_VERSION_MAX_AGE_SEC, 10). +-define(PENDING_SEQ, 0). + + +% Data model +% +% (?JOBS, ?DATA, Type, JobId) = (Sequence, Lock, SchedTime, Resubmit, JobData) +% (?JOBS, ?PENDING, Type, ScheduledTime, JobId) = "" +% (?JOBS, ?WATCHES_PENDING, Type) = Counter +% (?JOBS, ?WATCHES_ACTIVITY, Type) = Sequence +% (?JOBS, ?ACTIVITY_TIMEOUT, Type) = ActivityTimeout +% (?JOBS, ?ACTIVITY, Type, Sequence) = JobId +% +% In the ?DATA row Sequence can have these values: +% 0 - when the job is pending +% null - when the job is finished +% Versionstamp - when the job is running + + +% Job creation API + +add(#{jtx := true} = JTx0, Type, JobId, Data, STime) -> + #{tx := Tx} = JTx = get_jtx(JTx0), + Job = #{job => true, type => Type, id => JobId}, + case get_type_timeout(JTx, Type) of + not_found -> + {error, no_type_timeout}; + Int when is_integer(Int) -> + Key = job_key(JTx, Job), + case erlfdb:wait(erlfdb:get(Tx, Key)) of + <<_/binary>> -> + {ok, Job1} = resubmit(JTx, Job, STime), + #{seq := Seq, state := State, data := Data1} = Job1, + {ok, State, Seq, Data1}; + not_found -> + try + maybe_enqueue(JTx, Type, JobId, STime, true, Data), + {ok, pending, ?PENDING_SEQ, Data} + catch + error:{json_encoding_error, Error} -> + {error, {json_encoding_error, Error}} + end + end + end. + + +remove(#{jtx := true} = JTx0, #{job := true} = Job) -> + #{tx := Tx} = JTx = get_jtx(JTx0), + #{type := Type, id := JobId} = Job, + Key = job_key(JTx, Job), + case get_job_val(Tx, Key) of + #jv{stime = STime} -> + couch_jobs_pending:remove(JTx, Type, JobId, STime), + erlfdb:clear(Tx, Key), + ok; + not_found -> + {error, not_found} + end. + + +get_job_state_and_data(#{jtx := true} = JTx, #{job := true} = Job) -> + case get_job_val(get_jtx(JTx), Job) of + #jv{seq = Seq, jlock = JLock, data = Data} -> + {ok, Seq, job_state(JLock, Seq), Data}; + not_found -> + {error, not_found} + end. + + +get_jobs(JTx, Type) -> + get_jobs(JTx, Type, fun(_) -> true end). + + +get_jobs(#{jtx := true} = JTx, Type, Filter) when is_function(Filter, 1) -> + #{tx := Tx, jobs_path := Jobs} = get_jtx(JTx), + Prefix = erlfdb_tuple:pack({?DATA, Type}, Jobs), + Opts = [{streaming_mode, want_all}], + Result = erlfdb:wait(erlfdb:get_range_startswith(Tx, Prefix, Opts)), + lists:foldl(fun({K, V}, #{} = Acc) -> + {JobId} = erlfdb_tuple:unpack(K, Prefix), + case Filter(JobId) of + true -> + {Seq, JLock, _, _, Data} = erlfdb_tuple:unpack(V), + Acc#{JobId => {Seq, job_state(JLock, Seq), Data}}; + false -> + Acc + end + end, #{}, Result). + + +% Job processor API + +accept(#{jtx := true} = JTx0, Type, MaxSTime, NoSched) + when is_integer(MaxSTime), is_boolean(NoSched) -> + #{jtx := true, tx := Tx} = JTx = get_jtx(JTx0), + case couch_jobs_pending:dequeue(JTx, Type, MaxSTime, NoSched) of + {not_found, PendingWatch} -> + {not_found, PendingWatch}; + {ok, JobId} -> + JLock = fabric2_util:uuid(), + Key = job_key(JTx, Type, JobId), + JV0 = get_job_val(Tx, Key), + #jv{jlock = null, data = Data} = JV0, + JV = JV0#jv{seq = ?UNSET_VS, jlock = JLock, resubmit = false}, + set_job_val(Tx, Key, JV), + update_activity(JTx, Type, JobId, null, Data), + Job = #{ + job => true, + type => Type, + id => JobId, + jlock => JLock + }, + {ok, Job, decode_data(Data)} + end. + + +finish(#{jtx := true} = JTx0, #{jlock := <<_/binary>>} = Job, Data) when + is_map(Data) orelse Data =:= undefined -> + #{tx := Tx} = JTx = get_jtx(JTx0), + #{type := Type, jlock := JLock, id := JobId} = Job, + case get_job_or_halt(Tx, job_key(JTx, Job), JLock) of + #jv{seq = Seq, stime = STime, resubmit = Resubmit, data = OldData} -> + NewData = case Data =:= undefined of + true -> OldData; + false -> Data + end, + try maybe_enqueue(JTx, Type, JobId, STime, Resubmit, NewData) of + ok -> + clear_activity(JTx, Type, Seq), + update_watch(JTx, Type) + catch + error:{json_encoding_error, Error} -> + {error, {json_encoding_error, Error}} + end; + halt -> + {error, halt} + end. + + +resubmit(#{jtx := true} = JTx0, #{job := true} = Job, NewSTime) -> + #{tx := Tx} = JTx = get_jtx(JTx0), + #{type := Type, id := JobId} = Job, + Key = job_key(JTx, Job), + case get_job_val(Tx, Key) of + #jv{seq = Seq, jlock = JLock, stime = OldSTime, data = Data} = JV -> + STime = case NewSTime =:= undefined of + true -> OldSTime; + false -> NewSTime + end, + case job_state(JLock, Seq) of + finished -> + ok = maybe_enqueue(JTx, Type, JobId, STime, true, Data), + Job1 = Job#{ + seq => ?PENDING_SEQ, + state => pending, + data => Data + }, + {ok, Job1}; + pending -> + JV1 = JV#jv{seq = ?PENDING_SEQ, stime = STime}, + set_job_val(Tx, Key, JV1), + couch_jobs_pending:remove(JTx, Type, JobId, OldSTime), + couch_jobs_pending:enqueue(JTx, Type, STime, JobId), + Job1 = Job#{ + stime => STime, + seq => ?PENDING_SEQ, + state => pending, + data => Data + }, + {ok, Job1}; + running -> + JV1 = JV#jv{stime = STime, resubmit = true}, + set_job_val(Tx, Key, JV1), + {ok, Job#{resubmit => true, stime => STime, + state => running, seq => Seq, data => Data}} + end; + not_found -> + {error, not_found} + end. + + +update(#{jtx := true} = JTx0, #{jlock := <<_/binary>>} = Job, Data0) when + is_map(Data0) orelse Data0 =:= undefined -> + #{tx := Tx} = JTx = get_jtx(JTx0), + #{jlock := JLock, type := Type, id := JobId} = Job, + Key = job_key(JTx, Job), + case get_job_or_halt(Tx, Key, JLock) of + #jv{seq = Seq, stime = STime, resubmit = Resubmit} = JV0 -> + Data = case Data0 =:= undefined of + true -> JV0#jv.data; + false -> Data0 + end, + JV = JV0#jv{seq = ?UNSET_VS, data = Data}, + try set_job_val(Tx, Key, JV) of + ok -> + update_activity(JTx, Type, JobId, Seq, Data), + {ok, Job#{resubmit => Resubmit, stime => STime}} + catch + error:{json_encoding_error, Error} -> + {error, {json_encoding_error, Error}} + end; + halt -> + {error, halt} + end. + + +% Type and activity monitoring API + +set_type_timeout(#{jtx := true} = JTx, Type, Timeout) -> + #{tx := Tx, jobs_path := Jobs} = get_jtx(JTx), + Key = erlfdb_tuple:pack({?ACTIVITY_TIMEOUT, Type}, Jobs), + Val = erlfdb_tuple:pack({Timeout}), + erlfdb:set(Tx, Key, Val). + + +clear_type_timeout(#{jtx := true} = JTx, Type) -> + #{tx := Tx, jobs_path := Jobs} = get_jtx(JTx), + Key = erlfdb_tuple:pack({?ACTIVITY_TIMEOUT, Type}, Jobs), + erlfdb:clear(Tx, Key). + + +get_type_timeout(#{jtx := true} = JTx, Type) -> + #{tx := Tx, jobs_path := Jobs} = get_jtx(JTx), + Key = erlfdb_tuple:pack({?ACTIVITY_TIMEOUT, Type}, Jobs), + case erlfdb:wait(erlfdb:get_ss(Tx, Key)) of + not_found -> + not_found; + Val -> + {Timeout} = erlfdb_tuple:unpack(Val), + Timeout + end. + + +get_types(#{jtx := true} = JTx) -> + #{tx := Tx, jobs_path := Jobs} = get_jtx(JTx), + Prefix = erlfdb_tuple:pack({?ACTIVITY_TIMEOUT}, Jobs), + Opts = [{streaming_mode, want_all}], + Result = erlfdb:wait(erlfdb:get_range_startswith(Tx, Prefix, Opts)), + lists:map(fun({K, _V}) -> + {Type} = erlfdb_tuple:unpack(K, Prefix), + Type + end, Result). + + +get_activity_vs(#{jtx := true} = JTx, Type) -> + #{tx := Tx, jobs_path := Jobs} = get_jtx(JTx), + Key = erlfdb_tuple:pack({?WATCHES_ACTIVITY, Type}, Jobs), + case erlfdb:wait(erlfdb:get(Tx, Key)) of + not_found -> + not_found; + Val -> + {VS} = erlfdb_tuple:unpack(Val), + VS + end. + + +get_activity_vs_and_watch(#{jtx := true} = JTx, Type) -> + #{tx := Tx, jobs_path := Jobs} = get_jtx(JTx), + Key = erlfdb_tuple:pack({?WATCHES_ACTIVITY, Type}, Jobs), + Future = erlfdb:get(Tx, Key), + Watch = erlfdb:watch(Tx, Key), + case erlfdb:wait(Future) of + not_found -> + {not_found, Watch}; + Val -> + {VS} = erlfdb_tuple:unpack(Val), + {VS, Watch} + end. + + +get_active_since(#{jtx := true} = JTx, Type, Versionstamp) -> + #{tx := Tx, jobs_path := Jobs} = get_jtx(JTx), + Prefix = erlfdb_tuple:pack({?ACTIVITY}, Jobs), + StartKey = erlfdb_tuple:pack({Type, Versionstamp}, Prefix), + StartKeySel = erlfdb_key:first_greater_or_equal(StartKey), + {_, EndKey} = erlfdb_tuple:range({Type}, Prefix), + Opts = [{streaming_mode, want_all}], + Future = erlfdb:get_range(Tx, StartKeySel, EndKey, Opts), + maps:from_list(lists:map(fun({_K, V}) -> + erlfdb_tuple:unpack(V) + end, erlfdb:wait(Future))). + + +get_inactive_since(#{jtx := true} = JTx, Type, Versionstamp) -> + #{tx := Tx, jobs_path := Jobs} = get_jtx(JTx), + Prefix = erlfdb_tuple:pack({?ACTIVITY}, Jobs), + {StartKey, _} = erlfdb_tuple:range({Type}, Prefix), + EndKey = erlfdb_tuple:pack({Type, Versionstamp}, Prefix), + EndKeySel = erlfdb_key:first_greater_than(EndKey), + Opts = [{streaming_mode, want_all}], + Future = erlfdb:get_range(Tx, StartKey, EndKeySel, Opts), + lists:map(fun({_K, V}) -> + {JobId, _} = erlfdb_tuple:unpack(V), + JobId + end, erlfdb:wait(Future)). + + +re_enqueue_inactive(#{jtx := true} = JTx, Type, JobIds) when is_list(JobIds) -> + #{tx := Tx} = get_jtx(JTx), + lists:foreach(fun(JobId) -> + case get_job_val(Tx, job_key(JTx, Type, JobId)) of + #jv{seq = Seq, stime = STime, data = Data} -> + clear_activity(JTx, Type, Seq), + maybe_enqueue(JTx, Type, JobId, STime, true, Data); + not_found -> + ok + end + end, JobIds), + case length(JobIds) > 0 of + true -> update_watch(JTx, Type); + false -> ok + end. + + +% Cache initialization API. Called from the supervisor just to create the ETS +% table. It returns `ignore` to tell supervisor it won't actually start any +% process, which is what we want here. +% +init_cache() -> + ConcurrencyOpts = [{read_concurrency, true}, {write_concurrency, true}], + ets:new(?MODULE, [public, named_table] ++ ConcurrencyOpts), + ignore. + + +% Functions to encode / decode JobData +% +encode_data(#{} = JobData) -> + try + jiffy:encode(JobData) + catch + throw:{error, Error} -> + % legacy clause since new versions of jiffy raise error instead + error({json_encoding_error, Error}); + error:{error, Error} -> + error({json_encoding_error, Error}) + end. + + +decode_data(#{} = JobData) -> + JobData; + +decode_data(<<_/binary>> = JobData) -> + jiffy:decode(JobData, [return_maps]). + + +% Cached job transaction object. This object wraps a transaction, caches the +% directory lookup path, and the metadata version. The function can be used +% from inside or outside the transaction. When used from a transaction it will +% verify if the metadata was changed, and will refresh automatically. +% +get_jtx() -> + get_jtx(undefined). + + +get_jtx(#{tx := Tx} = _TxDb) -> + get_jtx(Tx); + +get_jtx(undefined = _Tx) -> + case ets:lookup(?MODULE, ?JOBS_ETS_KEY) of + [{_, #{} = JTx}] -> + JTx; + [] -> + JTx = update_jtx_cache(init_jtx(undefined)), + JTx#{tx := undefined} + end; + +get_jtx({erlfdb_transaction, _} = Tx) -> + case ets:lookup(?MODULE, ?JOBS_ETS_KEY) of + [{_, #{} = JTx}] -> + ensure_current(JTx#{tx := Tx}); + [] -> + update_jtx_cache(init_jtx(Tx)) + end. + + +% Transaction processing to be used with couch jobs' specific transaction +% contexts +% +tx(#{jtx := true} = JTx, Fun) when is_function(Fun, 1) -> + fabric2_fdb:transactional(JTx, Fun). + + +% Debug and testing API + +get_job(Type, JobId) -> + fabric2_fdb:transactional(fun(Tx) -> + JTx = init_jtx(Tx), + case get_job_val(Tx, job_key(JTx, Type, JobId)) of + #jv{seq = Seq, jlock = JLock} = JV -> + #{ + job => true, + type => Type, + id => JobId, + seq => Seq, + jlock => JLock, + stime => JV#jv.stime, + resubmit => JV#jv.resubmit, + data => decode_data(JV#jv.data), + state => job_state(JLock, Seq) + }; + not_found -> + not_found + end + end). + + +get_jobs() -> + fabric2_fdb:transactional(fun(Tx) -> + #{jobs_path := Jobs} = init_jtx(Tx), + Prefix = erlfdb_tuple:pack({?DATA}, Jobs), + Opts = [{streaming_mode, want_all}], + Result = erlfdb:wait(erlfdb:get_range_startswith(Tx, Prefix, Opts)), + lists:map(fun({K, V}) -> + {Type, JobId} = erlfdb_tuple:unpack(K, Prefix), + {Seq, JLock, _, _, Data} = erlfdb_tuple:unpack(V), + JobState = job_state(JLock, Seq), + {Type, JobId, JobState, decode_data(Data)} + end, Result) + end). + + +% Private helper functions + +maybe_enqueue(#{jtx := true} = JTx, Type, JobId, STime, Resubmit, Data) -> + #{tx := Tx} = JTx, + Key = job_key(JTx, Type, JobId), + JV = #jv{ + seq = null, + jlock = null, + stime = STime, + resubmit = false, + data = Data + }, + case Resubmit of + true -> + set_job_val(Tx, Key, JV#jv{seq = ?PENDING_SEQ}), + couch_jobs_pending:enqueue(JTx, Type, STime, JobId); + false -> + set_job_val(Tx, Key, JV) + end, + ok. + + +job_key(#{jtx := true, jobs_path := Jobs}, Type, JobId) -> + erlfdb_tuple:pack({?DATA, Type, JobId}, Jobs). + + +job_key(JTx, #{type := Type, id := JobId}) -> + job_key(JTx, Type, JobId). + + +get_job_val(#{jtx := true, tx := Tx} = JTx, #{job := true} = Job) -> + get_job_val(Tx, job_key(JTx, Job)); + +get_job_val(Tx = {erlfdb_transaction, _}, Key) -> + case erlfdb:wait(erlfdb:get(Tx, Key)) of + <<_/binary>> = Val -> + {Seq, JLock, STime, Resubmit, Data} = erlfdb_tuple:unpack(Val), + #jv{ + seq = Seq, + jlock = JLock, + stime = STime, + resubmit = Resubmit, + data = Data + }; + not_found -> + not_found + end. + + +set_job_val(Tx = {erlfdb_transaction, _}, Key, #jv{} = JV) -> + #jv{ + seq = Seq, + jlock = JLock, + stime = STime, + resubmit = Resubmit, + data = Data0 + } = JV, + Data = case Data0 of + #{} -> encode_data(Data0); + <<_/binary>> -> Data0 + end, + case Seq of + ?UNSET_VS -> + Val = erlfdb_tuple:pack_vs({Seq, JLock, STime, Resubmit, Data}), + erlfdb:set_versionstamped_value(Tx, Key, Val); + _Other -> + Val = erlfdb_tuple:pack({Seq, JLock, STime, Resubmit, Data}), + erlfdb:set(Tx, Key, Val) + end, + ok. + + +get_job_or_halt(Tx, Key, JLock) -> + case get_job_val(Tx, Key) of + #jv{jlock = CurJLock} when CurJLock =/= JLock -> + halt; + #jv{} = Res -> + Res; + not_found -> + halt + end. + + +update_activity(#{jtx := true} = JTx, Type, JobId, Seq, Data0) -> + #{tx := Tx, jobs_path := Jobs} = JTx, + case Seq =/= null of + true -> clear_activity(JTx, Type, Seq); + false -> ok + end, + Key = erlfdb_tuple:pack_vs({?ACTIVITY, Type, ?UNSET_VS}, Jobs), + Data = case Data0 of + #{} -> encode_data(Data0); + <<_/binary>> -> Data0 + end, + Val = erlfdb_tuple:pack({JobId, Data}), + erlfdb:set_versionstamped_key(Tx, Key, Val), + update_watch(JTx, Type). + + +clear_activity(#{jtx := true} = JTx, Type, Seq) -> + #{tx := Tx, jobs_path := Jobs} = JTx, + Key = erlfdb_tuple:pack({?ACTIVITY, Type, Seq}, Jobs), + erlfdb:clear(Tx, Key). + + +update_watch(#{jtx := true} = JTx, Type) -> + #{tx := Tx, jobs_path := Jobs} = JTx, + Key = erlfdb_tuple:pack({?WATCHES_ACTIVITY, Type}, Jobs), + Val = erlfdb_tuple:pack_vs({?UNSET_VS}), + erlfdb:set_versionstamped_value(Tx, Key, Val), + ok. + + +job_state(JLock, Seq) -> + case {JLock, Seq} of + {null, null} -> finished; + {JLock, _} when JLock =/= null -> running; + {null, Seq} when Seq =/= null -> pending + end. + + +% This a transaction context object similar to the Db = #{} one from +% fabric2_fdb. It's is used to cache the jobs path directory (to avoid extra +% lookups on every operation) and to check for metadata changes (in case +% directory changes). +% +init_jtx(undefined) -> + fabric2_fdb:transactional(fun(Tx) -> init_jtx(Tx) end); + +init_jtx({erlfdb_transaction, _} = Tx) -> + Root = erlfdb_directory:root(), + CouchDB = erlfdb_directory:create_or_open(Tx, Root, [<<"couchdb">>]), + LayerPrefix = erlfdb_directory:get_name(CouchDB), + Jobs = erlfdb_tuple:pack({?JOBS}, LayerPrefix), + Version = erlfdb:wait(erlfdb:get(Tx, ?METADATA_VERSION_KEY)), + % layer_prefix, md_version and tx here match db map fields in fabric2_fdb + % but we also assert that this is a job transaction using the jtx => true + % field + #{ + jtx => true, + tx => Tx, + layer_prefix => LayerPrefix, + jobs_path => Jobs, + md_version => Version + }. + + +ensure_current(#{jtx := true, tx := Tx} = JTx) -> + case get(?COUCH_JOBS_CURRENT) of + Tx -> + JTx; + _ -> + JTx1 = update_current(JTx), + put(?COUCH_JOBS_CURRENT, Tx), + JTx1 + end. + + +update_current(#{tx := Tx, md_version := Version} = JTx) -> + case get_md_version_age(Version) of + Age when Age =< ?MD_VERSION_MAX_AGE_SEC -> + % Looked it up not too long ago. Avoid looking it up to frequently + JTx; + _ -> + case erlfdb:wait(erlfdb:get(Tx, ?METADATA_VERSION_KEY)) of + Version -> + update_md_version_timestamp(Version), + JTx; + _NewVersion -> + update_jtx_cache(init_jtx(Tx)) + end + end. + + +update_jtx_cache(#{jtx := true, md_version := Version} = JTx) -> + CachedJTx = JTx#{tx := undefined}, + ets:insert(?MODULE, {?JOBS_ETS_KEY, CachedJTx}), + update_md_version_timestamp(Version), + JTx. + + +get_md_version_age(Version) -> + Timestamp = case ets:lookup(?MODULE, ?MD_TIMESTAMP_ETS_KEY) of + [{_, Version, Ts}] -> Ts; + _ -> 0 + end, + erlang:system_time(second) - Timestamp. + + +update_md_version_timestamp(Version) -> + Ts = erlang:system_time(second), + ets:insert(?MODULE, {?MD_TIMESTAMP_ETS_KEY, Version, Ts}). diff --git a/src/couch_jobs/src/couch_jobs_notifier.erl b/src/couch_jobs/src/couch_jobs_notifier.erl new file mode 100644 index 000000000..1c554a0c0 --- /dev/null +++ b/src/couch_jobs/src/couch_jobs_notifier.erl @@ -0,0 +1,285 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_jobs_notifier). + +-behaviour(gen_server). + + +-export([ + start_link/1, + subscribe/4, + unsubscribe/2 +]). + +-export([ + init/1, + terminate/2, + handle_call/3, + handle_cast/2, + handle_info/2, + code_change/3 +]). + + +-include("couch_jobs.hrl"). + + +-define(TYPE_MONITOR_HOLDOFF_DEFAULT, 50). +-define(TYPE_MONITOR_TIMEOUT_DEFAULT, "infinity"). +-define(GET_JOBS_RANGE_RATIO, 0.5). + + +-record(st, { + jtx, + type, + monitor_pid, + subs, % #{JobId => #{Ref => {Pid, State, Seq}}} + pidmap, % #{{Jobid, Pid} => Ref} + refmap % #{Ref => JobId} +}). + + +start_link(Type) -> + gen_server:start_link(?MODULE, [Type], []). + + +subscribe(Type, JobId, State, Seq) -> + case couch_jobs_server:get_notifier_server(Type) of + {ok, Server} -> + CallArgs = {subscribe, JobId, State, Seq, self()}, + Ref = gen_server:call(Server, CallArgs, infinity), + {ok, {Server, Ref}}; + {error, Error} -> + {error, Error} + end. + + +unsubscribe(Server, Ref) when is_reference(Ref) -> + gen_server:call(Server, {unsubscribe, Ref, self()}, infinity). + + +init([Type]) -> + JTx = couch_jobs_fdb:get_jtx(), + St = #st{ + jtx = JTx, + type = Type, + subs = #{}, + pidmap = #{}, + refmap = #{} + }, + VS = get_type_vs(St), + HoldOff = get_holdoff(), + Timeout = get_timeout(), + Pid = couch_jobs_type_monitor:start(Type, VS, HoldOff, Timeout), + {ok, St#st{monitor_pid = Pid}}. + + +terminate(_, _St) -> + ok. + + +handle_call({subscribe, JobId, State, Seq, Pid}, _From, #st{} = St) -> + #st{pidmap = PidMap, refmap = RefMap} = St, + case maps:get({JobId, Pid}, PidMap, not_found) of + not_found -> + Ref = erlang:monitor(process, Pid), + St1 = update_sub(JobId, Ref, Pid, State, Seq, St), + St2 = St1#st{pidmap = PidMap#{{JobId, Pid} => Ref}}, + St3 = St2#st{refmap = RefMap#{Ref => JobId}}, + {reply, Ref, St3}; + Ref when is_reference(Ref) -> + St1 = update_sub(JobId, Ref, Pid, State, Seq, St), + {reply, Ref, St1} + end; + +handle_call({unsubscribe, Ref, Pid}, _From, #st{} = St) -> + {reply, ok, unsubscribe_int(Ref, Pid, St)}; + +handle_call(Msg, _From, St) -> + {stop, {bad_call, Msg}, {bad_call, Msg}, St}. + + +handle_cast(Msg, St) -> + {stop, {bad_cast, Msg}, St}. + + +handle_info({type_updated, VS}, St) -> + VSMax = flush_type_updated_messages(VS), + {noreply, notify_subscribers(VSMax, St)}; + +handle_info({Ref, ready}, St) when is_reference(Ref) -> + % Don't crash out couch_jobs_server and the whole application would need to + % eventually do proper cleanup in erlfdb:wait timeout code. + LogMsg = "~p : spurious erlfdb future ready message ~p", + couch_log:error(LogMsg, [?MODULE, Ref]), + {noreply, St}; + +handle_info({'DOWN', Ref, process, Pid, _}, #st{} = St) -> + {noreply, unsubscribe_int(Ref, Pid, St)}; + +handle_info(Msg, St) -> + {stop, {bad_info, Msg}, St}. + + +code_change(_OldVsn, St, _Extra) -> + {ok, St}. + + +update_subs(JobId, Refs, #st{subs = Subs} = St) when map_size(Refs) =:= 0 -> + St#st{subs = maps:remove(JobId, Subs)}; + +update_subs(JobId, Refs, #st{subs = Subs} = St) when map_size(Refs) > 0 -> + St#st{subs = Subs#{JobId => Refs}}. + + +update_sub(JobId, Ref, Pid, State, Seq, #st{subs = Subs} = St) -> + Refs = maps:get(JobId, Subs, #{}), + update_subs(JobId, Refs#{Ref => {Pid, State, Seq}}, St). + + +remove_sub(JobId, Ref, #st{subs = Subs} = St) -> + case maps:get(JobId, Subs, not_found) of + not_found -> St; + #{} = Refs -> update_subs(JobId, maps:remove(Ref, Refs), St) + end. + + +unsubscribe_int(Id, Ref, Pid, #st{pidmap = PidMap, refmap = RefMap} = St) -> + St1 = remove_sub(Id, Ref, St), + erlang:demonitor(Ref, [flush]), + St1#st{ + pidmap = maps:remove({Id, Pid}, PidMap), + refmap = maps:remove(Ref, RefMap) + }. + + +unsubscribe_int(Ref, Pid, #st{refmap = RefMap} = St) -> + case maps:get(Ref, RefMap, not_found) of + not_found -> St; + Id -> unsubscribe_int(Id, Ref, Pid, St) + end. + + +flush_type_updated_messages(VSMax) -> + receive + {type_updated, VS} -> + flush_type_updated_messages(max(VS, VSMax)) + after + 0 -> VSMax + end. + + +get_jobs(#st{jtx = JTx, type = Type}, InactiveIdMap, Ratio) + when Ratio >= ?GET_JOBS_RANGE_RATIO -> + Filter = fun(JobId) -> maps:is_key(JobId, InactiveIdMap) end, + JobMap = couch_jobs_fdb:tx(JTx, fun(JTx1) -> + couch_jobs_fdb:get_jobs(JTx1, Type, Filter) + end), + maps:map(fun(JobId, _) -> + case maps:is_key(JobId, JobMap) of + true -> maps:get(JobId, JobMap); + false -> {null, not_found, not_found} + end + end, InactiveIdMap); + +get_jobs(#st{jtx = JTx, type = Type}, InactiveIdMap, _) -> + couch_jobs_fdb:tx(JTx, fun(JTx1) -> + maps:map(fun(JobId, _) -> + Job = #{job => true, type => Type, id => JobId}, + case couch_jobs_fdb:get_job_state_and_data(JTx1, Job) of + {ok, Seq, State, Data} -> + {Seq, State, Data}; + {error, not_found} -> + {null, not_found, not_found} + end + end, InactiveIdMap) + end). + + +get_type_vs(#st{jtx = JTx, type = Type}) -> + couch_jobs_fdb:tx(JTx, fun(JTx1) -> + couch_jobs_fdb:get_activity_vs(JTx1, Type) + end). + + +% "Active since" is the set of jobs that have been active (running) +% and updated at least once since the given versionstamp. These are relatively +% cheap to find as it's just a range read in the ?ACTIVITY subspace. +% +get_active_since(#st{} = _St, not_found) -> + #{}; + +get_active_since(#st{jtx = JTx, type = Type, subs = Subs}, VS) -> + AllUpdated = couch_jobs_fdb:tx(JTx, fun(JTx1) -> + couch_jobs_fdb:get_active_since(JTx1, Type, VS) + end), + maps:map(fun(_JobId, Data) -> + {VS, running, Data} + end, maps:with(maps:keys(Subs), AllUpdated)). + + +notify_subscribers(_, #st{subs = Subs} = St) when map_size(Subs) =:= 0 -> + St; + +notify_subscribers(ActiveVS, #st{} = St1) -> + % First gather the easy (cheap) active jobs. Then with those out of way + % inspect each job to get its state. + Active = get_active_since(St1, ActiveVS), + St2 = notify_job_ids(Active, St1), + ActiveIds = maps:keys(Active), + Subs = St2#st.subs, + InactiveIdMap = maps:without(ActiveIds, Subs), + InactiveRatio = maps:size(InactiveIdMap) / maps:size(Subs), + Inactive = get_jobs(St2, InactiveIdMap, InactiveRatio), + notify_job_ids(Inactive, St2). + + +notify_job_ids(#{} = Jobs, #st{type = Type} = St0) -> + maps:fold(fun(Id, {VS, State, Data}, #st{} = StAcc) -> + DoUnsub = lists:member(State, [finished, not_found]), + maps:fold(fun + (_Ref, {_Pid, running, OldVS}, St) when State =:= running, + OldVS >= VS -> + St; + (Ref, {Pid, running, OldVS}, St) when State =:= running, + OldVS < VS -> + % For running state send updates even if state doesn't change + notify(Pid, Ref, Type, Id, State, Data), + update_sub(Id, Ref, Pid, running, VS, St); + (_Ref, {_Pid, OldState, _VS}, St) when OldState =:= State -> + St; + (Ref, {Pid, _State, _VS}, St) -> + notify(Pid, Ref, Type, Id, State, Data), + case DoUnsub of + true -> unsubscribe_int(Id, Ref, Pid, St); + false -> update_sub(Id, Ref, Pid, State, VS, St) + end + end, StAcc, maps:get(Id, StAcc#st.subs, #{})) + end, St0, Jobs). + + +notify(Pid, Ref, Type, Id, State, Data) -> + Pid ! {?COUCH_JOBS_EVENT, Ref, Type, Id, State, Data}. + + +get_holdoff() -> + config:get_integer("couch_jobs", "type_monitor_holdoff_msec", + ?TYPE_MONITOR_HOLDOFF_DEFAULT). + + +get_timeout() -> + Default = ?TYPE_MONITOR_TIMEOUT_DEFAULT, + case config:get("couch_jobs", "type_monitor_timeout_msec", Default) of + "infinity" -> infinity; + Milliseconds -> list_to_integer(Milliseconds) + end. diff --git a/src/couch_jobs/src/couch_jobs_notifier_sup.erl b/src/couch_jobs/src/couch_jobs_notifier_sup.erl new file mode 100644 index 000000000..81d93493b --- /dev/null +++ b/src/couch_jobs/src/couch_jobs_notifier_sup.erl @@ -0,0 +1,64 @@ +% +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_jobs_notifier_sup). + + +-behaviour(supervisor). + + +-export([ + start_link/0, + + start_notifier/1, + stop_notifier/1, + get_child_pids/0 +]). + +-export([ + init/1 +]). + + +start_link() -> + supervisor:start_link({local, ?MODULE}, ?MODULE, []). + + +start_notifier(Type) -> + supervisor:start_child(?MODULE, [Type]). + + +stop_notifier(Pid) -> + supervisor:terminate_child(?MODULE, Pid). + + +get_child_pids() -> + lists:map(fun({_Id, Pid, _Type, _Mod}) -> + Pid + end, supervisor:which_children(?MODULE)). + + +init(_) -> + Flags = #{ + strategy => simple_one_for_one, + intensity => 10, + period => 3 + }, + Children = [ + #{ + id => couch_jobs_notifier, + restart => temporary, + start => {couch_jobs_notifier, start_link, []} + } + ], + {ok, {Flags, Children}}. diff --git a/src/couch_jobs/src/couch_jobs_pending.erl b/src/couch_jobs/src/couch_jobs_pending.erl new file mode 100644 index 000000000..ab53c59d1 --- /dev/null +++ b/src/couch_jobs/src/couch_jobs_pending.erl @@ -0,0 +1,143 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_jobs_pending). + + +-export([ + enqueue/4, + dequeue/4, + remove/4 +]). + + +-include("couch_jobs.hrl"). + + +-define(RANGE_LIMIT, 1024). + + +enqueue(#{jtx := true} = JTx, Type, STime, JobId) -> + #{tx := Tx, jobs_path := Jobs} = JTx, + Key = erlfdb_tuple:pack({?PENDING, Type, STime, JobId}, Jobs), + erlfdb:set(Tx, Key, <<>>), + WatchKey = erlfdb_tuple:pack({?WATCHES_PENDING, Type}, Jobs), + erlfdb:add(Tx, WatchKey, 1), + ok. + + +dequeue(#{jtx := true} = JTx, Type, _, true) -> + #{tx := Tx, jobs_path := Jobs} = JTx, + Prefix = erlfdb_tuple:pack({?PENDING, Type, 0}, Jobs), + case get_random_item(Tx, Prefix) of + {error, not_found} -> + {not_found, get_pending_watch(JTx, Type)}; + {ok, PendingKey} -> + erlfdb:clear(Tx, PendingKey), + {JobId} = erlfdb_tuple:unpack(PendingKey, Prefix), + {ok, JobId} + end; + +dequeue(#{jtx := true} = JTx, Type, MaxPriority, _) -> + #{tx := Tx, jobs_path := Jobs} = JTx, + Prefix = erlfdb_tuple:pack({?PENDING, Type}, Jobs), + StartKeySel = erlfdb_key:first_greater_than(Prefix), + End = erlfdb_tuple:pack({MaxPriority, <<16#FF>>}, Prefix), + EndKeySel = erlfdb_key:first_greater_or_equal(End), + case clear_random_key_from_range(Tx, StartKeySel, EndKeySel) of + {error, not_found} -> + {not_found, get_pending_watch(JTx, Type)}; + {ok, PendingKey} -> + {_, JobId} = erlfdb_tuple:unpack(PendingKey, Prefix), + {ok, JobId} + end. + + +remove(#{jtx := true} = JTx, Type, JobId, STime) -> + #{tx := Tx, jobs_path := Jobs} = JTx, + Key = erlfdb_tuple:pack({?PENDING, Type, STime, JobId}, Jobs), + erlfdb:clear(Tx, Key). + + +%% Private functions + + +% Pick a random item from the range without reading the keys in first. However +% the constraint it that IDs should looks like random UUIDs +get_random_item(Tx, Prefix) -> + Id = fabric2_util:uuid(), + Snapshot = erlfdb:snapshot(Tx), + % Try to be fair and switch evently between trying ids before or after the + % randomly generated one. Otherwise, trying before first, will leave a lot + % of <<"fff...">> IDs in the queue for too long and trying "after" first + % will leave a lot of <"00...">> ones waiting. + case rand:uniform() > 0.5 of + true -> + case get_after(Snapshot, Prefix, Id) of + {error, not_found} -> get_before(Snapshot, Prefix, Id); + {ok, Key} -> {ok, Key} + end; + false -> + case get_before(Snapshot, Prefix, Id) of + {error, not_found} -> get_after(Snapshot, Prefix, Id); + {ok, Key} -> {ok, Key} + end + end. + + +get_before(Snapshot, Prefix, Id) -> + KSel = erlfdb_key:last_less_or_equal(erlfdb_tuple:pack({Id}, Prefix)), + PrefixSize = byte_size(Prefix), + case erlfdb:wait(erlfdb:get_key(Snapshot, KSel)) of + <> = Key -> {ok, Key}; + _ -> {error, not_found} + end. + + +get_after(Snapshot, Prefix, Id) -> + KSel = erlfdb_key:first_greater_or_equal(erlfdb_tuple:pack({Id}, Prefix)), + PrefixSize = byte_size(Prefix), + case erlfdb:wait(erlfdb:get_key(Snapshot, KSel)) of + <> = Key -> {ok, Key}; + _ -> {error, not_found} + end. + + +% Pick a random key from the range snapshot. Then radomly pick a key to clear. +% Before clearing, ensure there is a read conflict on the key in in case other +% workers have picked the same key. +% +clear_random_key_from_range(Tx, Start, End) -> + Opts = [ + {limit, ?RANGE_LIMIT}, + {snapshot, true} + ], + case erlfdb:wait(erlfdb:get_range(Tx, Start, End, Opts)) of + [] -> + {error, not_found}; + [{Key, _}] -> + erlfdb:add_read_conflict_key(Tx, Key), + erlfdb:clear(Tx, Key), + {ok, Key}; + [{_, _} | _] = KVs -> + Index = rand:uniform(length(KVs)), + {Key, _} = lists:nth(Index, KVs), + erlfdb:add_read_conflict_key(Tx, Key), + erlfdb:clear(Tx, Key), + {ok, Key} + end. + + +get_pending_watch(#{jtx := true} = JTx, Type) -> + #{tx := Tx, jobs_path := Jobs} = couch_jobs_fdb:get_jtx(JTx), + Key = erlfdb_tuple:pack({?WATCHES_PENDING, Type}, Jobs), + erlfdb:watch(Tx, Key). diff --git a/src/couch_jobs/src/couch_jobs_server.erl b/src/couch_jobs/src/couch_jobs_server.erl new file mode 100644 index 000000000..2e03c7dcf --- /dev/null +++ b/src/couch_jobs/src/couch_jobs_server.erl @@ -0,0 +1,193 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_jobs_server). + +-behaviour(gen_server). + + +-export([ + start_link/0, + get_notifier_server/1, + force_check_types/0 +]). + +-export([ + init/1, + terminate/2, + handle_call/3, + handle_cast/2, + handle_info/2, + code_change/3 +]). + + +-define(TYPE_CHECK_PERIOD_DEFAULT, 15000). +-define(MAX_JITTER_DEFAULT, 5000). + + +start_link() -> + gen_server:start_link({local, ?MODULE}, ?MODULE, nil, []). + + +get_notifier_server(Type) -> + case get_type_pid_refs(Type) of + {{_, _}, {NotifierPid, _}} -> + {ok, NotifierPid}; + not_found -> + force_check_types(), + case get_type_pid_refs(Type) of + {{_, _}, {NotifierPid, _}} -> + {ok, NotifierPid}; + not_found -> + {error, not_found} + end + end. + + +force_check_types() -> + gen_server:call(?MODULE, check_types, infinity). + + +init(_) -> + % If couch_jobs_server is after the notifiers and activity supervisor. If + % it restart, there could be some stale notifier or activity monitors. Kill + % those as later on we'd start new ones anyway. + reset_monitors(), + reset_notifiers(), + ets:new(?MODULE, [protected, named_table]), + check_types(), + schedule_check(), + {ok, nil}. + + +terminate(_, _St) -> + ok. + + +handle_call(check_types, _From, St) -> + check_types(), + {reply, ok, St}; + +handle_call(Msg, _From, St) -> + {stop, {bad_call, Msg}, {bad_call, Msg}, St}. + + +handle_cast(Msg, St) -> + {stop, {bad_cast, Msg}, St}. + + +handle_info(check_types, St) -> + check_types(), + schedule_check(), + {noreply, St}; + +handle_info({'DOWN', _Ref, process, Pid, Reason}, St) -> + LogMsg = "~p : process ~p exited with ~p", + couch_log:error(LogMsg, [?MODULE, Pid, Reason]), + {stop, {unexpected_process_exit, Pid, Reason}, St}; + +handle_info({Ref, ready}, St) when is_reference(Ref) -> + % Don't crash out couch_jobs_server and the whole application would need to + % eventually do proper cleanup in erlfdb:wait timeout code. + LogMsg = "~p : spurious erlfdb future ready message ~p", + couch_log:error(LogMsg, [?MODULE, Ref]), + {noreply, St}; + +handle_info(Msg, St) -> + {stop, {bad_info, Msg}, St}. + + +code_change(_OldVsn, St, _Extra) -> + {ok, St}. + + +check_types() -> + FdbTypes = fdb_types(), + EtsTypes = ets_types(), + ToStart = FdbTypes -- EtsTypes, + ToStop = EtsTypes -- FdbTypes, + lists:foreach(fun(Type) -> start_monitors(Type) end, ToStart), + lists:foreach(fun(Type) -> stop_monitors(Type) end, ToStop). + + +start_monitors(Type) -> + MonPidRef = case couch_jobs_activity_monitor_sup:start_monitor(Type) of + {ok, Pid1} -> {Pid1, monitor(process, Pid1)}; + {error, Error1} -> error({failed_to_start_monitor, Type, Error1}) + end, + NotifierPidRef = case couch_jobs_notifier_sup:start_notifier(Type) of + {ok, Pid2} -> {Pid2, monitor(process, Pid2)}; + {error, Error2} -> error({failed_to_start_notifier, Type, Error2}) + end, + ets:insert_new(?MODULE, {Type, MonPidRef, NotifierPidRef}). + + +stop_monitors(Type) -> + {{MonPid, MonRef}, {NotifierPid, NotifierRef}} = get_type_pid_refs(Type), + ok = couch_jobs_activity_monitor_sup:stop_monitor(MonPid), + demonitor(MonRef, [flush]), + ok = couch_jobs_notifier_sup:stop_notifier(NotifierPid), + demonitor(NotifierRef, [flush]), + ets:delete(?MODULE, Type). + + +reset_monitors() -> + lists:foreach(fun(Pid) -> + couch_jobs_activity_monitor_sup:stop_monitor(Pid) + end, couch_jobs_activity_monitor_sup:get_child_pids()). + + +reset_notifiers() -> + lists:foreach(fun(Pid) -> + couch_jobs_notifier_sup:stop_notifier(Pid) + end, couch_jobs_notifier_sup:get_child_pids()). + + +get_type_pid_refs(Type) -> + case ets:lookup(?MODULE, Type) of + [{_, MonPidRef, NotifierPidRef}] -> {MonPidRef, NotifierPidRef}; + [] -> not_found + end. + + +ets_types() -> + lists:flatten(ets:match(?MODULE, {'$1', '_', '_'})). + + +fdb_types() -> + try + couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(), fun(JTx) -> + couch_jobs_fdb:get_types(JTx) + end) + catch + error:{timeout, _} -> + couch_log:warning("~p : Timed out connecting to FDB", [?MODULE]), + [] + end. + + +schedule_check() -> + Timeout = get_period_msec(), + MaxJitter = max(Timeout div 2, get_max_jitter_msec()), + Wait = Timeout + rand:uniform(max(1, MaxJitter)), + erlang:send_after(Wait, self(), check_types). + + +get_period_msec() -> + config:get_integer("couch_jobs", "type_check_period_msec", + ?TYPE_CHECK_PERIOD_DEFAULT). + + +get_max_jitter_msec() -> + config:get_integer("couch_jobs", "type_check_max_jitter_msec", + ?MAX_JITTER_DEFAULT). diff --git a/src/couch_jobs/src/couch_jobs_sup.erl b/src/couch_jobs/src/couch_jobs_sup.erl new file mode 100644 index 000000000..d79023777 --- /dev/null +++ b/src/couch_jobs/src/couch_jobs_sup.erl @@ -0,0 +1,66 @@ +% +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_jobs_sup). + + +-behaviour(supervisor). + + +-export([ + start_link/0 +]). + +-export([ + init/1 +]). + + +start_link() -> + supervisor:start_link({local, ?MODULE}, ?MODULE, []). + + +init([]) -> + Flags = #{ + strategy => rest_for_one, + intensity => 3, + period => 10 + }, + Children = [ + #{ + id => couch_jobs_fdb, + restart => transient, + start => {couch_jobs_fdb, init_cache, []} + }, + #{ + id => couch_jobs_activity_monitor_sup, + restart => permanent, + shutdown => brutal_kill, + type => supervisor, + start => {couch_jobs_activity_monitor_sup, start_link, []} + }, + #{ + id => couch_jobs_notifier_sup, + restart => permanent, + shutdown => brutal_kill, + type => supervisor, + start => {couch_jobs_notifier_sup, start_link, []} + }, + #{ + id => couch_jobs_server, + restart => permanent, + shutdown => brutal_kill, + start => {couch_jobs_server, start_link, []} + } + ], + {ok, {Flags, Children}}. diff --git a/src/couch_jobs/src/couch_jobs_type_monitor.erl b/src/couch_jobs/src/couch_jobs_type_monitor.erl new file mode 100644 index 000000000..562a866da --- /dev/null +++ b/src/couch_jobs/src/couch_jobs_type_monitor.erl @@ -0,0 +1,84 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_jobs_type_monitor). + + +-export([ + start/4 +]). + + +-include("couch_jobs.hrl"). + + +-record(st, { + jtx, + type, + vs, + parent, + timestamp, + holdoff, + timeout +}). + + +start(Type, VS, HoldOff, Timeout) -> + Parent = self(), + spawn_link(fun() -> + loop(#st{ + jtx = couch_jobs_fdb:get_jtx(), + type = Type, + vs = VS, + parent = Parent, + timestamp = 0, + holdoff = HoldOff, + timeout = Timeout + }) + end). + + +loop(#st{vs = VS, timeout = Timeout} = St) -> + {St1, Watch} = case get_vs_and_watch(St) of + {VS1, W} when VS1 =/= VS -> {notify(St#st{vs = VS1}), W}; + {VS, W} -> {St, W} + end, + try + erlfdb:wait(Watch, [{timeout, Timeout}]) + catch + error:{erlfdb_error, 1009} -> + erlfdb:cancel(Watch, [flush]), + ok; + error:{timeout, _} -> + erlfdb:cancel(Watch, [flush]), + ok + end, + loop(St1). + + +notify(#st{} = St) -> + #st{holdoff = HoldOff, parent = Pid, timestamp = Ts, vs = VS} = St, + Now = erlang:system_time(millisecond), + case Now - Ts of + Dt when Dt < HoldOff -> + timer:sleep(max(HoldOff - Dt, 0)); + _ -> + ok + end, + Pid ! {type_updated, VS}, + St#st{timestamp = Now}. + + +get_vs_and_watch(#st{jtx = JTx, type = Type}) -> + couch_jobs_fdb:tx(JTx, fun(JTx1) -> + couch_jobs_fdb:get_activity_vs_and_watch(JTx1, Type) + end). diff --git a/src/couch_jobs/test/couch_jobs_tests.erl b/src/couch_jobs/test/couch_jobs_tests.erl new file mode 100644 index 000000000..a7e085e40 --- /dev/null +++ b/src/couch_jobs/test/couch_jobs_tests.erl @@ -0,0 +1,606 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_jobs_tests). + + +-include_lib("couch/include/couch_db.hrl"). +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("eunit/include/eunit.hrl"). + + +% Job creation API can take an undefined Tx object +% in that case it will start its own transaction +-define(TX, undefined). + + +couch_jobs_basic_test_() -> + { + "Test couch jobs basics", + { + setup, + fun setup_couch/0, fun teardown_couch/1, + { + foreach, + fun setup/0, fun teardown/1, + [ + fun add_remove_pending/1, + fun add_remove_errors/1, + fun get_job_data_and_state/1, + fun resubmit_as_job_creator/1, + fun type_timeouts_and_server/1, + fun dead_notifier_restarts_jobs_server/1, + fun bad_messages_restart_couch_jobs_server/1, + fun bad_messages_restart_notifier/1, + fun bad_messages_restart_activity_monitor/1, + fun basic_accept_and_finish/1, + fun accept_blocking/1, + fun job_processor_update/1, + fun resubmit_enqueues_job/1, + fun resubmit_custom_schedtime/1, + fun accept_max_schedtime/1, + fun accept_no_schedule/1, + fun subscribe/1, + fun subscribe_wait_multiple/1, + fun enqueue_inactive/1, + fun remove_running_job/1, + fun check_get_jobs/1, + fun use_fabric_transaction_object/1 + ] + } + } + }. + + +setup_couch() -> + test_util:start_couch([fabric]). + + +teardown_couch(Ctx) -> + test_util:stop_couch(Ctx), + meck:unload(). + + +setup() -> + application:start(couch_jobs), + clear_jobs(), + T1 = {<<"t1">>, 1024}, % a complex type should work + T2 = 42, % a number should work as well + T1Timeout = 2, + T2Timeout = 3, + couch_jobs:set_type_timeout(T1, T1Timeout), + couch_jobs:set_type_timeout(T2, T2Timeout), + #{ + t1 => T1, + t2 => T2, + t1_timeout => T1Timeout, + j1 => <<"j1">>, + j2 => <<"j2">>, + dbname => ?tempdb() + }. + + +teardown(#{dbname := DbName}) -> + clear_jobs(), + application:stop(couch_jobs), + AllDbs = fabric2_db:list_dbs(), + case lists:member(DbName, AllDbs) of + true -> ok = fabric2_db:delete(DbName, []); + false -> ok + end, + meck:unload(). + + +clear_jobs() -> + couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(), fun(JTx) -> + #{jobs_path := Jobs, tx := Tx} = JTx, + erlfdb:clear_range_startswith(Tx, Jobs) + end). + + +restart_app() -> + application:stop(couch_jobs), + application:start(couch_jobs), + couch_jobs_server:force_check_types(). + + +get_job(Type, JobId) -> + couch_jobs_fdb:get_job(Type, JobId). + + +add_remove_pending(#{t1 := T1, j1 := J1, t2 := T2, j2 := J2}) -> + ?_test(begin + ?assertEqual(ok, couch_jobs:add(?TX, T1, J1, #{})), + ?assertMatch(#{state := pending, data := #{}}, get_job(T1, J1)), + ?assertEqual(ok, couch_jobs:remove(?TX, T1, J1)), + % Data and numeric type should work as well. Also do it in a + % transaction + Data = #{<<"x">> => 42}, + ?assertEqual(ok, fabric2_fdb:transactional(fun(Tx) -> + couch_jobs:add(Tx, T2, J2, Data) + end)), + ?assertMatch(#{state := pending, data := Data}, get_job(T2, J2)), + ?assertEqual(ok, couch_jobs:remove(?TX, T2, J2)) + end). + + +get_job_data_and_state(#{t1 := T, j1 := J}) -> + ?_test(begin + Data = #{<<"x">> => 42}, + ok = couch_jobs:add(?TX, T, J, Data), + ?assertEqual({ok, Data}, couch_jobs:get_job_data(?TX, T, J)), + ?assertEqual({ok, pending}, couch_jobs:get_job_state(?TX, T, J)), + ?assertEqual(ok, couch_jobs:remove(?TX, T, J)), + ?assertEqual({error, not_found}, couch_jobs:get_job_data(?TX, T, J)), + ?assertEqual({error, not_found}, couch_jobs:get_job_state(?TX, T, J)) + end). + + +add_remove_errors(#{t1 := T, j1 := J}) -> + ?_test(begin + ?assertEqual({error, not_found}, couch_jobs:remove(?TX, 999, <<"x">>)), + ?assertMatch({error, {json_encoding_error, _}}, couch_jobs:add(?TX, T, + J, #{1 => 2})), + ?assertEqual({error, no_type_timeout}, couch_jobs:add(?TX, <<"x">>, J, + #{})), + ?assertEqual(ok, couch_jobs:add(?TX, T, J, #{})), + ?assertEqual(ok, couch_jobs:add(?TX, T, J, #{})), + ?assertEqual(ok, couch_jobs:remove(?TX, T, J)) + end). + + +resubmit_as_job_creator(#{t1 := T, j1 := J}) -> + ?_test(begin + Data = #{<<"x">> => 42}, + ok = couch_jobs:add(?TX, T, J, Data, 15), + + % Job was pending, doesn't get resubmitted + ok = couch_jobs:add(?TX, T, J, Data, 16), + ?assertMatch(#{state := pending, stime := 16}, get_job(T, J)), + + {ok, Job1, Data} = couch_jobs:accept(T), + + % If is running, it gets flagged to be resubmitted + ok = couch_jobs:add(?TX, T, J, Data, 17), + ?assertMatch(#{state := running, stime := 17}, get_job(T, J)), + ?assertEqual(true, couch_jobs:is_resubmitted(get_job(T, J))), + + ?assertEqual(ok, couch_jobs:finish(?TX, Job1)), + % It should be pending according to the resubmit flag + ?assertMatch(#{state := pending, stime := 17}, get_job(T, J)), + + % A finished job will be re-enqueued + {ok, Job2, _} = couch_jobs:accept(T), + ?assertEqual(ok, couch_jobs:finish(?TX, Job2)), + ?assertMatch(#{state := finished, stime := 17}, get_job(T, J)), + ok = couch_jobs:add(?TX, T, J, Data, 18), + ?assertMatch(#{state := pending, stime := 18}, get_job(T, J)) + end). + + +type_timeouts_and_server(#{t1 := T, t1_timeout := T1Timeout}) -> + ?_test(begin + couch_jobs_server:force_check_types(), + + ?assertEqual(T1Timeout, couch_jobs:get_type_timeout(T)), + + ?assertEqual(2, + length(couch_jobs_activity_monitor_sup:get_child_pids())), + ?assertEqual(2, length(couch_jobs_notifier_sup:get_child_pids())), + ?assertMatch({ok, _}, couch_jobs_server:get_notifier_server(T)), + + ?assertEqual(ok, couch_jobs:set_type_timeout(<<"t3">>, 8)), + couch_jobs_server:force_check_types(), + ?assertEqual(3, + length(couch_jobs_activity_monitor_sup:get_child_pids())), + ?assertEqual(3, length(couch_jobs_notifier_sup:get_child_pids())), + + ?assertEqual(ok, couch_jobs:clear_type_timeout(<<"t3">>)), + couch_jobs_server:force_check_types(), + ?assertEqual(2, + length(couch_jobs_activity_monitor_sup:get_child_pids())), + ?assertEqual(2, + length(couch_jobs_notifier_sup:get_child_pids())), + ?assertMatch({error, _}, + couch_jobs_server:get_notifier_server(<<"t3">>)), + + ?assertEqual(not_found, couch_jobs:get_type_timeout(<<"t3">>)) + end). + + +dead_notifier_restarts_jobs_server(#{}) -> + ?_test(begin + couch_jobs_server:force_check_types(), + + ServerPid = whereis(couch_jobs_server), + Ref = monitor(process, ServerPid), + + [Notifier1, _Notifier2] = couch_jobs_notifier_sup:get_child_pids(), + exit(Notifier1, kill), + + % Killing a notifier should kill the server as well + receive {'DOWN', Ref, _, _, _} -> ok end + end). + + +bad_messages_restart_couch_jobs_server(#{}) -> + ?_test(begin + % couch_jobs_server dies on bad cast + ServerPid1 = whereis(couch_jobs_server), + Ref1 = monitor(process, ServerPid1), + gen_server:cast(ServerPid1, bad_cast), + receive {'DOWN', Ref1, _, _, _} -> ok end, + + restart_app(), + + % couch_jobs_server dies on bad call + ServerPid2 = whereis(couch_jobs_server), + Ref2 = monitor(process, ServerPid2), + catch gen_server:call(ServerPid2, bad_call), + receive {'DOWN', Ref2, _, _, _} -> ok end, + + restart_app(), + + % couch_jobs_server dies on bad info + ServerPid3 = whereis(couch_jobs_server), + Ref3 = monitor(process, ServerPid3), + ServerPid3 ! a_random_message, + receive {'DOWN', Ref3, _, _, _} -> ok end, + + restart_app() + end). + + +bad_messages_restart_notifier(#{}) -> + ?_test(begin + couch_jobs_server:force_check_types(), + + % bad cast kills the activity monitor + [AMon1, _] = couch_jobs_notifier_sup:get_child_pids(), + Ref1 = monitor(process, AMon1), + gen_server:cast(AMon1, bad_cast), + receive {'DOWN', Ref1, _, _, _} -> ok end, + + restart_app(), + + % bad calls restart activity monitor + [AMon2, _] = couch_jobs_notifier_sup:get_child_pids(), + Ref2 = monitor(process, AMon2), + catch gen_server:call(AMon2, bad_call), + receive {'DOWN', Ref2, _, _, _} -> ok end, + + restart_app(), + + % bad info message kills activity monitor + [AMon3, _] = couch_jobs_notifier_sup:get_child_pids(), + Ref3 = monitor(process, AMon3), + AMon3 ! a_bad_message, + receive {'DOWN', Ref3, _, _, _} -> ok end, + + + restart_app() + end). + + +bad_messages_restart_activity_monitor(#{}) -> + ?_test(begin + couch_jobs_server:force_check_types(), + + % bad cast kills the activity monitor + [AMon1, _] = couch_jobs_activity_monitor_sup:get_child_pids(), + Ref1 = monitor(process, AMon1), + gen_server:cast(AMon1, bad_cast), + receive {'DOWN', Ref1, _, _, _} -> ok end, + + restart_app(), + + % bad calls restart activity monitor + [AMon2, _] = couch_jobs_activity_monitor_sup:get_child_pids(), + Ref2 = monitor(process, AMon2), + catch gen_server:call(AMon2, bad_call), + receive {'DOWN', Ref2, _, _, _} -> ok end, + + restart_app(), + + % bad info message kills activity monitor + [AMon3, _] = couch_jobs_activity_monitor_sup:get_child_pids(), + Ref3 = monitor(process, AMon3), + AMon3 ! a_bad_message, + receive {'DOWN', Ref3, _, _, _} -> ok end, + + restart_app() + end). + + +basic_accept_and_finish(#{t1 := T, j1 := J}) -> + ?_test(begin + ok = couch_jobs:add(?TX, T, J, #{}), + {ok, Job, #{}} = couch_jobs:accept(T), + ?assertMatch(#{state := running}, get_job(T, J)), + % check json validation for bad data in finish + ?assertMatch({error, {json_encoding_error, _}}, + fabric2_fdb:transactional(fun(Tx) -> + couch_jobs:finish(Tx, Job, #{1 => 1}) + end)), + Data = #{<<"x">> => 42}, + ?assertEqual(ok, fabric2_fdb:transactional(fun(Tx) -> + couch_jobs:finish(Tx, Job, Data) + end)), + ?assertMatch(#{state := finished, data := Data}, get_job(T, J)) + end). + + +accept_blocking(#{t1 := T, j1 := J1, j2 := J2}) -> + ?_test(begin + Accept = fun() -> exit(couch_jobs:accept(T)) end, + WaitAccept = fun(Ref) -> + receive + {'DOWN', Ref, _, _, Res} -> Res + after + 500 -> timeout + end + end, + {_, Ref1} = spawn_monitor(Accept), + ok = couch_jobs:add(?TX, T, J1, #{}), + ?assertMatch({ok, #{id := J1}, #{}}, WaitAccept(Ref1)), + {_, Ref2} = spawn_monitor(Accept), + ?assertEqual(timeout, WaitAccept(Ref2)), + ok = couch_jobs:add(?TX, T, J2, #{}), + ?assertMatch({ok, #{id := J2}, #{}}, WaitAccept(Ref2)) + end). + + +job_processor_update(#{t1 := T, j1 := J}) -> + ?_test(begin + ok = couch_jobs:add(?TX, T, J, #{}), + {ok, Job, #{}} = couch_jobs:accept(T), + + % Use proper transactions in a few places here instead of passing in + % ?TX This is mostly to increase code coverage + + ?assertMatch({ok, #{job := true}}, fabric2_fdb:transactional(fun(Tx) -> + couch_jobs:update(Tx, Job, #{<<"x">> => 1}) + end)), + + ?assertMatch(#{data := #{<<"x">> := 1}, state := running}, + get_job(T, J)), + + ?assertMatch({ok, #{job := true}}, fabric2_fdb:transactional(fun(Tx) -> + couch_jobs:update(Tx, Job) + end)), + + ?assertMatch(#{data := #{<<"x">> := 1}, state := running}, + get_job(T, J)), + + ?assertMatch({ok, #{job := true}}, fabric2_fdb:transactional(fun(Tx) -> + couch_jobs:update(Tx, Job, #{<<"x">> => 2}) + end)), + + % check json validation for bad data in update + ?assertMatch({error, {json_encoding_error, _}}, + fabric2_fdb:transactional(fun(Tx) -> + couch_jobs:update(Tx, Job, #{1 => 1}) + end)), + + ?assertMatch(#{data := #{<<"x">> := 2}, state := running}, + get_job(T, J)), + + % Finish may update the data as well + ?assertEqual(ok, couch_jobs:finish(?TX, Job, #{<<"x">> => 3})), + ?assertMatch(#{data := #{<<"x">> := 3}, state := finished}, + get_job(T, J)) + end). + + +resubmit_enqueues_job(#{t1 := T, j1 := J}) -> + ?_test(begin + ok = couch_jobs:add(?TX, T, J, #{}), + {ok, Job1, #{}} = couch_jobs:accept(T), + ?assertMatch({ok, _}, couch_jobs:resubmit(?TX, Job1, 6)), + ?assertEqual(ok, couch_jobs:finish(?TX, Job1)), + ?assertMatch(#{state := pending, stime := 6}, get_job(T, J)), + {ok, Job2, #{}} = couch_jobs:accept(T), + ?assertEqual(ok, couch_jobs:finish(?TX, Job2)), + ?assertMatch(#{state := finished}, get_job(T, J)) + end). + + +resubmit_custom_schedtime(#{t1 := T, j1 := J}) -> + ?_test(begin + ?assertEqual(ok, couch_jobs:add(?TX, T, J, #{}, 7)), + {ok, Job, #{}} = couch_jobs:accept(T), + ?assertMatch({ok, _}, couch_jobs:resubmit(?TX, Job, 9)), + ?assertEqual(ok, couch_jobs:finish(?TX, Job)), + ?assertMatch(#{stime := 9, state := pending}, get_job(T, J)) + end). + + +accept_max_schedtime(#{t1 := T, j1 := J1, j2 := J2}) -> + ?_test(begin + ok = couch_jobs:add(?TX, T, J1, #{}, 5000), + ok = couch_jobs:add(?TX, T, J2, #{}, 3000), + ?assertEqual({error, not_found}, couch_jobs:accept(T, + #{max_sched_time => 1000})), + ?assertMatch({ok, #{id := J2}, _}, couch_jobs:accept(T, + #{max_sched_time => 3000})), + ?assertMatch({ok, #{id := J1}, _}, couch_jobs:accept(T, + #{max_sched_time => 9000})) + end). + + +accept_no_schedule(#{t1 := T}) -> + ?_test(begin + JobCount = 25, + Jobs = [fabric2_util:uuid() || _ <- lists:seq(1, JobCount)], + [couch_jobs:add(?TX, T, J, #{}) || J <- Jobs], + InvalidOpts = #{no_schedule => true, max_sched_time => 1}, + ?assertMatch({error, _}, couch_jobs:accept(T, InvalidOpts)), + AcceptOpts = #{no_schedule => true}, + Accepted = [begin + {ok, #{id := J}, _} = couch_jobs:accept(T, AcceptOpts), + J + end || _ <- lists:seq(1, JobCount)], + ?assertEqual(lists:sort(Jobs), lists:sort(Accepted)) + end). + + +subscribe(#{t1 := T, j1 := J}) -> + ?_test(begin + ok = couch_jobs:add(?TX, T, J, #{<<"z">> => 1}), + + ?assertEqual({error, not_found}, couch_jobs:subscribe(<<"xyz">>, J)), + ?assertEqual({error, not_found}, couch_jobs:subscribe(T, <<"j5">>)), + + SubRes0 = couch_jobs:subscribe(T, J), + ?assertMatch({ok, {_, _}, pending, #{<<"z">> := 1}}, SubRes0), + {ok, SubId0, pending, _} = SubRes0, + + SubRes1 = couch_jobs:subscribe(T, J), + ?assertEqual(SubRes0, SubRes1), + + ?assertEqual(ok, couch_jobs:unsubscribe(SubId0)), + + SubRes = couch_jobs:subscribe(T, J), + ?assertMatch({ok, {_, _}, pending, #{<<"z">> := 1}}, SubRes), + {ok, SubId, pending, _} = SubRes, + + {ok, Job, _} = couch_jobs:accept(T), + ?assertMatch({T, J, running, #{<<"z">> := 1}}, + couch_jobs:wait(SubId, 5000)), + + % Make sure we get intermediate `running` updates + ?assertMatch({ok, _}, couch_jobs:update(?TX, Job, #{<<"z">> => 2})), + ?assertMatch({T, J, running, #{<<"z">> := 2}}, + couch_jobs:wait(SubId, 5000)), + + ?assertEqual(ok, couch_jobs:finish(?TX, Job, #{<<"z">> => 3})), + ?assertMatch({T, J, finished, #{<<"z">> := 3}}, + couch_jobs:wait(SubId, finished, 5000)), + + ?assertEqual(timeout, couch_jobs:wait(SubId, 50)), + + ?assertEqual({ok, finished, #{<<"z">> => 3}}, + couch_jobs:subscribe(T, J)), + + ?assertEqual(ok, couch_jobs:remove(?TX, T, J)), + ?assertEqual({error, not_found}, couch_jobs:subscribe(T, J)) + end). + + +subscribe_wait_multiple(#{t1 := T, j1 := J1, j2 := J2}) -> + ?_test(begin + ok = couch_jobs:add(?TX, T, J1, #{}), + ok = couch_jobs:add(?TX, T, J2, #{}), + + {ok, S1, pending, #{}} = couch_jobs:subscribe(T, J1), + {ok, S2, pending, #{}} = couch_jobs:subscribe(T, J2), + + Subs = [S1, S2], + + % Accept one job. Only one running update is expected. PJob1 and PJob2 + % do not necessarily correspond got Job1 and Job2, they could be + % accepted as Job2 and Job1 respectively. + {ok, PJob1, _} = couch_jobs:accept(T), + ?assertMatch({_, _, running, _}, couch_jobs:wait(Subs, 5000)), + ?assertMatch(timeout, couch_jobs:wait(Subs, 50)), + + % Accept another job. Expect another update. + {ok, PJob2, _} = couch_jobs:accept(T), + ?assertMatch({_, _, running, _}, couch_jobs:wait(Subs, 5000)), + ?assertMatch(timeout, couch_jobs:wait(Subs, 50)), + + ?assertMatch({ok, _}, couch_jobs:update(?TX, PJob1, #{<<"q">> => 5})), + ?assertMatch({ok, _}, couch_jobs:update(?TX, PJob2, #{<<"r">> => 6})), + + % Each job was updated once, expect two running updates. + ?assertMatch({_, _, running, _}, couch_jobs:wait(Subs, 5000)), + ?assertMatch({_, _, running, _}, couch_jobs:wait(Subs, 5000)), + + % Finish one job. Expect one finished update only. + ?assertEqual(ok, couch_jobs:finish(?TX, PJob1)), + + ?assertMatch({_, _, finished, #{<<"q">> := 5}}, + couch_jobs:wait(Subs, finished, 5000)), + ?assertMatch(timeout, couch_jobs:wait(Subs, finished, 50)), + + % Finish another job. However, unsubscribe should flush the + % the message and we should not get it. + ?assertEqual(ok, couch_jobs:finish(?TX, PJob2)), + ?assertEqual(ok, couch_jobs:unsubscribe(S1)), + ?assertEqual(ok, couch_jobs:unsubscribe(S2)), + ?assertMatch(timeout, couch_jobs:wait(Subs, finished, 50)) + end). + + +enqueue_inactive(#{t1 := T, j1 := J, t1_timeout := Timeout}) -> + {timeout, 10, ?_test(begin + couch_jobs_server:force_check_types(), + + ok = couch_jobs:add(?TX, T, J, #{<<"y">> => 1}), + {ok, Job, _} = couch_jobs:accept(T), + + {ok, SubId, running, #{<<"y">> := 1}} = couch_jobs:subscribe(T, J), + Wait = 3 * Timeout * 1000, + ?assertEqual({T, J, pending, #{<<"y">> => 1}}, + couch_jobs:wait(SubId, pending, Wait)), + ?assertMatch(#{state := pending}, get_job(T, J)), + + % After job was re-enqueued, old job processor can't update it anymore + ?assertEqual({error, halt}, couch_jobs:update(?TX, Job)), + ?assertEqual({error, halt}, couch_jobs:finish(?TX, Job)) + end)}. + + +remove_running_job(#{t1 := T, j1 := J}) -> + ?_test(begin + ok = couch_jobs:add(?TX, T, J, #{}), + {ok, Job, _} = couch_jobs:accept(T), + ?assertEqual(ok, couch_jobs:remove(?TX, T, J)), + ?assertEqual({error, not_found}, couch_jobs:remove(?TX, T, J)), + ?assertEqual({error, halt}, couch_jobs:update(?TX, Job)), + ?assertEqual({error, halt}, couch_jobs:finish(?TX, Job)) + end). + + +check_get_jobs(#{t1 := T1, j1 := J1, t2 := T2, j2 := J2}) -> + ?_test(begin + ok = couch_jobs:add(?TX, T1, J1, #{}), + ok = couch_jobs:add(?TX, T2, J2, #{}), + ?assertMatch([ + {T2, J2, pending, #{}}, + {T1, J1, pending, #{}} + ], lists:sort(couch_jobs_fdb:get_jobs())), + {ok, _, _} = couch_jobs:accept(T1), + ?assertMatch([ + {T2, J2, pending, #{}}, + {T1, J1, running, #{}} + ], lists:sort(couch_jobs_fdb:get_jobs())) + end). + + +use_fabric_transaction_object(#{t1 := T1, j1 := J1, dbname := DbName}) -> + ?_test(begin + {ok, Db} = fabric2_db:create(DbName, []), + ?assertEqual(ok, couch_jobs:add(Db, T1, J1, #{})), + ?assertMatch(#{state := pending, data := #{}}, get_job(T1, J1)), + {ok, Job, _} = couch_jobs:accept(T1), + ?assertEqual(ok, fabric2_fdb:transactional(Db, fun(Db1) -> + {ok, #{}} = couch_jobs:get_job_data(Db1, T1, J1), + Doc1 = #doc{id = <<"1">>, body = {[]}}, + {ok, {_, _}} = fabric2_db:update_doc(Db1, Doc1), + Doc2 = #doc{id = <<"2">>, body = {[]}}, + {ok, {_, _}} = fabric2_db:update_doc(Db1, Doc2), + couch_jobs:finish(Db1, Job, #{<<"d">> => 1}) + end)), + ok = couch_jobs:remove(#{tx => undefined}, T1, J1), + ok = fabric2_db:delete(DbName, []) + end). -- cgit v1.2.1 From 65b6fe3691ef5170756cb4b12f3b90fa2a5b2a5f Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Tue, 18 Jun 2019 15:32:13 -0500 Subject: Remove tests for deprecated features. Neither partitioned databases or shard splitting will exist in a FoundationDB layer. --- test/elixir/test/partition_all_docs_test.exs | 204 ------- test/elixir/test/partition_crud_test.exs | 369 ------------- test/elixir/test/partition_ddoc_test.exs | 179 ------- test/elixir/test/partition_design_docs_test.exs | 16 - test/elixir/test/partition_helpers.exs | 76 --- test/elixir/test/partition_mango_test.exs | 683 ------------------------ test/elixir/test/partition_size_limit_test.exs | 305 ----------- test/elixir/test/partition_size_test.exs | 361 ------------- test/elixir/test/partition_view_test.exs | 374 ------------- test/elixir/test/partition_view_update_test.exs | 160 ------ test/elixir/test/reshard_all_docs_test.exs | 79 --- test/elixir/test/reshard_basic_test.exs | 174 ------ test/elixir/test/reshard_changes_feed.exs | 81 --- test/elixir/test/reshard_helpers.exs | 114 ---- test/elixir/test/test_helper.exs | 2 - 15 files changed, 3177 deletions(-) delete mode 100644 test/elixir/test/partition_all_docs_test.exs delete mode 100644 test/elixir/test/partition_crud_test.exs delete mode 100644 test/elixir/test/partition_ddoc_test.exs delete mode 100644 test/elixir/test/partition_design_docs_test.exs delete mode 100644 test/elixir/test/partition_helpers.exs delete mode 100644 test/elixir/test/partition_mango_test.exs delete mode 100644 test/elixir/test/partition_size_limit_test.exs delete mode 100644 test/elixir/test/partition_size_test.exs delete mode 100644 test/elixir/test/partition_view_test.exs delete mode 100644 test/elixir/test/partition_view_update_test.exs delete mode 100644 test/elixir/test/reshard_all_docs_test.exs delete mode 100644 test/elixir/test/reshard_basic_test.exs delete mode 100644 test/elixir/test/reshard_changes_feed.exs delete mode 100644 test/elixir/test/reshard_helpers.exs diff --git a/test/elixir/test/partition_all_docs_test.exs b/test/elixir/test/partition_all_docs_test.exs deleted file mode 100644 index 816a8d6ed..000000000 --- a/test/elixir/test/partition_all_docs_test.exs +++ /dev/null @@ -1,204 +0,0 @@ -defmodule PartitionAllDocsTest do - use CouchTestCase - import PartitionHelpers - - @moduledoc """ - Test Partition functionality for for all_docs - """ - - setup_all do - db_name = random_db_name() - {:ok, _} = create_db(db_name, query: %{partitioned: true, q: 1}) - on_exit(fn -> delete_db(db_name) end) - - create_partition_docs(db_name) - - {:ok, [db_name: db_name]} - end - - test "all_docs with partitioned:true returns partitioned fields", context do - db_name = context[:db_name] - - url = "/#{db_name}/_partition/foo/_all_docs" - resp = Couch.get(url) - assert resp.status_code == 200 - partitions = get_partitions(resp) - assert Enum.dedup(partitions) == ["foo"] - - url = "/#{db_name}/_partition/bar/_all_docs" - resp = Couch.get(url) - assert resp.status_code == 200 - partitions = get_partitions(resp) - assert Enum.dedup(partitions) == ["bar"] - end - - test "partition all_docs errors with incorrect partition supplied", context do - db_name = context[:db_name] - - url = "/#{db_name}/_partition/_bar/_all_docs" - resp = Couch.get(url) - assert resp.status_code == 400 - - url = "/#{db_name}/_partition//_all_docs" - resp = Couch.get(url) - assert resp.status_code == 400 - end - - test "partitioned _all_docs works with startkey, endkey range", context do - db_name = context[:db_name] - - url = "/#{db_name}/_partition/foo/_all_docs" - resp = Couch.get(url, query: %{start_key: "\"foo:12\"", end_key: "\"foo:2\""}) - assert resp.status_code == 200 - partitions = get_partitions(resp) - assert length(partitions) == 5 - assert Enum.dedup(partitions) == ["foo"] - end - - test "partitioned _all_docs works with keys", context do - db_name = context[:db_name] - - url = "/#{db_name}/_partition/foo/_all_docs" - resp = Couch.post(url, body: %{keys: ["foo:2", "foo:4", "foo:6"]}) - assert resp.status_code == 200 - ids = get_ids(resp) - assert length(ids) == 3 - assert ids == ["foo:2", "foo:4", "foo:6"] - end - - test "partition _all_docs works with limit", context do - db_name = context[:db_name] - - url = "/#{db_name}/_partition/foo/_all_docs" - resp = Couch.get(url, query: %{limit: 5}) - assert resp.status_code == 200 - partitions = get_partitions(resp) - assert length(partitions) == 5 - assert Enum.dedup(partitions) == ["foo"] - end - - test "partition _all_docs with descending", context do - db_name = context[:db_name] - - url = "/#{db_name}/_partition/foo/_all_docs" - resp = Couch.get(url, query: %{descending: true, limit: 5}) - assert resp.status_code == 200 - ids = get_ids(resp) - assert length(ids) == 5 - assert ids == ["foo:98", "foo:96", "foo:94", "foo:92", "foo:90"] - - resp = Couch.get(url, query: %{descending: false, limit: 5}) - assert resp.status_code == 200 - ids = get_ids(resp) - assert length(ids) == 5 - assert ids == ["foo:10", "foo:100", "foo:12", "foo:14", "foo:16"] - end - - test "partition _all_docs with skip", context do - db_name = context[:db_name] - - url = "/#{db_name}/_partition/foo/_all_docs" - resp = Couch.get(url, query: %{skip: 5, limit: 5}) - assert resp.status_code == 200 - ids = get_ids(resp) - assert length(ids) == 5 - assert ids == ["foo:18", "foo:2", "foo:20", "foo:22", "foo:24"] - end - - test "partition _all_docs with key", context do - db_name = context[:db_name] - - url = "/#{db_name}/_partition/foo/_all_docs" - resp = Couch.get(url, query: %{key: "\"foo:22\""}) - assert resp.status_code == 200 - ids = get_ids(resp) - assert length(ids) == 1 - assert ids == ["foo:22"] - end - - test "partition all docs can set query limits", context do - set_config({"query_server_config", "partition_query_limit", "2000"}) - - db_name = context[:db_name] - create_partition_docs(db_name) - create_partition_ddoc(db_name) - - url = "/#{db_name}/_partition/foo/_all_docs" - - resp = - Couch.get( - url, - query: %{ - limit: 20 - } - ) - - assert resp.status_code == 200 - ids = get_ids(resp) - assert length(ids) == 20 - - resp = Couch.get(url) - assert resp.status_code == 200 - ids = get_ids(resp) - assert length(ids) == 50 - - resp = - Couch.get( - url, - query: %{ - limit: 2000 - } - ) - - assert resp.status_code == 200 - ids = get_ids(resp) - assert length(ids) == 50 - - resp = - Couch.get( - url, - query: %{ - limit: 2001 - } - ) - - assert resp.status_code == 400 - %{:body => %{"reason" => reason}} = resp - assert Regex.match?(~r/Limit is too large/, reason) - - resp = - Couch.get( - url, - query: %{ - limit: 2000, - skip: 25 - } - ) - - assert resp.status_code == 200 - ids = get_ids(resp) - assert length(ids) == 25 - end - - # This test is timing based so it could be a little flaky. - # If that turns out to be the case we should probably just skip it - @tag :pending - test "partition _all_docs with timeout", context do - set_config({"fabric", "partition_view_timeout", "1"}) - - db_name = context[:db_name] - create_partition_docs(db_name) - - retry_until(fn -> - url = "/#{db_name}/_partition/foo/_all_docs" - - case Couch.get(url) do - %{:body => %{"reason" => reason}} -> - Regex.match?(~r/not be processed in a reasonable amount of time./, reason) - - _ -> - false - end - end) - end -end diff --git a/test/elixir/test/partition_crud_test.exs b/test/elixir/test/partition_crud_test.exs deleted file mode 100644 index 7e32abbdc..000000000 --- a/test/elixir/test/partition_crud_test.exs +++ /dev/null @@ -1,369 +0,0 @@ -defmodule PartitionCrudTest do - use CouchTestCase - - @tag :with_partitioned_db - test "Sets partition in db info", context do - db_name = context[:db_name] - resp = Couch.get("/#{db_name}") - %{body: body} = resp - assert body["props"] == %{"partitioned" => true} - end - - @tag :with_partitioned_db - test "PUT and GET document", context do - db_name = context[:db_name] - id = "my-partition:doc" - url = "/#{db_name}/#{id}" - - resp = Couch.put(url, body: %{partitioned_doc: true}) - %{body: doc} = resp - assert resp.status_code in [201, 202] - assert doc["id"] == id - - resp = Couch.get(url) - assert resp.status_code == 200 - - %{body: doc} = resp - assert doc["_id"] == id - end - - @tag :with_partitioned_db - test "PUT fails if a partition key is not supplied", context do - db_name = context[:db_name] - id = "not-partitioned" - url = "/#{db_name}/#{id}" - - resp = Couch.put(url, body: %{partitioned_doc: false}) - assert resp.status_code == 400 - - error = %{ - "error" => "illegal_docid", - "reason" => "Doc id must be of form partition:id" - } - - assert Map.get(resp, :body) == error - end - - @tag :with_partitioned_db - test "PUT fails for partitions with _", context do - db_name = context[:db_name] - id = "_bad:partitioned" - url = "/#{db_name}/#{id}" - - resp = Couch.put(url, body: %{partitioned_doc: false}) - - error = %{ - "error" => "illegal_docid", - "reason" => "Only reserved document ids may start with underscore." - } - - assert resp.status_code == 400 - assert Map.get(resp, :body) == error - end - - @tag :with_partitioned_db - test "PUT fails for bad partitions", context do - db_name = context[:db_name] - id = "bad:" - url = "/#{db_name}/#{id}" - - resp = Couch.put(url, body: %{partitioned_doc: false}) - - error = %{ - "error" => "illegal_docid", - "reason" => "Document id must not be empty" - } - - assert resp.status_code == 400 - assert Map.get(resp, :body) == error - end - - @tag :with_partitioned_db - test "POST and GET document", context do - db_name = context[:db_name] - id = "my-partition-post:doc" - url = "/#{db_name}" - - resp = Couch.post(url, body: %{_id: id, partitioned_doc: true}) - assert resp.status_code in [201, 202] - - resp = Couch.get("#{url}/#{id}") - assert resp.status_code == 200 - - %{body: doc} = resp - assert doc["_id"] == id - end - - @tag :with_partitioned_db - test "GET to partition returns 400", context do - db_name = context[:db_name] - url = "/#{db_name}/_partition" - - resp = Couch.get("#{url}") - assert resp.status_code == 400 - end - - @tag :with_partitioned_db - test "POST and _bulk_get document", context do - db_name = context[:db_name] - id = "my-partition-post:doc" - url = "/#{db_name}" - - resp = Couch.post(url, body: %{_id: id, partitioned_doc: true}) - assert resp.status_code in [201, 202] - - resp = Couch.post("#{url}/_bulk_get", body: %{docs: [%{id: id}]}) - assert resp.status_code == 200 - - %{body: body} = resp - - assert %{ - "results" => [ - %{ - "docs" => [ - %{ - "ok" => %{ - "_id" => "my-partition-post:doc", - "_rev" => "1-43d86359741cb629c0953a2beb6e9d7a", - "partitioned_doc" => true - } - } - ], - "id" => "my-partition-post:doc" - } - ] - } == body - end - - @tag :with_partitioned_db - test "_bulk_get bad partitioned document", context do - db_name = context[:db_name] - id = "my-partition-post" - url = "/#{db_name}" - - resp = Couch.post("#{url}/_bulk_get", body: %{docs: [%{id: id}]}) - assert resp.status_code == 200 - %{:body => body} = resp - - assert %{ - "results" => [ - %{ - "docs" => [ - %{ - "error" => %{ - "error" => "illegal_docid", - "id" => "my-partition-post", - "reason" => "Doc id must be of form partition:id", - "rev" => :null - } - } - ], - "id" => "my-partition-post" - } - ] - } == body - end - - @tag :with_partitioned_db - test "POST fails if a partition key is not supplied", context do - db_name = context[:db_name] - id = "not-partitioned-post" - url = "/#{db_name}" - - resp = Couch.post(url, body: %{_id: id, partitited_doc: false}) - assert resp.status_code == 400 - end - - @tag :with_partitioned_db - test "_bulk_docs saves docs with partition key", context do - db_name = context[:db_name] - - docs = [ - %{_id: "foo:1"}, - %{_id: "bar:1"} - ] - - url = "/#{db_name}" - resp = Couch.post("#{url}/_bulk_docs", body: %{:docs => docs}) - assert resp.status_code in [201, 202] - - resp = Couch.get("#{url}/foo:1") - assert resp.status_code == 200 - - resp = Couch.get("#{url}/bar:1") - assert resp.status_code == 200 - end - - @tag :with_partitioned_db - test "_bulk_docs errors with missing partition key", context do - db_name = context[:db_name] - - docs = [ - %{_id: "foo1"} - ] - - error = %{ - "error" => "illegal_docid", - "reason" => "Doc id must be of form partition:id" - } - - url = "/#{db_name}" - resp = Couch.post("#{url}/_bulk_docs", body: %{:docs => docs}) - assert resp.status_code == 400 - assert Map.get(resp, :body) == error - end - - @tag :with_partitioned_db - test "_bulk_docs errors with bad partition key", context do - db_name = context[:db_name] - - docs = [ - %{_id: "_foo:1"} - ] - - error = %{ - "error" => "illegal_docid", - "reason" => "Only reserved document ids may start with underscore." - } - - url = "/#{db_name}" - resp = Couch.post("#{url}/_bulk_docs", body: %{:docs => docs}) - assert resp.status_code == 400 - assert Map.get(resp, :body) == error - end - - @tag :with_partitioned_db - test "_bulk_docs errors with bad doc key", context do - db_name = context[:db_name] - - docs = [ - %{_id: "foo:"} - ] - - error = %{ - "error" => "illegal_docid", - "reason" => "Document id must not be empty" - } - - url = "/#{db_name}" - resp = Couch.post("#{url}/_bulk_docs", body: %{:docs => docs}) - assert resp.status_code == 400 - assert Map.get(resp, :body) == error - end - - @tag :with_partitioned_db - test "saves attachment with partitioned doc", context do - db_name = context[:db_name] - id = "foo:doc-with-attachment" - - doc = %{ - _id: id, - _attachments: %{ - "foo.txt": %{ - content_type: "text/plain", - data: Base.encode64("This is a text document to save") - } - } - } - - resp = Couch.put("/#{db_name}/#{id}", body: doc) - - assert resp.status_code in [201, 202] - - resp = Couch.get("/#{db_name}/#{id}") - assert resp.status_code == 200 - body = Map.get(resp, :body) - rev = Map.get(body, "_rev") - - assert body["_attachments"] == %{ - "foo.txt" => %{ - "content_type" => "text/plain", - # "digest" => "md5-OW2BoZAtMqs1E+fAnLpNBw==", - # Temp remove the digest part since the digest value - # seems to be different on travis - "digest" => body["_attachments"]["foo.txt"]["digest"], - "length" => 31, - "revpos" => 1, - "stub" => true - } - } - - resp = Couch.get("/#{db_name}/#{id}/foo.txt") - assert Map.get(resp, :body) == "This is a text document to save" - - resp = - Couch.put( - "/#{db_name}/#{id}/bar.txt?rev=#{rev}", - headers: ["Content-Type": "text/plain"], - body: "This is another document" - ) - - assert resp.status_code in [201, 202] - %{:body => body} = resp - assert body["ok"] == true - assert body["id"] == id - end - - @tag :with_partitioned_db - test "can purge partitioned db docs", context do - db_name = context[:db_name] - - doc = %{ - _id: "foo:bar", - value: "some value" - } - - resp = Couch.post("/#{db_name}", query: [w: 3], body: doc) - assert resp.status_code in [201, 202] - %{body: body} = resp - rev = body["rev"] - - resp = Couch.get("/#{db_name}/foo:bar") - assert resp.status_code == 200 - - body = %{"foo:bar" => [rev]} - resp = Couch.post("/#{db_name}/_purge", query: [w: 3], body: body) - assert resp.status_code in [201, 202] - - resp = Couch.get("/#{db_name}/foo:bar") - assert resp.status_code == 404 - assert resp.body == %{"error" => "not_found", "reason" => "missing"} - end - - @tag :with_partitioned_db - test "purge rejects unpartitioned docid", context do - db_name = context[:db_name] - body = %{"no_partition" => ["1-967a00dff5e02add41819138abb3284d"]} - resp = Couch.post("/#{db_name}/_purge", query: [w: 3], body: body) - assert resp.status_code == 400 - %{body: body} = resp - assert body["error"] == "illegal_docid" - end - - test "create database with bad `partitioned` value", _context do - resp = Couch.put("/bad-db?partitioned=tru") - assert resp.status_code == 400 - - assert Map.get(resp, :body) == %{ - "error" => "bad_request", - "reason" => "Invalid `partitioned` parameter" - } - end - - test "can create unpartitioned system db", _context do - Couch.delete("/_replicator") - resp = Couch.put("/_replicator") - assert resp.status_code in [201, 202] - assert resp.body == %{"ok" => true} - end - - test "cannot create partitioned system db", _context do - Couch.delete("/_replicator") - - resp = Couch.put("/_replicator?partitioned=true") - assert resp.status_code == 400 - - %{:body => %{"reason" => reason}} = resp - assert Regex.match?(~r/Cannot partition a system database/, reason) - end -end diff --git a/test/elixir/test/partition_ddoc_test.exs b/test/elixir/test/partition_ddoc_test.exs deleted file mode 100644 index 9fdfb9260..000000000 --- a/test/elixir/test/partition_ddoc_test.exs +++ /dev/null @@ -1,179 +0,0 @@ -defmodule PartitionDDocTest do - use CouchTestCase - - @moduledoc """ - Test partition design doc interactions - """ - - setup do - db_name = random_db_name() - {:ok, _} = create_db(db_name, query: %{partitioned: true, q: 1}) - on_exit(fn -> delete_db(db_name) end) - - {:ok, [db_name: db_name]} - end - - test "PUT /dbname/_design/foo", context do - db_name = context[:db_name] - resp = Couch.put("/#{db_name}/_design/foo", body: %{stuff: "here"}) - assert resp.status_code in [201, 202] - end - - test "PUT /dbname/_design/foo to update", context do - db_name = context[:db_name] - ddoc_id = "_design/foo" - - ddoc = %{ - _id: ddoc_id, - stuff: "here" - } - - resp = Couch.put("/#{db_name}/#{ddoc_id}", body: ddoc) - assert resp.status_code in [201, 202] - %{body: body} = resp - - ddoc = Map.put(ddoc, :_rev, body["rev"]) - ddoc = Map.put(ddoc, :other, "attribute") - resp = Couch.put("/#{db_name}/#{ddoc_id}", body: ddoc) - assert resp.status_code in [201, 202] - end - - test "PUT /dbname/_design/foo/readme.txt", context do - db_name = context[:db_name] - ddoc_id = "_design/foo" - - ddoc = %{ - _id: ddoc_id, - stuff: "here" - } - - resp = Couch.put("/#{db_name}/#{ddoc_id}", body: ddoc) - assert resp.status_code in [201, 202] - %{body: body} = resp - - att = "This is a readme.txt" - - opts = [ - headers: [{:"Content-Type", "text/plain"}], - query: [rev: body["rev"]], - body: att - ] - - resp = Couch.put("/#{db_name}/#{ddoc_id}/readme.txt", opts) - assert resp.status_code in [201, 202] - end - - test "DELETE /dbname/_design/foo", context do - db_name = context[:db_name] - ddoc_id = "_design/foo" - - ddoc = %{ - _id: ddoc_id, - stuff: "here" - } - - resp = Couch.put("/#{db_name}/#{ddoc_id}", body: ddoc) - assert resp.status_code in [201, 202] - %{body: body} = resp - - resp = Couch.delete("/#{db_name}/#{ddoc_id}", query: [rev: body["rev"]]) - assert resp.status_code == 200 - end - - test "POST /dbname with design doc", context do - db_name = context[:db_name] - body = %{_id: "_design/foo", stuff: "here"} - resp = Couch.post("/#{db_name}", body: body) - assert resp.status_code in [201, 202] - end - - test "POST /dbname/_bulk_docs with design doc", context do - db_name = context[:db_name] - body = %{:docs => [%{_id: "_design/foo", stuff: "here"}]} - resp = Couch.post("/#{db_name}/_bulk_docs", body: body) - assert resp.status_code in [201, 202] - end - - test "GET /dbname/_design/foo", context do - db_name = context[:db_name] - resp = Couch.put("/#{db_name}/_design/foo", body: %{stuff: "here"}) - assert resp.status_code in [201, 202] - - resp = Couch.get("/#{db_name}/_design/foo") - assert resp.status_code == 200 - end - - test "GET /dbname/_design/foo?rev=$rev", context do - db_name = context[:db_name] - resp = Couch.put("/#{db_name}/_design/foo", body: %{stuff: "here"}) - assert resp.status_code in [201, 202] - %{body: body} = resp - - resp = Couch.get("/#{db_name}/_design/foo", query: [rev: body["rev"]]) - assert resp.status_code == 200 - end - - test "GET /dbname/_bulk_get", context do - db_name = context[:db_name] - resp = Couch.put("/#{db_name}/_design/foo", body: %{stuff: "here"}) - assert resp.status_code in [201, 202] - - body = %{docs: [%{id: "_design/foo"}]} - resp = Couch.post("/#{db_name}/_bulk_get", body: body) - assert resp.status_code == 200 - %{body: body} = resp - - assert length(body["results"]) == 1 - - %{"results" => [%{"id" => "_design/foo", "docs" => [%{"ok" => _}]}]} = body - end - - test "GET /dbname/_bulk_get with rev", context do - db_name = context[:db_name] - resp = Couch.put("/#{db_name}/_design/foo", body: %{stuff: "here"}) - assert resp.status_code in [201, 202] - %{body: body} = resp - - body = %{docs: [%{id: "_design/foo", rev: body["rev"]}]} - resp = Couch.post("/#{db_name}/_bulk_get", body: body) - assert resp.status_code == 200 - %{body: body} = resp - - assert length(body["results"]) == 1 - %{"results" => [%{"id" => "_design/foo", "docs" => [%{"ok" => _}]}]} = body - end - - test "GET /dbname/_all_docs?key=$ddoc_id", context do - db_name = context[:db_name] - resp = Couch.put("/#{db_name}/_design/foo", body: %{stuff: "here"}, query: [w: 3]) - assert resp.status_code in [201, 202] - - resp = Couch.get("/#{db_name}/_all_docs", query: [key: "\"_design/foo\""]) - assert resp.status_code == 200 - %{body: body} = resp - - assert length(body["rows"]) == 1 - assert %{"rows" => [%{"id" => "_design/foo"}]} = body - end - - @tag :skip_on_jenkins - test "GET /dbname/_design_docs", context do - db_name = context[:db_name] - - retry_until( - fn -> - resp = Couch.put("/#{db_name}/_design/foo", body: %{stuff: "here"}) - assert resp.status_code in [201, 202] - - resp = Couch.get("/#{db_name}/_design_docs") - assert resp.status_code == 200 - %{body: body} = resp - - assert length(body["rows"]) == 1 - %{"rows" => [%{"id" => "_design/foo"}]} = body - end, - 500, - 10_000 - ) - end -end diff --git a/test/elixir/test/partition_design_docs_test.exs b/test/elixir/test/partition_design_docs_test.exs deleted file mode 100644 index 4ccd63fe0..000000000 --- a/test/elixir/test/partition_design_docs_test.exs +++ /dev/null @@ -1,16 +0,0 @@ -defmodule PartitionDesignDocsTest do - use CouchTestCase - - @moduledoc """ - Test Partition functionality for partition design docs - """ - - @tag :with_partitioned_db - test "/_partition/:pk/_design/doc 404", context do - db_name = context[:db_name] - - url = "/#{db_name}/_partition/fakekey/_design/mrtest/_view/some" - resp = Couch.get(url) - assert resp.status_code == 404 - end -end diff --git a/test/elixir/test/partition_helpers.exs b/test/elixir/test/partition_helpers.exs deleted file mode 100644 index 3322ed7f5..000000000 --- a/test/elixir/test/partition_helpers.exs +++ /dev/null @@ -1,76 +0,0 @@ -defmodule PartitionHelpers do - use ExUnit.Case - - def create_partition_docs(db_name, pk1 \\ "foo", pk2 \\ "bar") do - docs = - for i <- 1..100 do - id = - if rem(i, 2) == 0 do - "#{pk1}:#{i}" - else - "#{pk2}:#{i}" - end - - group = - if rem(i, 3) == 0 do - "one" - else - "two" - end - - %{ - :_id => id, - :value => i, - :some => "field", - :group => group - } - end - - resp = Couch.post("/#{db_name}/_bulk_docs", body: %{:w => 3, :docs => docs}) - assert resp.status_code in [201, 202] - end - - def create_partition_ddoc(db_name, opts \\ %{}) do - map_fn = """ - function(doc) { - if (doc.some) { - emit(doc.value, doc.some); - } - } - """ - - default_ddoc = %{ - views: %{ - some: %{ - map: map_fn - } - } - } - - ddoc = Enum.into(opts, default_ddoc) - - resp = Couch.put("/#{db_name}/_design/mrtest", body: ddoc) - assert resp.status_code in [201, 202] - assert Map.has_key?(resp.body, "ok") == true - end - - def get_ids(resp) do - %{:body => %{"rows" => rows}} = resp - Enum.map(rows, fn row -> row["id"] end) - end - - def get_partitions(resp) do - %{:body => %{"rows" => rows}} = resp - - Enum.map(rows, fn row -> - [partition, _] = String.split(row["id"], ":") - partition - end) - end - - def assert_correct_partition(partitions, correct_partition) do - assert Enum.all?(partitions, fn partition -> - partition == correct_partition - end) - end -end diff --git a/test/elixir/test/partition_mango_test.exs b/test/elixir/test/partition_mango_test.exs deleted file mode 100644 index 992999fb9..000000000 --- a/test/elixir/test/partition_mango_test.exs +++ /dev/null @@ -1,683 +0,0 @@ -defmodule PartitionMangoTest do - use CouchTestCase - import PartitionHelpers, except: [get_partitions: 1] - - @moduledoc """ - Test Partition functionality for mango - """ - def create_index(db_name, fields \\ ["some"], opts \\ %{}) do - default_index = %{ - index: %{ - fields: fields - } - } - - index = Enum.into(opts, default_index) - resp = Couch.post("/#{db_name}/_index", body: index) - - assert resp.status_code == 200 - assert resp.body["result"] == "created" - assert resp.body["id"] != nil - assert resp.body["name"] != nil - - # wait until the database reports the index as available - retry_until(fn -> - get_index(db_name, resp.body["id"], resp.body["name"]) != nil - end) - end - - def list_indexes(db_name) do - resp = Couch.get("/#{db_name}/_index") - assert resp.status_code == 200 - resp.body["indexes"] - end - - def get_index(db_name, ddocid, name) do - indexes = list_indexes(db_name) - Enum.find(indexes, fn(index) -> - match?(%{"ddoc" => ^ddocid, "name" => ^name}, index) - end) - end - - def get_partitions(resp) do - %{:body => %{"docs" => docs}} = resp - - Enum.map(docs, fn doc -> - [partition, _] = String.split(doc["_id"], ":") - partition - end) - end - - @tag :with_partitioned_db - test "query using _id and partition works", context do - db_name = context[:db_name] - create_partition_docs(db_name) - create_index(db_name) - - url = "/#{db_name}/_partition/foo/_find" - - resp = - Couch.post( - url, - body: %{ - selector: %{ - _id: %{ - "$gt": "foo:" - } - }, - limit: 20 - } - ) - - assert resp.status_code == 200 - partitions = get_partitions(resp) - assert length(partitions) == 20 - assert_correct_partition(partitions, "foo") - - url = "/#{db_name}/_find" - - resp = - Couch.post( - url, - body: %{ - selector: %{ - _id: %{ - "$lt": "foo:" - } - }, - limit: 20 - } - ) - - assert resp.status_code == 200 - partitions = get_partitions(resp) - assert length(partitions) == 20 - assert_correct_partition(partitions, "bar") - end - - @tag :with_partitioned_db - test "query using _id works for global and local query", context do - db_name = context[:db_name] - create_partition_docs(db_name) - create_index(db_name) - - url = "/#{db_name}/_partition/foo/_find" - - resp = - Couch.post( - url, - body: %{ - selector: %{ - _id: %{ - "$gt": 0 - } - }, - limit: 20 - } - ) - - assert resp.status_code == 200 - partitions = get_partitions(resp) - assert length(partitions) == 20 - assert_correct_partition(partitions, "foo") - - url = "/#{db_name}/_find" - - resp = - Couch.post( - url, - body: %{ - selector: %{ - _id: %{ - "$gt": 0 - } - }, - limit: 20 - } - ) - - assert resp.status_code == 200 - partitions = get_partitions(resp) - assert length(partitions) == 20 - assert_correct_partition(partitions, "bar") - end - - @tag :with_partitioned_db - test "query with partitioned:true using index and $eq", context do - db_name = context[:db_name] - create_partition_docs(db_name) - create_index(db_name) - - url = "/#{db_name}/_partition/foo/_find" - - resp = - Couch.post( - url, - body: %{ - selector: %{ - some: "field" - }, - limit: 20 - } - ) - - assert resp.status_code == 200 - partitions = get_partitions(resp) - assert length(partitions) == 20 - assert_correct_partition(partitions, "foo") - - url = "/#{db_name}/_partition/bar/_find" - - resp = - Couch.post( - url, - body: %{ - selector: %{ - some: "field" - }, - limit: 20 - } - ) - - assert resp.status_code == 200 - partitions = get_partitions(resp) - assert length(partitions) == 20 - assert_correct_partition(partitions, "bar") - end - - @tag :with_partitioned_db - test "partitioned query using _all_docs with $eq", context do - db_name = context[:db_name] - create_partition_docs(db_name) - - url = "/#{db_name}/_partition/foo/_find" - - resp = - Couch.post( - url, - body: %{ - selector: %{ - some: "field" - }, - limit: 20 - } - ) - - assert resp.status_code == 200 - partitions = get_partitions(resp) - assert length(partitions) == 20 - assert_correct_partition(partitions, "foo") - - url = "/#{db_name}/_partition/bar/_find" - - resp = - Couch.post( - url, - body: %{ - selector: %{ - some: "field" - }, - limit: 20 - } - ) - - assert resp.status_code == 200 - partitions = get_partitions(resp) - assert length(partitions) == 20 - assert_correct_partition(partitions, "bar") - end - - @tag :with_db - test "non-partitioned query using _all_docs and $eq", context do - db_name = context[:db_name] - create_partition_docs(db_name) - - url = "/#{db_name}/_find" - - resp = - Couch.post( - url, - body: %{ - selector: %{ - some: "field" - }, - skip: 40, - limit: 5 - } - ) - - assert resp.status_code == 200 - partitions = get_partitions(resp) - assert length(partitions) == 5 - assert partitions == ["bar", "bar", "bar", "bar", "bar"] - - url = "/#{db_name}/_find" - - resp = - Couch.post( - url, - body: %{ - selector: %{ - some: "field" - }, - skip: 50, - limit: 5 - } - ) - - assert resp.status_code == 200 - partitions = get_partitions(resp) - assert length(partitions) == 5 - assert partitions == ["foo", "foo", "foo", "foo", "foo"] - end - - @tag :with_partitioned_db - test "partitioned query using index and range scan", context do - db_name = context[:db_name] - create_partition_docs(db_name, "foo", "bar42") - create_index(db_name, ["value"]) - - url = "/#{db_name}/_partition/foo/_find" - - resp = - Couch.post( - url, - body: %{ - selector: %{ - value: %{ - "$gte": 6, - "$lt": 16 - } - } - } - ) - - assert resp.status_code == 200 - partitions = get_partitions(resp) - assert length(partitions) == 5 - assert_correct_partition(partitions, "foo") - - url = "/#{db_name}/_partition/bar42/_find" - - resp = - Couch.post( - url, - body: %{ - selector: %{ - value: %{ - "$gte": 6, - "$lt": 16 - } - } - } - ) - - assert resp.status_code == 200 - partitions = get_partitions(resp) - assert length(partitions) == 5 - assert_correct_partition(partitions, "bar42") - end - - @tag :with_partitioned_db - test "partitioned query using _all_docs and range scan", context do - db_name = context[:db_name] - create_partition_docs(db_name) - - url = "/#{db_name}/_partition/foo/_find" - - resp = - Couch.post( - url, - body: %{ - selector: %{ - value: %{ - "$gte": 6, - "$lt": 16 - } - } - } - ) - - assert resp.status_code == 200 - partitions = get_partitions(resp) - assert length(partitions) == 5 - assert_correct_partition(partitions, "foo") - - url = "/#{db_name}/_partition/bar/_find" - - resp = - Couch.post( - url, - body: %{ - selector: %{ - value: %{ - "$gte": 6, - "$lt": 16 - } - } - } - ) - - assert resp.status_code == 200 - partitions = get_partitions(resp) - assert length(partitions) == 5 - assert_correct_partition(partitions, "bar") - end - - @tag :with_partitioned_db - test "partitioned query using _all_docs", context do - db_name = context[:db_name] - create_partition_docs(db_name, "foo", "bar42") - - url = "/#{db_name}/_partition/foo/_find" - - resp = - Couch.post( - url, - body: %{ - selector: %{ - value: %{ - "$gte": 6, - "$lt": 16 - } - } - } - ) - - assert resp.status_code == 200 - partitions = get_partitions(resp) - assert length(partitions) == 5 - assert_correct_partition(partitions, "foo") - - url = "/#{db_name}/_partition/bar42/_find" - - resp = - Couch.post( - url, - body: %{ - selector: %{ - value: %{ - "$gte": 6, - "$lt": 16 - } - } - } - ) - - assert resp.status_code == 200 - partitions = get_partitions(resp) - assert length(partitions) == 5 - assert_correct_partition(partitions, "bar42") - end - - @tag :with_partitioned_db - test "explain works with partitions", context do - db_name = context[:db_name] - create_partition_docs(db_name) - create_index(db_name, ["some"]) - - url = "/#{db_name}/_partition/foo/_explain" - - resp = - Couch.post( - url, - body: %{ - selector: %{ - value: %{ - "$gte": 6, - "$lt": 16 - } - } - } - ) - - %{:body => body} = resp - - assert body["index"]["name"] == "_all_docs" - assert body["mrargs"]["partition"] == "foo" - - url = "/#{db_name}/_partition/bar/_explain" - - resp = - Couch.post( - url, - body: %{ - selector: %{ - some: "field" - } - } - ) - - %{:body => body} = resp - - assert body["index"]["def"] == %{"fields" => [%{"some" => "asc"}]} - assert body["mrargs"]["partition"] == "bar" - end - - @tag :with_db - test "explain works with non partitioned db", context do - db_name = context[:db_name] - create_partition_docs(db_name) - create_index(db_name, ["some"]) - - url = "/#{db_name}/_explain" - - resp = - Couch.post( - url, - body: %{ - selector: %{ - value: %{ - "$gte": 6, - "$lt": 16 - } - } - } - ) - - %{:body => body} = resp - - assert body["index"]["name"] == "_all_docs" - assert body["mrargs"]["partition"] == :null - - resp = - Couch.post( - url, - body: %{ - selector: %{ - some: "field" - } - } - ) - - %{:body => body} = resp - - assert body["index"]["def"] == %{"fields" => [%{"some" => "asc"}]} - assert body["mrargs"]["partition"] == :null - end - - @tag :with_partitioned_db - test "partitioned query using bookmarks", context do - db_name = context[:db_name] - create_partition_docs(db_name) - create_index(db_name, ["value"]) - - url = "/#{db_name}/_partition/foo/_find" - - resp = - Couch.post( - url, - body: %{ - selector: %{ - value: %{ - "$gte": 6, - "$lt": 16 - } - }, - limit: 3 - } - ) - - assert resp.status_code == 200 - partitions = get_partitions(resp) - assert length(partitions) == 3 - assert_correct_partition(partitions, "foo") - - %{:body => %{"bookmark" => bookmark}} = resp - - resp = - Couch.post( - url, - body: %{ - selector: %{ - value: %{ - "$gte": 6, - "$lt": 16 - } - }, - limit: 3, - bookmark: bookmark - } - ) - - assert resp.status_code == 200 - partitions = get_partitions(resp) - assert length(partitions) == 2 - assert_correct_partition(partitions, "foo") - end - - @tag :with_partitioned_db - test "global query uses global index", context do - db_name = context[:db_name] - create_partition_docs(db_name) - create_index(db_name, ["some"], %{partitioned: false}) - - url = "/#{db_name}/_explain" - - selector = %{ - selector: %{ - some: "field" - }, - limit: 100 - } - - resp = Couch.post(url, body: selector) - assert resp.status_code == 200 - %{:body => body} = resp - assert body["index"]["def"] == %{"fields" => [%{"some" => "asc"}]} - - url = "/#{db_name}/_find" - resp = Couch.post(url, body: selector) - assert resp.status_code == 200 - - partitions = get_partitions(resp) - assert length(partitions) == 100 - end - - @tag :with_partitioned_db - test "global query does not use partition index", context do - db_name = context[:db_name] - create_partition_docs(db_name) - create_index(db_name, ["some"]) - - url = "/#{db_name}/_explain" - - selector = %{ - selector: %{ - some: "field" - }, - limit: 100 - } - - resp = Couch.post(url, body: selector) - %{:body => body} = resp - assert body["index"]["name"] == "_all_docs" - - url = "/#{db_name}/_find" - resp = Couch.post(url, body: selector) - - assert resp.status_code == 200 - - partitions = get_partitions(resp) - assert length(partitions) == 100 - end - - @tag :with_partitioned_db - test "partitioned query does not use global index", context do - db_name = context[:db_name] - create_partition_docs(db_name) - create_index(db_name, ["some"], %{partitioned: false}) - - url = "/#{db_name}/_partition/foo/_explain" - - selector = %{ - selector: %{ - some: "field" - }, - limit: 50 - } - - resp = Couch.post(url, body: selector) - assert resp.status_code == 200 - %{:body => body} = resp - assert body["index"]["name"] == "_all_docs" - - url = "/#{db_name}/_partition/foo/_find" - resp = Couch.post(url, body: selector) - assert resp.status_code == 200 - - partitions = get_partitions(resp) - assert length(partitions) == 50 - assert_correct_partition(partitions, "foo") - end - - @tag :with_partitioned_db - test "partitioned _find and _explain with missing partition returns 400", context do - db_name = context[:db_name] - - selector = %{ - selector: %{ - some: "field" - } - } - - resp = Couch.get("/#{db_name}/_partition/_find", body: selector) - validate_missing_partition(resp) - - resp = Couch.get("/#{db_name}/_partition/_explain", body: selector) - validate_missing_partition(resp) - end - - defp validate_missing_partition(resp) do - assert resp.status_code == 400 - %{:body => %{"reason" => reason}} = resp - assert Regex.match?(~r/Partition must not start/, reason) - end - - @tag :with_partitioned_db - test "partitioned query sends correct errors for sort errors", context do - db_name = context[:db_name] - create_partition_docs(db_name) - - url = "/#{db_name}/_partition/foo/_find" - - selector = %{ - selector: %{ - some: "field" - }, - sort: ["some"], - limit: 50 - } - - resp = Couch.post(url, body: selector) - assert resp.status_code == 400 - %{:body => %{"reason" => reason}} = resp - assert Regex.match?(~r/No partitioned index exists for this sort/, reason) - - url = "/#{db_name}/_find" - resp = Couch.post(url, body: selector) - assert resp.status_code == 400 - %{:body => %{"reason" => reason}} = resp - assert Regex.match?(~r/No global index exists for this sort/, reason) - end -end diff --git a/test/elixir/test/partition_size_limit_test.exs b/test/elixir/test/partition_size_limit_test.exs deleted file mode 100644 index 5141d0d8b..000000000 --- a/test/elixir/test/partition_size_limit_test.exs +++ /dev/null @@ -1,305 +0,0 @@ -defmodule PartitionSizeLimitTest do - use CouchTestCase - - @moduledoc """ - Test Partition size limit functionality - """ - - @max_size 10_240 - - setup do - db_name = random_db_name() - {:ok, _} = create_db(db_name, query: %{partitioned: true, q: 1}) - on_exit(fn -> delete_db(db_name) end) - - set_config({"couchdb", "max_partition_size", Integer.to_string(@max_size)}) - - {:ok, [db_name: db_name]} - end - - defp get_db_info(dbname) do - resp = Couch.get("/#{dbname}") - assert resp.status_code in [200, 202] - %{:body => body} = resp - body - end - - defp get_partition_info(dbname, partition) do - resp = Couch.get("/#{dbname}/_partition/#{partition}") - assert resp.status_code in [200, 202] - %{:body => body} = resp - body - end - - defp open_doc(db_name, docid, status_assert \\ [200, 202]) do - resp = Couch.get("/#{db_name}/#{docid}") - assert resp.status_code in status_assert - %{:body => body} = resp - body - end - - defp save_doc(db_name, doc, status_assert \\ [201, 202]) do - resp = Couch.post("/#{db_name}", query: [w: 3], body: doc) - assert resp.status_code in status_assert - %{:body => body} = resp - body["rev"] - end - - defp delete_doc(db_name, doc, status_assert \\ [200, 202]) do - url = "/#{db_name}/#{doc["_id"]}" - rev = doc["_rev"] - resp = Couch.delete(url, query: [w: 3, rev: rev]) - assert resp.status_code in status_assert - %{:body => body} = resp - body["rev"] - end - - defp fill_partition(db_name, partition \\ "foo") do - docs = - 1..15 - |> Enum.map(fn i -> - id = i |> Integer.to_string() |> String.pad_leading(4, "0") - docid = "#{partition}:#{id}" - %{_id: docid, value: "0" |> String.pad_leading(1024)} - end) - - body = %{:w => 3, :docs => docs} - resp = Couch.post("/#{db_name}/_bulk_docs", body: body) - assert resp.status_code in [201, 202] - end - - defp compact(db) do - assert Couch.post("/#{db}/_compact").status_code == 202 - - retry_until( - fn -> - Couch.get("/#{db}").body["compact_running"] == false - end, - 200, - 20_000 - ) - end - - test "fill partition manually", context do - db_name = context[:db_name] - partition = "foo" - - resp = - 1..1000 - |> Enum.find_value(0, fn i -> - id = i |> Integer.to_string() |> String.pad_leading(4, "0") - docid = "#{partition}:#{id}" - doc = %{_id: docid, value: "0" |> String.pad_leading(1024)} - resp = Couch.post("/#{db_name}", query: [w: 3], body: doc) - - if resp.status_code in [201, 202] do - false - else - resp - end - end) - - assert resp.status_code == 403 - %{body: body} = resp - assert body["error"] == "partition_overflow" - - info = get_partition_info(db_name, partition) - assert info["sizes"]["external"] >= @max_size - end - - test "full partitions reject POST /dbname", context do - db_name = context[:db_name] - fill_partition(db_name) - - doc = %{_id: "foo:bar", value: "stuff"} - resp = Couch.post("/#{db_name}", query: [w: 3], body: doc) - assert resp.status_code == 403 - %{body: body} = resp - assert body["error"] == "partition_overflow" - end - - test "full partitions reject PUT /dbname/docid", context do - db_name = context[:db_name] - fill_partition(db_name) - - doc = %{value: "stuff"} - resp = Couch.put("/#{db_name}/foo:bar", query: [w: 3], body: doc) - assert resp.status_code == 403 - %{body: body} = resp - assert body["error"] == "partition_overflow" - end - - test "full partitions reject POST /dbname/_bulk_docs", context do - db_name = context[:db_name] - fill_partition(db_name) - - body = %{w: 3, docs: [%{_id: "foo:bar"}]} - resp = Couch.post("/#{db_name}/_bulk_docs", query: [w: 3], body: body) - assert resp.status_code in [201, 202] - %{body: body} = resp - doc_resp = Enum.at(body, 0) - assert doc_resp["error"] == "partition_overflow" - end - - test "full partitions with mixed POST /dbname/_bulk_docs", context do - db_name = context[:db_name] - fill_partition(db_name) - - body = %{w: 3, docs: [%{_id: "foo:bar"}, %{_id: "baz:bang"}]} - resp = Couch.post("/#{db_name}/_bulk_docs", query: [w: 3], body: body) - assert resp.status_code in [201, 202] - %{body: body} = resp - - doc_resp1 = Enum.at(body, 0) - assert doc_resp1["error"] == "partition_overflow" - - doc_resp2 = Enum.at(body, 1) - assert doc_resp2["ok"] - end - - test "full partitions are still readable", context do - db_name = context[:db_name] - fill_partition(db_name) - open_doc(db_name, "foo:0001") - end - - test "full partitions can accept deletes", context do - db_name = context[:db_name] - fill_partition(db_name) - - doc = open_doc(db_name, "foo:0001") - delete_doc(db_name, doc) - end - - test "full partitions can accept updates that reduce size", context do - db_name = context[:db_name] - fill_partition(db_name) - - doc = open_doc(db_name, "foo:0001") - save_doc(db_name, %{doc | "value" => ""}) - end - - test "full partition does not affect other partitions", context do - db_name = context[:db_name] - fill_partition(db_name) - save_doc(db_name, %{_id: "bar:foo", value: "stuff"}) - end - - test "full partition does not affect design documents", context do - db_name = context[:db_name] - fill_partition(db_name) - rev1 = save_doc(db_name, %{_id: "_design/foo", value: "stuff"}) - save_doc(db_name, %{_id: "_design/foo", _rev: rev1, value: "hi"}) - doc = open_doc(db_name, "_design/foo") - delete_doc(db_name, doc) - end - - test "replication into a full partition works", context do - db_name = context[:db_name] - fill_partition(db_name) - save_doc(db_name, %{_id: "foo:bar", value: "stuff"}, [403]) - - doc = %{ - _id: "foo:bar", - _rev: <<"1-23202479633c2b380f79507a776743d5">>, - value: "stuff" - } - - url = "/#{db_name}/#{doc[:_id]}" - query = [new_edits: false, w: 3] - resp = Couch.put(url, query: query, body: doc) - assert resp.status_code in [201, 202] - end - - test "compacting a full partition works", context do - db_name = context[:db_name] - db_info1 = get_db_info(db_name) - fill_partition(db_name) - compact(db_name) - db_info2 = get_db_info(db_name) - assert db_info2["sizes"]["file"] != db_info1["sizes"]["file"] - end - - test "indexing a full partition works", context do - db_name = context[:db_name] - fill_partition(db_name) - - ddoc = %{ - _id: "_design/foo", - views: %{ - bar: %{ - map: "function(doc) {emit(doc.group, 1);}" - } - } - } - - save_doc(db_name, ddoc) - - url = "/#{db_name}/_partition/foo/_design/foo/_view/bar" - resp = Couch.get(url) - assert resp.status_code in [200, 202] - %{body: body} = resp - - assert length(body["rows"]) > 0 - end - - test "purging docs allows writes", context do - db_name = context[:db_name] - fill_partition(db_name) - - info = get_partition_info(db_name, "foo") - limit = info["doc_count"] - 1 - - query = [ - start_key: "\"foo:0000\"", - end_key: "\"foo:9999\"", - limit: limit - ] - - resp = Couch.get("/#{db_name}/_all_docs", query: query) - assert resp.status_code in [200, 202] - %{body: body} = resp - - pbody = - body["rows"] - |> Enum.reduce(%{}, fn row, acc -> - Map.put(acc, row["id"], [row["value"]["rev"]]) - end) - - resp = Couch.post("/#{db_name}/_purge", query: [w: 3], body: pbody) - assert resp.status_code in [201, 202] - - save_doc(db_name, %{_id: "foo:bar", value: "some value"}) - end - - test "increasing partition size allows more writes", context do - db_name = context[:db_name] - fill_partition(db_name) - - # We use set_config_raw so that we're not setting - # on_exit handlers that might interfere with the original - # config change done in setup of this test - new_size = Integer.to_string(@max_size * 1000) - set_config_raw("couchdb", "max_partition_size", new_size) - - save_doc(db_name, %{_id: "foo:bar", value: "stuff"}) - end - - test "decreasing partition size disables more writes", context do - db_name = context[:db_name] - - # We use set_config_raw so that we're not setting - # on_exit handlers that might interfere with the original - # config change done in setup of this test - new_size = Integer.to_string(@max_size * 1000) - set_config_raw("couchdb", "max_partition_size", new_size) - - fill_partition(db_name) - save_doc(db_name, %{_id: "foo:bar", value: "stuff"}) - - old_size = Integer.to_string(@max_size) - set_config_raw("couchdb", "max_partition_size", old_size) - - save_doc(db_name, %{_id: "foo:baz", value: "stuff"}, [403]) - end -end diff --git a/test/elixir/test/partition_size_test.exs b/test/elixir/test/partition_size_test.exs deleted file mode 100644 index 2ba8139fc..000000000 --- a/test/elixir/test/partition_size_test.exs +++ /dev/null @@ -1,361 +0,0 @@ -defmodule PartitionSizeTest do - use CouchTestCase - - @moduledoc """ - Test Partition size functionality - """ - - setup do - db_name = random_db_name() - {:ok, _} = create_db(db_name, query: %{partitioned: true, q: 1}) - on_exit(fn -> delete_db(db_name) end) - - {:ok, [db_name: db_name]} - end - - def get_db_info(dbname) do - resp = Couch.get("/#{dbname}") - assert resp.status_code == 200 - %{:body => body} = resp - body - end - - def get_partition_info(dbname, partition) do - resp = Couch.get("/#{dbname}/_partition/#{partition}") - assert resp.status_code == 200 - %{:body => body} = resp - body - end - - def mk_partition(i) do - i |> rem(10) |> Integer.to_string() |> String.pad_leading(3, "0") - end - - def mk_docid(i) do - id = i |> Integer.to_string() |> String.pad_leading(4, "0") - "#{mk_partition(i)}:#{id}" - end - - def mk_docs(db_name) do - docs = - for i <- 1..1000 do - group = Integer.to_string(rem(i, 3)) - - %{ - :_id => mk_docid(i), - :value => i, - :some => "field", - :group => group - } - end - - body = %{:w => 3, :docs => docs} - - retry_until(fn -> - resp = Couch.post("/#{db_name}/_bulk_docs", body: body) - assert resp.status_code in [201, 202] - end) - end - - def save_doc(db_name, doc) do - resp = Couch.post("/#{db_name}", query: [w: 3], body: doc) - assert resp.status_code in [201, 202] - %{:body => body} = resp - body["rev"] - end - - test "get empty partition", context do - db_name = context[:db_name] - partition = "non_existent_partition" - - info = get_partition_info(db_name, partition) - - assert info["doc_count"] == 0 - assert info["doc_del_count"] == 0 - assert info["partition"] == partition - assert info["sizes"]["external"] == 0 - assert info["sizes"]["active"] == 0 - end - - test "unknown partition return's zero", context do - db_name = context[:db_name] - mk_docs(db_name) - - info = get_partition_info(db_name, "unknown") - assert info["doc_count"] == 0 - assert info["doc_del_count"] == 0 - assert info["sizes"]["external"] == 0 - assert info["sizes"]["active"] == 0 - end - - test "simple partition size", context do - db_name = context[:db_name] - save_doc(db_name, %{_id: "foo:bar", val: 42}) - - info = get_partition_info(db_name, "foo") - assert info["doc_count"] == 1 - assert info["doc_del_count"] == 0 - assert info["sizes"]["external"] > 0 - assert info["sizes"]["active"] > 0 - end - - test "adding docs increases partition sizes", context do - db_name = context[:db_name] - save_doc(db_name, %{_id: "foo:bar", val: 42}) - pre_info = get_partition_info(db_name, "foo") - - save_doc(db_name, %{_id: "foo:baz", val: 24}) - post_info = get_partition_info(db_name, "foo") - - assert post_info["doc_count"] == 2 - assert post_info["doc_del_count"] == 0 - assert post_info["sizes"]["external"] > pre_info["sizes"]["external"] - assert post_info["sizes"]["active"] > pre_info["sizes"]["active"] - end - - test "updating docs affects partition sizes", context do - db_name = context[:db_name] - rev1 = save_doc(db_name, %{_id: "foo:bar", val: ""}) - info1 = get_partition_info(db_name, "foo") - - rev2 = - save_doc(db_name, %{ - _id: "foo:bar", - _rev: rev1, - val: "this is a very long string that is so super long its beyond long" - }) - - info2 = get_partition_info(db_name, "foo") - - save_doc(db_name, %{ - _id: "foo:bar", - _rev: rev2, - val: "this string is shorter" - }) - - info3 = get_partition_info(db_name, "foo") - - assert info3["doc_count"] == 1 - assert info3["doc_del_count"] == 0 - - assert info3["sizes"]["external"] > info1["sizes"]["external"] - assert info2["sizes"]["external"] > info3["sizes"]["external"] - end - - test "deleting a doc affects partition sizes", context do - db_name = context[:db_name] - rev1 = save_doc(db_name, %{_id: "foo:bar", val: "some stuff here"}) - info1 = get_partition_info(db_name, "foo") - - save_doc(db_name, %{_id: "foo:bar", _rev: rev1, _deleted: true}) - info2 = get_partition_info(db_name, "foo") - - assert info1["doc_count"] == 1 - assert info1["doc_del_count"] == 0 - - assert info2["doc_count"] == 0 - assert info2["doc_del_count"] == 1 - - assert info2["sizes"]["external"] < info1["sizes"]["external"] - end - - test "design docs do not affect partition sizes", context do - db_name = context[:db_name] - mk_docs(db_name) - - pre_infos = - 0..9 - |> Enum.map(fn i -> - get_partition_info(db_name, mk_partition(i)) - end) - - 0..5 - |> Enum.map(fn i -> - base = i |> Integer.to_string() |> String.pad_leading(5, "0") - docid = "_design/#{base}" - save_doc(db_name, %{_id: docid, value: "some stuff here"}) - end) - - post_infos = - 0..9 - |> Enum.map(fn i -> - get_partition_info(db_name, mk_partition(i)) - end) - - assert post_infos == pre_infos - end - - @tag :skip_on_jenkins - test "get all partition sizes", context do - db_name = context[:db_name] - mk_docs(db_name) - - {esum, asum} = - 0..9 - |> Enum.reduce({0, 0}, fn i, {esize, asize} -> - partition = mk_partition(i) - info = get_partition_info(db_name, partition) - assert info["doc_count"] == 100 - assert info["doc_del_count"] == 0 - assert info["sizes"]["external"] > 0 - assert info["sizes"]["active"] > 0 - {esize + info["sizes"]["external"], asize + info["sizes"]["active"]} - end) - - db_info = get_db_info(db_name) - assert db_info["sizes"]["external"] >= esum - assert db_info["sizes"]["active"] >= asum - end - - test "get partition size with attachment", context do - db_name = context[:db_name] - - doc = %{ - _id: "foo:doc-with-attachment", - _attachments: %{ - "foo.txt": %{ - content_type: "text/plain", - data: Base.encode64("This is a text document to save") - } - } - } - - save_doc(db_name, doc) - - db_info = get_db_info(db_name) - foo_info = get_partition_info(db_name, "foo") - - assert foo_info["doc_count"] == 1 - assert foo_info["doc_del_count"] == 0 - assert foo_info["sizes"]["active"] > 0 - assert foo_info["sizes"]["external"] > 0 - - assert foo_info["sizes"]["active"] <= db_info["sizes"]["active"] - assert foo_info["sizes"]["external"] <= db_info["sizes"]["external"] - end - - test "attachments don't affect other partitions", context do - db_name = context[:db_name] - mk_docs(db_name) - - pre_infos = - 0..9 - |> Enum.map(fn i -> - get_partition_info(db_name, mk_partition(i)) - end) - - doc = %{ - _id: "foo:doc-with-attachment", - _attachments: %{ - "foo.txt": %{ - content_type: "text/plain", - data: Base.encode64("This is a text document to save") - } - } - } - - save_doc(db_name, doc) - - att_info = get_partition_info(db_name, "foo") - assert att_info["doc_count"] == 1 - assert att_info["sizes"]["external"] > 0 - - post_infos = - 0..9 - |> Enum.map(fn i -> - get_partition_info(db_name, mk_partition(i)) - end) - - assert post_infos == pre_infos - - esize = - ([att_info] ++ post_infos) - |> Enum.reduce(0, fn info, acc -> - info["sizes"]["external"] + acc - end) - - db_info = get_db_info(db_name) - assert esize == db_info["sizes"]["external"] - end - - test "partition activity not affect other partition sizes", context do - db_name = context[:db_name] - mk_docs(db_name) - - partition1 = "000" - partition2 = "001" - - info2 = get_partition_info(db_name, partition2) - - doc_id = "#{partition1}:doc-with-attachment" - - doc = %{ - _id: doc_id, - _attachments: %{ - "foo.txt": %{ - content_type: "text/plain", - data: Base.encode64("This is a text document to save") - } - } - } - - doc_rev = save_doc(db_name, doc) - - info2_attach = get_partition_info(db_name, partition2) - assert info2_attach == info2 - - doc = - Enum.into( - %{ - another: "add another field", - _rev: doc_rev - }, - doc - ) - - doc_rev = save_doc(db_name, doc) - - info2_update = get_partition_info(db_name, partition2) - assert info2_update == info2 - - resp = Couch.delete("/#{db_name}/#{doc_id}", query: %{rev: doc_rev}) - assert resp.status_code == 200 - - info2_delete = get_partition_info(db_name, partition2) - assert info2_delete == info2 - end - - test "purging docs decreases partition size", context do - db_name = context[:db_name] - mk_docs(db_name) - - partition = "000" - - query = [ - start_key: "\"#{partition}:0000\"", - end_key: "\"#{partition}:9999\"", - limit: 50 - ] - - resp = Couch.get("/#{db_name}/_all_docs", query: query) - assert resp.status_code == 200 - %{body: body} = resp - - pre_info = get_partition_info(db_name, partition) - - pbody = - body["rows"] - |> Enum.reduce(%{}, fn row, acc -> - Map.put(acc, row["id"], [row["value"]["rev"]]) - end) - - resp = Couch.post("/#{db_name}/_purge", query: [w: 3], body: pbody) - assert resp.status_code in [201, 202] - - post_info = get_partition_info(db_name, partition) - assert post_info["doc_count"] == pre_info["doc_count"] - 50 - assert post_info["doc_del_count"] == 0 - assert post_info["sizes"]["active"] < pre_info["sizes"]["active"] - assert post_info["sizes"]["external"] < pre_info["sizes"]["external"] - end -end diff --git a/test/elixir/test/partition_view_test.exs b/test/elixir/test/partition_view_test.exs deleted file mode 100644 index 0a55c2443..000000000 --- a/test/elixir/test/partition_view_test.exs +++ /dev/null @@ -1,374 +0,0 @@ -defmodule ViewPartitionTest do - use CouchTestCase - import PartitionHelpers - - @moduledoc """ - Test Partition functionality for views - """ - - setup_all do - db_name = random_db_name() - {:ok, _} = create_db(db_name, query: %{partitioned: true, q: 1}) - on_exit(fn -> delete_db(db_name) end) - - create_partition_docs(db_name) - - map_fun1 = """ - function(doc) { - if (doc.some) { - emit(doc.value, doc.some); - } - } - """ - - map_fun2 = """ - function(doc) { - if (doc.group) { - emit([doc.some, doc.group], 1); - } - } - """ - - query = %{:w => 3} - - body = %{ - :docs => [ - %{ - _id: "_design/map", - views: %{some: %{map: map_fun1}} - }, - %{ - _id: "_design/map_some", - views: %{some: %{map: map_fun2}} - }, - %{ - _id: "_design/partitioned_true", - views: %{some: %{map: map_fun1}}, - options: %{partitioned: true} - }, - %{ - _id: "_design/partitioned_false", - views: %{some: %{map: map_fun1}}, - options: %{partitioned: false} - }, - %{ - _id: "_design/reduce", - views: %{some: %{map: map_fun2, reduce: "_count"}} - }, - %{ - _id: "_design/include_ddocs", - views: %{some: %{map: map_fun1}}, - options: %{include_design: true} - } - ] - } - - resp = Couch.post("/#{db_name}/_bulk_docs", query: query, body: body) - Enum.each(resp.body, &assert(&1["ok"])) - - {:ok, [db_name: db_name]} - end - - def get_reduce_result(resp) do - %{:body => %{"rows" => rows}} = resp - rows - end - - test "query with partitioned:true returns partitioned fields", context do - db_name = context[:db_name] - - url = "/#{db_name}/_partition/foo/_design/partitioned_true/_view/some" - resp = Couch.get(url) - assert resp.status_code == 200 - partitions = get_partitions(resp) - assert Enum.dedup(partitions) == ["foo"] - - url = "/#{db_name}/_partition/bar/_design/partitioned_true/_view/some" - resp = Couch.get(url) - assert resp.status_code == 200 - partitions = get_partitions(resp) - assert Enum.dedup(partitions) == ["bar"] - end - - test "default view query returns partitioned fields", context do - db_name = context[:db_name] - - url = "/#{db_name}/_partition/foo/_design/map/_view/some" - resp = Couch.get(url) - assert resp.status_code == 200 - partitions = get_partitions(resp) - assert Enum.dedup(partitions) == ["foo"] - - url = "/#{db_name}/_partition/bar/_design/map/_view/some" - resp = Couch.get(url) - assert resp.status_code == 200 - partitions = get_partitions(resp) - assert Enum.dedup(partitions) == ["bar"] - end - - test "conflicting partitions in path and query string rejected", context do - db_name = context[:db_name] - - url = "/#{db_name}/_partition/foo/_design/map/_view/some" - resp = Couch.get(url, query: %{partition: "bar"}) - assert resp.status_code == 400 - %{:body => %{"reason" => reason}} = resp - assert Regex.match?(~r/Conflicting value/, reason) - end - - test "query will return zero results for wrong inputs", context do - db_name = context[:db_name] - - url = "/#{db_name}/_partition/foo/_design/map/_view/some" - resp = Couch.get(url, query: %{start_key: "\"foo:12\""}) - assert resp.status_code == 200 - assert Map.get(resp, :body)["rows"] == [] - end - - test "partitioned ddoc cannot be used in global query", context do - db_name = context[:db_name] - - url = "/#{db_name}/_design/map/_view/some" - resp = Couch.get(url) - %{:body => %{"reason" => reason}} = resp - assert resp.status_code == 400 - assert Regex.match?(~r/mandatory for queries to this view./, reason) - end - - test "partitioned query cannot be used with global ddoc", context do - db_name = context[:db_name] - - url = "/#{db_name}/_partition/foo/_design/partitioned_false/_view/some" - resp = Couch.get(url) - %{:body => %{"reason" => reason}} = resp - assert resp.status_code == 400 - assert Regex.match?(~r/is not supported in this design doc/, reason) - end - - test "view query returns all docs for global query", context do - db_name = context[:db_name] - - url = "/#{db_name}/_design/partitioned_false/_view/some" - resp = Couch.get(url) - assert resp.status_code == 200 - ids = get_ids(resp) - assert length(ids) == 100 - end - - test "partition query errors with incorrect partition supplied", context do - db_name = context[:db_name] - - url = "/#{db_name}/_partition/_bar/_design/map/_view/some" - resp = Couch.get(url) - assert resp.status_code == 400 - - url = "/#{db_name}/_partition//_design/map/_view/some" - resp = Couch.get(url) - assert resp.status_code == 400 - end - - test "partitioned query works with startkey, endkey range", context do - db_name = context[:db_name] - - url = "/#{db_name}/_partition/foo/_design/map/_view/some" - resp = Couch.get(url, query: %{start_key: 12, end_key: 20}) - assert resp.status_code == 200 - partitions = get_partitions(resp) - assert length(partitions) == 5 - assert Enum.dedup(partitions) == ["foo"] - end - - test "partitioned query works with keys", context do - db_name = context[:db_name] - - url = "/#{db_name}/_partition/foo/_design/map/_view/some" - resp = Couch.post(url, body: %{keys: [2, 4, 6]}) - assert resp.status_code == 200 - ids = get_ids(resp) - assert length(ids) == 3 - assert ids == ["foo:2", "foo:4", "foo:6"] - end - - test "global query works with keys", context do - db_name = context[:db_name] - - url = "/#{db_name}/_design/partitioned_false/_view/some" - resp = Couch.post(url, body: %{keys: [2, 4, 6]}) - assert resp.status_code == 200 - ids = get_ids(resp) - assert length(ids) == 3 - assert ids == ["foo:2", "foo:4", "foo:6"] - end - - test "partition query works with limit", context do - db_name = context[:db_name] - - url = "/#{db_name}/_partition/foo/_design/map/_view/some" - resp = Couch.get(url, query: %{limit: 5}) - assert resp.status_code == 200 - partitions = get_partitions(resp) - assert length(partitions) == 5 - assert Enum.dedup(partitions) == ["foo"] - end - - test "partition query with descending", context do - db_name = context[:db_name] - - url = "/#{db_name}/_partition/foo/_design/map/_view/some" - resp = Couch.get(url, query: %{descending: true, limit: 5}) - assert resp.status_code == 200 - ids = get_ids(resp) - assert length(ids) == 5 - assert ids == ["foo:100", "foo:98", "foo:96", "foo:94", "foo:92"] - - resp = Couch.get(url, query: %{descending: false, limit: 5}) - assert resp.status_code == 200 - ids = get_ids(resp) - assert length(ids) == 5 - assert ids == ["foo:2", "foo:4", "foo:6", "foo:8", "foo:10"] - end - - test "partition query with skip", context do - db_name = context[:db_name] - - url = "/#{db_name}/_partition/foo/_design/map/_view/some" - resp = Couch.get(url, query: %{skip: 5, limit: 5}) - assert resp.status_code == 200 - ids = get_ids(resp) - assert length(ids) == 5 - assert ids == ["foo:12", "foo:14", "foo:16", "foo:18", "foo:20"] - end - - test "partition query with key", context do - db_name = context[:db_name] - - url = "/#{db_name}/_partition/foo/_design/map/_view/some" - resp = Couch.get(url, query: %{key: 22}) - assert resp.status_code == 200 - ids = get_ids(resp) - assert length(ids) == 1 - assert ids == ["foo:22"] - end - - test "partition query with startkey_docid and endkey_docid", context do - db_name = context[:db_name] - - url = "/#{db_name}/_partition/foo/_design/map_some/_view/some" - - resp = - Couch.get( - url, - query: %{ - startkey: "[\"field\",\"one\"]", - endkey: "[\"field\",\"one\"]", - startkey_docid: "foo:12", - endkey_docid: "foo:30" - } - ) - - assert resp.status_code == 200 - ids = get_ids(resp) - assert ids == ["foo:12", "foo:18", "foo:24", "foo:30"] - end - - test "query with reduce works", context do - db_name = context[:db_name] - - url = "/#{db_name}/_partition/foo/_design/reduce/_view/some" - resp = Couch.get(url, query: %{reduce: true, group_level: 1}) - assert resp.status_code == 200 - results = get_reduce_result(resp) - assert results == [%{"key" => ["field"], "value" => 50}] - - resp = Couch.get(url, query: %{reduce: true, group_level: 2}) - results = get_reduce_result(resp) - - assert results == [ - %{"key" => ["field", "one"], "value" => 16}, - %{"key" => ["field", "two"], "value" => 34} - ] - - resp = Couch.get(url, query: %{reduce: true, group: true}) - results = get_reduce_result(resp) - - assert results == [ - %{"key" => ["field", "one"], "value" => 16}, - %{"key" => ["field", "two"], "value" => 34} - ] - end - - test "partition query can set query limits", context do - set_config({"query_server_config", "partition_query_limit", "2000"}) - - db_name = context[:db_name] - create_partition_docs(db_name) - create_partition_ddoc(db_name) - - url = "/#{db_name}/_partition/foo/_design/mrtest/_view/some" - - resp = - Couch.get( - url, - query: %{ - limit: 20 - } - ) - - assert resp.status_code == 200 - ids = get_ids(resp) - assert length(ids) == 20 - - resp = Couch.get(url) - assert resp.status_code == 200 - ids = get_ids(resp) - assert length(ids) == 50 - - resp = - Couch.get( - url, - query: %{ - limit: 2000 - } - ) - - assert resp.status_code == 200 - ids = get_ids(resp) - assert length(ids) == 50 - - resp = - Couch.get( - url, - query: %{ - limit: 2001 - } - ) - - assert resp.status_code == 400 - %{:body => %{"reason" => reason}} = resp - assert Regex.match?(~r/Limit is too large/, reason) - - resp = - Couch.get( - url, - query: %{ - limit: 2000, - skip: 25 - } - ) - - assert resp.status_code == 200 - ids = get_ids(resp) - assert length(ids) == 25 - end - - test "include_design works correctly", context do - db_name = context[:db_name] - - url = "/#{db_name}/_partition/foo/_design/include_ddocs/_view/some" - resp = Couch.get(url) - assert resp.status_code == 200 - partitions = get_partitions(resp) - assert length(partitions) == 50 - assert Enum.dedup(partitions) == ["foo"] - end -end diff --git a/test/elixir/test/partition_view_update_test.exs b/test/elixir/test/partition_view_update_test.exs deleted file mode 100644 index 5c1cb09f0..000000000 --- a/test/elixir/test/partition_view_update_test.exs +++ /dev/null @@ -1,160 +0,0 @@ -defmodule PartitionViewUpdateTest do - use CouchTestCase - import PartitionHelpers - - @moduledoc """ - Test Partition view update functionality - """ - @tag :with_partitioned_db - test "view updates properly remove old keys", context do - db_name = context[:db_name] - create_partition_docs(db_name, "foo", "bar") - create_partition_ddoc(db_name) - - check_key = fn key, num_rows -> - url = "/#{db_name}/_partition/foo/_design/mrtest/_view/some" - resp = Couch.get(url, query: [key: key]) - assert resp.status_code == 200 - assert length(resp.body["rows"]) == num_rows - end - - check_key.(2, 1) - - resp = Couch.get("/#{db_name}/foo:2") - doc = Map.put(resp.body, "value", 4) - resp = Couch.put("/#{db_name}/foo:2", query: [w: 3], body: doc) - assert resp.status_code >= 201 and resp.status_code <= 202 - - check_key.(4, 2) - check_key.(2, 0) - end - - @tag :skip_on_jenkins - @tag :with_partitioned_db - test "query with update=false works", context do - db_name = context[:db_name] - create_partition_docs(db_name) - create_partition_ddoc(db_name) - - url = "/#{db_name}/_partition/foo/_design/mrtest/_view/some" - - resp = - Couch.get( - url, - query: %{ - update: "true", - limit: 3 - } - ) - - assert resp.status_code == 200 - ids = get_ids(resp) - assert ids == ["foo:2", "foo:4", "foo:6"] - - # Avoid race conditions by attempting to get a full response - # from every shard before we do our update:false test - for _ <- 1..12 do - resp = Couch.get(url) - assert resp.status_code == 200 - end - - Couch.put("/#{db_name}/foo:1", body: %{some: "field"}) - - retry_until(fn -> - resp = - Couch.get( - url, - query: %{ - update: "false", - limit: 3 - } - ) - - assert resp.status_code == 200 - ids = get_ids(resp) - assert ids == ["foo:2", "foo:4", "foo:6"] - end) - end - - @tag :with_partitioned_db - test "purge removes view rows", context do - db_name = context[:db_name] - create_partition_docs(db_name) - create_partition_ddoc(db_name) - - url = "/#{db_name}/_partition/foo/_design/mrtest/_view/some" - - resp = Couch.get(url) - assert resp.status_code == 200 - %{body: body} = resp - assert length(body["rows"]) == 50 - - resp = Couch.get("/#{db_name}/foo:2") - assert resp.status_code == 200 - %{body: body} = resp - rev = body["_rev"] - - body = %{"foo:2" => [rev]} - resp = Couch.post("/#{db_name}/_purge", query: [w: 3], body: body) - assert resp.status_code in [201, 202] - - resp = Couch.get(url) - assert resp.status_code == 200 - %{body: body} = resp - assert length(body["rows"]) == 49 - end - - @tag :with_partitioned_db - test "purged conflict changes view rows", context do - db_name = context[:db_name] - create_partition_docs(db_name) - create_partition_ddoc(db_name) - - url = "/#{db_name}/_partition/foo/_design/mrtest/_view/some" - - resp = Couch.get(url) - assert resp.status_code == 200 - %{body: body} = resp - assert length(body["rows"]) == 50 - - # Create a conflict on foo:2. Since the 4096 - # value is deeper than the conflict we can assert - # that's in the view before the purge and assert - # that 8192 is in the view after the purge. - resp = Couch.get("/#{db_name}/foo:2") - assert resp.status_code == 200 - %{body: body} = resp - rev1 = body["_rev"] - - doc = %{_id: "foo:2", _rev: rev1, value: 4096, some: "field"} - resp = Couch.post("/#{db_name}", query: [w: 3], body: doc) - assert resp.status_code in [201, 202] - %{body: body} = resp - rev2 = body["rev"] - - query = [w: 3, new_edits: false] - conflict_rev = "1-4a75b4efa0804859b3dfd327cbc1c2f9" - doc = %{_id: "foo:2", _rev: conflict_rev, value: 8192, some: "field"} - resp = Couch.put("/#{db_name}/foo:2", query: query, body: doc) - assert resp.status_code in [201, 202] - - # Check that our expected row exists - resp = Couch.get(url, query: [key: 4096]) - assert resp.status_code == 200 - %{body: body} = resp - [row] = body["rows"] - assert row["id"] == "foo:2" - - # Remove the current row to be replaced with - # a row from the conflict - body = %{"foo:2" => [rev2]} - resp = Couch.post("/#{db_name}/_purge", query: [w: 3], body: body) - assert resp.status_code in [201, 202] - - resp = Couch.get(url, query: [key: 8192]) - assert resp.status_code == 200 - %{body: body} = resp - [row] = body["rows"] - assert row["id"] == "foo:2" - end -end diff --git a/test/elixir/test/reshard_all_docs_test.exs b/test/elixir/test/reshard_all_docs_test.exs deleted file mode 100644 index ab8c6b75b..000000000 --- a/test/elixir/test/reshard_all_docs_test.exs +++ /dev/null @@ -1,79 +0,0 @@ -defmodule ReshardAllDocsTest do - use CouchTestCase - import ReshardHelpers - - @moduledoc """ - Test _all_docs interaction with resharding - """ - - setup do - db = random_db_name() - {:ok, _} = create_db(db, query: %{q: 2}) - - on_exit(fn -> - reset_reshard_state() - delete_db(db) - end) - - {:ok, [db: db]} - end - - test "all_docs after splitting all shards on node1", context do - db = context[:db] - node1 = get_first_node() - docs = add_docs(1..100, db) - - before_split_all_docs = all_docs(db) - assert docs == before_split_all_docs - - resp = post_job_node(db, node1) - assert resp.status_code in [201, 202] - jobid = hd(resp.body)["id"] - wait_job_completed(jobid) - - assert before_split_all_docs == all_docs(db) - - assert remove_job(jobid).status_code == 200 - end - - test "all_docs after splitting the same range on all nodes", context do - db = context[:db] - docs = add_docs(1..100, db) - - before_split_all_docs = all_docs(db) - assert docs == before_split_all_docs - - resp = post_job_range(db, "00000000-7fffffff") - assert resp.status_code in [201, 202] - - resp.body - |> Enum.map(fn j -> j["id"] end) - |> Enum.each(fn id -> wait_job_completed(id) end) - - assert before_split_all_docs == all_docs(db) - - get_jobs() - |> Enum.map(fn j -> j["id"] end) - |> Enum.each(fn id -> remove_job(id) end) - end - - defp add_docs(range, db) do - docs = create_docs(range) - w3 = %{:w => 3} - resp = Couch.post("/#{db}/_bulk_docs", body: %{docs: docs}, query: w3) - assert resp.status_code in [201, 202] - assert length(resp.body) == length(docs) - - docs - |> rev(resp.body) - |> Enum.into(%{}, fn %{:_id => id, :_rev => rev} -> {id, rev} end) - end - - defp all_docs(db, query \\ %{}) do - resp = Couch.get("/#{db}/_all_docs", query: query) - assert resp.status_code == 200 - - resp.body["rows"] - |> Enum.into(%{}, fn %{"id" => id, "value" => v} -> {id, v["rev"]} end) - end -end diff --git a/test/elixir/test/reshard_basic_test.exs b/test/elixir/test/reshard_basic_test.exs deleted file mode 100644 index dcb198c46..000000000 --- a/test/elixir/test/reshard_basic_test.exs +++ /dev/null @@ -1,174 +0,0 @@ -defmodule ReshardBasicTest do - use CouchTestCase - import ReshardHelpers - - @moduledoc """ - Test resharding basic functionality - """ - - setup_all do - db1 = random_db_name() - {:ok, _} = create_db(db1, query: %{q: 1}) - db2 = random_db_name() - {:ok, _} = create_db(db2, query: %{q: 2}) - - on_exit(fn -> - reset_reshard_state() - delete_db(db1) - delete_db(db2) - end) - - {:ok, [db1: db1, db2: db2]} - end - - test "basic api querying, no jobs present" do - summary = get_summary() - assert summary["state"] == "running" - assert summary["state_reason"] == :null - assert summary["total"] == 0 - assert summary["completed"] == 0 - assert summary["failed"] == 0 - assert summary["stopped"] == 0 - assert get_state() == %{"state" => "running", "reason" => :null} - assert get_jobs() == [] - end - - test "check validation of invalid parameters", context do - db1 = context[:db1] - node1 = get_first_node() - - resp = post_job_node(db1, "badnode") - assert resp.status_code == 400 - - resp = post_job_node("badresharddb", node1) - assert resp.status_code == 400 - - resp = post_job_db("badresharddb") - assert resp.status_code == 400 - - resp = post_job_range("badresharddb", "randomgarbage") - assert resp.status_code == 400 - - resp = get_job("badjobid") - assert resp.status_code == 404 - - resp = remove_job("badjobid") - assert resp.status_code == 404 - end - - test "toggle global state" do - assert get_state() == %{"state" => "running", "reason" => :null} - put_state_stopped("xyz") - assert get_state() == %{"state" => "stopped", "reason" => "xyz"} - put_state_running() - assert get_state() == %{"state" => "running", "reason" => :null} - end - - test "split q=1 db shards on node1 (1 job)", context do - db = context[:db1] - node1 = get_first_node() - - resp = post_job_node(db, node1) - assert resp.status_code in [201, 202] - - body = resp.body - assert is_list(body) - assert length(body) == 1 - - [job] = body - id = job["id"] - assert is_binary(id) - node = job["node"] - assert is_binary(node) - assert node == node1 - assert job["ok"] == true - shard = job["shard"] - assert is_binary(shard) - - resp = get_job(id) - assert resp.status_code == 200 - - body = resp.body - assert body["type"] == "split" - assert body["id"] == id - assert body["source"] == shard - assert is_list(body["history"]) - assert body["job_state"] in ["new", "running", "completed"] - assert is_list(body["target"]) - assert length(body["target"]) == 2 - - wait_job_completed(id) - - resp = get_job(id) - assert resp.status_code == 200 - - body = resp.body - assert body["job_state"] == "completed" - assert body["split_state"] == "completed" - - resp = Couch.get("/#{db}/_shards") - assert resp.status_code == 200 - shards = resp.body["shards"] - assert node1 not in Map.get(shards, "00000000-ffffffff", []) - assert shards["00000000-7fffffff"] == [node1] - assert shards["80000000-ffffffff"] == [node1] - - summary = get_summary() - assert summary["total"] == 1 - assert summary["completed"] == 1 - - resp = remove_job(id) - assert resp.status_code == 200 - - assert get_jobs() == [] - - summary = get_summary() - assert summary["total"] == 0 - assert summary["completed"] == 0 - end - - test "split q=2 shards on node1 (2 jobs)", context do - db = context[:db2] - node1 = get_first_node() - - resp = post_job_node(db, node1) - assert resp.status_code in [201, 202] - - body = resp.body - assert is_list(body) - assert length(body) == 2 - - [job1, job2] = Enum.sort(body) - {id1, id2} = {job1["id"], job2["id"]} - - assert get_job(id1).body["id"] == id1 - assert get_job(id2).body["id"] == id2 - - summary = get_summary() - assert summary["total"] == 2 - - wait_job_completed(id1) - wait_job_completed(id2) - - summary = get_summary() - assert summary["completed"] == 2 - - resp = Couch.get("/#{db}/_shards") - assert resp.status_code == 200 - shards = resp.body["shards"] - assert node1 not in Map.get(shards, "00000000-7fffffff", []) - assert node1 not in Map.get(shards, "80000000-ffffffff", []) - assert shards["00000000-3fffffff"] == [node1] - assert shards["40000000-7fffffff"] == [node1] - assert shards["80000000-bfffffff"] == [node1] - assert shards["c0000000-ffffffff"] == [node1] - - # deleting the source db should remove the jobs - delete_db(db) - wait_job_removed(id1) - wait_job_removed(id2) - - summary = get_summary() - assert summary["total"] == 0 - end -end diff --git a/test/elixir/test/reshard_changes_feed.exs b/test/elixir/test/reshard_changes_feed.exs deleted file mode 100644 index 5498ded7b..000000000 --- a/test/elixir/test/reshard_changes_feed.exs +++ /dev/null @@ -1,81 +0,0 @@ -defmodule ReshardChangesFeedTest do - use CouchTestCase - import ReshardHelpers - - @moduledoc """ - Test _changes interaction with resharding - """ - - setup do - db = random_db_name() - {:ok, _} = create_db(db, query: %{q: 2}) - - on_exit(fn -> - reset_reshard_state() - delete_db(db) - end) - - {:ok, [db: db]} - end - - test "all_docs after splitting all shards on node1", context do - db = context[:db] - add_docs(1..3, db) - - all_before = changes(db) - first_seq = hd(all_before["results"])["seq"] - last_seq = all_before["last_seq"] - since_1_before = docset(changes(db, %{:since => first_seq})) - since_last_before = docset(changes(db, %{:since => last_seq})) - - resp = post_job_range(db, "00000000-7fffffff") - assert resp.status_code in [201, 202] - - resp.body - |> Enum.map(fn j -> j["id"] end) - |> Enum.each(fn id -> wait_job_completed(id) end) - - all_after = changes(db) - since_1_after = docset(changes(db, %{:since => first_seq})) - since_last_after = docset(changes(db, %{:since => last_seq})) - - assert docset(all_before) == docset(all_after) - assert MapSet.subset?(since_1_before, since_1_after) - assert MapSet.subset?(since_last_before, since_last_after) - - get_jobs() - |> Enum.map(fn j -> j["id"] end) - |> Enum.each(fn id -> remove_job(id) end) - end - - defp docset(changes) do - changes["results"] - |> Enum.map(fn %{"id" => id} -> id end) - |> MapSet.new() - end - - defp changes(db, query \\ %{}) do - resp = Couch.get("/#{db}/_changes", query: query) - assert resp.status_code == 200 - resp.body - end - - defp add_docs(range, db) do - docs = create_docs(range) - w3 = %{:w => 3} - resp = Couch.post("/#{db}/_bulk_docs", body: %{docs: docs}, query: w3) - assert resp.status_code in [201, 202] - assert length(resp.body) == length(docs) - - docs - |> rev(resp.body) - |> Enum.into(%{}, fn %{:_id => id, :_rev => rev} -> {id, rev} end) - end - - # (Keep for debugging) - # defp unpack_seq(seq) when is_binary(seq) do - # [_, opaque] = String.split(seq, "-") - # {:ok, binblob} = Base.url_decode64(opaque, padding: false) - # :erlang.binary_to_term(binblob) - # end -end diff --git a/test/elixir/test/reshard_helpers.exs b/test/elixir/test/reshard_helpers.exs deleted file mode 100644 index 282d98c82..000000000 --- a/test/elixir/test/reshard_helpers.exs +++ /dev/null @@ -1,114 +0,0 @@ -defmodule ReshardHelpers do - use CouchTestCase - - def get_summary do - resp = Couch.get("/_reshard") - assert resp.status_code == 200 - resp.body - end - - def get_state do - resp = Couch.get("/_reshard/state") - assert resp.status_code == 200 - resp.body - end - - def put_state_running do - resp = Couch.put("/_reshard/state", body: %{:state => "running"}) - assert resp.status_code == 200 - resp - end - - def put_state_stopped(reason \\ "") do - body = %{:state => "stopped", :reason => reason} - resp = Couch.put("/_reshard/state", body: body) - assert resp.status_code == 200 - resp - end - - def get_jobs do - resp = Couch.get("/_reshard/jobs") - assert resp.status_code == 200 - resp.body["jobs"] - end - - def post_job_db(db) do - body = %{:type => :split, :db => db} - Couch.post("/_reshard/jobs", body: body) - end - - def post_job_node(db, node) do - body = %{:type => :split, :db => db, :node => node} - Couch.post("/_reshard/jobs", body: body) - end - - def post_job_range(db, range) do - body = %{:type => :split, :db => db, :range => range} - Couch.post("/_reshard/jobs", body: body) - end - - def post_job_node_and_range(db, node, range) do - body = %{:type => :split, :db => db, :node => node, :range => range} - Couch.post("/_reshard/jobs", body: body) - end - - def get_job(id) when is_binary(id) do - Couch.get("/_reshard/jobs/#{id}") - end - - def remove_job(id) when is_binary(id) do - Couch.delete("/_reshard/jobs/#{id}") - end - - def get_job_state(id) when is_binary(id) do - resp = Couch.get("/_reshard/jobs/#{id}/state") - assert resp.status_code == 200 - resp.body["state"] - end - - def stop_job(id, reason \\ "") when is_binary(id) do - body = %{:state => "stopped", :reason => reason} - Couch.post("/_reshard/jobs/#{id}/state", body: body) - end - - def resume_job(id) when is_binary(id) do - body = %{:state => "running"} - Couch.post("/_reshard/jobs/#{id}/state", body: body) - end - - def job_ids(jobs) do - Enum.map(fn job -> job["id"] end, jobs) - end - - def get_first_node do - mresp = Couch.get("/_membership") - assert mresp.status_code == 200 - all_nodes = mresp.body["all_nodes"] - - mresp.body["cluster_nodes"] - |> Enum.filter(fn n -> n in all_nodes end) - |> Enum.sort() - |> hd() - end - - def wait_job_removed(id) do - retry_until(fn -> get_job(id).status_code == 404 end, 200, 60_000) - end - - def wait_job_completed(id) do - wait_job_state(id, "completed") - end - - def wait_job_state(id, state) do - retry_until(fn -> get_job_state(id) == state end, 200, 60_000) - end - - def reset_reshard_state do - get_jobs() - |> Enum.map(fn j -> j["id"] end) - |> Enum.each(fn id -> remove_job(id) end) - - assert get_jobs() == [] - put_state_running() - end -end diff --git a/test/elixir/test/test_helper.exs b/test/elixir/test/test_helper.exs index 4bf65bcf6..6311fca44 100644 --- a/test/elixir/test/test_helper.exs +++ b/test/elixir/test/test_helper.exs @@ -14,5 +14,3 @@ ExUnit.configure( ) ExUnit.start() -Code.require_file("partition_helpers.exs", __DIR__) -Code.require_file("reshard_helpers.exs", __DIR__) -- cgit v1.2.1 From c9eee15da773aa6bc89ad43f3f2755bc1acb0712 Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Wed, 19 Jun 2019 11:58:47 -0500 Subject: Implement _all_dbs/_all_docs API parameters This adds the mapping of CouchDB start/end keys and so on to the similar yet slightly different concepts in FoundationDB. The handlers for `_all_dbs` and `_all_docs` have been udpated to use this new logic. --- src/chttpd/src/chttpd_changes.erl | 16 +- src/chttpd/src/chttpd_db.erl | 220 +++++++++++++++++---------- src/chttpd/src/chttpd_misc.erl | 67 ++++---- src/fabric/src/fabric2_db.erl | 143 +++++++++++++++-- src/fabric/src/fabric2_fdb.erl | 236 ++++++++++++++--------------- src/fabric/test/fabric2_doc_fold_tests.erl | 84 +++++++++- test/elixir/test/all_docs_test.exs | 3 +- 7 files changed, 515 insertions(+), 254 deletions(-) diff --git a/src/chttpd/src/chttpd_changes.erl b/src/chttpd/src/chttpd_changes.erl index b73efa327..d4318b5ce 100644 --- a/src/chttpd/src/chttpd_changes.erl +++ b/src/chttpd/src/chttpd_changes.erl @@ -639,15 +639,19 @@ changes_row(Results, Change, Acc) -> maybe_get_changes_doc(Value, #changes_acc{include_docs=true}=Acc) -> #changes_acc{ db = Db, - doc_options = DocOpts, + doc_options = DocOpts0, conflicts = Conflicts, filter = Filter } = Acc, - Opts = case Conflicts of - true -> [deleted, conflicts]; - false -> [deleted] - end, - load_doc(Db, Value, Opts, DocOpts, Filter); + OpenOpts = case Conflicts of + true -> [deleted, conflicts]; + false -> [deleted] + end, + DocOpts1 = case Conflicts of + true -> [conflicts | DocOpts0]; + false -> DocOpts0 + end, + load_doc(Db, Value, OpenOpts, DocOpts1, Filter); maybe_get_changes_doc(_Value, _Acc) -> []. diff --git a/src/chttpd/src/chttpd_db.erl b/src/chttpd/src/chttpd_db.erl index 7cffc54f5..4bde3b007 100644 --- a/src/chttpd/src/chttpd_db.erl +++ b/src/chttpd/src/chttpd_db.erl @@ -16,6 +16,7 @@ -include_lib("couch/include/couch_db.hrl"). -include_lib("couch_mrview/include/couch_mrview.hrl"). +-include_lib("fabric/include/fabric.hrl"). -include_lib("mem3/include/mem3.hrl"). -export([handle_request/1, handle_compact_req/2, handle_design_req/2, @@ -844,21 +845,151 @@ multi_all_docs_view(Req, Db, OP, Queries) -> {ok, Resp1} = chttpd:send_delayed_chunk(VAcc2#vacc.resp, "\r\n]}"), chttpd:end_delayed_json_response(Resp1). -all_docs_view(Req, Db, _Keys, _OP) -> - % Args0 = couch_mrview_http:parse_body_and_query(Req, Keys), - % Args1 = Args0#mrargs{view_type=map}, - % Args2 = fabric_util:validate_all_docs_args(Db, Args1), - % Args3 = set_namespace(OP, Args2), - Options = [{user_ctx, Req#httpd.user_ctx}], +all_docs_view(Req, Db, Keys, OP) -> + Args0 = couch_mrview_http:parse_body_and_query(Req, Keys), + Aparse_body_and_queryrgs1 = set_namespace(OP, Args0), Max = chttpd:chunked_response_buffer_size(), - VAcc = #vacc{db=Db, req=Req, threshold=Max}, - {ok, Resp} = fabric2_db:fold_docs(Db, fun view_cb/2, VAcc, Options), - {ok, Resp#vacc.resp}. + VAcc0 = #vacc{ + db = Db, + req = Req, + threshold = Max + }, + case Args1#mrargs.keys of + undefined -> + Options = [ + {user_ctx, Req#httpd.user_ctx}, + {dir, Args1#mrargs.direction}, + {start_key, Args1#mrargs.start_key}, + {end_key, Args1#mrargs.end_key}, + {limit, Args1#mrargs.limit}, + {skip, Args1#mrargs.skip}, + {update_seq, Args1#mrargs.update_seq} + ], + Acc = {iter, Db, Args1, VAcc0}, + {ok, {iter, _, _, Resp}} = + fabric2_db:fold_docs(Db, fun view_cb/2, Acc, Options), + {ok, Resp#vacc.resp}; + Keys0 when is_list(Keys0) -> + Keys1 = apply_args_to_keylist(Args1, Keys0), + %% namespace can be _set_ to `undefined`, so we + %% want simulate enum here + NS = case couch_util:get_value(namespace, Args1#mrargs.extra) of + <<"_all_docs">> -> <<"_all_docs">>; + <<"_design">> -> <<"_design">>; + <<"_local">> -> <<"_local">>; + _ -> <<"_all_docs">> + end, + TotalRows = fabric2_db:get_doc_count(Db, NS), + Meta = case Args1#mrargs.update_seq of + true -> + UpdateSeq = fabric2_db:get_update_seq(Db), + [{update_seq, UpdateSeq}]; + false -> + [] + end ++ [{total, TotalRows}, {offset, null}], + {ok, VAcc1} = view_cb({meta, Meta}, VAcc0), + DocOpts = case Args1#mrargs.conflicts of + true -> [conflicts | Args1#mrargs.doc_options]; + _ -> Args1#mrargs.doc_options + end ++ [{user_ctx, Req#httpd.user_ctx}], + IncludeDocs = Args1#mrargs.include_docs, + VAcc2 = lists:foldl(fun(DocId, Acc) -> + OpenOpts = [deleted | DocOpts], + Row0 = case fabric2_db:open_doc(Db, DocId, OpenOpts) of + {not_found, missing} -> + #view_row{key = DocId}; + {ok, #doc{deleted = true, revs = Revs}} -> + {RevPos, [RevId | _]} = Revs, + Value = {[ + {rev, couch_doc:rev_to_str({RevPos, RevId})}, + {deleted, true} + ]}, + DocValue = if not IncludeDocs -> undefined; true -> + null + end, + #view_row{ + key = DocId, + id = DocId, + value = Value, + doc = DocValue + }; + {ok, #doc{revs = Revs} = Doc0} -> + {RevPos, [RevId | _]} = Revs, + Value = {[ + {rev, couch_doc:rev_to_str({RevPos, RevId})} + ]}, + DocValue = if not IncludeDocs -> undefined; true -> + couch_doc:to_json_obj(Doc0, DocOpts) + end, + #view_row{ + key = DocId, + id = DocId, + value = Value, + doc = DocValue + } + end, + Row1 = fabric_view:transform_row(Row0), + {ok, NewAcc} = view_cb(Row1, Acc), + NewAcc + end, VAcc1, Keys1), + {ok, VAcc3} = view_cb(complete, VAcc2), + {ok, VAcc3#vacc.resp} + end. + + +apply_args_to_keylist(Args, Keys0) -> + Keys1 = case Args#mrargs.direction of + fwd -> Keys0; + _ -> lists:reverse(Keys0) + end, + Keys2 = case Args#mrargs.skip < length(Keys1) of + true -> lists:nthtail(Args#mrargs.skip, Keys1); + false -> [] + end, + case Args#mrargs.limit < length(Keys2) of + true -> lists:sublist(Keys2, Args#mrargs.limit); + false -> Keys2 + end. + + +view_cb({row, Row}, {iter, Db, Args, VAcc}) -> + NewRow = case lists:keymember(doc, 1, Row) of + true -> + chttpd_stats:incr_reads(); + false when Args#mrargs.include_docs -> + {id, DocId} = lists:keyfind(id, 1, Row), + chttpd_stats:incr_reads(), + DocOpts = case Args#mrargs.conflicts of + true -> [conflicts | Args#mrargs.doc_options]; + _ -> Args#mrargs.doc_options + end ++ [{user_ctx, (VAcc#vacc.req)#httpd.user_ctx}], + OpenOpts = [deleted | DocOpts], + DocMember = case fabric2_db:open_doc(Db, DocId, OpenOpts) of + {not_found, missing} -> + []; + {ok, #doc{deleted = true}} -> + [{doc, null}]; + {ok, #doc{} = Doc} -> + [{doc, couch_doc:to_json_obj(Doc, DocOpts)}] + end, + Row ++ DocMember; + _ -> + Row + end, + chttpd_stats:incr_rows(), + {Go, NewVAcc} = couch_mrview_http:view_cb({row, NewRow}, VAcc), + {Go, {iter, Db, Args, NewVAcc}}; + +view_cb(Msg, {iter, Db, Args, VAcc}) -> + {Go, NewVAcc} = couch_mrview_http:view_cb(Msg, VAcc), + {Go, {iter, Db, Args, NewVAcc}}; view_cb({row, Row} = Msg, Acc) -> case lists:keymember(doc, 1, Row) of - true -> chttpd_stats:incr_reads(); - false -> ok + true -> + chttpd_stats:incr_reads(); + false -> + ok end, chttpd_stats:incr_rows(), couch_mrview_http:view_cb(Msg, Acc); @@ -2040,70 +2171,3 @@ bulk_get_json_error(DocId, Rev, Error, Reason) -> {<<"rev">>, Rev}, {<<"error">>, Error}, {<<"reason">>, Reason}]}}]}). - - --ifdef(TEST). --include_lib("eunit/include/eunit.hrl"). - -monitor_attachments_test_() -> - {"ignore stubs", - fun () -> - Atts = [couch_att:new([{data, stub}])], - ?_assertEqual([], monitor_attachments(Atts)) - end - }. - -parse_partitioned_opt_test_() -> - { - foreach, - fun setup/0, - fun teardown/1, - [ - t_should_allow_partitioned_db(), - t_should_throw_on_not_allowed_partitioned_db(), - t_returns_empty_array_for_partitioned_false(), - t_returns_empty_array_for_no_partitioned_qs() - ] - }. - - -setup() -> - ok. - -teardown(_) -> - meck:unload(). - -mock_request(Url) -> - Headers = mochiweb_headers:make([{"Host", "examples.com"}]), - MochiReq = mochiweb_request:new(nil, 'PUT', Url, {1, 1}, Headers), - #httpd{mochi_req = MochiReq}. - -t_should_allow_partitioned_db() -> - ?_test(begin - meck:expect(couch_flags, is_enabled, 2, true), - Req = mock_request("/all-test21?partitioned=true"), - [Partitioned, _] = parse_partitioned_opt(Req), - ?assertEqual(Partitioned, {partitioned, true}) - end). - -t_should_throw_on_not_allowed_partitioned_db() -> - ?_test(begin - meck:expect(couch_flags, is_enabled, 2, false), - Req = mock_request("/all-test21?partitioned=true"), - Throw = {bad_request, <<"Partitioned feature is not enabled.">>}, - ?assertThrow(Throw, parse_partitioned_opt(Req)) - end). - -t_returns_empty_array_for_partitioned_false() -> - ?_test(begin - Req = mock_request("/all-test21?partitioned=false"), - ?assertEqual(parse_partitioned_opt(Req), []) - end). - -t_returns_empty_array_for_no_partitioned_qs() -> - ?_test(begin - Req = mock_request("/all-test21"), - ?assertEqual(parse_partitioned_opt(Req), []) - end). - --endif. diff --git a/src/chttpd/src/chttpd_misc.erl b/src/chttpd/src/chttpd_misc.erl index a39e31cd3..c78f6b779 100644 --- a/src/chttpd/src/chttpd_misc.erl +++ b/src/chttpd/src/chttpd_misc.erl @@ -113,39 +113,54 @@ maybe_add_csp_headers(Headers, _) -> Headers. handle_all_dbs_req(#httpd{method='GET'}=Req) -> - % TODO: Support args and options properly, transform - % this back into a fold call similar to the old - % version. - %% Args = couch_mrview_http:parse_params(Req, undefined), + #mrargs{ + start_key = StartKey, + end_key = EndKey, + direction = Dir, + limit = Limit, + skip = Skip + } = couch_mrview_http:parse_params(Req, undefined), + + Options = [ + {start_key, StartKey}, + {end_key, EndKey}, + {dir, Dir}, + {limit, Limit}, + {skip, Skip} + ], + % Eventually the Etag for this request will be derived % from the \xFFmetadataVersion key in fdb Etag = <<"foo">>, - %% Options = [{user_ctx, Req#httpd.user_ctx}], + {ok, Resp} = chttpd:etag_respond(Req, Etag, fun() -> - AllDbs = fabric2_db:list_dbs(), - chttpd:send_json(Req, AllDbs) - end); + {ok, Resp} = chttpd:start_delayed_json_response(Req, 200, [{"ETag",Etag}]), + Callback = fun all_dbs_callback/2, + Acc = #vacc{req=Req,resp=Resp}, + fabric2_db:list_dbs(Callback, Acc, Options) + end), + case is_record(Resp, vacc) of + true -> {ok, Resp#vacc.resp}; + _ -> {ok, Resp} + end; handle_all_dbs_req(Req) -> send_method_not_allowed(Req, "GET,HEAD"). -%% all_dbs_callback({meta, _Meta}, #vacc{resp=Resp0}=Acc) -> -%% {ok, Resp1} = chttpd:send_delayed_chunk(Resp0, "["), -%% {ok, Acc#vacc{resp=Resp1}}; -%% all_dbs_callback({row, Row}, #vacc{resp=Resp0}=Acc) -> -%% Prepend = couch_mrview_http:prepend_val(Acc), -%% case couch_util:get_value(id, Row) of <<"_design", _/binary>> -> -%% {ok, Acc}; -%% DbName -> -%% {ok, Resp1} = chttpd:send_delayed_chunk(Resp0, [Prepend, ?JSON_ENCODE(DbName)]), -%% {ok, Acc#vacc{prepend=",", resp=Resp1}} -%% end; -%% all_dbs_callback(complete, #vacc{resp=Resp0}=Acc) -> -%% {ok, Resp1} = chttpd:send_delayed_chunk(Resp0, "]"), -%% {ok, Resp2} = chttpd:end_delayed_json_response(Resp1), -%% {ok, Acc#vacc{resp=Resp2}}; -%% all_dbs_callback({error, Reason}, #vacc{resp=Resp0}=Acc) -> -%% {ok, Resp1} = chttpd:send_delayed_error(Resp0, Reason), -%% {ok, Acc#vacc{resp=Resp1}}. +all_dbs_callback({meta, _Meta}, #vacc{resp=Resp0}=Acc) -> + {ok, Resp1} = chttpd:send_delayed_chunk(Resp0, "["), + {ok, Acc#vacc{resp=Resp1}}; +all_dbs_callback({row, Row}, #vacc{resp=Resp0}=Acc) -> + Prepend = couch_mrview_http:prepend_val(Acc), + DbName = couch_util:get_value(id, Row), + {ok, Resp1} = chttpd:send_delayed_chunk(Resp0, [Prepend, ?JSON_ENCODE(DbName)]), + {ok, Acc#vacc{prepend=",", resp=Resp1}}; +all_dbs_callback(complete, #vacc{resp=Resp0}=Acc) -> + {ok, Resp1} = chttpd:send_delayed_chunk(Resp0, "]"), + {ok, Resp2} = chttpd:end_delayed_json_response(Resp1), + {ok, Acc#vacc{resp=Resp2}}; +all_dbs_callback({error, Reason}, #vacc{resp=Resp0}=Acc) -> + {ok, Resp1} = chttpd:send_delayed_error(Resp0, Reason), + {ok, Acc#vacc{resp=Resp1}}. handle_dbs_info_req(#httpd{method='POST'}=Req) -> chttpd:validate_ctype(Req, "application/json"), diff --git a/src/fabric/src/fabric2_db.erl b/src/fabric/src/fabric2_db.erl index 80028a645..eb74a183c 100644 --- a/src/fabric/src/fabric2_db.erl +++ b/src/fabric/src/fabric2_db.erl @@ -20,6 +20,7 @@ list_dbs/0, list_dbs/1, + list_dbs/3, is_admin/1, check_is_admin/1, @@ -194,8 +195,30 @@ list_dbs() -> list_dbs(Options) -> + Callback = fun(DbName, Acc) -> [DbName | Acc] end, + DbNames = fabric2_fdb:transactional(fun(Tx) -> + fabric2_fdb:list_dbs(Tx, Callback, [], Options) + end), + lists:reverse(DbNames). + + +list_dbs(UserFun, UserAcc0, Options) -> + FoldFun = fun + (DbName, Acc) -> maybe_stop(UserFun({row, [{id, DbName}]}, Acc)) + end, fabric2_fdb:transactional(fun(Tx) -> - fabric2_fdb:list_dbs(Tx, Options) + try + UserAcc1 = maybe_stop(UserFun({meta, []}, UserAcc0)), + UserAcc2 = fabric2_fdb:list_dbs( + Tx, + FoldFun, + UserAcc1, + Options + ), + {ok, maybe_stop(UserFun(complete, UserAcc2))} + catch throw:{stop, FinalUserAcc} -> + {ok, FinalUserAcc} + end end). @@ -406,6 +429,7 @@ open_doc(#{} = Db, <> = DocId, _Options) -> open_doc(#{} = Db, DocId, Options) -> NeedsTreeOpts = [revs_info, conflicts, deleted_conflicts], NeedsTree = (Options -- NeedsTreeOpts /= Options), + OpenDeleted = lists:member(deleted, Options), fabric2_fdb:transactional(Db, fun(TxDb) -> Revs = case NeedsTree of true -> fabric2_fdb:get_all_revs(TxDb, DocId); @@ -414,6 +438,8 @@ open_doc(#{} = Db, DocId, Options) -> if Revs == [] -> {not_found, missing}; true -> #{winner := true} = RI = lists:last(Revs), case fabric2_fdb:get_doc_body(TxDb, DocId, RI) of + #doc{deleted = true} when not OpenDeleted -> + {not_found, deleted}; #doc{} = Doc -> apply_open_doc_opts(Doc, Revs, Options); Else -> @@ -451,8 +477,10 @@ open_doc_revs(Db, DocId, Revs, Options) -> rev_path => RevPath }, case fabric2_fdb:get_doc_body(TxDb, DocId, RevInfo) of - #doc{} = Doc -> {ok, Doc}; - Else -> {Else, {Pos, Rev}} + #doc{} = Doc -> + apply_open_doc_opts(Doc, AllRevInfos, Options); + Else -> + {Else, {Pos, Rev}} end end end, Found), @@ -615,9 +643,40 @@ fold_docs(Db, UserFun, UserAcc) -> fold_docs(Db, UserFun, UserAcc, []). -fold_docs(Db, UserFun, UserAcc, Options) -> +fold_docs(Db, UserFun, UserAcc0, Options) -> fabric2_fdb:transactional(Db, fun(TxDb) -> - fabric2_fdb:fold_docs(TxDb, UserFun, UserAcc, Options) + try + #{ + db_prefix := DbPrefix + } = TxDb, + + Prefix = erlfdb_tuple:pack({?DB_ALL_DOCS}, DbPrefix), + DocCount = get_doc_count(TxDb), + + Meta = case lists:keyfind(update_seq, 1, Options) of + {_, true} -> + UpdateSeq = fabric2_db:get_update_seq(TxDb), + [{update_seq, UpdateSeq}]; + _ -> + [] + end ++ [{total, DocCount}, {offset, null}], + + UserAcc1 = maybe_stop(UserFun({meta, Meta}, UserAcc0)), + + UserAcc2 = fabric2_fdb:fold_range(TxDb, Prefix, fun({K, V}, Acc) -> + {DocId} = erlfdb_tuple:unpack(K, Prefix), + RevId = erlfdb_tuple:unpack(V), + maybe_stop(UserFun({row, [ + {id, DocId}, + {key, DocId}, + {value, {[{rev, couch_doc:rev_to_str(RevId)}]}} + ]}, Acc)) + end, UserAcc1, Options), + + {ok, maybe_stop(UserFun(complete, UserAcc2))} + catch throw:{stop, FinalUserAcc} -> + {ok, FinalUserAcc} + end end). @@ -627,7 +686,44 @@ fold_changes(Db, SinceSeq, UserFun, UserAcc) -> fold_changes(Db, SinceSeq, UserFun, UserAcc, Options) -> fabric2_fdb:transactional(Db, fun(TxDb) -> - fabric2_fdb:fold_changes(TxDb, SinceSeq, UserFun, UserAcc, Options) + try + #{ + db_prefix := DbPrefix + } = TxDb, + + Prefix = erlfdb_tuple:pack({?DB_CHANGES}, DbPrefix), + + Dir = case fabric2_util:get_value(dir, Options, fwd) of + rev -> rev; + _ -> fwd + end, + + StartKey = get_since_seq(TxDb, Dir, SinceSeq), + EndKey = case Dir of + rev -> fabric2_util:seq_zero_vs(); + _ -> fabric2_util:seq_max_vs() + end, + FoldOpts = [ + {start_key, StartKey}, + {end_key, EndKey} + ] ++ Options, + + {ok, fabric2_fdb:fold_range(TxDb, Prefix, fun({K, V}, Acc) -> + {SeqVS} = erlfdb_tuple:unpack(K, Prefix), + {DocId, Deleted, RevId} = erlfdb_tuple:unpack(V), + + Change = #{ + id => DocId, + sequence => fabric2_fdb:vs_to_seq(SeqVS), + rev_id => RevId, + deleted => Deleted + }, + + maybe_stop(UserFun(Change, Acc)) + end, UserAcc, FoldOpts)} + catch throw:{stop, FinalUserAcc} -> + {ok, FinalUserAcc} + end end). @@ -796,7 +892,6 @@ apply_open_doc_opts(Doc, Revs, Options) -> IncludeConflicts = lists:member(conflicts, Options), IncludeDelConflicts = lists:member(deleted_conflicts, Options), IncludeLocalSeq = lists:member(local_seq, Options), - ReturnDeleted = lists:member(deleted, Options), % This revs_info becomes fairly useless now that we're % not keeping old document bodies around... @@ -827,14 +922,7 @@ apply_open_doc_opts(Doc, Revs, Options) -> [{local_seq, fabric2_fdb:vs_to_seq(SeqVS)}] end, - case Doc#doc.deleted and not ReturnDeleted of - true -> - {not_found, deleted}; - false -> - {ok, Doc#doc{ - meta = Meta1 ++ Meta2 ++ Meta3 ++ Meta4 - }} - end. + {ok, Doc#doc{meta = Meta1 ++ Meta2 ++ Meta3 ++ Meta4}}. filter_found_revs(RevInfo, Revs) -> @@ -1289,6 +1377,26 @@ check_duplicate_attachments(#doc{atts = Atts}) -> end, ordsets:new(), Atts). +get_since_seq(Db, rev, <<>>) -> + get_since_seq(Db, rev, now); + +get_since_seq(_Db, _Dir, Seq) when Seq == <<>>; Seq == <<"0">>; Seq == 0-> + fabric2_util:seq_zero_vs(); + +get_since_seq(Db, Dir, Seq) when Seq == now; Seq == <<"now">> -> + CurrSeq = fabric2_fdb:get_last_change(Db), + get_since_seq(Db, Dir, CurrSeq); + +get_since_seq(_Db, _Dir, Seq) when is_binary(Seq), size(Seq) == 24 -> + fabric2_fdb:next_vs(fabric2_fdb:seq_to_vs(Seq)); + +get_since_seq(Db, Dir, List) when is_list(List) -> + get_since_seq(Db, Dir, list_to_binary(List)); + +get_since_seq(_Db, _Dir, Seq) -> + erlang:error({invalid_since_seq, Seq}). + + get_leaf_path(Pos, Rev, [{Pos, [{Rev, _RevInfo} | LeafPath]} | _]) -> LeafPath; get_leaf_path(Pos, Rev, [_WrongLeaf | RestLeafs]) -> @@ -1353,3 +1461,8 @@ rev(Rev) when is_list(Rev); is_binary(Rev) -> rev({Seq, Hash} = Rev) when is_integer(Seq), is_binary(Hash) -> Rev. + +maybe_stop({ok, Acc}) -> + Acc; +maybe_stop({stop, Acc}) -> + throw({stop, Acc}). diff --git a/src/fabric/src/fabric2_fdb.erl b/src/fabric/src/fabric2_fdb.erl index 4b0182646..670ce8b49 100644 --- a/src/fabric/src/fabric2_fdb.erl +++ b/src/fabric/src/fabric2_fdb.erl @@ -24,7 +24,7 @@ delete/1, exists/1, - list_dbs/2, + list_dbs/4, get_info/1, get_config/1, @@ -50,11 +50,13 @@ read_attachment/3, write_attachment/3, - fold_docs/4, - fold_changes/5, get_last_change/1, + fold_range/5, + vs_to_seq/1, + seq_to_vs/1, + next_vs/1, debug_cluster/0, debug_cluster/2 @@ -254,16 +256,15 @@ exists(#{name := DbName} = Db) when is_binary(DbName) -> end. -list_dbs(Tx, _Options) -> +list_dbs(Tx, Callback, AccIn, Options) -> Root = erlfdb_directory:root(), CouchDB = erlfdb_directory:create_or_open(Tx, Root, [<<"couchdb">>]), LayerPrefix = erlfdb_directory:get_name(CouchDB), - {Start, End} = erlfdb_tuple:range({?ALL_DBS}, LayerPrefix), - Future = erlfdb:get_range(Tx, Start, End), - lists:map(fun({K, _V}) -> - {?ALL_DBS, DbName} = erlfdb_tuple:unpack(K, LayerPrefix), - DbName - end, erlfdb:wait(Future)). + Prefix = erlfdb_tuple:pack({?ALL_DBS}, LayerPrefix), + fold_range({tx, Tx}, Prefix, fun({K, _V}, Acc) -> + {DbName} = erlfdb_tuple:unpack(K, Prefix), + Callback(DbName, Acc) + end, AccIn, Options). get_info(#{} = Db) -> @@ -508,24 +509,26 @@ write_doc(#{} = Db0, Doc, NewWinner0, OldWinner, ToUpdate, ToRemove) -> UpdateStatus = case {OldWinner, NewWinner} of {not_found, #{deleted := false}} -> created; + {not_found, #{deleted := true}} -> + deleted; {#{deleted := true}, #{deleted := false}} -> recreated; {#{deleted := false}, #{deleted := false}} -> updated; {#{deleted := false}, #{deleted := true}} -> + deleted; + {#{deleted := true}, #{deleted := true}} -> deleted end, case UpdateStatus of - Status when Status == created orelse Status == recreated -> - ADKey = erlfdb_tuple:pack({?DB_ALL_DOCS, DocId}, DbPrefix), - ADVal = erlfdb_tuple:pack(NewRevId), - ok = erlfdb:set(Tx, ADKey, ADVal); deleted -> ADKey = erlfdb_tuple:pack({?DB_ALL_DOCS, DocId}, DbPrefix), ok = erlfdb:clear(Tx, ADKey); - updated -> - ok + _ -> + ADKey = erlfdb_tuple:pack({?DB_ALL_DOCS, DocId}, DbPrefix), + ADVal = erlfdb_tuple:pack(NewRevId), + ok = erlfdb:set(Tx, ADKey, ADVal) end, % _changes @@ -640,84 +643,6 @@ write_attachment(#{} = Db, DocId, Data) when is_binary(Data) -> {ok, AttId}. -fold_docs(#{} = Db, UserFun, UserAcc0, Options) -> - #{ - tx := Tx, - db_prefix := DbPrefix - } = ensure_current(Db), - - {Reverse, Start, End} = get_dir_and_bounds(DbPrefix, Options), - - DocCountKey = erlfdb_tuple:pack({?DB_STATS, <<"doc_count">>}, DbPrefix), - DocCountBin = erlfdb:wait(erlfdb:get(Tx, DocCountKey)), - - try - UserAcc1 = maybe_stop(UserFun({meta, [ - {total, ?bin2uint(DocCountBin)}, - {offset, null} - ]}, UserAcc0)), - - UserAcc2 = erlfdb:fold_range(Tx, Start, End, fun({K, V}, UserAccIn) -> - {?DB_ALL_DOCS, DocId} = erlfdb_tuple:unpack(K, DbPrefix), - RevId = erlfdb_tuple:unpack(V), - maybe_stop(UserFun({row, [ - {id, DocId}, - {key, DocId}, - {value, couch_doc:rev_to_str(RevId)} - ]}, UserAccIn)) - end, UserAcc1, [{reverse, Reverse}] ++ Options), - - {ok, maybe_stop(UserFun(complete, UserAcc2))} - catch throw:{stop, FinalUserAcc} -> - {ok, FinalUserAcc} - end. - - -fold_changes(#{} = Db, SinceSeq0, UserFun, UserAcc0, Options) -> - #{ - tx := Tx, - db_prefix := DbPrefix - } = ensure_current(Db), - - SinceSeq1 = get_since_seq(SinceSeq0), - - Reverse = case fabric2_util:get_value(dir, Options, fwd) of - fwd -> false; - rev -> true - end, - - {Start0, End0} = case Reverse of - false -> {SinceSeq1, fabric2_util:seq_max_vs()}; - true -> {fabric2_util:seq_zero_vs(), SinceSeq1} - end, - - Start1 = erlfdb_tuple:pack({?DB_CHANGES, Start0}, DbPrefix), - End1 = erlfdb_tuple:pack({?DB_CHANGES, End0}, DbPrefix), - - {Start, End} = case Reverse of - false -> {erlfdb_key:first_greater_than(Start1), End1}; - true -> {Start1, erlfdb_key:first_greater_than(End1)} - end, - - try - {ok, erlfdb:fold_range(Tx, Start, End, fun({K, V}, UserAccIn) -> - {?DB_CHANGES, SeqVS} = erlfdb_tuple:unpack(K, DbPrefix), - {DocId, Deleted, RevId} = erlfdb_tuple:unpack(V), - - Change = #{ - id => DocId, - sequence => vs_to_seq(SeqVS), - rev_id => RevId, - deleted => Deleted - }, - - maybe_stop(UserFun(Change, UserAccIn)) - end, UserAcc0, [{reverse, Reverse}] ++ Options)} - catch throw:{stop, FinalUserAcc} -> - {ok, FinalUserAcc} - end. - - get_last_change(#{} = Db) -> #{ tx := Tx, @@ -735,17 +660,57 @@ get_last_change(#{} = Db) -> end. -maybe_stop({ok, Acc}) -> - Acc; -maybe_stop({stop, Acc}) -> - throw({stop, Acc}). +fold_range(#{} = Db, RangePrefix, Callback, Acc, Options) -> + #{ + tx := Tx + } = ensure_current(Db), + fold_range({tx, Tx}, RangePrefix, Callback, Acc, Options); + +fold_range({tx, Tx}, RangePrefix, UserCallback, UserAcc, Options) -> + case fabric2_util:get_value(limit, Options) of + 0 -> + % FoundationDB treats a limit of 0 as unlimited + % so we have to guard for that here. + UserAcc; + _ -> + {Start, End, Skip, FoldOpts} = get_fold_opts(RangePrefix, Options), + Callback = fun fold_range_cb/2, + Acc = {skip, Skip, UserCallback, UserAcc}, + {skip, _, UserCallback, OutAcc} = + erlfdb:fold_range(Tx, Start, End, Callback, Acc, FoldOpts), + OutAcc + end. -vs_to_seq(VS) -> +vs_to_seq(VS) when is_tuple(VS) -> + % 51 is the versionstamp type tag <<51:8, SeqBin:12/binary>> = erlfdb_tuple:pack({VS}), fabric2_util:to_hex(SeqBin). +seq_to_vs(Seq) when is_binary(Seq) -> + Seq1 = fabric2_util:from_hex(Seq), + % 51 is the versionstamp type tag + Seq2 = <<51:8, Seq1/binary>>, + {VS} = erlfdb_tuple:unpack(Seq2), + VS. + + +next_vs({versionstamp, VS, Batch, TxId}) -> + {V, B, T} = case TxId =< 65535 of + true -> + {VS, Batch, TxId + 1}; + false -> + case Batch =< 65535 of + true -> + {VS, Batch + 1, 0}; + false -> + {VS + 1, 0, 0} + end + end, + {versionstamp, V, B, T}. + + debug_cluster() -> debug_cluster(<<>>, <<16#FE, 16#FF, 16#FF>>). @@ -753,7 +718,7 @@ debug_cluster() -> debug_cluster(Start, End) -> transactional(fun(Tx) -> lists:foreach(fun({Key, Val}) -> - io:format("~s => ~s~n", [ + io:format(standard_error, "~s => ~s~n", [ string:pad(erlfdb_util:repr(Key), 60), erlfdb_util:repr(Val) ]) @@ -790,7 +755,7 @@ load_validate_doc_funs(#{} = Db) -> {end_key, <<"_design0">>} ], - {ok, Infos1} = fold_docs(Db, FoldFun, [], Options), + {ok, Infos1} = fabric2_db:fold_docs(Db, FoldFun, [], Options), Infos2 = lists:map(fun(Info) -> #{ @@ -999,11 +964,12 @@ chunkify_attachment(Data) -> end. -get_dir_and_bounds(DbPrefix, Options) -> - Reverse = case fabric2_util:get_value(dir, Options, fwd) of - fwd -> false; - rev -> true +get_fold_opts(RangePrefix, Options) -> + Reverse = case fabric2_util:get_value(dir, Options) of + rev -> true; + _ -> false end, + StartKey0 = fabric2_util:get_value(start_key, Options), EndKeyGt = fabric2_util:get_value(end_key_gt, Options), EndKey0 = fabric2_util:get_value(end_key, Options, EndKeyGt), @@ -1019,17 +985,17 @@ get_dir_and_bounds(DbPrefix, Options) -> % Set the maximum bounds for the start and endkey StartKey2 = case StartKey1 of - undefined -> {?DB_ALL_DOCS}; - SK2 when is_binary(SK2) -> {?DB_ALL_DOCS, SK2} + undefined -> <<>>; + SK2 -> SK2 end, EndKey2 = case EndKey1 of - undefined -> {?DB_ALL_DOCS, <<16#FF>>}; - EK2 when is_binary(EK2) -> {?DB_ALL_DOCS, EK2} + undefined -> <<255>>; + EK2 -> EK2 end, - StartKey3 = erlfdb_tuple:pack(StartKey2, DbPrefix), - EndKey3 = erlfdb_tuple:pack(EndKey2, DbPrefix), + StartKey3 = erlfdb_tuple:pack({StartKey2}, RangePrefix), + EndKey3 = erlfdb_tuple:pack({EndKey2}, RangePrefix), % FoundationDB ranges are applied as SK <= key < EK % By default, CouchDB is SK <= key <= EK with the @@ -1056,26 +1022,46 @@ get_dir_and_bounds(DbPrefix, Options) -> EndKey3 end, - {Reverse, StartKey4, EndKey4}. + Skip = case fabric2_util:get_value(skip, Options) of + S when is_integer(S), S >= 0 -> S; + _ -> 0 + end, + Limit = case fabric2_util:get_value(limit, Options) of + L when is_integer(L), L >= 0 -> [{limit, L + Skip}]; + undefined -> [] + end, -get_since_seq(Seq) when Seq == <<>>; Seq == <<"0">>; Seq == 0-> - fabric2_util:seq_zero_vs(); + TargetBytes = case fabric2_util:get_value(target_bytes, Options) of + T when is_integer(T), T >= 0 -> [{target_bytes, T}]; + undefined -> [] + end, -get_since_seq(Seq) when Seq == now; Seq == <<"now">> -> - fabric2_util:seq_max_vs(); + StreamingMode = case fabric2_util:get_value(streaming_mode, Options) of + undefined -> []; + Name when is_atom(Name) -> [{streaming_mode, Name}] + end, + + Snapshot = case fabric2_util:get_value(snapshot, Options) of + undefined -> []; + B when is_boolean(B) -> [{snapshot, B}] + end, + + OutOpts = [{reverse, Reverse}] + ++ Limit + ++ TargetBytes + ++ StreamingMode + ++ Snapshot, + + {StartKey4, EndKey4, Skip, OutOpts}. -get_since_seq(Seq) when is_binary(Seq), size(Seq) == 24 -> - Seq1 = fabric2_util:from_hex(Seq), - Seq2 = <<51:8, Seq1/binary>>, - {SeqVS} = erlfdb_tuple:unpack(Seq2), - SeqVS; -get_since_seq(List) when is_list(List) -> - get_since_seq(list_to_binary(List)); +fold_range_cb(KV, {skip, 0, Callback, Acc}) -> + NewAcc = Callback(KV, Acc), + {skip, 0, Callback, NewAcc}; -get_since_seq(Seq) -> - erlang:error({invalid_since_seq, Seq}). +fold_range_cb(_KV, {skip, N, Callback, Acc}) when is_integer(N), N > 0 -> + {skip, N - 1, Callback, Acc}. get_db_handle() -> diff --git a/src/fabric/test/fabric2_doc_fold_tests.erl b/src/fabric/test/fabric2_doc_fold_tests.erl index caa5f925a..ee0180f14 100644 --- a/src/fabric/test/fabric2_doc_fold_tests.erl +++ b/src/fabric/test/fabric2_doc_fold_tests.erl @@ -34,7 +34,10 @@ doc_fold_test_() -> fun fold_docs_with_start_key/1, fun fold_docs_with_end_key/1, fun fold_docs_with_both_keys_the_same/1, - fun fold_docs_with_different_keys/1 + fun fold_docs_with_different_keys/1, + fun fold_docs_with_limit/1, + fun fold_docs_with_skip/1, + fun fold_docs_with_skip_and_limit/1 ]} } }. @@ -50,7 +53,7 @@ setup() -> body = {[{<<"value">>, Val}]} }, {ok, Rev} = fabric2_db:update_doc(Db, Doc, []), - {DocId, couch_doc:rev_to_str(Rev)} + {DocId, {[{rev, couch_doc:rev_to_str(Rev)}]}} end, lists:seq(1, ?DOC_COUNT)), {Db, lists:sort(DocIdRevs), Ctx}. @@ -108,11 +111,58 @@ fold_docs_with_different_keys({Db, DocIdRevs, _}) -> end, lists:seq(1, 500)). +fold_docs_with_limit({Db, DocIdRevs, _}) -> + lists:foreach(fun(Limit) -> + Opts1 = [{limit, Limit}], + {ok, {?DOC_COUNT, Rows1}} = + fabric2_db:fold_docs(Db, fun fold_fun/2, [], Opts1), + ?assertEqual(lists:sublist(DocIdRevs, Limit), lists:reverse(Rows1)), + + Opts2 = [{dir, rev} | Opts1], + {ok, {?DOC_COUNT, Rows2}} = + fabric2_db:fold_docs(Db, fun fold_fun/2, [], Opts2), + ?assertEqual( + lists:sublist(lists:reverse(DocIdRevs), Limit), + lists:reverse(Rows2) + ) + end, lists:seq(0, 51)). + + +fold_docs_with_skip({Db, DocIdRevs, _}) -> + lists:foreach(fun(Skip) -> + Opts1 = [{skip, Skip}], + {ok, {?DOC_COUNT, Rows1}} = + fabric2_db:fold_docs(Db, fun fold_fun/2, [], Opts1), + Expect1 = case Skip > length(DocIdRevs) of + true -> []; + false -> lists:nthtail(Skip, DocIdRevs) + end, + ?assertEqual(Expect1, lists:reverse(Rows1)), + + Opts2 = [{dir, rev} | Opts1], + {ok, {?DOC_COUNT, Rows2}} = + fabric2_db:fold_docs(Db, fun fold_fun/2, [], Opts2), + Expect2 = case Skip > length(DocIdRevs) of + true -> []; + false -> lists:nthtail(Skip, lists:reverse(DocIdRevs)) + end, + ?assertEqual(Expect2, lists:reverse(Rows2)) + end, lists:seq(0, 51)). + + +fold_docs_with_skip_and_limit({Db, DocIdRevs, _}) -> + lists:foreach(fun(_) -> + check_skip_and_limit(Db, [], DocIdRevs), + check_skip_and_limit(Db, [{dir, rev}], lists:reverse(DocIdRevs)) + end, lists:seq(1, 100)). + + check_all_combos(Db, StartKey, EndKey, Rows) -> Opts1 = make_opts(fwd, StartKey, EndKey, true), {ok, {?DOC_COUNT, Rows1}} = fabric2_db:fold_docs(Db, fun fold_fun/2, [], Opts1), ?assertEqual(lists:reverse(Rows), Rows1), + check_skip_and_limit(Db, Opts1, Rows), Opts2 = make_opts(fwd, StartKey, EndKey, false), {ok, {?DOC_COUNT, Rows2}} = @@ -121,11 +171,13 @@ check_all_combos(Db, StartKey, EndKey, Rows) -> lists:reverse(all_but_last(Rows)) end, ?assertEqual(Expect2, Rows2), + check_skip_and_limit(Db, Opts2, lists:reverse(Expect2)), Opts3 = make_opts(rev, StartKey, EndKey, true), {ok, {?DOC_COUNT, Rows3}} = fabric2_db:fold_docs(Db, fun fold_fun/2, [], Opts3), ?assertEqual(Rows, Rows3), + check_skip_and_limit(Db, Opts3, lists:reverse(Rows)), Opts4 = make_opts(rev, StartKey, EndKey, false), {ok, {?DOC_COUNT, Rows4}} = @@ -133,8 +185,34 @@ check_all_combos(Db, StartKey, EndKey, Rows) -> Expect4 = if StartKey == undefined -> Rows; true -> tl(Rows) end, - ?assertEqual(Expect4, Rows4). + ?assertEqual(Expect4, Rows4), + check_skip_and_limit(Db, Opts4, lists:reverse(Expect4)). + + +check_skip_and_limit(Db, Opts, []) -> + Skip = rand:uniform(?DOC_COUNT + 1) - 1, + Limit = rand:uniform(?DOC_COUNT + 1) - 1, + NewOpts = [{skip, Skip}, {limit, Limit} | Opts], + {ok, {?DOC_COUNT, OutRows}} = + fabric2_db:fold_docs(Db, fun fold_fun/2, [], NewOpts), + ?assertEqual([], OutRows); + +check_skip_and_limit(Db, Opts, Rows) -> + Skip = rand:uniform(length(Rows) + 1) - 1, + Limit = rand:uniform(?DOC_COUNT + 1 - Skip) - 1, + + ExpectRows = case Skip >= length(Rows) of + true -> + []; + false -> + lists:sublist(lists:nthtail(Skip, Rows), Limit) + end, + SkipLimitOpts = [{skip, Skip}, {limit, Limit} | Opts], + {ok, {?DOC_COUNT, RevRows}} = + fabric2_db:fold_docs(Db, fun fold_fun/2, [], SkipLimitOpts), + OutRows = lists:reverse(RevRows), + ?assertEqual(ExpectRows, OutRows). make_opts(fwd, StartKey, EndKey, InclusiveEnd) -> diff --git a/test/elixir/test/all_docs_test.exs b/test/elixir/test/all_docs_test.exs index a091dce55..91301d0b9 100644 --- a/test/elixir/test/all_docs_test.exs +++ b/test/elixir/test/all_docs_test.exs @@ -43,7 +43,8 @@ defmodule AllDocsTest do # Check _all_docs offset retry_until(fn -> resp = Couch.get("/#{db_name}/_all_docs", query: %{:startkey => "\"2\""}).body - assert resp["offset"] == 2 + assert resp["offset"] == :null + assert Enum.at(resp["rows"], 0)["key"] == "2" end) # Confirm that queries may assume raw collation -- cgit v1.2.1 From 5c757619b6a9e266d71b0ec430db02e8ccaee53e Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Thu, 11 Jul 2019 14:25:24 -0500 Subject: Fix bulk docs error reporting The existing logic around return codes and term formats is labyrinthine. This is the result of much trial and error to get the new logic to behave exactly the same as the previous implementation. --- src/chttpd/src/chttpd_db.erl | 2 + src/fabric/src/fabric2_db.erl | 108 +++++++++++++++++------------ src/fabric/test/fabric2_doc_crud_tests.erl | 20 ++---- 3 files changed, 73 insertions(+), 57 deletions(-) diff --git a/src/chttpd/src/chttpd_db.erl b/src/chttpd/src/chttpd_db.erl index 4bde3b007..f5618cf43 100644 --- a/src/chttpd/src/chttpd_db.erl +++ b/src/chttpd/src/chttpd_db.erl @@ -1356,6 +1356,8 @@ update_doc_result_to_json(DocId, {ok, NewRev}) -> {[{ok, true}, {id, DocId}, {rev, couch_doc:rev_to_str(NewRev)}]}; update_doc_result_to_json(DocId, {accepted, NewRev}) -> {[{ok, true}, {id, DocId}, {rev, couch_doc:rev_to_str(NewRev)}, {accepted, true}]}; +update_doc_result_to_json(DocId, {{DocId, _}, Error}) -> + update_doc_result_to_json(DocId, Error); update_doc_result_to_json(DocId, Error) -> {_Code, ErrorStr, Reason} = chttpd:error_info(Error), {[{id, DocId}, {error, ErrorStr}, {reason, Reason}]}. diff --git a/src/fabric/src/fabric2_db.erl b/src/fabric/src/fabric2_db.erl index eb74a183c..3ea30e70f 100644 --- a/src/fabric/src/fabric2_db.erl +++ b/src/fabric/src/fabric2_db.erl @@ -584,46 +584,52 @@ update_docs(Db, Docs) -> update_docs(Db, Docs0, Options) -> Docs1 = apply_before_doc_update(Db, Docs0, Options), - Resps0 = case lists:member(replicated_changes, Options) of - false -> - fabric2_fdb:transactional(Db, fun(TxDb) -> - update_docs_interactive(TxDb, Docs1, Options) - end); - true -> - lists:map(fun(Doc) -> + try + validate_atomic_update(Docs0, lists:member(all_or_nothing, Options)), + Resps0 = case lists:member(replicated_changes, Options) of + false -> fabric2_fdb:transactional(Db, fun(TxDb) -> - update_doc_int(TxDb, Doc, Options) - end) - end, Docs1) - end, - % Convert errors - Resps1 = lists:map(fun(Resp) -> - case Resp of - {#doc{} = Doc, Error} -> - #doc{ - id = DocId, - revs = Revs - } = Doc, - RevId = case Revs of - {RevPos, [Rev | _]} -> {RevPos, Rev}; - {0, []} -> {0, <<>>} - end, - {{DocId, RevId}, Error}; - Else -> - Else + update_docs_interactive(TxDb, Docs1, Options) + end); + true -> + lists:map(fun(Doc) -> + fabric2_fdb:transactional(Db, fun(TxDb) -> + update_doc_int(TxDb, Doc, Options) + end) + end, Docs1) + end, + % Convert errors + Resps1 = lists:map(fun(Resp) -> + case Resp of + {#doc{} = Doc, Error} -> + #doc{ + id = DocId, + revs = Revs + } = Doc, + RevId = case Revs of + {RevPos, [Rev | _]} -> {RevPos, Rev}; + {0, []} -> {0, <<>>}; + Else -> Else + end, + {{DocId, RevId}, Error}; + Else -> + Else + end + end, Resps0), + case lists:member(replicated_changes, Options) of + true -> + {ok, lists:flatmap(fun(R) -> + case R of + {ok, []} -> []; + {{_, _}, {ok, []}} -> []; + Else -> [Else] + end + end, Resps1)}; + false -> + {ok, Resps1} end - end, Resps0), - case lists:member(replicated_changes, Options) of - true -> - {ok, [R || R <- Resps1, R /= {ok, []}]}; - false -> - Status = lists:foldl(fun(Resp, Acc) -> - case Resp of - {ok, _} -> Acc; - _ -> error - end - end, ok, Resps1), - {Status, Resps1} + catch throw:{aborted, Errors} -> + {aborted, Errors} end. @@ -1023,7 +1029,7 @@ update_docs_interactive(Db, #doc{id = <>} = Doc, update_docs_interactive(Db, Doc, Options, Futures, SeenIds) -> case lists:member(Doc#doc.id, SeenIds) of true -> - {{error, conflict}, SeenIds}; + {conflict, SeenIds}; false -> Future = maps:get(doc_tag(Doc), Futures), case update_doc_interactive(Db, Doc, Future, Options) of @@ -1066,12 +1072,12 @@ update_doc_interactive(Db, Doc0, Future, _Options) -> % Check that a revision was specified if required Doc0RevId = doc_to_revid(Doc0), if Doc0RevId /= {0, <<>>} orelse WinnerRevId == {0, <<>>} -> ok; true -> - ?RETURN({error, conflict}) + ?RETURN({Doc0, conflict}) end, % Check that we're not trying to create a deleted doc if Doc0RevId /= {0, <<>>} orelse not Doc0#doc.deleted -> ok; true -> - ?RETURN({error, conflict}) + ?RETURN({Doc0, conflict}) end, % Get the target revision to update @@ -1088,7 +1094,7 @@ update_doc_interactive(Db, Doc0, Future, _Options) -> % that we get not_found for a deleted revision % because we only check for the non-deleted % key in fdb - ?RETURN({error, conflict}) + ?RETURN({Doc0, conflict}) end end, @@ -1191,7 +1197,7 @@ update_doc_replicated(Db, Doc0, _Options) -> if Status /= internal_node -> ok; true -> % We already know this revision so nothing % left to do. - ?RETURN({ok, []}) + ?RETURN({Doc0, {ok, []}}) end, % Its possible to have a replication with fewer than $revs_limit @@ -1248,7 +1254,7 @@ update_doc_replicated(Db, Doc0, _Options) -> update_local_doc(Db, Doc0, _Options) -> Doc1 = case increment_local_doc_rev(Doc0) of {ok, Updated} -> Updated; - {error, _} = Error -> ?RETURN(Error) + {error, Error} -> ?RETURN({Doc0, Error}) end, ok = fabric2_fdb:write_local_doc(Db, Doc1), @@ -1367,6 +1373,20 @@ validate_ddoc(Db, DDoc) -> end. +validate_atomic_update(_, false) -> + ok; +validate_atomic_update(AllDocs, true) -> + % TODO actually perform the validation. This requires some hackery, we need + % to basically extract the prep_and_validate_updates function from couch_db + % and only run that, without actually writing in case of a success. + Error = {not_implemented, <<"all_or_nothing is not supported">>}, + PreCommitFailures = lists:map(fun(#doc{id=Id, revs = {Pos,Revs}}) -> + case Revs of [] -> RevId = <<>>; [RevId|_] -> ok end, + {{Id, {Pos, RevId}}, Error} + end, AllDocs), + throw({aborted, PreCommitFailures}). + + check_duplicate_attachments(#doc{atts = Atts}) -> lists:foldl(fun(Att, Names) -> Name = couch_att:fetch(name, Att), diff --git a/src/fabric/test/fabric2_doc_crud_tests.erl b/src/fabric/test/fabric2_doc_crud_tests.erl index 85b276679..c19c47421 100644 --- a/src/fabric/test/fabric2_doc_crud_tests.erl +++ b/src/fabric/test/fabric2_doc_crud_tests.erl @@ -408,7 +408,7 @@ conflict_on_create_new_with_rev({Db, _}) -> revs = {1, [fabric2_util:uuid()]}, body = {[{<<"foo">>, <<"bar">>}]} }, - ?assertThrow({error, conflict}, fabric2_db:update_doc(Db, Doc)). + ?assertThrow(conflict, fabric2_db:update_doc(Db, Doc)). conflict_on_update_with_no_rev({Db, _}) -> @@ -421,7 +421,7 @@ conflict_on_update_with_no_rev({Db, _}) -> revs = {0, []}, body = {[{<<"state">>, 2}]} }, - ?assertThrow({error, conflict}, fabric2_db:update_doc(Db, Doc2)). + ?assertThrow(conflict, fabric2_db:update_doc(Db, Doc2)). conflict_on_create_as_deleted({Db, _}) -> @@ -430,7 +430,7 @@ conflict_on_create_as_deleted({Db, _}) -> deleted = true, body = {[{<<"foo">>, <<"bar">>}]} }, - ?assertThrow({error, conflict}, fabric2_db:update_doc(Db, Doc)). + ?assertThrow(conflict, fabric2_db:update_doc(Db, Doc)). conflict_on_recreate_as_deleted({Db, _}) -> @@ -450,7 +450,7 @@ conflict_on_recreate_as_deleted({Db, _}) -> deleted = true, body = {[{<<"state">>, 3}]} }, - ?assertThrow({error, conflict}, fabric2_db:update_doc(Db, Doc3)). + ?assertThrow(conflict, fabric2_db:update_doc(Db, Doc3)). conflict_on_extend_deleted({Db, _}) -> @@ -470,7 +470,7 @@ conflict_on_extend_deleted({Db, _}) -> deleted = false, body = {[{<<"state">>, 3}]} }, - ?assertThrow({error, conflict}, fabric2_db:update_doc(Db, Doc3)). + ?assertThrow(conflict, fabric2_db:update_doc(Db, Doc3)). open_doc_revs_basic({Db, _}) -> @@ -725,18 +725,12 @@ create_local_doc_bad_rev({Db, _}) -> id = LDocId, revs = {0, [<<"not a number">>]} }, - ?assertThrow( - {error, <<"Invalid rev format">>}, - fabric2_db:update_doc(Db, Doc1) - ), + ?assertThrow(<<"Invalid rev format">>, fabric2_db:update_doc(Db, Doc1)), Doc2 = Doc1#doc{ revs = bad_bad_rev_roy_brown }, - ?assertThrow( - {error, <<"Invalid rev format">>}, - fabric2_db:update_doc(Db, Doc2) - ). + ?assertThrow(<<"Invalid rev format">>, fabric2_db:update_doc(Db, Doc2)). create_local_doc_random_rev({Db, _}) -> -- cgit v1.2.1 From 333cb3e6eb0718ffd452bcaa3a1b1f67879932d5 Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Thu, 11 Jul 2019 15:03:12 -0500 Subject: Fix `COPY` method Simple function change to `fabric2_db:name/1` --- src/chttpd/src/chttpd_db.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/chttpd/src/chttpd_db.erl b/src/chttpd/src/chttpd_db.erl index f5618cf43..de4cb9600 100644 --- a/src/chttpd/src/chttpd_db.erl +++ b/src/chttpd/src/chttpd_db.erl @@ -1199,7 +1199,7 @@ db_doc_req(#httpd{method='COPY', user_ctx=Ctx}=Req, Db, SourceDocId) -> HttpCode = 202 end, % respond - DbName = couch_db:name(Db), + DbName = fabric2_db:name(Db), {PartRes} = update_doc_result_to_json(TargetDocId, {ok, NewTargetRev}), Loc = absolute_uri(Req, "/" ++ couch_util:url_encode(DbName) ++ "/" ++ couch_util:url_encode(TargetDocId)), send_json(Req, HttpCode, -- cgit v1.2.1 From fbb7a297ce8ba5b8f3caad6e11a467c8b92258cf Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Fri, 12 Jul 2019 09:37:10 -0500 Subject: Fix revision tree extensions Previously I was forgetting to keep the previous history around which ended up limiting the revision depth to two. --- src/fabric/src/fabric2_db.erl | 19 +++++++++++-------- src/fabric/src/fabric2_util.erl | 9 +++++++++ 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/src/fabric/src/fabric2_db.erl b/src/fabric/src/fabric2_db.erl index 3ea30e70f..43d555c0e 100644 --- a/src/fabric/src/fabric2_db.erl +++ b/src/fabric/src/fabric2_db.erl @@ -1098,16 +1098,19 @@ update_doc_interactive(Db, Doc0, Future, _Options) -> end end, - % When recreating a deleted document we want to extend - % the winning revision branch rather than create a - % new branch. If we did not do this we could be - % recreating into a state that previously existed. Doc1 = case Winner of #{deleted := true} when not Doc0#doc.deleted -> - {WinnerRevPos, WinnerRev} = maps:get(rev_id, Winner), - WinnerRevPath = maps:get(rev_path, Winner), - Doc0#doc{revs = {WinnerRevPos, [WinnerRev | WinnerRevPath]}}; - _ -> + % When recreating a deleted document we want to extend + % the winning revision branch rather than create a + % new branch. If we did not do this we could be + % recreating into a state that previously existed. + Doc0#doc{revs = fabric2_util:revinfo_to_revs(Winner)}; + #{} -> + % Otherwise we're extending the target's revision + % history with this update + Doc0#doc{revs = fabric2_util:revinfo_to_revs(Target)}; + not_found -> + % Creating a new doc means our revs start empty Doc0 end, diff --git a/src/fabric/src/fabric2_util.erl b/src/fabric/src/fabric2_util.erl index fb59d5923..48bf7d143 100644 --- a/src/fabric/src/fabric2_util.erl +++ b/src/fabric/src/fabric2_util.erl @@ -14,6 +14,7 @@ -export([ + revinfo_to_revs/1, revinfo_to_path/1, sort_revinfos/1, @@ -37,6 +38,14 @@ -include_lib("couch/include/couch_db.hrl"). +revinfo_to_revs(RevInfo) -> + #{ + rev_id := {RevPos, Rev}, + rev_path := RevPath + } = RevInfo, + {RevPos, [Rev | RevPath]}. + + revinfo_to_path(RevInfo) -> #{ rev_id := {RevPos, Rev}, -- cgit v1.2.1 From 169ee08c0ec0bb5541fd0c34c5dce9ab24cfb60e Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Fri, 12 Jul 2019 10:19:30 -0500 Subject: Implement `POST /_dbs_info` --- src/chttpd/src/chttpd_misc.erl | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/chttpd/src/chttpd_misc.erl b/src/chttpd/src/chttpd_misc.erl index c78f6b779..6e0d8cea2 100644 --- a/src/chttpd/src/chttpd_misc.erl +++ b/src/chttpd/src/chttpd_misc.erl @@ -162,7 +162,7 @@ all_dbs_callback({error, Reason}, #vacc{resp=Resp0}=Acc) -> {ok, Resp1} = chttpd:send_delayed_error(Resp0, Reason), {ok, Acc#vacc{resp=Resp1}}. -handle_dbs_info_req(#httpd{method='POST'}=Req) -> +handle_dbs_info_req(#httpd{method='POST', user_ctx=UserCtx}=Req) -> chttpd:validate_ctype(Req, "application/json"), Props = chttpd:json_body_obj(Req), Keys = couch_mrview_util:get_view_keys(Props), @@ -179,13 +179,14 @@ handle_dbs_info_req(#httpd{method='POST'}=Req) -> {ok, Resp} = chttpd:start_json_response(Req, 200), send_chunk(Resp, "["), lists:foldl(fun(DbName, AccSeparator) -> - case catch fabric:get_db_info(DbName) of - {ok, Result} -> - Json = ?JSON_ENCODE({[{key, DbName}, {info, {Result}}]}), - send_chunk(Resp, AccSeparator ++ Json); - _ -> - Json = ?JSON_ENCODE({[{key, DbName}, {error, not_found}]}), - send_chunk(Resp, AccSeparator ++ Json) + try + {ok, Db} = fabric2_db:open(DbName, [{user_ctx, UserCtx}]), + {ok, Info} = fabric2_db:get_db_info(Db), + Json = ?JSON_ENCODE({[{key, DbName}, {info, {Info}}]}), + send_chunk(Resp, AccSeparator ++ Json) + catch error:database_does_not_exist -> + ErrJson = ?JSON_ENCODE({[{key, DbName}, {error, not_found}]}), + send_chunk(Resp, AccSeparator ++ ErrJson) end, "," % AccSeparator now has a comma end, "", Keys), -- cgit v1.2.1 From 9ec2435a4e44411f12882d87b650333bb672ea10 Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Fri, 12 Jul 2019 10:25:35 -0500 Subject: Fix formatting of all_docs_test.exs --- test/elixir/test/all_docs_test.exs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/elixir/test/all_docs_test.exs b/test/elixir/test/all_docs_test.exs index 91301d0b9..acf4f390e 100644 --- a/test/elixir/test/all_docs_test.exs +++ b/test/elixir/test/all_docs_test.exs @@ -44,7 +44,7 @@ defmodule AllDocsTest do retry_until(fn -> resp = Couch.get("/#{db_name}/_all_docs", query: %{:startkey => "\"2\""}).body assert resp["offset"] == :null - assert Enum.at(resp["rows"], 0)["key"] == "2" + assert Enum.at(resp["rows"], 0)["key"] == "2" end) # Confirm that queries may assume raw collation -- cgit v1.2.1 From 67139d11ebc538611709a72f96b23f3ea5f6d819 Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Fri, 12 Jul 2019 12:42:17 -0500 Subject: Reinitialize chttpd_auth_cache on config change The old test got around this by using couch_httpd_auth cache in its tests which is fairly odd given that we run chttpd_auth_cache in production. This fixes that mistake and upgrades chttpd_auth_cache so that it works in the test scenario of changing the authentication_db configuration. --- src/chttpd/src/chttpd_auth_cache.erl | 58 +++++++++++++++++++++++---- src/fabric/src/fabric2_db.erl | 6 ++- test/elixir/test/security_validation_test.exs | 4 +- 3 files changed, 56 insertions(+), 12 deletions(-) diff --git a/src/chttpd/src/chttpd_auth_cache.erl b/src/chttpd/src/chttpd_auth_cache.erl index c72b12667..c5a56bddb 100644 --- a/src/chttpd/src/chttpd_auth_cache.erl +++ b/src/chttpd/src/chttpd_auth_cache.erl @@ -12,16 +12,19 @@ -module(chttpd_auth_cache). -behaviour(gen_server). +-behaviour(config_listener). -export([start_link/0, get_user_creds/2, update_user_creds/3]). -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]). -export([listen_for_changes/1, changes_callback/2]). +-export([handle_config_change/5, handle_config_terminate/3]). -include_lib("couch/include/couch_db.hrl"). -include_lib("couch/include/couch_js_functions.hrl"). -define(CACHE, chttpd_auth_cache_lru). +-define(RELISTEN_DELAY, 5000). -record(state, { changes_pid, @@ -101,17 +104,28 @@ maybe_increment_auth_cache_miss(UserName) -> %% gen_server callbacks init([]) -> - try - fabric2_db:open(dbname(), [?ADMIN_CTX]) - catch error:database_does_not_exist -> - case fabric2_db:create(dbname(), [?ADMIN_CTX]) of - {ok, _} -> ok; - {error, file_exists} -> ok - end - end, + ensure_auth_db(), + ok = config:listen_for_changes(?MODULE, nil), self() ! {start_listener, 0}, {ok, #state{}}. +handle_call(reinit_cache, _From, State) -> + #state{ + changes_pid = Pid + } = State, + + % The database may currently be cached. This + % ensures that we've removed it so that the + % system db callbacks are installed. + fabric2_server:remove(dbname()), + + ensure_auth_db(), + ets_lru:clear(?CACHE), + exit(Pid, shutdown), + self() ! {start_listener, 0}, + + {reply, ok, State#state{changes_pid = undefined}}; + handle_call(_Call, _From, State) -> {noreply, State}. @@ -133,6 +147,9 @@ handle_info({'DOWN', _, _, Pid, Reason}, #state{changes_pid=Pid} = State) -> {noreply, State#state{last_seq=Seq}}; handle_info({start_listener, Seq}, State) -> {noreply, State#state{changes_pid = spawn_changes(Seq)}}; +handle_info(restart_config_listener, State) -> + ok = config:listen_for_changes(?MODULE, nil), + {noreply, State}; handle_info(_Msg, State) -> {noreply, State}. @@ -184,6 +201,19 @@ changes_callback({timeout, _ResponseType}, Acc) -> changes_callback({error, _}, EndSeq) -> exit({seq, EndSeq}). + +handle_config_change("chttpd_auth", "authentication_db", _DbName, _, _) -> + {ok, gen_server:call(?MODULE, reinit_cache, infinity)}; +handle_config_change(_, _, _, _, _) -> + {ok, nil}. + +handle_config_terminate(_, stop, _) -> + ok; +handle_config_terminate(_Server, _Reason, _State) -> + Dst = whereis(?MODULE), + erlang:send_after(?RELISTEN_DELAY, Dst, restart_config_listener). + + load_user_from_db(UserName) -> {ok, Db} = fabric2_db:open(dbname(), [?ADMIN_CTX]), try fabric2_db:open_doc(Db, docid(UserName), [conflicts]) of @@ -197,6 +227,18 @@ load_user_from_db(UserName) -> nil end. + +ensure_auth_db() -> + try + fabric2_db:open(dbname(), [?ADMIN_CTX]) + catch error:database_does_not_exist -> + case fabric2_db:create(dbname(), [?ADMIN_CTX]) of + {ok, _} -> ok; + {error, file_exists} -> ok + end + end. + + dbname() -> DbNameStr = config:get("chttpd_auth", "authentication_db", "_users"), iolist_to_binary(DbNameStr). diff --git a/src/fabric/src/fabric2_db.erl b/src/fabric/src/fabric2_db.erl index 43d555c0e..711490307 100644 --- a/src/fabric/src/fabric2_db.erl +++ b/src/fabric/src/fabric2_db.erl @@ -736,10 +736,14 @@ fold_changes(Db, SinceSeq, UserFun, UserAcc, Options) -> maybe_add_sys_db_callbacks(Db) -> IsReplicatorDb = fabric2_util:dbname_ends_with(Db, <<"_replicator">>), + AuthenticationDb = config:get("chttpd_auth", "authentication_db"), + IsAuthCache = if AuthenticationDb == undefined -> false; true -> + name(Db) == ?l2b(AuthenticationDb) + end, CfgUsersSuffix = config:get("couchdb", "users_db_suffix", "_users"), IsCfgUsersDb = fabric2_util:dbname_ends_with(Db, ?l2b(CfgUsersSuffix)), IsGlobalUsersDb = fabric2_util:dbname_ends_with(Db, <<"_users">>), - IsUsersDb = IsCfgUsersDb orelse IsGlobalUsersDb, + IsUsersDb = IsAuthCache orelse IsCfgUsersDb orelse IsGlobalUsersDb, {BDU, ADR} = if IsReplicatorDb -> diff --git a/test/elixir/test/security_validation_test.exs b/test/elixir/test/security_validation_test.exs index 0df3a780b..e10331477 100644 --- a/test/elixir/test/security_validation_test.exs +++ b/test/elixir/test/security_validation_test.exs @@ -53,9 +53,6 @@ defmodule SecurityValidationTest do on_exit(fn -> delete_db(auth_db_name) end) configs = [ - {"httpd", "authentication_handlers", - "{couch_httpd_auth, cookie_authentication_handler}, {couch_httpd_auth, default_authentication_handler}"}, - {"couch_httpd_auth", "authentication_db", auth_db_name}, {"chttpd_auth", "authentication_db", auth_db_name} ] @@ -72,6 +69,7 @@ defmodule SecurityValidationTest do Enum.each(users, fn {name, pass} -> doc = %{ :_id => "org.couchdb.user:#{name}", + :type => "user", :name => name, :roles => [], :password => pass -- cgit v1.2.1 From 80956487d34523a96fb6eecb1d8d23b8fe1a8354 Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Fri, 12 Jul 2019 13:33:35 -0500 Subject: Disable broken couch_att tests --- src/couch/src/couch_att.erl | 374 ++++++++++++++++++++++---------------------- 1 file changed, 187 insertions(+), 187 deletions(-) diff --git a/src/couch/src/couch_att.erl b/src/couch/src/couch_att.erl index 90d364441..d3c8966c6 100644 --- a/src/couch/src/couch_att.erl +++ b/src/couch/src/couch_att.erl @@ -667,190 +667,190 @@ validate_attachment_size(_AttName, _AttSize, _MAxAttSize) -> ok. --ifdef(TEST). --include_lib("eunit/include/eunit.hrl"). - -% Eww... --include("couch_bt_engine.hrl"). - -%% Test utilities - - -empty_att() -> new(). - - -upgraded_empty_att() -> - new([{headers, undefined}]). - - -%% Test groups - - -attachment_upgrade_test_() -> - {"Lazy record upgrade tests", [ - {"Existing record fields don't upgrade", - {with, empty_att(), [fun test_non_upgrading_fields/1]} - }, - {"New fields upgrade", - {with, empty_att(), [fun test_upgrading_fields/1]} - } - ]}. - - -attachment_defaults_test_() -> - {"Attachment defaults tests", [ - {"Records retain old default values", [ - {with, empty_att(), [fun test_legacy_defaults/1]} - ]}, - {"Upgraded records inherit defaults", [ - {with, upgraded_empty_att(), [fun test_legacy_defaults/1]} - ]}, - {"Undefined entries are elided on upgrade", [ - {with, upgraded_empty_att(), [fun test_elided_entries/1]} - ]} - ]}. - -attachment_field_api_test_() -> - {"Basic attachment field api", [ - fun test_construction/0, - fun test_store_and_fetch/0, - fun test_transform/0 - ]}. - - -attachment_disk_term_test_() -> - BaseAttachment = new([ - {name, <<"empty">>}, - {type, <<"application/octet-stream">>}, - {att_len, 0}, - {disk_len, 0}, - {md5, <<212,29,140,217,143,0,178,4,233,128,9,152,236,248,66,126>>}, - {revpos, 4}, - {data, {stream, {couch_bt_engine_stream, {fake_fd, fake_sp}}}}, - {encoding, identity} - ]), - BaseDiskTerm = { - <<"empty">>, - <<"application/octet-stream">>, - fake_sp, - 0, 0, 4, - <<212,29,140,217,143,0,178,4,233,128,9,152,236,248,66,126>>, - identity - }, - Headers = [{<<"X-Foo">>, <<"bar">>}], - ExtendedAttachment = store(headers, Headers, BaseAttachment), - ExtendedDiskTerm = {BaseDiskTerm, [{headers, Headers}]}, - FakeDb = test_util:fake_db([{engine, {couch_bt_engine, #st{fd=fake_fd}}}]), - {"Disk term tests", [ - ?_assertEqual(BaseDiskTerm, to_disk_term(BaseAttachment)), - ?_assertEqual(BaseAttachment, from_disk_term(FakeDb, BaseDiskTerm)), - ?_assertEqual(ExtendedDiskTerm, to_disk_term(ExtendedAttachment)), - ?_assertEqual(ExtendedAttachment, from_disk_term(FakeDb, ExtendedDiskTerm)) - ]}. - - -attachment_json_term_test_() -> - Props = [ - {<<"content_type">>, <<"application/json">>}, - {<<"digest">>, <<"md5-QCNtWUNXV0UzJnEjMk92YUk1JA==">>}, - {<<"length">>, 14}, - {<<"revpos">>, 1} - ], - PropsInline = [{<<"data">>, <<"eyJhbnN3ZXIiOiA0Mn0=">>}] ++ Props, - InvalidProps = [{<<"data">>, <<"!Base64Encoded$">>}] ++ Props, - Att = couch_att:new([ - {name, <<"attachment.json">>}, - {type, <<"application/json">>} - ]), - ResultStub = couch_att:new([ - {name, <<"attachment.json">>}, - {type, <<"application/json">>}, - {att_len, 14}, - {disk_len, 14}, - {md5, <<"@#mYCWWE3&q#2OvaI5$">>}, - {revpos, 1}, - {data, stub}, - {encoding, identity} - ]), - ResultFollows = ResultStub#att{data = follows}, - ResultInline = ResultStub#att{md5 = <<>>, data = <<"{\"answer\": 42}">>}, - {"JSON term tests", [ - ?_assertEqual(ResultStub, stub_from_json(Att, Props)), - ?_assertEqual(ResultFollows, follow_from_json(Att, Props)), - ?_assertEqual(ResultInline, inline_from_json(Att, PropsInline)), - ?_assertThrow({bad_request, _}, inline_from_json(Att, Props)), - ?_assertThrow({bad_request, _}, inline_from_json(Att, InvalidProps)) - ]}. - - -attachment_stub_merge_test_() -> - %% Stub merging needs to demonstrate revpos matching, skipping, and missing - %% attachment errors. - {"Attachment stub merging tests", []}. - - -%% Test generators - - -test_non_upgrading_fields(Attachment) -> - Pairs = [ - {name, "cat.gif"}, - {type, "text/very-very-plain"}, - {att_len, 1024}, - {disk_len, 42}, - {md5, <<"md5-hashhashhash">>}, - {revpos, 4}, - {data, stub}, - {encoding, gzip} - ], - lists:foreach( - fun({Field, Value}) -> - ?assertMatch(#att{}, Attachment), - Updated = store(Field, Value, Attachment), - ?assertMatch(#att{}, Updated) - end, - Pairs). - - -test_upgrading_fields(Attachment) -> - ?assertMatch(#att{}, Attachment), - UpdatedHeaders = store(headers, [{<<"Ans">>, <<"42">>}], Attachment), - ?assertMatch(X when is_list(X), UpdatedHeaders), - UpdatedHeadersUndefined = store(headers, undefined, Attachment), - ?assertMatch(X when is_list(X), UpdatedHeadersUndefined). - - -test_legacy_defaults(Attachment) -> - ?assertEqual(<<>>, fetch(md5, Attachment)), - ?assertEqual(0, fetch(revpos, Attachment)), - ?assertEqual(identity, fetch(encoding, Attachment)). - - -test_elided_entries(Attachment) -> - ?assertNot(lists:keymember(name, 1, Attachment)), - ?assertNot(lists:keymember(type, 1, Attachment)), - ?assertNot(lists:keymember(att_len, 1, Attachment)), - ?assertNot(lists:keymember(disk_len, 1, Attachment)), - ?assertNot(lists:keymember(data, 1, Attachment)). - - -test_construction() -> - ?assert(new() == new()), - Initialized = new([{name, <<"foo.bar">>}, {type, <<"application/qux">>}]), - ?assertEqual(<<"foo.bar">>, fetch(name, Initialized)), - ?assertEqual(<<"application/qux">>, fetch(type, Initialized)). - - -test_store_and_fetch() -> - Attachment = empty_att(), - ?assertEqual(<<"abc">>, fetch(name, store(name, <<"abc">>, Attachment))), - ?assertEqual(42, fetch(ans, store(ans, 42, Attachment))). - - -test_transform() -> - Attachment = new([{counter, 0}]), - Transformed = transform(counter, fun(Count) -> Count + 1 end, Attachment), - ?assertEqual(1, fetch(counter, Transformed)). - - --endif. +%% -ifdef(TEST). +%% -include_lib("eunit/include/eunit.hrl"). +%% +%% % Eww... +%% -include("couch_bt_engine.hrl"). +%% +%% %% Test utilities +%% +%% +%% empty_att() -> new(). +%% +%% +%% upgraded_empty_att() -> +%% new([{headers, undefined}]). +%% +%% +%% %% Test groups +%% +%% +%% attachment_upgrade_test_() -> +%% {"Lazy record upgrade tests", [ +%% {"Existing record fields don't upgrade", +%% {with, empty_att(), [fun test_non_upgrading_fields/1]} +%% }, +%% {"New fields upgrade", +%% {with, empty_att(), [fun test_upgrading_fields/1]} +%% } +%% ]}. +%% +%% +%% attachment_defaults_test_() -> +%% {"Attachment defaults tests", [ +%% {"Records retain old default values", [ +%% {with, empty_att(), [fun test_legacy_defaults/1]} +%% ]}, +%% {"Upgraded records inherit defaults", [ +%% {with, upgraded_empty_att(), [fun test_legacy_defaults/1]} +%% ]}, +%% {"Undefined entries are elided on upgrade", [ +%% {with, upgraded_empty_att(), [fun test_elided_entries/1]} +%% ]} +%% ]}. +%% +%% attachment_field_api_test_() -> +%% {"Basic attachment field api", [ +%% fun test_construction/0, +%% fun test_store_and_fetch/0, +%% fun test_transform/0 +%% ]}. +%% +%% +%% attachment_disk_term_test_() -> +%% BaseAttachment = new([ +%% {name, <<"empty">>}, +%% {type, <<"application/octet-stream">>}, +%% {att_len, 0}, +%% {disk_len, 0}, +%% {md5, <<212,29,140,217,143,0,178,4,233,128,9,152,236,248,66,126>>}, +%% {revpos, 4}, +%% {data, {stream, {couch_bt_engine_stream, {fake_fd, fake_sp}}}}, +%% {encoding, identity} +%% ]), +%% BaseDiskTerm = { +%% <<"empty">>, +%% <<"application/octet-stream">>, +%% fake_sp, +%% 0, 0, 4, +%% <<212,29,140,217,143,0,178,4,233,128,9,152,236,248,66,126>>, +%% identity +%% }, +%% Headers = [{<<"X-Foo">>, <<"bar">>}], +%% ExtendedAttachment = store(headers, Headers, BaseAttachment), +%% ExtendedDiskTerm = {BaseDiskTerm, [{headers, Headers}]}, +%% FakeDb = test_util:fake_db([{engine, {couch_bt_engine, #st{fd=fake_fd}}}]), +%% {"Disk term tests", [ +%% ?_assertEqual(BaseDiskTerm, to_disk_term(BaseAttachment)), +%% ?_assertEqual(BaseAttachment, from_disk_term(FakeDb, BaseDiskTerm)), +%% ?_assertEqual(ExtendedDiskTerm, to_disk_term(ExtendedAttachment)), +%% ?_assertEqual(ExtendedAttachment, from_disk_term(FakeDb, ExtendedDiskTerm)) +%% ]}. +%% +%% +%% attachment_json_term_test_() -> +%% Props = [ +%% {<<"content_type">>, <<"application/json">>}, +%% {<<"digest">>, <<"md5-QCNtWUNXV0UzJnEjMk92YUk1JA==">>}, +%% {<<"length">>, 14}, +%% {<<"revpos">>, 1} +%% ], +%% PropsInline = [{<<"data">>, <<"eyJhbnN3ZXIiOiA0Mn0=">>}] ++ Props, +%% InvalidProps = [{<<"data">>, <<"!Base64Encoded$">>}] ++ Props, +%% Att = couch_att:new([ +%% {name, <<"attachment.json">>}, +%% {type, <<"application/json">>} +%% ]), +%% ResultStub = couch_att:new([ +%% {name, <<"attachment.json">>}, +%% {type, <<"application/json">>}, +%% {att_len, 14}, +%% {disk_len, 14}, +%% {md5, <<"@#mYCWWE3&q#2OvaI5$">>}, +%% {revpos, 1}, +%% {data, stub}, +%% {encoding, identity} +%% ]), +%% ResultFollows = ResultStub#att{data = follows}, +%% ResultInline = ResultStub#att{md5 = <<>>, data = <<"{\"answer\": 42}">>}, +%% {"JSON term tests", [ +%% ?_assertEqual(ResultStub, stub_from_json(Att, Props)), +%% ?_assertEqual(ResultFollows, follow_from_json(Att, Props)), +%% ?_assertEqual(ResultInline, inline_from_json(Att, PropsInline)), +%% ?_assertThrow({bad_request, _}, inline_from_json(Att, Props)), +%% ?_assertThrow({bad_request, _}, inline_from_json(Att, InvalidProps)) +%% ]}. +%% +%% +%% attachment_stub_merge_test_() -> +%% %% Stub merging needs to demonstrate revpos matching, skipping, and missing +%% %% attachment errors. +%% {"Attachment stub merging tests", []}. +%% +%% +%% %% Test generators +%% +%% +%% test_non_upgrading_fields(Attachment) -> +%% Pairs = [ +%% {name, "cat.gif"}, +%% {type, "text/very-very-plain"}, +%% {att_len, 1024}, +%% {disk_len, 42}, +%% {md5, <<"md5-hashhashhash">>}, +%% {revpos, 4}, +%% {data, stub}, +%% {encoding, gzip} +%% ], +%% lists:foreach( +%% fun({Field, Value}) -> +%% ?assertMatch(#att{}, Attachment), +%% Updated = store(Field, Value, Attachment), +%% ?assertMatch(#att{}, Updated) +%% end, +%% Pairs). +%% +%% +%% test_upgrading_fields(Attachment) -> +%% ?assertMatch(#att{}, Attachment), +%% UpdatedHeaders = store(headers, [{<<"Ans">>, <<"42">>}], Attachment), +%% ?assertMatch(X when is_list(X), UpdatedHeaders), +%% UpdatedHeadersUndefined = store(headers, undefined, Attachment), +%% ?assertMatch(X when is_list(X), UpdatedHeadersUndefined). +%% +%% +%% test_legacy_defaults(Attachment) -> +%% ?assertEqual(<<>>, fetch(md5, Attachment)), +%% ?assertEqual(0, fetch(revpos, Attachment)), +%% ?assertEqual(identity, fetch(encoding, Attachment)). +%% +%% +%% test_elided_entries(Attachment) -> +%% ?assertNot(lists:keymember(name, 1, Attachment)), +%% ?assertNot(lists:keymember(type, 1, Attachment)), +%% ?assertNot(lists:keymember(att_len, 1, Attachment)), +%% ?assertNot(lists:keymember(disk_len, 1, Attachment)), +%% ?assertNot(lists:keymember(data, 1, Attachment)). +%% +%% +%% test_construction() -> +%% ?assert(new() == new()), +%% Initialized = new([{name, <<"foo.bar">>}, {type, <<"application/qux">>}]), +%% ?assertEqual(<<"foo.bar">>, fetch(name, Initialized)), +%% ?assertEqual(<<"application/qux">>, fetch(type, Initialized)). +%% +%% +%% test_store_and_fetch() -> +%% Attachment = empty_att(), +%% ?assertEqual(<<"abc">>, fetch(name, store(name, <<"abc">>, Attachment))), +%% ?assertEqual(42, fetch(ans, store(ans, 42, Attachment))). +%% +%% +%% test_transform() -> +%% Attachment = new([{counter, 0}]), +%% Transformed = transform(counter, fun(Count) -> Count + 1 end, Attachment), +%% ?assertEqual(1, fetch(counter, Transformed)). +%% +%% +%% -endif. -- cgit v1.2.1 From 112f598c69fde4610a734ac044d207a93f181ec6 Mon Sep 17 00:00:00 2001 From: Russell Branca Date: Tue, 2 Jul 2019 13:31:33 -0700 Subject: Expose ICU ucol_getSortKey --- src/couch/priv/icu_driver/couch_icu_driver.c | 74 ++++++++++++-- src/couch/src/couch_util.erl | 13 ++- src/couch/test/eunit/couch_util_tests.erl | 140 +++++++++++++++++++++++++++ 3 files changed, 219 insertions(+), 8 deletions(-) diff --git a/src/couch/priv/icu_driver/couch_icu_driver.c b/src/couch/priv/icu_driver/couch_icu_driver.c index 4d9bb982d..ffccf2e9d 100644 --- a/src/couch/priv/icu_driver/couch_icu_driver.c +++ b/src/couch/priv/icu_driver/couch_icu_driver.c @@ -30,6 +30,8 @@ specific language governing permissions and limitations under the License. #include /* for memcpy */ #endif +#define BUFFER_SIZE 1024 + typedef struct { ErlDrvPort port; @@ -54,6 +56,8 @@ static ErlDrvData couch_drv_start(ErlDrvPort port, char *buff) UErrorCode status = U_ZERO_ERROR; couch_drv_data* pData = (couch_drv_data*)driver_alloc(sizeof(couch_drv_data)); + set_port_control_flags(port, PORT_CONTROL_FLAG_BINARY); + if (pData == NULL) return ERL_DRV_ERROR_GENERAL; @@ -84,14 +88,17 @@ ErlDrvSSizeT return_control_result(void* pLocalResult, int localLen, char **ppRetBuf, ErlDrvSizeT returnLen) { + ErlDrvBinary* buf = NULL; + if (*ppRetBuf == NULL || localLen > returnLen) { - *ppRetBuf = (char*)driver_alloc_binary(localLen); - if(*ppRetBuf == NULL) { - return -1; - } + buf = driver_alloc_binary(localLen); + memcpy(buf->orig_bytes, pLocalResult, localLen); + *ppRetBuf = (char*) buf; + return localLen; + } else { + memcpy(*ppRetBuf, pLocalResult, localLen); + return localLen; } - memcpy(*ppRetBuf, pLocalResult, localLen); - return localLen; } static ErlDrvSSizeT @@ -147,6 +154,61 @@ couch_drv_control(ErlDrvData drv_data, unsigned int command, return return_control_result(&response, sizeof(response), rbuf, rlen); } + case 2: /* GET_SORT_KEY: */ + { + + UChar source[BUFFER_SIZE]; + UChar* sourcePtr = source; + int32_t sourceLen = BUFFER_SIZE; + + uint8_t sortKey[BUFFER_SIZE]; + uint8_t* sortKeyPtr = sortKey; + int32_t sortKeyLen = BUFFER_SIZE; + + int32_t inputLen; + + UErrorCode status = U_ZERO_ERROR; + ErlDrvSSizeT res; + + /* first 32bits are the length */ + memcpy(&inputLen, pBuf, sizeof(inputLen)); + pBuf += sizeof(inputLen); + + u_strFromUTF8(sourcePtr, BUFFER_SIZE, &sourceLen, pBuf, inputLen, &status); + + if (sourceLen >= BUFFER_SIZE) { + /* reset status or next u_strFromUTF8 call will auto-fail */ + status = U_ZERO_ERROR; + sourcePtr = (UChar*) malloc(sourceLen * sizeof(UChar)); + u_strFromUTF8(sourcePtr, sourceLen, NULL, pBuf, inputLen, &status); + if (U_FAILURE(status)) { + rbuf = NULL; + return 0; + } + } else if (U_FAILURE(status)) { + rbuf = NULL; + return 0; + } + + sortKeyLen = ucol_getSortKey(pData->coll, sourcePtr, sourceLen, sortKeyPtr, BUFFER_SIZE); + + if (sortKeyLen > BUFFER_SIZE) { + sortKeyPtr = (uint8_t*) malloc(sortKeyLen); + ucol_getSortKey(pData->coll, sourcePtr, sourceLen, sortKeyPtr, sortKeyLen); + } + + res = return_control_result(sortKeyPtr, sortKeyLen, rbuf, rlen); + + if (sourcePtr != source) { + free(sourcePtr); + } + + if (sortKeyPtr != sortKey) { + free(sortKeyPtr); + } + + return res; + } default: return -1; diff --git a/src/couch/src/couch_util.erl b/src/couch/src/couch_util.erl index dffb68152..ce0db4306 100644 --- a/src/couch/src/couch_util.erl +++ b/src/couch/src/couch_util.erl @@ -14,7 +14,7 @@ -export([priv_dir/0, normpath/1, fold_files/5]). -export([should_flush/0, should_flush/1, to_existing_atom/1]). --export([rand32/0, implode/2, collate/2, collate/3]). +-export([rand32/0, implode/2, collate/2, collate/3, get_sort_key/1]). -export([abs_pathname/1,abs_pathname/2, trim/1, drop_dot_couch_ext/1]). -export([encodeBase64Url/1, decodeBase64Url/1]). -export([validate_utf8/1, to_hex/1, parse_term/1, dict_find/3]). @@ -407,11 +407,20 @@ collate(A, B, Options) when is_binary(A), is_binary(B) -> SizeA = byte_size(A), SizeB = byte_size(B), Bin = <>, - [Result] = erlang:port_control(drv_port(), Operation, Bin), + <> = erlang:port_control(drv_port(), Operation, Bin), % Result is 0 for lt, 1 for eq and 2 for gt. Subtract 1 to return the % expected typical -1, 0, 1 Result - 1. +get_sort_key(Str) when is_binary(Str) -> + Operation = 2, % get_sort_key + Size = byte_size(Str), + Bin = <>, + case erlang:port_control(drv_port(), Operation, Bin) of + <<>> -> error; + Res -> Res + end. + should_flush() -> should_flush(?FLUSH_MAX_MEM). diff --git a/src/couch/test/eunit/couch_util_tests.erl b/src/couch/test/eunit/couch_util_tests.erl index 012c961a4..e37691110 100644 --- a/src/couch/test/eunit/couch_util_tests.erl +++ b/src/couch/test/eunit/couch_util_tests.erl @@ -14,6 +14,12 @@ -include_lib("couch/include/couch_eunit.hrl"). +% For generating poisson distributed string lengths +% in the random unicode generation. This shoots +% for lengths centered around 24 characters. To +% change, replace this value with math:exp(-Length). +-define(POISSON_LIMIT, 3.775134544279098e-11). +-define(RANDOM_TEST_SIZE, 10000). setup() -> %% We cannot start driver from here since it becomes bounded to eunit @@ -175,3 +181,137 @@ json_decode_test_() -> ?_assertEqual({[]}, couch_util:json_decode(<<"{}">>, [])), ?_assertEqual(#{}, couch_util:json_decode(<<"{}">>, [return_maps])) ]. + +sort_key_test_() -> + { + "Sort Key tests", + [ + { + foreach, + fun setup/0, fun teardown/1, + [ + fun test_get_sort_key/1, + fun test_get_sort_key_jiffy_string/1, + fun test_get_sort_key_fails_on_bad_input/1, + fun test_get_sort_key_longer_than_buffer/1, + fun test_sort_key_collation/1, + fun test_sort_key_list_sort/1 + ] + } + ] + }. + +test_get_sort_key(_) -> + Strs = [ + <<"">>, + <<"foo">>, + <<"bar">>, + <<"Bar">>, + <<"baz">>, + <<"BAZ">>, + <<"quaz">>, + <<"1234fdsa">>, + <<"1234">>, + <<"pizza">> + ], + Pairs = [{S1, S2} || S1 <- Strs, S2 <- Strs], + lists:map(fun({S1, S2}) -> + S1K = couch_util:get_sort_key(S1), + S2K = couch_util:get_sort_key(S2), + SortRes = sort_keys(S1K, S2K), + Comment = list_to_binary(io_lib:format("strcmp(~p, ~p)", [S1, S2])), + CollRes = couch_util:collate(S1, S2), + {Comment, ?_assertEqual(SortRes, CollRes)} + end, Pairs). + +test_get_sort_key_jiffy_string(_) -> + %% jiffy:decode does not null terminate strings + %% so we use it here to test unterminated strings + {[{S1,S2}]} = jiffy:decode(<<"{\"foo\": \"bar\"}">>), + S1K = couch_util:get_sort_key(S1), + S2K = couch_util:get_sort_key(S2), + SortRes = sort_keys(S1K, S2K), + CollRes = couch_util:collate(S1, S2), + ?_assertEqual(SortRes, CollRes). + +test_get_sort_key_fails_on_bad_input(_) -> + %% generated with crypto:strong_rand_bytes + %% contains invalid character, should error + S = <<209,98,222,144,60,163,72,134,206,157>>, + Res = couch_util:get_sort_key(S), + ?_assertEqual(error, Res). + +test_get_sort_key_longer_than_buffer(_) -> + %% stack allocated buffer is 1024 units + %% test resize logic with strings > 1024 char + Extra = list_to_binary(["a" || _ <- lists:seq(1, 1200)]), + ?_assert(is_binary(Extra)). + +test_sort_key_collation(_) -> + ?_test(begin + lists:foreach(fun(_) -> + K1 = random_unicode_binary(), + SK1 = couch_util:get_sort_key(K1), + + K2 = random_unicode_binary(), + SK2 = couch_util:get_sort_key(K2), + + % Probably kinda silly but whatevs + ?assertEqual(couch_util:collate(K1, K1), sort_keys(SK1, SK1)), + ?assertEqual(couch_util:collate(K2, K2), sort_keys(SK2, SK2)), + + ?assertEqual(couch_util:collate(K1, K2), sort_keys(SK1, SK2)), + ?assertEqual(couch_util:collate(K2, K1), sort_keys(SK2, SK1)) + end, lists:seq(1, ?RANDOM_TEST_SIZE)) + end). + +test_sort_key_list_sort(_) -> + ?_test(begin + RandomKeys = lists:map(fun(_) -> + random_unicode_binary() + end, lists:seq(1, ?RANDOM_TEST_SIZE)), + + CollationSorted = lists:sort(fun(A, B) -> + couch_util:collate(A, B) =< 0 + end, RandomKeys), + + SortKeys = lists:map(fun(K) -> + {couch_util:get_sort_key(K), K} + end, RandomKeys), + {_, SortKeySorted} = lists:unzip(lists:sort(SortKeys)), + + ?assertEqual(CollationSorted, SortKeySorted) + end). + +sort_keys(S1, S2) -> + case S1 < S2 of + true -> + -1; + false -> case S1 =:= S2 of + true -> + 0; + false -> + 1 + end + end. + +random_unicode_binary() -> + Size = poisson_length(0, rand:uniform()), + Chars = [random_unicode_char() || _ <- lists:seq(1, Size)], + <<_/binary>> = unicode:characters_to_binary(Chars). + +poisson_length(N, Acc) when Acc > ?POISSON_LIMIT -> + poisson_length(N + 1, Acc * rand:uniform()); +poisson_length(N, _) -> + N. + +random_unicode_char() -> + BaseChar = rand:uniform(16#FFFD + 1) - 1, + case BaseChar of + BC when BC >= 16#D800, BC =< 16#DFFF -> + % This range is reserved for surrogate pair + % encodings. + random_unicode_char(); + BC -> + BC + end. -- cgit v1.2.1 From daa2d5ea919228cde4570b5ef96d559046002307 Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Tue, 16 Jul 2019 12:05:35 -0500 Subject: Fix more elixir tests --- src/chttpd/src/chttpd_db.erl | 8 ++++---- src/couch/src/couch_att.erl | 10 +++++++--- src/fabric/src/fabric2_db.erl | 35 ++++++++++++++++++++++++++++++++--- 3 files changed, 43 insertions(+), 10 deletions(-) diff --git a/src/chttpd/src/chttpd_db.erl b/src/chttpd/src/chttpd_db.erl index de4cb9600..6487e10af 100644 --- a/src/chttpd/src/chttpd_db.erl +++ b/src/chttpd/src/chttpd_db.erl @@ -1065,7 +1065,7 @@ db_doc_req(#httpd{method='GET', mochi_req=MochiReq}=Req, Db, DocId) -> db_doc_req(#httpd{method='POST', user_ctx=Ctx}=Req, Db, DocId) -> couch_httpd:validate_referer(Req), - couch_db:validate_docid(Db, DocId), + couch_doc:validate_docid(DocId, fabric2_db:name(Db)), chttpd:validate_ctype(Req, "multipart/form-data"), Options = [{user_ctx,Ctx}], @@ -1125,7 +1125,7 @@ db_doc_req(#httpd{method='PUT', user_ctx=Ctx}=Req, Db, DocId) -> update_type = UpdateType } = parse_doc_query(Req), DbName = fabric2_db:name(Db), - couch_doc:validate_docid(DocId), + couch_doc:validate_docid(DocId, fabric2_db:name(Db)), Options = [{user_ctx, Ctx}], @@ -1686,7 +1686,7 @@ db_attachment_req(#httpd{method=Method}=Req, Db, DocId, FileNameParts) % check for the existence of the doc to handle the 404 case. couch_doc_open(Db, DocId, nil, []) end, - couch_db:validate_docid(Db, DocId), + couch_doc:validate_docid(DocId, fabric2_db:name(Db)), #doc{id=DocId}; Rev -> case fabric2_db:open_doc_revs(Db, DocId, [Rev], [{user_ctx,Ctx}]) of @@ -2065,7 +2065,7 @@ bulk_get_open_doc_revs1(Db, Props, Options, {}) -> {null, {error, Error}, Options}; DocId -> try - couch_db:validate_docid(Db, DocId), + couch_doc:validate_docid(DocId, fabric2_db:name(Db)), bulk_get_open_doc_revs1(Db, Props, Options, {DocId}) catch throw:{Error, Reason} -> {DocId, {error, {null, Error, Reason}}, Options} diff --git a/src/couch/src/couch_att.erl b/src/couch/src/couch_att.erl index d3c8966c6..2c3336291 100644 --- a/src/couch/src/couch_att.erl +++ b/src/couch/src/couch_att.erl @@ -384,8 +384,12 @@ flush(Db, DocId, Att1) -> % If we were sent a gzip'ed attachment with no % length data, we have to set it here. Att3 = case DiskLen of - undefined -> store(disk_len, AttLen, Att2); - _ -> Att2 + undefined when AttLen /= undefined -> + store(disk_len, AttLen, Att2); + undefined when is_binary(Data) -> + store(disk_len, size(Data), Att2); + _ -> + Att2 end, % If no encoding has been set, default to @@ -537,7 +541,7 @@ range_foldl(Bin1, From, To, Fun, Acc) when is_binary(Bin1) -> ReadLen = To - From, Bin2 = case Bin1 of _ when size(Bin1) < From -> <<>>; - <<_:From/binary, B2>> -> B2 + <<_:From/binary, B2/binary>> -> B2 end, Bin3 = case Bin2 of _ when size(Bin2) < ReadLen -> Bin2; diff --git a/src/fabric/src/fabric2_db.erl b/src/fabric/src/fabric2_db.erl index 711490307..3c3b7d3a5 100644 --- a/src/fabric/src/fabric2_db.erl +++ b/src/fabric/src/fabric2_db.erl @@ -897,7 +897,7 @@ get_members(SecProps) -> end. -apply_open_doc_opts(Doc, Revs, Options) -> +apply_open_doc_opts(Doc0, Revs, Options) -> IncludeRevsInfo = lists:member(revs_info, Options), IncludeConflicts = lists:member(conflicts, Options), IncludeDelConflicts = lists:member(deleted_conflicts, Options), @@ -906,7 +906,7 @@ apply_open_doc_opts(Doc, Revs, Options) -> % This revs_info becomes fairly useless now that we're % not keeping old document bodies around... Meta1 = if not IncludeRevsInfo -> []; true -> - {Pos, [Rev | RevPath]} = Doc#doc.revs, + {Pos, [Rev | RevPath]} = Doc0#doc.revs, RevPathMissing = lists:map(fun(R) -> {R, missing} end, RevPath), [{revs_info, Pos, [{Rev, available} | RevPathMissing]}] end, @@ -932,7 +932,36 @@ apply_open_doc_opts(Doc, Revs, Options) -> [{local_seq, fabric2_fdb:vs_to_seq(SeqVS)}] end, - {ok, Doc#doc{meta = Meta1 ++ Meta2 ++ Meta3 ++ Meta4}}. + Doc1 = case lists:keyfind(atts_since, 1, Options) of + {_, PossibleAncestors} -> + #doc{ + revs = DocRevs, + atts = Atts0 + } = Doc0, + RevPos = find_ancestor_rev_pos(DocRevs, PossibleAncestors), + Atts1 = lists:map(fun(Att) -> + [AttPos, Data] = couch_att:fetch([revpos, data], Att), + if AttPos > RevPos -> couch_att:store(data, Data, Att); + true -> couch_att:store(data, stub, Att) + end + end, Atts0), + Doc0#doc{atts = Atts1}; + false -> + Doc0 + end, + + {ok, Doc1#doc{meta = Meta1 ++ Meta2 ++ Meta3 ++ Meta4}}. + + +find_ancestor_rev_pos({_, []}, _PossibleAncestors) -> + 0; +find_ancestor_rev_pos(_DocRevs, []) -> + 0; +find_ancestor_rev_pos({RevPos, [RevId | Rest]}, AttsSinceRevs) -> + case lists:member({RevPos, RevId}, AttsSinceRevs) of + true -> RevPos; + false -> find_ancestor_rev_pos({RevPos - 1, Rest}, AttsSinceRevs) + end. filter_found_revs(RevInfo, Revs) -> -- cgit v1.2.1 From ba3e4053483d071cd8ca064060ef004a6392e1f2 Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Tue, 23 Jul 2019 15:12:18 -0500 Subject: Make fabric2.hrl public --- src/fabric/include/fabric2.hrl | 66 ++++++++++++++++++++++++++++++++++++++++++ src/fabric/src/fabric2.hrl | 66 ------------------------------------------ 2 files changed, 66 insertions(+), 66 deletions(-) create mode 100644 src/fabric/include/fabric2.hrl delete mode 100644 src/fabric/src/fabric2.hrl diff --git a/src/fabric/include/fabric2.hrl b/src/fabric/include/fabric2.hrl new file mode 100644 index 000000000..de1d3d177 --- /dev/null +++ b/src/fabric/include/fabric2.hrl @@ -0,0 +1,66 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + + +-define(uint2bin(I), binary:encode_unsigned(I, little)). +-define(bin2uint(I), binary:decode_unsigned(I, little)). + +% This will eventually be the `\xFFmetadataVersion` key that is +% currently only available in FoundationDB master. +% +% https://forums.foundationdb.org/t/a-new-tool-for-managing-layer-metadata/1191 +% +% Until then we'll fake the same behavior using a randomish +% key for tracking metadata changse. Once we get to the +% new feature this will be more performant by updating +% this define. +-define(METADATA_VERSION_KEY, <<"$metadata_version_key$">>). + + +% Prefix Definitions + +% Layer Level: (LayerPrefix, X, ...) + +-define(CLUSTER_CONFIG, 0). +-define(ALL_DBS, 1). +-define(DBS, 15). +-define(TX_IDS, 255). + +% Database Level: (LayerPrefix, ?DBS, DbPrefix, X, ...) + +-define(DB_VERSION, 0). +-define(DB_CONFIG, 16). +-define(DB_STATS, 17). +-define(DB_ALL_DOCS, 18). +-define(DB_CHANGES, 19). +-define(DB_REVS, 20). +-define(DB_DOCS, 21). +-define(DB_LOCAL_DOCS, 22). +-define(DB_ATTS, 23). + + +% Versions + +-define(CURR_REV_FORMAT, 0). + + +% Misc constants + +-define(PDICT_DB_KEY, '$fabric_db_handle'). +-define(PDICT_LAYER_CACHE, '$fabric_layer_id'). +-define(PDICT_CHECKED_DB_IS_CURRENT, '$fabric_checked_db_is_current'). +-define(PDICT_TX_ID_KEY, '$fabric_tx_id'). +-define(PDICT_TX_RES_KEY, '$fabric_tx_result'). +-define(COMMIT_UNKNOWN_RESULT, 1021). + + +-define(ATTACHMENT_CHUNK_SIZE, 100000). diff --git a/src/fabric/src/fabric2.hrl b/src/fabric/src/fabric2.hrl deleted file mode 100644 index de1d3d177..000000000 --- a/src/fabric/src/fabric2.hrl +++ /dev/null @@ -1,66 +0,0 @@ -% Licensed under the Apache License, Version 2.0 (the "License"); you may not -% use this file except in compliance with the License. You may obtain a copy of -% the License at -% -% http://www.apache.org/licenses/LICENSE-2.0 -% -% Unless required by applicable law or agreed to in writing, software -% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -% License for the specific language governing permissions and limitations under -% the License. - - --define(uint2bin(I), binary:encode_unsigned(I, little)). --define(bin2uint(I), binary:decode_unsigned(I, little)). - -% This will eventually be the `\xFFmetadataVersion` key that is -% currently only available in FoundationDB master. -% -% https://forums.foundationdb.org/t/a-new-tool-for-managing-layer-metadata/1191 -% -% Until then we'll fake the same behavior using a randomish -% key for tracking metadata changse. Once we get to the -% new feature this will be more performant by updating -% this define. --define(METADATA_VERSION_KEY, <<"$metadata_version_key$">>). - - -% Prefix Definitions - -% Layer Level: (LayerPrefix, X, ...) - --define(CLUSTER_CONFIG, 0). --define(ALL_DBS, 1). --define(DBS, 15). --define(TX_IDS, 255). - -% Database Level: (LayerPrefix, ?DBS, DbPrefix, X, ...) - --define(DB_VERSION, 0). --define(DB_CONFIG, 16). --define(DB_STATS, 17). --define(DB_ALL_DOCS, 18). --define(DB_CHANGES, 19). --define(DB_REVS, 20). --define(DB_DOCS, 21). --define(DB_LOCAL_DOCS, 22). --define(DB_ATTS, 23). - - -% Versions - --define(CURR_REV_FORMAT, 0). - - -% Misc constants - --define(PDICT_DB_KEY, '$fabric_db_handle'). --define(PDICT_LAYER_CACHE, '$fabric_layer_id'). --define(PDICT_CHECKED_DB_IS_CURRENT, '$fabric_checked_db_is_current'). --define(PDICT_TX_ID_KEY, '$fabric_tx_id'). --define(PDICT_TX_RES_KEY, '$fabric_tx_result'). --define(COMMIT_UNKNOWN_RESULT, 1021). - - --define(ATTACHMENT_CHUNK_SIZE, 100000). -- cgit v1.2.1 From 0db221ed4ab56d58e170413c9012187b8a6fa6d6 Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Tue, 16 Jul 2019 14:26:11 -0500 Subject: Expose the is_replicator_db and is_user_db logic This exposes a single place where we can check for whether a given database or database name is a replicator or users database. --- src/fabric/src/fabric2_db.erl | 37 +++++++++++++++++++++++++++---------- src/fabric/src/fabric2_util.erl | 6 ++++-- 2 files changed, 31 insertions(+), 12 deletions(-) diff --git a/src/fabric/src/fabric2_db.erl b/src/fabric/src/fabric2_db.erl index 3c3b7d3a5..c926da9e0 100644 --- a/src/fabric/src/fabric2_db.erl +++ b/src/fabric/src/fabric2_db.erl @@ -55,6 +55,8 @@ is_partitioned/1, is_system_db/1, is_system_db_name/1, + is_replicator_db/1, + is_users_db/1, set_revs_limit/2, %% set_purge_infos_limit/2, @@ -379,6 +381,29 @@ is_system_db_name(DbName) when is_binary(DbName) -> end. +is_replicator_db(#{name := DbName}) -> + is_replicator_db(DbName); + +is_replicator_db(DbName) when is_binary(DbName) -> + fabric2_util:dbname_ends_with(DbName, <<"_replicator">>). + + +is_users_db(#{name := DbName}) -> + is_users_db(DbName); + +is_users_db(DbName) when is_binary(DbName) -> + AuthenticationDb = config:get("chttpd_auth", "authentication_db"), + CfgUsersSuffix = config:get("couchdb", "users_db_suffix", "_users"), + + IsAuthCache = if AuthenticationDb == undefined -> false; true -> + DbName == ?l2b(AuthenticationDb) + end, + IsCfgUsersDb = fabric2_util:dbname_ends_with(DbName, ?l2b(CfgUsersSuffix)), + IsGlobalUsersDb = fabric2_util:dbname_ends_with(DbName, <<"_users">>), + + IsAuthCache orelse IsCfgUsersDb orelse IsGlobalUsersDb. + + set_revs_limit(#{} = Db, RevsLimit) -> check_is_admin(Db), RevsLimBin = ?uint2bin(RevsLimit), @@ -734,16 +759,8 @@ fold_changes(Db, SinceSeq, UserFun, UserAcc, Options) -> maybe_add_sys_db_callbacks(Db) -> - IsReplicatorDb = fabric2_util:dbname_ends_with(Db, <<"_replicator">>), - - AuthenticationDb = config:get("chttpd_auth", "authentication_db"), - IsAuthCache = if AuthenticationDb == undefined -> false; true -> - name(Db) == ?l2b(AuthenticationDb) - end, - CfgUsersSuffix = config:get("couchdb", "users_db_suffix", "_users"), - IsCfgUsersDb = fabric2_util:dbname_ends_with(Db, ?l2b(CfgUsersSuffix)), - IsGlobalUsersDb = fabric2_util:dbname_ends_with(Db, <<"_users">>), - IsUsersDb = IsAuthCache orelse IsCfgUsersDb orelse IsGlobalUsersDb, + IsReplicatorDb = is_replicator_db(Db), + IsUsersDb = is_users_db(Db), {BDU, ADR} = if IsReplicatorDb -> diff --git a/src/fabric/src/fabric2_util.erl b/src/fabric/src/fabric2_util.erl index 48bf7d143..2b8e49ebf 100644 --- a/src/fabric/src/fabric2_util.erl +++ b/src/fabric/src/fabric2_util.erl @@ -124,8 +124,10 @@ validate_json_list_of_strings(Member, Props) -> end. -dbname_ends_with(#{} = Db, Suffix) when is_binary(Suffix) -> - DbName = fabric2_db:name(Db), +dbname_ends_with(#{} = Db, Suffix) -> + dbname_ends_with(fabric2_db:name(Db), Suffix); + +dbname_ends_with(DbName, Suffix) when is_binary(DbName), is_binary(Suffix) -> Suffix == filename:basename(DbName). -- cgit v1.2.1 From 2c0154a10a9c42b9c6baa1b27ac7bb4781585538 Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Thu, 25 Jul 2019 12:37:05 -0500 Subject: Fix default key ranges for fold_range If a start or end key is not specified we still need to scope the range read to the given `RangePrefix`. --- src/fabric/src/fabric2_fdb.erl | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/src/fabric/src/fabric2_fdb.erl b/src/fabric/src/fabric2_fdb.erl index 670ce8b49..71cb68f21 100644 --- a/src/fabric/src/fabric2_fdb.erl +++ b/src/fabric/src/fabric2_fdb.erl @@ -985,18 +985,19 @@ get_fold_opts(RangePrefix, Options) -> % Set the maximum bounds for the start and endkey StartKey2 = case StartKey1 of - undefined -> <<>>; - SK2 -> SK2 + undefined -> + <>; + SK2 -> + erlfdb_tuple:pack({SK2}, RangePrefix) end, EndKey2 = case EndKey1 of - undefined -> <<255>>; - EK2 -> EK2 + undefined -> + <>; + EK2 -> + erlfdb_tuple:pack({EK2}, RangePrefix) end, - StartKey3 = erlfdb_tuple:pack({StartKey2}, RangePrefix), - EndKey3 = erlfdb_tuple:pack({EndKey2}, RangePrefix), - % FoundationDB ranges are applied as SK <= key < EK % By default, CouchDB is SK <= key <= EK with the % optional inclusive_end=false option changing that @@ -1006,20 +1007,20 @@ get_fold_opts(RangePrefix, Options) -> % Thus we have this wonderful bit of logic to account % for all of those combinations. - StartKey4 = case {Reverse, InclusiveEnd} of + StartKey3 = case {Reverse, InclusiveEnd} of {true, false} -> - erlfdb_key:first_greater_than(StartKey3); + erlfdb_key:first_greater_than(StartKey2); _ -> - StartKey3 + StartKey2 end, - EndKey4 = case {Reverse, InclusiveEnd} of + EndKey3 = case {Reverse, InclusiveEnd} of {false, true} when EndKey0 /= undefined -> - erlfdb_key:first_greater_than(EndKey3); + erlfdb_key:first_greater_than(EndKey2); {true, _} -> - erlfdb_key:first_greater_than(EndKey3); + erlfdb_key:first_greater_than(EndKey2); _ -> - EndKey3 + EndKey2 end, Skip = case fabric2_util:get_value(skip, Options) of @@ -1053,7 +1054,7 @@ get_fold_opts(RangePrefix, Options) -> ++ StreamingMode ++ Snapshot, - {StartKey4, EndKey4, Skip, OutOpts}. + {StartKey3, EndKey3, Skip, OutOpts}. fold_range_cb(KV, {skip, 0, Callback, Acc}) -> -- cgit v1.2.1 From 769e191ab013cb7c7158349543e3b755379de849 Mon Sep 17 00:00:00 2001 From: Garren Smith Date: Mon, 17 Jun 2019 15:45:10 +0200 Subject: CouchDB map indexes on FDB This adds couch_views which builds map indexes and stores them in FDB. Co-authored-by: Paul J. Davis --- rebar.config.script | 1 + rel/overlay/etc/default.ini | 4 + rel/reltool.config | 2 + src/chttpd/src/chttpd_db.erl | 3 +- src/chttpd/src/chttpd_view.erl | 5 +- src/couch_mrview/src/couch_mrview_util.erl | 2 +- src/couch_views/.gitignore | 19 + src/couch_views/README.md | 15 + src/couch_views/include/couch_views.hrl | 26 ++ src/couch_views/rebar.config | 14 + src/couch_views/src/couch_views.app.src | 31 ++ src/couch_views/src/couch_views.erl | 140 ++++++ src/couch_views/src/couch_views_app.erl | 31 ++ src/couch_views/src/couch_views_encoding.erl | 105 +++++ src/couch_views/src/couch_views_fdb.erl | 438 +++++++++++++++++ src/couch_views/src/couch_views_indexer.erl | 261 +++++++++++ src/couch_views/src/couch_views_jobs.erl | 109 +++++ src/couch_views/src/couch_views_reader.erl | 208 +++++++++ src/couch_views/src/couch_views_server.erl | 103 ++++ src/couch_views/src/couch_views_sup.erl | 46 ++ src/couch_views/src/couch_views_util.erl | 78 ++++ src/couch_views/test/couch_views_encoding_test.erl | 94 ++++ src/couch_views/test/couch_views_indexer_test.erl | 456 ++++++++++++++++++ src/couch_views/test/couch_views_map_test.erl | 517 +++++++++++++++++++++ src/fabric/include/fabric2.hrl | 1 + test/elixir/test/basics_test.exs | 24 +- test/elixir/test/map_test.exs | 450 ++++++++++++++++++ test/elixir/test/view_collation_test.exs | 28 +- 28 files changed, 3183 insertions(+), 28 deletions(-) create mode 100644 src/couch_views/.gitignore create mode 100644 src/couch_views/README.md create mode 100644 src/couch_views/include/couch_views.hrl create mode 100644 src/couch_views/rebar.config create mode 100644 src/couch_views/src/couch_views.app.src create mode 100644 src/couch_views/src/couch_views.erl create mode 100644 src/couch_views/src/couch_views_app.erl create mode 100644 src/couch_views/src/couch_views_encoding.erl create mode 100644 src/couch_views/src/couch_views_fdb.erl create mode 100644 src/couch_views/src/couch_views_indexer.erl create mode 100644 src/couch_views/src/couch_views_jobs.erl create mode 100644 src/couch_views/src/couch_views_reader.erl create mode 100644 src/couch_views/src/couch_views_server.erl create mode 100644 src/couch_views/src/couch_views_sup.erl create mode 100644 src/couch_views/src/couch_views_util.erl create mode 100644 src/couch_views/test/couch_views_encoding_test.erl create mode 100644 src/couch_views/test/couch_views_indexer_test.erl create mode 100644 src/couch_views/test/couch_views_map_test.erl create mode 100644 test/elixir/test/map_test.exs diff --git a/rebar.config.script b/rebar.config.script index f87be50a1..16ec16c8b 100644 --- a/rebar.config.script +++ b/rebar.config.script @@ -127,6 +127,7 @@ SubDirs = [ "src/couch_stats", "src/couch_peruser", "src/couch_tests", + "src/couch_views", "src/ddoc_cache", "src/dreyfus", "src/fabric", diff --git a/rel/overlay/etc/default.ini b/rel/overlay/etc/default.ini index c115185a2..856578e18 100644 --- a/rel/overlay/etc/default.ini +++ b/rel/overlay/etc/default.ini @@ -257,6 +257,10 @@ iterations = 10 ; iterations for password hashing ; Set the SameSite cookie property for the auth cookie. If empty, the SameSite property is not set. ; same_site = +; Settings for view indexing +[couch_views] +; max_workers = 100 + ; CSP (Content Security Policy) Support for _utils [csp] enable = true diff --git a/rel/reltool.config b/rel/reltool.config index 8f153c8bc..a96be105e 100644 --- a/rel/reltool.config +++ b/rel/reltool.config @@ -42,6 +42,7 @@ couch_stats, couch_event, couch_peruser, + couch_views, ddoc_cache, dreyfus, ets_lru, @@ -102,6 +103,7 @@ {app, couch_stats, [{incl_cond, include}]}, {app, couch_event, [{incl_cond, include}]}, {app, couch_peruser, [{incl_cond, include}]}, + {app, couch_views, [{incl_cond, include}]}, {app, ddoc_cache, [{incl_cond, include}]}, {app, dreyfus, [{incl_cond, include}]}, {app, ets_lru, [{incl_cond, include}]}, diff --git a/src/chttpd/src/chttpd_db.erl b/src/chttpd/src/chttpd_db.erl index 6487e10af..8e2b9d189 100644 --- a/src/chttpd/src/chttpd_db.erl +++ b/src/chttpd/src/chttpd_db.erl @@ -352,7 +352,8 @@ handle_design_req(#httpd{ path_parts=[_DbName, _Design, Name, <<"_",_/binary>> = Action | _Rest] }=Req, Db) -> DbName = fabric2_db:name(Db), - case ddoc_cache:open(DbName, <<"_design/", Name/binary>>) of +%% case ddoc_cache:open(DbName, <<"_design/", Name/binary>>) of + case fabric2_db:open_doc(Db, <<"_design/", Name/binary>>) of {ok, DDoc} -> Handler = chttpd_handlers:design_handler(Action, fun bad_action_req/3), Handler(Req, Db, DDoc); diff --git a/src/chttpd/src/chttpd_view.erl b/src/chttpd/src/chttpd_view.erl index f73a8b7b1..49ca1a793 100644 --- a/src/chttpd/src/chttpd_view.erl +++ b/src/chttpd/src/chttpd_view.erl @@ -49,10 +49,9 @@ design_doc_view(Req, Db, DDoc, ViewName, Keys) -> fabric_query_view(Db, Req, DDoc, ViewName, Args) -> Max = chttpd:chunked_response_buffer_size(), + Fun = fun view_cb/2, VAcc = #vacc{db=Db, req=Req, threshold=Max}, - Options = [{user_ctx, Req#httpd.user_ctx}], - {ok, Resp} = fabric:query_view(Db, Options, DDoc, ViewName, - fun view_cb/2, VAcc, Args), + {ok, Resp} = couch_views:query(Db, DDoc, ViewName, Fun, VAcc, Args), {ok, Resp#vacc.resp}. diff --git a/src/couch_mrview/src/couch_mrview_util.erl b/src/couch_mrview/src/couch_mrview_util.erl index e971720c9..b2b2354c9 100644 --- a/src/couch_mrview/src/couch_mrview_util.erl +++ b/src/couch_mrview/src/couch_mrview_util.erl @@ -397,7 +397,7 @@ fold_reduce({NthRed, Lang, View}, Fun, Acc, Options) -> validate_args(Db, DDoc, Args0) -> - {ok, State} = couch_mrview_index:init(Db, DDoc), + {ok, State} = couch_mrview_util:ddoc_to_mrst(fabric2_db:name(Db), DDoc), Args1 = apply_limit(State#mrst.partitioned, Args0), validate_args(State, Args1). diff --git a/src/couch_views/.gitignore b/src/couch_views/.gitignore new file mode 100644 index 000000000..f1c455451 --- /dev/null +++ b/src/couch_views/.gitignore @@ -0,0 +1,19 @@ +.rebar3 +_* +.eunit +*.o +*.beam +*.plt +*.swp +*.swo +.erlang.cookie +ebin +log +erl_crash.dump +.rebar +logs +_build +.idea +*.iml +rebar3.crashdump +*~ diff --git a/src/couch_views/README.md b/src/couch_views/README.md new file mode 100644 index 000000000..49cd82b98 --- /dev/null +++ b/src/couch_views/README.md @@ -0,0 +1,15 @@ +CouchDB Views +===== + +This is the new application that builds and runs Map/reduce views against FoundationDB. +Currently only map indexes are supported and it will always return the full index. + +Code layout: + +* `couch_views` - Main entry point to query a view +* `couch_views_reader` - Reads from the index for queries +* `couch_views_indexer` - `couch_jobs` worker that builds an index from the changes feed. +* `couch_vews_jobs` - `couch_views` interactions with `couch_jobs`. It handles adding index jobs and subscribes to jobs. +* `couch_views_fdb` - Maps view operations to FoundationDB logic. +* `couch_views_encoding` - Encodes view keys that are byte comparable following CouchDB view sort order. +* `couch_views_server` - Spawns `couch_views_indexer` workers to handle index update jobs. diff --git a/src/couch_views/include/couch_views.hrl b/src/couch_views/include/couch_views.hrl new file mode 100644 index 000000000..2e443ebc3 --- /dev/null +++ b/src/couch_views/include/couch_views.hrl @@ -0,0 +1,26 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +% indexing +-define(VIEW_UPDATE_SEQ, 0). +-define(VIEW_ID_INFO, 1). +-define(VIEW_ID_RANGE, 2). +-define(VIEW_MAP_RANGE, 3). + +-define(VIEW_ROW_COUNT, 0). +-define(VIEW_KV_SIZE, 1). + +-define(VIEW_ROW_KEY, 0). +-define(VIEW_ROW_VALUE, 1). + +% jobs api +-define(INDEX_JOB_TYPE, <<"views">>). diff --git a/src/couch_views/rebar.config b/src/couch_views/rebar.config new file mode 100644 index 000000000..362c8785e --- /dev/null +++ b/src/couch_views/rebar.config @@ -0,0 +1,14 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +{cover_enabled, true}. +{cover_print_enabled, true}. diff --git a/src/couch_views/src/couch_views.app.src b/src/couch_views/src/couch_views.app.src new file mode 100644 index 000000000..c80c30b02 --- /dev/null +++ b/src/couch_views/src/couch_views.app.src @@ -0,0 +1,31 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +{application, couch_views, [ + {description, "CouchDB Views on FDB"}, + {vsn, git}, + {mod, {couch_views_app, []}}, + {registered, [ + couch_views_sup, + couch_views_server + ]}, + {applications, [ + kernel, + stdlib, + erlfdb, + couch_log, + config, + couch_stats, + fabric, + couch_jobs + ]} +]}. diff --git a/src/couch_views/src/couch_views.erl b/src/couch_views/src/couch_views.erl new file mode 100644 index 000000000..7c7588c67 --- /dev/null +++ b/src/couch_views/src/couch_views.erl @@ -0,0 +1,140 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_views). + +-export([ + query/6 +]). + + +-include_lib("couch_mrview/include/couch_mrview.hrl"). + + +query(Db, DDoc, ViewName, Callback, Acc0, Args0) -> + case fabric2_db:is_users_db(Db) of + true -> + fabric2_users_db:after_doc_read(DDoc, Db); + false -> + ok + end, + + DbName = fabric2_db:name(Db), + {ok, Mrst} = couch_views_util:ddoc_to_mrst(DbName, DDoc), + + #mrst{ + views = Views + } = Mrst, + + Args1 = to_mrargs(Args0), + Args2 = couch_mrview_util:set_view_type(Args1, ViewName, Views), + Args3 = couch_mrview_util:validate_args(Args2), + ok = check_range(Args3), + case is_reduce_view(Args3) of + true -> throw({not_implemented}); + false -> ok + end, + + ok = maybe_update_view(Db, Mrst, Args3), + + try + couch_views_reader:read(Db, Mrst, ViewName, Callback, Acc0, Args3) + after + UpdateAfter = Args3#mrargs.update == lazy, + if UpdateAfter == false -> ok; true -> + couch_views_jobs:build_view_async(Db, Mrst) + end + end. + + +maybe_update_view(_Db, _Mrst, #mrargs{update = false}) -> + ok; + +maybe_update_view(_Db, _Mrst, #mrargs{update = lazy}) -> + ok; + +maybe_update_view(Db, Mrst, _Args) -> + WaitSeq = fabric2_fdb:transactional(Db, fun(TxDb) -> + DbSeq = fabric2_db:get_update_seq(TxDb), + ViewSeq = couch_views_fdb:get_update_seq(TxDb, Mrst), + case DbSeq == ViewSeq of + true -> ready; + false -> DbSeq + end + end), + + if WaitSeq == ready -> ok; true -> + couch_views_jobs:build_view(Db, Mrst, WaitSeq) + end. + + +is_reduce_view(#mrargs{view_type = ViewType}) -> + ViewType =:= red; +is_reduce_view({Reduce, _, _}) -> + Reduce =:= red. + + +to_mrargs(#mrargs{} = Args) -> + Args; + +to_mrargs(#{} = Args) -> + Fields = record_info(fields, mrargs), + Indexes = lists:seq(2, record_info(size, mrargs)), + LU = lists:zip(Fields, Indexes), + + maps:fold(fun(Key, Value, Acc) -> + Index = fabric2_util:get_value(couch_util:to_existing_atom(Key), LU), + setelement(Index, Acc, Value) + end, #mrargs{}, Args). + + +check_range(#mrargs{start_key = undefined}) -> + ok; + +check_range(#mrargs{end_key = undefined}) -> + ok; + +check_range(#mrargs{start_key = K, end_key = K}) -> + ok; + +check_range(Args) -> + #mrargs{ + direction = Dir, + start_key = SK, + start_key_docid = SKD, + end_key = EK, + end_key_docid = EKD + } = Args, + + case {Dir, view_cmp(SK, SKD, EK, EKD)} of + {fwd, false} -> + throw(check_range_error(<<"true">>)); + {rev, true} -> + throw(check_range_error(<<"false">>)); + _ -> + ok + end. + + +check_range_error(Descending) -> + {query_parse_error, + <<"No rows can match your key range, reverse your ", + "start_key and end_key or set descending=", + Descending/binary>>}. + + +view_cmp(SK, SKD, EK, EKD) -> + BinSK = couch_views_encoding:encode(SK, key), + BinEK = couch_views_encoding:encode(EK, key), + PackedSK = erlfdb_tuple:pack({BinSK, SKD}), + PackedEK = erlfdb_tuple:pack({BinEK, EKD}), + PackedSK =< PackedEK. diff --git a/src/couch_views/src/couch_views_app.erl b/src/couch_views/src/couch_views_app.erl new file mode 100644 index 000000000..5ede5ef85 --- /dev/null +++ b/src/couch_views/src/couch_views_app.erl @@ -0,0 +1,31 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + + +-module(couch_views_app). + + +-behaviour(application). + + +-export([ + start/2, + stop/1 +]). + + +start(_StartType, StartArgs) -> + couch_views_sup:start_link(StartArgs). + + +stop(_State) -> + ok. diff --git a/src/couch_views/src/couch_views_encoding.erl b/src/couch_views/src/couch_views_encoding.erl new file mode 100644 index 000000000..ef5fed9a2 --- /dev/null +++ b/src/couch_views/src/couch_views_encoding.erl @@ -0,0 +1,105 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_views_encoding). + + +-export([ + encode/1, + encode/2, + decode/1 +]). + + +-define(NULL, 0). +-define(FALSE, 1). +-define(TRUE, 2). +-define(NUMBER, 3). +-define(STRING, 4). +-define(LIST, 5). +-define(OBJECT, 6). + + +encode(X) -> + encode(X, value). + + +encode(X, Type) when Type == key; Type == value -> + erlfdb_tuple:pack(encode_int(X, Type)). + + +decode(Encoded) -> + Val = erlfdb_tuple:unpack(Encoded), + decode_int(Val). + + +encode_int(null, _Type) -> + {?NULL}; + +encode_int(false, _Type) -> + {?FALSE}; + +encode_int(true, _Type) -> + {?TRUE}; + +encode_int(Num, key) when is_number(Num) -> + {?NUMBER, float(Num)}; + +encode_int(Num, value) when is_number(Num) -> + {?NUMBER, Num}; + +encode_int(Bin, key) when is_binary(Bin) -> + {?STRING, couch_util:get_sort_key(Bin)}; + +encode_int(Bin, value) when is_binary(Bin) -> + {?STRING, Bin}; + +encode_int(List, Type) when is_list(List) -> + Encoded = lists:map(fun(Item) -> + encode_int(Item, Type) + end, List), + {?LIST, list_to_tuple(Encoded)}; + +encode_int({Props}, Type) when is_list(Props) -> + Encoded = lists:map(fun({K, V}) -> + EK = encode_int(K, Type), + EV = encode_int(V, Type), + {EK, EV} + end, Props), + {?OBJECT, list_to_tuple(Encoded)}. + + +decode_int({?NULL}) -> + null; + +decode_int({?FALSE}) -> + false; + +decode_int({?TRUE}) -> + true; + +decode_int({?STRING, Bin}) -> + Bin; + +decode_int({?NUMBER, Num}) -> + Num; + +decode_int({?LIST, List}) -> + lists:map(fun decode_int/1, tuple_to_list(List)); + +decode_int({?OBJECT, Object}) -> + Props = lists:map(fun({EK, EV}) -> + K = decode_int(EK), + V = decode_int(EV), + {K, V} + end, tuple_to_list(Object)), + {Props}. diff --git a/src/couch_views/src/couch_views_fdb.erl b/src/couch_views/src/couch_views_fdb.erl new file mode 100644 index 000000000..60ce30019 --- /dev/null +++ b/src/couch_views/src/couch_views_fdb.erl @@ -0,0 +1,438 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_views_fdb). + +-export([ + get_update_seq/2, + set_update_seq/3, + + get_row_count/3, + get_kv_size/3, + + fold_map_idx/6, + + write_doc/4 +]). + +-ifdef(TEST). +-compile(export_all). +-compile(nowarn_export_all). +-endif. + +-define(LIST_VALUE, 0). +-define(JSON_VALUE, 1). +-define(VALUE, 2). + + +-include("couch_views.hrl"). +-include_lib("couch_mrview/include/couch_mrview.hrl"). +-include_lib("fabric/include/fabric2.hrl"). + + +% View Build Sequence Access +% (, ?DB_VIEWS, Sig, ?VIEW_UPDATE_SEQ) = Sequence + + +get_update_seq(TxDb, #mrst{sig = Sig}) -> + #{ + tx := Tx, + db_prefix := DbPrefix + } = TxDb, + + case erlfdb:wait(erlfdb:get(Tx, seq_key(DbPrefix, Sig))) of + not_found -> <<>>; + UpdateSeq -> UpdateSeq + end. + + +set_update_seq(TxDb, Sig, Seq) -> + #{ + tx := Tx, + db_prefix := DbPrefix + } = TxDb, + ok = erlfdb:set(Tx, seq_key(DbPrefix, Sig), Seq). + + +get_row_count(TxDb, #mrst{sig = Sig}, ViewId) -> + #{ + tx := Tx, + db_prefix := DbPrefix + } = TxDb, + + case erlfdb:wait(erlfdb:get(Tx, row_count_key(DbPrefix, Sig, ViewId))) of + not_found -> 0; % Can this happen? + CountBin -> ?bin2uint(CountBin) + end. + + +get_kv_size(TxDb, #mrst{sig = Sig}, ViewId) -> + #{ + tx := Tx, + db_prefix := DbPrefix + } = TxDb, + + case erlfdb:wait(erlfdb:get(Tx, kv_size_key(DbPrefix, Sig, ViewId))) of + not_found -> 0; % Can this happen? + SizeBin -> ?bin2uint(SizeBin) + end. + + +fold_map_idx(TxDb, Sig, ViewId, Options, Callback, Acc0) -> + #{ + db_prefix := DbPrefix + } = TxDb, + + MapIdxPrefix = map_idx_prefix(DbPrefix, Sig, ViewId), + FoldAcc = #{ + prefix => MapIdxPrefix, + sort_key => undefined, + docid => undefined, + dupe_id => undefined, + callback => Callback, + acc => Acc0 + }, + + {Fun, Acc} = case fabric2_util:get_value(dir, Options, fwd) of + fwd -> + FwdAcc = FoldAcc#{ + next => key, + key => undefined + }, + {fun fold_fwd/2, FwdAcc}; + rev -> + RevAcc = FoldAcc#{ + next => value, + value => undefined + }, + {fun fold_rev/2, RevAcc} + end, + + #{ + acc := Acc1 + } = fabric2_fdb:fold_range(TxDb, MapIdxPrefix, Fun, Acc, Options), + + Acc1. + + +write_doc(TxDb, Sig, _ViewIds, #{deleted := true} = Doc) -> + #{ + id := DocId + } = Doc, + + ExistingViewKeys = get_view_keys(TxDb, Sig, DocId), + + clear_id_idx(TxDb, Sig, DocId), + lists:foreach(fun({ViewId, TotalKeys, TotalSize, UniqueKeys}) -> + clear_map_idx(TxDb, Sig, ViewId, DocId, UniqueKeys), + update_row_count(TxDb, Sig, ViewId, -TotalKeys), + update_kv_size(TxDb, Sig, ViewId, -TotalSize) + end, ExistingViewKeys); + +write_doc(TxDb, Sig, ViewIds, Doc) -> + #{ + id := DocId, + results := Results + } = Doc, + + ExistingViewKeys = get_view_keys(TxDb, Sig, DocId), + + clear_id_idx(TxDb, Sig, DocId), + + lists:foreach(fun({ViewId, NewRows}) -> + update_id_idx(TxDb, Sig, ViewId, DocId, NewRows), + + ExistingKeys = case lists:keyfind(ViewId, 1, ExistingViewKeys) of + {ViewId, TotalRows, TotalSize, EKeys} -> + RowChange = length(NewRows) - TotalRows, + SizeChange = calculate_row_size(NewRows) - TotalSize, + update_row_count(TxDb, Sig, ViewId, RowChange), + update_kv_size(TxDb, Sig, ViewId, SizeChange), + EKeys; + false -> + RowChange = length(NewRows), + SizeChange = calculate_row_size(NewRows), + update_row_count(TxDb, Sig, ViewId, RowChange), + update_kv_size(TxDb, Sig, ViewId, SizeChange), + [] + end, + update_map_idx(TxDb, Sig, ViewId, DocId, ExistingKeys, NewRows) + end, lists:zip(ViewIds, Results)). + + +% For each row in a map view there are two rows stored in +% FoundationDB: +% +% `(EncodedSortKey, EncodedKey)` +% `(EncodedSortKey, EncodedValue)` +% +% The difference between `EncodedSortKey` and `EndcodedKey` is +% the use of `couch_util:get_sort_key/1` which turns UTF-8 +% strings into binaries that are byte comparable. Given a sort +% key binary we cannot recover the input so to return unmodified +% user data we are forced to store the original. +% +% These two fold functions exist so that we can be fairly +% forceful on our assertions about which rows to see. Since +% when we're folding forward we'll see the key first. When +% `descending=true` and we're folding in reverse we'll see +% the value first. + +fold_fwd({RowKey, EncodedOriginalKey}, #{next := key} = Acc) -> + #{ + prefix := Prefix + } = Acc, + + {{SortKey, DocId}, DupeId, ?VIEW_ROW_KEY} = + erlfdb_tuple:unpack(RowKey, Prefix), + Acc#{ + next := value, + key := couch_views_encoding:decode(EncodedOriginalKey), + sort_key := SortKey, + docid := DocId, + dupe_id := DupeId + }; + +fold_fwd({RowKey, EncodedValue}, #{next := value} = Acc) -> + #{ + prefix := Prefix, + key := Key, + sort_key := SortKey, + docid := DocId, + dupe_id := DupeId, + callback := UserCallback, + acc := UserAcc0 + } = Acc, + + % We're asserting there that this row is paired + % correctly with the previous row by relying on + % a badmatch if any of these values don't match. + {{SortKey, DocId}, DupeId, ?VIEW_ROW_VALUE} = + erlfdb_tuple:unpack(RowKey, Prefix), + + Value = couch_views_encoding:decode(EncodedValue), + UserAcc1 = UserCallback(DocId, Key, Value, UserAcc0), + + Acc#{ + next := key, + key := undefined, + sort_key := undefined, + docid := undefined, + dupe_id := undefined, + acc := UserAcc1 + }. + + +fold_rev({RowKey, EncodedValue}, #{next := value} = Acc) -> + #{ + prefix := Prefix + } = Acc, + + {{SortKey, DocId}, DupeId, ?VIEW_ROW_VALUE} = + erlfdb_tuple:unpack(RowKey, Prefix), + Acc#{ + next := key, + value := couch_views_encoding:decode(EncodedValue), + sort_key := SortKey, + docid := DocId, + dupe_id := DupeId + }; + +fold_rev({RowKey, EncodedOriginalKey}, #{next := key} = Acc) -> + #{ + prefix := Prefix, + value := Value, + sort_key := SortKey, + docid := DocId, + dupe_id := DupeId, + callback := UserCallback, + acc := UserAcc0 + } = Acc, + + % We're asserting there that this row is paired + % correctly with the previous row by relying on + % a badmatch if any of these values don't match. + {{SortKey, DocId}, DupeId, ?VIEW_ROW_KEY} = + erlfdb_tuple:unpack(RowKey, Prefix), + + Key = couch_views_encoding:decode(EncodedOriginalKey), + UserAcc1 = UserCallback(DocId, Key, Value, UserAcc0), + + Acc#{ + next := value, + value := undefined, + sort_key := undefined, + docid := undefined, + dupe_id := undefined, + acc := UserAcc1 + }. + + +clear_id_idx(TxDb, Sig, DocId) -> + #{ + tx := Tx, + db_prefix := DbPrefix + } = TxDb, + + {Start, End} = id_idx_range(DbPrefix, Sig, DocId), + ok = erlfdb:clear_range(Tx, Start, End). + + +clear_map_idx(TxDb, Sig, ViewId, DocId, ViewKeys) -> + #{ + tx := Tx, + db_prefix := DbPrefix + } = TxDb, + + lists:foreach(fun(ViewKey) -> + {Start, End} = map_idx_range(DbPrefix, Sig, ViewId, ViewKey, DocId), + ok = erlfdb:clear_range(Tx, Start, End) + end, ViewKeys). + + +update_id_idx(TxDb, Sig, ViewId, DocId, NewRows) -> + #{ + tx := Tx, + db_prefix := DbPrefix + } = TxDb, + + Unique = lists:usort([K || {K, _V} <- NewRows]), + + Key = id_idx_key(DbPrefix, Sig, DocId, ViewId), + RowSize = calculate_row_size(NewRows), + Val = couch_views_encoding:encode([length(NewRows), RowSize, Unique]), + ok = erlfdb:set(Tx, Key, Val). + + +update_map_idx(TxDb, Sig, ViewId, DocId, ExistingKeys, NewRows) -> + #{ + tx := Tx, + db_prefix := DbPrefix + } = TxDb, + + Unique = lists:usort([K || {K, _V} <- NewRows]), + + KeysToRem = ExistingKeys -- Unique, + lists:foreach(fun(RemKey) -> + {Start, End} = map_idx_range(DbPrefix, Sig, ViewId, RemKey, DocId), + ok = erlfdb:clear_range(Tx, Start, End) + end, KeysToRem), + + KVsToAdd = process_rows(NewRows), + MapIdxPrefix = map_idx_prefix(DbPrefix, Sig, ViewId), + + lists:foreach(fun({DupeId, Key1, Key2, Val}) -> + KK = map_idx_key(MapIdxPrefix, {Key1, DocId}, DupeId, ?VIEW_ROW_KEY), + VK = map_idx_key(MapIdxPrefix, {Key1, DocId}, DupeId, ?VIEW_ROW_VALUE), + ok = erlfdb:set(Tx, KK, Key2), + ok = erlfdb:set(Tx, VK, Val) + end, KVsToAdd). + + +get_view_keys(TxDb, Sig, DocId) -> + #{ + tx := Tx, + db_prefix := DbPrefix + } = TxDb, + {Start, End} = id_idx_range(DbPrefix, Sig, DocId), + lists:map(fun({K, V}) -> + {?DB_VIEWS, Sig, ?VIEW_ID_RANGE, DocId, ViewId} = + erlfdb_tuple:unpack(K, DbPrefix), + [TotalKeys, TotalSize, UniqueKeys] = couch_views_encoding:decode(V), + {ViewId, TotalKeys, TotalSize, UniqueKeys} + end, erlfdb:get_range(Tx, Start, End, [])). + + +update_row_count(TxDb, Sig, ViewId, Increment) -> + #{ + tx := Tx, + db_prefix := DbPrefix + } = TxDb, + Key = row_count_key(DbPrefix, Sig, ViewId), + erlfdb:add(Tx, Key, Increment). + + +update_kv_size(TxDb, Sig, ViewId, Increment) -> + #{ + tx := Tx, + db_prefix := DbPrefix + } = TxDb, + Key = kv_size_key(DbPrefix, Sig, ViewId), + erlfdb:add(Tx, Key, Increment). + + +seq_key(DbPrefix, Sig) -> + Key = {?DB_VIEWS, Sig, ?VIEW_UPDATE_SEQ}, + erlfdb_tuple:pack(Key, DbPrefix). + + +row_count_key(DbPrefix, Sig, ViewId) -> + Key = {?DB_VIEWS, Sig, ?VIEW_ID_INFO, ViewId, ?VIEW_ROW_COUNT}, + erlfdb_tuple:pack(Key, DbPrefix). + + +kv_size_key(DbPrefix, Sig, ViewId) -> + Key = {?DB_VIEWS, Sig, ?VIEW_ID_INFO, ViewId, ?VIEW_KV_SIZE}, + erlfdb_tuple:pack(Key, DbPrefix). + + +id_idx_key(DbPrefix, Sig, DocId, ViewId) -> + Key = {?DB_VIEWS, Sig, ?VIEW_ID_RANGE, DocId, ViewId}, + erlfdb_tuple:pack(Key, DbPrefix). + + +id_idx_range(DbPrefix, Sig, DocId) -> + Key = {?DB_VIEWS, Sig, ?VIEW_ID_RANGE, DocId}, + erlfdb_tuple:range(Key, DbPrefix). + + +map_idx_prefix(DbPrefix, Sig, ViewId) -> + Key = {?DB_VIEWS, Sig, ?VIEW_MAP_RANGE, ViewId}, + erlfdb_tuple:pack(Key, DbPrefix). + + +map_idx_key(MapIdxPrefix, MapKey, DupeId, Type) -> + Key = {MapKey, DupeId, Type}, + erlfdb_tuple:pack(Key, MapIdxPrefix). + + +map_idx_range(DbPrefix, Sig, ViewId, MapKey, DocId) -> + Encoded = couch_views_encoding:encode(MapKey, key), + Key = {?DB_VIEWS, Sig, ?VIEW_MAP_RANGE, ViewId, {Encoded, DocId}}, + erlfdb_tuple:range(Key, DbPrefix). + + +process_rows(Rows) -> + Encoded = lists:map(fun({K, V}) -> + EK1 = couch_views_encoding:encode(K, key), + EK2 = couch_views_encoding:encode(K, value), + EV = couch_views_encoding:encode(V, value), + {EK1, EK2, EV} + end, Rows), + + Grouped = lists:foldl(fun({K1, K2, V}, Acc) -> + dict:append(K1, {K2, V}, Acc) + end, dict:new(), Encoded), + + dict:fold(fun(K1, Vals, DAcc) -> + Vals1 = lists:keysort(2, Vals), + {_, Labeled} = lists:foldl(fun({K2, V}, {Count, Acc}) -> + {Count + 1, [{Count, K1, K2, V} | Acc]} + end, {0, []}, Vals1), + Labeled ++ DAcc + end, [], Grouped). + + +calculate_row_size(Rows) -> + lists:foldl(fun({K, V}, Acc) -> + Acc + erlang:external_size(K) + erlang:external_size(V) + end, 0, Rows). diff --git a/src/couch_views/src/couch_views_indexer.erl b/src/couch_views/src/couch_views_indexer.erl new file mode 100644 index 000000000..a3179369c --- /dev/null +++ b/src/couch_views/src/couch_views_indexer.erl @@ -0,0 +1,261 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_views_indexer). + +-export([ + spawn_link/0 +]). + + +-export([ + init/0 +]). + +-include("couch_views.hrl"). +-include_lib("couch/include/couch_db.hrl"). +-include_lib("couch_mrview/include/couch_mrview.hrl"). +-include_lib("fabric/include/fabric2.hrl"). + +% TODO: +% * Handle timeouts of transaction and other errors + + +spawn_link() -> + proc_lib:spawn_link(?MODULE, init, []). + + +init() -> + {ok, Job, Data} = couch_jobs:accept(?INDEX_JOB_TYPE, #{}), + #{ + <<"db_name">> := DbName, + <<"ddoc_id">> := DDocId, + <<"sig">> := JobSig + } = Data, + + {ok, Db} = fabric2_db:open(DbName, []), + {ok, DDoc} = fabric2_db:open_doc(Db, DDocId), + {ok, Mrst} = couch_views_util:ddoc_to_mrst(DbName, DDoc), + HexSig = fabric2_util:to_hex(Mrst#mrst.sig), + + if HexSig == JobSig -> ok; true -> + couch_jobs:finish(undefined, Job, Data#{ + error => sig_changed, + reason => <<"Design document was modified">> + }), + exit(normal) + end, + + State = #{ + tx_db => undefined, + db_seq => undefined, + view_seq => undefined, + last_seq => undefined, + job => Job, + job_data => Data, + count => 0, + limit => num_changes(), + doc_acc => [], + design_opts => Mrst#mrst.design_opts + }, + + update(Db, Mrst, State). + + +update(#{} = Db, Mrst0, State0) -> + {Mrst2, State3} = fabric2_fdb:transactional(Db, fun(TxDb) -> + % In the first iteration of update we need + % to populate our db and view sequences + State1 = case State0 of + #{db_seq := undefined} -> + ViewSeq = couch_views_fdb:get_update_seq(TxDb, Mrst0), + State0#{ + tx_db := TxDb, + db_seq := fabric2_db:get_update_seq(TxDb), + view_seq := ViewSeq, + last_seq := ViewSeq + }; + _ -> + State0#{ + tx_db := TxDb + } + end, + + {ok, State2} = fold_changes(State1), + + #{ + count := Count, + limit := Limit, + doc_acc := DocAcc, + last_seq := LastSeq + } = State2, + + {Mrst1, MappedDocs} = map_docs(Mrst0, DocAcc), + write_docs(TxDb, Mrst1, MappedDocs, State2), + + case Count < Limit of + true -> + report_progress(State2, finished), + {Mrst1, finished}; + false -> + report_progress(State2, update), + {Mrst1, State2#{ + tx_db := undefined, + count := 0, + doc_acc := [], + view_seq := LastSeq + }} + end + end), + + case State3 of + finished -> + couch_query_servers:stop_doc_map(Mrst2#mrst.qserver); + _ -> + update(Db, Mrst2, State3) + end. + + +fold_changes(State) -> + #{ + view_seq := SinceSeq, + limit := Limit, + tx_db := TxDb + } = State, + + Fun = fun process_changes/2, + fabric2_db:fold_changes(TxDb, SinceSeq, Fun, State, [{limit, Limit}]). + + +process_changes(Change, Acc) -> + #{ + doc_acc := DocAcc, + count := Count, + tx_db := TxDb, + design_opts := DesignOpts + } = Acc, + + #{ + id := Id, + sequence := LastSeq, + deleted := Deleted + } = Change, + + IncludeDesign = lists:keymember(<<"include_design">>, 1, DesignOpts), + + Acc1 = case {Id, IncludeDesign} of + {<>, false} -> + maps:merge(Acc, #{ + count => Count + 1, + last_seq => LastSeq + }); + _ -> + % Making a note here that we should make fetching all the docs + % a parallel fdb operation + {ok, Doc} = case Deleted of + true -> {ok, []}; + false -> fabric2_db:open_doc(TxDb, Id) + end, + + Change1 = maps:put(doc, Doc, Change), + Acc#{ + doc_acc := DocAcc ++ [Change1], + count := Count + 1, + last_seq := LastSeq + } + end, + {ok, Acc1}. + + +map_docs(Mrst, Docs) -> + % Run all the non deleted docs through the view engine and + Mrst1 = start_query_server(Mrst), + QServer = Mrst1#mrst.qserver, + MapFun = fun + (#{deleted := true} = Change) -> + Change#{results => []}; + (#{deleted := false} = Change) -> + #{doc := Doc} = Change, + couch_stats:increment_counter([couchdb, mrview, map_doc]), + {ok, RawResults} = couch_query_servers:map_doc_raw(QServer, Doc), + JsonResults = couch_query_servers:raw_to_ejson(RawResults), + ListResults = lists:map(fun(ViewResults) -> + [list_to_tuple(Res) || Res <- ViewResults] + end, JsonResults), + Change#{results => ListResults} + end, + {Mrst1, lists:map(MapFun, Docs)}. + + +write_docs(TxDb, Mrst, Docs, State) -> + #mrst{ + views = Views, + sig = Sig + } = Mrst, + + #{ + last_seq := LastSeq + } = State, + + ViewIds = [View#mrview.id_num || View <- Views], + + lists:foreach(fun(Doc) -> + couch_views_fdb:write_doc(TxDb, Sig, ViewIds, Doc) + end, Docs), + + couch_views_fdb:set_update_seq(TxDb, Sig, LastSeq). + + +start_query_server(#mrst{} = Mrst) -> + #mrst{ + language = Language, + lib = Lib, + views = Views + } = Mrst, + Defs = [View#mrview.def || View <- Views], + {ok, QServer} = couch_query_servers:start_doc_map(Language, Defs, Lib), + Mrst#mrst{qserver = QServer}. + + +report_progress(State, UpdateType) -> + #{ + tx_db := TxDb, + job := Job, + job_data := JobData, + last_seq := LastSeq + } = State, + + #{ + <<"db_name">> := DbName, + <<"ddoc_id">> := DDocId, + <<"sig">> := Sig + } = JobData, + + % Reconstruct from scratch to remove any + % possible existing error state. + NewData = #{ + <<"db_name">> => DbName, + <<"ddoc_id">> => DDocId, + <<"sig">> => Sig, + <<"view_seq">> => LastSeq + }, + + case UpdateType of + update -> + couch_jobs:update(TxDb, Job, NewData); + finished -> + couch_jobs:finish(TxDb, Job, NewData) + end. + + +num_changes() -> + config:get_integer("couch_views", "change_limit", 100). diff --git a/src/couch_views/src/couch_views_jobs.erl b/src/couch_views/src/couch_views_jobs.erl new file mode 100644 index 000000000..16fc4103f --- /dev/null +++ b/src/couch_views/src/couch_views_jobs.erl @@ -0,0 +1,109 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_views_jobs). + +-export([ + set_timeout/0, + build_view/3, + build_view_async/2 +]). + +-ifdef(TEST). +-compile(export_all). +-compile(nowarn_export_all). +-endif. + + +-include_lib("couch_mrview/include/couch_mrview.hrl"). +-include("couch_views.hrl"). + + +set_timeout() -> + couch_jobs:set_type_timeout(?INDEX_JOB_TYPE, 6 * 1000). + + +build_view(TxDb, Mrst, UpdateSeq) -> + {ok, JobId} = build_view_async(TxDb, Mrst), + case wait_for_job(JobId, UpdateSeq) of + ok -> ok; + retry -> build_view(TxDb, Mrst, UpdateSeq) + end. + + +build_view_async(TxDb, Mrst) -> + JobId = job_id(TxDb, Mrst), + JobData = job_data(TxDb, Mrst), + ok = couch_jobs:add(undefined, ?INDEX_JOB_TYPE, JobId, JobData), + {ok, JobId}. + + +wait_for_job(JobId, UpdateSeq) -> + case couch_jobs:subscribe(?INDEX_JOB_TYPE, JobId) of + {ok, Subscription, _State, _Data} -> + wait_for_job(JobId, Subscription, UpdateSeq); + {ok, finished, Data} -> + case Data of + #{<<"view_sig">> := ViewSeq} when ViewSeq >= UpdateSeq -> + ok; + _ -> + retry + end + end. + + +wait_for_job(JobId, Subscription, UpdateSeq) -> + case wait(Subscription) of + {error, Error} -> + erlang:error(Error); + {finished, #{<<"error">> := Error, <<"reason">> := Reason}} -> + erlang:error({binary_to_existing_atom(Error, latin1), Reason}); + {finished, #{<<"view_seq">> := ViewSeq}} when ViewSeq >= UpdateSeq -> + ok; + {finished, _} -> + wait_for_job(JobId, UpdateSeq); + {_State, #{<<"view_seq">> := ViewSeq}} when ViewSeq >= UpdateSeq -> + couch_jobs:unsubscribe(Subscription), + ok; + {_, _} -> + wait_for_job(JobId, Subscription, UpdateSeq) + end. + + +job_id(#{name := DbName}, #mrst{sig = Sig}) -> + job_id(DbName, Sig); + +job_id(DbName, Sig) -> + HexSig = fabric2_util:to_hex(Sig), + <>. + + +job_data(Db, Mrst) -> + #mrst{ + idx_name = DDocId, + sig = Sig + } = Mrst, + + #{ + db_name => fabric2_db:name(Db), + ddoc_id => DDocId, + sig => fabric2_util:to_hex(Sig) + }. + + +wait(Subscription) -> + case couch_jobs:wait(Subscription, infinity) of + {?INDEX_JOB_TYPE, _JobId, JobState, JobData} -> + {JobState, JobData}; + timeout -> + {error, timeout} + end. diff --git a/src/couch_views/src/couch_views_reader.erl b/src/couch_views/src/couch_views_reader.erl new file mode 100644 index 000000000..c7989d89c --- /dev/null +++ b/src/couch_views/src/couch_views_reader.erl @@ -0,0 +1,208 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_views_reader). + +-export([ + read/6 +]). + + +-include("couch_views.hrl"). +-include_lib("couch/include/couch_db.hrl"). +-include_lib("couch_mrview/include/couch_mrview.hrl"). +-include_lib("fabric/include/fabric2.hrl"). + + +read(Db, Mrst, ViewName, UserCallback, UserAcc0, Args) -> + #mrst{ + language = Lang, + sig = Sig, + views = Views + } = Mrst, + + ViewId = get_view_id(Lang, Args, ViewName, Views), + Fun = fun handle_row/4, + + try + fabric2_fdb:transactional(Db, fun(TxDb) -> + TotalRows = couch_views_fdb:get_row_count(TxDb, Mrst, ViewId), + + Meta = {meta, [{total, TotalRows}, {offset, null}]}, + UserAcc1 = maybe_stop(UserCallback(Meta, UserAcc0)), + + Acc0 = #{ + db => TxDb, + skip => Args#mrargs.skip, + mrargs => undefined, + callback => UserCallback, + acc => UserAcc1 + }, + + Acc1 = lists:foldl(fun(KeyArgs, KeyAcc0) -> + Opts = mrargs_to_fdb_options(KeyArgs), + KeyAcc1 = KeyAcc0#{ + mrargs := KeyArgs + }, + couch_views_fdb:fold_map_idx( + TxDb, + Sig, + ViewId, + Opts, + Fun, + KeyAcc1 + ) + end, Acc0, expand_keys_args(Args)), + + #{ + acc := UserAcc2 + } = Acc1, + {ok, maybe_stop(UserCallback(complete, UserAcc2))} + end) + catch throw:{done, Out} -> + {ok, Out} + end. + + +handle_row(_DocId, _Key, _Value, #{skip := Skip} = Acc) when Skip > 0 -> + Acc#{skip := Skip - 1}; + +handle_row(DocId, Key, Value, Acc) -> + #{ + db := TxDb, + mrargs := Args, + callback := UserCallback, + acc := UserAcc0 + } = Acc, + + BaseRow = [ + {id, DocId}, + {key, Key}, + {value, Value} + ], + + Row = BaseRow ++ if not Args#mrargs.include_docs -> []; true -> + DocOpts0 = Args#mrargs.doc_options, + DocOpts1 = DocOpts0 ++ case Args#mrargs.conflicts of + true -> [conflicts]; + _ -> [] + end, + + {TargetDocId, Rev} = get_doc_id(DocId, Value), + DocObj = load_doc(TxDb, TargetDocId, Rev, DocOpts1), + [{doc, DocObj}] + end, + + UserAcc1 = maybe_stop(UserCallback({row, Row}, UserAcc0)), + Acc#{acc := UserAcc1}. + + +get_view_id(Lang, Args, ViewName, Views) -> + case couch_mrview_util:extract_view(Lang, Args, ViewName, Views) of + {map, View, _Args} -> View#mrview.id_num; + {red, {_Idx, _Lang, View}} -> View#mrview.id_num + end. + + +expand_keys_args(#mrargs{keys = undefined} = Args) -> + [Args]; + +expand_keys_args(#mrargs{keys = Keys} = Args) -> + lists:map(fun(Key) -> + Args#mrargs{ + start_key = Key, + end_key = Key + } + end, Keys). + + +mrargs_to_fdb_options(Args) -> + #mrargs{ + start_key = StartKey0, + start_key_docid = StartKeyDocId, + end_key = EndKey0, + end_key_docid = EndKeyDocId, + direction = Direction, + limit = Limit, + skip = Skip, + inclusive_end = InclusiveEnd + } = Args, + + StartKey1 = if StartKey0 == undefined -> undefined; true -> + couch_views_encoding:encode(StartKey0, key) + end, + + StartKeyOpts = case {StartKey1, StartKeyDocId} of + {undefined, _} -> + []; + {StartKey1, StartKeyDocId} -> + [{start_key, {StartKey1, StartKeyDocId}}] + end, + + EndKey1 = if EndKey0 == undefined -> undefined; true -> + couch_views_encoding:encode(EndKey0, key) + end, + + EndKeyOpts = case {EndKey1, EndKeyDocId, Direction} of + {undefined, _, _} -> + []; + {EndKey1, <<>>, rev} when not InclusiveEnd -> + % When we iterate in reverse with + % inclusive_end=false we have to set the + % EndKeyDocId to <<255>> so that we don't + % include matching rows. + [{end_key_gt, {EndKey1, <<255>>}}]; + {EndKey1, <<255>>, _} when not InclusiveEnd -> + % When inclusive_end=false we need to + % elide the default end_key_docid so as + % to not sort past the docids with the + % given end key. + [{end_key_gt, {EndKey1}}]; + {EndKey1, EndKeyDocId, _} when not InclusiveEnd -> + [{end_key_gt, {EndKey1, EndKeyDocId}}]; + {EndKey1, EndKeyDocId, _} when InclusiveEnd -> + [{end_key, {EndKey1, EndKeyDocId}}] + end, + + [ + {dir, Direction}, + {limit, Limit * 2 + Skip * 2}, + {streaming_mode, want_all} + ] ++ StartKeyOpts ++ EndKeyOpts. + + +maybe_stop({ok, Acc}) -> Acc; +maybe_stop({stop, Acc}) -> throw({done, Acc}). + + +get_doc_id(Id, {Props}) -> + DocId = couch_util:get_value(<<"_id">>, Props, Id), + Rev = couch_util:get_value(<<"_rev">>, Props, null), + {DocId, Rev}; + +get_doc_id(Id, _Value) -> + {Id, null}. + + +load_doc(TxDb, Id, null, DocOpts) -> + case fabric2_db:open_doc(TxDb, Id, DocOpts) of + {ok, Doc} -> couch_doc:to_json_obj(Doc, DocOpts); + {not_found, _} -> null + end; + +load_doc(TxDb, Id, Rev, DocOpts) -> + Rev1 = couch_doc:parse_rev(Rev), + case (catch fabric2_db:open_doc_revs(TxDb, Id, [Rev1], DocOpts)) of + {ok, [{ok, Doc}]} -> couch_doc:to_json_obj(Doc, DocOpts); + {ok, [{{not_found, missing}, Rev}]} -> null; + {ok, [_Else]} -> null + end. diff --git a/src/couch_views/src/couch_views_server.erl b/src/couch_views/src/couch_views_server.erl new file mode 100644 index 000000000..d14216e40 --- /dev/null +++ b/src/couch_views/src/couch_views_server.erl @@ -0,0 +1,103 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_views_server). + + +-behaviour(gen_server). + + +-export([ + start_link/0 +]). + + +-export([ + init/1, + terminate/2, + handle_call/3, + handle_cast/2, + handle_info/2, + code_change/3 +]). + + +-define(MAX_WORKERS, 100). + + +start_link() -> + gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). + + +init(_) -> + process_flag(trap_exit, true), + couch_views_jobs:set_timeout(), + St = #{ + workers => #{}, + max_workers => max_workers() + }, + {ok, spawn_workers(St)}. + + +terminate(_, _St) -> + ok. + + +handle_call(Msg, _From, St) -> + {stop, {bad_call, Msg}, {bad_call, Msg}, St}. + + +handle_cast(Msg, St) -> + {stop, {bad_cast, Msg}, St}. + + +handle_info({'EXIT', Pid, Reason}, St) -> + #{workers := Workers} = St, + case maps:is_key(Pid, Workers) of + true -> + if Reason == normal -> ok; true -> + LogMsg = "~p : indexer process ~p exited with ~p", + couch_log:error(LogMsg, [?MODULE, Pid, Reason]) + end, + NewWorkers = maps:remove(Pid, Workers), + {noreply, spawn_workers(St#{workers := NewWorkers})}; + false -> + LogMsg = "~p : unknown process ~p exited with ~p", + couch_log:error(LogMsg, [?MODULE, Pid, Reason]), + {stop, {unknown_pid_exit, Pid}, St} + end; + +handle_info(Msg, St) -> + {stop, {bad_info, Msg}, St}. + + +code_change(_OldVsn, St, _Extra) -> + {ok, St}. + + +spawn_workers(St) -> + #{ + workers := Workers, + max_workers := MaxWorkers + } = St, + case maps:size(Workers) < MaxWorkers of + true -> + Pid = couch_views_indexer:spawn_link(), + NewSt = St#{workers := Workers#{Pid => true}}, + spawn_workers(NewSt); + false -> + St + end. + + +max_workers() -> + config:get_integer("couch_views", "max_workers", ?MAX_WORKERS). diff --git a/src/couch_views/src/couch_views_sup.erl b/src/couch_views/src/couch_views_sup.erl new file mode 100644 index 000000000..7650fdf14 --- /dev/null +++ b/src/couch_views/src/couch_views_sup.erl @@ -0,0 +1,46 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + + +-module(couch_views_sup). + + +-behaviour(supervisor). + + +-export([ + start_link/1 +]). + + +-export([ + init/1 +]). + + +start_link(Args) -> + supervisor:start_link({local, ?MODULE}, ?MODULE, Args). + + +init([]) -> + Flags = #{ + strategy => one_for_one, + intensity => 1, + period => 5 + }, + Children = [ + #{ + id => couch_views_server, + start => {couch_views_server, start_link, []} + } + ], + {ok, {Flags, Children}}. diff --git a/src/couch_views/src/couch_views_util.erl b/src/couch_views/src/couch_views_util.erl new file mode 100644 index 000000000..cfc89bdaf --- /dev/null +++ b/src/couch_views/src/couch_views_util.erl @@ -0,0 +1,78 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_views_util). + + +-export([ + ddoc_to_mrst/2 +]). + + +-include_lib("couch/include/couch_db.hrl"). +-include_lib("couch_mrview/include/couch_mrview.hrl"). +-include("couch_views.hrl"). + + +ddoc_to_mrst(DbName, #doc{id=Id, body={Fields}}) -> + MakeDict = fun({Name, {MRFuns}}, DictBySrcAcc) -> + case couch_util:get_value(<<"map">>, MRFuns) of + MapSrc when MapSrc /= undefined -> + RedSrc = couch_util:get_value(<<"reduce">>, MRFuns, null), + {ViewOpts} = couch_util:get_value(<<"options">>, MRFuns, {[]}), + View = case dict:find({MapSrc, ViewOpts}, DictBySrcAcc) of + {ok, View0} -> View0; + error -> #mrview{def=MapSrc, options=ViewOpts} + end, + {MapNames, RedSrcs} = case RedSrc of + null -> + MNames = [Name | View#mrview.map_names], + {MNames, View#mrview.reduce_funs}; + _ -> + RedFuns = [{Name, RedSrc} | View#mrview.reduce_funs], + {View#mrview.map_names, RedFuns} + end, + View2 = View#mrview{map_names=MapNames, reduce_funs=RedSrcs}, + dict:store({MapSrc, ViewOpts}, View2, DictBySrcAcc); + undefined -> + DictBySrcAcc + end; + ({Name, Else}, DictBySrcAcc) -> + couch_log:error("design_doc_to_view_group ~s views ~p", + [Name, Else]), + DictBySrcAcc + end, + {DesignOpts} = proplists:get_value(<<"options">>, Fields, {[]}), + Partitioned = proplists:get_value(<<"partitioned">>, DesignOpts, false), + + {RawViews} = couch_util:get_value(<<"views">>, Fields, {[]}), + BySrc = lists:foldl(MakeDict, dict:new(), RawViews), + + NumViews = fun({_, View}, N) -> + {View#mrview{id_num = N}, N+1} + end, + {Views, _} = lists:mapfoldl(NumViews, 0, lists:sort(dict:to_list(BySrc))), + + Language = couch_util:get_value(<<"language">>, Fields, <<"javascript">>), + Lib = couch_util:get_value(<<"lib">>, RawViews, {[]}), + + IdxState = #mrst{ + db_name=DbName, + idx_name=Id, + lib=Lib, + views=Views, + language=Language, + design_opts=DesignOpts, + partitioned=Partitioned + }, + SigInfo = {Views, Language, DesignOpts, couch_index_util:sort_lib(Lib)}, + {ok, IdxState#mrst{sig=couch_hash:md5_hash(term_to_binary(SigInfo))}}. diff --git a/src/couch_views/test/couch_views_encoding_test.erl b/src/couch_views/test/couch_views_encoding_test.erl new file mode 100644 index 000000000..7c26583d2 --- /dev/null +++ b/src/couch_views/test/couch_views_encoding_test.erl @@ -0,0 +1,94 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_views_encoding_test). + +-include_lib("eunit/include/eunit.hrl"). + +val_encoding_test() -> + Values = [ + null, + true, + 1.0, + <<"a">>, + {[{<<"a">>, 1.0}, {<<"b">>, <<"hello">>}]} + ], + lists:foreach(fun (Val) -> + EncVal = couch_views_encoding:encode(Val), + ?assertEqual(Val, couch_views_encoding:decode(EncVal)) + end, Values). + + +correct_ordering_test() -> + % Load the ICU driver for couch_util:get_sort_key/1 + {ok, CfgPid} = gen_server:start_link(config, [], []), + {ok, DrvPid} = gen_server:start_link(couch_drv, [], []), + + Ordered = [ + % Special values sort before all other types + null, + false, + true, + + % Then numbers + 1, + 2, + 3.0, + 4, + + % Then text, case sensitive + <<"a">>, + <<"A">>, + <<"aa">>, + <<"b">>, + <<"B">>, + <<"ba">>, + <<"bb">>, + + % Then arrays, compared element by element until different. + % Longer arrays sort after their prefixes + [<<"a">>], + [<<"b">>], + [<<"b">>, <<"c">>], + [<<"b">>, <<"c">>, <<"a">>], + [<<"b">>, <<"d">>], + [<<"b">>, <<"d">>, <<"e">>], + + % Then objects, compared each key value in the list until different. + % Larger objects sort after their subset objects + {[{<<"a">>, 1}]}, + {[{<<"a">>, 2}]}, + {[{<<"b">>, 1}]}, + {[{<<"b">>, 2}]}, + + % Member order does matter for collation + {[{<<"b">>, 2}, {<<"a">>, 1}]}, + {[{<<"b">>, 2}, {<<"c">>, 2}]} + ], + + Encoded = lists:map(fun(Elem) -> + K = couch_views_encoding:encode(Elem, key), + V = couch_views_encoding:encode(Elem, value), + {K, V} + end, Ordered), + Shuffled = shuffle(Encoded), + Reordered = lists:sort(Shuffled), + + lists:foreach(fun({Original, {_K, ViewEncoded}}) -> + ?assertEqual(Original, couch_views_encoding:decode(ViewEncoded)) + end, lists:zip(Ordered, Reordered)). + + +shuffle(List) when is_list(List) -> + Tagged = [{rand:uniform(), Item} || Item <- List], + {_, Randomized} = lists:unzip(lists:sort(Tagged)), + Randomized. diff --git a/src/couch_views/test/couch_views_indexer_test.erl b/src/couch_views/test/couch_views_indexer_test.erl new file mode 100644 index 000000000..02c8ceedb --- /dev/null +++ b/src/couch_views/test/couch_views_indexer_test.erl @@ -0,0 +1,456 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_views_indexer_test). + +-include_lib("eunit/include/eunit.hrl"). +-include_lib("couch/include/couch_db.hrl"). +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("couch_mrview/include/couch_mrview.hrl"). + + +-define(I_HEART_EUNIT(Tests), [{with, [T]} || T <- Tests]). + + +indexer_test_() -> + { + "Test view indexing", + { + setup, + fun setup/0, + fun cleanup/1, + { + foreach, + fun foreach_setup/0, + fun foreach_teardown/1, + ?I_HEART_EUNIT([ + fun indexed_empty_db/1, + fun indexed_single_doc/1, + fun updated_docs_are_reindexed/1, + fun updated_docs_without_changes_are_reindexed/1, + fun deleted_docs_not_indexed/1, + fun deleted_docs_are_unindexed/1, + fun multipe_docs_with_same_key/1, + fun multipe_keys_from_same_doc/1, + fun multipe_identical_keys_from_same_doc/1 + ]) + } + } + }. + + +setup() -> + Ctx = test_util:start_couch([ + fabric, + couch_jobs, + couch_views + ]), + Ctx. + + +cleanup(Ctx) -> + test_util:stop_couch(Ctx). + + +foreach_setup() -> + {ok, Db} = fabric2_db:create(?tempdb(), [{user_ctx, ?ADMIN_USER}]), + Db. + + +foreach_teardown(Db) -> + ok = fabric2_db:delete(fabric2_db:name(Db), []). + + +indexed_empty_db(Db) -> + DDoc = create_ddoc(), + + {ok, _} = fabric2_db:update_doc(Db, DDoc, []), + {ok, Out} = couch_views:query( + Db, + DDoc, + <<"map_fun1">>, + fun fold_fun/2, + [], + #mrargs{} + ), + + ?assertEqual([], Out). + + +indexed_single_doc(Db) -> + DDoc = create_ddoc(), + Doc1 = doc(0), + + {ok, _} = fabric2_db:update_doc(Db, DDoc, []), + {ok, _} = fabric2_db:update_doc(Db, Doc1, []), + + {ok, Out} = couch_views:query( + Db, + DDoc, + <<"map_fun1">>, + fun fold_fun/2, + [], + #mrargs{} + ), + + ?assertEqual([{row, [ + {id, <<"0">>}, + {key, 0}, + {value, 0} + ]}], Out). + + +updated_docs_are_reindexed(Db) -> + DDoc = create_ddoc(), + Doc1 = doc(0), + + {ok, _} = fabric2_db:update_doc(Db, DDoc, []), + {ok, {Pos, Rev}} = fabric2_db:update_doc(Db, Doc1, []), + + {ok, Out1} = couch_views:query( + Db, + DDoc, + <<"map_fun1">>, + fun fold_fun/2, + [], + #mrargs{} + ), + + ?assertEqual([{row, [ + {id, <<"0">>}, + {key, 0}, + {value, 0} + ]}], Out1), + + Doc2 = Doc1#doc{ + revs = {Pos, [Rev]}, + body = {[{<<"val">>, 1}]} + }, + {ok, _} = fabric2_db:update_doc(Db, Doc2, []), + + {ok, Out2} = couch_views:query( + Db, + DDoc, + <<"map_fun1">>, + fun fold_fun/2, + [], + #mrargs{} + ), + + ?assertEqual([{row, [ + {id, <<"0">>}, + {key, 1}, + {value, 1} + ]}], Out2), + + % Check that our id index is updated properly + % as well. + DbName = fabric2_db:name(Db), + {ok, Mrst} = couch_views_util:ddoc_to_mrst(DbName, DDoc), + Sig = Mrst#mrst.sig, + fabric2_fdb:transactional(Db, fun(TxDb) -> + ?assertMatch( + [{0, 1, _, [1]}, {1, 0, 0, []}], + couch_views_fdb:get_view_keys(TxDb, Sig, <<"0">>) + ) + end). + + +updated_docs_without_changes_are_reindexed(Db) -> + DDoc = create_ddoc(), + Doc1 = doc(0), + + {ok, _} = fabric2_db:update_doc(Db, DDoc, []), + {ok, {Pos, Rev}} = fabric2_db:update_doc(Db, Doc1, []), + + {ok, Out1} = couch_views:query( + Db, + DDoc, + <<"map_fun1">>, + fun fold_fun/2, + [], + #mrargs{} + ), + + ?assertEqual([{row, [ + {id, <<"0">>}, + {key, 0}, + {value, 0} + ]}], Out1), + + Doc2 = Doc1#doc{ + revs = {Pos, [Rev]}, + body = {[{<<"val">>, 0}]} + }, + {ok, _} = fabric2_db:update_doc(Db, Doc2, []), + + {ok, Out2} = couch_views:query( + Db, + DDoc, + <<"map_fun1">>, + fun fold_fun/2, + [], + #mrargs{} + ), + + ?assertEqual([{row, [ + {id, <<"0">>}, + {key, 0}, + {value, 0} + ]}], Out2), + + % Check fdb directly to make sure we've also + % removed the id idx keys properly. + DbName = fabric2_db:name(Db), + {ok, Mrst} = couch_views_util:ddoc_to_mrst(DbName, DDoc), + Sig = Mrst#mrst.sig, + fabric2_fdb:transactional(Db, fun(TxDb) -> + ?assertMatch( + [{0, 1, _, [0]}, {1, 0, 0, []}], + couch_views_fdb:get_view_keys(TxDb, Sig, <<"0">>) + ) + end). + + +deleted_docs_not_indexed(Db) -> + DDoc = create_ddoc(), + Doc1 = doc(0), + + {ok, _} = fabric2_db:update_doc(Db, DDoc, []), + {ok, {Pos, Rev}} = fabric2_db:update_doc(Db, Doc1, []), + Doc2 = Doc1#doc{ + revs = {Pos, [Rev]}, + deleted = true, + body = {[{<<"val">>, 1}]} + }, + {ok, _} = fabric2_db:update_doc(Db, Doc2, []), + + {ok, Out} = couch_views:query( + Db, + DDoc, + <<"map_fun1">>, + fun fold_fun/2, + [], + #mrargs{} + ), + + ?assertEqual([], Out). + + +deleted_docs_are_unindexed(Db) -> + DDoc = create_ddoc(), + Doc1 = doc(0), + + {ok, _} = fabric2_db:update_doc(Db, DDoc, []), + {ok, {Pos, Rev}} = fabric2_db:update_doc(Db, Doc1, []), + + {ok, Out1} = couch_views:query( + Db, + DDoc, + <<"map_fun1">>, + fun fold_fun/2, + [], + #mrargs{} + ), + + ?assertEqual([{row, [ + {id, <<"0">>}, + {key, 0}, + {value, 0} + ]}], Out1), + + Doc2 = Doc1#doc{ + revs = {Pos, [Rev]}, + deleted = true, + body = {[{<<"val">>, 1}]} + }, + {ok, _} = fabric2_db:update_doc(Db, Doc2, []), + + {ok, Out2} = couch_views:query( + Db, + DDoc, + <<"map_fun1">>, + fun fold_fun/2, + [], + #mrargs{} + ), + + ?assertEqual([], Out2), + + % Check fdb directly to make sure we've also + % removed the id idx keys properly. + DbName = fabric2_db:name(Db), + {ok, Mrst} = couch_views_util:ddoc_to_mrst(DbName, DDoc), + Sig = Mrst#mrst.sig, + fabric2_fdb:transactional(Db, fun(TxDb) -> + ?assertEqual([], couch_views_fdb:get_view_keys(TxDb, Sig, <<"0">>)) + end). + + +multipe_docs_with_same_key(Db) -> + DDoc = create_ddoc(), + Doc1 = doc(0, 1), + Doc2 = doc(1, 1), + + {ok, _} = fabric2_db:update_doc(Db, DDoc, []), + {ok, _} = fabric2_db:update_docs(Db, [Doc1, Doc2], []), + + {ok, Out} = couch_views:query( + Db, + DDoc, + <<"map_fun1">>, + fun fold_fun/2, + [], + #mrargs{} + ), + + ?assertEqual([ + {row, [ + {id, <<"0">>}, + {key, 1}, + {value, 1} + ]}, + {row, [ + {id, <<"1">>}, + {key, 1}, + {value, 1} + ]} + ], Out). + + +multipe_keys_from_same_doc(Db) -> + DDoc = create_ddoc(multi_emit_different), + Doc = doc(0, 1), + + {ok, _} = fabric2_db:update_doc(Db, DDoc, []), + {ok, _} = fabric2_db:update_doc(Db, Doc, []), + + {ok, Out} = couch_views:query( + Db, + DDoc, + <<"map_fun1">>, + fun fold_fun/2, + [], + #mrargs{} + ), + + ?assertEqual([ + {row, [ + {id, <<"0">>}, + {key, 1}, + {value, 1} + ]}, + {row, [ + {id, <<"0">>}, + {key, <<"0">>}, + {value, <<"0">>} + ]} + ], Out). + + +multipe_identical_keys_from_same_doc(Db) -> + DDoc = create_ddoc(multi_emit_same), + Doc = doc(0, 1), + + {ok, _} = fabric2_db:update_doc(Db, DDoc, []), + {ok, _} = fabric2_db:update_doc(Db, Doc, []), + + {ok, Out} = couch_views:query( + Db, + DDoc, + <<"map_fun1">>, + fun fold_fun/2, + [], + #mrargs{} + ), + + ?assertEqual([ + {row, [ + {id, <<"0">>}, + {key, 1}, + {value, 1} + ]}, + {row, [ + {id, <<"0">>}, + {key, 1}, + {value, 2} + ]} + ], Out). + + +fold_fun({meta, _Meta}, Acc) -> + {ok, Acc}; +fold_fun({row, _} = Row, Acc) -> + {ok, [Row | Acc]}; +fold_fun(complete, Acc) -> + {ok, lists:reverse(Acc)}. + + +create_ddoc() -> + create_ddoc(simple). + + +create_ddoc(simple) -> + couch_doc:from_json_obj({[ + {<<"_id">>, <<"_design/bar">>}, + {<<"views">>, {[ + {<<"map_fun1">>, {[ + {<<"map">>, <<"function(doc) {emit(doc.val, doc.val);}">>} + ]}}, + {<<"map_fun2">>, {[ + {<<"map">>, <<"function(doc) {}">>} + ]}} + ]}} + ]}); + +create_ddoc(multi_emit_different) -> + couch_doc:from_json_obj({[ + {<<"_id">>, <<"_design/bar">>}, + {<<"views">>, {[ + {<<"map_fun1">>, {[ + {<<"map">>, <<"function(doc) { " + "emit(doc._id, doc._id); " + "emit(doc.val, doc.val); " + "}">>} + ]}}, + {<<"map_fun2">>, {[ + {<<"map">>, <<"function(doc) {}">>} + ]}} + ]}} + ]}); + +create_ddoc(multi_emit_same) -> + couch_doc:from_json_obj({[ + {<<"_id">>, <<"_design/bar">>}, + {<<"views">>, {[ + {<<"map_fun1">>, {[ + {<<"map">>, <<"function(doc) { " + "emit(doc.val, doc.val * 2); " + "emit(doc.val, doc.val); " + "}">>} + ]}}, + {<<"map_fun2">>, {[ + {<<"map">>, <<"function(doc) {}">>} + ]}} + ]}} + ]}). + + +doc(Id) -> + doc(Id, Id). + + +doc(Id, Val) -> + couch_doc:from_json_obj({[ + {<<"_id">>, list_to_binary(integer_to_list(Id))}, + {<<"val">>, Val} + ]}). diff --git a/src/couch_views/test/couch_views_map_test.erl b/src/couch_views/test/couch_views_map_test.erl new file mode 100644 index 000000000..0b0ab6894 --- /dev/null +++ b/src/couch_views/test/couch_views_map_test.erl @@ -0,0 +1,517 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_views_map_test). + +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("couch/include/couch_db.hrl"). + + +-define(TDEF(A), {atom_to_list(A), fun A/0}). + + +setup() -> + test_util:start_couch([fabric, couch_jobs, couch_views]). + + +teardown(State) -> + test_util:stop_couch(State). + + +map_views_test_() -> + { + "Map views", + { + setup, + fun setup/0, + fun teardown/1, + [ + ?TDEF(should_map), + ?TDEF(should_map_with_startkey), + ?TDEF(should_map_with_endkey), + ?TDEF(should_map_with_endkey_not_inclusive), + ?TDEF(should_map_reverse_and_limit), + ?TDEF(should_map_with_range_reverse), + ?TDEF(should_map_with_limit_and_skip), + ?TDEF(should_map_with_limit_and_skip_reverse), + ?TDEF(should_map_with_include_docs), + ?TDEF(should_map_with_include_docs_reverse), + ?TDEF(should_map_with_startkey_with_key_array), + ?TDEF(should_map_with_startkey_and_endkey_with_key_array), + ?TDEF(should_map_empty_views), + ?TDEF(should_map_duplicate_keys), + ?TDEF(should_map_with_doc_emit), + ?TDEF(should_map_update_is_false), + ?TDEF(should_map_update_is_lazy) + % fun should_give_ext_size_seq_indexed_test/1 + ] + } + }. + + +should_map() -> + Result = run_query(<<"baz">>, #{}), + Expect = {ok, [ + {row, [{id, <<"1">>}, {key, 1}, {value, 1}]}, + {row, [{id, <<"2">>}, {key, 2}, {value, 2}]}, + {row, [{id, <<"3">>}, {key, 3}, {value, 3}]}, + {row, [{id, <<"4">>}, {key, 4}, {value, 4}]}, + {row, [{id, <<"5">>}, {key, 5}, {value, 5}]}, + {row, [{id, <<"6">>}, {key, 6}, {value, 6}]}, + {row, [{id, <<"7">>}, {key, 7}, {value, 7}]}, + {row, [{id, <<"8">>}, {key, 8}, {value, 8}]}, + {row, [{id, <<"9">>}, {key, 9}, {value, 9}]}, + {row, [{id, <<"10">>}, {key, 10}, {value, 10}]} + ]}, + ?assertEqual(Expect, Result). + + +should_map_with_startkey() -> + Result = run_query(<<"baz">>, #{start_key => 4}), + Expect = {ok, [ + {row, [{id, <<"4">>}, {key, 4}, {value, 4}]}, + {row, [{id, <<"5">>}, {key, 5}, {value, 5}]}, + {row, [{id, <<"6">>}, {key, 6}, {value, 6}]}, + {row, [{id, <<"7">>}, {key, 7}, {value, 7}]}, + {row, [{id, <<"8">>}, {key, 8}, {value, 8}]}, + {row, [{id, <<"9">>}, {key, 9}, {value, 9}]}, + {row, [{id, <<"10">>}, {key, 10}, {value, 10}]} + ]}, + ?assertEqual(Expect, Result). + + +should_map_with_endkey() -> + Result = run_query(<<"baz">>, #{end_key => 5}), + Expect = {ok, [ + {row, [{id, <<"1">>}, {key, 1}, {value, 1}]}, + {row, [{id, <<"2">>}, {key, 2}, {value, 2}]}, + {row, [{id, <<"3">>}, {key, 3}, {value, 3}]}, + {row, [{id, <<"4">>}, {key, 4}, {value, 4}]}, + {row, [{id, <<"5">>}, {key, 5}, {value, 5}]} + ]}, + ?assertEqual(Expect, Result). + + +should_map_with_endkey_not_inclusive() -> + Result = run_query(<<"baz">>, #{ + end_key => 5, + inclusive_end => false + }), + Expect = {ok, [ + {row, [{id, <<"1">>}, {key, 1}, {value, 1}]}, + {row, [{id, <<"2">>}, {key, 2}, {value, 2}]}, + {row, [{id, <<"3">>}, {key, 3}, {value, 3}]}, + {row, [{id, <<"4">>}, {key, 4}, {value, 4}]} + ]}, + ?assertEqual(Expect, Result). + + +should_map_reverse_and_limit() -> + Result = run_query(<<"baz">>, #{ + direction => rev, + limit => 3 + }), + Expect = {ok, [ + {row, [{id, <<"10">>}, {key, 10}, {value, 10}]}, + {row, [{id, <<"9">>}, {key, 9}, {value, 9}]}, + {row, [{id, <<"8">>}, {key, 8}, {value, 8}]} + ]}, + ?assertEqual(Expect, Result). + + +should_map_with_range_reverse() -> + Result = run_query(<<"baz">>, #{ + direction => rev, + start_key => 5, + end_key => 3, + inclusive_end => true + }), + Expect = {ok, [ + {row, [{id, <<"5">>}, {key, 5}, {value, 5}]}, + {row, [{id, <<"4">>}, {key, 4}, {value, 4}]}, + {row, [{id, <<"3">>}, {key, 3}, {value, 3}]} + ]}, + ?assertEqual(Expect, Result). + + +should_map_with_limit_and_skip() -> + Result = run_query(<<"baz">>, #{ + start_key => 2, + limit => 3, + skip => 3 + }), + Expect = {ok, [ + {row, [{id, <<"5">>}, {key, 5}, {value, 5}]}, + {row, [{id, <<"6">>}, {key, 6}, {value, 6}]}, + {row, [{id, <<"7">>}, {key, 7}, {value, 7}]} + ]}, + ?assertEqual(Expect, Result). + + +should_map_with_limit_and_skip_reverse() -> + Result = run_query(<<"baz">>, #{ + start_key => 10, + limit => 3, + skip => 3, + direction => rev + }), + Expect = {ok, [ + {row, [{id, <<"7">>}, {key, 7}, {value, 7}]}, + {row, [{id, <<"6">>}, {key, 6}, {value, 6}]}, + {row, [{id, <<"5">>}, {key, 5}, {value, 5}]} + ]}, + ?assertEqual(Expect, Result). + + +should_map_with_include_docs() -> + Result = run_query(<<"baz">>, #{ + start_key => 8, + end_key => 8, + include_docs => true + }), + Doc = {[ + {<<"_id">>, <<"8">>}, + {<<"_rev">>, <<"1-55b9a29311341e07ec0a7ca13bc1b59f">>}, + {<<"val">>, 8} + ]}, + Expect = {ok, [ + {row, [{id, <<"8">>}, {key, 8}, {value, 8}, {doc, Doc}]} + ]}, + ?assertEqual(Expect, Result). + + +should_map_with_include_docs_reverse() -> + Result = run_query(<<"baz">>, #{ + start_key => 8, + end_key => 8, + include_docs => true, + direction => rev + }), + Doc = {[ + {<<"_id">>, <<"8">>}, + {<<"_rev">>, <<"1-55b9a29311341e07ec0a7ca13bc1b59f">>}, + {<<"val">>, 8} + ]}, + Expect = {ok, [ + {row, [{id, <<"8">>}, {key, 8}, {value, 8}, {doc, Doc}]} + ]}, + ?assertEqual(Expect, Result). + + +should_map_with_startkey_with_key_array() -> + Rows = [ + {row, [{id, <<"4">>}, {key, [<<"4">>, 4]}, {value, 4}]}, + {row, [{id, <<"5">>}, {key, [<<"5">>, 5]}, {value, 5}]}, + {row, [{id, <<"6">>}, {key, [<<"6">>, 6]}, {value, 6}]}, + {row, [{id, <<"7">>}, {key, [<<"7">>, 7]}, {value, 7}]}, + {row, [{id, <<"8">>}, {key, [<<"8">>, 8]}, {value, 8}]}, + {row, [{id, <<"9">>}, {key, [<<"9">>, 9]}, {value, 9}]} + ], + + Result = run_query(<<"boom">>, #{ + start_key => [<<"4">>] + }), + + ?assertEqual({ok, Rows}, Result), + + ResultRev = run_query(<<"boom">>, #{ + start_key => [<<"9">>, 9], + direction => rev, + limit => 6 + }), + + ?assertEqual({ok, lists:reverse(Rows)}, ResultRev). + + +should_map_with_startkey_and_endkey_with_key_array() -> + Rows1 = [ + {row, [{id, <<"4">>}, {key, [<<"4">>, 4]}, {value, 4}]}, + {row, [{id, <<"5">>}, {key, [<<"5">>, 5]}, {value, 5}]}, + {row, [{id, <<"6">>}, {key, [<<"6">>, 6]}, {value, 6}]}, + {row, [{id, <<"7">>}, {key, [<<"7">>, 7]}, {value, 7}]}, + {row, [{id, <<"8">>}, {key, [<<"8">>, 8]}, {value, 8}]} + ], + + Rows2 = [ + {row, [{id, <<"4">>}, {key, [<<"4">>, 4]}, {value, 4}]}, + {row, [{id, <<"5">>}, {key, [<<"5">>, 5]}, {value, 5}]}, + {row, [{id, <<"6">>}, {key, [<<"6">>, 6]}, {value, 6}]}, + {row, [{id, <<"7">>}, {key, [<<"7">>, 7]}, {value, 7}]}, + {row, [{id, <<"8">>}, {key, [<<"8">>, 8]}, {value, 8}]}, + {row, [{id, <<"9">>}, {key, [<<"9">>, 9]}, {value, 9}]} + ], + + Result = run_query(<<"boom">>, #{ + start_key => [<<"4">>], + end_key => [<<"8">>, []] + }), + + ?assertEqual({ok, Rows1}, Result), + + ResultRev = run_query(<<"boom">>, #{ + start_key => [<<"8">>, []], + end_key => [<<"4">>], + direction => rev + }), + + ?assertEqual({ok, lists:reverse(Rows1)}, ResultRev), + + ResultRev2 = run_query(<<"boom">>, #{ + start_key => [<<"9">>, 9], + end_key => [<<"4">>], + direction => rev, + inclusive_end => false + }), + + % Here, [<<"4">>] is less than [<<"4">>, 4] so we + % expect rows 9-4 + ?assertEqual({ok, lists:reverse(Rows2)}, ResultRev2), + + ResultRev3 = run_query(<<"boom">>, #{ + start_key => [<<"9">>, 9], + end_key => [<<"4">>, 4], + direction => rev, + inclusive_end => false + }), + + % Here, specifying [<<"4">>, 4] as the key will prevent + % us from including that row which leaves rows 9-5 + ?assertEqual({ok, lists:reverse(lists:nthtail(1, Rows2))}, ResultRev3). + + +should_map_empty_views() -> + Result = run_query(<<"bing">>, #{}), + Expect = {ok, []}, + ?assertEqual(Expect, Result). + + +should_map_with_doc_emit() -> + Result = run_query(<<"doc_emit">>, #{ + start_key => 8, + limit => 1 + }), + Doc = {[ + {<<"_id">>, <<"8">>}, + {<<"_rev">>, <<"1-55b9a29311341e07ec0a7ca13bc1b59f">>}, + {<<"val">>, 8} + ]}, + Expect = {ok, [ + {row, [{id, <<"8">>}, {key, 8}, {value, Doc}]} + ]}, + ?assertEqual(Expect, Result). + + +should_map_duplicate_keys() -> + Result = run_query(<<"duplicate_keys">>, #{ + limit => 6 + }), + Expect = {ok, [ + {row, [{id, <<"1">>}, {key, <<"1">>}, {value, 1}]}, + {row, [{id, <<"1">>}, {key, <<"1">>}, {value, 2}]}, + {row, [{id, <<"10">>}, {key, <<"10">>}, {value, 10}]}, + {row, [{id, <<"10">>}, {key, <<"10">>}, {value, 11}]}, + {row, [{id, <<"2">>}, {key, <<"2">>}, {value, 2}]}, + {row, [{id, <<"2">>}, {key, <<"2">>}, {value, 3}]} + ]}, + ?assertEqual(Expect, Result). + + +should_map_update_is_false() -> + Expect = {ok, [ + {row, [{id, <<"8">>}, {key, 8}, {value, 8}]}, + {row, [{id, <<"9">>}, {key, 9}, {value, 9}]}, + {row, [{id, <<"10">>}, {key, 10}, {value, 10}]} + ]}, + + Expect1 = {ok, [ + {row, [{id, <<"8">>}, {key, 8}, {value, 8}]}, + {row, [{id, <<"9">>}, {key, 9}, {value, 9}]}, + {row, [{id, <<"10">>}, {key, 10}, {value, 10}]}, + {row, [{id, <<"11">>}, {key, 11}, {value, 11}]} + ]}, + + Idx = <<"baz">>, + DbName = ?tempdb(), + + {ok, Db} = fabric2_db:create(DbName, [{user_ctx, ?ADMIN_USER}]), + + DDoc = create_ddoc(), + Docs = make_docs(10), + fabric2_db:update_docs(Db, [DDoc | Docs]), + + Args1 = #{ + start_key => 8 + }, + + Result1 = couch_views:query(Db, DDoc, Idx, fun default_cb/2, + [], Args1), + ?assertEqual(Expect, Result1), + + Doc = doc(11), + fabric2_db:update_doc(Db, Doc), + + Args2 = #{ + start_key => 8, + update => false + }, + + Result2 = couch_views:query(Db, DDoc, Idx, fun default_cb/2, + [], Args2), + ?assertEqual(Expect, Result2), + + Result3 = couch_views:query(Db, DDoc, Idx, fun default_cb/2, + [], Args1), + ?assertEqual(Expect1, Result3). + + +should_map_update_is_lazy() -> + Expect = {ok, [ + {row, [{id, <<"8">>}, {key, 8}, {value, 8}]}, + {row, [{id, <<"9">>}, {key, 9}, {value, 9}]}, + {row, [{id, <<"10">>}, {key, 10}, {value, 10}]} + ]}, + + Idx = <<"baz">>, + DbName = ?tempdb(), + + {ok, Db} = fabric2_db:create(DbName, [{user_ctx, ?ADMIN_USER}]), + + DDoc = create_ddoc(), + Docs = make_docs(10), + + fabric2_db:update_docs(Db, [DDoc | Docs]), + + Args1 = #{ + start_key => 8, + update => lazy + }, + + Result1 = couch_views:query(Db, DDoc, Idx, fun default_cb/2, + [], Args1), + ?assertEqual({ok, []}, Result1), + + {ok, Mrst} = couch_views_util:ddoc_to_mrst(DbName, DDoc), + JobId = couch_views_jobs:job_id(Db, Mrst), + UpdateSeq = fabric2_db:get_update_seq(Db), + ok = couch_views_jobs:wait_for_job(JobId, UpdateSeq), + + Args2 = #{ + start_key => 8, + update => false + }, + + Result2 = couch_views:query(Db, DDoc, Idx, fun default_cb/2, + [], Args2), + ?assertEqual(Expect, Result2). + + +% should_give_ext_size_seq_indexed_test(Db) -> +% DDoc = couch_doc:from_json_obj({[ +% {<<"_id">>, <<"_design/seqdoc">>}, +% {<<"options">>, {[{<<"seq_indexed">>, true}]}}, +% {<<"views">>, {[ +% {<<"view1">>, {[ +% {<<"map">>, <<"function(doc){emit(doc._id, doc._id);}">>} +% ]}} +% ]} +% } +% ]}), +% {ok, _} = couch_db:update_doc(Db, DDoc, []), +% {ok, Db1} = couch_db:open_int(couch_db:name(Db), []), +% {ok, DDoc1} = couch_db:open_doc(Db1, <<"_design/seqdoc">>, [ejson_body]), +% couch_mrview:query_view(Db1, DDoc1, <<"view1">>, [{update, true}]), +% {ok, Info} = couch_mrview:get_info(Db1, DDoc), +% Size = couch_util:get_nested_json_value({Info}, [sizes, external]), +% ok = couch_db:close(Db1), +% ?assert(is_number(Size)). + + +run_query(Idx, Args) -> + run_query(Idx, Args, false). + + +run_query(Idx, Args, DebugCluster) -> + DbName = ?tempdb(), + {ok, Db} = fabric2_db:create(DbName, [{user_ctx, ?ADMIN_USER}]), + DDoc = create_ddoc(), + Docs = make_docs(10), + fabric2_db:update_docs(Db, [DDoc | Docs]), + if not DebugCluster -> ok; true -> + couch_views:query(Db, DDoc, Idx, fun default_cb/2, [], #{}), + fabric2_fdb:debug_cluster(), + ok + end, + couch_views:query(Db, DDoc, Idx, fun default_cb/2, [], Args). + + +default_cb(complete, Acc) -> + {ok, lists:reverse(Acc)}; +default_cb({final, Info}, []) -> + {ok, [Info]}; +default_cb({final, _}, Acc) -> + {ok, Acc}; +default_cb({meta, _}, Acc) -> + {ok, Acc}; +default_cb(ok, ddoc_updated) -> + {ok, ddoc_updated}; +default_cb(Row, Acc) -> + {ok, [Row | Acc]}. + + +create_ddoc() -> + couch_doc:from_json_obj({[ + {<<"_id">>, <<"_design/bar">>}, + {<<"views">>, {[ + {<<"baz">>, {[ + {<<"map">>, <<"function(doc) {emit(doc.val, doc.val);}">>} + ]}}, + {<<"boom">>, {[ + {<<"map">>, << + "function(doc) {\n" + " emit([doc.val.toString(), doc.val], doc.val);\n" + "}" + >>} + ]}}, + {<<"bing">>, {[ + {<<"map">>, <<"function(doc) {}">>} + ]}}, + {<<"doc_emit">>, {[ + {<<"map">>, <<"function(doc) {emit(doc.val, doc)}">>} + ]}}, + {<<"duplicate_keys">>, {[ + {<<"map">>, << + "function(doc) {\n" + " emit(doc._id, doc.val);\n" + " emit(doc._id, doc.val + 1);\n" + "}">>} + ]}}, + {<<"zing">>, {[ + {<<"map">>, << + "function(doc) {\n" + " if(doc.foo !== undefined)\n" + " emit(doc.foo, 0);\n" + "}" + >>} + ]}} + ]}} + ]}). + + +make_docs(Count) -> + [doc(I) || I <- lists:seq(1, Count)]. + + +doc(Id) -> + couch_doc:from_json_obj({[ + {<<"_id">>, list_to_binary(integer_to_list(Id))}, + {<<"val">>, Id} + ]}). diff --git a/src/fabric/include/fabric2.hrl b/src/fabric/include/fabric2.hrl index de1d3d177..6392d125e 100644 --- a/src/fabric/include/fabric2.hrl +++ b/src/fabric/include/fabric2.hrl @@ -46,6 +46,7 @@ -define(DB_DOCS, 21). -define(DB_LOCAL_DOCS, 22). -define(DB_ATTS, 23). +-define(DB_VIEWS, 24). % Versions diff --git a/test/elixir/test/basics_test.exs b/test/elixir/test/basics_test.exs index f280513fb..c29e48e04 100644 --- a/test/elixir/test/basics_test.exs +++ b/test/elixir/test/basics_test.exs @@ -178,21 +178,33 @@ defmodule BasicsTest do assert Couch.get("/#{db_name}").body["doc_count"] == 8 + # Disabling until we figure out reduce functions + # # Test reduce function + # resp = Couch.get("/#{db_name}/_design/bar/_view/baz") + # assert hd(resp.body["rows"])["value"] == 33 + # Test reduce function - resp = Couch.get("/#{db_name}/_design/bar/_view/baz") - assert hd(resp.body["rows"])["value"] == 33 + resp = Couch.get("/#{db_name}/_design/bar/_view/baz", query: %{:reduce => false}) + assert resp.body["total_rows"] == 3 # Delete doc and test for updated view results doc0 = Couch.get("/#{db_name}/0").body assert Couch.delete("/#{db_name}/0?rev=#{doc0["_rev"]}").body["ok"] - retry_until(fn -> - Couch.get("/#{db_name}/_design/foo/_view/baz").body["total_rows"] == 2 - end) + # Disabling until we figure out reduce functions + # retry_until(fn -> + # Couch.get("/#{db_name}/_design/foo/_view/baz").body["total_rows"] == 2 + # end) + + resp = Couch.get("/#{db_name}/_design/bar/_view/baz", query: %{:reduce => false}) + assert resp.body["total_rows"] == 2 assert Couch.get("/#{db_name}").body["doc_count"] == 7 assert Couch.get("/#{db_name}/0").status_code == 404 - refute Couch.get("/#{db_name}/0?rev=#{doc0["_rev"]}").status_code == 404 + + # No longer true. Old revisions are not stored after + # an update. + # refute Couch.get("/#{db_name}/0?rev=#{doc0["_rev"]}").status_code == 404 end @tag :with_db diff --git a/test/elixir/test/map_test.exs b/test/elixir/test/map_test.exs new file mode 100644 index 000000000..04361ba87 --- /dev/null +++ b/test/elixir/test/map_test.exs @@ -0,0 +1,450 @@ +defmodule ViewMapTest do + use CouchTestCase + + @moduledoc """ + Test Map functionality for views + """ + def get_ids(resp) do + %{:body => %{"rows" => rows}} = resp + Enum.map(rows, fn row -> row["id"] end) + end + + def get_keys(resp) do + %{:body => %{"rows" => rows}} = resp + Enum.map(rows, fn row -> row["key"] end) + end + + defp create_map_docs(db_name) do + docs = + for i <- 1..10 do + group = + if rem(i, 3) == 0 do + "one" + else + "two" + end + + %{ + :_id => "doc-id-#{i}", + :value => i, + :some => "field", + :group => group + } + end + + resp = Couch.post("/#{db_name}/_bulk_docs", body: %{:docs => docs, :w => 3}) + assert resp.status_code == 201 + end + + setup do + db_name = random_db_name() + {:ok, _} = create_db(db_name) + on_exit(fn -> delete_db(db_name) end) + + create_map_docs(db_name) + + map_fun1 = """ + function(doc) { + if (doc.some) { + emit(doc.value , doc.value); + } + + if (doc._id.indexOf("_design") > -1) { + emit(0, "ddoc") + } + } + """ + + map_fun2 = """ + function(doc) { + if (doc.group) { + emit([doc.some, doc.group], 1); + } + } + """ + + map_fun3 = """ + function(doc) { + if (doc.group) { + emit(doc.group, 1); + } + } + """ + + body = %{ + :w => 3, + :docs => [ + %{ + _id: "_design/map", + views: %{ + some: %{map: map_fun1}, + map_some: %{map: map_fun2}, + map_group: %{map: map_fun3} + } + }, + %{ + _id: "_design/include_ddocs", + views: %{some: %{map: map_fun1}}, + options: %{include_design: true} + } + ] + } + + resp = Couch.post("/#{db_name}/_bulk_docs", body: body) + Enum.each(resp.body, &assert(&1["ok"])) + + {:ok, [db_name: db_name]} + end + + def get_reduce_result(resp) do + %{:body => %{"rows" => rows}} = resp + rows + end + + test "query returns docs", context do + db_name = context[:db_name] + + url = "/#{db_name}/_design/map/_view/some" + resp = Couch.get(url) + assert resp.status_code == 200 + + ids = get_ids(resp) + + assert ids == [ + "doc-id-1", + "doc-id-2", + "doc-id-3", + "doc-id-4", + "doc-id-5", + "doc-id-6", + "doc-id-7", + "doc-id-8", + "doc-id-9", + "doc-id-10" + ] + + url = "/#{db_name}/_design/map/_view/map_some" + resp = Couch.get(url) + assert resp.status_code == 200 + + ids = get_ids(resp) + + assert ids == [ + "doc-id-3", + "doc-id-6", + "doc-id-9", + "doc-id-1", + "doc-id-10", + "doc-id-2", + "doc-id-4", + "doc-id-5", + "doc-id-7", + "doc-id-8" + ] + end + + test "updated docs rebuilds index", context do + db_name = context[:db_name] + + url = "/#{db_name}/_design/map/_view/some" + resp = Couch.get(url) + assert resp.status_code == 200 + ids = get_ids(resp) + + assert ids == [ + "doc-id-1", + "doc-id-2", + "doc-id-3", + "doc-id-4", + "doc-id-5", + "doc-id-6", + "doc-id-7", + "doc-id-8", + "doc-id-9", + "doc-id-10" + ] + + update_doc_value(db_name, "doc-id-5", 0) + update_doc_value(db_name, "doc-id-6", 100) + + resp = Couch.get("/#{db_name}/doc-id-3") + doc3 = convert(resp.body) + resp = Couch.delete("/#{db_name}/#{doc3["_id"]}", query: %{rev: doc3["_rev"]}) + assert resp.status_code == 200 + # + resp = Couch.get("/#{db_name}/doc-id-4") + doc4 = convert(resp.body) + doc4 = Map.delete(doc4, "some") + resp = Couch.put("/#{db_name}/#{doc4["_id"]}", body: doc4) + assert resp.status_code == 201 + # + resp = Couch.get("/#{db_name}/doc-id-1") + doc1 = convert(resp.body) + doc1 = Map.put(doc1, "another", "value") + resp = Couch.put("/#{db_name}/#{doc1["_id"]}", body: doc1) + assert resp.status_code == 201 + + url = "/#{db_name}/_design/map/_view/some" + resp = Couch.get(url) + assert resp.status_code == 200 + ids = get_ids(resp) + + assert ids == [ + "doc-id-5", + "doc-id-1", + "doc-id-2", + "doc-id-7", + "doc-id-8", + "doc-id-9", + "doc-id-10", + "doc-id-6" + ] + end + + test "can index design docs", context do + db_name = context[:db_name] + + url = "/#{db_name}/_design/include_ddocs/_view/some" + resp = Couch.get(url, query: %{limit: 3}) + assert resp.status_code == 200 + ids = get_ids(resp) + + assert ids == ["_design/include_ddocs", "_design/map", "doc-id-1"] + end + + test "can use key in query string", context do + db_name = context[:db_name] + + url = "/#{db_name}/_design/map/_view/map_group" + resp = Couch.get(url, query: %{limit: 3, key: "\"one\""}) + assert resp.status_code == 200 + ids = get_ids(resp) + assert ids == ["doc-id-3", "doc-id-6", "doc-id-9"] + + resp = + Couch.get(url, + query: %{ + limit: 3, + key: "\"one\"", + descending: true + } + ) + + assert resp.status_code == 200 + ids = get_ids(resp) + assert ids == ["doc-id-9", "doc-id-6", "doc-id-3"] + end + + test "can use keys in query string", context do + db_name = context[:db_name] + + url = "/#{db_name}/_design/map/_view/some" + resp = Couch.post(url, body: %{keys: [6, 3, 9]}) + assert resp.status_code == 200 + ids = get_ids(resp) + assert ids == ["doc-id-6", "doc-id-3", "doc-id-9"] + + # should ignore descending = true + resp = Couch.post(url, body: %{keys: [6, 3, 9], descending: true}) + assert resp.status_code == 200 + ids = get_ids(resp) + assert ids == ["doc-id-6", "doc-id-3", "doc-id-9"] + end + + test "inclusive = false", context do + db_name = context[:db_name] + + docs = [ + %{key: "key1"}, + %{key: "key2"}, + %{key: "key3"}, + %{key: "key4"}, + %{key: "key4"}, + %{key: "key5"}, + %{ + _id: "_design/inclusive", + views: %{ + by_key: %{ + map: """ + function (doc) { + if (doc.key) { + emit(doc.key, doc); + } + } + """ + } + } + } + ] + + resp = Couch.post("/#{db_name}/_bulk_docs", body: %{:docs => docs, :w => 3}) + assert resp.status_code == 201 + url = "/#{db_name}/_design/inclusive/_view/by_key" + + query = %{ + endkey: "\"key4\"", + inclusive_end: false + } + + resp = Couch.get(url, query: query) + assert resp.status_code == 200 + keys = get_keys(resp) + assert keys == ["key1", "key2", "key3"] + + query = %{ + startkey: "\"key3\"", + endkey: "\"key4\"", + inclusive_end: false + } + + resp = Couch.get(url, query: query) + assert resp.status_code == 200 + keys = get_keys(resp) + assert keys == ["key3"] + + query = %{ + startkey: "\"key4\"", + endkey: "\"key1\"", + inclusive_end: false, + descending: true + } + + resp = Couch.get(url, query: query) + assert resp.status_code == 200 + keys = get_keys(resp) + assert keys == ["key4", "key4", "key3", "key2"] + end + + test "supports linked documents", context do + db_name = context[:db_name] + + docs = [ + %{_id: "mydoc", foo: "bar"}, + %{_id: "join-doc", doc_id: "mydoc"}, + %{ + _id: "_design/join", + views: %{ + by_doc_id: %{ + map: """ + function (doc) { + if (doc.doc_id) { + emit(doc._id, {_id: doc.doc_id}); + } + } + """ + } + } + } + ] + + resp = Couch.post("/#{db_name}/_bulk_docs", body: %{:docs => docs, :w => 3}) + assert resp.status_code == 201 + + url = "/#{db_name}/_design/join/_view/by_doc_id" + resp = Couch.get(url) + assert resp.status_code == 200 + %{:body => %{"rows" => [row]}} = resp + + assert row == %{ + "id" => "join-doc", + "key" => "join-doc", + "value" => %{"_id" => "mydoc"} + } + + url = "/#{db_name}/_design/join/_view/by_doc_id" + resp = Couch.get(url, query: %{include_docs: true}) + assert resp.status_code == 200 + %{:body => %{"rows" => [doc]}} = resp + + assert doc["id"] == "join-doc" + assert doc["doc"]["_id"] == "mydoc" + end + + test "bad range returns error", context do + db_name = context[:db_name] + + url = "/#{db_name}/_design/map/_view/some" + resp = Couch.get(url, query: %{startkey: "5", endkey: "4"}) + assert resp.status_code == 400 + %{:body => %{"error" => error}} = resp + assert error == "query_parse_error" + end + + test "multiple emits in correct value order", context do + db_name = context[:db_name] + + docs = [ + %{_id: "doc1", foo: "foo", bar: "bar"}, + %{_id: "doc2", foo: "foo", bar: "bar"}, + %{ + _id: "_design/emit", + views: %{ + multiple_emit: %{ + map: """ + function (doc) { + if (!doc.foo) { + return; + } + emit(doc.foo); + emit(doc.bar); + emit(doc.foo); + emit(doc.bar, 'multiple values!'); + emit(doc.bar, 'crayon!'); + } + """ + } + } + } + ] + + resp = Couch.post("/#{db_name}/_bulk_docs", body: %{:docs => docs, :w => 3}) + assert resp.status_code == 201 + + url = "/#{db_name}/_design/emit/_view/multiple_emit" + resp = Couch.post(url, body: %{keys: ["foo", "bar"]}) + assert resp.status_code == 200 + %{:body => %{"rows" => rows}} = resp + + assert Enum.at(rows, 0)["key"] == "foo" + assert Enum.at(rows, 0)["id"] == "doc1" + assert Enum.at(rows, 1)["key"] == "foo" + assert Enum.at(rows, 1)["id"] == "doc1" + + assert Enum.at(rows, 2)["key"] == "foo" + assert Enum.at(rows, 2)["id"] == "doc2" + assert Enum.at(rows, 3)["key"] == "foo" + assert Enum.at(rows, 3)["id"] == "doc2" + + assert Enum.at(rows, 4)["key"] == "bar" + assert Enum.at(rows, 4)["id"] == "doc1" + assert Enum.at(rows, 4)["value"] == :null + assert Enum.at(rows, 5)["key"] == "bar" + assert Enum.at(rows, 5)["id"] == "doc1" + assert Enum.at(rows, 5)["value"] == "crayon!" + assert Enum.at(rows, 6)["key"] == "bar" + assert Enum.at(rows, 6)["id"] == "doc1" + assert Enum.at(rows, 6)["value"] == "multiple values!" + + assert Enum.at(rows, 7)["key"] == "bar" + assert Enum.at(rows, 7)["id"] == "doc2" + assert Enum.at(rows, 7)["value"] == :null + assert Enum.at(rows, 8)["key"] == "bar" + assert Enum.at(rows, 8)["id"] == "doc2" + assert Enum.at(rows, 8)["value"] == "crayon!" + assert Enum.at(rows, 9)["key"] == "bar" + assert Enum.at(rows, 9)["id"] == "doc2" + assert Enum.at(rows, 9)["value"] == "multiple values!" + end + + def update_doc_value(db_name, id, value) do + resp = Couch.get("/#{db_name}/#{id}") + doc = convert(resp.body) + doc = Map.put(doc, "value", value) + resp = Couch.put("/#{db_name}/#{id}", body: doc) + assert resp.status_code == 201 + end + + def convert(value) do + :jiffy.decode(:jiffy.encode(value), [:return_maps]) + end +end diff --git a/test/elixir/test/view_collation_test.exs b/test/elixir/test/view_collation_test.exs index 7563ba416..bf30031e0 100644 --- a/test/elixir/test/view_collation_test.exs +++ b/test/elixir/test/view_collation_test.exs @@ -70,34 +70,28 @@ defmodule ViewCollationTest do end test "ascending collation order", context do - retry_until(fn -> - resp = Couch.get(url(context)) - pairs = Enum.zip(resp.body["rows"], @values) + resp = Couch.get(url(context)) + pairs = Enum.zip(resp.body["rows"], @values) - Enum.each(pairs, fn {row, value} -> - assert row["key"] == convert(value) - end) + Enum.each(pairs, fn {row, value} -> + assert row["key"] == convert(value) end) end test "descending collation order", context do - retry_until(fn -> - resp = Couch.get(url(context), query: %{"descending" => "true"}) - pairs = Enum.zip(resp.body["rows"], Enum.reverse(@values)) + resp = Couch.get(url(context), query: %{"descending" => "true"}) + pairs = Enum.zip(resp.body["rows"], Enum.reverse(@values)) - Enum.each(pairs, fn {row, value} -> - assert row["key"] == convert(value) - end) + Enum.each(pairs, fn {row, value} -> + assert row["key"] == convert(value) end) end test "key query option", context do Enum.each(@values, fn value -> - retry_until(fn -> - resp = Couch.get(url(context), query: %{:key => :jiffy.encode(value)}) - assert length(resp.body["rows"]) == 1 - assert Enum.at(resp.body["rows"], 0)["key"] == convert(value) - end) + resp = Couch.get(url(context), query: %{:key => :jiffy.encode(value)}) + assert length(resp.body["rows"]) == 1 + assert Enum.at(resp.body["rows"], 0)["key"] == convert(value) end) end -- cgit v1.2.1 From 90a27dbd214b9245030696ee39f947754444964f Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Thu, 1 Aug 2019 10:14:32 -0500 Subject: Fix job handling to halt on errors If the indexing job has timed out and has been requed we need to exit the current indexer. This ensures the errors are logged so that we can keep an eye on failing jobs. --- src/couch_views/src/couch_views_indexer.erl | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/src/couch_views/src/couch_views_indexer.erl b/src/couch_views/src/couch_views_indexer.erl index a3179369c..edee332f3 100644 --- a/src/couch_views/src/couch_views_indexer.erl +++ b/src/couch_views/src/couch_views_indexer.erl @@ -72,7 +72,7 @@ init() -> update(#{} = Db, Mrst0, State0) -> - {Mrst2, State3} = fabric2_fdb:transactional(Db, fun(TxDb) -> + {Mrst2, State4} = fabric2_fdb:transactional(Db, fun(TxDb) -> % In the first iteration of update we need % to populate our db and view sequences State1 = case State0 of @@ -107,8 +107,8 @@ update(#{} = Db, Mrst0, State0) -> report_progress(State2, finished), {Mrst1, finished}; false -> - report_progress(State2, update), - {Mrst1, State2#{ + State3 = report_progress(State2, update), + {Mrst1, State3#{ tx_db := undefined, count := 0, doc_acc := [], @@ -117,11 +117,11 @@ update(#{} = Db, Mrst0, State0) -> end end), - case State3 of + case State4 of finished -> couch_query_servers:stop_doc_map(Mrst2#mrst.qserver); _ -> - update(Db, Mrst2, State3) + update(Db, Mrst2, State4) end. @@ -229,7 +229,7 @@ start_query_server(#mrst{} = Mrst) -> report_progress(State, UpdateType) -> #{ tx_db := TxDb, - job := Job, + job := Job1, job_data := JobData, last_seq := LastSeq } = State, @@ -251,9 +251,21 @@ report_progress(State, UpdateType) -> case UpdateType of update -> - couch_jobs:update(TxDb, Job, NewData); + case couch_jobs:update(TxDb, Job1, NewData) of + {ok, Job2} -> + State#{job := Job2}; + {error, halt} -> + couch_log:error("~s job halted :: ~w", [?MODULE, Job1]), + exit(normal) + end; finished -> - couch_jobs:finish(TxDb, Job, NewData) + case couch_jobs:finish(TxDb, Job1, NewData) of + ok -> + State; + {error, halt} -> + couch_log:error("~s job halted :: ~w", [?MODULE, Job1]), + exit(normal) + end end. -- cgit v1.2.1 From 77018c9ff8971c982667e40a45f8b4c091a6370a Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Tue, 6 Aug 2019 12:20:59 -0500 Subject: Update to use new fold_range_future/wait functions --- src/fabric/src/fabric2_fdb.erl | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/fabric/src/fabric2_fdb.erl b/src/fabric/src/fabric2_fdb.erl index 71cb68f21..be629949b 100644 --- a/src/fabric/src/fabric2_fdb.erl +++ b/src/fabric/src/fabric2_fdb.erl @@ -398,19 +398,20 @@ get_winning_revs_future(#{} = Db, DocId, NumRevs) -> {StartKey, EndKey} = erlfdb_tuple:range({?DB_REVS, DocId}, DbPrefix), Options = [{reverse, true}, {limit, NumRevs}], - erlfdb:get_range_raw(Tx, StartKey, EndKey, Options). + erlfdb:fold_range_future(Tx, StartKey, EndKey, Options). -get_winning_revs_wait(#{} = Db, Future) -> +get_winning_revs_wait(#{} = Db, RangeFuture) -> #{ + tx := Tx, db_prefix := DbPrefix } = ensure_current(Db), - {Rows, _, _} = erlfdb:wait(Future), - lists:map(fun({K, V}) -> + RevRows = erlfdb:fold_range_wait(Tx, RangeFuture, fun({K, V}, Acc) -> Key = erlfdb_tuple:unpack(K, DbPrefix), Val = erlfdb_tuple:unpack(V), - fdb_to_revinfo(Key, Val) - end, Rows). + [fdb_to_revinfo(Key, Val) | Acc] + end, []), + lists:reverse(RevRows). get_non_deleted_rev(#{} = Db, DocId, RevId) -> -- cgit v1.2.1 From 1b6998cfbfa035ac120861f2da1c0a02fe78c095 Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Tue, 6 Aug 2019 12:21:23 -0500 Subject: Write large documents in chunks This opens the max document size to the transaction limits rather than a single 100k value. --- src/fabric/include/fabric2.hrl | 2 +- src/fabric/src/fabric2_fdb.erl | 45 +++++++++++++++++++++++++++--------------- 2 files changed, 30 insertions(+), 17 deletions(-) diff --git a/src/fabric/include/fabric2.hrl b/src/fabric/include/fabric2.hrl index 6392d125e..3e224987d 100644 --- a/src/fabric/include/fabric2.hrl +++ b/src/fabric/include/fabric2.hrl @@ -64,4 +64,4 @@ -define(COMMIT_UNKNOWN_RESULT, 1021). --define(ATTACHMENT_CHUNK_SIZE, 100000). +-define(BINARY_CHUNK_SIZE, 100000). diff --git a/src/fabric/src/fabric2_fdb.erl b/src/fabric/src/fabric2_fdb.erl index be629949b..b10c35747 100644 --- a/src/fabric/src/fabric2_fdb.erl +++ b/src/fabric/src/fabric2_fdb.erl @@ -447,20 +447,27 @@ get_doc_body_future(#{} = Db, DocId, RevInfo) -> rev_id := {RevPos, Rev} } = RevInfo, - Key = erlfdb_tuple:pack({?DB_DOCS, DocId, RevPos, Rev}, DbPrefix), - erlfdb:get(Tx, Key). + Key = {?DB_DOCS, DocId, RevPos, Rev}, + {StartKey, EndKey} = erlfdb_tuple:range(Key, DbPrefix), + erlfdb:fold_range_future(Tx, StartKey, EndKey, []). get_doc_body_wait(#{} = Db0, DocId, RevInfo, Future) -> - Db = ensure_current(Db0), + #{ + tx := Tx + } = Db = ensure_current(Db0), #{ rev_id := {RevPos, Rev}, rev_path := RevPath } = RevInfo, - Val = erlfdb:wait(Future), - fdb_to_doc(Db, DocId, RevPos, [Rev | RevPath], Val). + RevBodyRows = erlfdb:fold_range_wait(Tx, Future, fun({_K, V}, Acc) -> + [V | Acc] + end, []), + BodyRows = lists:reverse(RevBodyRows), + + fdb_to_doc(Db, DocId, RevPos, [Rev | RevPath], BodyRows). get_local_doc(#{} = Db0, <> = DocId) -> @@ -634,7 +641,7 @@ write_attachment(#{} = Db, DocId, Data) when is_binary(Data) -> } = ensure_current(Db), AttId = fabric2_util:uuid(), - Chunks = chunkify_attachment(Data), + Chunks = chunkify_binary(Data), lists:foldl(fun(Chunk, ChunkId) -> AttKey = erlfdb_tuple:pack({?DB_ATTS, DocId, AttId, ChunkId}, DbPrefix), @@ -823,8 +830,9 @@ write_doc_body(#{} = Db0, #doc{} = Doc) -> tx := Tx } = Db = ensure_current(Db0), - {NewDocKey, NewDocVal} = doc_to_fdb(Db, Doc), - erlfdb:set(Tx, NewDocKey, NewDocVal). + lists:foreach(fun({Key, Value}) -> + ok = erlfdb:set(Tx, Key, Value) + end, doc_to_fdb(Db, Doc)). revinfo_to_fdb(Tx, DbPrefix, DocId, #{winner := true} = RevId) -> @@ -894,12 +902,17 @@ doc_to_fdb(Db, #doc{} = Doc) -> DiskAtts = lists:map(fun couch_att:to_disk_term/1, Atts), - Key = erlfdb_tuple:pack({?DB_DOCS, Id, Start, Rev}, DbPrefix), - Val = {Body, DiskAtts, Deleted}, - {Key, term_to_binary(Val, [{minor_version, 1}])}. + Value = term_to_binary({Body, DiskAtts, Deleted}, [{minor_version, 1}]), + + {Rows, _} = lists:mapfoldl(fun(Chunk, ChunkId) -> + Key = erlfdb_tuple:pack({?DB_DOCS, Id, Start, Rev, ChunkId}, DbPrefix), + {{Key, Chunk}, ChunkId + 1} + end, 0, chunkify_binary(Value)), + Rows. -fdb_to_doc(Db, DocId, Pos, Path, Bin) when is_binary(Bin) -> +fdb_to_doc(Db, DocId, Pos, Path, BinRows) when is_list(BinRows) -> + Bin = iolist_to_binary(BinRows), {Body, DiskAtts, Deleted} = binary_to_term(Bin, [safe]), Atts = lists:map(fun(Att) -> couch_att:from_disk_term(Db, DocId, Att) @@ -954,13 +967,13 @@ fdb_to_local_doc(_Db, _DocId, not_found) -> {not_found, missing}. -chunkify_attachment(Data) -> +chunkify_binary(Data) -> case Data of <<>> -> []; - <> -> - [Head | chunkify_attachment(Rest)]; - <<_/binary>> when size(Data) < ?ATTACHMENT_CHUNK_SIZE -> + <> -> + [Head | chunkify_binary(Rest)]; + <<_/binary>> when size(Data) < ?BINARY_CHUNK_SIZE -> [Data] end. -- cgit v1.2.1 From 67ee47ccfd30f969d13e8a65c714f3c375650323 Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Tue, 6 Aug 2019 12:50:53 -0500 Subject: Remember to remove old doc bodies --- src/fabric/src/fabric2_fdb.erl | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/src/fabric/src/fabric2_fdb.erl b/src/fabric/src/fabric2_fdb.erl index b10c35747..3f02e6ac4 100644 --- a/src/fabric/src/fabric2_fdb.erl +++ b/src/fabric/src/fabric2_fdb.erl @@ -509,7 +509,8 @@ write_doc(#{} = Db0, Doc, NewWinner0, OldWinner, ToUpdate, ToRemove) -> lists:foreach(fun(RI0) -> RI = RI0#{winner := false}, {K, _, undefined} = revinfo_to_fdb(Tx, DbPrefix, DocId, RI), - ok = erlfdb:clear(Tx, K) + ok = erlfdb:clear(Tx, K), + ok = clear_doc_body(Db, DocId, RI0) end, ToRemove), % _all_docs @@ -835,6 +836,25 @@ write_doc_body(#{} = Db0, #doc{} = Doc) -> end, doc_to_fdb(Db, Doc)). +clear_doc_body(_Db, _DocId, not_found) -> + % No old body to clear + ok; + +clear_doc_body(#{} = Db, DocId, #{} = RevInfo) -> + #{ + tx := Tx, + db_prefix := DbPrefix + } = Db, + + #{ + rev_id := {RevPos, Rev} + } = RevInfo, + + BaseKey = {?DB_DOCS, DocId, RevPos, Rev}, + {StartKey, EndKey} = erlfdb_tuple:range(BaseKey, DbPrefix), + ok = erlfdb:clear_range(Tx, StartKey, EndKey). + + revinfo_to_fdb(Tx, DbPrefix, DocId, #{winner := true} = RevId) -> #{ deleted := Deleted, @@ -911,6 +931,9 @@ doc_to_fdb(Db, #doc{} = Doc) -> Rows. +fdb_to_doc(_Db, _DocId, _Pos, _Path, []) -> + {not_found, missing}; + fdb_to_doc(Db, DocId, Pos, Path, BinRows) when is_list(BinRows) -> Bin = iolist_to_binary(BinRows), {Body, DiskAtts, Deleted} = binary_to_term(Bin, [safe]), @@ -928,10 +951,7 @@ fdb_to_doc(Db, DocId, Pos, Path, BinRows) when is_list(BinRows) -> case Db of #{after_doc_read := undefined} -> Doc0; #{after_doc_read := ADR} -> ADR(Doc0, Db) - end; - -fdb_to_doc(_Db, _DocId, _Pos, _Path, not_found) -> - {not_found, missing}. + end. local_doc_to_fdb(Db, #doc{} = Doc) -> -- cgit v1.2.1 From 9e7ba3981b8a45e137f9154e8938753564a51360 Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Mon, 12 Aug 2019 14:40:59 -0400 Subject: Configurable FDB directory prefixes for CouchDB instances Specifying a custom prefix allows having multiple CouchDB instances on a single FDB cluster. This can be used for one form of multi-tenancy. It can also be used for integration testing by creating a temporary prefix then deleting all data in that directory when the test has finished. --- rel/overlay/etc/default.ini | 5 +++ src/couch_jobs/src/couch_jobs_fdb.erl | 3 +- src/fabric/src/fabric2_dir_prefix_tests.erl | 69 +++++++++++++++++++++++++++++ src/fabric/src/fabric2_fdb.erl | 6 ++- src/fabric/src/fabric2_server.erl | 28 +++++++++++- src/fabric/src/fabric2_txids.erl | 6 ++- 6 files changed, 111 insertions(+), 6 deletions(-) create mode 100644 src/fabric/src/fabric2_dir_prefix_tests.erl diff --git a/rel/overlay/etc/default.ini b/rel/overlay/etc/default.ini index 856578e18..85dc62b30 100644 --- a/rel/overlay/etc/default.ini +++ b/rel/overlay/etc/default.ini @@ -214,6 +214,11 @@ port = 6984 ; attachments_timeout = 60000 ; view_timeout = 3600000 ; partition_view_timeout = 3600000 +; +; Custom FDB directory prefix. All the nodes of the same CouchDB instance +; should have a matching directory prefix in order to read and write the same +; data. Changes to this value take effect only on node start-up. +;fdb_directory = couchdb ; [rexi] ; buffer_count = 2000 diff --git a/src/couch_jobs/src/couch_jobs_fdb.erl b/src/couch_jobs/src/couch_jobs_fdb.erl index 1317d03df..6903801a2 100644 --- a/src/couch_jobs/src/couch_jobs_fdb.erl +++ b/src/couch_jobs/src/couch_jobs_fdb.erl @@ -616,7 +616,8 @@ init_jtx(undefined) -> init_jtx({erlfdb_transaction, _} = Tx) -> Root = erlfdb_directory:root(), - CouchDB = erlfdb_directory:create_or_open(Tx, Root, [<<"couchdb">>]), + Dir = fabric2_server:fdb_directory(), + CouchDB = erlfdb_directory:create_or_open(Tx, Root, Dir), LayerPrefix = erlfdb_directory:get_name(CouchDB), Jobs = erlfdb_tuple:pack({?JOBS}, LayerPrefix), Version = erlfdb:wait(erlfdb:get(Tx, ?METADATA_VERSION_KEY)), diff --git a/src/fabric/src/fabric2_dir_prefix_tests.erl b/src/fabric/src/fabric2_dir_prefix_tests.erl new file mode 100644 index 000000000..c7bc8bba4 --- /dev/null +++ b/src/fabric/src/fabric2_dir_prefix_tests.erl @@ -0,0 +1,69 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(fabric2_dir_prefix_tests). + + +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("eunit/include/eunit.hrl"). + + +-define(TDEF(A), {atom_to_list(A), fun A/0}). + + +dir_prefix_test_() -> + { + "Test couchdb fdb directory prefix", + foreach, + fun() -> + % erlfdb, rexi and mem3 are all dependent apps for fabric. We make + % sure to start them so when fabric is started during the test it + % already has its dependencies + test_util:start_couch([erlfdb, rexi, mem3]) + end, + fun(Ctx) -> + config:delete("fabric", "fdb_directory"), + ok = application:stop(fabric), + test_util:stop_couch(Ctx) + end, + [ + ?TDEF(default_prefix), + ?TDEF(custom_prefix) + ] + }. + + +default_prefix() -> + ok = application:start(fabric), + + ?assertEqual([<<"couchdb">>], fabric2_server:fdb_directory()), + + % Try again to test pdict caching code + ?assertEqual([<<"couchdb">>], fabric2_server:fdb_directory()), + + % Check that we can create dbs + DbName = ?tempdb(), + ?assertMatch({ok, _}, fabric2_db:create(DbName, [])). + + +custom_prefix() -> + ok = config:set("fabric", "fdb_directory", "couchdb_foo"), + ok = application:start(fabric), + + ?assertEqual([<<"couchdb_foo">>], fabric2_server:fdb_directory()), + + % Try again to test pdict caching code + ?assertEqual([<<"couchdb_foo">>], fabric2_server:fdb_directory()), + + % Check that we can create dbs + DbName = ?tempdb(), + ?assertMatch({ok, _}, fabric2_db:create(DbName, [])). diff --git a/src/fabric/src/fabric2_fdb.erl b/src/fabric/src/fabric2_fdb.erl index 3f02e6ac4..c58b5f674 100644 --- a/src/fabric/src/fabric2_fdb.erl +++ b/src/fabric/src/fabric2_fdb.erl @@ -258,7 +258,8 @@ exists(#{name := DbName} = Db) when is_binary(DbName) -> list_dbs(Tx, Callback, AccIn, Options) -> Root = erlfdb_directory:root(), - CouchDB = erlfdb_directory:create_or_open(Tx, Root, [<<"couchdb">>]), + Dir = fabric2_server:fdb_directory(), + CouchDB = erlfdb_directory:create_or_open(Tx, Root, Dir), LayerPrefix = erlfdb_directory:get_name(CouchDB), Prefix = erlfdb_tuple:pack({?ALL_DBS}, LayerPrefix), fold_range({tx, Tx}, Prefix, fun({K, _V}, Acc) -> @@ -737,7 +738,8 @@ debug_cluster(Start, End) -> init_db(Tx, DbName, Options) -> Root = erlfdb_directory:root(), - CouchDB = erlfdb_directory:create_or_open(Tx, Root, [<<"couchdb">>]), + Dir = fabric2_server:fdb_directory(), + CouchDB = erlfdb_directory:create_or_open(Tx, Root, Dir), Prefix = erlfdb_directory:get_name(CouchDB), Version = erlfdb:wait(erlfdb:get(Tx, ?METADATA_VERSION_KEY)), #{ diff --git a/src/fabric/src/fabric2_server.erl b/src/fabric/src/fabric2_server.erl index 5b826cd14..f88ceb643 100644 --- a/src/fabric/src/fabric2_server.erl +++ b/src/fabric/src/fabric2_server.erl @@ -19,7 +19,8 @@ start_link/0, fetch/1, store/1, - remove/1 + remove/1, + fdb_directory/0 ]). @@ -37,6 +38,8 @@ -define(CLUSTER_FILE, "/usr/local/etc/foundationdb/fdb.cluster"). +-define(FDB_DIRECTORY, fdb_directory). +-define(DEFAULT_FDB_DIRECTORY, <<"couchdb">>). start_link() -> @@ -81,6 +84,14 @@ init(_) -> end, application:set_env(fabric, db, Db), + Dir = case config:get("fabric", "fdb_directory") of + Val when is_list(Val), length(Val) > 0 -> + [?l2b(Val)]; + _ -> + [?DEFAULT_FDB_DIRECTORY] + end, + application:set_env(fabric, ?FDB_DIRECTORY, Dir), + {ok, nil}. @@ -102,3 +113,18 @@ handle_info(Msg, St) -> code_change(_OldVsn, St, _Extra) -> {ok, St}. + + +fdb_directory() -> + case get(?FDB_DIRECTORY) of + undefined -> + case application:get_env(fabric, ?FDB_DIRECTORY) of + undefined -> + erlang:error(fabric_application_not_started); + {ok, Dir} -> + put(?FDB_DIRECTORY, Dir), + Dir + end; + Dir -> + Dir + end. diff --git a/src/fabric/src/fabric2_txids.erl b/src/fabric/src/fabric2_txids.erl index ba427415d..06704f021 100644 --- a/src/fabric/src/fabric2_txids.erl +++ b/src/fabric/src/fabric2_txids.erl @@ -45,7 +45,8 @@ start_link() -> create(Tx, undefined) -> Root = erlfdb_directory:root(), - CouchDB = erlfdb_directory:create_or_open(Tx, Root, [<<"couchdb">>]), + Dir = fabric2_server:fdb_directory(), + CouchDB = erlfdb_directory:create_or_open(Tx, Root, Dir), Prefix = erlfdb_directory:get_name(CouchDB), create(Tx, Prefix); @@ -136,7 +137,8 @@ clean(St, NeedsSweep) -> sweep(Tx, {Mega, Secs, Micro}) -> Root = erlfdb_directory:root(), - CouchDB = erlfdb_directory:create_or_open(Tx, Root, [<<"couchdb">>]), + Dir = fabric2_server:fdb_directory(), + CouchDB = erlfdb_directory:create_or_open(Tx, Root, Dir), Prefix = erlfdb_directory:get_name(CouchDB), StartKey = erlfdb_tuple:pack({?TX_IDS}, Prefix), EndKey = erlfdb_tuple:pack({?TX_IDS, Mega, Secs, Micro}, Prefix), -- cgit v1.2.1 From d333a2e2c155263128efc75201a10f81fc48ebaf Mon Sep 17 00:00:00 2001 From: Garren Smith Date: Thu, 15 Aug 2019 15:08:48 +0200 Subject: only fetch 1 query server for indexing --- src/couch_views/src/couch_views_indexer.erl | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/couch_views/src/couch_views_indexer.erl b/src/couch_views/src/couch_views_indexer.erl index edee332f3..bebbd1a7a 100644 --- a/src/couch_views/src/couch_views_indexer.erl +++ b/src/couch_views/src/couch_views_indexer.erl @@ -215,7 +215,7 @@ write_docs(TxDb, Mrst, Docs, State) -> couch_views_fdb:set_update_seq(TxDb, Sig, LastSeq). -start_query_server(#mrst{} = Mrst) -> +start_query_server(#mrst{qserver = nil} = Mrst) -> #mrst{ language = Language, lib = Lib, @@ -223,7 +223,10 @@ start_query_server(#mrst{} = Mrst) -> } = Mrst, Defs = [View#mrview.def || View <- Views], {ok, QServer} = couch_query_servers:start_doc_map(Language, Defs, Lib), - Mrst#mrst{qserver = QServer}. + Mrst#mrst{qserver = QServer}; + +start_query_server(#mrst{} = Mrst) -> + Mrst. report_progress(State, UpdateType) -> -- cgit v1.2.1 From 56d7f56a79f05d55a4d8d999adcf7d08513ef0ea Mon Sep 17 00:00:00 2001 From: ILYA Khlopotov Date: Fri, 16 Aug 2019 19:08:04 +0000 Subject: Add timeout for 'fold_docs-with_different_keys' test --- src/fabric/test/fabric2_doc_fold_tests.erl | 32 ++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/src/fabric/test/fabric2_doc_fold_tests.erl b/src/fabric/test/fabric2_doc_fold_tests.erl index ee0180f14..3cb68bdac 100644 --- a/src/fabric/test/fabric2_doc_fold_tests.erl +++ b/src/fabric/test/fabric2_doc_fold_tests.erl @@ -20,6 +20,16 @@ -define(DOC_COUNT, 50). +%% eunit implementation of {with, Tests} doesn't detect test name correctly +with(Tests) -> + fun(ArgsTuple) -> + [{Name, ?_test(Fun(ArgsTuple))} || {Name, Fun} <- Tests] + ++ + [{Name, {timeout, Timeout, ?_test(Fun(ArgsTuple))}} || {Name, Timeout, Fun} <- Tests] + end. + +-define(NAMED(A), {atom_to_list(A), fun A/1}). +-define(WITH_TIMEOUT(Timeout, A), {atom_to_list(A), Timeout, fun A/1}). doc_fold_test_() -> { @@ -28,17 +38,17 @@ doc_fold_test_() -> setup, fun setup/0, fun cleanup/1, - {with, [ - fun fold_docs_basic/1, - fun fold_docs_rev/1, - fun fold_docs_with_start_key/1, - fun fold_docs_with_end_key/1, - fun fold_docs_with_both_keys_the_same/1, - fun fold_docs_with_different_keys/1, - fun fold_docs_with_limit/1, - fun fold_docs_with_skip/1, - fun fold_docs_with_skip_and_limit/1 - ]} + with([ + ?NAMED(fold_docs_basic), + ?NAMED(fold_docs_rev), + ?NAMED(fold_docs_with_start_key), + ?NAMED(fold_docs_with_end_key), + ?NAMED(fold_docs_with_both_keys_the_same), + ?WITH_TIMEOUT(10000, fold_docs_with_different_keys), + ?NAMED(fold_docs_with_limit), + ?NAMED(fold_docs_with_skip), + ?NAMED(fold_docs_with_skip_and_limit) + ]) } }. -- cgit v1.2.1 From 11eda338c1615426c483b159c7340cbe752310b8 Mon Sep 17 00:00:00 2001 From: Garren Smith Date: Thu, 22 Aug 2019 18:30:44 +0200 Subject: add trace test for couch_views --- .../test/couch_views_trace_index_test.erl | 140 +++++++++++++++++++++ 1 file changed, 140 insertions(+) create mode 100644 src/couch_views/test/couch_views_trace_index_test.erl diff --git a/src/couch_views/test/couch_views_trace_index_test.erl b/src/couch_views/test/couch_views_trace_index_test.erl new file mode 100644 index 000000000..b7fe66b49 --- /dev/null +++ b/src/couch_views/test/couch_views_trace_index_test.erl @@ -0,0 +1,140 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + + +-module(couch_views_trace_index_test). + + +-include_lib("eunit/include/eunit.hrl"). +-include_lib("couch/include/couch_db.hrl"). +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("couch_mrview/include/couch_mrview.hrl"). + + +% Steps for this to work +% Run export FDB_NETWORK_OPTION_TRACE_ENABLE="" && +% make eunit apps=couch_views suites=couch_views_trace_index_test +% look in src/couch_views/.eunit for trace file +% Might need to add extra to finish up file +% Analyze! + + +-define(EUNIT_FTW(Tests), [{with, [T]} || T <- Tests]). + + +indexer_test_() -> + { + "Trace view indexing", + { + setup, + fun setup/0, + fun cleanup/1, + { + foreach, + fun foreach_setup/0, + fun foreach_teardown/1, + ?EUNIT_FTW([ + fun trace_single_doc/1 + ]) + } + } + }. + + +setup() -> + test_util:start_couch([fabric]). + + +cleanup(Ctx) -> + test_util:stop_couch(Ctx). + + +foreach_setup() -> + {ok, Db} = fabric2_db:create(?tempdb(), [{user_ctx, ?ADMIN_USER}]), + Db. + + +foreach_teardown(Db) -> + meck:unload(), + ok = fabric2_db:delete(fabric2_db:name(Db), []). + + +trace_single_doc(Db) -> + DbName = fabric2_db:name(Db), + DDoc = create_ddoc(), + Doc = doc(0), + + {ok, _} = fabric2_db:update_doc(Db, DDoc, []), + {ok, _} = fabric2_db:update_doc(Db, Doc, []), + {ok, Mrst} = couch_views_util:ddoc_to_mrst(DbName, DDoc), + + JobData = #{ + <<"db_name">> => DbName, + <<"ddoc_id">> => <<"_design/bar">>, + <<"sig">> => fabric2_util:to_hex(Mrst#mrst.sig) + }, + meck:expect(couch_jobs, accept, 2, {ok, job, JobData}), + meck:expect(couch_jobs, update, 3, {ok, job}), + meck:expect(couch_jobs, finish, 3, ok), + put(erlfdb_trace, <<"views_write_one_doc">>), + couch_views_indexer:init(), + + put(erlfdb_trace, <<"views_read_one_doc">>), + {ok, Out} = couch_views:query( + Db, + DDoc, + <<"map_fun1">>, + fun fold_fun/2, + [], + #mrargs{} + ), + + ?assertEqual([{row, [ + {id, <<"0">>}, + {key, 0}, + {value, 0} + ]}], Out). + + +create_ddoc() -> + couch_doc:from_json_obj({[ + {<<"_id">>, <<"_design/bar">>}, + {<<"views">>, {[ + {<<"map_fun1">>, {[ + {<<"map">>, <<"function(doc) {emit(doc.val, doc.val);}">>} + ]}}, + {<<"map_fun2">>, {[ + {<<"map">>, <<"function(doc) {}">>} + ]}} + ]}} + ]}). + + +doc(Id) -> + doc(Id, Id). + + +doc(Id, Val) -> + couch_doc:from_json_obj({[ + {<<"_id">>, list_to_binary(integer_to_list(Id))}, + {<<"val">>, Val} + ]}). + + +fold_fun({meta, _Meta}, Acc) -> + {ok, Acc}; + +fold_fun({row, _} = Row, Acc) -> + {ok, [Row | Acc]}; + +fold_fun(complete, Acc) -> + {ok, lists:reverse(Acc)}. -- cgit v1.2.1 From a234b189156a5393cbc2669ca2db974973927d3d Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Thu, 22 Aug 2019 15:23:53 -0400 Subject: Implement fabric2_db EPI plugin This mostly equivalent to the `couch_db` EPI plugin, but using fabric2 calls and without some of the functions that are not relevant to FDB such as on_compact/1 and others. --- rel/apps/couch_epi.config | 1 + src/chttpd/src/chttpd_db.erl | 8 +-- src/fabric/src/fabric.app.src | 1 + src/fabric/src/fabric2_db.erl | 136 +++++++++++++++++++++++++++-------- src/fabric/src/fabric2_db_plugin.erl | 92 ++++++++++++++++++++++++ src/fabric/src/fabric2_epi.erl | 48 +++++++++++++ src/fabric/src/fabric2_sup.erl | 29 ++++---- 7 files changed, 269 insertions(+), 46 deletions(-) create mode 100644 src/fabric/src/fabric2_db_plugin.erl create mode 100644 src/fabric/src/fabric2_epi.erl diff --git a/rel/apps/couch_epi.config b/rel/apps/couch_epi.config index a53721a48..0f3d2da55 100644 --- a/rel/apps/couch_epi.config +++ b/rel/apps/couch_epi.config @@ -12,6 +12,7 @@ {plugins, [ couch_db_epi, + fabric2_epi, chttpd_epi, couch_index_epi, dreyfus_epi, diff --git a/src/chttpd/src/chttpd_db.erl b/src/chttpd/src/chttpd_db.erl index 8e2b9d189..42e145bd5 100644 --- a/src/chttpd/src/chttpd_db.erl +++ b/src/chttpd/src/chttpd_db.erl @@ -1066,7 +1066,7 @@ db_doc_req(#httpd{method='GET', mochi_req=MochiReq}=Req, Db, DocId) -> db_doc_req(#httpd{method='POST', user_ctx=Ctx}=Req, Db, DocId) -> couch_httpd:validate_referer(Req), - couch_doc:validate_docid(DocId, fabric2_db:name(Db)), + fabric2_db:validate_docid(DocId), chttpd:validate_ctype(Req, "multipart/form-data"), Options = [{user_ctx,Ctx}], @@ -1126,7 +1126,7 @@ db_doc_req(#httpd{method='PUT', user_ctx=Ctx}=Req, Db, DocId) -> update_type = UpdateType } = parse_doc_query(Req), DbName = fabric2_db:name(Db), - couch_doc:validate_docid(DocId, fabric2_db:name(Db)), + fabric2_db:validate_docid(DocId), Options = [{user_ctx, Ctx}], @@ -1687,7 +1687,7 @@ db_attachment_req(#httpd{method=Method}=Req, Db, DocId, FileNameParts) % check for the existence of the doc to handle the 404 case. couch_doc_open(Db, DocId, nil, []) end, - couch_doc:validate_docid(DocId, fabric2_db:name(Db)), + fabric2_db:validate_docid(DocId), #doc{id=DocId}; Rev -> case fabric2_db:open_doc_revs(Db, DocId, [Rev], [{user_ctx,Ctx}]) of @@ -2066,7 +2066,7 @@ bulk_get_open_doc_revs1(Db, Props, Options, {}) -> {null, {error, Error}, Options}; DocId -> try - couch_doc:validate_docid(DocId, fabric2_db:name(Db)), + fabric2_db:validate_docid(DocId), bulk_get_open_doc_revs1(Db, Props, Options, {DocId}) catch throw:{Error, Reason} -> {DocId, {error, {null, Error, Reason}}, Options} diff --git a/src/fabric/src/fabric.app.src b/src/fabric/src/fabric.app.src index 20fbb1e2a..77260f962 100644 --- a/src/fabric/src/fabric.app.src +++ b/src/fabric/src/fabric.app.src @@ -21,6 +21,7 @@ kernel, stdlib, config, + couch_epi, couch, rexi, mem3, diff --git a/src/fabric/src/fabric2_db.erl b/src/fabric/src/fabric2_db.erl index c926da9e0..2afb780fa 100644 --- a/src/fabric/src/fabric2_db.erl +++ b/src/fabric/src/fabric2_db.erl @@ -85,7 +85,7 @@ %% get_minimum_purge_seq/1, %% purge_client_exists/3, - %% validate_docid/2, + validate_docid/1, %% doc_from_json_obj_validate/2, update_doc/2, @@ -118,9 +118,9 @@ %% wait_for_compaction/1, %% wait_for_compaction/2, - %% dbname_suffix/1, - %% normalize_dbname/1, - %% validate_dbname/1, + dbname_suffix/1, + normalize_dbname/1, + validate_dbname/1, %% make_doc/5, new_revid/2 @@ -141,21 +141,26 @@ create(DbName, Options) -> - Result = fabric2_fdb:transactional(DbName, Options, fun(TxDb) -> - case fabric2_fdb:exists(TxDb) of - true -> - {error, file_exists}; - false -> - fabric2_fdb:create(TxDb, Options) - end - end), - % We cache outside of the transaction so that we're sure - % that the transaction was committed. - case Result of - #{} = Db0 -> - Db1 = maybe_add_sys_db_callbacks(Db0), - ok = fabric2_server:store(Db1), - {ok, Db1#{tx := undefined}}; + case validate_dbname(DbName) of + ok -> + Result = fabric2_fdb:transactional(DbName, Options, fun(TxDb) -> + case fabric2_fdb:exists(TxDb) of + true -> + {error, file_exists}; + false -> + fabric2_fdb:create(TxDb, Options) + end + end), + % We cache outside of the transaction so that we're sure + % that the transaction was committed. + case Result of + #{} = Db0 -> + Db1 = maybe_add_sys_db_callbacks(Db0), + ok = fabric2_server:store(Db1), + {ok, Db1#{tx := undefined}}; + Error -> + Error + end; Error -> Error end. @@ -225,11 +230,15 @@ list_dbs(UserFun, UserAcc0, Options) -> is_admin(Db) -> - % TODO: Need to re-consider couch_db_plugin:check_is_admin/1 - {SecProps} = get_security(Db), - UserCtx = get_user_ctx(Db), - {Admins} = get_admins(SecProps), - is_authorized(Admins, UserCtx). + case fabric2_db_plugin:check_is_admin(Db) of + true -> + true; + false -> + {SecProps} = get_security(Db), + UserCtx = get_user_ctx(Db), + {Admins} = get_admins(SecProps), + is_authorized(Admins, UserCtx) + end. check_is_admin(Db) -> @@ -582,6 +591,44 @@ get_missing_revs(Db, JsonIdRevs) -> {ok, AllMissing}. +validate_docid(<<"">>) -> + throw({illegal_docid, <<"Document id must not be empty">>}); +validate_docid(<<"_design/">>) -> + throw({illegal_docid, <<"Illegal document id `_design/`">>}); +validate_docid(<<"_local/">>) -> + throw({illegal_docid, <<"Illegal document id `_local/`">>}); +validate_docid(Id) when is_binary(Id) -> + MaxLen = case config:get("couchdb", "max_document_id_length", "infinity") of + "infinity" -> infinity; + IntegerVal -> list_to_integer(IntegerVal) + end, + case MaxLen > 0 andalso byte_size(Id) > MaxLen of + true -> throw({illegal_docid, <<"Document id is too long">>}); + false -> ok + end, + case couch_util:validate_utf8(Id) of + false -> throw({illegal_docid, <<"Document id must be valid UTF-8">>}); + true -> ok + end, + case Id of + <> -> ok; + <> -> ok; + <<"_", _/binary>> -> + case fabric2_db_plugin:validate_docid(Id) of + true -> + ok; + false -> + throw( + {illegal_docid, + <<"Only reserved document ids may start with underscore.">>}) + end; + _Else -> ok + end; +validate_docid(Id) -> + couch_log:debug("Document id is not a string: ~p", [Id]), + throw({illegal_docid, <<"Document id must be a string">>}). + + update_doc(Db, Doc) -> update_doc(Db, Doc, []). @@ -758,6 +805,38 @@ fold_changes(Db, SinceSeq, UserFun, UserAcc, Options) -> end). +dbname_suffix(DbName) -> + filename:basename(normalize_dbname(DbName)). + + +normalize_dbname(DbName) -> + % Remove in the final cleanup. We don't need to handle shards prefix or + % remove .couch suffixes anymore. Keep it for now to pass all the existing + % tests. + couch_db:normalize_dbname(DbName). + + +validate_dbname(DbName) when is_list(DbName) -> + validate_dbname(?l2b(DbName)); + +validate_dbname(DbName) when is_binary(DbName) -> + Normalized = normalize_dbname(DbName), + fabric2_db_plugin:validate_dbname( + DbName, Normalized, fun validate_dbname_int/2). + +validate_dbname_int(DbName, Normalized) when is_binary(DbName) -> + DbNoExt = couch_util:drop_dot_couch_ext(DbName), + case re:run(DbNoExt, ?DBNAME_REGEX, [{capture,none}, dollar_endonly]) of + match -> + ok; + nomatch -> + case is_system_db_name(Normalized) of + true -> ok; + false -> {error, {illegal_database_name, DbName}} + end + end. + + maybe_add_sys_db_callbacks(Db) -> IsReplicatorDb = is_replicator_db(Db), IsUsersDb = is_users_db(Db), @@ -1030,16 +1109,13 @@ find_possible_ancestors(RevInfos, MissingRevs) -> apply_before_doc_update(Db, Docs, Options) -> - #{before_doc_update := BDU} = Db, UpdateType = case lists:member(replicated_changes, Options) of true -> replicated_changes; false -> interactive_edit end, - if BDU == undefined -> Docs; true -> - lists:map(fun(Doc) -> - BDU(Doc, Db, UpdateType) - end, Docs) - end. + lists:map(fun(Doc) -> + fabric2_db_plugin:before_doc_update(Db, Doc, UpdateType) + end, Docs). update_doc_int(#{} = Db, #doc{} = Doc, Options) -> diff --git a/src/fabric/src/fabric2_db_plugin.erl b/src/fabric/src/fabric2_db_plugin.erl new file mode 100644 index 000000000..41f9e9db6 --- /dev/null +++ b/src/fabric/src/fabric2_db_plugin.erl @@ -0,0 +1,92 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(fabric2_db_plugin). + +-export([ + validate_dbname/3, + before_doc_update/3, + after_doc_read/2, + validate_docid/1, + check_is_admin/1, + is_valid_purge_client/2 +]). + +-define(SERVICE_ID, fabric2_db). + + +%% ------------------------------------------------------------------ +%% API Function Definitions +%% ------------------------------------------------------------------ + +validate_dbname(DbName, Normalized, Default) -> + maybe_handle(validate_dbname, [DbName, Normalized], Default). + + +before_doc_update(Db, Doc0, UpdateType) -> + Fun = fabric2_db:get_before_doc_update_fun(Db), + case with_pipe(before_doc_update, [Doc0, Db, UpdateType]) of + [Doc1, _Db, UpdateType1] when is_function(Fun) -> + Fun(Doc1, Db, UpdateType1); + [Doc1, _Db, _UpdateType] -> + Doc1 + end. + + +after_doc_read(Db, Doc0) -> + Fun = fabric2_db:get_after_doc_read_fun(Db), + case with_pipe(after_doc_read, [Doc0, Db]) of + [Doc1, _Db] when is_function(Fun) -> Fun(Doc1, Db); + [Doc1, _Db] -> Doc1 + end. + + +validate_docid(Id) -> + Handle = couch_epi:get_handle(?SERVICE_ID), + %% callbacks return true only if it specifically allow the given Id + couch_epi:any(Handle, ?SERVICE_ID, validate_docid, [Id], []). + + +check_is_admin(Db) -> + Handle = couch_epi:get_handle(?SERVICE_ID), + %% callbacks return true only if it specifically allow the given Id + R = couch_epi:any(Handle, ?SERVICE_ID, check_is_admin, [Db], []), + %io:format(standard_error, "~n FFFFFFF ~p check_is_admin Db:~p => ~p~n", [?MODULE, fabric2_db:name(Db), R]), + R. + + +is_valid_purge_client(DbName, Props) -> + Handle = couch_epi:get_handle(?SERVICE_ID), + %% callbacks return true only if it specifically allow the given Id + couch_epi:any(Handle, ?SERVICE_ID, is_valid_purge_client, [DbName, Props], []). + +%% ------------------------------------------------------------------ +%% Internal Function Definitions +%% ------------------------------------------------------------------ + +with_pipe(Func, Args) -> + do_apply(Func, Args, [pipe]). + +do_apply(Func, Args, Opts) -> + Handle = couch_epi:get_handle(?SERVICE_ID), + couch_epi:apply(Handle, ?SERVICE_ID, Func, Args, Opts). + +maybe_handle(Func, Args, Default) -> + Handle = couch_epi:get_handle(?SERVICE_ID), + case couch_epi:decide(Handle, ?SERVICE_ID, Func, Args, []) of + no_decision when is_function(Default) -> + apply(Default, Args); + no_decision -> + Default; + {decided, Result} -> + Result + end. diff --git a/src/fabric/src/fabric2_epi.erl b/src/fabric/src/fabric2_epi.erl new file mode 100644 index 000000000..f73eeb0d2 --- /dev/null +++ b/src/fabric/src/fabric2_epi.erl @@ -0,0 +1,48 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(fabric2_epi). + +-behaviour(couch_epi_plugin). + +-export([ + app/0, + providers/0, + services/0, + data_subscriptions/0, + data_providers/0, + processes/0, + notify/3 +]). + +app() -> + fabric. + +providers() -> + []. + +services() -> + [ + {fabric2_db, fabric2_db_plugin} + ]. + +data_subscriptions() -> + []. + +data_providers() -> + []. + +processes() -> + []. + +notify(_Key, _Old, _New) -> + ok. diff --git a/src/fabric/src/fabric2_sup.erl b/src/fabric/src/fabric2_sup.erl index 73c6c1f4d..402474c32 100644 --- a/src/fabric/src/fabric2_sup.erl +++ b/src/fabric/src/fabric2_sup.erl @@ -29,19 +29,24 @@ start_link(Args) -> init([]) -> - Flags = #{ - strategy => one_for_one, - intensity => 1, - period => 5 - }, + Flags = {one_for_one, 1, 5}, Children = [ - #{ - id => fabric2_server, - start => {fabric2_server, start_link, []} + { + fabric2_server, + {fabric2_server, start_link, []}, + permanent, + 5000, + worker, + [fabric2_server] }, - #{ - id => fabric2_txids, - start => {fabric2_txids, start_link, []} + { + fabric2_txids, + {fabric2_txids, start_link, []}, + permanent, + 5000, + worker, + [fabric2_server] } ], - {ok, {Flags, Children}}. + ChildrenWithEpi = couch_epi:register_service(fabric2_epi, Children), + {ok, {Flags, ChildrenWithEpi}}. -- cgit v1.2.1 From bf216d36b069a9bb2c2e6bc1e50deb7c8565e3bd Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Thu, 22 Aug 2019 15:53:15 -0400 Subject: Fix DB reopen behavior Previously if a VDU was updated in the `before_doc_update/3` handlers, the Db handle wasn't refreshed soon enough such that the `prepare_and_validate/3` would not see the newly update VDU and would not run it. To fix make a stale Db throws an exception which bubbles up all the way to `fabric2_fdb:transactional/2` where the transaction is retried again with a reopened Db. During `reopen/1` make sure to properly transfer the `user_ctx` property to the new handle, and also make sure `user_ctx` is removed from `db_options` to avoid caching it in `fabric2_server`. --- src/fabric/src/fabric2_fdb.erl | 36 ++++++++++++++++++++++++++---------- 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/src/fabric/src/fabric2_fdb.erl b/src/fabric/src/fabric2_fdb.erl index c58b5f674..df3709673 100644 --- a/src/fabric/src/fabric2_fdb.erl +++ b/src/fabric/src/fabric2_fdb.erl @@ -78,10 +78,22 @@ transactional(DbName, Options, Fun) when is_binary(DbName) -> transactional(#{tx := undefined} = Db, Fun) -> - #{layer_prefix := LayerPrefix} = Db, - do_transaction(fun(Tx) -> - Fun(Db#{tx => Tx}) - end, LayerPrefix); + try + Reopen = maps:get(reopen, Db, false), + Db1 = maps:remove(reopen, Db), + LayerPrefix = case Reopen of + true -> undefined; + false -> maps:get(layer_prefix, Db1) + end, + do_transaction(fun(Tx) -> + case Reopen of + true -> Fun(reopen(Db1#{tx => Tx})); + false -> Fun(Db1#{tx => Tx}) + end + end, LayerPrefix) + catch throw:{?MODULE, reopen} -> + transactional(Db#{reopen => true}, Fun) + end; transactional(#{tx := {erlfdb_transaction, _}} = Db, Fun) -> Fun(Db). @@ -150,6 +162,7 @@ create(#{} = Db0, Options) -> end, Defaults), UserCtx = fabric2_util:get_value(user_ctx, Options, #user_ctx{}), + Options1 = lists:keydelete(user_ctx, 1, Options), Db#{ uuid => UUID, @@ -165,7 +178,7 @@ create(#{} = Db0, Options) -> after_doc_read => undefined, % All other db things as we add features, - db_options => Options + db_options => Options1 }. @@ -186,6 +199,7 @@ open(#{} = Db0, Options) -> DbVersion = erlfdb:wait(erlfdb:get(Tx, DbVersionKey)), UserCtx = fabric2_util:get_value(user_ctx, Options, #user_ctx{}), + Options1 = lists:keydelete(user_ctx, 1, Options), Db2 = Db1#{ db_prefix => DbPrefix, @@ -201,7 +215,7 @@ open(#{} = Db0, Options) -> before_doc_update => undefined, after_doc_read => undefined, - db_options => Options + db_options => Options1 }, Db3 = lists:foldl(fun({Key, Val}, DbAcc) -> @@ -223,9 +237,11 @@ reopen(#{} = OldDb) -> #{ tx := Tx, name := DbName, - db_options := Options + db_options := Options, + user_ctx := UserCtx } = OldDb, - open(init_db(Tx, DbName, Options), Options). + Options1 = lists:keystore(user_ctx, 1, Options, {user_ctx, UserCtx}), + open(init_db(Tx, DbName, Options1), Options1). delete(#{} = Db) -> @@ -1132,7 +1148,7 @@ ensure_current(#{} = Db, CheckDbVersion) -> case erlfdb:wait(erlfdb:get(Tx, ?METADATA_VERSION_KEY)) of MetaDataVersion -> Db; - _NewVersion -> reopen(Db) + _NewVersion -> throw({?MODULE, reopen}) end, AlreadyChecked = get(?PDICT_CHECKED_DB_IS_CURRENT), @@ -1150,7 +1166,7 @@ ensure_current(#{} = Db, CheckDbVersion) -> Db; _NewDBVersion -> fabric2_server:remove(maps:get(name, Db)), - reopen(Db) + throw({?MODULE, reopen}) end end. -- cgit v1.2.1 From 6a0e07a79e4bb84f7f6217b6eb3edc3526b5ca49 Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Thu, 22 Aug 2019 16:15:04 -0400 Subject: Open dbs with admin privileges in couch_views_indexer That is needed when indexing authentication dbs like `_users`. --- src/couch_views/src/couch_views_indexer.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/couch_views/src/couch_views_indexer.erl b/src/couch_views/src/couch_views_indexer.erl index bebbd1a7a..60c819486 100644 --- a/src/couch_views/src/couch_views_indexer.erl +++ b/src/couch_views/src/couch_views_indexer.erl @@ -42,7 +42,7 @@ init() -> <<"sig">> := JobSig } = Data, - {ok, Db} = fabric2_db:open(DbName, []), + {ok, Db} = fabric2_db:open(DbName, [?ADMIN_CTX]), {ok, DDoc} = fabric2_db:open_doc(Db, DDocId), {ok, Mrst} = couch_views_util:ddoc_to_mrst(DbName, DDoc), HexSig = fabric2_util:to_hex(Mrst#mrst.sig), -- cgit v1.2.1 From 6f5b432e634d8719ed042895b99b4bc753261a07 Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Thu, 22 Aug 2019 16:44:09 -0400 Subject: Implement update_seq for couch_views --- src/couch_views/src/couch_views_reader.erl | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/couch_views/src/couch_views_reader.erl b/src/couch_views/src/couch_views_reader.erl index c7989d89c..27671fb9c 100644 --- a/src/couch_views/src/couch_views_reader.erl +++ b/src/couch_views/src/couch_views_reader.erl @@ -35,9 +35,7 @@ read(Db, Mrst, ViewName, UserCallback, UserAcc0, Args) -> try fabric2_fdb:transactional(Db, fun(TxDb) -> - TotalRows = couch_views_fdb:get_row_count(TxDb, Mrst, ViewId), - - Meta = {meta, [{total, TotalRows}, {offset, null}]}, + Meta = get_meta(TxDb, Mrst, ViewId, Args), UserAcc1 = maybe_stop(UserCallback(Meta, UserAcc0)), Acc0 = #{ @@ -73,6 +71,16 @@ read(Db, Mrst, ViewName, UserCallback, UserAcc0, Args) -> end. +get_meta(TxDb, Mrst, ViewId, #mrargs{update_seq = true}) -> + TotalRows = couch_views_fdb:get_row_count(TxDb, Mrst, ViewId), + ViewSeq = couch_views_fdb:get_update_seq(TxDb, Mrst), + {meta, [{update_seq, ViewSeq}, {total, TotalRows}, {offset, null}]}; + +get_meta(TxDb, Mrst, ViewId, #mrargs{}) -> + TotalRows = couch_views_fdb:get_row_count(TxDb, Mrst, ViewId), + {meta, [{total, TotalRows}, {offset, null}]}. + + handle_row(_DocId, _Key, _Value, #{skip := Skip} = Acc) when Skip > 0 -> Acc#{skip := Skip - 1}; -- cgit v1.2.1 From 3310a81e3c5d0ceb89798faac72931cac02f23c0 Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Thu, 22 Aug 2019 16:58:38 -0400 Subject: Move fabric2_dir_prefix_tests to test directory --- src/fabric/src/fabric2_dir_prefix_tests.erl | 69 ---------------------------- src/fabric/test/fabric2_dir_prefix_tests.erl | 69 ++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+), 69 deletions(-) delete mode 100644 src/fabric/src/fabric2_dir_prefix_tests.erl create mode 100644 src/fabric/test/fabric2_dir_prefix_tests.erl diff --git a/src/fabric/src/fabric2_dir_prefix_tests.erl b/src/fabric/src/fabric2_dir_prefix_tests.erl deleted file mode 100644 index c7bc8bba4..000000000 --- a/src/fabric/src/fabric2_dir_prefix_tests.erl +++ /dev/null @@ -1,69 +0,0 @@ -% Licensed under the Apache License, Version 2.0 (the "License"); you may not -% use this file except in compliance with the License. You may obtain a copy of -% the License at -% -% http://www.apache.org/licenses/LICENSE-2.0 -% -% Unless required by applicable law or agreed to in writing, software -% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -% License for the specific language governing permissions and limitations under -% the License. - --module(fabric2_dir_prefix_tests). - - --include_lib("couch/include/couch_eunit.hrl"). --include_lib("eunit/include/eunit.hrl"). - - --define(TDEF(A), {atom_to_list(A), fun A/0}). - - -dir_prefix_test_() -> - { - "Test couchdb fdb directory prefix", - foreach, - fun() -> - % erlfdb, rexi and mem3 are all dependent apps for fabric. We make - % sure to start them so when fabric is started during the test it - % already has its dependencies - test_util:start_couch([erlfdb, rexi, mem3]) - end, - fun(Ctx) -> - config:delete("fabric", "fdb_directory"), - ok = application:stop(fabric), - test_util:stop_couch(Ctx) - end, - [ - ?TDEF(default_prefix), - ?TDEF(custom_prefix) - ] - }. - - -default_prefix() -> - ok = application:start(fabric), - - ?assertEqual([<<"couchdb">>], fabric2_server:fdb_directory()), - - % Try again to test pdict caching code - ?assertEqual([<<"couchdb">>], fabric2_server:fdb_directory()), - - % Check that we can create dbs - DbName = ?tempdb(), - ?assertMatch({ok, _}, fabric2_db:create(DbName, [])). - - -custom_prefix() -> - ok = config:set("fabric", "fdb_directory", "couchdb_foo"), - ok = application:start(fabric), - - ?assertEqual([<<"couchdb_foo">>], fabric2_server:fdb_directory()), - - % Try again to test pdict caching code - ?assertEqual([<<"couchdb_foo">>], fabric2_server:fdb_directory()), - - % Check that we can create dbs - DbName = ?tempdb(), - ?assertMatch({ok, _}, fabric2_db:create(DbName, [])). diff --git a/src/fabric/test/fabric2_dir_prefix_tests.erl b/src/fabric/test/fabric2_dir_prefix_tests.erl new file mode 100644 index 000000000..c7bc8bba4 --- /dev/null +++ b/src/fabric/test/fabric2_dir_prefix_tests.erl @@ -0,0 +1,69 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(fabric2_dir_prefix_tests). + + +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("eunit/include/eunit.hrl"). + + +-define(TDEF(A), {atom_to_list(A), fun A/0}). + + +dir_prefix_test_() -> + { + "Test couchdb fdb directory prefix", + foreach, + fun() -> + % erlfdb, rexi and mem3 are all dependent apps for fabric. We make + % sure to start them so when fabric is started during the test it + % already has its dependencies + test_util:start_couch([erlfdb, rexi, mem3]) + end, + fun(Ctx) -> + config:delete("fabric", "fdb_directory"), + ok = application:stop(fabric), + test_util:stop_couch(Ctx) + end, + [ + ?TDEF(default_prefix), + ?TDEF(custom_prefix) + ] + }. + + +default_prefix() -> + ok = application:start(fabric), + + ?assertEqual([<<"couchdb">>], fabric2_server:fdb_directory()), + + % Try again to test pdict caching code + ?assertEqual([<<"couchdb">>], fabric2_server:fdb_directory()), + + % Check that we can create dbs + DbName = ?tempdb(), + ?assertMatch({ok, _}, fabric2_db:create(DbName, [])). + + +custom_prefix() -> + ok = config:set("fabric", "fdb_directory", "couchdb_foo"), + ok = application:start(fabric), + + ?assertEqual([<<"couchdb_foo">>], fabric2_server:fdb_directory()), + + % Try again to test pdict caching code + ?assertEqual([<<"couchdb_foo">>], fabric2_server:fdb_directory()), + + % Check that we can create dbs + DbName = ?tempdb(), + ?assertMatch({ok, _}, fabric2_db:create(DbName, [])). -- cgit v1.2.1 From fd5b1c8ee7e4874a9949ac1ac00034b5b28ae783 Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Fri, 23 Aug 2019 17:14:00 -0400 Subject: Fix couch_views encoding test Start stop config and couch_drv in `setup/0` and `teardown/1` instead of the test function itself. --- src/couch_views/test/couch_views_encoding_test.erl | 125 ++++++++++++--------- 1 file changed, 74 insertions(+), 51 deletions(-) diff --git a/src/couch_views/test/couch_views_encoding_test.erl b/src/couch_views/test/couch_views_encoding_test.erl index 7c26583d2..d15f616cb 100644 --- a/src/couch_views/test/couch_views_encoding_test.erl +++ b/src/couch_views/test/couch_views_encoding_test.erl @@ -28,64 +28,87 @@ val_encoding_test() -> end, Values). -correct_ordering_test() -> +setup() -> % Load the ICU driver for couch_util:get_sort_key/1 {ok, CfgPid} = gen_server:start_link(config, [], []), {ok, DrvPid} = gen_server:start_link(couch_drv, [], []), + {CfgPid, DrvPid}. - Ordered = [ - % Special values sort before all other types - null, - false, - true, - % Then numbers - 1, - 2, - 3.0, - 4, +teardown({CfgPid, DrvPid}) -> + unlink(CfgPid), + unlink(DrvPid), + exit(CfgPid, kill), + exit(DrvPid, kill). - % Then text, case sensitive - <<"a">>, - <<"A">>, - <<"aa">>, - <<"b">>, - <<"B">>, - <<"ba">>, - <<"bb">>, - - % Then arrays, compared element by element until different. - % Longer arrays sort after their prefixes - [<<"a">>], - [<<"b">>], - [<<"b">>, <<"c">>], - [<<"b">>, <<"c">>, <<"a">>], - [<<"b">>, <<"d">>], - [<<"b">>, <<"d">>, <<"e">>], - - % Then objects, compared each key value in the list until different. - % Larger objects sort after their subset objects - {[{<<"a">>, 1}]}, - {[{<<"a">>, 2}]}, - {[{<<"b">>, 1}]}, - {[{<<"b">>, 2}]}, - - % Member order does matter for collation - {[{<<"b">>, 2}, {<<"a">>, 1}]}, - {[{<<"b">>, 2}, {<<"c">>, 2}]} - ], - Encoded = lists:map(fun(Elem) -> - K = couch_views_encoding:encode(Elem, key), - V = couch_views_encoding:encode(Elem, value), - {K, V} - end, Ordered), - Shuffled = shuffle(Encoded), - Reordered = lists:sort(Shuffled), - - lists:foreach(fun({Original, {_K, ViewEncoded}}) -> - ?assertEqual(Original, couch_views_encoding:decode(ViewEncoded)) - end, lists:zip(Ordered, Reordered)). +correct_ordering_test_() -> + { + setup, + fun setup/0, + fun teardown/1, + [ + fun t_correct_ordering/0 + ] + }. + + +t_correct_ordering() -> + ?_test(begin + Ordered = [ + % Special values sort before all other types + null, + false, + true, + + % Then numbers + 1, + 2, + 3.0, + 4, + + % Then text, case sensitive + <<"a">>, + <<"A">>, + <<"aa">>, + <<"b">>, + <<"B">>, + <<"ba">>, + <<"bb">>, + + % Then arrays, compared element by element until different. + % Longer arrays sort after their prefixes + [<<"a">>], + [<<"b">>], + [<<"b">>, <<"c">>], + [<<"b">>, <<"c">>, <<"a">>], + [<<"b">>, <<"d">>], + [<<"b">>, <<"d">>, <<"e">>], + + % Then objects, compared each key value in the list until different. + % Larger objects sort after their subset objects + {[{<<"a">>, 1}]}, + {[{<<"a">>, 2}]}, + {[{<<"b">>, 1}]}, + {[{<<"b">>, 2}]}, + + % Member order does matter for collation + {[{<<"b">>, 2}, {<<"a">>, 1}]}, + {[{<<"b">>, 2}, {<<"c">>, 2}]} + ], + + Encoded = lists:map(fun(Elem) -> + K = couch_views_encoding:encode(Elem, key), + V = couch_views_encoding:encode(Elem, value), + {K, V} + end, Ordered), + Shuffled = shuffle(Encoded), + Reordered = lists:sort(Shuffled), + + lists:foreach(fun({Original, {_K, ViewEncoded}}) -> + ?assertEqual(Original, couch_views_encoding:decode(ViewEncoded)) + end, lists:zip(Ordered, Reordered)) + end). shuffle(List) when is_list(List) -> -- cgit v1.2.1 From fdc52e660024f9b175cc1a9e69f284a8ed7200dc Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Thu, 29 Aug 2019 11:03:26 -0400 Subject: Skip before_doc_update processing for local docs This matches behavior on master: https://github.com/apache/couchdb/blob/master/src/couch/src/couch_db.erl#L1373-L1387 --- src/fabric/src/fabric2_db_plugin.erl | 7 +++++++ src/fabric/test/fabric2_doc_crud_tests.erl | 24 +++++++++++++++++++++++- 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/src/fabric/src/fabric2_db_plugin.erl b/src/fabric/src/fabric2_db_plugin.erl index 41f9e9db6..cb824d7ab 100644 --- a/src/fabric/src/fabric2_db_plugin.erl +++ b/src/fabric/src/fabric2_db_plugin.erl @@ -21,6 +21,10 @@ is_valid_purge_client/2 ]). + +-include_lib("couch/include/couch_db.hrl"). + + -define(SERVICE_ID, fabric2_db). @@ -32,6 +36,9 @@ validate_dbname(DbName, Normalized, Default) -> maybe_handle(validate_dbname, [DbName, Normalized], Default). +before_doc_update(_, #doc{id = <>} = Doc, _) -> + Doc; + before_doc_update(Db, Doc0, UpdateType) -> Fun = fabric2_db:get_before_doc_update_fun(Db), case with_pipe(before_doc_update, [Doc0, Db, UpdateType]) of diff --git a/src/fabric/test/fabric2_doc_crud_tests.erl b/src/fabric/test/fabric2_doc_crud_tests.erl index c19c47421..3cb380827 100644 --- a/src/fabric/test/fabric2_doc_crud_tests.erl +++ b/src/fabric/test/fabric2_doc_crud_tests.erl @@ -60,7 +60,8 @@ doc_crud_test_() -> fun delete_local_doc_basic/1, fun recreate_local_doc/1, fun create_local_doc_bad_rev/1, - fun create_local_doc_random_rev/1 + fun create_local_doc_random_rev/1, + fun before_doc_update_skips_local_docs/1 ]} } }. @@ -762,3 +763,24 @@ create_local_doc_random_rev({Db, _}) -> ?assertEqual({ok, {0, <<"2">>}}, fabric2_db:update_doc(Db, Doc5)), {ok, Doc6} = fabric2_db:open_doc(Db, LDocId, []), ?assertEqual(Doc5#doc{revs = {0, [<<"2">>]}}, Doc6). + + +before_doc_update_skips_local_docs({Db0, _}) -> + + BduFun = fun(Doc, _, _) -> + Doc#doc{body = {[<<"bdu_was_here">>, true]}} + end, + + Db = Db0#{before_doc_update := BduFun}, + + LDoc1 = #doc{id = <<"_local/ldoc1">>}, + Doc1 = #doc{id = <<"doc1">>}, + + ?assertMatch({ok, {_, _}}, fabric2_db:update_doc(Db, LDoc1)), + ?assertMatch({ok, {_, _}}, fabric2_db:update_doc(Db, Doc1)), + + {ok, LDoc2} = fabric2_db:open_doc(Db, LDoc1#doc.id), + {ok, Doc2} = fabric2_db:open_doc(Db, Doc1#doc.id), + + ?assertEqual({[]}, LDoc2#doc.body), + ?assertEqual({[<<"bdu_was_here">>, true]}, Doc2#doc.body). -- cgit v1.2.1 From 4da359533fc6c15b9ee4b34a6b1230961ccb1188 Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Thu, 29 Aug 2019 11:09:39 -0400 Subject: Remove debug logging in fabric2_db_plugin --- src/fabric/src/fabric2_db_plugin.erl | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/fabric/src/fabric2_db_plugin.erl b/src/fabric/src/fabric2_db_plugin.erl index cb824d7ab..fb83ed407 100644 --- a/src/fabric/src/fabric2_db_plugin.erl +++ b/src/fabric/src/fabric2_db_plugin.erl @@ -66,9 +66,7 @@ validate_docid(Id) -> check_is_admin(Db) -> Handle = couch_epi:get_handle(?SERVICE_ID), %% callbacks return true only if it specifically allow the given Id - R = couch_epi:any(Handle, ?SERVICE_ID, check_is_admin, [Db], []), - %io:format(standard_error, "~n FFFFFFF ~p check_is_admin Db:~p => ~p~n", [?MODULE, fabric2_db:name(Db), R]), - R. + couch_epi:any(Handle, ?SERVICE_ID, check_is_admin, [Db], []). is_valid_purge_client(DbName, Props) -> -- cgit v1.2.1 From 175da5fa253e45854477cee089172522138b5edb Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Thu, 29 Aug 2019 16:22:49 -0400 Subject: Fix starkey_docid, endkey_docid and inclusive_end=false for _all_docs --- src/chttpd/src/chttpd_db.erl | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/src/chttpd/src/chttpd_db.erl b/src/chttpd/src/chttpd_db.erl index 42e145bd5..bcecf8b1c 100644 --- a/src/chttpd/src/chttpd_db.erl +++ b/src/chttpd/src/chttpd_db.erl @@ -857,15 +857,7 @@ all_docs_view(Req, Db, Keys, OP) -> }, case Args1#mrargs.keys of undefined -> - Options = [ - {user_ctx, Req#httpd.user_ctx}, - {dir, Args1#mrargs.direction}, - {start_key, Args1#mrargs.start_key}, - {end_key, Args1#mrargs.end_key}, - {limit, Args1#mrargs.limit}, - {skip, Args1#mrargs.skip}, - {update_seq, Args1#mrargs.update_seq} - ], + Options = all_docs_view_opts(Args1, Req), Acc = {iter, Db, Args1, VAcc0}, {ok, {iter, _, _, Resp}} = fabric2_db:fold_docs(Db, fun view_cb/2, Acc, Options), @@ -938,6 +930,29 @@ all_docs_view(Req, Db, Keys, OP) -> end. +all_docs_view_opts(Args, Req) -> + StartKey = case Args#mrargs.start_key of + undefined -> Args#mrargs.start_key_docid; + SKey -> SKey + end, + EndKey = case Args#mrargs.end_key of + undefined -> Args#mrargs.end_key_docid; + EKey -> EKey + end, + EndKeyOpts = case {EndKey, Args#mrargs.inclusive_end} of + {<<_/binary>>, false} -> [{end_key_gt, EndKey}]; + {_, _} -> [{end_key, EndKey}] + end, + [ + {user_ctx, Req#httpd.user_ctx}, + {dir, Args#mrargs.direction}, + {start_key, StartKey}, + {limit, Args#mrargs.limit}, + {skip, Args#mrargs.skip}, + {update_seq, Args#mrargs.update_seq} + ] ++ EndKeyOpts. + + apply_args_to_keylist(Args, Keys0) -> Keys1 = case Args#mrargs.direction of fwd -> Keys0; -- cgit v1.2.1 From 2fd4cbc9ef3aea598158fc3a4249af8ea70a807c Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Fri, 30 Aug 2019 14:16:31 -0400 Subject: Implement _all_docs/queries Also add mrargs validation to match what master does and provide some helpful feedback to the users. --- src/chttpd/src/chttpd_db.erl | 193 +++++++++++++++++-------------- src/couch_views/src/couch_views_util.erl | 145 ++++++++++++++++++++++- test/elixir/test/basics_test.exs | 40 +++++++ 3 files changed, 290 insertions(+), 88 deletions(-) diff --git a/src/chttpd/src/chttpd_db.erl b/src/chttpd/src/chttpd_db.erl index bcecf8b1c..21c2c2c73 100644 --- a/src/chttpd/src/chttpd_db.erl +++ b/src/chttpd/src/chttpd_db.erl @@ -824,113 +824,132 @@ db_req(#httpd{path_parts=[_, DocId | FileNameParts]}=Req, Db) -> db_attachment_req(Req, Db, DocId, FileNameParts). multi_all_docs_view(Req, Db, OP, Queries) -> + UserCtx = Req#httpd.user_ctx, Args0 = couch_mrview_http:parse_params(Req, undefined), Args1 = Args0#mrargs{view_type=map}, ArgQueries = lists:map(fun({Query}) -> QueryArg1 = couch_mrview_http:parse_params(Query, undefined, Args1, [decoded]), - QueryArgs2 = fabric_util:validate_all_docs_args(Db, QueryArg1), + QueryArgs2 = couch_views_util:validate_args(QueryArg1), set_namespace(OP, QueryArgs2) end, Queries), - Options = [{user_ctx, Req#httpd.user_ctx}], - VAcc0 = #vacc{db=Db, req=Req, prepend="\r\n"}, - FirstChunk = "{\"results\":[", - {ok, Resp0} = chttpd:start_delayed_json_response(VAcc0#vacc.req, - 200, [], FirstChunk), - VAcc1 = VAcc0#vacc{resp=Resp0}, - VAcc2 = lists:foldl(fun(Args, Acc0) -> - {ok, Acc1} = fabric2_db:fold_docs(Db, Options, - fun view_cb/2, Acc0, Args), - Acc1 - end, VAcc1, ArgQueries), - {ok, Resp1} = chttpd:send_delayed_chunk(VAcc2#vacc.resp, "\r\n]}"), + Max = chttpd:chunked_response_buffer_size(), + First = "{\"results\":[", + {ok, Resp0} = chttpd:start_delayed_json_response(Req, 200, [], First), + VAcc0 = #vacc{ + db = Db, + req = Req, + resp = Resp0, + threshold = Max, + prepend = "\r\n" + }, + VAcc1 = lists:foldl(fun + (#mrargs{keys = undefined} = Args, Acc0) -> + send_all_docs(Db, Args, UserCtx, Acc0); + (#mrargs{keys = Keys} = Args, Acc0) when is_list(Keys) -> + send_all_docs_keys(Db, Args, UserCtx, Acc0) + end, VAcc0, ArgQueries), + {ok, Resp1} = chttpd:send_delayed_chunk(VAcc1#vacc.resp, "\r\n]}"), chttpd:end_delayed_json_response(Resp1). + all_docs_view(Req, Db, Keys, OP) -> + UserCtx = Req#httpd.user_ctx, Args0 = couch_mrview_http:parse_body_and_query(Req, Keys), - Aparse_body_and_queryrgs1 = set_namespace(OP, Args0), + Args1 = Args0#mrargs{view_type=map}, + Args2 = couch_views_util:validate_args(Args1), + Args3 = set_namespace(OP, Args2), Max = chttpd:chunked_response_buffer_size(), VAcc0 = #vacc{ db = Db, req = Req, threshold = Max }, - case Args1#mrargs.keys of + case Args3#mrargs.keys of undefined -> - Options = all_docs_view_opts(Args1, Req), - Acc = {iter, Db, Args1, VAcc0}, - {ok, {iter, _, _, Resp}} = - fabric2_db:fold_docs(Db, fun view_cb/2, Acc, Options), - {ok, Resp#vacc.resp}; - Keys0 when is_list(Keys0) -> - Keys1 = apply_args_to_keylist(Args1, Keys0), - %% namespace can be _set_ to `undefined`, so we - %% want simulate enum here - NS = case couch_util:get_value(namespace, Args1#mrargs.extra) of - <<"_all_docs">> -> <<"_all_docs">>; - <<"_design">> -> <<"_design">>; - <<"_local">> -> <<"_local">>; - _ -> <<"_all_docs">> - end, - TotalRows = fabric2_db:get_doc_count(Db, NS), - Meta = case Args1#mrargs.update_seq of - true -> - UpdateSeq = fabric2_db:get_update_seq(Db), - [{update_seq, UpdateSeq}]; - false -> - [] - end ++ [{total, TotalRows}, {offset, null}], - {ok, VAcc1} = view_cb({meta, Meta}, VAcc0), - DocOpts = case Args1#mrargs.conflicts of - true -> [conflicts | Args1#mrargs.doc_options]; - _ -> Args1#mrargs.doc_options - end ++ [{user_ctx, Req#httpd.user_ctx}], - IncludeDocs = Args1#mrargs.include_docs, - VAcc2 = lists:foldl(fun(DocId, Acc) -> - OpenOpts = [deleted | DocOpts], - Row0 = case fabric2_db:open_doc(Db, DocId, OpenOpts) of - {not_found, missing} -> - #view_row{key = DocId}; - {ok, #doc{deleted = true, revs = Revs}} -> - {RevPos, [RevId | _]} = Revs, - Value = {[ - {rev, couch_doc:rev_to_str({RevPos, RevId})}, - {deleted, true} - ]}, - DocValue = if not IncludeDocs -> undefined; true -> - null - end, - #view_row{ - key = DocId, - id = DocId, - value = Value, - doc = DocValue - }; - {ok, #doc{revs = Revs} = Doc0} -> - {RevPos, [RevId | _]} = Revs, - Value = {[ - {rev, couch_doc:rev_to_str({RevPos, RevId})} - ]}, - DocValue = if not IncludeDocs -> undefined; true -> - couch_doc:to_json_obj(Doc0, DocOpts) - end, - #view_row{ - key = DocId, - id = DocId, - value = Value, - doc = DocValue - } - end, - Row1 = fabric_view:transform_row(Row0), - {ok, NewAcc} = view_cb(Row1, Acc), - NewAcc - end, VAcc1, Keys1), - {ok, VAcc3} = view_cb(complete, VAcc2), - {ok, VAcc3#vacc.resp} + VAcc1 = send_all_docs(Db, Args3, UserCtx, VAcc0), + {ok, VAcc1#vacc.resp}; + Keys when is_list(Keys) -> + VAcc1 = send_all_docs_keys(Db, Args3, UserCtx, VAcc0), + {ok, VAcc2} = view_cb(complete, VAcc1), + {ok, VAcc2#vacc.resp} end. -all_docs_view_opts(Args, Req) -> +send_all_docs(Db, #mrargs{keys = undefined} = Args, UserCtx, VAcc0) -> + Opts = all_docs_view_opts(Args, UserCtx), + Acc = {iter, Db, Args, VAcc0}, + ViewCb = fun view_cb/2, + {ok, {iter, _, _, VAcc1}} = fabric2_db:fold_docs(Db, ViewCb, Acc, Opts), + VAcc1. + + +send_all_docs_keys(Db, #mrargs{} = Args, UserCtx, VAcc0) -> + Keys = apply_args_to_keylist(Args, Args#mrargs.keys), + %% namespace can be _set_ to `undefined`, so we + %% want simulate enum here + NS = case couch_util:get_value(namespace, Args#mrargs.extra) of + <<"_all_docs">> -> <<"_all_docs">>; + <<"_design">> -> <<"_design">>; + <<"_local">> -> <<"_local">>; + _ -> <<"_all_docs">> + end, + TotalRows = fabric2_db:get_doc_count(Db, NS), + Meta = case Args#mrargs.update_seq of + true -> + UpdateSeq = fabric2_db:get_update_seq(Db), + [{update_seq, UpdateSeq}]; + false -> + [] + end ++ [{total, TotalRows}, {offset, null}], + {ok, VAcc1} = view_cb({meta, Meta}, VAcc0), + DocOpts = case Args#mrargs.conflicts of + true -> [conflicts | Args#mrargs.doc_options]; + _ -> Args#mrargs.doc_options + end ++ [{user_ctx, UserCtx}], + IncludeDocs = Args#mrargs.include_docs, + VAcc2 = lists:foldl(fun(DocId, Acc) -> + OpenOpts = [deleted | DocOpts], + Row0 = case fabric2_db:open_doc(Db, DocId, OpenOpts) of + {not_found, missing} -> + #view_row{key = DocId}; + {ok, #doc{deleted = true, revs = Revs}} -> + {RevPos, [RevId | _]} = Revs, + Value = {[ + {rev, couch_doc:rev_to_str({RevPos, RevId})}, + {deleted, true} + ]}, + DocValue = if not IncludeDocs -> undefined; true -> + null + end, + #view_row{ + key = DocId, + id = DocId, + value = Value, + doc = DocValue + }; + {ok, #doc{revs = Revs} = Doc0} -> + {RevPos, [RevId | _]} = Revs, + Value = {[ + {rev, couch_doc:rev_to_str({RevPos, RevId})} + ]}, + DocValue = if not IncludeDocs -> undefined; true -> + couch_doc:to_json_obj(Doc0, DocOpts) + end, + #view_row{ + key = DocId, + id = DocId, + value = Value, + doc = DocValue + } + end, + Row1 = fabric_view:transform_row(Row0), + {ok, NewAcc} = view_cb(Row1, Acc), + NewAcc + end, VAcc1, Keys). + + +all_docs_view_opts(Args, UserCtx) -> StartKey = case Args#mrargs.start_key of undefined -> Args#mrargs.start_key_docid; SKey -> SKey @@ -944,7 +963,7 @@ all_docs_view_opts(Args, Req) -> {_, _} -> [{end_key, EndKey}] end, [ - {user_ctx, Req#httpd.user_ctx}, + {user_ctx, UserCtx}, {dir, Args#mrargs.direction}, {start_key, StartKey}, {limit, Args#mrargs.limit}, diff --git a/src/couch_views/src/couch_views_util.erl b/src/couch_views/src/couch_views_util.erl index cfc89bdaf..24e2f8a2d 100644 --- a/src/couch_views/src/couch_views_util.erl +++ b/src/couch_views/src/couch_views_util.erl @@ -14,7 +14,8 @@ -export([ - ddoc_to_mrst/2 + ddoc_to_mrst/2, + validate_args/1 ]). @@ -76,3 +77,145 @@ ddoc_to_mrst(DbName, #doc{id=Id, body={Fields}}) -> }, SigInfo = {Views, Language, DesignOpts, couch_index_util:sort_lib(Lib)}, {ok, IdxState#mrst{sig=couch_hash:md5_hash(term_to_binary(SigInfo))}}. + + +% This is mostly a copy of couch_mrview_util:validate_args/1 but it doesn't +% update start / end keys and also throws a not_implemented error for reduce +% +validate_args(#mrargs{} = Args) -> + GroupLevel = determine_group_level(Args), + Reduce = Args#mrargs.reduce, + + case Reduce =/= undefined orelse Args#mrargs.view_type == red of + true -> throw(not_implemented); + false -> ok + end, + + case Reduce == undefined orelse is_boolean(Reduce) of + true -> ok; + _ -> mrverror(<<"Invalid `reduce` value.">>) + end, + + case {Args#mrargs.view_type, Reduce} of + {map, true} -> mrverror(<<"Reduce is invalid for map-only views.">>); + _ -> ok + end, + + case {Args#mrargs.view_type, GroupLevel, Args#mrargs.keys} of + {red, exact, _} -> ok; + {red, _, KeyList} when is_list(KeyList) -> + Msg = <<"Multi-key fetchs for reduce views must use `group=true`">>, + mrverror(Msg); + _ -> ok + end, + + case Args#mrargs.keys of + Keys when is_list(Keys) -> ok; + undefined -> ok; + _ -> mrverror(<<"`keys` must be an array of strings.">>) + end, + + case {Args#mrargs.keys, Args#mrargs.start_key, + Args#mrargs.end_key} of + {undefined, _, _} -> ok; + {[], _, _} -> ok; + {[_|_], undefined, undefined} -> ok; + _ -> mrverror(<<"`keys` is incompatible with `key`" + ", `start_key` and `end_key`">>) + end, + + case Args#mrargs.start_key_docid of + undefined -> ok; + SKDocId0 when is_binary(SKDocId0) -> ok; + _ -> mrverror(<<"`start_key_docid` must be a string.">>) + end, + + case Args#mrargs.end_key_docid of + undefined -> ok; + EKDocId0 when is_binary(EKDocId0) -> ok; + _ -> mrverror(<<"`end_key_docid` must be a string.">>) + end, + + case Args#mrargs.direction of + fwd -> ok; + rev -> ok; + _ -> mrverror(<<"Invalid direction.">>) + end, + + case {Args#mrargs.limit >= 0, Args#mrargs.limit == undefined} of + {true, _} -> ok; + {_, true} -> ok; + _ -> mrverror(<<"`limit` must be a positive integer.">>) + end, + + case Args#mrargs.skip < 0 of + true -> mrverror(<<"`skip` must be >= 0">>); + _ -> ok + end, + + case {Args#mrargs.view_type, GroupLevel} of + {red, exact} -> ok; + {_, 0} -> ok; + {red, Int} when is_integer(Int), Int >= 0 -> ok; + {red, _} -> mrverror(<<"`group_level` must be >= 0">>); + {map, _} -> mrverror(<<"Invalid use of grouping on a map view.">>) + end, + + case Args#mrargs.stable of + true -> ok; + false -> ok; + _ -> mrverror(<<"Invalid value for `stable`.">>) + end, + + case Args#mrargs.update of + true -> ok; + false -> ok; + lazy -> ok; + _ -> mrverror(<<"Invalid value for `update`.">>) + end, + + case is_boolean(Args#mrargs.inclusive_end) of + true -> ok; + _ -> mrverror(<<"Invalid value for `inclusive_end`.">>) + end, + + case {Args#mrargs.view_type, Args#mrargs.include_docs} of + {red, true} -> mrverror(<<"`include_docs` is invalid for reduce">>); + {_, ID} when is_boolean(ID) -> ok; + _ -> mrverror(<<"Invalid value for `include_docs`">>) + end, + + case {Args#mrargs.view_type, Args#mrargs.conflicts} of + {_, undefined} -> ok; + {map, V} when is_boolean(V) -> ok; + {red, undefined} -> ok; + {map, _} -> mrverror(<<"Invalid value for `conflicts`.">>); + {red, _} -> mrverror(<<"`conflicts` is invalid for reduce views.">>) + end, + + case is_boolean(Args#mrargs.sorted) of + true -> ok; + _ -> mrverror(<<"Invalid value for `sorted`.">>) + end, + + Args#mrargs{group_level=GroupLevel}. + + +determine_group_level(#mrargs{group=undefined, group_level=undefined}) -> + 0; + +determine_group_level(#mrargs{group=false, group_level=undefined}) -> + 0; + +determine_group_level(#mrargs{group=false, group_level=Level}) when Level > 0 -> + mrverror(<<"Can't specify group=false and group_level>0 at the same time">>); + +determine_group_level(#mrargs{group=true, group_level=undefined}) -> + exact; + +determine_group_level(#mrargs{group_level=GroupLevel}) -> + GroupLevel. + + +mrverror(Mesg) -> + throw({query_parse_error, Mesg}). diff --git a/test/elixir/test/basics_test.exs b/test/elixir/test/basics_test.exs index c29e48e04..723d65974 100644 --- a/test/elixir/test/basics_test.exs +++ b/test/elixir/test/basics_test.exs @@ -316,4 +316,44 @@ defmodule BasicsTest do # TODO assert true end + + @tag :with_db + test "_all_docs/queries works", context do + db_name = context[:db_name] + + resp = Couch.post("/#{db_name}/_all_docs/queries", body: %{:queries => []}) + assert resp.status_code == 200 + assert resp.body["results"] == [] + + assert Couch.put("/#{db_name}/doc1", body: %{:a => 1}).body["ok"] + + body = %{ + :queries => [ + %{:limit => 1}, + %{:limit => 0} + ] + } + resp = Couch.post("/#{db_name}/_all_docs/queries", body: body) + assert resp.status_code == 200 + + assert Map.has_key?(resp.body, "results") + results = Enum.sort(resp.body["results"]) + assert length(results) == 2 + [res1, res2] = results + + assert res1 == %{"offset" => :null, "rows" => [], "total_rows" => 1} + + assert res2["offset"] == :null + assert res2["total_rows"] == 1 + rows = res2["rows"] + + assert length(rows) == 1 + [row] = rows + assert row["id"] == "doc1" + assert row["key"] == "doc1" + + val = row["value"] + assert Map.has_key?(val, "rev") + end + end -- cgit v1.2.1 From 3f5ae3a6dde4d66248410495591edf7d38619a78 Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Fri, 30 Aug 2019 20:17:42 -0400 Subject: Implement _design_docs and _local_docs `_design_docs` reuses `_all_docs` logic and adjusts `start_key` and `end_key` to be within the `_design/` prefix range. Namespace setting was simplified to never have an `undefined` value. This way it doesn't need extra case statements to handle it further down in the FDB code. --- src/chttpd/src/chttpd_db.erl | 38 +++++++------ src/fabric/src/fabric2_db.erl | 89 +++++++++++++++++++++++++++-- src/fabric/src/fabric2_fdb.erl | 6 ++ test/elixir/test/basics_test.exs | 119 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 231 insertions(+), 21 deletions(-) diff --git a/src/chttpd/src/chttpd_db.erl b/src/chttpd/src/chttpd_db.erl index 21c2c2c73..21313c219 100644 --- a/src/chttpd/src/chttpd_db.erl +++ b/src/chttpd/src/chttpd_db.erl @@ -878,22 +878,21 @@ all_docs_view(Req, Db, Keys, OP) -> send_all_docs(Db, #mrargs{keys = undefined} = Args, UserCtx, VAcc0) -> Opts = all_docs_view_opts(Args, UserCtx), - Acc = {iter, Db, Args, VAcc0}, + NS = couch_util:get_value(namespace, Opts), + FoldFun = case NS of + <<"_all_docs">> -> fold_docs; + <<"_design">> -> fold_design_docs; + <<"_local">> -> fold_local_docs + end, ViewCb = fun view_cb/2, - {ok, {iter, _, _, VAcc1}} = fabric2_db:fold_docs(Db, ViewCb, Acc, Opts), + Acc = {iter, Db, Args, VAcc0}, + {ok, {iter, _, _, VAcc1}} = fabric2_db:FoldFun(Db, ViewCb, Acc, Opts), VAcc1. send_all_docs_keys(Db, #mrargs{} = Args, UserCtx, VAcc0) -> Keys = apply_args_to_keylist(Args, Args#mrargs.keys), - %% namespace can be _set_ to `undefined`, so we - %% want simulate enum here - NS = case couch_util:get_value(namespace, Args#mrargs.extra) of - <<"_all_docs">> -> <<"_all_docs">>; - <<"_design">> -> <<"_design">>; - <<"_local">> -> <<"_local">>; - _ -> <<"_all_docs">> - end, + NS = couch_util:get_value(namespace, Args#mrargs.extra), TotalRows = fabric2_db:get_doc_count(Db, NS), Meta = case Args#mrargs.update_seq of true -> @@ -908,7 +907,7 @@ send_all_docs_keys(Db, #mrargs{} = Args, UserCtx, VAcc0) -> _ -> Args#mrargs.doc_options end ++ [{user_ctx, UserCtx}], IncludeDocs = Args#mrargs.include_docs, - VAcc2 = lists:foldl(fun(DocId, Acc) -> + lists:foldl(fun(DocId, Acc) -> OpenOpts = [deleted | DocOpts], Row0 = case fabric2_db:open_doc(Db, DocId, OpenOpts) of {not_found, missing} -> @@ -950,6 +949,7 @@ send_all_docs_keys(Db, #mrargs{} = Args, UserCtx, VAcc0) -> all_docs_view_opts(Args, UserCtx) -> + NS = couch_util:get_value(namespace, Args#mrargs.extra), StartKey = case Args#mrargs.start_key of undefined -> Args#mrargs.start_key_docid; SKey -> SKey @@ -958,18 +958,23 @@ all_docs_view_opts(Args, UserCtx) -> undefined -> Args#mrargs.end_key_docid; EKey -> EKey end, + StartKeyOpts = case StartKey of + <<_/binary>> -> [{start_key, StartKey}]; + undefined -> [] + end, EndKeyOpts = case {EndKey, Args#mrargs.inclusive_end} of {<<_/binary>>, false} -> [{end_key_gt, EndKey}]; - {_, _} -> [{end_key, EndKey}] + {<<_/binary>>, true} -> [{end_key, EndKey}]; + {undefined, _} -> [] end, [ {user_ctx, UserCtx}, {dir, Args#mrargs.direction}, - {start_key, StartKey}, {limit, Args#mrargs.limit}, {skip, Args#mrargs.skip}, - {update_seq, Args#mrargs.update_seq} - ] ++ EndKeyOpts. + {update_seq, Args#mrargs.update_seq}, + {namespace, NS} + ] ++ StartKeyOpts ++ EndKeyOpts. apply_args_to_keylist(Args, Keys0) -> @@ -2050,8 +2055,7 @@ monitor_attachments(Att) -> demonitor_refs(Refs) when is_list(Refs) -> [demonitor(Ref) || Ref <- Refs]. -set_namespace(<<"_all_docs">>, Args) -> - set_namespace(undefined, Args); + set_namespace(<<"_local_docs">>, Args) -> set_namespace(<<"_local">>, Args); set_namespace(<<"_design_docs">>, Args) -> diff --git a/src/fabric/src/fabric2_db.erl b/src/fabric/src/fabric2_db.erl index 2afb780fa..b2945b68c 100644 --- a/src/fabric/src/fabric2_db.erl +++ b/src/fabric/src/fabric2_db.erl @@ -102,8 +102,8 @@ fold_docs/3, fold_docs/4, - %% fold_local_docs/4, - %% fold_design_docs/4, + fold_design_docs/4, + fold_local_docs/4, fold_changes/4, fold_changes/5, %% count_changes_since/2, @@ -136,6 +136,8 @@ "(\\.[0-9]{10,})?$" % but allow an optional shard timestamp at the end ). +-define(FIRST_DDOC_KEY, <<"_design/">>). +-define(LAST_DDOC_KEY, <<"_design0">>). -define(RETURN(Term), throw({?MODULE, Term})). @@ -314,6 +316,9 @@ get_doc_count(Db) -> get_doc_count(Db, <<"doc_count">>). +get_doc_count(Db, undefined) -> + get_doc_count(Db, <<"doc_count">>); + get_doc_count(Db, <<"_all_docs">>) -> get_doc_count(Db, <<"doc_count">>); @@ -729,8 +734,8 @@ fold_docs(Db, UserFun, UserAcc0, Options) -> } = TxDb, Prefix = erlfdb_tuple:pack({?DB_ALL_DOCS}, DbPrefix), - DocCount = get_doc_count(TxDb), - + NS = couch_util:get_value(namespace, Options), + DocCount = get_doc_count(TxDb, NS), Meta = case lists:keyfind(update_seq, 1, Options) of {_, true} -> UpdateSeq = fabric2_db:get_update_seq(TxDb), @@ -758,6 +763,42 @@ fold_docs(Db, UserFun, UserAcc0, Options) -> end). +fold_design_docs(Db, UserFun, UserAcc0, Options1) -> + Options2 = set_design_doc_keys(Options1), + fold_docs(Db, UserFun, UserAcc0, Options2). + + +fold_local_docs(Db, UserFun, UserAcc0, Options) -> + fabric2_fdb:transactional(Db, fun(TxDb) -> + try + #{ + db_prefix := DbPrefix + } = TxDb, + + Prefix = erlfdb_tuple:pack({?DB_LOCAL_DOCS}, DbPrefix), + DocCount = get_doc_count(TxDb, <<"doc_local_count">>), + Meta = [{total, DocCount}, {offset, null}], + + UserAcc1 = maybe_stop(UserFun({meta, Meta}, UserAcc0)), + + UserAcc2 = fabric2_fdb:fold_range(TxDb, Prefix, fun({K, V}, Acc) -> + {DocId} = erlfdb_tuple:unpack(K, Prefix), + LDoc = fabric2_fdb:get_local_doc(TxDb, DocId, V), + #doc{revs = {Pos, [Rev]}} = LDoc, + maybe_stop(UserFun({row, [ + {id, DocId}, + {key, DocId}, + {value, {[{rev, couch_doc:rev_to_str({Pos, Rev})}]}} + ]}, Acc)) + end, UserAcc1, Options), + + {ok, maybe_stop(UserFun(complete, UserAcc2))} + catch throw:{stop, FinalUserAcc} -> + {ok, FinalUserAcc} + end + end). + + fold_changes(Db, SinceSeq, UserFun, UserAcc) -> fold_changes(Db, SinceSeq, UserFun, UserAcc, []). @@ -1615,3 +1656,43 @@ maybe_stop({ok, Acc}) -> Acc; maybe_stop({stop, Acc}) -> throw({stop, Acc}). + + +set_design_doc_keys(Options1) -> + Dir = couch_util:get_value(dir, Options1, fwd), + Options2 = set_design_doc_start_key(Options1, Dir), + set_design_doc_end_key(Options2, Dir). + + +set_design_doc_start_key(Options, fwd) -> + Key1 = couch_util:get_value(start_key, Options, ?FIRST_DDOC_KEY), + Key2 = max(Key1, ?FIRST_DDOC_KEY), + lists:keystore(start_key, 1, Options, {start_key, Key2}); + +set_design_doc_start_key(Options, rev) -> + Key1 = couch_util:get_value(start_key, Options, ?LAST_DDOC_KEY), + Key2 = min(Key1, ?LAST_DDOC_KEY), + lists:keystore(start_key, 1, Options, {start_key, Key2}). + + +set_design_doc_end_key(Options, fwd) -> + case couch_util:get_value(end_key_gt, Options) of + undefined -> + Key1 = couch_util:get_value(end_key, Options, ?LAST_DDOC_KEY), + Key2 = min(Key1, ?LAST_DDOC_KEY), + lists:keystore(end_key, 1, Options, {end_key, Key2}); + EKeyGT -> + Key2 = min(EKeyGT, ?LAST_DDOC_KEY), + lists:keystore(end_key_gt, 1, Options, {end_key_gt, Key2}) + end; + +set_design_doc_end_key(Options, rev) -> + case couch_util:get_value(end_key_gt, Options) of + undefined -> + Key1 = couch_util:get_value(end_key, Options, ?FIRST_DDOC_KEY), + Key2 = max(Key1, ?FIRST_DDOC_KEY), + lists:keystore(end_key, 1, Options, {end_key, Key2}); + EKeyGT -> + Key2 = max(EKeyGT, ?FIRST_DDOC_KEY), + lists:keystore(end_key_gt, 1, Options, {end_key_gt, Key2}) + end. diff --git a/src/fabric/src/fabric2_fdb.erl b/src/fabric/src/fabric2_fdb.erl index df3709673..391122ee3 100644 --- a/src/fabric/src/fabric2_fdb.erl +++ b/src/fabric/src/fabric2_fdb.erl @@ -43,6 +43,7 @@ get_doc_body_future/3, get_doc_body_wait/4, get_local_doc/2, + get_local_doc/3, write_doc/6, write_local_doc/2, @@ -498,6 +499,11 @@ get_local_doc(#{} = Db0, <> = DocId) -> fdb_to_local_doc(Db, DocId, Val). +get_local_doc(#{} = Db, <> = DocId, Val) + when is_binary(Val) orelse Val =:= not_found -> + fdb_to_local_doc(ensure_current(Db), DocId, Val). + + write_doc(#{} = Db0, Doc, NewWinner0, OldWinner, ToUpdate, ToRemove) -> #{ tx := Tx, diff --git a/test/elixir/test/basics_test.exs b/test/elixir/test/basics_test.exs index 723d65974..1f15eb690 100644 --- a/test/elixir/test/basics_test.exs +++ b/test/elixir/test/basics_test.exs @@ -356,4 +356,123 @@ defmodule BasicsTest do assert Map.has_key?(val, "rev") end + @tag :with_db + test "_design_docs works", context do + db_name = context[:db_name] + body = %{:a => 1} + + resp = Couch.get("/#{db_name}/_design_docs") + assert resp.status_code == 200 + assert resp.body == %{"offset" => :null, "rows" => [], "total_rows" => 0} + + assert Couch.put("/#{db_name}/doc1", body: body).body["ok"] + + # Make sure regular documents didn't get picked up + resp = Couch.get("/#{db_name}/_design_docs") + assert resp.status_code == 200 + assert resp.body == %{"offset" => :null, "rows" => [], "total_rows" => 0} + + # Add _design/doc1 + assert Couch.put("/#{db_name}/_design/doc1", body: body).body["ok"] + resp = Couch.get("/#{db_name}/_design_docs") + assert resp.status_code == 200 + assert resp.body["total_rows"] == 1 + [row] = resp.body["rows"] + + assert row["id"] == "_design/doc1" + assert row["key"] == "_design/doc1" + + val = row["value"] + assert Map.has_key?(val, "rev") + + # Add _design/doc5 + assert Couch.put("/#{db_name}/_design/doc5", body: body).body["ok"] + resp = Couch.get("/#{db_name}/_design_docs") + assert resp.status_code == 200 + [row1, row2] = resp.body["rows"] + assert row1["id"] == "_design/doc1" + assert row2["id"] == "_design/doc5" + + # descending=true + resp = Couch.get("/#{db_name}/_design_docs?descending=true") + assert resp.status_code == 200 + [row1, row2] = resp.body["rows"] + assert row1["id"] == "_design/doc5" + assert row2["id"] == "_design/doc1" + + # start_key=doc2 + resp = Couch.get("/#{db_name}/_design_docs?start_key=\"_design/doc2\"") + assert resp.status_code == 200 + [row] = resp.body["rows"] + assert row["id"] == "_design/doc5" + + # end_key=doc2 + resp = Couch.get("/#{db_name}/_design_docs?end_key=\"_design/doc2\"") + assert resp.status_code == 200 + [row] = resp.body["rows"] + assert row["id"] == "_design/doc1" + + # inclusive_end=false + qstr = "start_key=\"_design/doc2\"&end_key=\"_design/doc5\"&inclusive_end=false" + resp = Couch.get("/#{db_name}/_design_docs?" <> qstr) + assert resp.status_code == 200 + assert resp.body == %{"offset" => :null, "rows" => [], "total_rows" => 2} + end + + @tag :with_db + test "_local_docs works", context do + db_name = context[:db_name] + body = %{:a => 1} + + resp = Couch.get("/#{db_name}/_local_docs") + assert resp.status_code == 200 + assert resp.body == %{"offset" => :null, "rows" => [], "total_rows" => 0} + + # Add _local/doc1 + assert Couch.put("/#{db_name}/_local/doc1", body: body).body["ok"] + resp = Couch.get("/#{db_name}/_local_docs") + assert resp.status_code == 200 + assert resp.body["total_rows"] == 1 + [row] = resp.body["rows"] + + assert row["id"] == "_local/doc1" + assert row["key"] == "_local/doc1" + + val = row["value"] + assert Map.has_key?(val, "rev") + + # Add _local/doc5 + assert Couch.put("/#{db_name}/_local/doc5", body: body).body["ok"] + resp = Couch.get("/#{db_name}/_local_docs") + assert resp.status_code == 200 + [row1, row2] = resp.body["rows"] + assert row1["id"] == "_local/doc1" + assert row2["id"] == "_local/doc5" + + # descending=true + resp = Couch.get("/#{db_name}/_local_docs?descending=true") + assert resp.status_code == 200 + [row1, row2] = resp.body["rows"] + assert row1["id"] == "_local/doc5" + assert row2["id"] == "_local/doc1" + + # start_key=doc2 + resp = Couch.get("/#{db_name}/_local_docs?start_key=\"_local/doc2\"") + assert resp.status_code == 200 + [row] = resp.body["rows"] + assert row["id"] == "_local/doc5" + + # end_key=doc2 + resp = Couch.get("/#{db_name}/_local_docs?end_key=\"_local/doc2\"") + assert resp.status_code == 200 + [row] = resp.body["rows"] + assert row["id"] == "_local/doc1" + + # inclusive_end=false + qstr = "start_key=\"_local/doc2\"&end_key=\"_local/doc5\"&inclusive_end=false" + resp = Couch.get("/#{db_name}/_local_docs?" <> qstr) + assert resp.status_code == 200 + assert resp.body == %{"offset" => :null, "rows" => [], "total_rows" => 2} + end + end -- cgit v1.2.1 From ee1ec0fc7aade40699b45bd4be01c23323295b0d Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Thu, 5 Sep 2019 14:50:58 -0400 Subject: Add a max db name length config option This is done for compatibility with CouchDB < 4.x where this limit was implicitly enforced by the file system's max filename size. The default value enforces the same limit for FDB in case users decide to replicate back and forth between old and new instances with 'create_target = true' option. --- rel/overlay/etc/default.ini | 7 +++++++ src/fabric/src/fabric2_db.erl | 20 ++++++++++++++++++++ test/elixir/test/basics_test.exs | 7 +++++++ 3 files changed, 34 insertions(+) diff --git a/rel/overlay/etc/default.ini b/rel/overlay/etc/default.ini index 85dc62b30..e6f2f5441 100644 --- a/rel/overlay/etc/default.ini +++ b/rel/overlay/etc/default.ini @@ -36,6 +36,13 @@ default_security = admin_only ; influenced directly with this setting - increase for faster processing at the ; expense of more memory usage. changes_doc_ids_optimization_threshold = 100 +; +; Maximum database name length. The default setting is chosen for CouchDB < 4.x +; compatibility, where it was determined by the maximum file name size. On most +; current file systems that is 255, and with timestamp and ".couch" extension +; subtracted it ends up as 238. +;max_database_name_length = 238 +; ; Maximum document ID length. Can be set to an integer or 'infinity'. ;max_document_id_length = infinity ; diff --git a/src/fabric/src/fabric2_db.erl b/src/fabric/src/fabric2_db.erl index b2945b68c..f3036e4c3 100644 --- a/src/fabric/src/fabric2_db.erl +++ b/src/fabric/src/fabric2_db.erl @@ -131,6 +131,10 @@ -include("fabric2.hrl"). +% Default max database name length is based on CouchDb < 4.x compatibility. See +% default.ini entry for additional information. +-define(DEFAULT_MAX_DATABASE_NAME_LENGTH, 238). + -define(DBNAME_REGEX, "^[a-z][a-z0-9\\_\\$()\\+\\-\\/]*" % use the stock CouchDB regex "(\\.[0-9]{10,})?$" % but allow an optional shard timestamp at the end @@ -866,6 +870,22 @@ validate_dbname(DbName) when is_binary(DbName) -> DbName, Normalized, fun validate_dbname_int/2). validate_dbname_int(DbName, Normalized) when is_binary(DbName) -> + case validate_dbname_length(DbName) of + ok -> validate_dbname_pat(DbName, Normalized); + {error, _} = Error -> Error + end. + + +validate_dbname_length(DbName) -> + MaxLength = config:get_integer("couchdb", "max_database_name_length", + ?DEFAULT_MAX_DATABASE_NAME_LENGTH), + case byte_size(DbName) =< MaxLength of + true -> ok; + false -> {error, {database_name_too_long, DbName}} + end. + + +validate_dbname_pat(DbName, Normalized) -> DbNoExt = couch_util:drop_dot_couch_ext(DbName), case re:run(DbNoExt, ?DBNAME_REGEX, [{capture,none}, dollar_endonly]) of match -> diff --git a/test/elixir/test/basics_test.exs b/test/elixir/test/basics_test.exs index 1f15eb690..cd46e6351 100644 --- a/test/elixir/test/basics_test.exs +++ b/test/elixir/test/basics_test.exs @@ -45,6 +45,13 @@ defmodule BasicsTest do {:ok, _} = delete_db(db_name) end + test "Exceeding configured DB name size limit returns an error" do + db_name = String.duplicate("x", 239) + resp = Couch.put("/#{db_name}") + assert resp.status_code == 400 + assert resp.body["error"] == "database_name_too_long" + end + @tag :with_db test "Created database has appropriate db info name", context do db_name = context[:db_name] -- cgit v1.2.1 From abf97c30888ef5895687d44add0d3dba27098230 Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Thu, 5 Sep 2019 16:03:17 -0400 Subject: Handle update_seq for _local_docs On master `_local_docs&update_seq=true` returns the update sequence of the database. Even though it might not make much sense since updating local docs doesn't bump the sequence, it's probably a good idea to stay consistent. It's also worth mentioning another inconsistency is when FDB returns a `total_rows` count for `_local_docs` while master returns `null`. I think that's probably acceptable. Master would return the count if had it available easily and having it seems like a useful thing and an improvement. --- src/fabric/src/fabric2_db.erl | 25 ++++++++++++++----------- test/elixir/test/basics_test.exs | 10 ++++++++++ 2 files changed, 24 insertions(+), 11 deletions(-) diff --git a/src/fabric/src/fabric2_db.erl b/src/fabric/src/fabric2_db.erl index f3036e4c3..853b5021a 100644 --- a/src/fabric/src/fabric2_db.erl +++ b/src/fabric/src/fabric2_db.erl @@ -738,15 +738,7 @@ fold_docs(Db, UserFun, UserAcc0, Options) -> } = TxDb, Prefix = erlfdb_tuple:pack({?DB_ALL_DOCS}, DbPrefix), - NS = couch_util:get_value(namespace, Options), - DocCount = get_doc_count(TxDb, NS), - Meta = case lists:keyfind(update_seq, 1, Options) of - {_, true} -> - UpdateSeq = fabric2_db:get_update_seq(TxDb), - [{update_seq, UpdateSeq}]; - _ -> - [] - end ++ [{total, DocCount}, {offset, null}], + Meta = get_all_docs_meta(TxDb, Options), UserAcc1 = maybe_stop(UserFun({meta, Meta}, UserAcc0)), @@ -780,8 +772,7 @@ fold_local_docs(Db, UserFun, UserAcc0, Options) -> } = TxDb, Prefix = erlfdb_tuple:pack({?DB_LOCAL_DOCS}, DbPrefix), - DocCount = get_doc_count(TxDb, <<"doc_local_count">>), - Meta = [{total, DocCount}, {offset, null}], + Meta = get_all_docs_meta(TxDb, Options), UserAcc1 = maybe_stop(UserFun({meta, Meta}, UserAcc0)), @@ -959,6 +950,18 @@ new_revid(Db, Doc) -> }. +get_all_docs_meta(TxDb, Options) -> + NS = couch_util:get_value(namespace, Options), + DocCount = get_doc_count(TxDb, NS), + case lists:keyfind(update_seq, 1, Options) of + {_, true} -> + UpdateSeq = fabric2_db:get_update_seq(TxDb), + [{update_seq, UpdateSeq}]; + _ -> + [] + end ++ [{total, DocCount}, {offset, null}]. + + maybe_set_user_ctx(Db, Options) -> case fabric2_util:get_value(user_ctx, Options) of #user_ctx{} = UserCtx -> diff --git a/test/elixir/test/basics_test.exs b/test/elixir/test/basics_test.exs index cd46e6351..3ed21dac8 100644 --- a/test/elixir/test/basics_test.exs +++ b/test/elixir/test/basics_test.exs @@ -424,6 +424,11 @@ defmodule BasicsTest do resp = Couch.get("/#{db_name}/_design_docs?" <> qstr) assert resp.status_code == 200 assert resp.body == %{"offset" => :null, "rows" => [], "total_rows" => 2} + + # update_seq=true + resp = Couch.get("/#{db_name}/_design_docs?update_seq=true") + assert resp.status_code == 200 + assert Map.has_key?(resp.body, "update_seq") end @tag :with_db @@ -480,6 +485,11 @@ defmodule BasicsTest do resp = Couch.get("/#{db_name}/_local_docs?" <> qstr) assert resp.status_code == 200 assert resp.body == %{"offset" => :null, "rows" => [], "total_rows" => 2} + + # update_seq=true + resp = Couch.get("/#{db_name}/_local_docs?update_seq=true") + assert resp.status_code == 200 + assert Map.has_key?(resp.body, "update_seq") end end -- cgit v1.2.1 From 15aff7ab36cee3038f93d1cd51f0102356dbd2a0 Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Mon, 9 Sep 2019 10:37:39 -0400 Subject: Handle _changes since=now param On master this is happening in `fabric_view_changes` but on FDB we don't go through that module anymore, so we do in `chttpd_changes` module instead. --- src/chttpd/src/chttpd_changes.erl | 8 +++----- src/chttpd/src/chttpd_db.erl | 3 +++ 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/chttpd/src/chttpd_changes.erl b/src/chttpd/src/chttpd_changes.erl index d4318b5ce..3ec57ae6e 100644 --- a/src/chttpd/src/chttpd_changes.erl +++ b/src/chttpd/src/chttpd_changes.erl @@ -69,11 +69,9 @@ handle_db_changes(Args0, Req, Db0) -> ) end, Start = fun() -> - StartSeq = case Dir of - rev -> - fabric2_fdb:get_update_seq(Db); - fwd -> - Since + StartSeq = case Dir =:= rev orelse Since =:= now of + true -> fabric2_db:get_update_seq(Db0); + false -> Since end, {Db0, StartSeq} end, diff --git a/src/chttpd/src/chttpd_db.erl b/src/chttpd/src/chttpd_db.erl index 21313c219..38de7f8dc 100644 --- a/src/chttpd/src/chttpd_db.erl +++ b/src/chttpd/src/chttpd_db.erl @@ -1971,6 +1971,9 @@ parse_changes_query(Req) -> end. +parse_since_seq(<<"now">>) -> + now; + parse_since_seq(Seq) when is_binary(Seq), size(Seq) > 30 -> throw({bad_request, url_encoded_since_seq}); -- cgit v1.2.1 From 4e4b1a8dc62a2a3599b9674c517f42bcef5f5b74 Mon Sep 17 00:00:00 2001 From: Garren Smith Date: Mon, 9 Sep 2019 16:01:41 +0200 Subject: Fetch docs in parallel for view indexing --- src/couch_views/src/couch_views_indexer.erl | 58 ++++++++++++++++++++++------- 1 file changed, 45 insertions(+), 13 deletions(-) diff --git a/src/couch_views/src/couch_views_indexer.erl b/src/couch_views/src/couch_views_indexer.erl index 60c819486..83d1b6aa2 100644 --- a/src/couch_views/src/couch_views_indexer.erl +++ b/src/couch_views/src/couch_views_indexer.erl @@ -99,7 +99,8 @@ update(#{} = Db, Mrst0, State0) -> last_seq := LastSeq } = State2, - {Mrst1, MappedDocs} = map_docs(Mrst0, DocAcc), + DocAcc1 = fetch_docs(TxDb, DocAcc), + {Mrst1, MappedDocs} = map_docs(Mrst0, DocAcc1), write_docs(TxDb, Mrst1, MappedDocs, State2), case Count < Limit of @@ -140,14 +141,12 @@ process_changes(Change, Acc) -> #{ doc_acc := DocAcc, count := Count, - tx_db := TxDb, design_opts := DesignOpts } = Acc, #{ id := Id, - sequence := LastSeq, - deleted := Deleted + sequence := LastSeq } = Change, IncludeDesign = lists:keymember(<<"include_design">>, 1, DesignOpts), @@ -159,16 +158,8 @@ process_changes(Change, Acc) -> last_seq => LastSeq }); _ -> - % Making a note here that we should make fetching all the docs - % a parallel fdb operation - {ok, Doc} = case Deleted of - true -> {ok, []}; - false -> fabric2_db:open_doc(TxDb, Id) - end, - - Change1 = maps:put(doc, Doc, Change), Acc#{ - doc_acc := DocAcc ++ [Change1], + doc_acc := DocAcc ++ [Change], count := Count + 1, last_seq := LastSeq } @@ -215,6 +206,47 @@ write_docs(TxDb, Mrst, Docs, State) -> couch_views_fdb:set_update_seq(TxDb, Sig, LastSeq). +fetch_docs(Db, Changes) -> + {Deleted, NotDeleted} = lists:partition(fun(Doc) -> + #{deleted := Deleted} = Doc, + Deleted + end, Changes), + + RevState = lists:foldl(fun(Change, Acc) -> + #{id := Id} = Change, + RevFuture = fabric2_fdb:get_winning_revs_future(Db, Id, 1), + Acc#{ + RevFuture => {Id, Change} + } + end, #{}, NotDeleted), + + RevFutures = maps:keys(RevState), + BodyState = lists:foldl(fun(RevFuture, Acc) -> + {Id, Change} = maps:get(RevFuture, RevState), + Revs = fabric2_fdb:get_winning_revs_wait(Db, RevFuture), + + % I'm assuming that in this changes transaction that the winning + % doc body exists since it is listed in the changes feed as not deleted + #{winner := true} = RevInfo = lists:last(Revs), + BodyFuture = fabric2_fdb:get_doc_body_future(Db, Id, RevInfo), + Acc#{ + BodyFuture => {Id, RevInfo, Change} + } + end, #{}, erlfdb:wait_for_all(RevFutures)), + + BodyFutures = maps:keys(BodyState), + ChangesWithDocs = lists:map(fun (BodyFuture) -> + {Id, RevInfo, Change} = maps:get(BodyFuture, BodyState), + Doc = fabric2_fdb:get_doc_body_wait(Db, Id, RevInfo, BodyFuture), + Change#{doc => Doc} + end, erlfdb:wait_for_all(BodyFutures)), + + % This combines the deleted changes with the changes that contain docs + % Important to note that this is now unsorted. Which is fine for now + % But later could be an issue if we split this across transactions + Deleted ++ ChangesWithDocs. + + start_query_server(#mrst{qserver = nil} = Mrst) -> #mrst{ language = Language, -- cgit v1.2.1 From 3b9a0e344de661928b3afed412218d50100e5060 Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Mon, 9 Sep 2019 13:40:57 -0400 Subject: Fix _changes heartbeat option `TimeoutFun` was already returning `{ok|stop, UserAcc}` so there was no need to wrap it another `{ok, ...} tuple. Also TimeoutFun was calling user with`{timeout, _ResponseType}` not just timeout, so added a clause to handle that as well. --- src/chttpd/src/chttpd_changes.erl | 8 ++++---- src/chttpd/src/chttpd_db.erl | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/chttpd/src/chttpd_changes.erl b/src/chttpd/src/chttpd_changes.erl index 3ec57ae6e..b2084fab6 100644 --- a/src/chttpd/src/chttpd_changes.erl +++ b/src/chttpd/src/chttpd_changes.erl @@ -389,10 +389,10 @@ get_changes_timeout(Args, Callback) -> end; true -> {DefaultTimeout, - fun(UserAcc) -> {ok, Callback({timeout, ResponseType}, UserAcc)} end}; + fun(UserAcc) -> Callback({timeout, ResponseType}, UserAcc) end}; _ -> {lists:min([DefaultTimeout, Heartbeat]), - fun(UserAcc) -> {ok, Callback({timeout, ResponseType}, UserAcc)} end} + fun(UserAcc) -> Callback({timeout, ResponseType}, UserAcc) end} end. start_sending_changes(Callback, UserAcc) -> @@ -729,9 +729,9 @@ maybe_heartbeat(Timeout, TimeoutFun, Acc) -> Now = os:timestamp(), case timer:now_diff(Now, Before) div 1000 >= Timeout of true -> - Acc2 = TimeoutFun(Acc), + {StopOrGo, Acc2} = TimeoutFun(Acc), put(last_changes_heartbeat, Now), - Acc2; + {StopOrGo, Acc2}; false -> {ok, Acc} end diff --git a/src/chttpd/src/chttpd_db.erl b/src/chttpd/src/chttpd_db.erl index 38de7f8dc..5178d31f3 100644 --- a/src/chttpd/src/chttpd_db.erl +++ b/src/chttpd/src/chttpd_db.erl @@ -223,7 +223,7 @@ changes_callback(waiting_for_updates, Acc) -> mochi = Resp1, chunks_sent = ChunksSent + 1 }}; -changes_callback(timeout, Acc) -> +changes_callback({timeout, _ResponseType}, Acc) -> #cacc{mochi = Resp, chunks_sent = ChunksSent} = Acc, {ok, Resp1} = chttpd:send_delayed_chunk(Resp, "\n"), {ok, Acc#cacc{mochi = Resp1, chunks_sent = ChunksSent + 1}}; -- cgit v1.2.1 From 95ae5ba098c7be5dab341dcf446187762546960c Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Mon, 16 Sep 2019 13:48:31 -0400 Subject: Implement setting and getting _revs_limit --- src/chttpd/src/chttpd_db.erl | 4 ++-- test/elixir/test/basics_test.exs | 17 +++++++++++++++++ 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/src/chttpd/src/chttpd_db.erl b/src/chttpd/src/chttpd_db.erl index 5178d31f3..b65f79c19 100644 --- a/src/chttpd/src/chttpd_db.erl +++ b/src/chttpd/src/chttpd_db.erl @@ -763,11 +763,11 @@ db_req(#httpd{path_parts=[_,<<"_security">>]}=Req, _Db) -> db_req(#httpd{method='PUT',path_parts=[_,<<"_revs_limit">>],user_ctx=Ctx}=Req, Db) -> Limit = chttpd:json_body(Req), - ok = fabric:set_revs_limit(Db, Limit, [{user_ctx,Ctx}]), + ok = fabric2_db:set_revs_limit(Db, Limit), send_json(Req, {[{<<"ok">>, true}]}); db_req(#httpd{method='GET',path_parts=[_,<<"_revs_limit">>]}=Req, Db) -> - send_json(Req, fabric:get_revs_limit(Db)); + send_json(Req, fabric2_db:get_revs_limit(Db)); db_req(#httpd{path_parts=[_,<<"_revs_limit">>]}=Req, _Db) -> send_method_not_allowed(Req, "PUT,GET"); diff --git a/test/elixir/test/basics_test.exs b/test/elixir/test/basics_test.exs index 3ed21dac8..35bace2ac 100644 --- a/test/elixir/test/basics_test.exs +++ b/test/elixir/test/basics_test.exs @@ -492,4 +492,21 @@ defmodule BasicsTest do assert Map.has_key?(resp.body, "update_seq") end + @tag :with_db + test "Check _revs_limit", context do + db_name = context[:db_name] + + resp = Couch.get("/#{db_name}/_revs_limit") + assert resp.status_code == 200 + assert resp.body == 1000 + + body = "999" + resp = Couch.put("/#{db_name}/_revs_limit", body: "999") + assert resp.status_code == 200 + assert resp.body["ok"] == true + + resp = Couch.get("/#{db_name}/_revs_limit") + assert resp.status_code == 200 + assert resp.body == 999 + end end -- cgit v1.2.1 From 9e1317fa1e95324fab7ac0381b6f1e712849de2b Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Mon, 16 Sep 2019 17:39:32 -0400 Subject: Make get_security and get_revs_limit calls consistent There are two fixes: 1) In `fabric2_fdb:get_config/1`, Db was matched before and after `ensure_current/1`. Only the db prefix path was used, which doesn't normally change, but it's worth fixing it anyway. 2) We used a cached version of the security document outside the transaction. Now we force it go through a transaction to call `fabric2_fdb:get_config/1` which call `ensure_current/1`. When done, we also update the cached Db handle. Do the same thing for revs_limit even thought it is not as critical as security. --- src/fabric/src/fabric2_db.erl | 14 +++++++--- src/fabric/src/fabric2_fdb.erl | 16 ++++++++++- src/fabric/test/fabric2_db_security_tests.erl | 38 ++++++++++++++------------- 3 files changed, 45 insertions(+), 23 deletions(-) diff --git a/src/fabric/src/fabric2_db.erl b/src/fabric/src/fabric2_db.erl index 853b5021a..8927ce365 100644 --- a/src/fabric/src/fabric2_db.erl +++ b/src/fabric/src/fabric2_db.erl @@ -346,12 +346,18 @@ get_pid(#{}) -> nil. -get_revs_limit(#{revs_limit := RevsLimit}) -> - RevsLimit. +get_revs_limit(#{} = Db) -> + RevsLimitBin = fabric2_fdb:transactional(Db, fun(TxDb) -> + fabric2_fdb:get_config(TxDb, <<"revs_limit">>) + end), + ?bin2uint(RevsLimitBin). -get_security(#{security_doc := SecurityDoc}) -> - SecurityDoc. +get_security(#{} = Db) -> + SecBin = fabric2_fdb:transactional(Db, fun(TxDb) -> + fabric2_fdb:get_config(TxDb, <<"security_doc">>) + end), + ?JSON_DECODE(SecBin). get_update_seq(#{} = Db) -> diff --git a/src/fabric/src/fabric2_fdb.erl b/src/fabric/src/fabric2_fdb.erl index 391122ee3..ccfeb3c06 100644 --- a/src/fabric/src/fabric2_fdb.erl +++ b/src/fabric/src/fabric2_fdb.erl @@ -28,6 +28,7 @@ get_info/1, get_config/1, + get_config/2, set_config/3, get_stat/2, @@ -338,7 +339,7 @@ get_config(#{} = Db) -> #{ tx := Tx, db_prefix := DbPrefix - } = Db = ensure_current(Db), + } = ensure_current(Db), {Start, End} = erlfdb_tuple:range({?DB_CONFIG}, DbPrefix), Future = erlfdb:get_range(Tx, Start, End), @@ -349,6 +350,19 @@ get_config(#{} = Db) -> end, erlfdb:wait(Future)). +get_config(#{} = Db, ConfigKey) -> + #{ + tx := Tx, + db_prefix := DbPrefix + } = ensure_current(Db), + + Key = erlfdb_tuple:pack({?DB_CONFIG, ConfigKey}, DbPrefix), + case erlfdb:wait(erlfdb:get(Tx, Key)) of + % config values are expected to be set so we blow if not_found + Val when Val =/= not_found -> Val + end. + + set_config(#{} = Db, ConfigKey, ConfigVal) -> #{ tx := Tx, diff --git a/src/fabric/test/fabric2_db_security_tests.erl b/src/fabric/test/fabric2_db_security_tests.erl index 979601167..b4df3b4dd 100644 --- a/src/fabric/test/fabric2_db_security_tests.erl +++ b/src/fabric/test/fabric2_db_security_tests.erl @@ -47,6 +47,7 @@ security_test_() -> setup() -> Ctx = test_util:start_couch([fabric]), DbName = ?tempdb(), + PubDbName = ?tempdb(), {ok, Db1} = fabric2_db:create(DbName, [{user_ctx, ?ADMIN_USER}]), SecProps = {[ {<<"admins">>, {[ @@ -60,40 +61,42 @@ setup() -> ]}, ok = fabric2_db:set_security(Db1, SecProps), {ok, Db2} = fabric2_db:open(DbName, []), - {Db2, Ctx}. + {ok, PubDb} = fabric2_db:create(PubDbName, []), + {Db2, PubDb, Ctx}. -cleanup({Db, Ctx}) -> +cleanup({Db, PubDb, Ctx}) -> ok = fabric2_db:delete(fabric2_db:name(Db), []), + ok = fabric2_db:delete(fabric2_db:name(PubDb), []), test_util:stop_couch(Ctx). -is_admin_name({Db, _}) -> +is_admin_name({Db, _, _}) -> UserCtx = #user_ctx{name = <<"admin_name1">>}, ?assertEqual(true, fabric2_db:is_admin(Db#{user_ctx := UserCtx})). -is_not_admin_name({Db, _}) -> +is_not_admin_name({Db, _, _}) -> UserCtx = #user_ctx{name = <<"member1">>}, ?assertEqual(false, fabric2_db:is_admin(Db#{user_ctx := UserCtx})). -is_admin_role({Db, _}) -> +is_admin_role({Db, _, _}) -> UserCtx = #user_ctx{roles = [<<"admin_role1">>]}, ?assertEqual(true, fabric2_db:is_admin(Db#{user_ctx := UserCtx})). -is_not_admin_role({Db, _}) -> +is_not_admin_role({Db, _, _}) -> UserCtx = #user_ctx{roles = [<<"member_role1">>]}, ?assertEqual(false, fabric2_db:is_admin(Db#{user_ctx := UserCtx})). -check_is_admin({Db, _}) -> +check_is_admin({Db, _, _}) -> UserCtx = #user_ctx{name = <<"admin_name1">>}, ?assertEqual(ok, fabric2_db:check_is_admin(Db#{user_ctx := UserCtx})). -check_is_not_admin({Db, _}) -> +check_is_not_admin({Db, _, _}) -> UserCtx = #user_ctx{name = <<"member_name1">>}, ?assertThrow( {unauthorized, <<"You are not a db or server admin.">>}, @@ -105,12 +108,12 @@ check_is_not_admin({Db, _}) -> ). -check_is_member_name({Db, _}) -> +check_is_member_name({Db, _, _}) -> UserCtx = #user_ctx{name = <<"member_name1">>}, ?assertEqual(ok, fabric2_db:check_is_member(Db#{user_ctx := UserCtx})). -check_is_not_member_name({Db, _}) -> +check_is_not_member_name({Db, _, _}) -> UserCtx = #user_ctx{name = <<"foo">>}, ?assertThrow( {unauthorized, <<"You are not authorized", _/binary>>}, @@ -122,12 +125,12 @@ check_is_not_member_name({Db, _}) -> ). -check_is_member_role({Db, _}) -> +check_is_member_role({Db, _, _}) -> UserCtx = #user_ctx{name = <<"foo">>, roles = [<<"member_role1">>]}, ?assertEqual(ok, fabric2_db:check_is_member(Db#{user_ctx := UserCtx})). -check_is_not_member_role({Db, _}) -> +check_is_not_member_role({Db, _, _}) -> UserCtx = #user_ctx{name = <<"foo">>, roles = [<<"bar">>]}, ?assertThrow( {forbidden, <<"You are not allowed to access", _/binary>>}, @@ -135,25 +138,24 @@ check_is_not_member_role({Db, _}) -> ). -check_admin_is_member({Db, _}) -> +check_admin_is_member({Db, _, _}) -> UserCtx = #user_ctx{name = <<"admin_name1">>}, ?assertEqual(ok, fabric2_db:check_is_member(Db#{user_ctx := UserCtx})). -check_is_member_of_public_db({Db, _}) -> - PublicDb = Db#{security_doc := {[]}}, +check_is_member_of_public_db({_, PubDb, _}) -> UserCtx = #user_ctx{name = <<"foo">>, roles = [<<"bar">>]}, ?assertEqual( ok, - fabric2_db:check_is_member(PublicDb#{user_ctx := #user_ctx{}}) + fabric2_db:check_is_member(PubDb#{user_ctx := #user_ctx{}}) ), ?assertEqual( ok, - fabric2_db:check_is_member(PublicDb#{user_ctx := UserCtx}) + fabric2_db:check_is_member(PubDb#{user_ctx := UserCtx}) ). -check_set_user_ctx({Db0, _}) -> +check_set_user_ctx({Db0, _, _}) -> DbName = fabric2_db:name(Db0), UserCtx = #user_ctx{name = <<"foo">>, roles = [<<"bar">>]}, {ok, Db1} = fabric2_db:open(DbName, [{user_ctx, UserCtx}]), -- cgit v1.2.1 From f37b6d5fea2c3372d1b90ddc99800b33769da84d Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Thu, 19 Sep 2019 16:42:35 -0400 Subject: Check members after db is opened This brings this to parity with master `couch_db:open/2` logic: https://github.com/apache/couchdb/blob/master/src/couch/src/couch_db.erl#L166 There are two separate cases that have to be handled: 1) Db was already opened and cached. In that case, call `check_is_member(Db)` which reads the security from the Db to ensure we don't authorize against a stale security doc. Otherwise, the delay could be as long as the last write that went through that node. A potential future optimization could be to have a timestamp and only get the new security context if last refresh hasn't happened recently. 2) Db was not cached, and was just opened. To avoid running another two read transactions to get the security doc after the main transaction finished, call a version of check_is_member which gets the security doc passed in as an argument. As a bonus, `check_is_members(Db, SecDoc)` version ends up saving one extra security read since we don't read twice in is_member and is_admin calls. `delete/2` was updated to pass ?ADMIN_CTX to `open/2` since it only cares about getting a `database_does_not_exist` error thrown. There is a check for server admin at the HTTP API level that would care of authentication / authorization. --- src/fabric/src/fabric2_db.erl | 29 ++++++++++++++++++--------- src/fabric/test/fabric2_db_security_tests.erl | 26 ++++++++++++++++++++---- 2 files changed, 42 insertions(+), 13 deletions(-) diff --git a/src/fabric/src/fabric2_db.erl b/src/fabric/src/fabric2_db.erl index 8927ce365..a316517eb 100644 --- a/src/fabric/src/fabric2_db.erl +++ b/src/fabric/src/fabric2_db.erl @@ -175,14 +175,17 @@ create(DbName, Options) -> open(DbName, Options) -> case fabric2_server:fetch(DbName) of #{} = Db -> - {ok, maybe_set_user_ctx(Db, Options)}; + Db1 = maybe_set_user_ctx(Db, Options), + ok = check_is_member(Db1), + {ok, Db1}; undefined -> Result = fabric2_fdb:transactional(DbName, Options, fun(TxDb) -> fabric2_fdb:open(TxDb, Options) end), % Cache outside the transaction retry loop case Result of - #{} = Db0 -> + #{security_doc := SecDoc} = Db0 -> + ok = check_is_member(Db0, SecDoc), Db1 = maybe_add_sys_db_callbacks(Db0), ok = fabric2_server:store(Db1), {ok, Db1#{tx := undefined}}; @@ -193,8 +196,10 @@ open(DbName, Options) -> delete(DbName, Options) -> - % This will throw if the db does not exist - {ok, Db} = open(DbName, Options), + % Delete doesn't check user_ctx, that's done at the HTTP API level + % here we just care to get the `database_does_not_exist` error thrown + Options1 = lists:keystore(user_ctx, 1, Options, ?ADMIN_CTX), + {ok, Db} = open(DbName, Options1), Resp = fabric2_fdb:transactional(Db, fun(TxDb) -> fabric2_fdb:delete(TxDb) end), @@ -236,11 +241,14 @@ list_dbs(UserFun, UserAcc0, Options) -> is_admin(Db) -> + is_admin(Db, get_security(Db)). + + +is_admin(Db, {SecProps}) when is_list(SecProps) -> case fabric2_db_plugin:check_is_admin(Db) of true -> true; false -> - {SecProps} = get_security(Db), UserCtx = get_user_ctx(Db), {Admins} = get_admins(SecProps), is_authorized(Admins, UserCtx) @@ -259,7 +267,11 @@ check_is_admin(Db) -> check_is_member(Db) -> - case is_member(Db) of + check_is_member(Db, get_security(Db)). + + +check_is_member(Db, SecDoc) -> + case is_member(Db, SecDoc) of true -> ok; false -> @@ -977,9 +989,8 @@ maybe_set_user_ctx(Db, Options) -> end. -is_member(Db) -> - {SecProps} = get_security(Db), - case is_admin(Db) of +is_member(Db, {SecProps}) when is_list(SecProps) -> + case is_admin(Db, {SecProps}) of true -> true; false -> diff --git a/src/fabric/test/fabric2_db_security_tests.erl b/src/fabric/test/fabric2_db_security_tests.erl index b4df3b4dd..4a54083ac 100644 --- a/src/fabric/test/fabric2_db_security_tests.erl +++ b/src/fabric/test/fabric2_db_security_tests.erl @@ -38,7 +38,10 @@ security_test_() -> fun check_is_not_member_role/1, fun check_admin_is_member/1, fun check_is_member_of_public_db/1, - fun check_set_user_ctx/1 + fun check_set_user_ctx/1, + fun check_open_forbidden/1, + fun check_fail_open_no_opts/1, + fun check_fail_open_name_null/1 ]} } }. @@ -48,7 +51,7 @@ setup() -> Ctx = test_util:start_couch([fabric]), DbName = ?tempdb(), PubDbName = ?tempdb(), - {ok, Db1} = fabric2_db:create(DbName, [{user_ctx, ?ADMIN_USER}]), + {ok, Db1} = fabric2_db:create(DbName, [?ADMIN_CTX]), SecProps = {[ {<<"admins">>, {[ {<<"names">>, [<<"admin_name1">>, <<"admin_name2">>]}, @@ -60,7 +63,7 @@ setup() -> ]}} ]}, ok = fabric2_db:set_security(Db1, SecProps), - {ok, Db2} = fabric2_db:open(DbName, []), + {ok, Db2} = fabric2_db:open(DbName, [?ADMIN_CTX]), {ok, PubDb} = fabric2_db:create(PubDbName, []), {Db2, PubDb, Ctx}. @@ -157,8 +160,23 @@ check_is_member_of_public_db({_, PubDb, _}) -> check_set_user_ctx({Db0, _, _}) -> DbName = fabric2_db:name(Db0), - UserCtx = #user_ctx{name = <<"foo">>, roles = [<<"bar">>]}, + UserCtx = #user_ctx{name = <<"foo">>, roles = [<<"admin_role1">>]}, {ok, Db1} = fabric2_db:open(DbName, [{user_ctx, UserCtx}]), ?assertEqual(UserCtx, fabric2_db:get_user_ctx(Db1)). +check_open_forbidden({Db0, _, _}) -> + DbName = fabric2_db:name(Db0), + UserCtx = #user_ctx{name = <<"foo">>, roles = [<<"bar">>]}, + ?assertThrow({forbidden, _}, fabric2_db:open(DbName, [{user_ctx, UserCtx}])). + + +check_fail_open_no_opts({Db0, _, _}) -> + DbName = fabric2_db:name(Db0), + ?assertThrow({unauthorized, _}, fabric2_db:open(DbName, [])). + + +check_fail_open_name_null({Db0, _, _}) -> + DbName = fabric2_db:name(Db0), + UserCtx = #user_ctx{name = null}, + ?assertThrow({unauthorized, _}, fabric2_db:open(DbName, [{user_ctx, UserCtx}])). -- cgit v1.2.1 From f920172aac358d82d0a05f795f4a00f8e294a4be Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Fri, 20 Sep 2019 16:45:44 -0400 Subject: Add revision stemming for interactive docs --- src/fabric/src/fabric2_db.erl | 29 +++-- src/fabric/test/fabric2_rev_stemming.erl | 204 +++++++++++++++++++++++++++++++ 2 files changed, 224 insertions(+), 9 deletions(-) create mode 100644 src/fabric/test/fabric2_rev_stemming.erl diff --git a/src/fabric/src/fabric2_db.erl b/src/fabric/src/fabric2_db.erl index a316517eb..9ef0bd358 100644 --- a/src/fabric/src/fabric2_db.erl +++ b/src/fabric/src/fabric2_db.erl @@ -442,7 +442,7 @@ is_users_db(DbName) when is_binary(DbName) -> set_revs_limit(#{} = Db, RevsLimit) -> check_is_admin(Db), - RevsLimBin = ?uint2bin(RevsLimit), + RevsLimBin = ?uint2bin(max(1, RevsLimit)), Resp = fabric2_fdb:transactional(Db, fun(TxDb) -> fabric2_fdb:set_config(TxDb, <<"revs_limit">>, RevsLimBin) end), @@ -1325,10 +1325,12 @@ update_doc_interactive(Db, Doc0, Future, _Options) -> % new revinfo map Doc2 = prep_and_validate(Db, Doc1, Target), + Doc3 = new_revid(Db, Doc2), + #doc{ deleted = NewDeleted, revs = {NewRevPos, [NewRev | NewRevPath]} - } = Doc3 = new_revid(Db, Doc2), + } = Doc4 = stem_revisions(Db, Doc3), NewRevInfo = #{ winner => undefined, @@ -1341,9 +1343,9 @@ update_doc_interactive(Db, Doc0, Future, _Options) -> % Gather the list of possible winnig revisions Possible = case Target == Winner of - true when not Doc3#doc.deleted -> + true when not Doc4#doc.deleted -> [NewRevInfo]; - true when Doc3#doc.deleted -> + true when Doc4#doc.deleted -> case SecondPlace of #{} -> [NewRevInfo, SecondPlace]; not_found -> [NewRevInfo] @@ -1368,7 +1370,7 @@ update_doc_interactive(Db, Doc0, Future, _Options) -> ok = fabric2_fdb:write_doc( Db, - Doc3, + Doc4, NewWinner, Winner, ToUpdate, @@ -1403,6 +1405,7 @@ update_doc_replicated(Db, Doc0, _Options) -> end, [], AllRevInfos), DocRevPath = fabric2_util:revinfo_to_path(DocRevInfo0), + {NewTree, Status} = couch_key_tree:merge(RevTree, DocRevPath), if Status /= internal_node -> ok; true -> % We already know this revision so nothing @@ -1416,10 +1419,9 @@ update_doc_replicated(Db, Doc0, _Options) -> % tree and use the combined path after stemming. {[{_, {RevPos, UnstemmedRevs}}], []} = couch_key_tree:get(NewTree, [{RevPos, Rev}]), - RevsLimit = fabric2_db:get_revs_limit(Db), - Doc1 = Doc0#doc{ - revs = {RevPos, lists:sublist(UnstemmedRevs, RevsLimit)} - }, + + Doc1 = stem_revisions(Db, Doc0#doc{revs = {RevPos, UnstemmedRevs}}), + {RevPos, [Rev | NewRevPath]} = Doc1#doc.revs, DocRevInfo1 = DocRevInfo0#{rev_path := NewRevPath}, @@ -1736,3 +1738,12 @@ set_design_doc_end_key(Options, rev) -> Key2 = max(EKeyGT, ?FIRST_DDOC_KEY), lists:keystore(end_key_gt, 1, Options, {end_key_gt, Key2}) end. + + +stem_revisions(#{} = Db, #doc{} = Doc) -> + #{revs_limit := RevsLimit} = Db, + #doc{revs = {RevPos, Revs}} = Doc, + case RevPos >= RevsLimit of + true -> Doc#doc{revs = {RevPos, lists:sublist(Revs, RevsLimit)}}; + false -> Doc + end. diff --git a/src/fabric/test/fabric2_rev_stemming.erl b/src/fabric/test/fabric2_rev_stemming.erl new file mode 100644 index 000000000..99e086e2a --- /dev/null +++ b/src/fabric/test/fabric2_rev_stemming.erl @@ -0,0 +1,204 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(fabric2_rev_stemming). + + +-include_lib("couch/include/couch_db.hrl"). +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("eunit/include/eunit.hrl"). + + +doc_crud_test_() -> + { + "Test document CRUD operations with stemming", + { + setup, + fun setup/0, + fun cleanup/1, + {with, [ + fun update_doc/1, + fun update_doc_replicated_no_stemming/1, + fun update_doc_replicated_with_stemming/1, + fun update_doc_replicate_existing_rev/1, + fun update_winning_conflict_branch/1, + fun update_non_winning_conflict_branch/1, + fun delete_doc_basic/1, + fun recreate_doc_basic/1 + ]} + } + }. + + +setup() -> + Ctx = test_util:start_couch([fabric]), + {ok, Db} = fabric2_db:create(?tempdb(), [{user_ctx, ?ADMIN_USER}]), + {Db, Ctx}. + + +cleanup({Db, Ctx}) -> + ok = fabric2_db:delete(fabric2_db:name(Db), []), + test_util:stop_couch(Ctx). + + +update_doc({Db, _}) -> + ok = fabric2_db:set_revs_limit(Db, 2), + Doc1 = #doc{id = fabric2_util:uuid()}, + {ok, {Pos1, Rev1}} = fabric2_db:update_doc(Db, Doc1), + Doc2 = Doc1#doc{revs = {Pos1, [Rev1]}}, + {ok, {Pos2, Rev2}} = fabric2_db:update_doc(Db, Doc2), + Doc3 = Doc2#doc{revs = {Pos2, [Rev2, Rev1]}}, + ?assertEqual({ok, Doc3}, fabric2_db:open_doc(Db, Doc2#doc.id)), + + {ok, {_, Rev3}} = fabric2_db:update_doc(Db, Doc3), + {ok, Doc4} = fabric2_db:open_doc(Db, Doc3#doc.id), + ?assertEqual({3, [Rev3, Rev2]}, Doc4#doc.revs). + + +update_doc_replicated_no_stemming({Db, _}) -> + ok = fabric2_db:set_revs_limit(Db, 2), + Rev1 = fabric2_util:uuid(), + Rev2 = fabric2_util:uuid(), + Doc = #doc{ + id = fabric2_util:uuid(), + revs = {2, [Rev2, Rev1]} + }, + {ok, _} = fabric2_db:update_doc(Db, Doc, [replicated_changes]), + {ok, #doc{revs = Revs}} = fabric2_db:open_doc(Db, Doc#doc.id), + ?assertEqual({2, [Rev2, Rev1]}, Revs). + + +update_doc_replicated_with_stemming({Db, _}) -> + ok = fabric2_db:set_revs_limit(Db, 1), + Rev1 = fabric2_util:uuid(), + Rev2 = fabric2_util:uuid(), + Doc = #doc{ + id = fabric2_util:uuid(), + revs = {2, [Rev2, Rev1]} + }, + {ok, _} = fabric2_db:update_doc(Db, Doc, [replicated_changes]), + {ok, #doc{revs = Revs}} = fabric2_db:open_doc(Db, Doc#doc.id), + ?assertEqual({2, [Rev2]}, Revs). + + +update_doc_replicate_existing_rev({Db, _}) -> + ok = fabric2_db:set_revs_limit(Db, 1), + Rev1 = fabric2_util:uuid(), + Rev2 = fabric2_util:uuid(), + Doc1 = #doc{ + id = fabric2_util:uuid(), + revs = {2, [Rev2, Rev1]} + }, + {ok, {2, _}} = fabric2_db:update_doc(Db, Doc1, [replicated_changes]), + {ok, []} = fabric2_db:update_docs(Db, [Doc1], [replicated_changes]), + {ok, Doc} = fabric2_db:open_doc(Db, Doc1#doc.id), + ?assertEqual({2, [Rev2]}, Doc#doc.revs). + + +update_winning_conflict_branch({Db, _}) -> + ok = fabric2_db:set_revs_limit(Db, 2), + [Rev1, Rev2, Rev3] = lists:sort([ + fabric2_util:uuid(), + fabric2_util:uuid(), + fabric2_util:uuid() + ]), + Doc1 = #doc{ + id = fabric2_util:uuid(), + revs = {2, [Rev3, Rev1]}, + body = {[{<<"foo">>, <<"bar">>}]} + }, + {ok, {2, _}} = fabric2_db:update_doc(Db, Doc1, [replicated_changes]), + Doc2 = Doc1#doc{ + revs = {2, [Rev2, Rev1]}, + body = {[{<<"bar">>, <<"foo">>}]} + }, + {ok, {2, _}} = fabric2_db:update_doc(Db, Doc2, [replicated_changes]), + % Update the winning branch + Doc3 = Doc1#doc{ + revs = {2, [Rev3, Rev1]}, + body = {[{<<"baz">>, 2}]} + }, + {ok, {3, Rev4}} = fabric2_db:update_doc(Db, Doc3), + {ok, Doc4} = fabric2_db:open_doc(Db, Doc3#doc.id), + % Assert we've got the correct winner + ?assertEqual({3, [Rev4, Rev3]}, Doc4#doc.revs), + ?assertEqual(Doc3#doc{revs = undefined}, Doc4#doc{revs = undefined}). + + +update_non_winning_conflict_branch({Db, _}) -> + ok = fabric2_db:set_revs_limit(Db, 2), + [Rev1, Rev2, Rev3] = lists:sort([ + fabric2_util:uuid(), + fabric2_util:uuid(), + fabric2_util:uuid() + ]), + Doc1 = #doc{ + id = fabric2_util:uuid(), + revs = {2, [Rev3, Rev1]}, + body = {[{<<"foo">>, <<"bar">>}]} + }, + {ok, {2, _}} = fabric2_db:update_doc(Db, Doc1, [replicated_changes]), + Doc2 = Doc1#doc{ + revs = {2, [Rev2, Rev1]}, + body = {[{<<"bar">>, <<"foo">>}]} + }, + {ok, {2, _}} = fabric2_db:update_doc(Db, Doc2, [replicated_changes]), + % Update the non winning branch + Doc3 = Doc1#doc{ + revs = {2, [Rev2, Rev1]}, + body = {[{<<"baz">>, 2}]} + }, + {ok, {3, Rev4}} = fabric2_db:update_doc(Db, Doc3), + {ok, Doc4} = fabric2_db:open_doc(Db, Doc3#doc.id), + % Assert we've got the correct winner + ?assertEqual({3, [Rev4, Rev2]}, Doc4#doc.revs). + + +delete_doc_basic({Db, _}) -> + ok = fabric2_db:set_revs_limit(Db, 1), + Doc1 = #doc{ + id = fabric2_util:uuid(), + body = {[{<<"state">>, 1}]} + }, + {ok, {Pos1, Rev1}} = fabric2_db:update_doc(Db, Doc1), + Doc2 = Doc1#doc{ + revs = {Pos1, [Rev1]}, + deleted = true, + body = {[{<<"state">>, 2}]} + }, + {ok, {Pos2, Rev2}} = fabric2_db:update_doc(Db, Doc2), + Doc3 = Doc2#doc{revs = {Pos2, [Rev2]}}, + ?assertEqual({ok, Doc3}, fabric2_db:open_doc(Db, Doc2#doc.id, [deleted])). + + +recreate_doc_basic({Db, _}) -> + ok = fabric2_db:set_revs_limit(Db, 1), + Doc1 = #doc{ + id = fabric2_util:uuid(), + body = {[{<<"state">>, 1}]} + }, + {ok, {1, Rev1}} = fabric2_db:update_doc(Db, Doc1), + Doc2 = Doc1#doc{ + revs = {1, [Rev1]}, + deleted = true, + body = {[{<<"state">>, 2}]} + }, + {ok, {2, Rev2}} = fabric2_db:update_doc(Db, Doc2), + Doc3 = Doc1#doc{ + revs = {0, []}, + deleted = false, + body = {[{<<"state">>, 3}]} + }, + {ok, {3, Rev3}} = fabric2_db:update_doc(Db, Doc3), + {ok, Doc4} = fabric2_db:open_doc(Db, Doc3#doc.id), + ?assertEqual({3, [Rev3]}, Doc4#doc.revs), + ?assertEqual(Doc3#doc{revs = undefined}, Doc4#doc{revs = undefined}). -- cgit v1.2.1 From e6fb78be6809f8d727b216cb1e0614f4bfa52566 Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Tue, 24 Sep 2019 02:25:39 -0400 Subject: Fix doc counts for replicated deletions Do not decrement `doc_count` stat if document was previously missing, or if it was already deleted. Deleted documents could be brought in by replication. In that case, if there were more replicated documents than the current `doc_count`, the `doc_count` could even underflow the 64 bit unsigned integer range and end up somewhere in the vicinity of 2^64. The counter, of course, would still be incorrect even if it didn't underflow, the huge value would just make the issue more visible. --- src/fabric/src/fabric2_fdb.erl | 12 ++++++++++-- src/fabric/test/fabric2_doc_count_tests.erl | 26 ++++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 2 deletions(-) diff --git a/src/fabric/src/fabric2_fdb.erl b/src/fabric/src/fabric2_fdb.erl index ccfeb3c06..5c58da482 100644 --- a/src/fabric/src/fabric2_fdb.erl +++ b/src/fabric/src/fabric2_fdb.erl @@ -556,7 +556,7 @@ write_doc(#{} = Db0, Doc, NewWinner0, OldWinner, ToUpdate, ToRemove) -> {not_found, #{deleted := false}} -> created; {not_found, #{deleted := true}} -> - deleted; + replicate_deleted; {#{deleted := true}, #{deleted := false}} -> recreated; {#{deleted := false}, #{deleted := false}} -> @@ -564,10 +564,14 @@ write_doc(#{} = Db0, Doc, NewWinner0, OldWinner, ToUpdate, ToRemove) -> {#{deleted := false}, #{deleted := true}} -> deleted; {#{deleted := true}, #{deleted := true}} -> - deleted + ignore end, case UpdateStatus of + replicate_deleted -> + ok; + ignore -> + ok; deleted -> ADKey = erlfdb_tuple:pack({?DB_ALL_DOCS, DocId}, DbPrefix), ok = erlfdb:clear(Tx, ADKey); @@ -614,6 +618,10 @@ write_doc(#{} = Db0, Doc, NewWinner0, OldWinner, ToUpdate, ToRemove) -> end, incr_stat(Db, <<"doc_count">>, 1), incr_stat(Db, <<"doc_del_count">>, -1); + replicate_deleted -> + incr_stat(Db, <<"doc_del_count">>, 1); + ignore -> + ok; deleted -> if not IsDDoc -> ok; true -> incr_stat(Db, <<"doc_design_count">>, -1) diff --git a/src/fabric/test/fabric2_doc_count_tests.erl b/src/fabric/test/fabric2_doc_count_tests.erl index 37d08404d..743ae7665 100644 --- a/src/fabric/test/fabric2_doc_count_tests.erl +++ b/src/fabric/test/fabric2_doc_count_tests.erl @@ -30,6 +30,7 @@ doc_count_test_() -> fun cleanup/1, {with, [ fun normal_docs/1, + fun replicated_docs/1, fun design_docs/1, fun local_docs/1 ]} @@ -109,6 +110,31 @@ normal_docs({Db, _}) -> ). +replicated_docs({Db, _}) -> + {DocCount, DelDocCount, DDocCount, LDocCount} = get_doc_counts(Db), + + Opts = [replicated_changes], + {R1, R2, R3} = {<<"r1">>, <<"r2">>, <<"r3">>}, + + % First case is a simple replicated update + Doc1 = #doc{id = <<"rd1">>, revs = {1, [R1]}}, + {ok, {1, R1}} = fabric2_db:update_doc(Db, Doc1, Opts), + check_doc_counts(Db, DocCount + 1, DelDocCount, DDocCount, LDocCount), + + % Here a deleted document is replicated into the db. Doc count should not + % change, only deleted doc count. + Doc2 = #doc{id = <<"rd2">>, revs = {1, [R2]}, deleted = true}, + {ok, {1, R2}} = fabric2_db:update_doc(Db, Doc2, Opts), + check_doc_counts(Db, DocCount + 1, DelDocCount + 1, DDocCount, LDocCount), + + % Here we extended the deleted document's rev path but keep it deleted. + % Deleted doc count doesn't bumped since the document was already counted + % as deleted + Doc3 = #doc{id = <<"rd2">>, revs = {2, [R3, R2]}, deleted = true}, + {ok, {2, R3}} = fabric2_db:update_doc(Db, Doc3, Opts), + check_doc_counts(Db, DocCount + 1, DelDocCount + 1 , DDocCount, LDocCount). + + design_docs({Db, _}) -> {DocCount, DelDocCount, DDocCount, LDocCount} = get_doc_counts(Db), -- cgit v1.2.1 From a71fc301366ac03ff1f41cbdf123b32efff1d452 Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Tue, 24 Sep 2019 13:07:12 -0400 Subject: Add more deleted docs replication cases to integration test We test that a newly deleted document is replicated to the target and it bumps the deled doc count but doesn't change doc count. Another thing to test is that an already deleted document is replicated in where its revision path was extended on the source then gets replicated to the target. In that case neither del doc count not doc count are bumped. --- test/elixir/test/replication_test.exs | 120 +++++++++++++++++++++++++++------- 1 file changed, 95 insertions(+), 25 deletions(-) diff --git a/test/elixir/test/replication_test.exs b/test/elixir/test/replication_test.exs index e513ddd16..78f36602d 100644 --- a/test/elixir/test/replication_test.exs +++ b/test/elixir/test/replication_test.exs @@ -428,6 +428,76 @@ defmodule ReplicationTest do assert change["id"] == del_doc["_id"] assert change["deleted"] + # Test new deletion is replicated, document wasn't on the target yet + [del_doc] = save_docs(src_db_name, [%{"_id" => "new_del_doc_1"}]) + + del_doc = Map.put(del_doc, "_deleted", true) + [del_doc] = save_docs(src_db_name, [del_doc]) + + result = replicate(src_prefix <> src_db_name, tgt_prefix <> tgt_db_name) + assert result["ok"] + + retry_until(fn -> + src_info = get_db_info(src_db_name) + tgt_info = get_db_info(tgt_db_name) + + assert tgt_info["doc_count"] == src_info["doc_count"] + assert tgt_info["doc_del_count"] == src_info["doc_del_count"] + assert tgt_info["doc_del_count"] == 2 + end) + + assert is_list(result["history"]) + assert length(result["history"]) == 4 + history = Enum.at(result["history"], 0) + assert history["missing_checked"] == 29 + assert history["missing_found"] == 29 + assert history["docs_read"] == 29 + assert history["docs_written"] == 29 + assert history["doc_write_failures"] == 0 + + resp = Couch.get("/#{tgt_db_name}/#{del_doc["_id"]}") + assert resp.status_code == 404 + + resp = Couch.get!("/#{tgt_db_name}/_changes") + [change] = Enum.filter(resp.body["results"], &(&1["id"] == del_doc["_id"])) + assert change["id"] == del_doc["_id"] + assert change["deleted"] + + # Test an already deleted deletion being replicated + [del_doc] = save_docs(src_db_name, [%{"_id" => "new_del_doc_1"}]) + del_doc = Map.put(del_doc, "_deleted", true) + [del_doc] = save_docs(src_db_name, [del_doc]) + + result = replicate(src_prefix <> src_db_name, tgt_prefix <> tgt_db_name) + assert result["ok"] + + retry_until(fn -> + src_info = get_db_info(src_db_name) + tgt_info = get_db_info(tgt_db_name) + + assert tgt_info["doc_count"] == src_info["doc_count"] + assert tgt_info["doc_del_count"] == src_info["doc_del_count"] + assert tgt_info["doc_del_count"] == 2 + end) + + assert is_list(result["history"]) + assert length(result["history"]) == 5 + history = Enum.at(result["history"], 0) + assert history["missing_checked"] == 30 + assert history["missing_found"] == 30 + assert history["docs_read"] == 30 + assert history["docs_written"] == 30 + assert history["doc_write_failures"] == 0 + + resp = Couch.get("/#{tgt_db_name}/#{del_doc["_id"]}") + assert resp.status_code == 404 + + resp = Couch.get!("/#{tgt_db_name}/_changes") + [change] = Enum.filter(resp.body["results"], &(&1["id"] == del_doc["_id"])) + assert change["id"] == del_doc["_id"] + assert change["deleted"] + + # Test replicating a conflict doc = Couch.get!("/#{src_db_name}/2").body [doc] = save_docs(src_db_name, [Map.put(doc, :value, "white")]) @@ -444,12 +514,12 @@ defmodule ReplicationTest do assert tgt_info["doc_count"] == src_info["doc_count"] assert is_list(result["history"]) - assert length(result["history"]) == 4 + assert length(result["history"]) == 6 history = Enum.at(result["history"], 0) - assert history["missing_checked"] == 29 - assert history["missing_found"] == 29 - assert history["docs_read"] == 29 - assert history["docs_written"] == 29 + assert history["missing_checked"] == 31 + assert history["missing_found"] == 31 + assert history["docs_read"] == 31 + assert history["docs_written"] == 31 assert history["doc_write_failures"] == 0 copy = Couch.get!("/#{tgt_db_name}/2", query: %{:conflicts => true}).body @@ -471,12 +541,12 @@ defmodule ReplicationTest do assert tgt_info["doc_count"] == src_info["doc_count"] assert is_list(result["history"]) - assert length(result["history"]) == 5 + assert length(result["history"]) == 7 history = Enum.at(result["history"], 0) - assert history["missing_checked"] == 30 - assert history["missing_found"] == 30 - assert history["docs_read"] == 30 - assert history["docs_written"] == 30 + assert history["missing_checked"] == 32 + assert history["missing_found"] == 32 + assert history["docs_read"] == 32 + assert history["docs_written"] == 32 assert history["doc_write_failures"] == 0 copy = Couch.get!("/#{tgt_db_name}/2", query: %{:conflicts => true}).body @@ -500,12 +570,12 @@ defmodule ReplicationTest do assert tgt_info["doc_count"] == src_info["doc_count"] assert is_list(result["history"]) - assert length(result["history"]) == 6 + assert length(result["history"]) == 8 history = Enum.at(result["history"], 0) - assert history["missing_checked"] == 31 - assert history["missing_found"] == 31 - assert history["docs_read"] == 31 - assert history["docs_written"] == 31 + assert history["missing_checked"] == 33 + assert history["missing_found"] == 33 + assert history["docs_read"] == 33 + assert history["docs_written"] == 33 assert history["doc_write_failures"] == 0 copy = Couch.get!("/#{tgt_db_name}/2", query: %{:conflicts => true}).body @@ -532,12 +602,12 @@ defmodule ReplicationTest do assert tgt_info["doc_count"] == src_info["doc_count"] assert is_list(result["history"]) - assert length(result["history"]) == 7 + assert length(result["history"]) == 9 history = Enum.at(result["history"], 0) - assert history["missing_checked"] == 34 - assert history["missing_found"] == 32 - assert history["docs_read"] == 32 - assert history["docs_written"] == 32 + assert history["missing_checked"] == 36 + assert history["missing_found"] == 34 + assert history["docs_read"] == 34 + assert history["docs_written"] == 34 assert history["doc_write_failures"] == 0 docs = [ @@ -557,12 +627,12 @@ defmodule ReplicationTest do assert tgt_info["doc_count"] == src_info["doc_count"] assert is_list(result["history"]) - assert length(result["history"]) == 8 + assert length(result["history"]) == 10 history = Enum.at(result["history"], 0) - assert history["missing_checked"] == 36 - assert history["missing_found"] == 32 - assert history["docs_read"] == 32 - assert history["docs_written"] == 32 + assert history["missing_checked"] == 38 + assert history["missing_found"] == 34 + assert history["docs_read"] == 34 + assert history["docs_written"] == 34 assert history["doc_write_failures"] == 0 # Test nothing to replicate -- cgit v1.2.1 From 3ccec8270bdabf96b0da50ff42494f0d107e32af Mon Sep 17 00:00:00 2001 From: Garren Smith Date: Wed, 14 Aug 2019 17:18:56 +0200 Subject: Add couch_eval abstraction layer --- rebar.config.script | 1 + rel/reltool.config | 2 + src/couch_eval/README.md | 5 ++ src/couch_eval/rebar.config | 14 +++++ src/couch_eval/src/couch_eval.app.src | 23 +++++++++ src/couch_eval/src/couch_eval.erl | 97 +++++++++++++++++++++++++++++++++++ 6 files changed, 142 insertions(+) create mode 100644 src/couch_eval/README.md create mode 100644 src/couch_eval/rebar.config create mode 100644 src/couch_eval/src/couch_eval.app.src create mode 100644 src/couch_eval/src/couch_eval.erl diff --git a/rebar.config.script b/rebar.config.script index 16ec16c8b..16dc44f26 100644 --- a/rebar.config.script +++ b/rebar.config.script @@ -117,6 +117,7 @@ SubDirs = [ "src/couch_log", "src/chttpd", "src/couch", + "src/couch_eval", "src/couch_event", "src/mem3", "src/couch_index", diff --git a/rel/reltool.config b/rel/reltool.config index a96be105e..6e0780095 100644 --- a/rel/reltool.config +++ b/rel/reltool.config @@ -40,6 +40,7 @@ couch_plugins, couch_replicator, couch_stats, + couch_eval, couch_event, couch_peruser, couch_views, @@ -94,6 +95,7 @@ {app, config, [{incl_cond, include}]}, {app, couch, [{incl_cond, include}]}, {app, couch_epi, [{incl_cond, include}]}, + {app, couch_eval, [{incl_cond, include}]}, {app, couch_jobs, [{incl_cond, include}]}, {app, couch_index, [{incl_cond, include}]}, {app, couch_log, [{incl_cond, include}]}, diff --git a/src/couch_eval/README.md b/src/couch_eval/README.md new file mode 100644 index 000000000..048a165fb --- /dev/null +++ b/src/couch_eval/README.md @@ -0,0 +1,5 @@ +couch_eval +===== + +An an initial abstraction layer for evaluating user provided code. So far +this is only used by `couch_views` to provide map function support. Currently this is implemented in `couch_js` by reusing the existing `couchjs` mechanics. diff --git a/src/couch_eval/rebar.config b/src/couch_eval/rebar.config new file mode 100644 index 000000000..362c8785e --- /dev/null +++ b/src/couch_eval/rebar.config @@ -0,0 +1,14 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +{cover_enabled, true}. +{cover_print_enabled, true}. diff --git a/src/couch_eval/src/couch_eval.app.src b/src/couch_eval/src/couch_eval.app.src new file mode 100644 index 000000000..87193d806 --- /dev/null +++ b/src/couch_eval/src/couch_eval.app.src @@ -0,0 +1,23 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +{application, couch_eval, [ + {description, "An OTP application"}, + {vsn, git}, + {registered, []}, + {applications, [ + kernel, + stdlib, + couch_log, + config + ]} + ]}. diff --git a/src/couch_eval/src/couch_eval.erl b/src/couch_eval/src/couch_eval.erl new file mode 100644 index 000000000..23ca263ab --- /dev/null +++ b/src/couch_eval/src/couch_eval.erl @@ -0,0 +1,97 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + + +-module(couch_eval). + + +-export([ + acquire_map_context/6, + release_map_context/1, + map_docs/2 +]). + + +-include_lib("couch/include/couch_db.hrl"). + + +-type db_name() :: binary(). +-type doc_id() :: binary(). +-type ddoc_id() :: binary(). +-type language() :: binary(). +-type sig() :: binary(). +-type lib() :: any(). +-type map_fun() :: binary(). +-type map_funs() :: [map_fun()]. +-type result() :: {doc_id(), [[{any(), any()}]]}. +-type api_mod() :: atom(). +-type context() :: {api_mod(), any()}. + +-type context_opts() :: #{ + db_name := db_name(), + ddoc_id => ddoc_id(), + language => language(), + sig => sig(), + lib => lib(), + map_funs => map_funs(), + api_mod => api_mod() +}. + + +-callback acquire_map_context(context_opts()) -> {ok, any()} | {error, any()}. +-callback release_map_context(context()) -> ok | {error, any()}. +-callback map_docs(context(), [doc()]) -> {ok, [result()]} | {error, any()}. + + +-spec acquire_map_context( + db_name(), + ddoc_id(), + language(), + sig(), + lib(), + map_funs() + ) -> {ok, context()} | {error, any()}. +acquire_map_context(DbName, DDocId, Language, Sig, Lib, MapFuns) -> + ApiMod = get_api_mod(Language), + CtxOpts = #{ + db_name => DbName, + ddoc_id => DDocId, + language => Language, + sig => Sig, + lib => Lib, + map_funs => MapFuns + }, + {ok, Ctx} = ApiMod:acquire_map_context(CtxOpts), + {ok, {ApiMod, Ctx}}. + + +-spec release_map_context(context()) -> ok | {error, any()}. +release_map_context({ApiMod, Ctx}) -> + ApiMod:release_map_context(Ctx). + + +-spec map_docs(context(), [doc()]) -> {ok, result()} | {error, any()}. +map_docs({ApiMod, Ctx}, Docs) -> + ApiMod:map_docs(Ctx, Docs). + + +get_api_mod(Language) when is_binary(Language) -> + try + LangStr = binary_to_list(Language), + ModStr = config:get("couch_eval.languages", LangStr), + if ModStr /= undefined -> ok; true -> + erlang:error({unknown_eval_api_language, Language}) + end, + list_to_existing_atom(ModStr) + catch error:badarg -> + erlang:error({invalid_eval_api_mod, Language}) + end. -- cgit v1.2.1 From 36945e7bd577a772a77308a5b33d5e90701aa853 Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Tue, 20 Aug 2019 12:43:14 -0500 Subject: Initial creation of couch_js application This commit is mostly a copy paste of the existing modules in the `couch` application. For now I've left the build of the `couchjs` executable in `couch/priv` to avoid having to do the work of moving the build config over. I had contemplated just referencing the modules as they current exist but decided this would prepare us a bit better for when we eventually remove the old modules. --- rebar.config.script | 1 + rel/reltool.config | 2 + src/couch_js/README.md | 6 + src/couch_js/src/couch_js.app.src | 27 ++ src/couch_js/src/couch_js_app.erl | 31 ++ src/couch_js/src/couch_js_io_logger.erl | 107 +++++ src/couch_js/src/couch_js_native_process.erl | 452 ++++++++++++++++++ src/couch_js/src/couch_js_os_process.erl | 265 +++++++++++ src/couch_js/src/couch_js_proc_manager.erl | 602 +++++++++++++++++++++++ src/couch_js/src/couch_js_query_servers.erl | 683 +++++++++++++++++++++++++++ src/couch_js/src/couch_js_sup.erl | 45 ++ 11 files changed, 2221 insertions(+) create mode 100644 src/couch_js/README.md create mode 100644 src/couch_js/src/couch_js.app.src create mode 100644 src/couch_js/src/couch_js_app.erl create mode 100644 src/couch_js/src/couch_js_io_logger.erl create mode 100644 src/couch_js/src/couch_js_native_process.erl create mode 100644 src/couch_js/src/couch_js_os_process.erl create mode 100644 src/couch_js/src/couch_js_proc_manager.erl create mode 100644 src/couch_js/src/couch_js_query_servers.erl create mode 100644 src/couch_js/src/couch_js_sup.erl diff --git a/rebar.config.script b/rebar.config.script index 16dc44f26..05000b51f 100644 --- a/rebar.config.script +++ b/rebar.config.script @@ -122,6 +122,7 @@ SubDirs = [ "src/mem3", "src/couch_index", "src/couch_mrview", + "src/couch_js", "src/couch_replicator", "src/couch_plugins", "src/couch_pse_tests", diff --git a/rel/reltool.config b/rel/reltool.config index 6e0780095..9fbf28544 100644 --- a/rel/reltool.config +++ b/rel/reltool.config @@ -41,6 +41,7 @@ couch_replicator, couch_stats, couch_eval, + couch_js, couch_event, couch_peruser, couch_views, @@ -96,6 +97,7 @@ {app, couch, [{incl_cond, include}]}, {app, couch_epi, [{incl_cond, include}]}, {app, couch_eval, [{incl_cond, include}]}, + {app, couch_js, [{incl_cond, include}]}, {app, couch_jobs, [{incl_cond, include}]}, {app, couch_index, [{incl_cond, include}]}, {app, couch_log, [{incl_cond, include}]}, diff --git a/src/couch_js/README.md b/src/couch_js/README.md new file mode 100644 index 000000000..4084b7d8e --- /dev/null +++ b/src/couch_js/README.md @@ -0,0 +1,6 @@ +couch_js +=== + +This application is just an isolation of most of the code required for running couchjs. + +For the time being I'm not moving the implementation of couchjs due to the specifics of the build system configuration. Once we go to remove the `couch` application we'll have to revisit that approach. \ No newline at end of file diff --git a/src/couch_js/src/couch_js.app.src b/src/couch_js/src/couch_js.app.src new file mode 100644 index 000000000..0db37b68c --- /dev/null +++ b/src/couch_js/src/couch_js.app.src @@ -0,0 +1,27 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +{application, couch_js, [ + {description, "An OTP application"}, + {vsn, git}, + {registered, [ + couch_js_proc_manager + ]}, + {mod, {couch_js_app, []}}, + {applications, [ + kernel, + stdlib, + config, + couch_log, + couch + ]} + ]}. diff --git a/src/couch_js/src/couch_js_app.erl b/src/couch_js/src/couch_js_app.erl new file mode 100644 index 000000000..b28f5852e --- /dev/null +++ b/src/couch_js/src/couch_js_app.erl @@ -0,0 +1,31 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + + +-module(couch_js_app). + + +-behaviour(application). + + +-export([ + start/2, + stop/1 +]). + + +start(_StartType, _StartArgs) -> + couch_js_sup:start_link(). + + +stop(_State) -> + ok. \ No newline at end of file diff --git a/src/couch_js/src/couch_js_io_logger.erl b/src/couch_js/src/couch_js_io_logger.erl new file mode 100644 index 000000000..5a1695c01 --- /dev/null +++ b/src/couch_js/src/couch_js_io_logger.erl @@ -0,0 +1,107 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_js_io_logger). + +-export([ + start/1, + log_output/1, + log_input/1, + stop_noerror/0, + stop_error/1 +]). + + +start(undefined) -> + ok; +start(Dir) -> + case filelib:is_dir(Dir) of + true -> + Name = log_name(), + Path = Dir ++ "/" ++ Name, + OPath = Path ++ ".out.log_", + IPath = Path ++ ".in.log_", + {ok, OFd} = file:open(OPath, [read, write, raw]), + {ok, IFd} = file:open(IPath, [read, write, raw]), + ok = file:delete(OPath), + ok = file:delete(IPath), + put(logger_path, Path), + put(logger_out_fd, OFd), + put(logger_in_fd, IFd), + ok; + false -> + ok + end. + + +stop_noerror() -> + case get(logger_path) of + undefined -> + ok; + _Path -> + close_logs() + end. + + +stop_error(Err) -> + case get(logger_path) of + undefined -> + ok; + Path -> + save_error_logs(Path, Err), + close_logs() + end. + + +log_output(Data) -> + log(get(logger_out_fd), Data). + + +log_input(Data) -> + log(get(logger_in_fd), Data). + + +unix_time() -> + {Mega, Sec, USec} = os:timestamp(), + UnixTs = (Mega * 1000000 + Sec) * 1000000 + USec, + integer_to_list(UnixTs). + + +log_name() -> + Ts = unix_time(), + Pid0 = erlang:pid_to_list(self()), + Pid1 = string:strip(Pid0, left, $<), + Pid2 = string:strip(Pid1, right, $>), + lists:flatten(io_lib:format("~s_~s", [Ts, Pid2])). + + +close_logs() -> + file:close(get(logger_out_fd)), + file:close(get(logger_in_fd)). + + +save_error_logs(Path, Err) -> + Otp = erlang:system_info(otp_release), + Msg = io_lib:format("Error: ~p~nNode: ~p~nOTP: ~p~n", [Err, node(), Otp]), + file:write_file(Path ++ ".meta", Msg), + IFd = get(logger_out_fd), + OFd = get(logger_in_fd), + file:position(IFd, 0), + file:position(OFd, 0), + file:copy(IFd, Path ++ ".out.log"), + file:copy(OFd, Path ++ ".in.log"). + + +log(undefined, _Data) -> + ok; +log(Fd, Data) -> + ok = file:write(Fd, [Data, io_lib:nl()]). diff --git a/src/couch_js/src/couch_js_native_process.erl b/src/couch_js/src/couch_js_native_process.erl new file mode 100644 index 000000000..d2c4c1ee0 --- /dev/null +++ b/src/couch_js/src/couch_js_native_process.erl @@ -0,0 +1,452 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); +% you may not use this file except in compliance with the License. +% +% You may obtain a copy of the License at +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, +% software distributed under the License is distributed on an +% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +% either express or implied. +% +% See the License for the specific language governing permissions +% and limitations under the License. +% +% This file drew much inspiration from erlview, which was written by and +% copyright Michael McDaniel [http://autosys.us], and is also under APL 2.0 +% +% +% This module provides the smallest possible native view-server. +% With this module in-place, you can add the following to your couch INI files: +% [native_query_servers] +% erlang={couch_native_process, start_link, []} +% +% Which will then allow following example map function to be used: +% +% fun({Doc}) -> +% % Below, we emit a single record - the _id as key, null as value +% DocId = couch_util:get_value(<<"_id">>, Doc, null), +% Emit(DocId, null) +% end. +% +% which should be roughly the same as the javascript: +% emit(doc._id, null); +% +% This module exposes enough functions such that a native erlang server can +% act as a fully-fleged view server, but no 'helper' functions specifically +% for simplifying your erlang view code. It is expected other third-party +% extensions will evolve which offer useful layers on top of this view server +% to help simplify your view code. +-module(couch_js_native_process). +-behaviour(gen_server). +-vsn(1). + +-export([start_link/0,init/1,terminate/2,handle_call/3,handle_cast/2,code_change/3, + handle_info/2]). +-export([set_timeout/2, prompt/2]). + +-define(STATE, native_proc_state). +-record(evstate, { + ddocs, + funs = [], + query_config = [], + list_pid = nil, + timeout = 5000, + idle = 5000 +}). + +-include_lib("couch/include/couch_db.hrl"). + +start_link() -> + gen_server:start_link(?MODULE, [], []). + +% this is a bit messy, see also couch_query_servers handle_info +% stop(_Pid) -> +% ok. + +set_timeout(Pid, TimeOut) -> + gen_server:call(Pid, {set_timeout, TimeOut}). + +prompt(Pid, Data) when is_list(Data) -> + gen_server:call(Pid, {prompt, Data}). + +% gen_server callbacks +init([]) -> + V = config:get("query_server_config", "os_process_idle_limit", "300"), + Idle = list_to_integer(V) * 1000, + {ok, #evstate{ddocs=dict:new(), idle=Idle}, Idle}. + +handle_call({set_timeout, TimeOut}, _From, State) -> + {reply, ok, State#evstate{timeout=TimeOut}, State#evstate.idle}; + +handle_call({prompt, Data}, _From, State) -> + couch_log:debug("Prompt native qs: ~s",[?JSON_ENCODE(Data)]), + {NewState, Resp} = try run(State, to_binary(Data)) of + {S, R} -> {S, R} + catch + throw:{error, Why} -> + {State, [<<"error">>, Why, Why]} + end, + + Idle = State#evstate.idle, + case Resp of + {error, Reason} -> + Msg = io_lib:format("couch native server error: ~p", [Reason]), + Error = [<<"error">>, <<"native_query_server">>, list_to_binary(Msg)], + {reply, Error, NewState, Idle}; + [<<"error">> | Rest] -> + % Msg = io_lib:format("couch native server error: ~p", [Rest]), + % TODO: markh? (jan) + {reply, [<<"error">> | Rest], NewState, Idle}; + [<<"fatal">> | Rest] -> + % Msg = io_lib:format("couch native server error: ~p", [Rest]), + % TODO: markh? (jan) + {stop, fatal, [<<"error">> | Rest], NewState}; + Resp -> + {reply, Resp, NewState, Idle} + end. + +handle_cast(garbage_collect, State) -> + erlang:garbage_collect(), + {noreply, State, State#evstate.idle}; +handle_cast(stop, State) -> + {stop, normal, State}; +handle_cast(_Msg, State) -> + {noreply, State, State#evstate.idle}. + +handle_info(timeout, State) -> + gen_server:cast(couch_js_proc_manager, {os_proc_idle, self()}), + erlang:garbage_collect(), + {noreply, State, State#evstate.idle}; +handle_info({'EXIT',_,normal}, State) -> + {noreply, State, State#evstate.idle}; +handle_info({'EXIT',_,Reason}, State) -> + {stop, Reason, State}. +terminate(_Reason, _State) -> ok. +code_change(_OldVersion, State, _Extra) -> {ok, State}. + +run(#evstate{list_pid=Pid}=State, [<<"list_row">>, Row]) when is_pid(Pid) -> + Pid ! {self(), list_row, Row}, + receive + {Pid, chunks, Data} -> + {State, [<<"chunks">>, Data]}; + {Pid, list_end, Data} -> + receive + {'EXIT', Pid, normal} -> ok + after State#evstate.timeout -> + throw({timeout, list_cleanup}) + end, + process_flag(trap_exit, erlang:get(do_trap)), + {State#evstate{list_pid=nil}, [<<"end">>, Data]} + after State#evstate.timeout -> + throw({timeout, list_row}) + end; +run(#evstate{list_pid=Pid}=State, [<<"list_end">>]) when is_pid(Pid) -> + Pid ! {self(), list_end}, + Resp = + receive + {Pid, list_end, Data} -> + receive + {'EXIT', Pid, normal} -> ok + after State#evstate.timeout -> + throw({timeout, list_cleanup}) + end, + [<<"end">>, Data] + after State#evstate.timeout -> + throw({timeout, list_end}) + end, + process_flag(trap_exit, erlang:get(do_trap)), + {State#evstate{list_pid=nil}, Resp}; +run(#evstate{list_pid=Pid}=State, _Command) when is_pid(Pid) -> + {State, [<<"error">>, list_error, list_error]}; +run(#evstate{ddocs=DDocs}, [<<"reset">>]) -> + {#evstate{ddocs=DDocs}, true}; +run(#evstate{ddocs=DDocs, idle=Idle}, [<<"reset">>, QueryConfig]) -> + NewState = #evstate{ + ddocs = DDocs, + query_config = QueryConfig, + idle = Idle + }, + {NewState, true}; +run(#evstate{funs=Funs}=State, [<<"add_fun">> , BinFunc]) -> + FunInfo = makefun(State, BinFunc), + {State#evstate{funs=Funs ++ [FunInfo]}, true}; +run(State, [<<"map_doc">> , Doc]) -> + Resp = lists:map(fun({Sig, Fun}) -> + erlang:put(Sig, []), + Fun(Doc), + lists:reverse(erlang:get(Sig)) + end, State#evstate.funs), + {State, Resp}; +run(State, [<<"reduce">>, Funs, KVs]) -> + {Keys, Vals} = + lists:foldl(fun([K, V], {KAcc, VAcc}) -> + {[K | KAcc], [V | VAcc]} + end, {[], []}, KVs), + Keys2 = lists:reverse(Keys), + Vals2 = lists:reverse(Vals), + {State, catch reduce(State, Funs, Keys2, Vals2, false)}; +run(State, [<<"rereduce">>, Funs, Vals]) -> + {State, catch reduce(State, Funs, null, Vals, true)}; +run(#evstate{ddocs=DDocs}=State, [<<"ddoc">>, <<"new">>, DDocId, DDoc]) -> + DDocs2 = store_ddoc(DDocs, DDocId, DDoc), + {State#evstate{ddocs=DDocs2}, true}; +run(#evstate{ddocs=DDocs}=State, [<<"ddoc">>, DDocId | Rest]) -> + DDoc = load_ddoc(DDocs, DDocId), + ddoc(State, DDoc, Rest); +run(_, Unknown) -> + couch_log:error("Native Process: Unknown command: ~p~n", [Unknown]), + throw({error, unknown_command}). + +ddoc(State, {DDoc}, [FunPath, Args]) -> + % load fun from the FunPath + BFun = lists:foldl(fun + (Key, {Props}) when is_list(Props) -> + couch_util:get_value(Key, Props, nil); + (_Key, Fun) when is_binary(Fun) -> + Fun; + (_Key, nil) -> + throw({error, not_found}); + (_Key, _Fun) -> + throw({error, malformed_ddoc}) + end, {DDoc}, FunPath), + ddoc(State, makefun(State, BFun, {DDoc}), FunPath, Args). + +ddoc(State, {_, Fun}, [<<"validate_doc_update">>], Args) -> + {State, (catch apply(Fun, Args))}; +ddoc(State, {_, Fun}, [<<"rewrites">>], Args) -> + {State, (catch apply(Fun, Args))}; +ddoc(State, {_, Fun}, [<<"filters">>|_], [Docs, Req]) -> + FilterFunWrapper = fun(Doc) -> + case catch Fun(Doc, Req) of + true -> true; + false -> false; + {'EXIT', Error} -> couch_log:error("~p", [Error]) + end + end, + Resp = lists:map(FilterFunWrapper, Docs), + {State, [true, Resp]}; +ddoc(State, {_, Fun}, [<<"views">>|_], [Docs]) -> + MapFunWrapper = fun(Doc) -> + case catch Fun(Doc) of + undefined -> true; + ok -> false; + false -> false; + [_|_] -> true; + {'EXIT', Error} -> couch_log:error("~p", [Error]) + end + end, + Resp = lists:map(MapFunWrapper, Docs), + {State, [true, Resp]}; +ddoc(State, {_, Fun}, [<<"shows">>|_], Args) -> + Resp = case (catch apply(Fun, Args)) of + FunResp when is_list(FunResp) -> + FunResp; + {FunResp} -> + [<<"resp">>, {FunResp}]; + FunResp -> + FunResp + end, + {State, Resp}; +ddoc(State, {_, Fun}, [<<"updates">>|_], Args) -> + Resp = case (catch apply(Fun, Args)) of + [JsonDoc, JsonResp] -> + [<<"up">>, JsonDoc, JsonResp] + end, + {State, Resp}; +ddoc(State, {Sig, Fun}, [<<"lists">>|_], Args) -> + Self = self(), + SpawnFun = fun() -> + LastChunk = (catch apply(Fun, Args)), + case start_list_resp(Self, Sig) of + started -> + receive + {Self, list_row, _Row} -> ignore; + {Self, list_end} -> ignore + after State#evstate.timeout -> + throw({timeout, list_cleanup_pid}) + end; + _ -> + ok + end, + LastChunks = + case erlang:get(Sig) of + undefined -> [LastChunk]; + OtherChunks -> [LastChunk | OtherChunks] + end, + Self ! {self(), list_end, lists:reverse(LastChunks)} + end, + erlang:put(do_trap, process_flag(trap_exit, true)), + Pid = spawn_link(SpawnFun), + Resp = + receive + {Pid, start, Chunks, JsonResp} -> + [<<"start">>, Chunks, JsonResp] + after State#evstate.timeout -> + throw({timeout, list_start}) + end, + {State#evstate{list_pid=Pid}, Resp}. + +store_ddoc(DDocs, DDocId, DDoc) -> + dict:store(DDocId, DDoc, DDocs). +load_ddoc(DDocs, DDocId) -> + try dict:fetch(DDocId, DDocs) of + {DDoc} -> {DDoc} + catch + _:_Else -> throw({error, ?l2b(io_lib:format("Native Query Server missing DDoc with Id: ~s",[DDocId]))}) + end. + +bindings(State, Sig) -> + bindings(State, Sig, nil). +bindings(State, Sig, DDoc) -> + Self = self(), + + Log = fun(Msg) -> + couch_log:info(Msg, []) + end, + + Emit = fun(Id, Value) -> + Curr = erlang:get(Sig), + erlang:put(Sig, [[Id, Value] | Curr]) + end, + + Start = fun(Headers) -> + erlang:put(list_headers, Headers) + end, + + Send = fun(Chunk) -> + Curr = + case erlang:get(Sig) of + undefined -> []; + Else -> Else + end, + erlang:put(Sig, [Chunk | Curr]) + end, + + GetRow = fun() -> + case start_list_resp(Self, Sig) of + started -> + ok; + _ -> + Chunks = + case erlang:get(Sig) of + undefined -> []; + CurrChunks -> CurrChunks + end, + Self ! {self(), chunks, lists:reverse(Chunks)} + end, + erlang:put(Sig, []), + receive + {Self, list_row, Row} -> Row; + {Self, list_end} -> nil + after State#evstate.timeout -> + throw({timeout, list_pid_getrow}) + end + end, + + FoldRows = fun(Fun, Acc) -> foldrows(GetRow, Fun, Acc) end, + + Bindings = [ + {'Log', Log}, + {'Emit', Emit}, + {'Start', Start}, + {'Send', Send}, + {'GetRow', GetRow}, + {'FoldRows', FoldRows} + ], + case DDoc of + {_Props} -> + Bindings ++ [{'DDoc', DDoc}]; + _Else -> Bindings + end. + +% thanks to erlview, via: +% http://erlang.org/pipermail/erlang-questions/2003-November/010544.html +makefun(State, Source) -> + Sig = couch_hash:md5_hash(Source), + BindFuns = bindings(State, Sig), + {Sig, makefun(State, Source, BindFuns)}. +makefun(State, Source, {DDoc}) -> + Sig = couch_hash:md5_hash(lists:flatten([Source, term_to_binary(DDoc)])), + BindFuns = bindings(State, Sig, {DDoc}), + {Sig, makefun(State, Source, BindFuns)}; +makefun(_State, Source, BindFuns) when is_list(BindFuns) -> + FunStr = binary_to_list(Source), + {ok, Tokens, _} = erl_scan:string(FunStr), + Form = case (catch erl_parse:parse_exprs(Tokens)) of + {ok, [ParsedForm]} -> + ParsedForm; + {error, {LineNum, _Mod, [Mesg, Params]}}=Error -> + couch_log:error("Syntax error on line: ~p~n~s~p~n", + [LineNum, Mesg, Params]), + throw(Error) + end, + Bindings = lists:foldl(fun({Name, Fun}, Acc) -> + erl_eval:add_binding(Name, Fun, Acc) + end, erl_eval:new_bindings(), BindFuns), + {value, Fun, _} = erl_eval:expr(Form, Bindings), + Fun. + +reduce(State, BinFuns, Keys, Vals, ReReduce) -> + Funs = case is_list(BinFuns) of + true -> + lists:map(fun(BF) -> makefun(State, BF) end, BinFuns); + _ -> + [makefun(State, BinFuns)] + end, + Reds = lists:map(fun({_Sig, Fun}) -> + Fun(Keys, Vals, ReReduce) + end, Funs), + [true, Reds]. + +foldrows(GetRow, ProcRow, Acc) -> + case GetRow() of + nil -> + {ok, Acc}; + Row -> + case (catch ProcRow(Row, Acc)) of + {ok, Acc2} -> + foldrows(GetRow, ProcRow, Acc2); + {stop, Acc2} -> + {ok, Acc2} + end + end. + +start_list_resp(Self, Sig) -> + case erlang:get(list_started) of + undefined -> + Headers = + case erlang:get(list_headers) of + undefined -> {[{<<"headers">>, {[]}}]}; + CurrHdrs -> CurrHdrs + end, + Chunks = + case erlang:get(Sig) of + undefined -> []; + CurrChunks -> CurrChunks + end, + Self ! {self(), start, lists:reverse(Chunks), Headers}, + erlang:put(list_started, true), + erlang:put(Sig, []), + started; + _ -> + ok + end. + +to_binary({Data}) -> + Pred = fun({Key, Value}) -> + {to_binary(Key), to_binary(Value)} + end, + {lists:map(Pred, Data)}; +to_binary(Data) when is_list(Data) -> + [to_binary(D) || D <- Data]; +to_binary(null) -> + null; +to_binary(true) -> + true; +to_binary(false) -> + false; +to_binary(Data) when is_atom(Data) -> + list_to_binary(atom_to_list(Data)); +to_binary(Data) -> + Data. diff --git a/src/couch_js/src/couch_js_os_process.erl b/src/couch_js/src/couch_js_os_process.erl new file mode 100644 index 000000000..a453d1ab2 --- /dev/null +++ b/src/couch_js/src/couch_js_os_process.erl @@ -0,0 +1,265 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_js_os_process). +-behaviour(gen_server). +-vsn(1). + +-export([start_link/1, start_link/2, start_link/3, stop/1]). +-export([set_timeout/2, prompt/2, killer/1]). +-export([send/2, writeline/2, readline/1, writejson/2, readjson/1]). +-export([init/1, terminate/2, handle_call/3, handle_cast/2, handle_info/2, code_change/3]). + +-include_lib("couch/include/couch_db.hrl"). + +-define(PORT_OPTIONS, [stream, {line, 4096}, binary, exit_status, hide]). + +-record(os_proc, + {command, + port, + writer, + reader, + timeout=5000, + idle + }). + +start_link(Command) -> + start_link(Command, []). +start_link(Command, Options) -> + start_link(Command, Options, ?PORT_OPTIONS). +start_link(Command, Options, PortOptions) -> + gen_server:start_link(?MODULE, [Command, Options, PortOptions], []). + +stop(Pid) -> + gen_server:cast(Pid, stop). + +% Read/Write API +set_timeout(Pid, TimeOut) when is_integer(TimeOut) -> + ok = gen_server:call(Pid, {set_timeout, TimeOut}, infinity). + +% Used by couch_event_os_process.erl +send(Pid, Data) -> + gen_server:cast(Pid, {send, Data}). + +prompt(Pid, Data) -> + case ioq:call(Pid, {prompt, Data}, erlang:get(io_priority)) of + {ok, Result} -> + Result; + Error -> + couch_log:error("OS Process Error ~p :: ~p",[Pid,Error]), + throw(Error) + end. + +% Utility functions for reading and writing +% in custom functions +writeline(OsProc, Data) when is_record(OsProc, os_proc) -> + Res = port_command(OsProc#os_proc.port, [Data, $\n]), + couch_js_io_logger:log_output(Data), + Res. + +readline(#os_proc{} = OsProc) -> + Res = readline(OsProc, []), + couch_js_io_logger:log_input(Res), + Res. +readline(#os_proc{port = Port} = OsProc, Acc) -> + receive + {Port, {data, {noeol, Data}}} when is_binary(Acc) -> + readline(OsProc, <>); + {Port, {data, {noeol, Data}}} when is_binary(Data) -> + readline(OsProc, Data); + {Port, {data, {noeol, Data}}} -> + readline(OsProc, [Data|Acc]); + {Port, {data, {eol, <>}}} when is_binary(Acc) -> + [<>]; + {Port, {data, {eol, Data}}} when is_binary(Data) -> + [Data]; + {Port, {data, {eol, Data}}} -> + lists:reverse(Acc, Data); + {Port, Err} -> + catch port_close(Port), + throw({os_process_error, Err}) + after OsProc#os_proc.timeout -> + catch port_close(Port), + throw({os_process_error, "OS process timed out."}) + end. + +% Standard JSON functions +writejson(OsProc, Data) when is_record(OsProc, os_proc) -> + JsonData = ?JSON_ENCODE(Data), + couch_log:debug("OS Process ~p Input :: ~s", + [OsProc#os_proc.port, JsonData]), + true = writeline(OsProc, JsonData). + +readjson(OsProc) when is_record(OsProc, os_proc) -> + Line = iolist_to_binary(readline(OsProc)), + couch_log:debug("OS Process ~p Output :: ~s", [OsProc#os_proc.port, Line]), + try + % Don't actually parse the whole JSON. Just try to see if it's + % a command or a doc map/reduce/filter/show/list/update output. + % If it's a command then parse the whole JSON and execute the + % command, otherwise return the raw JSON line to the caller. + pick_command(Line) + catch + throw:abort -> + {json, Line}; + throw:{cmd, _Cmd} -> + case ?JSON_DECODE(Line) of + [<<"log">>, Msg] when is_binary(Msg) -> + % we got a message to log. Log it and continue + couch_log:info("OS Process ~p Log :: ~s", + [OsProc#os_proc.port, Msg]), + readjson(OsProc); + [<<"error">>, Id, Reason] -> + throw({error, {couch_util:to_existing_atom(Id),Reason}}); + [<<"fatal">>, Id, Reason] -> + couch_log:info("OS Process ~p Fatal Error :: ~s ~p", + [OsProc#os_proc.port, Id, Reason]), + throw({couch_util:to_existing_atom(Id),Reason}); + _Result -> + {json, Line} + end + end. + +pick_command(Line) -> + json_stream_parse:events(Line, fun pick_command0/1). + +pick_command0(array_start) -> + fun pick_command1/1; +pick_command0(_) -> + throw(abort). + +pick_command1(<<"log">> = Cmd) -> + throw({cmd, Cmd}); +pick_command1(<<"error">> = Cmd) -> + throw({cmd, Cmd}); +pick_command1(<<"fatal">> = Cmd) -> + throw({cmd, Cmd}); +pick_command1(_) -> + throw(abort). + + +% gen_server API +init([Command, Options, PortOptions]) -> + couch_js_io_logger:start(os:getenv("COUCHDB_IO_LOG_DIR")), + PrivDir = couch_util:priv_dir(), + Spawnkiller = "\"" ++ filename:join(PrivDir, "couchspawnkillable") ++ "\"", + V = config:get("query_server_config", "os_process_idle_limit", "300"), + IdleLimit = list_to_integer(V) * 1000, + BaseProc = #os_proc{ + command=Command, + port=open_port({spawn, Spawnkiller ++ " " ++ Command}, PortOptions), + writer=fun ?MODULE:writejson/2, + reader=fun ?MODULE:readjson/1, + idle=IdleLimit + }, + KillCmd = iolist_to_binary(readline(BaseProc)), + Pid = self(), + couch_log:debug("OS Process Start :: ~p", [BaseProc#os_proc.port]), + spawn(fun() -> + % this ensure the real os process is killed when this process dies. + erlang:monitor(process, Pid), + killer(?b2l(KillCmd)) + end), + OsProc = + lists:foldl(fun(Opt, Proc) -> + case Opt of + {writer, Writer} when is_function(Writer) -> + Proc#os_proc{writer=Writer}; + {reader, Reader} when is_function(Reader) -> + Proc#os_proc{reader=Reader}; + {timeout, TimeOut} when is_integer(TimeOut) -> + Proc#os_proc{timeout=TimeOut} + end + end, BaseProc, Options), + {ok, OsProc, IdleLimit}. + +terminate(Reason, #os_proc{port=Port}) -> + catch port_close(Port), + case Reason of + normal -> + couch_js_io_logger:stop_noerror(); + Error -> + couch_js_io_logger:stop_error(Error) + end, + ok. + +handle_call({set_timeout, TimeOut}, _From, #os_proc{idle=Idle}=OsProc) -> + {reply, ok, OsProc#os_proc{timeout=TimeOut}, Idle}; +handle_call({prompt, Data}, _From, #os_proc{idle=Idle}=OsProc) -> + #os_proc{writer=Writer, reader=Reader} = OsProc, + try + Writer(OsProc, Data), + {reply, {ok, Reader(OsProc)}, OsProc, Idle} + catch + throw:{error, OsError} -> + {reply, OsError, OsProc, Idle}; + throw:{fatal, OsError} -> + {stop, normal, OsError, OsProc}; + throw:OtherError -> + {stop, normal, OtherError, OsProc} + after + garbage_collect() + end. + +handle_cast({send, Data}, #os_proc{writer=Writer, idle=Idle}=OsProc) -> + try + Writer(OsProc, Data), + {noreply, OsProc, Idle} + catch + throw:OsError -> + couch_log:error("Failed sending data: ~p -> ~p", [Data, OsError]), + {stop, normal, OsProc} + end; +handle_cast(garbage_collect, #os_proc{idle=Idle}=OsProc) -> + erlang:garbage_collect(), + {noreply, OsProc, Idle}; +handle_cast(stop, OsProc) -> + {stop, normal, OsProc}; +handle_cast(Msg, #os_proc{idle=Idle}=OsProc) -> + couch_log:debug("OS Proc: Unknown cast: ~p", [Msg]), + {noreply, OsProc, Idle}. + +handle_info(timeout, #os_proc{idle=Idle}=OsProc) -> + gen_server:cast(couch_js_proc_manager, {os_proc_idle, self()}), + erlang:garbage_collect(), + {noreply, OsProc, Idle}; +handle_info({Port, {exit_status, 0}}, #os_proc{port=Port}=OsProc) -> + couch_log:info("OS Process terminated normally", []), + {stop, normal, OsProc}; +handle_info({Port, {exit_status, Status}}, #os_proc{port=Port}=OsProc) -> + couch_log:error("OS Process died with status: ~p", [Status]), + {stop, {exit_status, Status}, OsProc}; +handle_info(Msg, #os_proc{idle=Idle}=OsProc) -> + couch_log:debug("OS Proc: Unknown info: ~p", [Msg]), + {noreply, OsProc, Idle}. + +code_change(_, {os_proc, Cmd, Port, W, R, Timeout} , _) -> + V = config:get("query_server_config","os_process_idle_limit","300"), + State = #os_proc{ + command = Cmd, + port = Port, + writer = W, + reader = R, + timeout = Timeout, + idle = list_to_integer(V) * 1000 + }, + {ok, State}; +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +killer(KillCmd) -> + receive _ -> + os:cmd(KillCmd) + after 1000 -> + ?MODULE:killer(KillCmd) + end. + diff --git a/src/couch_js/src/couch_js_proc_manager.erl b/src/couch_js/src/couch_js_proc_manager.erl new file mode 100644 index 000000000..096469612 --- /dev/null +++ b/src/couch_js/src/couch_js_proc_manager.erl @@ -0,0 +1,602 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_js_proc_manager). +-behaviour(gen_server). +-behaviour(config_listener). +-vsn(1). + +-export([ + start_link/0, + get_proc_count/0, + get_stale_proc_count/0, + new_proc/1, + reload/0, + terminate_stale_procs/0 +]). + +-export([ + init/1, + terminate/2, + handle_call/3, + handle_cast/2, + handle_info/2, + code_change/3 +]). + +-export([ + handle_config_change/5, + handle_config_terminate/3 +]). + +-include_lib("couch/include/couch_db.hrl"). + +-define(PROCS, couch_js_proc_manager_procs). +-define(WAITERS, couch_js_proc_manager_waiters). +-define(OPENING, couch_js_proc_manager_opening). +-define(SERVERS, couch_js_proc_manager_servers). +-define(RELISTEN_DELAY, 5000). + +-record(state, { + config, + counts, + threshold_ts, + hard_limit, + soft_limit +}). + +-type docid() :: iodata(). +-type revision() :: {integer(), binary()}. + +-record(client, { + timestamp :: os:timestamp() | '_', + from :: undefined | {pid(), reference()} | '_', + lang :: binary() | '_', + ddoc :: #doc{} | '_', + ddoc_key :: undefined | {DDocId :: docid(), Rev :: revision()} | '_' +}). + +-record(proc_int, { + pid, + lang, + client, + ddoc_keys = [], + prompt_fun, + set_timeout_fun, + stop_fun, + t0 = os:timestamp() +}). + + +start_link() -> + gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). + + +get_proc_count() -> + gen_server:call(?MODULE, get_proc_count). + + +get_stale_proc_count() -> + gen_server:call(?MODULE, get_stale_proc_count). + + +reload() -> + gen_server:call(?MODULE, set_threshold_ts). + + +terminate_stale_procs() -> + gen_server:call(?MODULE, terminate_stale_procs). + + +init([]) -> + process_flag(trap_exit, true), + ok = config:listen_for_changes(?MODULE, undefined), + + TableOpts = [public, named_table, ordered_set], + ets:new(?PROCS, TableOpts ++ [{keypos, #proc_int.pid}]), + ets:new(?WAITERS, TableOpts ++ [{keypos, #client.timestamp}]), + ets:new(?OPENING, [public, named_table, set]), + ets:new(?SERVERS, [public, named_table, set]), + ets:insert(?SERVERS, get_servers_from_env("COUCHDB_QUERY_SERVER_")), + ets:insert(?SERVERS, get_servers_from_env("COUCHDB_NATIVE_QUERY_SERVER_")), + ets:insert(?SERVERS, [{"QUERY", {mango_native_proc, start_link, []}}]), + maybe_configure_erlang_native_servers(), + + {ok, #state{ + config = get_proc_config(), + counts = dict:new(), + threshold_ts = os:timestamp(), + hard_limit = get_hard_limit(), + soft_limit = get_soft_limit() + }}. + + +terminate(_Reason, _State) -> + ets:foldl(fun(#proc_int{pid=P}, _) -> + couch_util:shutdown_sync(P) + end, 0, ?PROCS), + ok. + + +handle_call(get_proc_count, _From, State) -> + NumProcs = ets:info(?PROCS, size), + NumOpening = ets:info(?OPENING, size), + {reply, NumProcs + NumOpening, State}; + +handle_call(get_stale_proc_count, _From, State) -> + #state{threshold_ts = T0} = State, + MatchSpec = [{#proc_int{t0='$1', _='_'}, [{'<', '$1', {T0}}], [true]}], + {reply, ets:select_count(?PROCS, MatchSpec), State}; + +handle_call({get_proc, #doc{body={Props}}=DDoc, DDocKey}, From, State) -> + LangStr = couch_util:get_value(<<"language">>, Props, <<"javascript">>), + Lang = couch_util:to_binary(LangStr), + Client = #client{from=From, lang=Lang, ddoc=DDoc, ddoc_key=DDocKey}, + add_waiting_client(Client), + {noreply, flush_waiters(State, Lang)}; + +handle_call({get_proc, LangStr}, From, State) -> + Lang = couch_util:to_binary(LangStr), + Client = #client{from=From, lang=Lang}, + add_waiting_client(Client), + {noreply, flush_waiters(State, Lang)}; + +handle_call({ret_proc, #proc{client=Ref} = Proc}, _From, State) -> + erlang:demonitor(Ref, [flush]), + NewState = case ets:lookup(?PROCS, Proc#proc.pid) of + [#proc_int{}=ProcInt] -> + return_proc(State, ProcInt); + [] -> + % Proc must've died and we already + % cleared it out of the table in + % the handle_info clause. + State + end, + {reply, true, NewState}; + +handle_call(set_threshold_ts, _From, State) -> + FoldFun = fun + (#proc_int{client = undefined} = Proc, StateAcc) -> + remove_proc(StateAcc, Proc); + (_, StateAcc) -> + StateAcc + end, + NewState = ets:foldl(FoldFun, State, ?PROCS), + {reply, ok, NewState#state{threshold_ts = os:timestamp()}}; + +handle_call(terminate_stale_procs, _From, #state{threshold_ts = Ts1} = State) -> + FoldFun = fun + (#proc_int{client = undefined, t0 = Ts2} = Proc, StateAcc) -> + case Ts1 > Ts2 of + true -> + remove_proc(StateAcc, Proc); + false -> + StateAcc + end; + (_, StateAcc) -> + StateAcc + end, + NewState = ets:foldl(FoldFun, State, ?PROCS), + {reply, ok, NewState}; + +handle_call(_Call, _From, State) -> + {reply, ignored, State}. + + +handle_cast({os_proc_idle, Pid}, #state{counts=Counts}=State) -> + NewState = case ets:lookup(?PROCS, Pid) of + [#proc_int{client=undefined, lang=Lang}=Proc] -> + case dict:find(Lang, Counts) of + {ok, Count} when Count >= State#state.soft_limit -> + couch_log:info("Closing idle OS Process: ~p", [Pid]), + remove_proc(State, Proc); + {ok, _} -> + State + end; + _ -> + State + end, + {noreply, NewState}; + +handle_cast(reload_config, State) -> + NewState = State#state{ + config = get_proc_config(), + hard_limit = get_hard_limit(), + soft_limit = get_soft_limit() + }, + maybe_configure_erlang_native_servers(), + {noreply, flush_waiters(NewState)}; + +handle_cast(_Msg, State) -> + {noreply, State}. + + +handle_info(shutdown, State) -> + {stop, shutdown, State}; + +handle_info({'EXIT', Pid, {spawn_ok, Proc0, {ClientPid,_} = From}}, State) -> + ets:delete(?OPENING, Pid), + link(Proc0#proc_int.pid), + Proc = assign_proc(ClientPid, Proc0), + gen_server:reply(From, {ok, Proc, State#state.config}), + {noreply, State}; + +handle_info({'EXIT', Pid, spawn_error}, State) -> + [{Pid, #client{lang=Lang}}] = ets:lookup(?OPENING, Pid), + ets:delete(?OPENING, Pid), + NewState = State#state{ + counts = dict:update_counter(Lang, -1, State#state.counts) + }, + {noreply, flush_waiters(NewState, Lang)}; + +handle_info({'EXIT', Pid, Reason}, State) -> + couch_log:info("~p ~p died ~p", [?MODULE, Pid, Reason]), + case ets:lookup(?PROCS, Pid) of + [#proc_int{} = Proc] -> + NewState = remove_proc(State, Proc), + {noreply, flush_waiters(NewState, Proc#proc_int.lang)}; + [] -> + {noreply, State} + end; + +handle_info({'DOWN', Ref, _, _, _Reason}, State0) -> + case ets:match_object(?PROCS, #proc_int{client=Ref, _='_'}) of + [#proc_int{} = Proc] -> + {noreply, return_proc(State0, Proc)}; + [] -> + {noreply, State0} + end; + + +handle_info(restart_config_listener, State) -> + ok = config:listen_for_changes(?MODULE, nil), + {noreply, State}; + +handle_info(_Msg, State) -> + {noreply, State}. + + +code_change(_OldVsn, #state{}=State, _Extra) -> + {ok, State}. + +handle_config_terminate(_, stop, _) -> + ok; +handle_config_terminate(_Server, _Reason, _State) -> + gen_server:cast(?MODULE, reload_config), + erlang:send_after(?RELISTEN_DELAY, whereis(?MODULE), restart_config_listener). + +handle_config_change("native_query_servers", _, _, _, _) -> + gen_server:cast(?MODULE, reload_config), + {ok, undefined}; +handle_config_change("query_server_config", _, _, _, _) -> + gen_server:cast(?MODULE, reload_config), + {ok, undefined}; +handle_config_change(_, _, _, _, _) -> + {ok, undefined}. + + +find_proc(#client{lang = Lang, ddoc_key = undefined}) -> + Pred = fun(_) -> + true + end, + find_proc(Lang, Pred); +find_proc(#client{lang = Lang, ddoc = DDoc, ddoc_key = DDocKey} = Client) -> + Pred = fun(#proc_int{ddoc_keys = DDocKeys}) -> + lists:member(DDocKey, DDocKeys) + end, + case find_proc(Lang, Pred) of + not_found -> + case find_proc(Client#client{ddoc_key=undefined}) of + {ok, Proc} -> + teach_ddoc(DDoc, DDocKey, Proc); + Else -> + Else + end; + Else -> + Else + end. + +find_proc(Lang, Fun) -> + try iter_procs(Lang, Fun) + catch error:Reason -> + StackTrace = erlang:get_stacktrace(), + couch_log:error("~p ~p ~p", [?MODULE, Reason, StackTrace]), + {error, Reason} + end. + + +iter_procs(Lang, Fun) when is_binary(Lang) -> + Pattern = #proc_int{lang=Lang, client=undefined, _='_'}, + MSpec = [{Pattern, [], ['$_']}], + case ets:select_reverse(?PROCS, MSpec, 25) of + '$end_of_table' -> + not_found; + Continuation -> + iter_procs_int(Continuation, Fun) + end. + + +iter_procs_int({[], Continuation0}, Fun) -> + case ets:select_reverse(Continuation0) of + '$end_of_table' -> + not_found; + Continuation1 -> + iter_procs_int(Continuation1, Fun) + end; +iter_procs_int({[Proc | Rest], Continuation}, Fun) -> + case Fun(Proc) of + true -> + {ok, Proc}; + false -> + iter_procs_int({Rest, Continuation}, Fun) + end. + + +spawn_proc(State, Client) -> + Pid = spawn_link(?MODULE, new_proc, [Client]), + ets:insert(?OPENING, {Pid, Client}), + Counts = State#state.counts, + Lang = Client#client.lang, + State#state{ + counts = dict:update_counter(Lang, 1, Counts) + }. + + +new_proc(#client{ddoc=undefined, ddoc_key=undefined}=Client) -> + #client{from=From, lang=Lang} = Client, + Resp = try + case new_proc_int(From, Lang) of + {ok, Proc} -> + {spawn_ok, Proc, From}; + Error -> + gen_server:reply(From, {error, Error}), + spawn_error + end + catch _:_ -> + spawn_error + end, + exit(Resp); + +new_proc(Client) -> + #client{from=From, lang=Lang, ddoc=DDoc, ddoc_key=DDocKey} = Client, + Resp = try + case new_proc_int(From, Lang) of + {ok, NewProc} -> + {ok, Proc} = teach_ddoc(DDoc, DDocKey, NewProc), + {spawn_ok, Proc, From}; + Error -> + gen_server:reply(From, {error, Error}), + spawn_error + end + catch _:_ -> + spawn_error + end, + exit(Resp). + +split_string_if_longer(String, Pos) -> + case length(String) > Pos of + true -> lists:split(Pos, String); + false -> false + end. + +split_by_char(String, Char) -> + %% 17.5 doesn't have string:split + %% the function doesn't handle errors + %% it is designed to be used only in specific context + Pos = string:chr(String, Char), + {Key, [_Eq | Value]} = lists:split(Pos - 1, String), + {Key, Value}. + +get_servers_from_env(Spec) -> + SpecLen = length(Spec), + % loop over os:getenv(), match SPEC_ + lists:filtermap(fun(EnvStr) -> + case split_string_if_longer(EnvStr, SpecLen) of + {Spec, Rest} -> + {true, split_by_char(Rest, $=)}; + _ -> + false + end + end, os:getenv()). + +get_query_server(LangStr) -> + case ets:lookup(?SERVERS, string:to_upper(LangStr)) of + [{_, Command}] -> Command; + _ -> undefined + end. + +native_query_server_enabled() -> + % 1. [native_query_server] enable_erlang_query_server = true | false + % 2. if [native_query_server] erlang == {couch_native_process, start_link, []} -> pretend true as well + NativeEnabled = config:get_boolean("native_query_servers", "enable_erlang_query_server", false), + NativeLegacyConfig = config:get("native_query_servers", "erlang", ""), + NativeLegacyEnabled = NativeLegacyConfig =:= "{couch_native_process, start_link, []}", + NativeEnabled orelse NativeLegacyEnabled. + +maybe_configure_erlang_native_servers() -> + case native_query_server_enabled() of + true -> + ets:insert(?SERVERS, [ + {"ERLANG", {couch_js_native_process, start_link, []}}]); + _Else -> + ok + end. + +new_proc_int(From, Lang) when is_binary(Lang) -> + LangStr = binary_to_list(Lang), + case get_query_server(LangStr) of + undefined -> + gen_server:reply(From, {unknown_query_language, Lang}); + {M, F, A} -> + {ok, Pid} = apply(M, F, A), + make_proc(Pid, Lang, M); + Command -> + {ok, Pid} = couch_js_os_process:start_link(Command), + make_proc(Pid, Lang, couch_js_os_process) + end. + + +teach_ddoc(DDoc, {DDocId, _Rev}=DDocKey, #proc_int{ddoc_keys=Keys}=Proc) -> + % send ddoc over the wire + % we only share the rev with the client we know to update code + % but it only keeps the latest copy, per each ddoc, around. + true = couch_js_query_servers:proc_prompt( + export_proc(Proc), + [<<"ddoc">>, <<"new">>, DDocId, couch_doc:to_json_obj(DDoc, [])]), + % we should remove any other ddocs keys for this docid + % because the query server overwrites without the rev + Keys2 = [{D,R} || {D,R} <- Keys, D /= DDocId], + % add ddoc to the proc + {ok, Proc#proc_int{ddoc_keys=[DDocKey|Keys2]}}. + + +make_proc(Pid, Lang, Mod) when is_binary(Lang) -> + Proc = #proc_int{ + lang = Lang, + pid = Pid, + prompt_fun = {Mod, prompt}, + set_timeout_fun = {Mod, set_timeout}, + stop_fun = {Mod, stop} + }, + unlink(Pid), + {ok, Proc}. + + +assign_proc(Pid, #proc_int{client=undefined}=Proc0) when is_pid(Pid) -> + Proc = Proc0#proc_int{client = erlang:monitor(process, Pid)}, + ets:insert(?PROCS, Proc), + export_proc(Proc); +assign_proc(#client{}=Client, #proc_int{client=undefined}=Proc) -> + {Pid, _} = Client#client.from, + assign_proc(Pid, Proc). + + +return_proc(#state{} = State, #proc_int{} = ProcInt) -> + #proc_int{pid = Pid, lang = Lang} = ProcInt, + NewState = case is_process_alive(Pid) of true -> + case ProcInt#proc_int.t0 < State#state.threshold_ts of + true -> + remove_proc(State, ProcInt); + false -> + gen_server:cast(Pid, garbage_collect), + true = ets:update_element(?PROCS, Pid, [ + {#proc_int.client, undefined} + ]), + State + end; + false -> + remove_proc(State, ProcInt) + end, + flush_waiters(NewState, Lang). + + +remove_proc(State, #proc_int{}=Proc) -> + ets:delete(?PROCS, Proc#proc_int.pid), + case is_process_alive(Proc#proc_int.pid) of true -> + unlink(Proc#proc_int.pid), + gen_server:cast(Proc#proc_int.pid, stop); + false -> + ok + end, + Counts = State#state.counts, + Lang = Proc#proc_int.lang, + State#state{ + counts = dict:update_counter(Lang, -1, Counts) + }. + + +-spec export_proc(#proc_int{}) -> #proc{}. +export_proc(#proc_int{} = ProcInt) -> + ProcIntList = tuple_to_list(ProcInt), + ProcLen = record_info(size, proc), + [_ | Data] = lists:sublist(ProcIntList, ProcLen), + list_to_tuple([proc | Data]). + + +flush_waiters(State) -> + dict:fold(fun(Lang, Count, StateAcc) -> + case Count < State#state.hard_limit of + true -> + flush_waiters(StateAcc, Lang); + false -> + StateAcc + end + end, State, State#state.counts). + + +flush_waiters(State, Lang) -> + CanSpawn = can_spawn(State, Lang), + case get_waiting_client(Lang) of + #client{from = From} = Client -> + case find_proc(Client) of + {ok, ProcInt} -> + Proc = assign_proc(Client, ProcInt), + gen_server:reply(From, {ok, Proc, State#state.config}), + remove_waiting_client(Client), + flush_waiters(State, Lang); + {error, Error} -> + gen_server:reply(From, {error, Error}), + remove_waiting_client(Client), + flush_waiters(State, Lang); + not_found when CanSpawn -> + NewState = spawn_proc(State, Client), + remove_waiting_client(Client), + flush_waiters(NewState, Lang); + not_found -> + State + end; + undefined -> + State + end. + + +add_waiting_client(Client) -> + ets:insert(?WAITERS, Client#client{timestamp=os:timestamp()}). + +-spec get_waiting_client(Lang :: binary()) -> undefined | #client{}. +get_waiting_client(Lang) -> + case ets:match_object(?WAITERS, #client{lang=Lang, _='_'}, 1) of + '$end_of_table' -> + undefined; + {[#client{}=Client], _} -> + Client + end. + + +remove_waiting_client(#client{timestamp = Timestamp}) -> + ets:delete(?WAITERS, Timestamp). + + +can_spawn(#state{hard_limit = HardLimit, counts = Counts}, Lang) -> + case dict:find(Lang, Counts) of + {ok, Count} -> Count < HardLimit; + error -> true + end. + + +get_proc_config() -> + Limit = config:get("query_server_config", "reduce_limit", "true"), + Timeout = config:get("couchdb", "os_process_timeout", "5000"), + {[ + {<<"reduce_limit">>, list_to_atom(Limit)}, + {<<"timeout">>, list_to_integer(Timeout)} + ]}. + + +get_hard_limit() -> + LimStr = config:get("query_server_config", "os_process_limit", "100"), + list_to_integer(LimStr). + + +get_soft_limit() -> + LimStr = config:get("query_server_config", "os_process_soft_limit", "100"), + list_to_integer(LimStr). diff --git a/src/couch_js/src/couch_js_query_servers.erl b/src/couch_js/src/couch_js_query_servers.erl new file mode 100644 index 000000000..12dc864ea --- /dev/null +++ b/src/couch_js/src/couch_js_query_servers.erl @@ -0,0 +1,683 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_js_query_servers). + +-export([try_compile/4]). +-export([start_doc_map/3, map_doc_raw/2, stop_doc_map/1, raw_to_ejson/1]). +-export([reduce/3, rereduce/3,validate_doc_update/5]). +-export([filter_docs/5]). +-export([filter_view/3]). +-export([finalize/2]). +-export([rewrite/3]). + +-export([with_ddoc_proc/2, proc_prompt/2, ddoc_prompt/3, ddoc_proc_prompt/3, json_doc/1]). + +% For 210-os-proc-pool.t +-export([get_os_process/1, get_ddoc_process/2, ret_os_process/1]). + +-include_lib("couch/include/couch_db.hrl"). + +-define(SUMERROR, <<"The _sum function requires that map values be numbers, " + "arrays of numbers, or objects. Objects cannot be mixed with other " + "data structures. Objects can be arbitrarily nested, provided that the values " + "for all fields are themselves numbers, arrays of numbers, or objects.">>). + +-define(STATERROR, <<"The _stats function requires that map values be numbers " + "or arrays of numbers, not '~p'">>). + + +try_compile(Proc, FunctionType, FunctionName, FunctionSource) -> + try + proc_prompt(Proc, [<<"add_fun">>, FunctionSource]), + ok + catch + {compilation_error, E} -> + Fmt = "Compilation of the ~s function in the '~s' view failed: ~s", + Msg = io_lib:format(Fmt, [FunctionType, FunctionName, E]), + throw({compilation_error, Msg}); + {os_process_error, {exit_status, ExitStatus}} -> + Fmt = "Compilation of the ~s function in the '~s' view failed with exit status: ~p", + Msg = io_lib:format(Fmt, [FunctionType, FunctionName, ExitStatus]), + throw({compilation_error, Msg}) + end. + +start_doc_map(Lang, Functions, Lib) -> + Proc = get_os_process(Lang), + case Lib of + {[]} -> ok; + Lib -> + true = proc_prompt(Proc, [<<"add_lib">>, Lib]) + end, + lists:foreach(fun(FunctionSource) -> + true = proc_prompt(Proc, [<<"add_fun">>, FunctionSource]) + end, Functions), + {ok, Proc}. + +map_doc_raw(Proc, Doc) -> + Json = couch_doc:to_json_obj(Doc, []), + {ok, proc_prompt_raw(Proc, [<<"map_doc">>, Json])}. + + +stop_doc_map(nil) -> + ok; +stop_doc_map(Proc) -> + ok = ret_os_process(Proc). + +group_reductions_results([]) -> + []; +group_reductions_results(List) -> + {Heads, Tails} = lists:foldl( + fun([H|T], {HAcc,TAcc}) -> + {[H|HAcc], [T|TAcc]} + end, {[], []}, List), + case Tails of + [[]|_] -> % no tails left + [Heads]; + _ -> + [Heads | group_reductions_results(Tails)] + end. + +finalize(<<"_approx_count_distinct",_/binary>>, Reduction) -> + true = hyper:is_hyper(Reduction), + {ok, round(hyper:card(Reduction))}; +finalize(<<"_stats",_/binary>>, Unpacked) -> + {ok, pack_stats(Unpacked)}; +finalize(_RedSrc, Reduction) -> + {ok, Reduction}. + +rereduce(_Lang, [], _ReducedValues) -> + {ok, []}; +rereduce(Lang, RedSrcs, ReducedValues) -> + Grouped = group_reductions_results(ReducedValues), + Results = lists:zipwith( + fun + (<<"_", _/binary>> = FunSrc, Values) -> + {ok, [Result]} = builtin_reduce(rereduce, [FunSrc], [[[], V] || V <- Values], []), + Result; + (FunSrc, Values) -> + os_rereduce(Lang, [FunSrc], Values) + end, RedSrcs, Grouped), + {ok, Results}. + +reduce(_Lang, [], _KVs) -> + {ok, []}; +reduce(Lang, RedSrcs, KVs) -> + {OsRedSrcs, BuiltinReds} = lists:partition(fun + (<<"_", _/binary>>) -> false; + (_OsFun) -> true + end, RedSrcs), + {ok, OsResults} = os_reduce(Lang, OsRedSrcs, KVs), + {ok, BuiltinResults} = builtin_reduce(reduce, BuiltinReds, KVs, []), + recombine_reduce_results(RedSrcs, OsResults, BuiltinResults, []). + + +recombine_reduce_results([], [], [], Acc) -> + {ok, lists:reverse(Acc)}; +recombine_reduce_results([<<"_", _/binary>>|RedSrcs], OsResults, [BRes|BuiltinResults], Acc) -> + recombine_reduce_results(RedSrcs, OsResults, BuiltinResults, [BRes|Acc]); +recombine_reduce_results([_OsFun|RedSrcs], [OsR|OsResults], BuiltinResults, Acc) -> + recombine_reduce_results(RedSrcs, OsResults, BuiltinResults, [OsR|Acc]). + +os_reduce(_Lang, [], _KVs) -> + {ok, []}; +os_reduce(Lang, OsRedSrcs, KVs) -> + Proc = get_os_process(Lang), + OsResults = try proc_prompt(Proc, [<<"reduce">>, OsRedSrcs, KVs]) of + [true, Reductions] -> Reductions + catch + throw:{reduce_overflow_error, Msg} -> + [{[{reduce_overflow_error, Msg}]} || _ <- OsRedSrcs] + after + ok = ret_os_process(Proc) + end, + {ok, OsResults}. + +os_rereduce(Lang, OsRedSrcs, KVs) -> + case get_overflow_error(KVs) of + undefined -> + Proc = get_os_process(Lang), + try proc_prompt(Proc, [<<"rereduce">>, OsRedSrcs, KVs]) of + [true, [Reduction]] -> Reduction + catch + throw:{reduce_overflow_error, Msg} -> + {[{reduce_overflow_error, Msg}]} + after + ok = ret_os_process(Proc) + end; + Error -> + Error + end. + + +get_overflow_error([]) -> + undefined; +get_overflow_error([{[{reduce_overflow_error, _}]} = Error | _]) -> + Error; +get_overflow_error([_ | Rest]) -> + get_overflow_error(Rest). + + +builtin_reduce(_Re, [], _KVs, Acc) -> + {ok, lists:reverse(Acc)}; +builtin_reduce(Re, [<<"_sum",_/binary>>|BuiltinReds], KVs, Acc) -> + Sum = builtin_sum_rows(KVs, 0), + Red = check_sum_overflow(?term_size(KVs), ?term_size(Sum), Sum), + builtin_reduce(Re, BuiltinReds, KVs, [Red|Acc]); +builtin_reduce(reduce, [<<"_count",_/binary>>|BuiltinReds], KVs, Acc) -> + Count = length(KVs), + builtin_reduce(reduce, BuiltinReds, KVs, [Count|Acc]); +builtin_reduce(rereduce, [<<"_count",_/binary>>|BuiltinReds], KVs, Acc) -> + Count = builtin_sum_rows(KVs, 0), + builtin_reduce(rereduce, BuiltinReds, KVs, [Count|Acc]); +builtin_reduce(Re, [<<"_stats",_/binary>>|BuiltinReds], KVs, Acc) -> + Stats = builtin_stats(Re, KVs), + builtin_reduce(Re, BuiltinReds, KVs, [Stats|Acc]); +builtin_reduce(Re, [<<"_approx_count_distinct",_/binary>>|BuiltinReds], KVs, Acc) -> + Distinct = approx_count_distinct(Re, KVs), + builtin_reduce(Re, BuiltinReds, KVs, [Distinct|Acc]). + + +builtin_sum_rows([], Acc) -> + Acc; +builtin_sum_rows([[_Key, Value] | RestKVs], Acc) -> + try sum_values(Value, Acc) of + NewAcc -> + builtin_sum_rows(RestKVs, NewAcc) + catch + throw:{builtin_reduce_error, Obj} -> + Obj; + throw:{invalid_value, Reason, Cause} -> + {[{<<"error">>, <<"builtin_reduce_error">>}, + {<<"reason">>, Reason}, {<<"caused_by">>, Cause}]} + end. + + +sum_values(Value, Acc) when is_number(Value), is_number(Acc) -> + Acc + Value; +sum_values(Value, Acc) when is_list(Value), is_list(Acc) -> + sum_arrays(Acc, Value); +sum_values(Value, Acc) when is_number(Value), is_list(Acc) -> + sum_arrays(Acc, [Value]); +sum_values(Value, Acc) when is_list(Value), is_number(Acc) -> + sum_arrays([Acc], Value); +sum_values({Props}, Acc) -> + case lists:keyfind(<<"error">>, 1, Props) of + {<<"error">>, <<"builtin_reduce_error">>} -> + throw({builtin_reduce_error, {Props}}); + false -> + ok + end, + case Acc of + 0 -> + {Props}; + {AccProps} -> + {sum_objects(lists:sort(Props), lists:sort(AccProps))} + end; +sum_values(Else, _Acc) -> + throw_sum_error(Else). + +sum_objects([{K1, V1} | Rest1], [{K1, V2} | Rest2]) -> + [{K1, sum_values(V1, V2)} | sum_objects(Rest1, Rest2)]; +sum_objects([{K1, V1} | Rest1], [{K2, V2} | Rest2]) when K1 < K2 -> + [{K1, V1} | sum_objects(Rest1, [{K2, V2} | Rest2])]; +sum_objects([{K1, V1} | Rest1], [{K2, V2} | Rest2]) when K1 > K2 -> + [{K2, V2} | sum_objects([{K1, V1} | Rest1], Rest2)]; +sum_objects([], Rest) -> + Rest; +sum_objects(Rest, []) -> + Rest. + +sum_arrays([], []) -> + []; +sum_arrays([_|_]=Xs, []) -> + Xs; +sum_arrays([], [_|_]=Ys) -> + Ys; +sum_arrays([X|Xs], [Y|Ys]) when is_number(X), is_number(Y) -> + [X+Y | sum_arrays(Xs,Ys)]; +sum_arrays(Else, _) -> + throw_sum_error(Else). + +check_sum_overflow(InSize, OutSize, Sum) -> + Overflowed = OutSize > 4906 andalso OutSize * 2 > InSize, + case config:get("query_server_config", "reduce_limit", "true") of + "true" when Overflowed -> + Msg = log_sum_overflow(InSize, OutSize), + {[ + {<<"error">>, <<"builtin_reduce_error">>}, + {<<"reason">>, Msg} + ]}; + "log" when Overflowed -> + log_sum_overflow(InSize, OutSize), + Sum; + _ -> + Sum + end. + +log_sum_overflow(InSize, OutSize) -> + Fmt = "Reduce output must shrink more rapidly: " + "input size: ~b " + "output size: ~b", + Msg = iolist_to_binary(io_lib:format(Fmt, [InSize, OutSize])), + couch_log:error(Msg, []), + Msg. + +builtin_stats(_, []) -> + {0, 0, 0, 0, 0}; +builtin_stats(_, [[_,First]|Rest]) -> + lists:foldl(fun([_Key, Value], Acc) -> + stat_values(Value, Acc) + end, build_initial_accumulator(First), Rest). + +stat_values(Value, Acc) when is_list(Value), is_list(Acc) -> + lists:zipwith(fun stat_values/2, Value, Acc); +stat_values({PreRed}, Acc) when is_list(PreRed) -> + stat_values(unpack_stats({PreRed}), Acc); +stat_values(Value, Acc) when is_number(Value) -> + stat_values({Value, 1, Value, Value, Value*Value}, Acc); +stat_values(Value, Acc) when is_number(Acc) -> + stat_values(Value, {Acc, 1, Acc, Acc, Acc*Acc}); +stat_values(Value, Acc) when is_tuple(Value), is_tuple(Acc) -> + {Sum0, Cnt0, Min0, Max0, Sqr0} = Value, + {Sum1, Cnt1, Min1, Max1, Sqr1} = Acc, + { + Sum0 + Sum1, + Cnt0 + Cnt1, + erlang:min(Min0, Min1), + erlang:max(Max0, Max1), + Sqr0 + Sqr1 + }; +stat_values(Else, _Acc) -> + throw_stat_error(Else). + +build_initial_accumulator(L) when is_list(L) -> + [build_initial_accumulator(X) || X <- L]; +build_initial_accumulator(X) when is_number(X) -> + {X, 1, X, X, X*X}; +build_initial_accumulator({_, _, _, _, _} = AlreadyUnpacked) -> + AlreadyUnpacked; +build_initial_accumulator({Props}) -> + unpack_stats({Props}); +build_initial_accumulator(Else) -> + Msg = io_lib:format("non-numeric _stats input: ~w", [Else]), + throw({invalid_value, iolist_to_binary(Msg)}). + +unpack_stats({PreRed}) when is_list(PreRed) -> + { + get_number(<<"sum">>, PreRed), + get_number(<<"count">>, PreRed), + get_number(<<"min">>, PreRed), + get_number(<<"max">>, PreRed), + get_number(<<"sumsqr">>, PreRed) + }. + + +pack_stats({Sum, Cnt, Min, Max, Sqr}) -> + {[{<<"sum">>,Sum}, {<<"count">>,Cnt}, {<<"min">>,Min}, {<<"max">>,Max}, {<<"sumsqr">>,Sqr}]}; +pack_stats({Packed}) -> + % Legacy code path before we had the finalize operation + {Packed}; +pack_stats(Stats) when is_list(Stats) -> + lists:map(fun pack_stats/1, Stats). + +get_number(Key, Props) -> + case couch_util:get_value(Key, Props) of + X when is_number(X) -> + X; + undefined when is_binary(Key) -> + get_number(binary_to_atom(Key, latin1), Props); + undefined -> + Msg = io_lib:format("user _stats input missing required field ~s (~p)", + [Key, Props]), + throw({invalid_value, iolist_to_binary(Msg)}); + Else -> + Msg = io_lib:format("non-numeric _stats input received for ~s: ~w", + [Key, Else]), + throw({invalid_value, iolist_to_binary(Msg)}) + end. + +% TODO allow customization of precision in the ddoc. +approx_count_distinct(reduce, KVs) -> + lists:foldl(fun([[Key, _Id], _Value], Filter) -> + hyper:insert(term_to_binary(Key), Filter) + end, hyper:new(11), KVs); +approx_count_distinct(rereduce, Reds) -> + hyper:union([Filter || [_, Filter] <- Reds]). + +% use the function stored in ddoc.validate_doc_update to test an update. +-spec validate_doc_update(DDoc, EditDoc, DiskDoc, Ctx, SecObj) -> ok when + DDoc :: ddoc(), + EditDoc :: doc(), + DiskDoc :: doc() | nil, + Ctx :: user_ctx(), + SecObj :: sec_obj(). + +validate_doc_update(DDoc, EditDoc, DiskDoc, Ctx, SecObj) -> + JsonEditDoc = couch_doc:to_json_obj(EditDoc, [revs]), + JsonDiskDoc = json_doc(DiskDoc), + Resp = ddoc_prompt( + DDoc, + [<<"validate_doc_update">>], + [JsonEditDoc, JsonDiskDoc, Ctx, SecObj] + ), + if Resp == 1 -> ok; true -> + couch_stats:increment_counter([couchdb, query_server, vdu_rejects], 1) + end, + case Resp of + RespCode when RespCode =:= 1; RespCode =:= ok; RespCode =:= true -> + ok; + {[{<<"forbidden">>, Message}]} -> + throw({forbidden, Message}); + {[{<<"unauthorized">>, Message}]} -> + throw({unauthorized, Message}); + {[{_, Message}]} -> + throw({unknown_error, Message}); + Message when is_binary(Message) -> + throw({unknown_error, Message}) + end. + + +rewrite(Req, Db, DDoc) -> + Fields = [F || F <- chttpd_external:json_req_obj_fields(), + F =/= <<"info">>, F =/= <<"form">>, + F =/= <<"uuid">>, F =/= <<"id">>], + JsonReq = chttpd_external:json_req_obj(Req, Db, null, Fields), + case ddoc_prompt(DDoc, [<<"rewrites">>], [JsonReq]) of + {[{<<"forbidden">>, Message}]} -> + throw({forbidden, Message}); + {[{<<"unauthorized">>, Message}]} -> + throw({unauthorized, Message}); + [<<"no_dispatch_rule">>] -> + undefined; + [<<"ok">>, {V}=Rewrite] when is_list(V) -> + ok = validate_rewrite_response(Rewrite), + Rewrite; + [<<"ok">>, _] -> + throw_rewrite_error(<<"bad rewrite">>); + V -> + couch_log:error("bad rewrite return ~p", [V]), + throw({unknown_error, V}) + end. + +validate_rewrite_response({Fields}) when is_list(Fields) -> + validate_rewrite_response_fields(Fields). + +validate_rewrite_response_fields([{Key, Value} | Rest]) -> + validate_rewrite_response_field(Key, Value), + validate_rewrite_response_fields(Rest); +validate_rewrite_response_fields([]) -> + ok. + +validate_rewrite_response_field(<<"method">>, Method) when is_binary(Method) -> + ok; +validate_rewrite_response_field(<<"method">>, _) -> + throw_rewrite_error(<<"bad method">>); +validate_rewrite_response_field(<<"path">>, Path) when is_binary(Path) -> + ok; +validate_rewrite_response_field(<<"path">>, _) -> + throw_rewrite_error(<<"bad path">>); +validate_rewrite_response_field(<<"body">>, Body) when is_binary(Body) -> + ok; +validate_rewrite_response_field(<<"body">>, _) -> + throw_rewrite_error(<<"bad body">>); +validate_rewrite_response_field(<<"headers">>, {Props}=Headers) when is_list(Props) -> + validate_object_fields(Headers); +validate_rewrite_response_field(<<"headers">>, _) -> + throw_rewrite_error(<<"bad headers">>); +validate_rewrite_response_field(<<"query">>, {Props}=Query) when is_list(Props) -> + validate_object_fields(Query); +validate_rewrite_response_field(<<"query">>, _) -> + throw_rewrite_error(<<"bad query">>); +validate_rewrite_response_field(<<"code">>, Code) when is_integer(Code) andalso Code >= 200 andalso Code < 600 -> + ok; +validate_rewrite_response_field(<<"code">>, _) -> + throw_rewrite_error(<<"bad code">>); +validate_rewrite_response_field(K, V) -> + couch_log:debug("unknown rewrite field ~p=~p", [K, V]), + ok. + +validate_object_fields({Props}) when is_list(Props) -> + lists:foreach(fun + ({Key, Value}) when is_binary(Key) andalso is_binary(Value) -> + ok; + ({Key, Value}) -> + Reason = io_lib:format( + "object key/value must be strings ~p=~p", [Key, Value]), + throw_rewrite_error(Reason); + (Value) -> + throw_rewrite_error(io_lib:format("bad value ~p", [Value])) + end, Props). + + +throw_rewrite_error(Reason) when is_list(Reason)-> + throw_rewrite_error(iolist_to_binary(Reason)); +throw_rewrite_error(Reason) when is_binary(Reason) -> + throw({rewrite_error, Reason}). + + +json_doc_options() -> + json_doc_options([]). + +json_doc_options(Options) -> + Limit = config:get_integer("query_server_config", "revs_limit", 20), + [{revs, Limit} | Options]. + +json_doc(Doc) -> + json_doc(Doc, json_doc_options()). + +json_doc(nil, _) -> + null; +json_doc(Doc, Options) -> + couch_doc:to_json_obj(Doc, Options). + +filter_view(DDoc, VName, Docs) -> + Options = json_doc_options(), + JsonDocs = [json_doc(Doc, Options) || Doc <- Docs], + [true, Passes] = ddoc_prompt(DDoc, [<<"views">>, VName, <<"map">>], [JsonDocs]), + {ok, Passes}. + +filter_docs(Req, Db, DDoc, FName, Docs) -> + JsonReq = case Req of + {json_req, JsonObj} -> + JsonObj; + #httpd{} = HttpReq -> + couch_httpd_external:json_req_obj(HttpReq, Db) + end, + Options = json_doc_options(), + JsonDocs = [json_doc(Doc, Options) || Doc <- Docs], + [true, Passes] = ddoc_prompt(DDoc, [<<"filters">>, FName], + [JsonDocs, JsonReq]), + {ok, Passes}. + +ddoc_proc_prompt({Proc, DDocId}, FunPath, Args) -> + proc_prompt(Proc, [<<"ddoc">>, DDocId, FunPath, Args]). + +ddoc_prompt(DDoc, FunPath, Args) -> + with_ddoc_proc(DDoc, fun({Proc, DDocId}) -> + proc_prompt(Proc, [<<"ddoc">>, DDocId, FunPath, Args]) + end). + +with_ddoc_proc(#doc{id=DDocId,revs={Start, [DiskRev|_]}}=DDoc, Fun) -> + Rev = couch_doc:rev_to_str({Start, DiskRev}), + DDocKey = {DDocId, Rev}, + Proc = get_ddoc_process(DDoc, DDocKey), + try Fun({Proc, DDocId}) + after + ok = ret_os_process(Proc) + end. + +proc_prompt(Proc, Args) -> + case proc_prompt_raw(Proc, Args) of + {json, Json} -> + ?JSON_DECODE(Json); + EJson -> + EJson + end. + +proc_prompt_raw(#proc{prompt_fun = {Mod, Func}} = Proc, Args) -> + apply(Mod, Func, [Proc#proc.pid, Args]). + +raw_to_ejson({json, Json}) -> + ?JSON_DECODE(Json); +raw_to_ejson(EJson) -> + EJson. + +proc_stop(Proc) -> + {Mod, Func} = Proc#proc.stop_fun, + apply(Mod, Func, [Proc#proc.pid]). + +proc_set_timeout(Proc, Timeout) -> + {Mod, Func} = Proc#proc.set_timeout_fun, + apply(Mod, Func, [Proc#proc.pid, Timeout]). + +get_os_process_timeout() -> + list_to_integer(config:get("couchdb", "os_process_timeout", "5000")). + +get_ddoc_process(#doc{} = DDoc, DDocKey) -> + % remove this case statement + case gen_server:call(couch_js_proc_manager, {get_proc, DDoc, DDocKey}, get_os_process_timeout()) of + {ok, Proc, {QueryConfig}} -> + % process knows the ddoc + case (catch proc_prompt(Proc, [<<"reset">>, {QueryConfig}])) of + true -> + proc_set_timeout(Proc, couch_util:get_value(<<"timeout">>, QueryConfig)), + Proc; + _ -> + catch proc_stop(Proc), + get_ddoc_process(DDoc, DDocKey) + end; + Error -> + throw(Error) + end. + +get_os_process(Lang) -> + case gen_server:call(couch_js_proc_manager, {get_proc, Lang}, get_os_process_timeout()) of + {ok, Proc, {QueryConfig}} -> + case (catch proc_prompt(Proc, [<<"reset">>, {QueryConfig}])) of + true -> + proc_set_timeout(Proc, couch_util:get_value(<<"timeout">>, QueryConfig)), + Proc; + _ -> + catch proc_stop(Proc), + get_os_process(Lang) + end; + Error -> + throw(Error) + end. + +ret_os_process(Proc) -> + true = gen_server:call(couch_js_proc_manager, {ret_proc, Proc}, infinity), + catch unlink(Proc#proc.pid), + ok. + +throw_sum_error(Else) -> + throw({invalid_value, ?SUMERROR, Else}). + +throw_stat_error(Else) -> + throw({invalid_value, iolist_to_binary(io_lib:format(?STATERROR, [Else]))}). + + +-ifdef(TEST). +-include_lib("eunit/include/eunit.hrl"). + +builtin_sum_rows_negative_test() -> + A = [{[{<<"a">>, 1}]}, {[{<<"a">>, 2}]}, {[{<<"a">>, 3}]}], + E = {[{<<"error">>, <<"builtin_reduce_error">>}]}, + ?assertEqual(E, builtin_sum_rows([["K", E]], [])), + % The below case is where the value is invalid, but no error because + % it's only one document. + ?assertEqual(A, builtin_sum_rows([["K", A]], [])), + {Result} = builtin_sum_rows([["K", A]], [1, 2, 3]), + ?assertEqual({<<"error">>, <<"builtin_reduce_error">>}, + lists:keyfind(<<"error">>, 1, Result)). + +sum_values_test() -> + ?assertEqual(3, sum_values(1, 2)), + ?assertEqual([2,4,6], sum_values(1, [1,4,6])), + ?assertEqual([3,5,7], sum_values([3,2,4], [0,3,3])), + X = {[{<<"a">>,1}, {<<"b">>,[1,2]}, {<<"c">>, {[{<<"d">>,3}]}}, + {<<"g">>,1}]}, + Y = {[{<<"a">>,2}, {<<"b">>,3}, {<<"c">>, {[{<<"e">>, 5}]}}, + {<<"f">>,1}, {<<"g">>,1}]}, + Z = {[{<<"a">>,3}, {<<"b">>,[4,2]}, {<<"c">>, {[{<<"d">>,3},{<<"e">>,5}]}}, + {<<"f">>,1}, {<<"g">>,2}]}, + ?assertEqual(Z, sum_values(X, Y)), + ?assertEqual(Z, sum_values(Y, X)). + +sum_values_negative_test() -> + % invalid value + A = [{[{<<"a">>, 1}]}, {[{<<"a">>, 2}]}, {[{<<"a">>, 3}]}], + B = ["error 1", "error 2"], + C = [<<"error 3">>, <<"error 4">>], + KV = {[{<<"error">>, <<"builtin_reduce_error">>}, + {<<"reason">>, ?SUMERROR}, {<<"caused_by">>, <<"some cause">>}]}, + ?assertThrow({invalid_value, _, _}, sum_values(A, [1, 2, 3])), + ?assertThrow({invalid_value, _, _}, sum_values(A, 0)), + ?assertThrow({invalid_value, _, _}, sum_values(B, [1, 2])), + ?assertThrow({invalid_value, _, _}, sum_values(C, [0])), + ?assertThrow({builtin_reduce_error, KV}, sum_values(KV, [0])). + +stat_values_test() -> + ?assertEqual({1, 2, 0, 1, 1}, stat_values(1, 0)), + ?assertEqual({11, 2, 1, 10, 101}, stat_values(1, 10)), + ?assertEqual([{9, 2, 2, 7, 53}, + {14, 2, 3, 11, 130}, + {18, 2, 5, 13, 194} + ], stat_values([2,3,5], [7,11,13])). + +reduce_stats_test() -> + ?assertEqual([ + {[{<<"sum">>,2},{<<"count">>,1},{<<"min">>,2},{<<"max">>,2},{<<"sumsqr">>,4}]} + ], test_reduce(<<"_stats">>, [[[null, key], 2]])), + + ?assertEqual([[ + {[{<<"sum">>,1},{<<"count">>,1},{<<"min">>,1},{<<"max">>,1},{<<"sumsqr">>,1}]}, + {[{<<"sum">>,2},{<<"count">>,1},{<<"min">>,2},{<<"max">>,2},{<<"sumsqr">>,4}]} + ]], test_reduce(<<"_stats">>, [[[null, key],[1,2]]])), + + ?assertEqual( + {[{<<"sum">>,2},{<<"count">>,1},{<<"min">>,2},{<<"max">>,2},{<<"sumsqr">>,4}]} + , element(2, finalize(<<"_stats">>, {2, 1, 2, 2, 4}))), + + ?assertEqual([ + {[{<<"sum">>,1},{<<"count">>,1},{<<"min">>,1},{<<"max">>,1},{<<"sumsqr">>,1}]}, + {[{<<"sum">>,2},{<<"count">>,1},{<<"min">>,2},{<<"max">>,2},{<<"sumsqr">>,4}]} + ], element(2, finalize(<<"_stats">>, [ + {1, 1, 1, 1, 1}, + {2, 1, 2, 2, 4} + ]))), + + ?assertEqual([ + {[{<<"sum">>,1},{<<"count">>,1},{<<"min">>,1},{<<"max">>,1},{<<"sumsqr">>,1}]}, + {[{<<"sum">>,2},{<<"count">>,1},{<<"min">>,2},{<<"max">>,2},{<<"sumsqr">>,4}]} + ], element(2, finalize(<<"_stats">>, [ + {1, 1, 1, 1, 1}, + {[{<<"sum">>,2},{<<"count">>,1},{<<"min">>,2},{<<"max">>,2},{<<"sumsqr">>,4}]} + ]))), + + ?assertEqual([ + {[{<<"sum">>,1},{<<"count">>,1},{<<"min">>,1},{<<"max">>,1},{<<"sumsqr">>,1}]}, + {[{<<"sum">>,2},{<<"count">>,1},{<<"min">>,2},{<<"max">>,2},{<<"sumsqr">>,4}]} + ], element(2, finalize(<<"_stats">>, [ + {[{<<"sum">>,1},{<<"count">>,1},{<<"min">>,1},{<<"max">>,1},{<<"sumsqr">>,1}]}, + {2, 1, 2, 2, 4} + ]))), + ok. + +test_reduce(Reducer, KVs) -> + ?assertMatch({ok, _}, reduce(<<"javascript">>, [Reducer], KVs)), + {ok, Reduced} = reduce(<<"javascript">>, [Reducer], KVs), + {ok, Finalized} = finalize(Reducer, Reduced), + Finalized. + +-endif. diff --git a/src/couch_js/src/couch_js_sup.erl b/src/couch_js/src/couch_js_sup.erl new file mode 100644 index 000000000..e87546127 --- /dev/null +++ b/src/couch_js/src/couch_js_sup.erl @@ -0,0 +1,45 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + + +-module(couch_js_sup). +-behaviour(supervisor). + + +-export([ + start_link/0 +]). + +-export([ + init/1 +]). + + +start_link() -> + supervisor:start_link({local, ?MODULE}, ?MODULE, []). + + +init([]) -> + Flags = #{ + strategy => one_for_one, + intensity => 50, + period => 3600 + }, + Children = [ + #{ + id => couch_js_proc_manager, + restart => permanent, + shutdown => brutal_kill, + start => {couch_js_proc_manager, start_link, []} + } + ], + {ok, {Flags, Children}}. -- cgit v1.2.1 From 6574a4d76ba9436bed0dead12641603106608dce Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Tue, 20 Aug 2019 13:06:46 -0500 Subject: Implement couch_js callbacks for couch_eval --- rel/overlay/etc/default.ini | 6 +++++ src/couch_js/src/couch_js.erl | 51 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+) create mode 100644 src/couch_js/src/couch_js.erl diff --git a/rel/overlay/etc/default.ini b/rel/overlay/etc/default.ini index e6f2f5441..3c9271605 100644 --- a/rel/overlay/etc/default.ini +++ b/rel/overlay/etc/default.ini @@ -336,6 +336,12 @@ os_process_limit = 100 ;query_limit = 268435456 ;partition_query_limit = 268435456 +[couch_eval.languages] +; The list of modules that implement the couch_eval +; beahvior for executing provided code in design +; documents. +javascript = couch_js + [mango] ; Set to true to disable the "index all fields" text index, which can lead ; to out of memory issues when users have documents with nested array fields. diff --git a/src/couch_js/src/couch_js.erl b/src/couch_js/src/couch_js.erl new file mode 100644 index 000000000..1bc0f1927 --- /dev/null +++ b/src/couch_js/src/couch_js.erl @@ -0,0 +1,51 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + + +-module(couch_js). +-behavior(couch_eval). + + +-export([ + acquire_map_context/1, + release_map_context/1, + map_docs/2 +]). + + +-include_lib("couch/include/couch_db.hrl"). + + +-define(JS, <<"javascript">>). + + +acquire_map_context(Opts) -> + #{ + map_funs := MapFuns, + lib := Lib + } = Opts, + couch_js_query_servers:start_doc_map(?JS, MapFuns, Lib). + + +release_map_context(Proc) -> + couch_js_query_servers:stop_doc_map(Proc). + + +map_docs(Proc, Docs) -> + {ok, lists:map(fun(Doc) -> + {ok, RawResults} = couch_js_query_servers:map_doc_raw(Proc, Doc), + Results = couch_js_query_servers:raw_to_ejson(RawResults), + Tupled = lists:map(fun(ViewResult) -> + lists:map(fun([K, V]) -> {K, V} end, ViewResult) + end, Results), + {Doc#doc.id, Tupled} + end, Docs)}. -- cgit v1.2.1 From 88052345235f990f733de61c2a65a42b7f661b89 Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Tue, 20 Aug 2019 14:21:00 -0500 Subject: Add tests for couch_js application These are ported over from the existing couch Eunit suite and updated to be less racey hopefully. --- src/couch_js/src/couch_js.app.src | 2 +- src/couch_js/test/couch_js_proc_manager_tests.erl | 373 +++++++++++++++++++++ src/couch_js/test/couch_js_query_servers_tests.erl | 96 ++++++ 3 files changed, 470 insertions(+), 1 deletion(-) create mode 100644 src/couch_js/test/couch_js_proc_manager_tests.erl create mode 100644 src/couch_js/test/couch_js_query_servers_tests.erl diff --git a/src/couch_js/src/couch_js.app.src b/src/couch_js/src/couch_js.app.src index 0db37b68c..44efd6d7d 100644 --- a/src/couch_js/src/couch_js.app.src +++ b/src/couch_js/src/couch_js.app.src @@ -22,6 +22,6 @@ stdlib, config, couch_log, - couch + ioq ]} ]}. diff --git a/src/couch_js/test/couch_js_proc_manager_tests.erl b/src/couch_js/test/couch_js_proc_manager_tests.erl new file mode 100644 index 000000000..f138dd651 --- /dev/null +++ b/src/couch_js/test/couch_js_proc_manager_tests.erl @@ -0,0 +1,373 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_js_proc_manager_tests). + +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("couch/include/couch_db.hrl"). + + +-define(TDEF(A), {atom_to_list(A), fun A/0}). + +-define(NUM_PROCS, 3). +-define(TIMEOUT, 1000). + +-define(TIMEOUT_ERROR(Msg), erlang:error({assertion_failed, [ + {module, ?MODULE}, + {line, ?LINE}, + {reason, Msg} + ]})). + + +start() -> + ok = application:set_env(config, ini_files, ?CONFIG_CHAIN), + {ok, Started} = application:ensure_all_started(couch_js), + config:set("native_query_servers", "enable_erlang_query_server", "true", false), + config:set("query_server_config", "os_process_limit", "3", false), + config:set("query_server_config", "os_process_soft_limit", "2", false), + config:set("query_server_config", "os_process_idle_limit", "1", false), + ok = config_wait("os_process_idle_limit", "1"), + Started. + + +stop(Apps) -> + lists:foreach(fun(App) -> + ok = application:stop(App) + end, lists:reverse(Apps)). + + +couch_js_proc_manager_test_() -> + { + "couch_js_proc_manger tests", + { + setup, + fun start/0, + fun stop/1, + [ + ?TDEF(should_block_new_proc_on_full_pool), + ?TDEF(should_free_slot_on_proc_unexpected_exit), + ?TDEF(should_reuse_known_proc), + ?TDEF(should_process_waiting_queue_as_fifo), + ?TDEF(should_reduce_pool_on_idle_os_procs) + ] + } + }. + + +should_block_new_proc_on_full_pool() -> + ok = couch_js_proc_manager:reload(), + + Clients = [ + spawn_client(), + spawn_client(), + spawn_client() + ], + + lists:foreach(fun(Client) -> + ?assertEqual(ok, ping_client(Client)) + end, Clients), + + % Make sure everyone got a different proc + Procs = [get_client_proc(Client) || Client <- Clients], + ?assertEqual(lists:sort(Procs), lists:usort(Procs)), + + % This client will be stuck waiting for someone + % to give up their proc. + Client4 = spawn_client(), + ?assert(is_client_waiting(Client4)), + + Client1 = hd(Clients), + Proc1 = hd(Procs), + + ?assertEqual(ok, stop_client(Client1)), + ?assertEqual(ok, ping_client(Client4)), + + Proc4 = get_client_proc(Client4), + + ?assertEqual(Proc1#proc.pid, Proc4#proc.pid), + ?assertNotEqual(Proc1#proc.client, Proc4#proc.client), + + lists:map(fun(C) -> + ?assertEqual(ok, stop_client(C)) + end, [Client4 | tl(Clients)]). + + +should_free_slot_on_proc_unexpected_exit() -> + ok = couch_js_proc_manager:reload(), + + Clients = [ + spawn_client(), + spawn_client(), + spawn_client() + ], + + lists:foreach(fun(Client) -> + ?assertEqual(ok, ping_client(Client)) + end, Clients), + + Procs1 = [get_client_proc(Client) || Client <- Clients], + ProcClients1 = [Proc#proc.client || Proc <- Procs1], + ?assertEqual(lists:sort(Procs1), lists:usort(Procs1)), + ?assertEqual(lists:sort(ProcClients1), lists:usort(ProcClients1)), + + Client1 = hd(Clients), + Proc1 = hd(Procs1), + ?assertEqual(ok, kill_client(Client1)), + + Client4 = spawn_client(), + ?assertEqual(ok, ping_client(Client4)), + Proc4 = get_client_proc(Client4), + + ?assertEqual(Proc1#proc.pid, Proc4#proc.pid), + ?assertNotEqual(Proc1#proc.client, Proc4#proc.client), + + Procs2 = [Proc4 | tl(Procs1)], + ProcClients2 = [Proc4#proc.client | tl(ProcClients1)], + ?assertEqual(lists:sort(Procs2), lists:usort(Procs2)), + ?assertEqual(lists:sort(ProcClients2), lists:usort(ProcClients2)), + + lists:map(fun(C) -> + ?assertEqual(ok, stop_client(C)) + end, [Client4 | tl(Clients)]). + + +should_reuse_known_proc() -> + ok = couch_js_proc_manager:reload(), + + Clients = [ + spawn_client(<<"ddoc1">>), + spawn_client(<<"ddoc2">>) + ], + + lists:foreach(fun(Client) -> + ?assertEqual(ok, ping_client(Client)) + end, Clients), + + Procs = [get_client_proc(Client) || Client <- Clients], + ?assertEqual(lists:sort(Procs), lists:usort(Procs)), + + lists:foreach(fun(Client) -> + ?assertEqual(ok, stop_client(Client)) + end, Clients), + + lists:foreach(fun(Proc) -> + ?assert(is_process_alive(Proc#proc.pid)) + end, Procs), + + Client = spawn_client(<<"ddoc1">>), + ?assertEqual(ok, ping_client(Client)), + + OldProc = hd(Procs), + NewProc = get_client_proc(Client), + + ?assertEqual(OldProc#proc.pid, NewProc#proc.pid), + ?assertNotEqual(OldProc#proc.client, NewProc#proc.client), + ?assertEqual(ok, stop_client(Client)). + + +should_process_waiting_queue_as_fifo() -> + Clients = [ + spawn_client(<<"ddoc1">>), + spawn_client(<<"ddoc2">>), + spawn_client(<<"ddoc3">>), + spawn_client(<<"ddoc4">>), + spawn_client(<<"ddoc5">>), + spawn_client(<<"ddoc6">>) + ], + + lists:foldl(fun(Client, Pos) -> + case Pos =< ?NUM_PROCS of + true -> + ?assertEqual(ok, ping_client(Client)); + false -> + ?assert(is_client_waiting(Client)) + end, + Pos + 1 + end, 1, Clients), + + LastClients = lists:foldl(fun(_Iteration, ClientAcc) -> + FirstClient = hd(ClientAcc), + FirstProc = get_client_proc(FirstClient), + ?assertEqual(ok, stop_client(FirstClient)), + + RestClients = tl(ClientAcc), + + lists:foldl(fun(Client, Pos) -> + case Pos =< ?NUM_PROCS of + true -> + ?assertEqual(ok, ping_client(Client)); + false -> + ?assert(is_client_waiting(Client)) + end, + if Pos /= ?NUM_PROCS -> ok; true -> + BubbleProc = get_client_proc(Client), + ?assertEqual(FirstProc#proc.pid, BubbleProc#proc.pid), + ?assertNotEqual(FirstProc#proc.client, BubbleProc#proc.client) + end, + Pos + 1 + end, 1, RestClients), + + RestClients + end, Clients, lists:seq(1, 3)), + + lists:foreach(fun(Client) -> + ?assertEqual(ok, stop_client(Client)) + end, LastClients). + + +should_reduce_pool_on_idle_os_procs() -> + Clients = [ + spawn_client(<<"ddoc1">>), + spawn_client(<<"ddoc2">>), + spawn_client(<<"ddoc3">>) + ], + + lists:foreach(fun(Client) -> + ?assertEqual(ok, ping_client(Client)) + end, Clients), + + ?assertEqual(3, couch_js_proc_manager:get_proc_count()), + + lists:foreach(fun(Client) -> + ?assertEqual(ok, stop_client(Client)) + end, Clients), + + ?assertEqual(3, couch_js_proc_manager:get_proc_count()), + + timer:sleep(1200), + + ?assertEqual(1, couch_js_proc_manager:get_proc_count()). + + +spawn_client() -> + Parent = self(), + Ref = make_ref(), + {Pid, _} = spawn_monitor(fun() -> + Parent ! {self(), initialized}, + Proc = couch_js_query_servers:get_os_process(<<"erlang">>), + loop(Parent, Ref, Proc) + end), + receive + {Pid, initialized} -> + ok + after ?TIMEOUT -> + ?TIMEOUT_ERROR("Error creating client.") + end, + {Pid, Ref}. + + +spawn_client(DDocId) -> + Parent = self(), + Ref = make_ref(), + {Pid, _} = spawn_monitor(fun() -> + DDocKey = {DDocId, <<"1-abcdefgh">>}, + DDoc = #doc{body={[{<<"language">>, <<"erlang">>}]}}, + Parent ! {self(), initialized}, + Proc = couch_js_query_servers:get_ddoc_process(DDoc, DDocKey), + loop(Parent, Ref, Proc) + end), + receive + {Pid, initialized} -> + ok + after ?TIMEOUT -> + ?TIMEOUT_ERROR("Error creating ddoc client.") + end, + {Pid, Ref}. + + +loop(Parent, Ref, Proc) -> + receive + ping -> + Parent ! {pong, Ref}, + loop(Parent, Ref, Proc); + get_proc -> + Parent ! {proc, Ref, Proc}, + loop(Parent, Ref, Proc); + stop -> + couch_js_query_servers:ret_os_process(Proc), + Parent ! {stop, Ref}; + die -> + Parent ! {die, Ref}, + exit(some_error) + end. + + +ping_client({Pid, Ref}) -> + Pid ! ping, + receive + {pong, Ref} -> + ok + after ?TIMEOUT -> + ?TIMEOUT_ERROR("Timeout pinging client") + end. + + +is_client_waiting({Pid, _Ref}) -> + {status, Status} = process_info(Pid, status), + {current_function, {M, F, A}} = process_info(Pid, current_function), + Status == waiting andalso {M, F, A} == {gen, do_call, 4}. + + +get_client_proc({Pid, Ref}) -> + Pid ! get_proc, + receive + {proc, Ref, Proc} -> Proc + after ?TIMEOUT -> + ?TIMEOUT_ERROR("Timeout getting proc from client") + end. + + +stop_client({Pid, Ref}) -> + Pid ! stop, + receive + {stop, Ref} -> + ok + after ?TIMEOUT -> + ?TIMEOUT_ERROR("Timeout stopping client") + end, + receive + {'DOWN', _, _, Pid, _} -> + ok + after ?TIMEOUT -> + ?TIMEOUT_ERROR("Timeout waiting for stopped client 'DOWN'") + end. + + +kill_client({Pid, Ref}) -> + Pid ! die, + receive + {die, Ref} -> + ok + after ?TIMEOUT -> + ?TIMEOUT_ERROR("Timeout killing client") + end, + receive + {'DOWN', _, _, Pid, _} -> + ok + after ?TIMEOUT -> + ?TIMEOUT_ERROR("Timeout waiting for killed client 'DOWN'") + end. + + +config_wait(Key, Value) -> + config_wait(Key, Value, 0). + +config_wait(Key, Value, Count) -> + case config:get("query_server_config", Key) of + Value -> + ok; + _ when Count > 10 -> + ?TIMEOUT_ERROR("Error waiting for config changes."); + _ -> + timer:sleep(10), + config_wait(Key, Value, Count + 1) + end. diff --git a/src/couch_js/test/couch_js_query_servers_tests.erl b/src/couch_js/test/couch_js_query_servers_tests.erl new file mode 100644 index 000000000..bc4ecc72f --- /dev/null +++ b/src/couch_js/test/couch_js_query_servers_tests.erl @@ -0,0 +1,96 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_js_query_servers_tests). + +-include_lib("couch/include/couch_eunit.hrl"). + + +setup() -> + meck:new([config, couch_log]). + + +teardown(_) -> + meck:unload(). + + +sum_overflow_test_() -> + { + "Test overflow detection in the _sum reduce function", + { + setup, + fun setup/0, + fun teardown/1, + [ + fun should_return_error_on_overflow/0, + fun should_return_object_on_log/0, + fun should_return_object_on_false/0 + ] + } + }. + + +should_return_error_on_overflow() -> + setup_reduce_limit_mock("true"), + + KVs = gen_sum_kvs(), + {ok, [Result]} = couch_query_servers:reduce(<<"foo">>, [<<"_sum">>], KVs), + ?assertMatch({[{<<"error">>, <<"builtin_reduce_error">>} | _]}, Result), + + check_reduce_limit_mock(). + + +should_return_object_on_log() -> + setup_reduce_limit_mock("log"), + + KVs = gen_sum_kvs(), + {ok, [Result]} = couch_query_servers:reduce(<<"foo">>, [<<"_sum">>], KVs), + ?assertMatch({[_ | _]}, Result), + Keys = [K || {K, _} <- element(1, Result)], + ?assert(not lists:member(<<"error">>, Keys)), + + check_reduce_limit_mock(). + + +should_return_object_on_false() -> + setup_reduce_limit_mock("false"), + + KVs = gen_sum_kvs(), + {ok, [Result]} = couch_query_servers:reduce(<<"foo">>, [<<"_sum">>], KVs), + ?assertMatch({[_ | _]}, Result), + Keys = [K || {K, _} <- element(1, Result)], + ?assert(not lists:member(<<"error">>, Keys)), + + ?assert(meck:called(config, get, '_')), + ?assertNot(meck:called(couch_log, error, '_')). + + +gen_sum_kvs() -> + lists:map(fun(I) -> + Props = lists:map(fun(_) -> + K = couch_util:encodeBase64Url(crypto:strong_rand_bytes(16)), + {K, 1} + end, lists:seq(1, 20)), + [I, {Props}] + end, lists:seq(1, 10)). + + +setup_reduce_limit_mock(Value) -> + ConfigArgs = ["query_server_config", "reduce_limit", "true"], + meck:reset([config, couch_log]), + meck:expect(config, get, ConfigArgs, Value), + meck:expect(couch_log, error, ['_', '_'], ok). + + +check_reduce_limit_mock() -> + ?assert(meck:called(config, get, '_')), + ?assert(meck:called(couch_log, error, '_')). -- cgit v1.2.1 From 388c1146ef502336466cc0c1540ad818088d86de Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Tue, 20 Aug 2019 16:16:57 -0500 Subject: Update couch_views to use couch_eval --- src/couch_views/src/couch_views.app.src | 3 +- src/couch_views/src/couch_views_indexer.erl | 65 ++++++++++++++++------ src/couch_views/test/couch_views_indexer_test.erl | 1 + src/couch_views/test/couch_views_map_test.erl | 7 ++- .../test/couch_views_trace_index_test.erl | 2 +- 5 files changed, 58 insertions(+), 20 deletions(-) diff --git a/src/couch_views/src/couch_views.app.src b/src/couch_views/src/couch_views.app.src index c80c30b02..0d666affd 100644 --- a/src/couch_views/src/couch_views.app.src +++ b/src/couch_views/src/couch_views.app.src @@ -26,6 +26,7 @@ config, couch_stats, fabric, - couch_jobs + couch_jobs, + couch_eval ]} ]}. diff --git a/src/couch_views/src/couch_views_indexer.erl b/src/couch_views/src/couch_views_indexer.erl index 83d1b6aa2..55ce06311 100644 --- a/src/couch_views/src/couch_views_indexer.erl +++ b/src/couch_views/src/couch_views_indexer.erl @@ -120,7 +120,7 @@ update(#{} = Db, Mrst0, State0) -> case State4 of finished -> - couch_query_servers:stop_doc_map(Mrst2#mrst.qserver); + couch_eval:release_map_context(Mrst2#mrst.qserver); _ -> update(Db, Mrst2, State4) end. @@ -171,20 +171,42 @@ map_docs(Mrst, Docs) -> % Run all the non deleted docs through the view engine and Mrst1 = start_query_server(Mrst), QServer = Mrst1#mrst.qserver, - MapFun = fun - (#{deleted := true} = Change) -> - Change#{results => []}; - (#{deleted := false} = Change) -> - #{doc := Doc} = Change, - couch_stats:increment_counter([couchdb, mrview, map_doc]), - {ok, RawResults} = couch_query_servers:map_doc_raw(QServer, Doc), - JsonResults = couch_query_servers:raw_to_ejson(RawResults), - ListResults = lists:map(fun(ViewResults) -> - [list_to_tuple(Res) || Res <- ViewResults] - end, JsonResults), - Change#{results => ListResults} - end, - {Mrst1, lists:map(MapFun, Docs)}. + + {Deleted0, NotDeleted0} = lists:partition(fun(Doc) -> + #{deleted := Deleted} = Doc, + Deleted + end, Docs), + + Deleted1 = lists:map(fun(Doc) -> + Doc#{results => []} + end, Deleted0), + + DocsToMap = lists:map(fun(Doc) -> + #{doc := DocRec} = Doc, + DocRec + end, NotDeleted0), + + {ok, AllResults} = couch_eval:map_docs(QServer, DocsToMap), + + % The expanded function head here is making an assertion + % that the results match the given doc + NotDeleted1 = lists:zipwith(fun(#{id := DocId} = Doc, {DocId, Results}) -> + Doc#{results => Results} + end, NotDeleted0, AllResults), + + % I'm being a bit careful here resorting the docs + % in order of the changes feed. Theoretically this is + % unnecessary since we're inside a single transaction. + % However, I'm concerned if we ever split this up + % into multiple transactions that this detail might + % be important but forgotten. + MappedDocs = lists:sort(fun(A, B) -> + #{sequence := ASeq} = A, + #{sequence := BSeq} = B, + ASeq =< BSeq + end, Deleted1 ++ NotDeleted1), + + {Mrst1, MappedDocs}. write_docs(TxDb, Mrst, Docs, State) -> @@ -249,12 +271,21 @@ fetch_docs(Db, Changes) -> start_query_server(#mrst{qserver = nil} = Mrst) -> #mrst{ + db_name = DbName, + idx_name = DDocId, language = Language, + sig = Sig, lib = Lib, views = Views } = Mrst, - Defs = [View#mrview.def || View <- Views], - {ok, QServer} = couch_query_servers:start_doc_map(Language, Defs, Lib), + {ok, QServer} = couch_eval:acquire_map_context( + DbName, + DDocId, + Language, + Sig, + Lib, + [View#mrview.def || View <- Views] + ), Mrst#mrst{qserver = QServer}; start_query_server(#mrst{} = Mrst) -> diff --git a/src/couch_views/test/couch_views_indexer_test.erl b/src/couch_views/test/couch_views_indexer_test.erl index 02c8ceedb..20ad0dc6b 100644 --- a/src/couch_views/test/couch_views_indexer_test.erl +++ b/src/couch_views/test/couch_views_indexer_test.erl @@ -52,6 +52,7 @@ setup() -> Ctx = test_util:start_couch([ fabric, couch_jobs, + couch_js, couch_views ]), Ctx. diff --git a/src/couch_views/test/couch_views_map_test.erl b/src/couch_views/test/couch_views_map_test.erl index 0b0ab6894..f8ba18319 100644 --- a/src/couch_views/test/couch_views_map_test.erl +++ b/src/couch_views/test/couch_views_map_test.erl @@ -20,7 +20,12 @@ setup() -> - test_util:start_couch([fabric, couch_jobs, couch_views]). + test_util:start_couch([ + fabric, + couch_jobs, + couch_js, + couch_views + ]). teardown(State) -> diff --git a/src/couch_views/test/couch_views_trace_index_test.erl b/src/couch_views/test/couch_views_trace_index_test.erl index b7fe66b49..c4f76d897 100644 --- a/src/couch_views/test/couch_views_trace_index_test.erl +++ b/src/couch_views/test/couch_views_trace_index_test.erl @@ -51,7 +51,7 @@ indexer_test_() -> setup() -> - test_util:start_couch([fabric]). + test_util:start_couch([fabric, couch_js]). cleanup(Ctx) -> -- cgit v1.2.1 From 31dd2b71f17231a585001264e3b9b96819c34000 Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Fri, 27 Sep 2019 13:32:44 -0500 Subject: Fix mango index validation This check fails if Clouseau isn't present. Though we don't need Clouseau to perform the check so just avoid it. --- src/mango/src/mango_native_proc.erl | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src/mango/src/mango_native_proc.erl b/src/mango/src/mango_native_proc.erl index 274ae11de..cbf362291 100644 --- a/src/mango/src/mango_native_proc.erl +++ b/src/mango/src/mango_native_proc.erl @@ -345,12 +345,7 @@ make_text_field_name([P | Rest], Type) -> validate_index_info(IndexInfo) -> - IdxTypes = case clouseau_rpc:connected() of - true -> - [mango_idx_view, mango_idx_text]; - false -> - [mango_idx_view] - end, + IdxTypes = [mango_idx_view, mango_idx_text], Results = lists:foldl(fun(IdxType, Results0) -> try IdxType:validate_index_def(IndexInfo), -- cgit v1.2.1 From 8a972dd4f36795b30cfa5fd8dc8f5685d2f03548 Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Thu, 3 Oct 2019 15:01:50 -0400 Subject: Fix timeout in couch_views set_type_timeout takes seconds as the argument but we gave it milliseconds --- src/couch_views/src/couch_views_jobs.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/couch_views/src/couch_views_jobs.erl b/src/couch_views/src/couch_views_jobs.erl index 16fc4103f..87e4fea6a 100644 --- a/src/couch_views/src/couch_views_jobs.erl +++ b/src/couch_views/src/couch_views_jobs.erl @@ -29,7 +29,7 @@ set_timeout() -> - couch_jobs:set_type_timeout(?INDEX_JOB_TYPE, 6 * 1000). + couch_jobs:set_type_timeout(?INDEX_JOB_TYPE, 6). build_view(TxDb, Mrst, UpdateSeq) -> -- cgit v1.2.1 From 831555f321abda0aa298e5884e5fcce444e8b42f Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Mon, 14 Oct 2019 16:30:22 -0400 Subject: DRY out CouchDB FDB prefix fetching It was suggested in another PR's discussion: https://github.com/apache/couchdb/pull/2107#pullrequestreview-274431487 --- src/couch_jobs/src/couch_jobs_fdb.erl | 5 +---- src/fabric/src/fabric2_fdb.erl | 15 +++++++++------ src/fabric/src/fabric2_txids.erl | 10 ++-------- 3 files changed, 12 insertions(+), 18 deletions(-) diff --git a/src/couch_jobs/src/couch_jobs_fdb.erl b/src/couch_jobs/src/couch_jobs_fdb.erl index 6903801a2..00a8ddf72 100644 --- a/src/couch_jobs/src/couch_jobs_fdb.erl +++ b/src/couch_jobs/src/couch_jobs_fdb.erl @@ -615,10 +615,7 @@ init_jtx(undefined) -> fabric2_fdb:transactional(fun(Tx) -> init_jtx(Tx) end); init_jtx({erlfdb_transaction, _} = Tx) -> - Root = erlfdb_directory:root(), - Dir = fabric2_server:fdb_directory(), - CouchDB = erlfdb_directory:create_or_open(Tx, Root, Dir), - LayerPrefix = erlfdb_directory:get_name(CouchDB), + LayerPrefix = fabric2_fdb:create_or_open_couchdb_dir(Tx), Jobs = erlfdb_tuple:pack({?JOBS}, LayerPrefix), Version = erlfdb:wait(erlfdb:get(Tx, ?METADATA_VERSION_KEY)), % layer_prefix, md_version and tx here match db map fields in fabric2_fdb diff --git a/src/fabric/src/fabric2_fdb.erl b/src/fabric/src/fabric2_fdb.erl index 5c58da482..5471f99f2 100644 --- a/src/fabric/src/fabric2_fdb.erl +++ b/src/fabric/src/fabric2_fdb.erl @@ -24,6 +24,8 @@ delete/1, exists/1, + create_or_open_couchdb_dir/1, + list_dbs/4, get_info/1, @@ -274,11 +276,15 @@ exists(#{name := DbName} = Db) when is_binary(DbName) -> end. -list_dbs(Tx, Callback, AccIn, Options) -> +create_or_open_couchdb_dir(Tx) -> Root = erlfdb_directory:root(), Dir = fabric2_server:fdb_directory(), CouchDB = erlfdb_directory:create_or_open(Tx, Root, Dir), - LayerPrefix = erlfdb_directory:get_name(CouchDB), + erlfdb_directory:get_name(CouchDB). + + +list_dbs(Tx, Callback, AccIn, Options) -> + LayerPrefix = create_or_open_couchdb_dir(Tx), Prefix = erlfdb_tuple:pack({?ALL_DBS}, LayerPrefix), fold_range({tx, Tx}, Prefix, fun({K, _V}, Acc) -> {DbName} = erlfdb_tuple:unpack(K, Prefix), @@ -781,10 +787,7 @@ debug_cluster(Start, End) -> init_db(Tx, DbName, Options) -> - Root = erlfdb_directory:root(), - Dir = fabric2_server:fdb_directory(), - CouchDB = erlfdb_directory:create_or_open(Tx, Root, Dir), - Prefix = erlfdb_directory:get_name(CouchDB), + Prefix = create_or_open_couchdb_dir(Tx), Version = erlfdb:wait(erlfdb:get(Tx, ?METADATA_VERSION_KEY)), #{ name => DbName, diff --git a/src/fabric/src/fabric2_txids.erl b/src/fabric/src/fabric2_txids.erl index 06704f021..f1a75243c 100644 --- a/src/fabric/src/fabric2_txids.erl +++ b/src/fabric/src/fabric2_txids.erl @@ -44,10 +44,7 @@ start_link() -> create(Tx, undefined) -> - Root = erlfdb_directory:root(), - Dir = fabric2_server:fdb_directory(), - CouchDB = erlfdb_directory:create_or_open(Tx, Root, Dir), - Prefix = erlfdb_directory:get_name(CouchDB), + Prefix = fabric2_fdb:create_or_open_couchdb_dir(Tx), create(Tx, Prefix); create(_Tx, LayerPrefix) -> @@ -136,10 +133,7 @@ clean(St, NeedsSweep) -> sweep(Tx, {Mega, Secs, Micro}) -> - Root = erlfdb_directory:root(), - Dir = fabric2_server:fdb_directory(), - CouchDB = erlfdb_directory:create_or_open(Tx, Root, Dir), - Prefix = erlfdb_directory:get_name(CouchDB), + Prefix = fabric2_fdb:create_or_open_couchdb_dir(Tx), StartKey = erlfdb_tuple:pack({?TX_IDS}, Prefix), EndKey = erlfdb_tuple:pack({?TX_IDS, Mega, Secs, Micro}, Prefix), erlfdb:set_option(Tx, next_write_no_write_conflict_range), -- cgit v1.2.1 From 168126b2f2284b71777cf1e53df8341d2b6d8d1a Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Tue, 15 Oct 2019 12:16:11 -0400 Subject: Use a shorter name for create_or_open_couchdb_dir --- src/couch_jobs/src/couch_jobs_fdb.erl | 2 +- src/fabric/src/fabric2_fdb.erl | 8 ++++---- src/fabric/src/fabric2_txids.erl | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/couch_jobs/src/couch_jobs_fdb.erl b/src/couch_jobs/src/couch_jobs_fdb.erl index 00a8ddf72..a08b78fc1 100644 --- a/src/couch_jobs/src/couch_jobs_fdb.erl +++ b/src/couch_jobs/src/couch_jobs_fdb.erl @@ -615,7 +615,7 @@ init_jtx(undefined) -> fabric2_fdb:transactional(fun(Tx) -> init_jtx(Tx) end); init_jtx({erlfdb_transaction, _} = Tx) -> - LayerPrefix = fabric2_fdb:create_or_open_couchdb_dir(Tx), + LayerPrefix = fabric2_fdb:get_dir(Tx), Jobs = erlfdb_tuple:pack({?JOBS}, LayerPrefix), Version = erlfdb:wait(erlfdb:get(Tx, ?METADATA_VERSION_KEY)), % layer_prefix, md_version and tx here match db map fields in fabric2_fdb diff --git a/src/fabric/src/fabric2_fdb.erl b/src/fabric/src/fabric2_fdb.erl index 5471f99f2..0f55d9175 100644 --- a/src/fabric/src/fabric2_fdb.erl +++ b/src/fabric/src/fabric2_fdb.erl @@ -24,7 +24,7 @@ delete/1, exists/1, - create_or_open_couchdb_dir/1, + get_dir/1, list_dbs/4, @@ -276,7 +276,7 @@ exists(#{name := DbName} = Db) when is_binary(DbName) -> end. -create_or_open_couchdb_dir(Tx) -> +get_dir(Tx) -> Root = erlfdb_directory:root(), Dir = fabric2_server:fdb_directory(), CouchDB = erlfdb_directory:create_or_open(Tx, Root, Dir), @@ -284,7 +284,7 @@ create_or_open_couchdb_dir(Tx) -> list_dbs(Tx, Callback, AccIn, Options) -> - LayerPrefix = create_or_open_couchdb_dir(Tx), + LayerPrefix = get_dir(Tx), Prefix = erlfdb_tuple:pack({?ALL_DBS}, LayerPrefix), fold_range({tx, Tx}, Prefix, fun({K, _V}, Acc) -> {DbName} = erlfdb_tuple:unpack(K, Prefix), @@ -787,7 +787,7 @@ debug_cluster(Start, End) -> init_db(Tx, DbName, Options) -> - Prefix = create_or_open_couchdb_dir(Tx), + Prefix = get_dir(Tx), Version = erlfdb:wait(erlfdb:get(Tx, ?METADATA_VERSION_KEY)), #{ name => DbName, diff --git a/src/fabric/src/fabric2_txids.erl b/src/fabric/src/fabric2_txids.erl index f1a75243c..046a7484a 100644 --- a/src/fabric/src/fabric2_txids.erl +++ b/src/fabric/src/fabric2_txids.erl @@ -44,7 +44,7 @@ start_link() -> create(Tx, undefined) -> - Prefix = fabric2_fdb:create_or_open_couchdb_dir(Tx), + Prefix = fabric2_fdb:get_dir(Tx), create(Tx, Prefix); create(_Tx, LayerPrefix) -> @@ -133,7 +133,7 @@ clean(St, NeedsSweep) -> sweep(Tx, {Mega, Secs, Micro}) -> - Prefix = fabric2_fdb:create_or_open_couchdb_dir(Tx), + Prefix = fabric2_fdb:get_dir(Tx), StartKey = erlfdb_tuple:pack({?TX_IDS}, Prefix), EndKey = erlfdb_tuple:pack({?TX_IDS, Mega, Secs, Micro}, Prefix), erlfdb:set_option(Tx, next_write_no_write_conflict_range), -- cgit v1.2.1 From b5c446854b61fc771d393051a67a6649cb50b289 Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Wed, 16 Oct 2019 15:46:19 -0400 Subject: Enable FDB transaction tracing To trace FDB transactions: 1. Enable tracing in erlfdb application environment: `network_options = [{trace_enable, ...}]` OR with an environment variable: `FDB_NETWORK_OPTION_TRACE_ENABLE = ""` 2. Set `[fabric] fdb_trace=true` configuration value 3. Add the `x-couchdb-fdb-trace:true` header to each request that should be traced. The transaction name is set to the nonce value, which is already used by CouchDB to track API requests. Only transactions started from the main request process will be traced. So if a process is spawned without inheriting the `erlfdb_trace` process dict key, that transaction will not be traced. --- src/chttpd/src/chttpd.erl | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/chttpd/src/chttpd.erl b/src/chttpd/src/chttpd.erl index 868937f6d..a15537f85 100644 --- a/src/chttpd/src/chttpd.erl +++ b/src/chttpd/src/chttpd.erl @@ -238,6 +238,8 @@ handle_request_int(MochiReq) -> erlang:put(dont_log_request, true), erlang:put(dont_log_response, true), + maybe_trace_fdb(MochiReq:get_header_value("x-couchdb-fdb-trace")), + {HttpReq2, Response} = case before_request(HttpReq0) of {ok, HttpReq1} -> process_request(HttpReq1); @@ -1213,6 +1215,22 @@ get_user(#httpd{user_ctx = #user_ctx{name = User}}) -> get_user(#httpd{user_ctx = undefined}) -> "undefined". +maybe_trace_fdb("true") -> + % Remember to also enable tracing in erlfdb application environment: + % network_options = [{trace_enable, ...}] + % Or via the OS environment variable: + % FDB_NETWORK_OPTION_TRACE_ENABLE = "" + case config:get_boolean("fabric", "fdb_trace", false) of + true -> + Nonce = erlang:get(nonce), + erlang:put(erlfdb_trace, list_to_binary(Nonce)); + false -> + ok + end; +maybe_trace_fdb(_) -> + ok. + + -ifdef(TEST). -include_lib("eunit/include/eunit.hrl"). -- cgit v1.2.1 From dd7a3822e85ac14d5c42bc069fe340da8220bddf Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Thu, 17 Oct 2019 17:43:10 -0400 Subject: Take better advantage of metadata version key feature FDB's metadata version key allows more efficient metadata invalidation (see https://github.com/apple/foundationdb/pull/1213). To take advantage of that feature update the caching logic to check the metadata version first, and if it is current, skip checking the db version altogether. When db version is bumped we now update the metadata version as well. There is a bit of a subtlety when the metadata version is stale. In that case we check the db version, and if that is current, we still don't reopen the database, instead we continue with the transaction. Then, after the transaction succeeds, we update the cached metadata version for that db handle. Next client would get the updated db metadata, it will be current, and they won't need to check the db version. If the db version is stale as well, then we throw a `reopen` exception and the handle gets removed from the cache and reopened. Note: this commit doesn't actually use the new metadata version key, it still uses the old plain key. That update will be a separate commit where we also start setting a new API version (610) and will only work on FDB version 6.1.x --- src/fabric/include/fabric2.hrl | 2 + src/fabric/src/fabric2_fdb.erl | 110 ++++++++++++++++++++++-------- src/fabric/test/fabric2_db_misc_tests.erl | 56 ++++++++++++++- 3 files changed, 137 insertions(+), 31 deletions(-) diff --git a/src/fabric/include/fabric2.hrl b/src/fabric/include/fabric2.hrl index 3e224987d..fe11e6b8d 100644 --- a/src/fabric/include/fabric2.hrl +++ b/src/fabric/include/fabric2.hrl @@ -59,8 +59,10 @@ -define(PDICT_DB_KEY, '$fabric_db_handle'). -define(PDICT_LAYER_CACHE, '$fabric_layer_id'). -define(PDICT_CHECKED_DB_IS_CURRENT, '$fabric_checked_db_is_current'). +-define(PDICT_CHECKED_MD_IS_CURRENT, '$fabric_checked_md_is_current'). -define(PDICT_TX_ID_KEY, '$fabric_tx_id'). -define(PDICT_TX_RES_KEY, '$fabric_tx_result'). +-define(PDICT_ON_COMMIT_FUN, '$fabric_on_commit_fun'). -define(COMMIT_UNKNOWN_RESULT, 1021). diff --git a/src/fabric/src/fabric2_fdb.erl b/src/fabric/src/fabric2_fdb.erl index 0f55d9175..2ccde1cb4 100644 --- a/src/fabric/src/fabric2_fdb.erl +++ b/src/fabric/src/fabric2_fdb.erl @@ -117,7 +117,11 @@ do_transaction(Fun, LayerPrefix) when is_function(Fun, 1) -> true -> get_previous_transaction_result(); false -> - execute_transaction(Tx, Fun, LayerPrefix) + try + execute_transaction(Tx, Fun, LayerPrefix) + after + erase({?PDICT_ON_COMMIT_FUN, Tx}) + end end end) after @@ -864,6 +868,31 @@ bump_metadata_version(Tx) -> erlfdb:set_versionstamped_value(Tx, ?METADATA_VERSION_KEY, <<0:112>>). +check_metadata_version(#{} = Db) -> + #{ + tx := Tx, + layer_prefix := LayerPrefix, + name := DbName, + md_version := Version + } = Db, + + AlreadyChecked = get(?PDICT_CHECKED_MD_IS_CURRENT), + if AlreadyChecked == true -> {current, Db}; true -> + case erlfdb:wait(erlfdb:get_ss(Tx, ?METADATA_VERSION_KEY)) of + Version -> + put(?PDICT_CHECKED_MD_IS_CURRENT, true), + % We want to set a read conflict on the db version as we'd want + % to to conflict with any writes to this particular db + DbPrefix = erlfdb_tuple:pack({?DBS, DbName}, LayerPrefix), + DbVersionKey = erlfdb_tuple:pack({?DB_VERSION}, DbPrefix), + erlfdb:add_read_conflict_key(Tx, DbVersionKey), + {current, Db}; + NewVersion -> + {stale, Db#{md_version := NewVersion}} + end + end. + + bump_db_version(#{} = Db) -> #{ tx := Tx, @@ -872,7 +901,30 @@ bump_db_version(#{} = Db) -> DbVersionKey = erlfdb_tuple:pack({?DB_VERSION}, DbPrefix), DbVersion = fabric2_util:uuid(), - ok = erlfdb:set(Tx, DbVersionKey, DbVersion). + ok = erlfdb:set(Tx, DbVersionKey, DbVersion), + ok = bump_metadata_version(Tx). + + +check_db_version(#{} = Db, CheckDbVersion) -> + #{ + tx := Tx, + db_prefix := DbPrefix, + db_version := DbVersion + } = Db, + + AlreadyChecked = get(?PDICT_CHECKED_DB_IS_CURRENT), + if not CheckDbVersion orelse AlreadyChecked == true -> Db; true -> + DbVersionKey = erlfdb_tuple:pack({?DB_VERSION}, DbPrefix), + case erlfdb:wait(erlfdb:get(Tx, DbVersionKey)) of + DbVersion -> + put(?PDICT_CHECKED_DB_IS_CURRENT, true), + on_commit(Tx, fun() -> fabric2_server:store(Db) end), + Db; + _NewDBVersion -> + fabric2_server:remove(maps:get(name, Db)), + throw({?MODULE, reopen}) + end + end. write_doc_body(#{} = Db0, #doc{} = Doc) -> @@ -1171,34 +1223,9 @@ ensure_current(Db) -> ensure_current(#{} = Db, CheckDbVersion) -> require_transaction(Db), - - #{ - tx := Tx, - md_version := MetaDataVersion - } = Db, - - case erlfdb:wait(erlfdb:get(Tx, ?METADATA_VERSION_KEY)) of - MetaDataVersion -> Db; - _NewVersion -> throw({?MODULE, reopen}) - end, - - AlreadyChecked = get(?PDICT_CHECKED_DB_IS_CURRENT), - if not CheckDbVersion orelse AlreadyChecked == true -> Db; true -> - #{ - db_prefix := DbPrefix, - db_version := DbVersion - } = Db, - - DbVersionKey = erlfdb_tuple:pack({?DB_VERSION}, DbPrefix), - - case erlfdb:wait(erlfdb:get(Tx, DbVersionKey)) of - DbVersion -> - put(?PDICT_CHECKED_DB_IS_CURRENT, true), - Db; - _NewDBVersion -> - fabric2_server:remove(maps:get(name, Db)), - throw({?MODULE, reopen}) - end + case check_metadata_version(Db) of + {current, Db1} -> Db1; + {stale, Db1} -> check_db_version(Db1, CheckDbVersion) end. @@ -1222,12 +1249,14 @@ execute_transaction(Tx, Fun, LayerPrefix) -> erlfdb:set(Tx, get_transaction_id(Tx, LayerPrefix), <<>>), put(?PDICT_TX_RES_KEY, Result) end, + ok = run_on_commit_fun(Tx), Result. clear_transaction() -> fabric2_txids:remove(get(?PDICT_TX_ID_KEY)), erase(?PDICT_CHECKED_DB_IS_CURRENT), + erase(?PDICT_CHECKED_MD_IS_CURRENT), erase(?PDICT_TX_ID_KEY), erase(?PDICT_TX_RES_KEY). @@ -1259,3 +1288,24 @@ new_versionstamp(Tx) -> TxId = erlfdb:get_next_tx_id(Tx), {versionstamp, 16#FFFFFFFFFFFFFFFF, 16#FFFF, TxId}. + +on_commit(Tx, Fun) when is_function(Fun, 0) -> + % Here we rely on Tx objects matching. However they contain a nif resource + % object. Before Erlang 20.0 those would have been represented as empty + % binaries and would have compared equal to each other. See + % http://erlang.org/doc/man/erl_nif.html for more info. We assume we run on + % Erlang 20+ here and don't worry about that anymore. + case get({?PDICT_ON_COMMIT_FUN, Tx}) of + undefined -> put({?PDICT_ON_COMMIT_FUN, Tx}, Fun); + _ -> error({?MODULE, on_commit_function_already_set}) + end. + + +run_on_commit_fun(Tx) -> + case get({?PDICT_ON_COMMIT_FUN, Tx}) of + undefined -> + ok; + Fun when is_function(Fun, 0) -> + Fun(), + ok + end. diff --git a/src/fabric/test/fabric2_db_misc_tests.erl b/src/fabric/test/fabric2_db_misc_tests.erl index 8e6405632..913b6aa98 100644 --- a/src/fabric/test/fabric2_db_misc_tests.erl +++ b/src/fabric/test/fabric2_db_misc_tests.erl @@ -16,6 +16,7 @@ -include_lib("couch/include/couch_db.hrl"). -include_lib("couch/include/couch_eunit.hrl"). -include_lib("eunit/include/eunit.hrl"). +-include("fabric2.hrl"). -define(TDEF(A), {atom_to_list(A), fun A/1}). @@ -34,7 +35,9 @@ misc_test_() -> fun set_revs_limit/1, fun set_security/1, fun is_system_db/1, - fun ensure_full_commit/1 + fun ensure_full_commit/1, + fun metadata_bump/1, + fun db_version_bump/1 ]} } }. @@ -111,3 +114,54 @@ is_system_db({DbName, Db, _}) -> ensure_full_commit({_, Db, _}) -> ?assertEqual({ok, 0}, fabric2_db:ensure_full_commit(Db)), ?assertEqual({ok, 0}, fabric2_db:ensure_full_commit(Db, 5)). + + +metadata_bump({DbName, _, _}) -> + % Call open again here to make sure we have a version in the cache + % as we'll be checking if that version gets its metadata bumped + {ok, Db} = fabric2_db:open(DbName, [{user_ctx, ?ADMIN_USER}]), + + % Emulate a remote client bumping the metadataversion + {ok, Fdb} = application:get_env(fabric, db), + erlfdb:transactional(Fdb, fun(Tx) -> + erlfdb:set_versionstamped_value(Tx, ?METADATA_VERSION_KEY, <<0:112>>) + end), + NewMDVersion = erlfdb:transactional(Fdb, fun(Tx) -> + erlfdb:wait(erlfdb:get(Tx, ?METADATA_VERSION_KEY)) + end), + + % Perform a random operation which calls ensure_current + {ok, _} = fabric2_db:get_db_info(Db), + + % Check that db handle in the cache got the new metadata version + ?assertMatch(#{md_version := NewMDVersion}, fabric2_server:fetch(DbName)). + + +db_version_bump({DbName, _, _}) -> + % Call open again here to make sure we have a version in the cache + % as we'll be checking if that version gets its metadata bumped + {ok, Db} = fabric2_db:open(DbName, [{user_ctx, ?ADMIN_USER}]), + + % Emulate a remote client bumping db version. We don't go through the + % regular db open + update security doc or something like that to make sure + % we don't touch the local cache + #{db_prefix := DbPrefix} = Db, + DbVersionKey = erlfdb_tuple:pack({?DB_VERSION}, DbPrefix), + {ok, Fdb} = application:get_env(fabric, db), + NewDbVersion = fabric2_util:uuid(), + erlfdb:transactional(Fdb, fun(Tx) -> + erlfdb:set(Tx, DbVersionKey, NewDbVersion), + erlfdb:set_versionstamped_value(Tx, ?METADATA_VERSION_KEY, <<0:112>>) + end), + + % Perform a random operation which calls ensure_current + {ok, _} = fabric2_db:get_db_info(Db), + + % After previous operation, the cache should have been cleared + ?assertMatch(undefined, fabric2_server:fetch(DbName)), + + % Call open again and check that we have the latest db version + {ok, Db2} = fabric2_db:open(DbName, [{user_ctx, ?ADMIN_USER}]), + + % Check that db handle in the cache got the new metadata version + ?assertMatch(#{db_version := NewDbVersion}, Db2). -- cgit v1.2.1 From 7e881e23bbdb0a1803b1231274ceb2d0e754c0ff Mon Sep 17 00:00:00 2001 From: ILYA Khlopotov Date: Tue, 1 Oct 2019 16:10:26 +0000 Subject: Remove compiler warning --- src/chttpd/src/chttpd_db.erl | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/chttpd/src/chttpd_db.erl b/src/chttpd/src/chttpd_db.erl index b65f79c19..35ec8679a 100644 --- a/src/chttpd/src/chttpd_db.erl +++ b/src/chttpd/src/chttpd_db.erl @@ -351,8 +351,6 @@ update_partition_stats(PathParts) -> handle_design_req(#httpd{ path_parts=[_DbName, _Design, Name, <<"_",_/binary>> = Action | _Rest] }=Req, Db) -> - DbName = fabric2_db:name(Db), -%% case ddoc_cache:open(DbName, <<"_design/", Name/binary>>) of case fabric2_db:open_doc(Db, <<"_design/", Name/binary>>) of {ok, DDoc} -> Handler = chttpd_handlers:design_handler(Action, fun bad_action_req/3), -- cgit v1.2.1 From e6f24ac85f63827a57eee295a8d97ba6e5528261 Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Tue, 22 Oct 2019 18:33:30 -0400 Subject: Chunkify local docs Previously local docs were not chunkified and it was possible for replications which checkpointed a few dozen times to create local documents above the 100KB limit. Documents are chunkiefied according to the same scheme as the regular docs -- rev values are in a main `?DB_LOCAL_DOCS` subspace, and doc body chunks in a separate `?DB_LOCAL_DOC_BODIES` subspace that looks like: {?DB_LOCAL_DOC_BODIES, DocId, ChunkId} = BinaryChunk where `ChunkId` is an incrementing integer and BinaryChunk is a 100KB chunk of the term_to_binary of the body. We also go to some lengths to read and silently upgrade docs written with the old encoding. Upgrades happen on doc writes as a first step, to ensure stats update logic is not affected. --- src/fabric/include/fabric2.hrl | 1 + src/fabric/src/fabric2_db.erl | 14 +- src/fabric/src/fabric2_fdb.erl | 84 +++++-- src/fabric/test/fabric2_doc_crud_tests.erl | 107 ++++++++ src/fabric/test/fabric2_local_doc_fold_tests.erl | 304 +++++++++++++++++++++++ test/elixir/test/basics_test.exs | 2 + 6 files changed, 491 insertions(+), 21 deletions(-) create mode 100644 src/fabric/test/fabric2_local_doc_fold_tests.erl diff --git a/src/fabric/include/fabric2.hrl b/src/fabric/include/fabric2.hrl index fe11e6b8d..a5c12aef3 100644 --- a/src/fabric/include/fabric2.hrl +++ b/src/fabric/include/fabric2.hrl @@ -47,6 +47,7 @@ -define(DB_LOCAL_DOCS, 22). -define(DB_ATTS, 23). -define(DB_VIEWS, 24). +-define(DB_LOCAL_DOC_BODIES, 25). % Versions diff --git a/src/fabric/src/fabric2_db.erl b/src/fabric/src/fabric2_db.erl index 9ef0bd358..e2674a480 100644 --- a/src/fabric/src/fabric2_db.erl +++ b/src/fabric/src/fabric2_db.erl @@ -782,7 +782,14 @@ fold_design_docs(Db, UserFun, UserAcc0, Options1) -> fold_docs(Db, UserFun, UserAcc0, Options2). -fold_local_docs(Db, UserFun, UserAcc0, Options) -> +fold_local_docs(Db, UserFun, UserAcc0, Options0) -> + % This is mostly for testing and sanity checking. When calling from a test + % namespace will be automatically set. We also assert when called from the + % API the correct namespace was set + Options = case lists:keyfind(namespace, 1, Options0) of + {namespace, <<"_local">>} -> Options0; + false -> [{namespace, <<"_local">>} | Options0] + end, fabric2_fdb:transactional(Db, fun(TxDb) -> try #{ @@ -796,12 +803,11 @@ fold_local_docs(Db, UserFun, UserAcc0, Options) -> UserAcc2 = fabric2_fdb:fold_range(TxDb, Prefix, fun({K, V}, Acc) -> {DocId} = erlfdb_tuple:unpack(K, Prefix), - LDoc = fabric2_fdb:get_local_doc(TxDb, DocId, V), - #doc{revs = {Pos, [Rev]}} = LDoc, + Rev = fabric2_fdb:get_local_doc_rev(TxDb, DocId, V), maybe_stop(UserFun({row, [ {id, DocId}, {key, DocId}, - {value, {[{rev, couch_doc:rev_to_str({Pos, Rev})}]}} + {value, {[{rev, couch_doc:rev_to_str({0, Rev})}]}} ]}, Acc)) end, UserAcc1, Options), diff --git a/src/fabric/src/fabric2_fdb.erl b/src/fabric/src/fabric2_fdb.erl index 2ccde1cb4..dc803115f 100644 --- a/src/fabric/src/fabric2_fdb.erl +++ b/src/fabric/src/fabric2_fdb.erl @@ -46,7 +46,7 @@ get_doc_body_future/3, get_doc_body_wait/4, get_local_doc/2, - get_local_doc/3, + get_local_doc_rev/3, write_doc/6, write_local_doc/2, @@ -519,13 +519,32 @@ get_local_doc(#{} = Db0, <> = DocId) -> } = Db = ensure_current(Db0), Key = erlfdb_tuple:pack({?DB_LOCAL_DOCS, DocId}, DbPrefix), - Val = erlfdb:wait(erlfdb:get(Tx, Key)), - fdb_to_local_doc(Db, DocId, Val). + Rev = erlfdb:wait(erlfdb:get(Tx, Key)), + Prefix = erlfdb_tuple:pack({?DB_LOCAL_DOC_BODIES, DocId}, DbPrefix), + Future = erlfdb:get_range_startswith(Tx, Prefix), + Chunks = lists:map(fun({_K, V}) -> V end, erlfdb:wait(Future)), -get_local_doc(#{} = Db, <> = DocId, Val) - when is_binary(Val) orelse Val =:= not_found -> - fdb_to_local_doc(ensure_current(Db), DocId, Val). + fdb_to_local_doc(Db, DocId, Rev, Chunks). + + +get_local_doc_rev(_Db0, <> = DocId, Val) -> + case Val of + <<131, _/binary>> -> + % Compatibility clause for an older encoding format + {Rev, _} = binary_to_term(Val, [safe]), + Rev; + <<_/binary>> -> + try binary_to_integer(Val) of + IntVal when IntVal >= 0 -> + Val; + _ -> + erlang:error({invalid_local_doc_rev, DocId, Val}) + catch + error:badarg -> + erlang:error({invalid_local_doc_rev, DocId, Val}) + end + end. write_doc(#{} = Db0, Doc, NewWinner0, OldWinner, ToUpdate, ToRemove) -> @@ -647,19 +666,31 @@ write_doc(#{} = Db0, Doc, NewWinner0, OldWinner, ToUpdate, ToRemove) -> write_local_doc(#{} = Db0, Doc) -> #{ - tx := Tx + tx := Tx, + db_prefix := DbPrefix } = Db = ensure_current(Db0), - {LDocKey, LDocVal} = local_doc_to_fdb(Db, Doc), + Id = Doc#doc.id, + + {LDocKey, LDocVal, Rows} = local_doc_to_fdb(Db, Doc), WasDeleted = case erlfdb:wait(erlfdb:get(Tx, LDocKey)) of <<_/binary>> -> false; not_found -> true end, + BPrefix = erlfdb_tuple:pack({?DB_LOCAL_DOC_BODIES, Id}, DbPrefix), + case Doc#doc.deleted of - true -> erlfdb:clear(Tx, LDocKey); - false -> erlfdb:set(Tx, LDocKey, LDocVal) + true -> + erlfdb:clear(Tx, LDocKey), + erlfdb:clear_range_startswith(Tx, BPrefix); + false -> + erlfdb:set(Tx, LDocKey, LDocVal), + % Make sure to clear the whole range, in case there was a larger + % document body there before. + erlfdb:clear_range_startswith(Tx, BPrefix), + lists:foreach(fun({K, V}) -> erlfdb:set(Tx, K, V) end, Rows) end, case {WasDeleted, Doc#doc.deleted} of @@ -1066,26 +1097,45 @@ local_doc_to_fdb(Db, #doc{} = Doc) -> body = Body } = Doc, + Key = erlfdb_tuple:pack({?DB_LOCAL_DOCS, Id}, DbPrefix), + StoreRev = case Rev of _ when is_integer(Rev) -> integer_to_binary(Rev); _ when is_binary(Rev) -> Rev end, - Key = erlfdb_tuple:pack({?DB_LOCAL_DOCS, Id}, DbPrefix), - Val = {StoreRev, Body}, - {Key, term_to_binary(Val, [{minor_version, 1}])}. + BVal = term_to_binary(Body, [{minor_version, 1}]), + {Rows, _} = lists:mapfoldl(fun(Chunk, ChunkId) -> + K = erlfdb_tuple:pack({?DB_LOCAL_DOC_BODIES, Id, ChunkId}, DbPrefix), + {{K, Chunk}, ChunkId + 1} + end, 0, chunkify_binary(BVal)), + + {Key, StoreRev, Rows}. -fdb_to_local_doc(_Db, DocId, Bin) when is_binary(Bin) -> - {Rev, Body} = binary_to_term(Bin, [safe]), +fdb_to_local_doc(_Db, DocId, <<131, _/binary>> = Val, []) -> + % This is an upgrade clause for the old encoding. We allow reading the old + % value and will perform an upgrade of the storage format on an update. + {Rev, Body} = binary_to_term(Val, [safe]), #doc{ id = DocId, revs = {0, [Rev]}, deleted = false, body = Body }; -fdb_to_local_doc(_Db, _DocId, not_found) -> - {not_found, missing}. + +fdb_to_local_doc(_Db, _DocId, not_found, []) -> + {not_found, missing}; + +fdb_to_local_doc(_Db, DocId, Rev, Rows) when is_list(Rows), is_binary(Rev) -> + BodyBin = iolist_to_binary(Rows), + Body = binary_to_term(BodyBin, [safe]), + #doc{ + id = DocId, + revs = {0, [Rev]}, + deleted = false, + body = Body + }. chunkify_binary(Data) -> diff --git a/src/fabric/test/fabric2_doc_crud_tests.erl b/src/fabric/test/fabric2_doc_crud_tests.erl index 3cb380827..255efefdc 100644 --- a/src/fabric/test/fabric2_doc_crud_tests.erl +++ b/src/fabric/test/fabric2_doc_crud_tests.erl @@ -16,6 +16,7 @@ -include_lib("couch/include/couch_db.hrl"). -include_lib("couch/include/couch_eunit.hrl"). -include_lib("eunit/include/eunit.hrl"). +-include("fabric2.hrl"). doc_crud_test_() -> @@ -61,6 +62,9 @@ doc_crud_test_() -> fun recreate_local_doc/1, fun create_local_doc_bad_rev/1, fun create_local_doc_random_rev/1, + fun create_a_large_local_doc/1, + fun create_2_large_local_docs/1, + fun local_doc_with_previous_encoding/1, fun before_doc_update_skips_local_docs/1 ]} } @@ -765,6 +769,109 @@ create_local_doc_random_rev({Db, _}) -> ?assertEqual(Doc5#doc{revs = {0, [<<"2">>]}}, Doc6). +create_a_large_local_doc({Db, _}) -> + UUID = fabric2_util:uuid(), + LDocId = <>, + Body = << <<"x">> || _ <- lists:seq(1, 300000) >>, + Doc1 = #doc{ + id = LDocId, + revs = {0, []}, + body = Body + }, + ?assertEqual({ok, {0, <<"1">>}}, fabric2_db:update_doc(Db, Doc1)), + {ok, Doc2} = fabric2_db:open_doc(Db, Doc1#doc.id, []), + ?assertEqual(Doc1#doc{revs = {0, [<<"1">>]}}, Doc2), + + % Read via fold_local_docs + {ok, Result} = fabric2_db:fold_local_docs(Db, fun(Data, Acc) -> + case Data of + {row, [{id, DocId} | _]} when LDocId =:= DocId -> + {ok, [Data | Acc]}; + _ -> + {ok, Acc} + end + end, [], []), + ?assertEqual([{row, [ + {id, LDocId}, + {key, LDocId}, + {value, {[{rev, <<"0-1">>}]}} + ]}], Result). + + +create_2_large_local_docs({Db, _}) -> + % Create a large doc then overwrite with a smaller one. The reason is to + % ensure the previous one correctly clears its range before writting the + % new smaller one it its place. + UUID = fabric2_util:uuid(), + LDocId = <>, + Body1 = << <<"x">> || _ <- lists:seq(1, 400000) >>, + Body2 = << <<"y">> || _ <- lists:seq(1, 150000) >>, + + Doc1 = #doc{ + id = LDocId, + revs = {0, []}, + body = Body1 + }, + + ?assertEqual({ok, {0, <<"1">>}}, fabric2_db:update_doc(Db, Doc1)), + + Doc2 = Doc1#doc{body = Body2}, + ?assertEqual({ok, {0, <<"1">>}}, fabric2_db:update_doc(Db, Doc2)), + + {ok, Doc3} = fabric2_db:open_doc(Db, LDocId, []), + ?assertEqual(Doc2#doc{revs = {0, [<<"1">>]}}, Doc3). + + +local_doc_with_previous_encoding({Db, _}) -> + #{db_prefix := DbPrefix} = Db, + + Id = <<"_local/old_doc">>, + Body = {[{<<"x">>, 5}]}, + Rev = <<"1">>, + Key = erlfdb_tuple:pack({?DB_LOCAL_DOCS, Id}, DbPrefix), + + fabric2_fdb:transactional(Db, fun(TxDb) -> + #{tx := Tx} = TxDb, + Term = term_to_binary({Rev, Body}, [{minor_version, 1}]), + ok = erlfdb:set(Tx, Key, Term) + end), + + % Read old doc + {ok, Doc1} = fabric2_db:open_doc(Db, Id, []), + ?assertEqual({0, [<<"1">>]}, Doc1#doc.revs), + ?assertEqual({[{<<"x">>, 5}]}, Doc1#doc.body), + + % Read via fold_local_docs. + {ok, Result} = fabric2_db:fold_local_docs(Db, fun(Data, Acc) -> + case Data of + {row, [{id, DocId} | _]} when Id =:= DocId -> + {ok, [Data | Acc]}; + _ -> + {ok, Acc} + end + end, [], []), + ?assertEqual([{row, [ + {id, Id}, + {key, Id}, + {value, {[{rev, <<"0-1">>}]}} + ]}], Result), + + % Update doc + NewBody = {[{<<"y">>, 6}]}, + Doc2 = Doc1#doc{body = NewBody}, + ?assertEqual({ok, {0, <<"2">>}}, fabric2_db:update_doc(Db, Doc2)), + {ok, Doc3} = fabric2_db:open_doc(Db, Doc2#doc.id, []), + ?assertEqual({0, [<<"2">>]}, Doc3#doc.revs), + ?assertEqual(NewBody, Doc3#doc.body), + + % Old doc now has only the rev number in it + OldDocBin = fabric2_fdb:transactional(Db, fun(TxDb) -> + #{tx := Tx} = TxDb, + erlfdb:wait(erlfdb:get(Tx, Key)) + end), + ?assertEqual(<<"2">> , OldDocBin). + + before_doc_update_skips_local_docs({Db0, _}) -> BduFun = fun(Doc, _, _) -> diff --git a/src/fabric/test/fabric2_local_doc_fold_tests.erl b/src/fabric/test/fabric2_local_doc_fold_tests.erl new file mode 100644 index 000000000..82203b433 --- /dev/null +++ b/src/fabric/test/fabric2_local_doc_fold_tests.erl @@ -0,0 +1,304 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(fabric2_local_doc_fold_tests). + + +-include_lib("couch/include/couch_db.hrl"). +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("eunit/include/eunit.hrl"). + + +-define(DOC_COUNT, 50). + +%% eunit implementation of {with, Tests} doesn't detect test name correctly +with(Tests) -> + fun(ArgsTuple) -> + [{Name, ?_test(Fun(ArgsTuple))} || {Name, Fun} <- Tests] + ++ + [{Name, {timeout, Timeout, ?_test(Fun(ArgsTuple))}} || {Name, Timeout, Fun} <- Tests] + end. + +-define(NAMED(A), {atom_to_list(A), fun A/1}). +-define(WITH_TIMEOUT(Timeout, A), {atom_to_list(A), Timeout, fun A/1}). + +doc_fold_test_() -> + { + "Test local document fold operations", + { + setup, + fun setup/0, + fun cleanup/1, + with([ + ?NAMED(fold_local_docs_basic), + ?NAMED(fold_local_docs_rev), + ?NAMED(fold_local_docs_with_start_key), + ?NAMED(fold_local_docs_with_end_key), + ?NAMED(fold_local_docs_with_both_keys_the_same), + ?WITH_TIMEOUT(15000, fold_local_docs_with_different_keys), + ?NAMED(fold_local_docs_with_limit), + ?NAMED(fold_local_docs_with_skip), + ?NAMED(fold_local_docs_with_skip_and_limit) + ]) + } + }. + + +setup() -> + Ctx = test_util:start_couch([fabric]), + {ok, Db} = fabric2_db:create(?tempdb(), [{user_ctx, ?ADMIN_USER}]), + DocIdRevs = lists:map(fun(Val) -> + UUID = fabric2_util:uuid(), + DocId = <>, + % Every 10th doc is large to force the doc to be chunkified + BigChunk = << <<"x">> || _ <- lists:seq(1, 200000) >>, + Body = case Val rem 10 == 0 of + true -> {[{<<"value">>, BigChunk}]}; + false -> {[{<<"value">>, Val}]} + end, + Doc = #doc{ + id = DocId, + body = Body + }, + {ok, Rev} = fabric2_db:update_doc(Db, Doc, []), + {DocId, {[{rev, couch_doc:rev_to_str(Rev)}]}} + end, lists:seq(1, ?DOC_COUNT)), + {Db, lists:sort(DocIdRevs), Ctx}. + + +cleanup({Db, _DocIdRevs, Ctx}) -> + ok = fabric2_db:delete(fabric2_db:name(Db), []), + test_util:stop_couch(Ctx). + + +fold_local_docs_basic({Db, DocIdRevs, _}) -> + {ok, {?DOC_COUNT, Rows}} = fabric2_db:fold_local_docs(Db, fun fold_fun/2, [], []), + ?assertEqual(DocIdRevs, lists:reverse(Rows)). + + +fold_local_docs_rev({Db, DocIdRevs, _}) -> + Opts = [{dir, rev}], + {ok, {?DOC_COUNT, Rows}} = + fabric2_db:fold_local_docs(Db, fun fold_fun/2, [], Opts), + ?assertEqual(DocIdRevs, Rows). + + +fold_local_docs_with_start_key({Db, DocIdRevs, _}) -> + {StartKey, _} = hd(DocIdRevs), + Opts = [{start_key, StartKey}], + {ok, {?DOC_COUNT, Rows}} + = fabric2_db:fold_local_docs(Db, fun fold_fun/2, [], Opts), + ?assertEqual(DocIdRevs, lists:reverse(Rows)), + if length(DocIdRevs) == 1 -> ok; true -> + fold_local_docs_with_start_key({Db, tl(DocIdRevs), nil}) + end. + + +fold_local_docs_with_end_key({Db, DocIdRevs, _}) -> + RevDocIdRevs = lists:reverse(DocIdRevs), + {EndKey, _} = hd(RevDocIdRevs), + Opts = [{end_key, EndKey}], + {ok, {?DOC_COUNT, Rows}} = + fabric2_db:fold_local_docs(Db, fun fold_fun/2, [], Opts), + ?assertEqual(RevDocIdRevs, Rows), + if length(DocIdRevs) == 1 -> ok; true -> + fold_local_docs_with_end_key({Db, lists:reverse(tl(RevDocIdRevs)), nil}) + end. + + +fold_local_docs_with_both_keys_the_same({Db, DocIdRevs, _}) -> + lists:foreach(fun({DocId, _} = Row) -> + check_all_combos(Db, DocId, DocId, [Row]) + end, DocIdRevs). + + +fold_local_docs_with_different_keys({Db, DocIdRevs, _}) -> + lists:foreach(fun(_) -> + {StartKey, EndKey, Rows} = pick_range(DocIdRevs), + check_all_combos(Db, StartKey, EndKey, Rows) + end, lists:seq(1, 100)). + + +fold_local_docs_with_limit({Db, DocIdRevs, _}) -> + lists:foreach(fun(Limit) -> + Opts1 = [{limit, Limit}], + {ok, {?DOC_COUNT, Rows1}} = + fabric2_db:fold_local_docs(Db, fun fold_fun/2, [], Opts1), + ?assertEqual(lists:sublist(DocIdRevs, Limit), lists:reverse(Rows1)), + + Opts2 = [{dir, rev} | Opts1], + {ok, {?DOC_COUNT, Rows2}} = + fabric2_db:fold_local_docs(Db, fun fold_fun/2, [], Opts2), + ?assertEqual( + lists:sublist(lists:reverse(DocIdRevs), Limit), + lists:reverse(Rows2) + ) + end, lists:seq(0, 51)). + + +fold_local_docs_with_skip({Db, DocIdRevs, _}) -> + lists:foreach(fun(Skip) -> + Opts1 = [{skip, Skip}], + {ok, {?DOC_COUNT, Rows1}} = + fabric2_db:fold_local_docs(Db, fun fold_fun/2, [], Opts1), + Expect1 = case Skip > length(DocIdRevs) of + true -> []; + false -> lists:nthtail(Skip, DocIdRevs) + end, + ?assertEqual(Expect1, lists:reverse(Rows1)), + + Opts2 = [{dir, rev} | Opts1], + {ok, {?DOC_COUNT, Rows2}} = + fabric2_db:fold_local_docs(Db, fun fold_fun/2, [], Opts2), + Expect2 = case Skip > length(DocIdRevs) of + true -> []; + false -> lists:nthtail(Skip, lists:reverse(DocIdRevs)) + end, + ?assertEqual(Expect2, lists:reverse(Rows2)) + end, lists:seq(0, 51)). + + +fold_local_docs_with_skip_and_limit({Db, DocIdRevs, _}) -> + lists:foreach(fun(_) -> + check_skip_and_limit(Db, [], DocIdRevs), + check_skip_and_limit(Db, [{dir, rev}], lists:reverse(DocIdRevs)) + end, lists:seq(1, 100)). + + +check_all_combos(Db, StartKey, EndKey, Rows) -> + Opts1 = make_opts(fwd, StartKey, EndKey, true), + {ok, {?DOC_COUNT, Rows1}} = + fabric2_db:fold_local_docs(Db, fun fold_fun/2, [], Opts1), + ?assertEqual(lists:reverse(Rows), Rows1), + check_skip_and_limit(Db, Opts1, Rows), + + Opts2 = make_opts(fwd, StartKey, EndKey, false), + {ok, {?DOC_COUNT, Rows2}} = + fabric2_db:fold_local_docs(Db, fun fold_fun/2, [], Opts2), + Expect2 = if EndKey == undefined -> lists:reverse(Rows); true -> + lists:reverse(all_but_last(Rows)) + end, + ?assertEqual(Expect2, Rows2), + check_skip_and_limit(Db, Opts2, lists:reverse(Expect2)), + + Opts3 = make_opts(rev, StartKey, EndKey, true), + {ok, {?DOC_COUNT, Rows3}} = + fabric2_db:fold_local_docs(Db, fun fold_fun/2, [], Opts3), + ?assertEqual(Rows, Rows3), + check_skip_and_limit(Db, Opts3, lists:reverse(Rows)), + + Opts4 = make_opts(rev, StartKey, EndKey, false), + {ok, {?DOC_COUNT, Rows4}} = + fabric2_db:fold_local_docs(Db, fun fold_fun/2, [], Opts4), + Expect4 = if StartKey == undefined -> Rows; true -> + tl(Rows) + end, + ?assertEqual(Expect4, Rows4), + check_skip_and_limit(Db, Opts4, lists:reverse(Expect4)). + + +check_skip_and_limit(Db, Opts, []) -> + Skip = rand:uniform(?DOC_COUNT + 1) - 1, + Limit = rand:uniform(?DOC_COUNT + 1) - 1, + NewOpts = [{skip, Skip}, {limit, Limit} | Opts], + {ok, {?DOC_COUNT, OutRows}} = + fabric2_db:fold_local_docs(Db, fun fold_fun/2, [], NewOpts), + ?assertEqual([], OutRows); + +check_skip_and_limit(Db, Opts, Rows) -> + Skip = rand:uniform(length(Rows) + 1) - 1, + Limit = rand:uniform(?DOC_COUNT + 1 - Skip) - 1, + + ExpectRows = case Skip >= length(Rows) of + true -> + []; + false -> + lists:sublist(lists:nthtail(Skip, Rows), Limit) + end, + + SkipLimitOpts = [{skip, Skip}, {limit, Limit} | Opts], + {ok, {?DOC_COUNT, RevRows}} = + fabric2_db:fold_local_docs(Db, fun fold_fun/2, [], SkipLimitOpts), + OutRows = lists:reverse(RevRows), + ?assertEqual(ExpectRows, OutRows). + + +make_opts(fwd, StartKey, EndKey, InclusiveEnd) -> + DirOpts = case rand:uniform() =< 0.50 of + true -> [{dir, fwd}]; + false -> [] + end, + StartOpts = case StartKey of + undefined -> []; + <<_/binary>> -> [{start_key, StartKey}] + end, + EndOpts = case EndKey of + undefined -> []; + <<_/binary>> when InclusiveEnd -> [{end_key, EndKey}]; + <<_/binary>> -> [{end_key_gt, EndKey}] + end, + DirOpts ++ StartOpts ++ EndOpts; +make_opts(rev, StartKey, EndKey, InclusiveEnd) -> + BaseOpts = make_opts(fwd, EndKey, StartKey, InclusiveEnd), + [{dir, rev}] ++ BaseOpts -- [{dir, fwd}]. + + +all_but_last([]) -> + []; +all_but_last([_]) -> + []; +all_but_last(Rows) -> + lists:sublist(Rows, length(Rows) - 1). + + +pick_range(DocIdRevs) -> + {StartKey, StartRow, RestRows} = pick_start_key(DocIdRevs), + {EndKey, EndRow, RowsBetween} = pick_end_key(RestRows), + {StartKey, EndKey, StartRow ++ RowsBetween ++ EndRow}. + + +pick_start_key(Rows) -> + case rand:uniform() =< 0.1 of + true -> + {undefined, [], Rows}; + false -> + Idx = rand:uniform(length(Rows)), + {DocId, _} = Row = lists:nth(Idx, Rows), + {DocId, [Row], lists:nthtail(Idx, Rows)} + end. + + +pick_end_key([]) -> + {undefined, [], []}; + +pick_end_key(Rows) -> + case rand:uniform() =< 0.1 of + true -> + {undefined, [], Rows}; + false -> + Idx = rand:uniform(length(Rows)), + {DocId, _} = Row = lists:nth(Idx, Rows), + Tail = lists:nthtail(Idx, Rows), + {DocId, [Row], Rows -- [Row | Tail]} + end. + + +fold_fun({meta, Meta}, _Acc) -> + Total = fabric2_util:get_value(total, Meta), + {ok, {Total, []}}; +fold_fun({row, Row}, {Total, Rows}) -> + RowId = fabric2_util:get_value(id, Row), + RowId = fabric2_util:get_value(key, Row), + RowRev = fabric2_util:get_value(value, Row), + {ok, {Total, [{RowId, RowRev} | Rows]}}; +fold_fun(complete, Acc) -> + {ok, Acc}. diff --git a/test/elixir/test/basics_test.exs b/test/elixir/test/basics_test.exs index 35bace2ac..21e05dfcf 100644 --- a/test/elixir/test/basics_test.exs +++ b/test/elixir/test/basics_test.exs @@ -454,6 +454,8 @@ defmodule BasicsTest do assert Map.has_key?(val, "rev") # Add _local/doc5 + # Use a body > 100Kb to tests local docs chunkifier + body = %{:b => String.duplicate("b", 110_000)} assert Couch.put("/#{db_name}/_local/doc5", body: body).body["ok"] resp = Couch.get("/#{db_name}/_local_docs") assert resp.status_code == 200 -- cgit v1.2.1 From 253a48b524697e0c79798495c73871190512ab6d Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Thu, 24 Oct 2019 14:30:01 -0400 Subject: Add a special error for an invalid legacy local doc revsion Since we are dealing with upgrades and both versions start out as binaries, make sure we add extra belts and suspenders to surface any issues with encoding errors. --- src/fabric/src/fabric2_fdb.erl | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/fabric/src/fabric2_fdb.erl b/src/fabric/src/fabric2_fdb.erl index dc803115f..c59346ebd 100644 --- a/src/fabric/src/fabric2_fdb.erl +++ b/src/fabric/src/fabric2_fdb.erl @@ -532,8 +532,13 @@ get_local_doc_rev(_Db0, <> = DocId, Val) -> case Val of <<131, _/binary>> -> % Compatibility clause for an older encoding format - {Rev, _} = binary_to_term(Val, [safe]), - Rev; + try binary_to_term(Val, [safe]) of + {Rev, _} -> Rev; + _ -> erlang:error({invalid_local_doc_rev, DocId, Val}) + catch + error:badarg -> + erlang:error({invalid_local_doc_rev, DocId, Val}) + end; <<_/binary>> -> try binary_to_integer(Val) of IntVal when IntVal >= 0 -> -- cgit v1.2.1 From a1425f0f34f4589865c72149a8e9afb1d79ec208 Mon Sep 17 00:00:00 2001 From: Garren Smith Date: Mon, 28 Oct 2019 15:33:53 +0200 Subject: add test to prove we can view swap --- test/elixir/test/map_test.exs | 67 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) diff --git a/test/elixir/test/map_test.exs b/test/elixir/test/map_test.exs index 04361ba87..fa1758767 100644 --- a/test/elixir/test/map_test.exs +++ b/test/elixir/test/map_test.exs @@ -436,6 +436,73 @@ defmodule ViewMapTest do assert Enum.at(rows, 9)["value"] == "multiple values!" end + test "can do design doc swap", context do + db_name = context[:db_name] + + docs = [ + %{_id: "doc1", foo: "foo", bar: "bar"}, + %{ + _id: "_design/view1", + views: %{ + view: %{ + map: """ + function (doc) { + if (!doc.foo) { + return; + } + emit(doc.foo); + } + """ + } + } + }, + %{ + _id: "_design/view2", + views: %{ + view: %{ + map: """ + function (doc) { + if (!doc.bar) { + return; + } + emit(doc.bar); + } + """ + } + } + } + ] + + resp = Couch.post("/#{db_name}/_bulk_docs", body: %{:docs => docs}) + assert resp.status_code == 201 + + url1 = "/#{db_name}/_design/view1/_view/view" + url2 = "/#{db_name}/_design/view2/_view/view" + + resp = Couch.get(url1) + assert resp.status_code == 200 + keys = get_keys(resp) + assert keys == ["foo"] + + resp = Couch.get(url2) + assert resp.status_code == 200 + keys = get_keys(resp) + assert keys == ["bar"] + + view1 = Couch.get("/#{db_name}/_design/view1") + view2 = Couch.get("/#{db_name}/_design/view2") + + new_view1 = Map.replace!(view1.body, "views", view2.body["views"]) + + resp = Couch.put("/#{db_name}/_design/view1", body: new_view1) + assert resp.status_code in [201, 202] + + resp = Couch.get(url1, query: %{update: false}) + assert resp.status_code == 200 + keys = get_keys(resp) + assert keys == ["bar"] + end + def update_doc_value(db_name, id, value) do resp = Couch.get("/#{db_name}/#{id}") doc = convert(resp.body) -- cgit v1.2.1 From 31f88cf60e65bc7a5eed1e7af58341f508ba406a Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Wed, 30 Oct 2019 09:53:12 -0400 Subject: Use "\xFF/metadataVersion" key for checking metadata Need to use FDB version 6.1+ and erlfdb version that has this commit: https://github.com/cloudant-labs/couchdb-erlfdb/commit/7718a3d7e1994e1384c56d39fae5cad3d8c6c4b3 Since fabric2.hrl is a public include now, use that in couch_jobs to avoid redefining a bunch of things. --- src/couch_jobs/src/couch_jobs.hrl | 5 ++--- src/fabric/include/fabric2.hrl | 13 +------------ 2 files changed, 3 insertions(+), 15 deletions(-) diff --git a/src/couch_jobs/src/couch_jobs.hrl b/src/couch_jobs/src/couch_jobs.hrl index 2a02d760f..055bf091c 100644 --- a/src/couch_jobs/src/couch_jobs.hrl +++ b/src/couch_jobs/src/couch_jobs.hrl @@ -10,6 +10,8 @@ % License for the specific language governing permissions and limitations under % the License. +-include_lib("fabric/include/fabric2.hrl"). + % Job map/json field definitions % @@ -20,10 +22,7 @@ % These might be in a fabric public hrl eventually % --define(uint2bin(I), binary:encode_unsigned(I, little)). --define(bin2uint(I), binary:decode_unsigned(I, little)). -define(UNSET_VS, {versionstamp, 16#FFFFFFFFFFFFFFFF, 16#FFFF}). --define(METADATA_VERSION_KEY, <<"$metadata_version_key$">>). % Data model definitions % diff --git a/src/fabric/include/fabric2.hrl b/src/fabric/include/fabric2.hrl index a5c12aef3..189995de2 100644 --- a/src/fabric/include/fabric2.hrl +++ b/src/fabric/include/fabric2.hrl @@ -13,18 +13,7 @@ -define(uint2bin(I), binary:encode_unsigned(I, little)). -define(bin2uint(I), binary:decode_unsigned(I, little)). - -% This will eventually be the `\xFFmetadataVersion` key that is -% currently only available in FoundationDB master. -% -% https://forums.foundationdb.org/t/a-new-tool-for-managing-layer-metadata/1191 -% -% Until then we'll fake the same behavior using a randomish -% key for tracking metadata changse. Once we get to the -% new feature this will be more performant by updating -% this define. --define(METADATA_VERSION_KEY, <<"$metadata_version_key$">>). - +-define(METADATA_VERSION_KEY, <<16#FF, "/metadataVersion">>). % Prefix Definitions -- cgit v1.2.1 From eb4d0d8435532ebc5b8b590e980def71f3a7e800 Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Wed, 30 Oct 2019 13:32:01 -0500 Subject: Abandon a view job if the db or ddoc is deleted If we don't explicitly bail out of running the job it will loop indefinitely in the couch_jobs retry logic. --- src/couch_views/src/couch_views_indexer.erl | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/src/couch_views/src/couch_views_indexer.erl b/src/couch_views/src/couch_views_indexer.erl index 55ce06311..7c05c1d60 100644 --- a/src/couch_views/src/couch_views_indexer.erl +++ b/src/couch_views/src/couch_views_indexer.erl @@ -42,8 +42,27 @@ init() -> <<"sig">> := JobSig } = Data, - {ok, Db} = fabric2_db:open(DbName, [?ADMIN_CTX]), - {ok, DDoc} = fabric2_db:open_doc(Db, DDocId), + {ok, Db} = try + fabric2_db:open(DbName, [?ADMIN_CTX]) + catch error:database_does_not_exist -> + couch_jobs:finish(undefined, Job, Data#{ + error => db_deleted, + reason => "Database was deleted" + }), + exit(normal) + end, + + {ok, DDoc} = case fabric2_db:open_doc(Db, DDocId) of + {ok, DDoc0} -> + {ok, DDoc0}; + {not_found, _} -> + couch_jobs:finish(undefined, Job, Data#{ + error => ddoc_deleted, + reason => "Design document was deleted" + }), + exit(normal) + end, + {ok, Mrst} = couch_views_util:ddoc_to_mrst(DbName, DDoc), HexSig = fabric2_util:to_hex(Mrst#mrst.sig), -- cgit v1.2.1 From 7815e8030930b724e8e67dc65565ba9e1bbdecf1 Mon Sep 17 00:00:00 2001 From: ILYA Khlopotov Date: Tue, 1 Oct 2019 16:01:08 +0000 Subject: Pass contexts to fabric2_db functions Since the db structure returned from fabric2_db:open and fabric2_db:create includes `user_ctx` there is no need to pass it explicitly in every `fabric2_db` call. This means we can simplify few things: - Don't pass user_ctx in `chttpd_db:db_req/2` since we pass db already - Don't have to use `db_open_options` in `chttpd_changes` - Don't have to pass `user_ctx` to `fabric2_db:open_doc` and `fabric2_db:update_doc` --- src/chttpd/src/chttpd_db.erl | 103 ++++++++++++++++++------------------------- 1 file changed, 42 insertions(+), 61 deletions(-) diff --git a/src/chttpd/src/chttpd_db.erl b/src/chttpd/src/chttpd_db.erl index 35ec8679a..dbd52be40 100644 --- a/src/chttpd/src/chttpd_db.erl +++ b/src/chttpd/src/chttpd_db.erl @@ -93,10 +93,7 @@ handle_changes_req(#httpd{path_parts=[_,<<"_changes">>]}=Req, _Db) -> send_method_not_allowed(Req, "GET,POST,HEAD"). handle_changes_req1(#httpd{}=Req, Db) -> - #changes_args{filter=Raw, style=Style} = Args0 = parse_changes_query(Req), - ChangesArgs = Args0#changes_args{ - db_open_options = [{user_ctx, fabric2_db:get_user_ctx(Db)}] - }, + ChangesArgs = parse_changes_query(Req), ChangesFun = chttpd_changes:handle_db_changes(ChangesArgs, Req, Db), Max = chttpd:chunked_response_buffer_size(), case ChangesArgs#changes_args.feed of @@ -376,10 +373,10 @@ handle_design_info_req(#httpd{method='GET'}=Req, Db, #doc{} = DDoc) -> handle_design_info_req(Req, _Db, _DDoc) -> send_method_not_allowed(Req, "GET"). -create_db_req(#httpd{}=Req, DbName) -> +create_db_req(#httpd{user_ctx=Ctx}=Req, DbName) -> couch_httpd:verify_is_server_admin(Req), DocUrl = absolute_uri(Req, "/" ++ couch_util:url_encode(DbName)), - case fabric2_db:create(DbName, []) of + case fabric2_db:create(DbName, [{user_ctx, Ctx}]) of {ok, _} -> send_json(Req, 201, [{"Location", DocUrl}], {[{ok, true}]}); {error, file_exists} -> @@ -388,9 +385,9 @@ create_db_req(#httpd{}=Req, DbName) -> throw(Error) end. -delete_db_req(#httpd{}=Req, DbName) -> +delete_db_req(#httpd{user_ctx=Ctx}=Req, DbName) -> couch_httpd:verify_is_server_admin(Req), - case fabric2_db:delete(DbName, []) of + case fabric2_db:delete(DbName, [{user_ctx, Ctx}]) of ok -> send_json(Req, 200, {[{ok, true}]}); Error -> @@ -409,11 +406,9 @@ db_req(#httpd{method='GET',path_parts=[_DbName]}=Req, Db) -> couch_stats:update_histogram([couchdb, dbinfo], DeltaT), send_json(Req, {DbInfo}); -db_req(#httpd{method='POST', path_parts=[DbName], user_ctx=Ctx}=Req, Db) -> +db_req(#httpd{method='POST', path_parts=[DbName]}=Req, Db) -> chttpd:validate_ctype(Req, "application/json"), - Options = [{user_ctx,Ctx}], - Doc0 = chttpd:json_body(Req), Doc1 = couch_doc:from_json_obj_validate(Doc0, fabric2_db:name(Db)), Doc2 = case Doc1#doc.id of @@ -427,7 +422,7 @@ db_req(#httpd{method='POST', path_parts=[DbName], user_ctx=Ctx}=Req, Db) -> "ok" -> % async_batching spawn(fun() -> - case catch(fabric2_db:update_doc(Db, Doc2, Options)) of + case catch(fabric2_db:update_doc(Db, Doc2, [])) of {ok, _} -> chttpd_stats:incr_writes(), ok; @@ -447,7 +442,7 @@ db_req(#httpd{method='POST', path_parts=[DbName], user_ctx=Ctx}=Req, Db) -> % normal DocUrl = absolute_uri(Req, [$/, couch_util:url_encode(DbName), $/, couch_util:url_encode(DocId)]), - case fabric2_db:update_doc(Db, Doc2, Options) of + case fabric2_db:update_doc(Db, Doc2, []) of {ok, NewRev} -> chttpd_stats:incr_writes(), HttpCode = 201; @@ -465,8 +460,8 @@ db_req(#httpd{method='POST', path_parts=[DbName], user_ctx=Ctx}=Req, Db) -> db_req(#httpd{path_parts=[_DbName]}=Req, _Db) -> send_method_not_allowed(Req, "DELETE,GET,HEAD,POST"); -db_req(#httpd{method='POST', path_parts=[_DbName, <<"_ensure_full_commit">>], - user_ctx=Ctx}=Req, Db) -> +db_req(#httpd{method='POST', path_parts=[_DbName, <<"_ensure_full_commit">>] + }=Req, Db) -> chttpd:validate_ctype(Req, "application/json"), #{db_prefix := <<_/binary>>} = Db, send_json(Req, 201, {[ @@ -477,7 +472,7 @@ db_req(#httpd{method='POST', path_parts=[_DbName, <<"_ensure_full_commit">>], db_req(#httpd{path_parts=[_,<<"_ensure_full_commit">>]}=Req, _Db) -> send_method_not_allowed(Req, "POST"); -db_req(#httpd{method='POST',path_parts=[_,<<"_bulk_docs">>], user_ctx=Ctx}=Req, Db) -> +db_req(#httpd{method='POST',path_parts=[_,<<"_bulk_docs">>]}=Req, Db) -> couch_stats:increment_counter([couchdb, httpd, bulk_requests]), chttpd:validate_ctype(Req, "application/json"), {JsonProps} = chttpd:json_body_obj(Req), @@ -490,13 +485,13 @@ db_req(#httpd{method='POST',path_parts=[_,<<"_bulk_docs">>], user_ctx=Ctx}=Req, DocsArray0 end, couch_stats:update_histogram([couchdb, httpd, bulk_docs], length(DocsArray)), - case chttpd:header_value(Req, "X-Couch-Full-Commit") of + Options = case chttpd:header_value(Req, "X-Couch-Full-Commit") of "true" -> - Options = [full_commit, {user_ctx,Ctx}]; + [full_commit]; "false" -> - Options = [delay_commit, {user_ctx,Ctx}]; + [delay_commit]; _ -> - Options = [{user_ctx,Ctx}] + [] end, DbName = fabric2_db:name(Db), Docs = lists:map(fun(JsonObj) -> @@ -561,10 +556,8 @@ db_req(#httpd{method='POST', path_parts=[_, <<"_bulk_get">>], throw({bad_request, <<"Missing JSON list of 'docs'.">>}); Docs -> #doc_query_args{ - options = Options0 + options = Options } = bulk_get_parse_doc_query(Req), - Options = [{user_ctx, Req#httpd.user_ctx} | Options0], - AcceptJson = MochiReq:accepts_content_type("application/json"), AcceptMixedMp = MochiReq:accepts_content_type("multipart/mixed"), AcceptRelatedMp = MochiReq:accepts_content_type("multipart/related"), @@ -629,7 +622,6 @@ db_req(#httpd{path_parts=[_, <<"_bulk_get">>]}=Req, _Db) -> db_req(#httpd{method='POST',path_parts=[_,<<"_purge">>]}=Req, Db) -> couch_stats:increment_counter([couchdb, httpd, purge_requests]), chttpd:validate_ctype(Req, "application/json"), - Options = [{user_ctx, Req#httpd.user_ctx}], {IdsRevs} = chttpd:json_body_obj(Req), IdsRevs2 = [{Id, couch_doc:parse_revs(Revs)} || {Id, Revs} <- IdsRevs], MaxIds = config:get_integer("purge", "max_document_id_number", 100), @@ -646,7 +638,7 @@ db_req(#httpd{method='POST',path_parts=[_,<<"_purge">>]}=Req, Db) -> true -> ok end, couch_stats:increment_counter([couchdb, document_purges, total], length(IdsRevs2)), - Results2 = case fabric:purge_docs(Db, IdsRevs2, Options) of + Results2 = case fabric:purge_docs(Db, IdsRevs2, []) of {ok, Results} -> chttpd_stats:incr_writes(length(Results)), Results; @@ -758,8 +750,7 @@ db_req(#httpd{method='GET',path_parts=[_,<<"_security">>]}=Req, Db) -> db_req(#httpd{path_parts=[_,<<"_security">>]}=Req, _Db) -> send_method_not_allowed(Req, "PUT,GET"); -db_req(#httpd{method='PUT',path_parts=[_,<<"_revs_limit">>],user_ctx=Ctx}=Req, - Db) -> +db_req(#httpd{method='PUT',path_parts=[_,<<"_revs_limit">>]}=Req, Db) -> Limit = chttpd:json_body(Req), ok = fabric2_db:set_revs_limit(Db, Limit), send_json(Req, {[{<<"ok">>, true}]}); @@ -771,10 +762,9 @@ db_req(#httpd{path_parts=[_,<<"_revs_limit">>]}=Req, _Db) -> send_method_not_allowed(Req, "PUT,GET"); db_req(#httpd{method='PUT',path_parts=[_,<<"_purged_infos_limit">>]}=Req, Db) -> - Options = [{user_ctx, Req#httpd.user_ctx}], case chttpd:json_body(Req) of Limit when is_integer(Limit), Limit > 0 -> - case fabric:set_purge_infos_limit(Db, Limit, Options) of + case fabric:set_purge_infos_limit(Db, Limit, []) of ok -> send_json(Req, {[{<<"ok">>, true}]}); Error -> @@ -822,7 +812,6 @@ db_req(#httpd{path_parts=[_, DocId | FileNameParts]}=Req, Db) -> db_attachment_req(Req, Db, DocId, FileNameParts). multi_all_docs_view(Req, Db, OP, Queries) -> - UserCtx = Req#httpd.user_ctx, Args0 = couch_mrview_http:parse_params(Req, undefined), Args1 = Args0#mrargs{view_type=map}, ArgQueries = lists:map(fun({Query}) -> @@ -843,16 +832,15 @@ multi_all_docs_view(Req, Db, OP, Queries) -> }, VAcc1 = lists:foldl(fun (#mrargs{keys = undefined} = Args, Acc0) -> - send_all_docs(Db, Args, UserCtx, Acc0); + send_all_docs(Db, Args, Acc0); (#mrargs{keys = Keys} = Args, Acc0) when is_list(Keys) -> - send_all_docs_keys(Db, Args, UserCtx, Acc0) + send_all_docs_keys(Db, Args, Acc0) end, VAcc0, ArgQueries), {ok, Resp1} = chttpd:send_delayed_chunk(VAcc1#vacc.resp, "\r\n]}"), chttpd:end_delayed_json_response(Resp1). all_docs_view(Req, Db, Keys, OP) -> - UserCtx = Req#httpd.user_ctx, Args0 = couch_mrview_http:parse_body_and_query(Req, Keys), Args1 = Args0#mrargs{view_type=map}, Args2 = couch_views_util:validate_args(Args1), @@ -865,17 +853,17 @@ all_docs_view(Req, Db, Keys, OP) -> }, case Args3#mrargs.keys of undefined -> - VAcc1 = send_all_docs(Db, Args3, UserCtx, VAcc0), + VAcc1 = send_all_docs(Db, Args3, VAcc0), {ok, VAcc1#vacc.resp}; Keys when is_list(Keys) -> - VAcc1 = send_all_docs_keys(Db, Args3, UserCtx, VAcc0), + VAcc1 = send_all_docs_keys(Db, Args3, VAcc0), {ok, VAcc2} = view_cb(complete, VAcc1), {ok, VAcc2#vacc.resp} end. -send_all_docs(Db, #mrargs{keys = undefined} = Args, UserCtx, VAcc0) -> - Opts = all_docs_view_opts(Args, UserCtx), +send_all_docs(Db, #mrargs{keys = undefined} = Args, VAcc0) -> + Opts = all_docs_view_opts(Args), NS = couch_util:get_value(namespace, Opts), FoldFun = case NS of <<"_all_docs">> -> fold_docs; @@ -888,7 +876,7 @@ send_all_docs(Db, #mrargs{keys = undefined} = Args, UserCtx, VAcc0) -> VAcc1. -send_all_docs_keys(Db, #mrargs{} = Args, UserCtx, VAcc0) -> +send_all_docs_keys(Db, #mrargs{} = Args, VAcc0) -> Keys = apply_args_to_keylist(Args, Args#mrargs.keys), NS = couch_util:get_value(namespace, Args#mrargs.extra), TotalRows = fabric2_db:get_doc_count(Db, NS), @@ -903,7 +891,7 @@ send_all_docs_keys(Db, #mrargs{} = Args, UserCtx, VAcc0) -> DocOpts = case Args#mrargs.conflicts of true -> [conflicts | Args#mrargs.doc_options]; _ -> Args#mrargs.doc_options - end ++ [{user_ctx, UserCtx}], + end, IncludeDocs = Args#mrargs.include_docs, lists:foldl(fun(DocId, Acc) -> OpenOpts = [deleted | DocOpts], @@ -946,7 +934,7 @@ send_all_docs_keys(Db, #mrargs{} = Args, UserCtx, VAcc0) -> end, VAcc1, Keys). -all_docs_view_opts(Args, UserCtx) -> +all_docs_view_opts(Args) -> NS = couch_util:get_value(namespace, Args#mrargs.extra), StartKey = case Args#mrargs.start_key of undefined -> Args#mrargs.start_key_docid; @@ -966,7 +954,6 @@ all_docs_view_opts(Args, UserCtx) -> {undefined, _} -> [] end, [ - {user_ctx, UserCtx}, {dir, Args#mrargs.direction}, {limit, Args#mrargs.limit}, {skip, Args#mrargs.skip}, @@ -1000,7 +987,7 @@ view_cb({row, Row}, {iter, Db, Args, VAcc}) -> DocOpts = case Args#mrargs.conflicts of true -> [conflicts | Args#mrargs.doc_options]; _ -> Args#mrargs.doc_options - end ++ [{user_ctx, (VAcc#vacc.req)#httpd.user_ctx}], + end, OpenOpts = [deleted | DocOpts], DocMember = case fabric2_db:open_doc(Db, DocId, OpenOpts) of {not_found, missing} -> @@ -1051,10 +1038,9 @@ db_doc_req(#httpd{method='GET', mochi_req=MochiReq}=Req, Db, DocId) -> #doc_query_args{ rev = Rev, open_revs = Revs, - options = Options0, + options = Options, atts_since = AttsSince } = parse_doc_query(Req), - Options = [{user_ctx, Req#httpd.user_ctx} | Options0], case Revs of [] -> Options2 = @@ -1101,13 +1087,11 @@ db_doc_req(#httpd{method='GET', mochi_req=MochiReq}=Req, Db, DocId) -> end end; -db_doc_req(#httpd{method='POST', user_ctx=Ctx}=Req, Db, DocId) -> +db_doc_req(#httpd{method='POST'}=Req, Db, DocId) -> couch_httpd:validate_referer(Req), fabric2_db:validate_docid(DocId), chttpd:validate_ctype(Req, "multipart/form-data"), - Options = [{user_ctx,Ctx}], - Form = couch_httpd:parse_form(Req), case proplists:is_defined("_doc", Form) of true -> @@ -1144,7 +1128,7 @@ db_doc_req(#httpd{method='POST', user_ctx=Ctx}=Req, Db, DocId) -> NewDoc = Doc#doc{ atts = UpdatedAtts ++ OldAtts2 }, - case fabric2_db:update_doc(Db, NewDoc, Options) of + case fabric2_db:update_doc(Db, NewDoc, []) of {ok, NewRev} -> chttpd_stats:incr_writes(), HttpCode = 201; @@ -1158,15 +1142,13 @@ db_doc_req(#httpd{method='POST', user_ctx=Ctx}=Req, Db, DocId) -> {rev, couch_doc:rev_to_str(NewRev)} ]}); -db_doc_req(#httpd{method='PUT', user_ctx=Ctx}=Req, Db, DocId) -> +db_doc_req(#httpd{method='PUT'}=Req, Db, DocId) -> #doc_query_args{ update_type = UpdateType } = parse_doc_query(Req), DbName = fabric2_db:name(Db), fabric2_db:validate_docid(DocId), - Options = [{user_ctx, Ctx}], - Loc = absolute_uri(Req, [$/, couch_util:url_encode(DbName), $/, couch_util:url_encode(DocId)]), RespHeaders = [{"Location", Loc}], @@ -1193,7 +1175,7 @@ db_doc_req(#httpd{method='PUT', user_ctx=Ctx}=Req, Db, DocId) -> Doc = couch_doc_from_req(Req, Db, DocId, chttpd:json_body(Req)), spawn(fun() -> - case catch(fabric2_db:update_doc(Db, Doc, Options)) of + case catch(fabric2_db:update_doc(Db, Doc, [])) of {ok, _} -> chttpd_stats:incr_writes(), ok; @@ -1216,7 +1198,7 @@ db_doc_req(#httpd{method='PUT', user_ctx=Ctx}=Req, Db, DocId) -> end end; -db_doc_req(#httpd{method='COPY', user_ctx=Ctx}=Req, Db, SourceDocId) -> +db_doc_req(#httpd{method='COPY'}=Req, Db, SourceDocId) -> SourceRev = case extract_header_rev(Req, chttpd:qs_value(Req, "rev")) of missing_rev -> nil; @@ -1228,7 +1210,7 @@ db_doc_req(#httpd{method='COPY', user_ctx=Ctx}=Req, Db, SourceDocId) -> Doc = couch_doc_open(Db, SourceDocId, SourceRev, []), % save new doc case fabric2_db:update_doc(Db, - Doc#doc{id=TargetDocId, revs=TargetRevs}, [{user_ctx,Ctx}]) of + Doc#doc{id=TargetDocId, revs=TargetRevs}, []) of {ok, NewTargetRev} -> chttpd_stats:incr_writes(), HttpCode = 201; @@ -1424,16 +1406,16 @@ send_updated_doc(Req, Db, DocId, Json) -> send_updated_doc(Req, Db, DocId, Doc, Headers) -> send_updated_doc(Req, Db, DocId, Doc, Headers, interactive_edit). -send_updated_doc(#httpd{user_ctx=Ctx} = Req, Db, DocId, #doc{deleted=Deleted}=Doc, +send_updated_doc(#httpd{} = Req, Db, DocId, #doc{deleted=Deleted}=Doc, Headers, UpdateType) -> Options = case couch_httpd:header_value(Req, "X-Couch-Full-Commit") of "true" -> - [full_commit, UpdateType, {user_ctx,Ctx}]; + [full_commit, UpdateType]; "false" -> - [delay_commit, UpdateType, {user_ctx,Ctx}]; + [delay_commit, UpdateType]; _ -> - [UpdateType, {user_ctx,Ctx}] + [UpdateType] end, {Status, {etag, Etag}, Body} = update_doc(Db, DocId, #doc{deleted=Deleted}=Doc, Options), @@ -1642,7 +1624,6 @@ db_attachment_req(#httpd{method='GET',mochi_req=MochiReq}=Req, Db, DocId, FileNa db_attachment_req(#httpd{method=Method}=Req, Db, DocId, FileNameParts) when (Method == 'PUT') or (Method == 'DELETE') -> #httpd{ - user_ctx = Ctx, mochi_req = MochiReq } = Req, FileName = validate_attachment_name( @@ -1727,7 +1708,7 @@ db_attachment_req(#httpd{method=Method}=Req, Db, DocId, FileNameParts) fabric2_db:validate_docid(DocId), #doc{id=DocId}; Rev -> - case fabric2_db:open_doc_revs(Db, DocId, [Rev], [{user_ctx,Ctx}]) of + case fabric2_db:open_doc_revs(Db, DocId, [Rev], []) of {ok, [{ok, Doc0}]} -> chttpd_stats:incr_reads(), Doc0; @@ -1742,7 +1723,7 @@ db_attachment_req(#httpd{method=Method}=Req, Db, DocId, FileNameParts) DocEdited = Doc#doc{ atts = NewAtt ++ [A || A <- Atts, couch_att:fetch(name, A) /= FileName] }, - case fabric2_db:update_doc(Db, DocEdited, [{user_ctx,Ctx}]) of + case fabric2_db:update_doc(Db, DocEdited, []) of {ok, UpdatedRev} -> chttpd_stats:incr_writes(), HttpCode = 201; -- cgit v1.2.1 From c6f5d82427935ebc387629db9800eee04128330d Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Mon, 4 Nov 2019 15:53:58 -0500 Subject: Ensure we can create partitioned design docs with FDB Users should be able to replicate their partitioned dbs to the new environment. --- src/couch_mrview/src/couch_mrview.erl | 14 ++------------ src/fabric/test/fabric2_doc_crud_tests.erl | 18 ++++++++++++++++++ 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/src/couch_mrview/src/couch_mrview.erl b/src/couch_mrview/src/couch_mrview.erl index 02e11d031..880dfa725 100644 --- a/src/couch_mrview/src/couch_mrview.erl +++ b/src/couch_mrview/src/couch_mrview.erl @@ -181,7 +181,7 @@ validate(Db, DDoc) -> validate(DbName, IsPartitioned, DDoc). -validate(DbName, IsDbPartitioned, DDoc) -> +validate(DbName, _IsDbPartitioned, DDoc) -> ok = validate_ddoc_fields(DDoc#doc.body), GetName = fun (#mrview{map_names = [Name | _]}) -> Name; @@ -208,19 +208,9 @@ validate(DbName, IsDbPartitioned, DDoc) -> end, {ok, #mrst{ language = Lang, - views = Views, - partitioned = Partitioned + views = Views }} = couch_mrview_util:ddoc_to_mrst(DbName, DDoc), - case {IsDbPartitioned, Partitioned} of - {false, true} -> - throw({invalid_design_doc, - <<"partitioned option cannot be true in a " - "non-partitioned database.">>}); - {_, _} -> - ok - end, - try Views =/= [] andalso couch_query_servers:get_os_process(Lang) of false -> ok; diff --git a/src/fabric/test/fabric2_doc_crud_tests.erl b/src/fabric/test/fabric2_doc_crud_tests.erl index 255efefdc..a9085be77 100644 --- a/src/fabric/test/fabric2_doc_crud_tests.erl +++ b/src/fabric/test/fabric2_doc_crud_tests.erl @@ -33,6 +33,7 @@ doc_crud_test_() -> fun create_ddoc_requires_admin/1, fun create_ddoc_requires_validation/1, fun create_ddoc_requires_compilation/1, + fun can_create_a_partitioned_ddoc/1, fun update_doc_basic/1, fun update_ddoc_basic/1, fun update_doc_replicated/1, @@ -108,6 +109,23 @@ create_ddoc_basic({Db, _}) -> ?assertEqual({ok, NewDoc}, fabric2_db:open_doc(Db, Doc#doc.id)). +can_create_a_partitioned_ddoc({Db, _}) -> + UUID = fabric2_util:uuid(), + DDocId = <<"_design/", UUID/binary>>, + Doc = #doc{ + id = DDocId, + body = {[ + {<<"options">>, {[{<<"partitioned">>, true}]}}, + {<<"views">>, {[ + {<<"foo">>, {[ + {<<"map">>, <<"function(doc) {}">>} + ]}} + ]}} + ]} + }, + ?assertMatch({ok, {_, _}}, fabric2_db:update_doc(Db, Doc)). + + create_ddoc_requires_admin({Db, _}) -> Db2 = fabric2_db:set_user_ctx(Db, #user_ctx{}), UUID = fabric2_util:uuid(), -- cgit v1.2.1 From 3344638f7deada175353c0d566c14634803b3741 Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Tue, 5 Nov 2019 10:52:34 -0500 Subject: Check security properties in the main transaction Previously we checked security properties in a separate transaction, after opening the db or fetching it from the cache. To avoid running an extra transaction move the check inside the main transaction right after the metadata check runs. That ensure it will be consistent and it won't be accidentally missed as all operations run the `ensure_current` metadata check. Also remove the special `get_config/2` function in `fabric2_fdb` for getting revs limit and security properties and just read them directly from the db map. --- src/chttpd/src/chttpd_auth_request.erl | 5 +-- src/fabric/src/fabric2_db.erl | 59 +++++++++++++++++---------- src/fabric/src/fabric2_fdb.erl | 39 ++++++++---------- src/fabric/src/fabric2_server.erl | 3 +- src/fabric/test/fabric2_db_security_tests.erl | 21 ++++++---- 5 files changed, 71 insertions(+), 56 deletions(-) diff --git a/src/chttpd/src/chttpd_auth_request.erl b/src/chttpd/src/chttpd_auth_request.erl index 4a9b4e9e6..3f6f97602 100644 --- a/src/chttpd/src/chttpd_auth_request.erl +++ b/src/chttpd/src/chttpd_auth_request.erl @@ -106,9 +106,8 @@ server_authorization_check(#httpd{path_parts=[<<"_node">>,_ , <<"_system">>|_]}= server_authorization_check(#httpd{path_parts=[<<"_", _/binary>>|_]}=Req) -> require_admin(Req). -db_authorization_check(#httpd{path_parts=[DbName|_],user_ctx=Ctx}=Req) -> - {ok, Db} = fabric2_db:open(DbName, [{user_ctx, Ctx}]), - fabric2_db:check_is_member(Db), +db_authorization_check(#httpd{path_parts=[_DbName|_]}=Req) -> + % Db authorization checks are performed in fabric before every FDB operation Req. diff --git a/src/fabric/src/fabric2_db.erl b/src/fabric/src/fabric2_db.erl index e2674a480..b5d68c087 100644 --- a/src/fabric/src/fabric2_db.erl +++ b/src/fabric/src/fabric2_db.erl @@ -176,19 +176,18 @@ open(DbName, Options) -> case fabric2_server:fetch(DbName) of #{} = Db -> Db1 = maybe_set_user_ctx(Db, Options), - ok = check_is_member(Db1), - {ok, Db1}; + {ok, require_member_check(Db1)}; undefined -> Result = fabric2_fdb:transactional(DbName, Options, fun(TxDb) -> fabric2_fdb:open(TxDb, Options) end), % Cache outside the transaction retry loop case Result of - #{security_doc := SecDoc} = Db0 -> - ok = check_is_member(Db0, SecDoc), + #{} = Db0 -> Db1 = maybe_add_sys_db_callbacks(Db0), ok = fabric2_server:store(Db1), - {ok, Db1#{tx := undefined}}; + Db2 = Db1#{tx := undefined}, + {ok, require_member_check(Db2)}; Error -> Error end @@ -256,7 +255,11 @@ is_admin(Db, {SecProps}) when is_list(SecProps) -> check_is_admin(Db) -> - case is_admin(Db) of + check_is_admin(Db, get_security(Db)). + + +check_is_admin(Db, SecDoc) -> + case is_admin(Db, SecDoc) of true -> ok; false -> @@ -280,6 +283,18 @@ check_is_member(Db, SecDoc) -> end. +require_admin_check(#{} = Db) -> + Db#{security_fun := fun check_is_admin/2}. + + +require_member_check(#{} = Db) -> + Db#{security_fun := fun check_is_member/2}. + + +no_security_check(#{} = Db) -> + Db#{security_fun := undefined}. + + name(#{name := DbName}) -> DbName. @@ -359,17 +374,17 @@ get_pid(#{}) -> get_revs_limit(#{} = Db) -> - RevsLimitBin = fabric2_fdb:transactional(Db, fun(TxDb) -> - fabric2_fdb:get_config(TxDb, <<"revs_limit">>) + #{revs_limit := RevsLimit} = fabric2_fdb:transactional(Db, fun(TxDb) -> + fabric2_fdb:ensure_current(TxDb) end), - ?bin2uint(RevsLimitBin). + RevsLimit. get_security(#{} = Db) -> - SecBin = fabric2_fdb:transactional(Db, fun(TxDb) -> - fabric2_fdb:get_config(TxDb, <<"security_doc">>) + #{security_doc := SecDoc} = fabric2_fdb:transactional(Db, fun(TxDb) -> + fabric2_fdb:ensure_current(no_security_check(TxDb)) end), - ?JSON_DECODE(SecBin). + SecDoc. get_update_seq(#{} = Db) -> @@ -440,26 +455,26 @@ is_users_db(DbName) when is_binary(DbName) -> IsAuthCache orelse IsCfgUsersDb orelse IsGlobalUsersDb. -set_revs_limit(#{} = Db, RevsLimit) -> - check_is_admin(Db), +set_revs_limit(#{} = Db0, RevsLimit) -> + Db1 = require_admin_check(Db0), RevsLimBin = ?uint2bin(max(1, RevsLimit)), - Resp = fabric2_fdb:transactional(Db, fun(TxDb) -> - fabric2_fdb:set_config(TxDb, <<"revs_limit">>, RevsLimBin) + {Resp, Db2} = fabric2_fdb:transactional(Db1, fun(TxDb) -> + {fabric2_fdb:set_config(TxDb, <<"revs_limit">>, RevsLimBin), TxDb} end), if Resp /= ok -> Resp; true -> - fabric2_server:store(Db#{revs_limit := RevsLimit}) + fabric2_server:store(Db2#{revs_limit := RevsLimit}) end. -set_security(#{} = Db, Security) -> - check_is_admin(Db), +set_security(#{} = Db0, Security) -> + Db1 = require_admin_check(Db0), ok = fabric2_util:validate_security_object(Security), SecBin = ?JSON_ENCODE(Security), - Resp = fabric2_fdb:transactional(Db, fun(TxDb) -> - fabric2_fdb:set_config(TxDb, <<"security_doc">>, SecBin) + {Resp, Db2} = fabric2_fdb:transactional(Db1, fun(TxDb) -> + {fabric2_fdb:set_config(TxDb, <<"security_doc">>, SecBin), TxDb} end), if Resp /= ok -> Resp; true -> - fabric2_server:store(Db#{security_doc := Security}) + fabric2_server:store(Db2#{security_doc := Security}) end. diff --git a/src/fabric/src/fabric2_fdb.erl b/src/fabric/src/fabric2_fdb.erl index c59346ebd..1392ccd0f 100644 --- a/src/fabric/src/fabric2_fdb.erl +++ b/src/fabric/src/fabric2_fdb.erl @@ -20,7 +20,7 @@ create/2, open/2, - reopen/1, + ensure_current/1, delete/1, exists/1, @@ -30,7 +30,6 @@ get_info/1, get_config/1, - get_config/2, set_config/3, get_stat/2, @@ -246,10 +245,12 @@ reopen(#{} = OldDb) -> tx := Tx, name := DbName, db_options := Options, - user_ctx := UserCtx + user_ctx := UserCtx, + security_fun := SecurityFun } = OldDb, Options1 = lists:keystore(user_ctx, 1, Options, {user_ctx, UserCtx}), - open(init_db(Tx, DbName, Options1), Options1). + NewDb = open(init_db(Tx, DbName, Options1), Options1), + NewDb#{security_fun := SecurityFun}. delete(#{} = Db) -> @@ -360,19 +361,6 @@ get_config(#{} = Db) -> end, erlfdb:wait(Future)). -get_config(#{} = Db, ConfigKey) -> - #{ - tx := Tx, - db_prefix := DbPrefix - } = ensure_current(Db), - - Key = erlfdb_tuple:pack({?DB_CONFIG, ConfigKey}, DbPrefix), - case erlfdb:wait(erlfdb:get(Tx, Key)) of - % config values are expected to be set so we blow if not_found - Val when Val =/= not_found -> Val - end. - - set_config(#{} = Db, ConfigKey, ConfigVal) -> #{ tx := Tx, @@ -835,6 +823,7 @@ init_db(Tx, DbName, Options) -> layer_prefix => Prefix, md_version => Version, + security_fun => undefined, db_options => Options }. @@ -1276,12 +1265,20 @@ ensure_current(Db) -> ensure_current(Db, true). -ensure_current(#{} = Db, CheckDbVersion) -> - require_transaction(Db), - case check_metadata_version(Db) of +ensure_current(#{} = Db0, CheckDbVersion) -> + require_transaction(Db0), + Db2 = case check_metadata_version(Db0) of {current, Db1} -> Db1; {stale, Db1} -> check_db_version(Db1, CheckDbVersion) - end. + end, + case maps:get(security_fun, Db2) of + SecurityFun when is_function(SecurityFun, 2) -> + #{security_doc := SecDoc} = Db2, + ok = SecurityFun(Db2, SecDoc); + undefined -> + ok + end, + Db2. is_transaction_applied(Tx) -> diff --git a/src/fabric/src/fabric2_server.erl b/src/fabric/src/fabric2_server.erl index f88ceb643..9dd0b7739 100644 --- a/src/fabric/src/fabric2_server.erl +++ b/src/fabric/src/fabric2_server.erl @@ -56,7 +56,8 @@ fetch(DbName) when is_binary(DbName) -> store(#{name := DbName} = Db0) when is_binary(DbName) -> Db1 = Db0#{ tx := undefined, - user_ctx := #user_ctx{} + user_ctx := #user_ctx{}, + security_fun := undefined }, true = ets:insert(?MODULE, {DbName, Db1}), ok. diff --git a/src/fabric/test/fabric2_db_security_tests.erl b/src/fabric/test/fabric2_db_security_tests.erl index 4a54083ac..e5f3ad2c0 100644 --- a/src/fabric/test/fabric2_db_security_tests.erl +++ b/src/fabric/test/fabric2_db_security_tests.erl @@ -39,9 +39,9 @@ security_test_() -> fun check_admin_is_member/1, fun check_is_member_of_public_db/1, fun check_set_user_ctx/1, - fun check_open_forbidden/1, - fun check_fail_open_no_opts/1, - fun check_fail_open_name_null/1 + fun check_forbidden/1, + fun check_fail_no_opts/1, + fun check_fail_name_null/1 ]} } }. @@ -165,18 +165,21 @@ check_set_user_ctx({Db0, _, _}) -> ?assertEqual(UserCtx, fabric2_db:get_user_ctx(Db1)). -check_open_forbidden({Db0, _, _}) -> +check_forbidden({Db0, _, _}) -> DbName = fabric2_db:name(Db0), UserCtx = #user_ctx{name = <<"foo">>, roles = [<<"bar">>]}, - ?assertThrow({forbidden, _}, fabric2_db:open(DbName, [{user_ctx, UserCtx}])). + {ok, Db} = fabric2_db:open(DbName, [{user_ctx, UserCtx}]), + ?assertThrow({forbidden, _}, fabric2_db:get_db_info(Db)). -check_fail_open_no_opts({Db0, _, _}) -> +check_fail_no_opts({Db0, _, _}) -> DbName = fabric2_db:name(Db0), - ?assertThrow({unauthorized, _}, fabric2_db:open(DbName, [])). + {ok, Db} = fabric2_db:open(DbName, []), + ?assertThrow({unauthorized, _}, fabric2_db:get_db_info(Db)). -check_fail_open_name_null({Db0, _, _}) -> +check_fail_name_null({Db0, _, _}) -> DbName = fabric2_db:name(Db0), UserCtx = #user_ctx{name = null}, - ?assertThrow({unauthorized, _}, fabric2_db:open(DbName, [{user_ctx, UserCtx}])). + {ok, Db} = fabric2_db:open(DbName, [{user_ctx, UserCtx}]), + ?assertThrow({unauthorized, _}, fabric2_db:get_db_info(Db)). -- cgit v1.2.1 From 3b454895f84bb870b767fc52f48ef7c12ea0dcde Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Fri, 8 Nov 2019 13:27:16 -0500 Subject: Before starting a db transanction, refresh the db handle from the cache Previously, a stale db handle could be re-used across a few separate transactions. That would result in the database getting re-opened before every one of those operations. To prevent that from happening, check the cache before the transaction starts, and if there is a newer version of the db handle and use that. --- src/fabric/src/fabric2_db.erl | 22 ++++++++++++------- src/fabric/src/fabric2_fdb.erl | 49 +++++++++++++++++++++++++++++++----------- 2 files changed, 50 insertions(+), 21 deletions(-) diff --git a/src/fabric/src/fabric2_db.erl b/src/fabric/src/fabric2_db.erl index b5d68c087..ff5371fc3 100644 --- a/src/fabric/src/fabric2_db.erl +++ b/src/fabric/src/fabric2_db.erl @@ -458,11 +458,14 @@ is_users_db(DbName) when is_binary(DbName) -> set_revs_limit(#{} = Db0, RevsLimit) -> Db1 = require_admin_check(Db0), RevsLimBin = ?uint2bin(max(1, RevsLimit)), - {Resp, Db2} = fabric2_fdb:transactional(Db1, fun(TxDb) -> - {fabric2_fdb:set_config(TxDb, <<"revs_limit">>, RevsLimBin), TxDb} + Resp = fabric2_fdb:transactional(Db1, fun(TxDb) -> + fabric2_fdb:set_config(TxDb, <<"revs_limit">>, RevsLimBin) end), - if Resp /= ok -> Resp; true -> - fabric2_server:store(Db2#{revs_limit := RevsLimit}) + case Resp of + {ok, #{} = Db2} -> + fabric2_server:store(Db2#{revs_limit := RevsLimit}); + Err -> + Err end. @@ -470,11 +473,14 @@ set_security(#{} = Db0, Security) -> Db1 = require_admin_check(Db0), ok = fabric2_util:validate_security_object(Security), SecBin = ?JSON_ENCODE(Security), - {Resp, Db2} = fabric2_fdb:transactional(Db1, fun(TxDb) -> - {fabric2_fdb:set_config(TxDb, <<"security_doc">>, SecBin), TxDb} + Resp = fabric2_fdb:transactional(Db1, fun(TxDb) -> + fabric2_fdb:set_config(TxDb, <<"security_doc">>, SecBin) end), - if Resp /= ok -> Resp; true -> - fabric2_server:store(Db2#{security_doc := Security}) + case Resp of + {ok, #{} = Db2} -> + fabric2_server:store(Db2#{security_doc := Security}); + Err -> + Err end. diff --git a/src/fabric/src/fabric2_fdb.erl b/src/fabric/src/fabric2_fdb.erl index 1392ccd0f..a3dd7e28b 100644 --- a/src/fabric/src/fabric2_fdb.erl +++ b/src/fabric/src/fabric2_fdb.erl @@ -82,16 +82,17 @@ transactional(DbName, Options, Fun) when is_binary(DbName) -> transactional(#{tx := undefined} = Db, Fun) -> try - Reopen = maps:get(reopen, Db, false), - Db1 = maps:remove(reopen, Db), + Db1 = refresh(Db), + Reopen = maps:get(reopen, Db1, false), + Db2 = maps:remove(reopen, Db1), LayerPrefix = case Reopen of true -> undefined; - false -> maps:get(layer_prefix, Db1) + false -> maps:get(layer_prefix, Db2) end, do_transaction(fun(Tx) -> case Reopen of - true -> Fun(reopen(Db1#{tx => Tx})); - false -> Fun(Db1#{tx => Tx}) + true -> Fun(reopen(Db2#{tx => Tx})); + false -> Fun(Db2#{tx => Tx}) end end, LayerPrefix) catch throw:{?MODULE, reopen} -> @@ -239,6 +240,26 @@ open(#{} = Db0, Options) -> load_validate_doc_funs(Db3). +refresh(#{tx := undefined, name := DbName, md_version := OldVer} = Db) -> + case fabric2_server:fetch(DbName) of + % Relying on these assumptions about the `md_version` value: + % - It is bumped every time `db_version` is bumped + % - Is a versionstamp, so we can check which one is newer + % - If it is `not_found`, it would sort less than a binary value + #{md_version := Ver} = Db1 when Ver > OldVer -> + Db1#{ + user_ctx := maps:get(user_ctx, Db), + security_fun := maps:get(security_fun, Db) + }; + _ -> + Db + end; + +refresh(#{} = Db) -> + Db. + + + reopen(#{} = OldDb) -> require_transaction(OldDb), #{ @@ -361,15 +382,16 @@ get_config(#{} = Db) -> end, erlfdb:wait(Future)). -set_config(#{} = Db, ConfigKey, ConfigVal) -> +set_config(#{} = Db0, ConfigKey, ConfigVal) -> #{ tx := Tx, db_prefix := DbPrefix - } = ensure_current(Db), + } = Db = ensure_current(Db0), Key = erlfdb_tuple:pack({?DB_CONFIG, ConfigKey}, DbPrefix), erlfdb:set(Tx, Key, ConfigVal), - bump_db_version(Db). + {ok, DbVersion} = bump_db_version(Db), + {ok, Db#{db_version := DbVersion}}. get_stat(#{} = Db, StatKey) -> @@ -927,7 +949,8 @@ bump_db_version(#{} = Db) -> DbVersionKey = erlfdb_tuple:pack({?DB_VERSION}, DbPrefix), DbVersion = fabric2_util:uuid(), ok = erlfdb:set(Tx, DbVersionKey, DbVersion), - ok = bump_metadata_version(Tx). + ok = bump_metadata_version(Tx), + {ok, DbVersion}. check_db_version(#{} = Db, CheckDbVersion) -> @@ -1274,11 +1297,11 @@ ensure_current(#{} = Db0, CheckDbVersion) -> case maps:get(security_fun, Db2) of SecurityFun when is_function(SecurityFun, 2) -> #{security_doc := SecDoc} = Db2, - ok = SecurityFun(Db2, SecDoc); + ok = SecurityFun(Db2, SecDoc), + Db2#{security_fun := undefined}; undefined -> - ok - end, - Db2. + Db2 + end. is_transaction_applied(Tx) -> -- cgit v1.2.1 From da0318f63e82551c04560db692adc9a8aac82f18 Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Thu, 14 Nov 2019 13:03:12 -0500 Subject: Update fabric2_fdb's set_config to take un-encoding values Previously `set_config/3` needed keys and values to be transalted to binaries, now that is done inside the function. It's a bit more consistent as binary config values and encodings are better encapsulated in the `fabric2_fdb` module. Since `set_config` does, it made sense to update get_config as well. There, it turns out it was used only to load configuration setting after a db open, so the function was renamed to `load_config` and was made private. --- src/fabric/src/fabric2_db.erl | 20 +++++++------------- src/fabric/src/fabric2_fdb.erl | 40 +++++++++++++++++++--------------------- 2 files changed, 26 insertions(+), 34 deletions(-) diff --git a/src/fabric/src/fabric2_db.erl b/src/fabric/src/fabric2_db.erl index ff5371fc3..d957ec954 100644 --- a/src/fabric/src/fabric2_db.erl +++ b/src/fabric/src/fabric2_db.erl @@ -455,32 +455,26 @@ is_users_db(DbName) when is_binary(DbName) -> IsAuthCache orelse IsCfgUsersDb orelse IsGlobalUsersDb. -set_revs_limit(#{} = Db0, RevsLimit) -> +set_revs_limit(#{} = Db0, RevsLimit) when is_integer(RevsLimit) -> Db1 = require_admin_check(Db0), - RevsLimBin = ?uint2bin(max(1, RevsLimit)), Resp = fabric2_fdb:transactional(Db1, fun(TxDb) -> - fabric2_fdb:set_config(TxDb, <<"revs_limit">>, RevsLimBin) + fabric2_fdb:set_config(TxDb, revs_limit, RevsLimit) end), case Resp of - {ok, #{} = Db2} -> - fabric2_server:store(Db2#{revs_limit := RevsLimit}); - Err -> - Err + {ok, #{} = Db2} -> fabric2_server:store(Db2); + Err -> Err end. set_security(#{} = Db0, Security) -> Db1 = require_admin_check(Db0), ok = fabric2_util:validate_security_object(Security), - SecBin = ?JSON_ENCODE(Security), Resp = fabric2_fdb:transactional(Db1, fun(TxDb) -> - fabric2_fdb:set_config(TxDb, <<"security_doc">>, SecBin) + fabric2_fdb:set_config(TxDb, security_doc, Security) end), case Resp of - {ok, #{} = Db2} -> - fabric2_server:store(Db2#{security_doc := Security}); - Err -> - Err + {ok, #{} = Db2} -> fabric2_server:store(Db2); + Err -> Err end. diff --git a/src/fabric/src/fabric2_fdb.erl b/src/fabric/src/fabric2_fdb.erl index a3dd7e28b..97f0bc921 100644 --- a/src/fabric/src/fabric2_fdb.erl +++ b/src/fabric/src/fabric2_fdb.erl @@ -29,7 +29,6 @@ list_dbs/4, get_info/1, - get_config/1, set_config/3, get_stat/2, @@ -226,16 +225,7 @@ open(#{} = Db0, Options) -> db_options => Options1 }, - Db3 = lists:foldl(fun({Key, Val}, DbAcc) -> - case Key of - <<"uuid">> -> - DbAcc#{uuid => Val}; - <<"revs_limit">> -> - DbAcc#{revs_limit => ?bin2uint(Val)}; - <<"security_doc">> -> - DbAcc#{security_doc => ?JSON_DECODE(Val)} - end - end, Db2, get_config(Db2)), + Db3 = load_config(Db2), load_validate_doc_funs(Db3). @@ -367,31 +357,39 @@ get_info(#{} = Db) -> [CProp | MProps]. -get_config(#{} = Db) -> +load_config(#{} = Db) -> #{ tx := Tx, db_prefix := DbPrefix - } = ensure_current(Db), + } = Db, {Start, End} = erlfdb_tuple:range({?DB_CONFIG}, DbPrefix), Future = erlfdb:get_range(Tx, Start, End), - lists:map(fun({K, V}) -> + lists:foldl(fun({K, V}, DbAcc) -> {?DB_CONFIG, Key} = erlfdb_tuple:unpack(K, DbPrefix), - {Key, V} - end, erlfdb:wait(Future)). + case Key of + <<"uuid">> -> DbAcc#{uuid => V}; + <<"revs_limit">> -> DbAcc#{revs_limit => ?bin2uint(V)}; + <<"security_doc">> -> DbAcc#{security_doc => ?JSON_DECODE(V)} + end + end, Db, erlfdb:wait(Future)). -set_config(#{} = Db0, ConfigKey, ConfigVal) -> +set_config(#{} = Db0, Key, Val) when is_atom(Key) -> #{ tx := Tx, db_prefix := DbPrefix } = Db = ensure_current(Db0), - - Key = erlfdb_tuple:pack({?DB_CONFIG, ConfigKey}, DbPrefix), - erlfdb:set(Tx, Key, ConfigVal), + {BinKey, BinVal} = case Key of + uuid -> {<<"uuid">>, Val}; + revs_limit -> {<<"revs_limit">>, ?uint2bin(max(1, Val))}; + security_doc -> {<<"security_doc">>, ?JSON_ENCODE(Val)} + end, + DbKey = erlfdb_tuple:pack({?DB_CONFIG, BinKey}, DbPrefix), + erlfdb:set(Tx, DbKey, BinVal), {ok, DbVersion} = bump_db_version(Db), - {ok, Db#{db_version := DbVersion}}. + {ok, Db#{db_version := DbVersion, Key := Val}}. get_stat(#{} = Db, StatKey) -> -- cgit v1.2.1 From 1420756d963ae47bc7dea1cb7d010a0aa9d2969a Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Thu, 14 Nov 2019 18:13:46 -0500 Subject: Assert Db handle field existence in `load_config/1` in fabric2_fdb Forgot to push this in the previous PR so made a new commit. https://github.com/apache/couchdb/pull/2300#discussion_r346592418 --- src/fabric/src/fabric2_fdb.erl | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/fabric/src/fabric2_fdb.erl b/src/fabric/src/fabric2_fdb.erl index 97f0bc921..0d741385c 100644 --- a/src/fabric/src/fabric2_fdb.erl +++ b/src/fabric/src/fabric2_fdb.erl @@ -212,8 +212,10 @@ open(#{} = Db0, Options) -> db_prefix => DbPrefix, db_version => DbVersion, + uuid => <<>>, revs_limit => 1000, security_doc => {[]}, + user_ctx => UserCtx, % Place holders until we implement these @@ -369,9 +371,9 @@ load_config(#{} = Db) -> lists:foldl(fun({K, V}, DbAcc) -> {?DB_CONFIG, Key} = erlfdb_tuple:unpack(K, DbPrefix), case Key of - <<"uuid">> -> DbAcc#{uuid => V}; - <<"revs_limit">> -> DbAcc#{revs_limit => ?bin2uint(V)}; - <<"security_doc">> -> DbAcc#{security_doc => ?JSON_DECODE(V)} + <<"uuid">> -> DbAcc#{uuid := V}; + <<"revs_limit">> -> DbAcc#{revs_limit := ?bin2uint(V)}; + <<"security_doc">> -> DbAcc#{security_doc := ?JSON_DECODE(V)} end end, Db, erlfdb:wait(Future)). -- cgit v1.2.1 From 7eaab664c7ceca470f96d547bd2cb12f7f8aef6a Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Fri, 15 Nov 2019 18:46:16 -0500 Subject: Check membership when calling get_security/1 in fabric2_db Previously, membership check was disabled when fetching the security doc. That was not correct, as membership should be checked before every db operation, including when fetching the security doc itself. Also, most of the security tests relied on patching the user context in the `Db` handle then calling `check_*` functions. Those functions however call `get_security/1` before doing the actual check, and in some cases, like when checking for admin, the failure was coming from the membership check in `get_security/1` instead. Also some tests were going through the regular request path of opening a new db by name. In order, make the tests more uniform, switch all the tests to apply the tested `UserCtx` in the open call. --- src/fabric/src/fabric2_db.erl | 11 +-- src/fabric/test/fabric2_db_security_tests.erl | 126 +++++++++++++------------- 2 files changed, 64 insertions(+), 73 deletions(-) diff --git a/src/fabric/src/fabric2_db.erl b/src/fabric/src/fabric2_db.erl index d957ec954..88840e702 100644 --- a/src/fabric/src/fabric2_db.erl +++ b/src/fabric/src/fabric2_db.erl @@ -22,7 +22,6 @@ list_dbs/1, list_dbs/3, - is_admin/1, check_is_admin/1, check_is_member/1, @@ -239,10 +238,6 @@ list_dbs(UserFun, UserAcc0, Options) -> end). -is_admin(Db) -> - is_admin(Db, get_security(Db)). - - is_admin(Db, {SecProps}) when is_list(SecProps) -> case fabric2_db_plugin:check_is_admin(Db) of true -> @@ -291,10 +286,6 @@ require_member_check(#{} = Db) -> Db#{security_fun := fun check_is_member/2}. -no_security_check(#{} = Db) -> - Db#{security_fun := undefined}. - - name(#{name := DbName}) -> DbName. @@ -382,7 +373,7 @@ get_revs_limit(#{} = Db) -> get_security(#{} = Db) -> #{security_doc := SecDoc} = fabric2_fdb:transactional(Db, fun(TxDb) -> - fabric2_fdb:ensure_current(no_security_check(TxDb)) + fabric2_fdb:ensure_current(TxDb) end), SecDoc. diff --git a/src/fabric/test/fabric2_db_security_tests.erl b/src/fabric/test/fabric2_db_security_tests.erl index e5f3ad2c0..501545484 100644 --- a/src/fabric/test/fabric2_db_security_tests.erl +++ b/src/fabric/test/fabric2_db_security_tests.erl @@ -26,12 +26,10 @@ security_test_() -> fun setup/0, fun cleanup/1, {with, [ - fun is_admin_name/1, - fun is_not_admin_name/1, - fun is_admin_role/1, - fun is_not_admin_role/1, fun check_is_admin/1, fun check_is_not_admin/1, + fun check_is_admin_role/1, + fun check_is_not_admin_role/1, fun check_is_member_name/1, fun check_is_not_member_name/1, fun check_is_member_role/1, @@ -63,123 +61,125 @@ setup() -> ]}} ]}, ok = fabric2_db:set_security(Db1, SecProps), - {ok, Db2} = fabric2_db:open(DbName, [?ADMIN_CTX]), - {ok, PubDb} = fabric2_db:create(PubDbName, []), - {Db2, PubDb, Ctx}. + {ok, _} = fabric2_db:create(PubDbName, [?ADMIN_CTX]), + {DbName, PubDbName, Ctx}. -cleanup({Db, PubDb, Ctx}) -> - ok = fabric2_db:delete(fabric2_db:name(Db), []), - ok = fabric2_db:delete(fabric2_db:name(PubDb), []), +cleanup({DbName, PubDbName, Ctx}) -> + ok = fabric2_db:delete(DbName, []), + ok = fabric2_db:delete(PubDbName, []), test_util:stop_couch(Ctx). -is_admin_name({Db, _, _}) -> - UserCtx = #user_ctx{name = <<"admin_name1">>}, - ?assertEqual(true, fabric2_db:is_admin(Db#{user_ctx := UserCtx})). - - -is_not_admin_name({Db, _, _}) -> - UserCtx = #user_ctx{name = <<"member1">>}, - ?assertEqual(false, fabric2_db:is_admin(Db#{user_ctx := UserCtx})). +check_is_admin({DbName, _, _}) -> + UserCtx = #user_ctx{name = <<"admin_name1">>}, + {ok, Db} = fabric2_db:open(DbName, [{user_ctx, UserCtx}]), + ?assertEqual(ok, fabric2_db:check_is_admin(Db)). -is_admin_role({Db, _, _}) -> - UserCtx = #user_ctx{roles = [<<"admin_role1">>]}, - ?assertEqual(true, fabric2_db:is_admin(Db#{user_ctx := UserCtx})). +check_is_not_admin({DbName, _, _}) -> + {ok, Db1} = fabric2_db:open(DbName, [{user_ctx, #user_ctx{}}]), + ?assertThrow( + {unauthorized, <<"You are not authorized", _/binary>>}, + fabric2_db:check_is_admin(Db1) + ), -is_not_admin_role({Db, _, _}) -> - UserCtx = #user_ctx{roles = [<<"member_role1">>]}, - ?assertEqual(false, fabric2_db:is_admin(Db#{user_ctx := UserCtx})). + UserCtx = #user_ctx{name = <<"member_name1">>}, + {ok, Db2} = fabric2_db:open(DbName, [{user_ctx, UserCtx}]), + ?assertThrow( + {forbidden, <<"You are not a db or server admin.">>}, + fabric2_db:check_is_admin(Db2) + ). -check_is_admin({Db, _, _}) -> - UserCtx = #user_ctx{name = <<"admin_name1">>}, - ?assertEqual(ok, fabric2_db:check_is_admin(Db#{user_ctx := UserCtx})). +check_is_admin_role({DbName, _, _}) -> + UserCtx = #user_ctx{roles = [<<"admin_role1">>]}, + {ok, Db} = fabric2_db:open(DbName, [{user_ctx, UserCtx}]), + ?assertEqual(ok, fabric2_db:check_is_admin(Db)). -check_is_not_admin({Db, _, _}) -> - UserCtx = #user_ctx{name = <<"member_name1">>}, - ?assertThrow( - {unauthorized, <<"You are not a db or server admin.">>}, - fabric2_db:check_is_admin(Db#{user_ctx := #user_ctx{}}) - ), +check_is_not_admin_role({DbName, _, _}) -> + UserCtx = #user_ctx{ + name = <<"member_name1">>, + roles = [<<"member_role1">>] + }, + {ok, Db} = fabric2_db:open(DbName, [{user_ctx, UserCtx}]), ?assertThrow( {forbidden, <<"You are not a db or server admin.">>}, - fabric2_db:check_is_admin(Db#{user_ctx := UserCtx}) + fabric2_db:check_is_admin(Db) ). -check_is_member_name({Db, _, _}) -> +check_is_member_name({DbName, _, _}) -> UserCtx = #user_ctx{name = <<"member_name1">>}, - ?assertEqual(ok, fabric2_db:check_is_member(Db#{user_ctx := UserCtx})). + {ok, Db} = fabric2_db:open(DbName, [{user_ctx, UserCtx}]), + ?assertEqual(ok, fabric2_db:check_is_member(Db)). -check_is_not_member_name({Db, _, _}) -> - UserCtx = #user_ctx{name = <<"foo">>}, +check_is_not_member_name({DbName, _, _}) -> + {ok, Db1} = fabric2_db:open(DbName, [{user_ctx, #user_ctx{}}]), ?assertThrow( {unauthorized, <<"You are not authorized", _/binary>>}, - fabric2_db:check_is_member(Db#{user_ctx := #user_ctx{}}) + fabric2_db:check_is_member(Db1) ), + + UserCtx = #user_ctx{name = <<"foo">>}, + {ok, Db2} = fabric2_db:open(DbName, [{user_ctx, UserCtx}]), ?assertThrow( {forbidden, <<"You are not allowed to access", _/binary>>}, - fabric2_db:check_is_member(Db#{user_ctx := UserCtx}) + fabric2_db:check_is_member(Db2) ). -check_is_member_role({Db, _, _}) -> +check_is_member_role({DbName, _, _}) -> UserCtx = #user_ctx{name = <<"foo">>, roles = [<<"member_role1">>]}, - ?assertEqual(ok, fabric2_db:check_is_member(Db#{user_ctx := UserCtx})). + {ok, Db} = fabric2_db:open(DbName, [{user_ctx, UserCtx}]), + ?assertEqual(ok, fabric2_db:check_is_member(Db)). -check_is_not_member_role({Db, _, _}) -> +check_is_not_member_role({DbName, _, _}) -> UserCtx = #user_ctx{name = <<"foo">>, roles = [<<"bar">>]}, + {ok, Db} = fabric2_db:open(DbName, [{user_ctx, UserCtx}]), ?assertThrow( {forbidden, <<"You are not allowed to access", _/binary>>}, - fabric2_db:check_is_member(Db#{user_ctx := UserCtx}) + fabric2_db:check_is_member(Db) ). -check_admin_is_member({Db, _, _}) -> +check_admin_is_member({DbName, _, _}) -> UserCtx = #user_ctx{name = <<"admin_name1">>}, - ?assertEqual(ok, fabric2_db:check_is_member(Db#{user_ctx := UserCtx})). + {ok, Db} = fabric2_db:open(DbName, [{user_ctx, UserCtx}]), + ?assertEqual(ok, fabric2_db:check_is_member(Db)). -check_is_member_of_public_db({_, PubDb, _}) -> +check_is_member_of_public_db({_, PubDbName, _}) -> + {ok, Db1} = fabric2_db:open(PubDbName, [{user_ctx, #user_ctx{}}]), + ?assertEqual(ok, fabric2_db:check_is_member(Db1)), + UserCtx = #user_ctx{name = <<"foo">>, roles = [<<"bar">>]}, - ?assertEqual( - ok, - fabric2_db:check_is_member(PubDb#{user_ctx := #user_ctx{}}) - ), - ?assertEqual( - ok, - fabric2_db:check_is_member(PubDb#{user_ctx := UserCtx}) - ). + {ok, Db2} = fabric2_db:open(PubDbName, [{user_ctx, UserCtx}]), + ?assertEqual(ok, fabric2_db:check_is_member(Db2)). -check_set_user_ctx({Db0, _, _}) -> - DbName = fabric2_db:name(Db0), +check_set_user_ctx({DbName, _, _}) -> UserCtx = #user_ctx{name = <<"foo">>, roles = [<<"admin_role1">>]}, {ok, Db1} = fabric2_db:open(DbName, [{user_ctx, UserCtx}]), ?assertEqual(UserCtx, fabric2_db:get_user_ctx(Db1)). -check_forbidden({Db0, _, _}) -> - DbName = fabric2_db:name(Db0), +check_forbidden({DbName, _, _}) -> UserCtx = #user_ctx{name = <<"foo">>, roles = [<<"bar">>]}, {ok, Db} = fabric2_db:open(DbName, [{user_ctx, UserCtx}]), ?assertThrow({forbidden, _}, fabric2_db:get_db_info(Db)). -check_fail_no_opts({Db0, _, _}) -> - DbName = fabric2_db:name(Db0), +check_fail_no_opts({DbName, _, _}) -> {ok, Db} = fabric2_db:open(DbName, []), ?assertThrow({unauthorized, _}, fabric2_db:get_db_info(Db)). -check_fail_name_null({Db0, _, _}) -> - DbName = fabric2_db:name(Db0), +check_fail_name_null({DbName, _, _}) -> UserCtx = #user_ctx{name = null}, {ok, Db} = fabric2_db:open(DbName, [{user_ctx, UserCtx}]), ?assertThrow({unauthorized, _}, fabric2_db:get_db_info(Db)). -- cgit v1.2.1 From 6c11319023b439332052b07b5698bbd45a8f4059 Mon Sep 17 00:00:00 2001 From: ILYA Khlopotov Date: Tue, 12 Nov 2019 16:13:55 +0000 Subject: Support regexp based blacklist in config --- src/couch/src/couch_util.erl | 35 +++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/src/couch/src/couch_util.erl b/src/couch/src/couch_util.erl index ce0db4306..c56a4f6a5 100644 --- a/src/couch/src/couch_util.erl +++ b/src/couch/src/couch_util.erl @@ -47,15 +47,15 @@ -define(FLUSH_MAX_MEM, 10000000). -define(BLACKLIST_CONFIG_SECTIONS, [ - <<"daemons">>, - <<"external">>, - <<"httpd_design_handlers">>, - <<"httpd_db_handlers">>, - <<"httpd_global_handlers">>, - <<"native_query_servers">>, - <<"os_daemons">>, - <<"query_servers">>, - <<"feature_flags">> + <<"^daemons$">>, + <<"^external$">>, + <<"^httpd_design_handlers$">>, + <<"^httpd_db_handlers$">>, + <<"^httpd_global_handlers$">>, + <<"^native_query_servers$">>, + <<"^os_daemons$">>, + <<"^query_servers$">>, + <<"^feature_flags$">> ]). @@ -765,10 +765,13 @@ unique_monotonic_integer() -> check_config_blacklist(Section) -> - case lists:member(Section, ?BLACKLIST_CONFIG_SECTIONS) of - true -> - Msg = <<"Config section blacklisted for modification over HTTP API.">>, - throw({forbidden, Msg}); - _ -> - ok - end. + lists:foreach(fun(RegExp) -> + case re:run(Section, RegExp) of + nomatch -> + ok; + _ -> + Msg = <<"Config section blacklisted for modification over HTTP API.">>, + throw({forbidden, Msg}) + end + end, ?BLACKLIST_CONFIG_SECTIONS), + ok. -- cgit v1.2.1 From 555d28cea854e6d199cb2fe0f5faaec1add8f0a9 Mon Sep 17 00:00:00 2001 From: ILYA Khlopotov Date: Tue, 12 Nov 2019 16:14:55 +0000 Subject: Implement fabric2_server:fdb_cluster/0 --- src/fabric/src/fabric2_server.erl | 35 +++++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/src/fabric/src/fabric2_server.erl b/src/fabric/src/fabric2_server.erl index 9dd0b7739..b1c38ef55 100644 --- a/src/fabric/src/fabric2_server.erl +++ b/src/fabric/src/fabric2_server.erl @@ -20,7 +20,8 @@ fetch/1, store/1, remove/1, - fdb_directory/0 + fdb_directory/0, + fdb_cluster/0 ]). @@ -39,6 +40,7 @@ -define(CLUSTER_FILE, "/usr/local/etc/foundationdb/fdb.cluster"). -define(FDB_DIRECTORY, fdb_directory). +-define(FDB_CLUSTER, fdb_cluster). -define(DEFAULT_FDB_DIRECTORY, <<"couchdb">>). @@ -76,13 +78,16 @@ init(_) -> {write_concurrency, true} ]), - Db = case application:get_env(fabric, eunit_run) of + {Cluster, Db} = case application:get_env(fabric, eunit_run) of {ok, true} -> - erlfdb_util:get_test_db([empty]); + {<<"eunit_test">>, erlfdb_util:get_test_db([empty])}; undefined -> - ClusterStr = config:get("erlfdb", "cluster_file", ?CLUSTER_FILE), - erlfdb:open(iolist_to_binary(ClusterStr)) + ClusterFileStr = config:get("erlfdb", "cluster_file", ?CLUSTER_FILE), + {ok, ConnectionStr} = file:read_file(ClusterFileStr), + DbHandle = erlfdb:open(iolist_to_binary(ClusterFileStr)), + {string:trim(ConnectionStr), DbHandle} end, + application:set_env(fabric, ?FDB_CLUSTER, Cluster), application:set_env(fabric, db, Db), Dir = case config:get("fabric", "fdb_directory") of @@ -117,15 +122,21 @@ code_change(_OldVsn, St, _Extra) -> fdb_directory() -> - case get(?FDB_DIRECTORY) of + get_env(?FDB_DIRECTORY). + +fdb_cluster() -> + get_env(?FDB_CLUSTER). + +get_env(Key) -> + case get(Key) of undefined -> - case application:get_env(fabric, ?FDB_DIRECTORY) of + case application:get_env(fabric, Key) of undefined -> erlang:error(fabric_application_not_started); - {ok, Dir} -> - put(?FDB_DIRECTORY, Dir), - Dir + {ok, Value} -> + put(Key, Value), + Value end; - Dir -> - Dir + Value -> + Value end. -- cgit v1.2.1 From b49ca70dd144a5e6c4f104cdbb85dd074f6afb6c Mon Sep 17 00:00:00 2001 From: ILYA Khlopotov Date: Tue, 12 Nov 2019 16:20:47 +0000 Subject: Add ctrace application --- .gitignore | 5 + rebar.config.script | 5 + rel/overlay/etc/default.ini | 50 +++- src/couch/src/couch_util.erl | 4 +- src/ctrace/README.md | 291 +++++++++++++++++++++++ src/ctrace/rebar.config | 14 ++ src/ctrace/src/ctrace.app.src | 27 +++ src/ctrace/src/ctrace.erl | 361 +++++++++++++++++++++++++++++ src/ctrace/src/ctrace.hrl | 15 ++ src/ctrace/src/ctrace_app.erl | 26 +++ src/ctrace/src/ctrace_config.erl | 133 +++++++++++ src/ctrace/src/ctrace_dsl.erl | 106 +++++++++ src/ctrace/src/ctrace_sup.erl | 41 ++++ src/ctrace/test/ctrace_config_test.erl | 153 ++++++++++++ src/ctrace/test/ctrace_dsl_test.erl | 123 ++++++++++ src/ctrace/test/ctrace_test.erl | 412 +++++++++++++++++++++++++++++++++ 16 files changed, 1757 insertions(+), 9 deletions(-) create mode 100644 src/ctrace/README.md create mode 100644 src/ctrace/rebar.config create mode 100644 src/ctrace/src/ctrace.app.src create mode 100644 src/ctrace/src/ctrace.erl create mode 100644 src/ctrace/src/ctrace.hrl create mode 100644 src/ctrace/src/ctrace_app.erl create mode 100644 src/ctrace/src/ctrace_config.erl create mode 100644 src/ctrace/src/ctrace_dsl.erl create mode 100644 src/ctrace/src/ctrace_sup.erl create mode 100644 src/ctrace/test/ctrace_config_test.erl create mode 100644 src/ctrace/test/ctrace_dsl_test.erl create mode 100644 src/ctrace/test/ctrace_test.erl diff --git a/.gitignore b/.gitignore index d1c106821..bf45d1a4f 100644 --- a/.gitignore +++ b/.gitignore @@ -11,6 +11,7 @@ .DS_Store .vscode .rebar/ +.rebar3/ .erlfdb/ .eunit/ cover/ @@ -58,21 +59,25 @@ src/ibrowse/ src/idna/ src/ioq/ src/hqueue/ +src/jaeger_passage/ src/jiffy/ src/ken/ src/khash/ +src/local/ src/meck/ src/metrics/ src/mimerl/ src/mochiweb/ src/oauth/ src/parse_trans/ +src/passage/ src/proper/ src/rebar/ src/recon/ src/smoosh/ src/snappy/ src/ssl_verify_fun/ +src/thrift_protocol/ src/triq/ src/unicode_util_compat/ tmp/ diff --git a/rebar.config.script b/rebar.config.script index 05000b51f..2eeec988d 100644 --- a/rebar.config.script +++ b/rebar.config.script @@ -130,6 +130,7 @@ SubDirs = [ "src/couch_peruser", "src/couch_tests", "src/couch_views", + "src/ctrace", "src/ddoc_cache", "src/dreyfus", "src/fabric", @@ -161,10 +162,14 @@ DepDescs = [ {folsom, "folsom", {tag, "CouchDB-0.8.3"}}, {hyper, "hyper", {tag, "CouchDB-2.2.0-6"}}, {ibrowse, "ibrowse", {tag, "CouchDB-4.0.1-1"}}, +{jaeger_passage, "jaeger-passage", {tag, "CouchDB-0.1.13-1"}}, {jiffy, "jiffy", {tag, "CouchDB-1.0.4-1"}}, +{local, "local", {tag, "0.2.1"}}, {mochiweb, "mochiweb", {tag, "v2.20.0"}}, {meck, "meck", {tag, "0.8.8"}}, {recon, "recon", {tag, "2.5.0"}}, +{passage, "passage", {tag, "0.2.6"}}, +{thrift_protocol, "thrift-protocol", {tag, "0.1.3"}}, %% TMP - Until this is moved to a proper Apache repo {erlfdb, "erlfdb", {branch, "master"}} diff --git a/rel/overlay/etc/default.ini b/rel/overlay/etc/default.ini index 3c9271605..db69fe1b7 100644 --- a/rel/overlay/etc/default.ini +++ b/rel/overlay/etc/default.ini @@ -178,8 +178,8 @@ enable_xframe_options = false ; CouchDB can optionally enforce a maximum uri length; ; max_uri_length = 8000 ; changes_timeout = 60000 -; config_whitelist = -; max_uri_length = +; config_whitelist = +; max_uri_length = ; rewrite_limit = 100 ; x_forwarded_host = X-Forwarded-Host ; x_forwarded_proto = X-Forwarded-Proto @@ -188,7 +188,7 @@ enable_xframe_options = false max_http_request_size = 4294967296 ; 4GB ; [httpd_design_handlers] -; _view = +; _view = ; [ioq] ; concurrency = 10 @@ -202,7 +202,7 @@ port = 6984 ; [chttpd_auth_cache] ; max_lifetime = 600000 -; max_objects = +; max_objects = ; max_size = 104857600 ; [mem3] @@ -213,7 +213,7 @@ port = 6984 ; [fabric] ; all_docs_concurrency = 10 -; changes_duration = +; changes_duration = ; shard_timeout_factor = 2 ; uuid_prefix_len = 7 ; request_timeout = 60000 @@ -263,7 +263,7 @@ iterations = 10 ; iterations for password hashing ; proxy_use_secret = false ; comma-separated list of public fields, 404 if empty ; public_fields = -; secret = +; secret = ; users_db_public = false ; cookie_domain = example.com ; Set the SameSite cookie property for the auth cookie. If empty, the SameSite property is not set. @@ -357,7 +357,7 @@ javascript = couch_js couch_mrview = true [feature_flags] -; This enables any database to be created as a partitioned databases (except system db's). +; This enables any database to be created as a partitioned databases (except system db's). ; Setting this to false will stop the creation of paritioned databases. ; paritioned||allowed* = true will scope the creation of partitioned databases ; to databases with 'allowed' prefix. @@ -600,7 +600,7 @@ compaction = false ; The default number of results returned from a search on a partition ; of a database. ; limit_partitions = 2000 - + ; The maximum number of results that can be returned from a global ; search query (or any search query on a database without user-defined ; partitions). Attempts to set ?limit=N higher than this value will @@ -645,3 +645,37 @@ compaction = false ; ; Jitter applied when checking for new job types. ;type_check_max_jitter_msec = 5000 + +[tracing] +; +; Configuration settings for the `ctrace` OpenTracing +; API. +; +; enabled = false ; true | false +; thrift_format = compact ; compact | binary +; agent_host = 127.0.0.1 +; agent_port = 6831 +; app_name = couchdb ; value to use for the `location.application` tag + +[tracing.filters] +; +; Configure tracing for each individual operation. Keys should be set as +; operation names (i.e., `database-info.read` or `view.build`). Values +; are essentially an anonymous function that accepts a single argument +; that is the tags provided to the root span. These definitions +; should not include a function name or a trailing `.`. Return values +; must be one of `true`, `false`, or `float()`. A boolean return +; indicates whether or not to include the trace while a `float()` +; value between 0 and 1 gives the probability that the trace should +; be included or not. I.e., if the value is `0.9` then 90% of the +; traces will be logged. See the `src/ctrace/README.md` for a +; thorough description of the filter DSL. +; +; database-info.read = (#{'http.method' := Method}) when Method == 'GET' -> true +; view.build = (#{'view.name' := Name}) when Name == "foo" -> 0.25 +; +; The key `all` is checked for any trace that does not have a +; corresponding operation name key configured. Thus, users can easily +; log every generated trace by including the following: +; +; all = (#{}) -> true diff --git a/src/couch/src/couch_util.erl b/src/couch/src/couch_util.erl index c56a4f6a5..fc66f36f4 100644 --- a/src/couch/src/couch_util.erl +++ b/src/couch/src/couch_util.erl @@ -55,7 +55,9 @@ <<"^native_query_servers$">>, <<"^os_daemons$">>, <<"^query_servers$">>, - <<"^feature_flags$">> + <<"^feature_flags$">>, + <<"^tracing\..*$">>, + <<"^tracing$">> ]). diff --git a/src/ctrace/README.md b/src/ctrace/README.md new file mode 100644 index 000000000..6e40b434c --- /dev/null +++ b/src/ctrace/README.md @@ -0,0 +1,291 @@ +Overview +======== + +This application provides an interface to opentracing compatible +tracing systems. + +Open Tracing +------------ + +[//]: # (taken from https://github.com/opentracing/specification/blob/master/specification.md) +Traces in OpenTracing are defined implicitly by their Spans. +In particular, a Trace can be thought of as a directed acyclic +graph (DAG) of Spans, where the edges between Spans are called +References. + +Each Span encapsulates the following state: + +- An operation name +- A start timestamp +- A finish timestamp +- A set of zero or more key:value Span Tags. +- A set of zero or more Span Logs, each of which is + itself a key:value map paired with a timestamp. +- A SpanContext +- References to zero or more causally-related Spans + +Every trace is identified by unique trace_id. Every trace includes zero +or more tracing spans which are identified by a span id. + +Jaeger +------ + +Jaeger is a distributed tracing system released as open source by +Uber Technologies. It is one of implementations of open tracing specification. +Jaeger supports Trace detail view where a single trace is represented as +a tree of tracing span with detailed timing information about every span. +In order to make this feature work all tracing spans should form a lineage +from the same root span. + + +Implementation +============== + +Every operation has unique identifier. Example identifiers are: + +- all-dbs.read +- database.delete +- replication.trigger +- view.compaction + +Tracing begins with a root span that can be filtered based on +a set of configurable rules. When the root trace is created these +rules are applied to see if the trace should be generated and logged. +If a trace is disabled due to filtering then no trace data is generated. + + +Code instrumentation +-------------------- + +The span lifecycle is controled by + +- `ctrace:start_span` +- `ctrace:finish_span` +- `ctrace:with_span` + +The instrumentation can add tags and logs to a span. + +Example of instrumentation: + +``` +ctrace:with_span('database.read', #{'db.name' => <<>>}, fun() -> + ctrace:tag(#{ + peer => Peer, + 'http.method' => Method, + nonce => Nonce, + 'http.url' => Path, + 'span.kind' => <<"server">>, + component => <<"couchdb.chttpd">> + }), + ctrace:log(#{ + field0 => "value0" + }) + + handle_request(HttpReq) +end), +``` + +As you can see the `ctrace:with_span/3` function receives a function which +wraps the operation we wanted to trace: + +- `ctrace:tag/1` to add new tags to the span +- `ctrace:log/1` add log event to the span + +There are some informative functions as well: + +- `ctrace:refs/0` - returns all other spans we have references from the current +- `ctrace:operation_name/0` - returns operation name for the current span +- `ctrace:trace_id/0` - returns trace id for the current span +- `ctrace:span_id/0` - returns span id for the current span + +Instrumentation guide +--------------------- + +- Start root span at system boundaries + - httpd + - internal trigger (replication or compaction jobs) +- Start new child span when you cross layer boundaries +- Start new child span when you cross node bounadary +- Extend `_httpd_handlers:handler_info/1` as needed to + have operation ids. (We as community might need to work on + naming conventions) +- Use [span conventions](https://github.com/apache/couchdb-documentation/blob/master/rfcs/011-opentracing.md#conventions) https://github.com/opentracing/specification/blob/master/semantic_conventions.md +- When in doubt consult open tracing spec + - [spec overview](https://github.com/opentracing/specification/blob/master/specification.md) + - [conventions](https://github.com/opentracing/specification/blob/master/semantic_conventions.md#standard-span-tags-and-log-fields) + +Configuration +------------- + +Traces are configured using standard CouchDB ini file based configuration. +There is a global toggle `[tracing] enabled = true | false` that switches +tracing on or off completely. The `[tracing]` section also includes +configuration for where to send trace data. + +An example `[tracing]` section + +```ini +[tracing] + +enabled = true +thrift_format = compact ; compact | binary +agent_host = 127.0.0.1 +agent_port = 6831 +app_name = couchdb ; Value to use for the `location.application` tag +``` + +In the `[tracing.filters]` section we can define a set of rules for +whether to include a trace. Keys are the operation name of the root +span and values are a simple DSL for whether to include the given +span based on its tags. See below for a more thorough description +of the DSL. The `all` key is special and is used when no other +filter matches a given operation. If the `all` key is not present +then ctrace behaves as if it were defined as `(#{}) -> false`. I.e., +any trace that doesn't have a configuration entry is not generated +and logged. + +```ini +[tracing.filters] +; all = (#{}) -> true +; database-info.read = (#{'http.method' := Method}) when Method == 'GET' -> true +; view.build = (#{'view.name' := Name}) when Name == "foo" -> 0.25 +``` + +Filter DSL Description +--- + +``` + = ( #{<[arguments]>} ) when <[conditions]> -> <[actions]> +``` + +Where: + - operation_name is the name of the root span + - arguments is comma separated pairs of + ` := ` + - actions is a list which contains + - `report` + - conditions + - `<[condition]>` + - `| <[condition]> <[operator]> <[condition]>` + - condition: + - ` <[operator]> ` + `| <[guard_function]>(<[variable_name]>)` + - `variable_name` - lowercase name without special characters + - guard_function: one of + - `is_atom` + - `is_float` + - `is_integer` + - `is_list` + - `is_number` + - `is_pid` + - `is_port` + - `is_reference` + - `is_tuple` + - `is_map` + - `is_binary` + - `is_function` + - `element` - `element(n, tuple)` + - `abs` + - `hd` - return head of the list + - `length` + - `map_get` + - `map_size` + - `round` + - `node` + - `size` - returns size of the tuple + - `bit_size` - returns number of bits in binary + - `byte_size` - returns number of bytes in binary + - `tl` - return tail of a list + - `trunc` + - `self` + - operator: one of + - `not` + - `and` - evaluates both expressions + - `andalso` - evaluates second only when first is true + - `or` - evaluates both expressions + - `orelse` - evaluates second only when first is false + - `xor` + - `+` + - `-` + - `*` + - `div` + - `rem` + - `band` - bitwise AND + - `bor` - bitwise OR + - `bxor` - bitwise XOR + - `bnot` - bitwise NOT + - `bsl` - arithmetic bitshift left + - `bsr` - bitshift right + - `>` + - `>=` + - `<` + - `=<` + - `=:=` + - `==` + - `=/=` + - `/=` - not equal + + +b3 propagation +-------------- + +In order to correlate spans across multiple systems the information +about parent span can be passed via headers. Currently the chttpd +application is responsible for extracting and parsing the header. +The ctrace application provides following facilities to enable this +use case: + +- `{root, RootSpan}` option for `ctrace:start_span/2` +- `ctrace:external_span/3` to convert references to a root span + +The span references could be set either via `b3` header of via +individual headers. In case when individual headers are used the +following set of headers is supported: + +- X-B3-TraceId (32 lower-hex characters) +- X-B3-SpanId (16 lower-hex characters) + (has no effect if X-B3-TraceId is not set) +- X-B3-ParentSpanId (16 lower-hex characters) + (has no effect if X-B3-TraceId is not set) + +Alternatively a single `b3` header could be used. It has to be +in the following format: + +b3={TraceId}-{SpanId}-{SamplingState}-{ParentSpanId} + +Where SamplingState is either `0` or `1`. However we ignore the value. + +Note: We only support 128 bit TraceId's. + +Developing +========== + +Here we provide a list frequently used commands +useful while working on this application. + + +1. Run all tests +``` +make setup-eunit +make && ERL_LIBS=`pwd`/src BUILDDIR=`pwd` mix test --trace src/chttpd/test/exunit/ src/ctrace/test/exunit/ +``` + +2. Run tests selectively +``` +make && ERL_LIBS=`pwd`/src BUILDDIR=`pwd` mix test --trace src/chttpd/test/exunit/ctrace_context_test.exs:59 +``` + +3. Re-run only failed tests +``` +make && ERL_LIBS=`pwd`/src BUILDDIR=`pwd` mix test --failed --trace src/chttpd/test/exunit/ src/ctrace/test/exunit/ +``` + +4. Running jaeger in docker +``` +docker run -d --net fdb-core --name jaeger.local -p 6831:6831/udp -p 16686:16686 jaegertracing/all-in-one:1.14 +``` + +If Docker isn't your cup of tea, the Jaeger project also provides +prebuilt binaries that can be downloaded. On macOS we can easily +setup a development Jaeger instance by running the prebuilt +`jaeger-all-in-one` binary without any arguments. \ No newline at end of file diff --git a/src/ctrace/rebar.config b/src/ctrace/rebar.config new file mode 100644 index 000000000..362c8785e --- /dev/null +++ b/src/ctrace/rebar.config @@ -0,0 +1,14 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +{cover_enabled, true}. +{cover_print_enabled, true}. diff --git a/src/ctrace/src/ctrace.app.src b/src/ctrace/src/ctrace.app.src new file mode 100644 index 000000000..64f4fc5df --- /dev/null +++ b/src/ctrace/src/ctrace.app.src @@ -0,0 +1,27 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + + {application, ctrace, [ + {description, "Open tracer API for CouchDB"}, + {vsn, git}, + {registered, [ + ]}, + {applications, [ + kernel, + stdlib, + syntax_tools, + config, + jaeger_passage, + passage + ]}, + {mod, {ctrace_app, []}} +]}. diff --git a/src/ctrace/src/ctrace.erl b/src/ctrace/src/ctrace.erl new file mode 100644 index 000000000..5521901fd --- /dev/null +++ b/src/ctrace/src/ctrace.erl @@ -0,0 +1,361 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(ctrace). + +-vsn(1). + +-export([ + is_enabled/0, + + with_span/2, + with_span/3, + start_span/1, + start_span/2, + finish_span/0, + finish_span/1, + has_span/0, + external_span/3, + + tag/1, + log/1, + + tags/0, + refs/0, + operation_name/0, + trace_id/0, + span_id/0, + tracer/0, + context/0, + + match/2 +]). + + +-include_lib("couch/include/couch_db.hrl"). +-include_lib("passage/include/opentracing.hrl"). +-include("ctrace.hrl"). + + +-type operation() + :: atom() + | fun(). + +-type tags() + :: #{atom() => term()}. + +-type log_fields() + :: #{atom() => term()}. + +-type start_span_options() + :: [start_span_option()]. + +-type start_span_option() + :: {time, erlang:timespan()} + | {tags, tags()}. + +-type finish_span_options() + :: [finish_span_option()]. + +-type finish_span_option() + :: {time, erlang:timespan()}. + + +-spec is_enabled() -> boolean(). + +is_enabled() -> + case get(?IS_ENABLED_KEY) of + undefined -> + Result = ctrace_config:is_enabled(), + put(?IS_ENABLED_KEY, Result), + Result; + IsEnabled -> + IsEnabled + end. + + +%% @equiv with_span(Operation, [], Fun) +-spec with_span( + Operation :: operation(), + Fun + ) -> Result when + Fun :: fun (() -> Result), + Result :: term(). + +with_span(Operation, Fun) -> + with_span(Operation, #{}, Fun). + +-spec with_span( + Operation :: operation(), + TagsOrOptions :: tags() | start_span_options(), + Fun + ) -> Result when + Fun :: fun (() -> Result), + Result :: term(). + +with_span(Operation, ExtraTags, Fun) when is_map(ExtraTags) -> + with_span(Operation, [{tags, ExtraTags}], Fun); + +with_span(Operation, Options, Fun) -> + try + start_span(Operation, Options), + Fun() + catch Type:Reason -> + Stack = erlang:get_stacktrace(), + log(#{ + ?LOG_FIELD_ERROR_KIND => Type, + ?LOG_FIELD_MESSAGE => Reason, + ?LOG_FIELD_STACK => Stack + }, [error]), + erlang:raise(Type, Reason, Stack) + after + finish_span() + end. + +-spec start_span( + Operation :: operation() + ) -> ok. + +start_span(Operation) -> + start_span(Operation, []). + +-spec start_span( + Operation :: operation(), + Options :: start_span_options() + ) -> ok. + +start_span(Operation, Options) -> + case is_enabled() of + true -> + do_start_span(Operation, Options); + false -> + ok + end. + +do_start_span(Fun, Options) when is_function(Fun) -> + start_span(fun_to_op(Fun), Options); + +do_start_span(OperationName, Options0) -> + Options1 = add_time(Options0), + case passage_pd:current_span() of + undefined -> + put(?ORIGIN_KEY, atom_to_binary(OperationName, utf8)), + Tags = case lists:keyfind(tags, 1, Options0) of + {tags, T} -> + T; + false -> + #{} + end, + case match(OperationName, Tags) of + true -> + Options = [ + {tracer, ?MAIN_TRACER} + | maybe_start_root(Options1) + ], + passage_pd:start_span(OperationName, Options); + false -> + ok + end; + Span -> + Options = add_tags([{child_of, Span} | Options1], #{ + origin => get(?ORIGIN_KEY) + }), + passage_pd:start_span(OperationName, Options) + end. + +-spec finish_span() -> ok. + +finish_span() -> + finish_span([]). + +-spec finish_span( + Options :: finish_span_options() + ) -> ok. + +finish_span(Options0) -> + Options = add_time(Options0), + passage_pd:finish_span(Options). + +-spec tag( + Tags :: tags() + ) -> ok. + +tag(Tags) -> + passage_pd:set_tags(Tags). + +-spec log( + Fields :: log_fields() | fun (() -> log_fields()) + ) -> ok. + +log(FieldsOrFun) -> + log(FieldsOrFun, []). + +log(FieldsOrFun, Options) -> + passage_pd:log(FieldsOrFun, Options). + +-spec tags() -> tags(). + +tags() -> + case passage_pd:current_span() of + undefined -> + undefined; + Span -> + passage_span:get_tags(Span) + end. + +-spec refs() -> passage:refs(). + +refs() -> + case passage_pd:current_span() of + undefined -> + undefined; + Span -> + passage_span:get_refs(Span) + end. + +-spec has_span() -> boolean(). + +has_span() -> + passage_pd:current_span() =/= undefined. + +-spec operation_name() -> atom(). + +operation_name() -> + case passage_pd:current_span() of + undefined -> + undefined; + Span -> + passage_span:get_operation_name(Span) + end. + +-spec trace_id() -> 0..16#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF. + +trace_id() -> + case passage_pd:current_span() of + undefined -> + undefined; + Span -> + Context = passage_span:get_context(Span), + jaeger_passage_span_context:get_trace_id(Context) + end. + +-spec span_id() -> 0..16#FFFFFFFFFFFFFFFF. + +span_id() -> + case passage_pd:current_span() of + undefined -> + undefined; + Span -> + Context = passage_span:get_context(Span), + jaeger_passage_span_context:get_span_id(Context) + end. + +-spec tracer() -> passage:tracer_id(). + +tracer() -> + case passage_pd:current_span() of + undefined -> + undefined; + Span -> + passage_span:get_tracer(Span) + end. + +-spec context() -> passage_span_contest:context(). + +context() -> + case passage_pd:current_span() of + undefined -> + undefined; + Span -> + passage_span:get_context(Span) + end. + +-spec external_span( + TraceId :: passage:trace_id(), + SpanId :: undefined | passage:span_id(), + ParentSpanId :: undefined | passage:span_id() + ) -> passage:maybe_span(). + +external_span(TraceId, undefined, ParentSpanId) -> + external_span(TraceId, rand:uniform(16#FFFFFFFFFFFFFFFF), ParentSpanId); +external_span(TraceId, SpanId, undefined) -> + external_span(TraceId, SpanId, rand:uniform(16#FFFFFFFFFFFFFFFF)); +external_span(TraceId, SpanId, ParentSpanId) -> + IterFun = fun(Val) -> Val end, + Flags = <<0:32>>, + BaggageItems = <<0:32>>, + Binary = << + TraceId:128, + SpanId:64, + ParentSpanId:64, + Flags/binary, + BaggageItems/binary + >>, + State = {ok, <<"binary">>, Binary, error}, + passage:extract_span(?MAIN_TRACER, binary, IterFun, State). + + +match(OperationId, Tags) -> + OpMod = ctrace_config:filter_module_name(OperationId), + case erlang:function_exported(OpMod, match, 1) of + true -> + do_match(OpMod, Tags); + false -> + AllMod = ctrace_config:filter_module_name("all"), + case erlang:function_exported(AllMod, match, 1) of + true -> do_match(AllMod, Tags); + false -> false + end + end. + + +do_match(Mod, Tags) -> + case Mod:match(Tags) of + true -> + true; + false -> + false; + Rate when is_float(Rate) -> + rand:uniform() =< Rate + end. + + +add_tags(Options, ExtraTags) -> + case lists:keytake(tags, 1, Options) of + {value, {tags, T}, Opts} -> + [{tags, maps:merge(T, ExtraTags)} | Opts]; + false -> + [{tags, ExtraTags} | Options] + end. + +add_time(Options) -> + case lists:keymember(time, 1, Options) of + true -> + Options; + false -> + [{time, os:timestamp()} | Options] + end. + +maybe_start_root(Options) -> + case lists:keytake(root, 1, Options) of + {value, {root, Root}, NewOptions} -> + [{child_of, Root} | NewOptions]; + false -> + Options + end. + +fun_to_op(Fun) -> + {module, M} = erlang:fun_info(Fun, module), + {name, F} = erlang:fun_info(Fun, name), + {arity, A} = erlang:fun_info(Fun, arity), + Str = io_lib:format("~s:~s/~b", [M, F, A]), + list_to_atom(lists:flatten(Str)). diff --git a/src/ctrace/src/ctrace.hrl b/src/ctrace/src/ctrace.hrl new file mode 100644 index 000000000..3819bbd50 --- /dev/null +++ b/src/ctrace/src/ctrace.hrl @@ -0,0 +1,15 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-define(MAIN_TRACER, jaeger_passage_reporter). +-define(IS_ENABLED_KEY, ctrace_is_enabled). +-define(ORIGIN_KEY, ctrace_origin_key). diff --git a/src/ctrace/src/ctrace_app.erl b/src/ctrace/src/ctrace_app.erl new file mode 100644 index 000000000..c98b897e0 --- /dev/null +++ b/src/ctrace/src/ctrace_app.erl @@ -0,0 +1,26 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(ctrace_app). + +-behaviour(application). + +-export([ + start/2, + stop/1 +]). + +start(_StartType, _StartArgs) -> + ctrace_sup:start_link(). + +stop(_State) -> + ok. diff --git a/src/ctrace/src/ctrace_config.erl b/src/ctrace/src/ctrace_config.erl new file mode 100644 index 000000000..bc2a3dff2 --- /dev/null +++ b/src/ctrace/src/ctrace_config.erl @@ -0,0 +1,133 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(ctrace_config). + +-vsn(1). + +-behaviour(config_listener). + +-export([ + is_enabled/0, + update/0, + + filter_module_name/1 +]). + +-export([ + handle_config_change/5, + handle_config_terminate/3 +]). + +-include("ctrace.hrl"). + + +-spec is_enabled() -> boolean(). +is_enabled() -> + config:get_boolean("tracing", "enabled", false). + + +-spec update() -> ok. +update() -> + case is_enabled() of + true -> + maybe_start_main_tracer(?MAIN_TRACER), + + CompiledFilters = get_compiled_filters(), + + RemovedFilters = lists:foldl(fun({OperationId, FilterDef}, Acc) -> + case compile_filter(OperationId, FilterDef) of + true -> Acc -- [OperationId]; + false -> Acc + end + end, CompiledFilters, config:get("tracing.filters")), + + lists:foreach(fun(OperationId) -> + ModName = filter_module_name(OperationId), + code:delete(ModName), + code:purge(ModName) + end, RemovedFilters), + + case config:get("tracing.filters", "all") of + undefined -> compile_filter("all", "(#{}) -> false"); + _ -> ok + end; + + false -> + jaeger_passage:stop_tracer(?MAIN_TRACER) + end, + ok. + + +-spec filter_module_name(atom() | string()) -> atom(). +filter_module_name(OperationId) when is_atom(OperationId) -> + filter_module_name(atom_to_list(OperationId)); +filter_module_name(OperationId) -> + list_to_atom("ctrace_filter_" ++ OperationId). + + +handle_config_change("tracing", "enabled", _, _Persist, St) -> + update(), + {ok, St}; +handle_config_change("tracing.filters", _Key, _Val, _Persist, St) -> + update(), + {ok, St}; +handle_config_change(_Sec, _Key, _Val, _Persist, St) -> + {ok, St}. + +handle_config_terminate(_Server, _Reason, _State) -> + update(). + + +maybe_start_main_tracer(TracerId) -> + case passage_tracer_registry:get_reporter(TracerId) of + error -> + start_main_tracer(TracerId); + _ -> + true + end. + + +start_main_tracer(TracerId) -> + Sampler = passage_sampler_all:new(), + Options = [ + {thrift_format, + list_to_atom(config:get("tracing", "thrift_format", "compact"))}, + {agent_host, config:get("tracing", "agent_host", "127.0.0.1")}, + {agent_port, config:get_integer("tracing", "agent_port", 6831)}, + {default_service_name, + list_to_atom(config:get("tracing", "app_name", "couchdb"))} + ], + ok = jaeger_passage:start_tracer(TracerId, Sampler, Options). + + +compile_filter(OperationId, FilterDef) -> + try + couch_log:info("Compiling filter : ~s", [OperationId]), + ctrace_dsl:compile(OperationId, FilterDef), + true + catch throw:{error, Reason} -> + couch_log:error("Cannot compile ~s :: ~s~n", [OperationId, Reason]), + false + end. + + +get_compiled_filters() -> + lists:foldl(fun({Mod, _Path}, Acc) -> + ModStr = atom_to_list(Mod), + case ModStr of + "ctrace_filter_" ++ OpName -> + [OpName | Acc]; + _ -> + Acc + end + end, [], code:all_loaded()). diff --git a/src/ctrace/src/ctrace_dsl.erl b/src/ctrace/src/ctrace_dsl.erl new file mode 100644 index 000000000..5e0b0f252 --- /dev/null +++ b/src/ctrace/src/ctrace_dsl.erl @@ -0,0 +1,106 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(ctrace_dsl). +-include_lib("syntax_tools/include/merl.hrl"). + +-export([ + compile/2, + + % Debug + source/2 +]). + + +-type ast() :: erl_syntax:syntaxTree(). + + +-spec compile(OperationId :: string(), FilterDef :: string()) -> ok. +compile(OperationId, FilterDef) -> + AST = parse_filter(OperationId, FilterDef), + merl:compile_and_load(AST), + ok. + + +-spec source(OperationId :: string(), FilterDef :: string()) -> string(). +source(OperationId, FilterDef) -> + AST = parse_filter(OperationId, FilterDef), + Options = [{paper, 160}, {ribbon, 80}], + erl_prettypr:format(erl_syntax:form_list(AST), Options). + + +-spec parse_filter(OperationId :: string(), FilterDef :: string()) -> [ast()]. +parse_filter(OperationId, FilterDef) -> + AST = merl:quote("match" ++ FilterDef ++ "."), + case AST of + ?Q("match(_@Args) when _@__@Guard -> _@Return.") + when erl_syntax:type(Args) == map_expr -> + validate_args(Args), + validate_return(Return), + generate(OperationId, Args, Guard, Return); + ?Q("match(_@Args) when _@__@Guard -> _@@_.") -> + fail("The only argument of the filter should be map"); + ?Q("match(_@@Args) when _@__@Guard -> _@@_.") -> + fail("The arity of the filter function should be 1"); + _ -> + fail("Unknown shape of a filter function") + end. + + +-spec validate_args(MapAST :: ast()) -> ok. +validate_args(MapAST) -> + %% Unfortunatelly merl doesn't seem to support maps + %% so we had to do it manually + lists:foldl(fun(AST, Bindings) -> + erl_syntax:type(AST) == map_field_exact + orelse fail("Only #{field := Var} syntax is supported in the header"), + NameAST = erl_syntax:map_field_exact_name(AST), + erl_syntax:type(NameAST) == atom + orelse fail("Only atoms are supported as field names in the header"), + Name = erl_syntax:atom_value(NameAST), + VarAST = erl_syntax:map_field_exact_value(AST), + erl_syntax:type(VarAST) == variable + orelse fail("Only capitalized names are supported as matching variables in the header"), + Var = erl_syntax:variable_name(VarAST), + maps:is_key(Var, Bindings) + andalso fail("'~s' variable is already in use", [Var]), + Bindings#{Var => Name} + end, #{}, erl_syntax:map_expr_fields(MapAST)). + + +-spec validate_return(Return :: [ast()]) -> ok. +validate_return(Return) -> + case Return of + ?Q("true") -> ok; + ?Q("false") -> ok; + ?Q("_@AST") when erl_syntax:type(AST) == float -> ok; + _ -> + fail("Unsupported return value '~s'", [erl_prettypr:format(Return)]) + end. + + +generate(OperationId, Args, Guard, Return) -> + ModuleName = ctrace_config:filter_module_name(OperationId), + Module = ?Q("-module('@ModuleName@')."), + Export = ?Q("-export([match/1])."), + Function = erl_syntax:function(merl:term(match), [ + ?Q("(_@Args) when _@__@Guard -> _@Return"), + ?Q("(_) -> false") + ]), + lists:flatten([Module, Export, Function]). + + +fail(Msg) -> + throw({error, Msg}). + +fail(Msg, Args) -> + throw({error, lists:flatten(io_lib:format(Msg, Args))}). \ No newline at end of file diff --git a/src/ctrace/src/ctrace_sup.erl b/src/ctrace/src/ctrace_sup.erl new file mode 100644 index 000000000..70de3c586 --- /dev/null +++ b/src/ctrace/src/ctrace_sup.erl @@ -0,0 +1,41 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(ctrace_sup). +-behaviour(supervisor). +-vsn(1). + +-export([ + start_link/0, + init/1 +]). + +start_link() -> + ctrace_config:update(), + supervisor:start_link({local, ?MODULE}, ?MODULE, []). + +init([]) -> + Flags = #{ + strategy => one_for_one, + intensity => 5, + period => 10 + }, + Children = [ + #{ + id => config_listener_mon, + type => worker, + restart => permanent, + shutdown => 5000, + start => {config_listener_mon, start_link, [ctrace_config, nil]} + } + ], + {ok, {Flags, Children}}. \ No newline at end of file diff --git a/src/ctrace/test/ctrace_config_test.erl b/src/ctrace/test/ctrace_config_test.erl new file mode 100644 index 000000000..0827013fd --- /dev/null +++ b/src/ctrace/test/ctrace_config_test.erl @@ -0,0 +1,153 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(ctrace_config_test). + +-include_lib("eunit/include/eunit.hrl"). +-include_lib("ctrace/src/ctrace.hrl"). + + +-define(TDEF(A), {atom_to_list(A), fun A/0}). + + +ctrace_config_test_() -> + { + "Test ctrace_config", + { + setup, + fun setup/0, + fun cleanup/1, + [ + ?TDEF(ensure_main_tracer_started), + ?TDEF(ensure_all_supported), + ?TDEF(handle_all_syntax_error_supported), + ?TDEF(ensure_filter_updated), + ?TDEF(ensure_filter_removed), + ?TDEF(ensure_bad_filter_ignored) + ] + } + }. + + +setup() -> + Ctx = test_util:start_couch([ctrace]), + + config_set("tracing", "enabled", "true"), + + Filter = "(#{method := M}) when M == get -> true", + config_set("tracing.filters", "base", Filter), + + ctrace_config:update(), + + Ctx. + + +cleanup(Ctx) -> + test_util:stop_couch(Ctx). + + +ensure_main_tracer_started() -> + ?assertMatch( + {ok, _}, + passage_tracer_registry:get_reporter(?MAIN_TRACER) + ). + + +ensure_all_supported() -> + config:delete("tracing.filters", "all", false), + test_util:wait_value(fun() -> + config:get("tracing.filters", "all") + end, undefined), + ctrace_config:update(), + + ?assertEqual(false, ctrace:match(bam, #{gee => whiz})), + + Filter = "(#{}) -> true", + config_set("tracing.filters", "all", Filter), + ctrace_config:update(), + + ?assertEqual(true, ctrace:match(bam, #{gee => whiz})). + + +handle_all_syntax_error_supported() -> + couch_log:error("XKCD: TEST START", []), + config:delete("tracing.filters", "all", false), + test_util:wait_value(fun() -> + config:get("tracing.filters", "all") + end, undefined), + ctrace_config:update(), + + ?assertEqual(false, ctrace:match(bam, #{gee => whiz})), + + Filter = "( -> true.", + config_set("tracing.filters", "all", Filter), + ctrace_config:update(), + + % If there's a syntax in the `all` handler + % then we default to not generating traces + ?assertEqual(false, ctrace:match(bam, #{gee => whiz})), + + couch_log:error("XKCD: TEST END", []), + config:delete("tracing.filters", "all", false). + + +ensure_filter_updated() -> + Filter1 = "(#{}) -> true", + config_set("tracing.filters", "bing", Filter1), + ctrace_config:update(), + + ?assertEqual(true, ctrace:match(bing, #{gee => whiz})), + + Filter2 = "(#{}) -> false", + config_set("tracing.filters", "bing", Filter2), + ctrace_config:update(), + + ?assertEqual(false, ctrace:match(bing, #{gee => whiz})). + + +ensure_filter_removed() -> + Filter = "(#{}) -> true", + config_set("tracing.filters", "bango", Filter), + ctrace_config:update(), + + ?assertEqual(true, ctrace:match(bango, #{gee => whiz})), + + config:delete("tracing.filters", "bango", false), + test_util:wait_value(fun() -> + config:get("tracing.filters", "bango") + end, undefined), + ctrace_config:update(), + + FilterMod = ctrace_config:filter_module_name("bango"), + ?assertEqual(false, code:is_loaded(FilterMod)). + + +ensure_bad_filter_ignored() -> + Filter = "#foo stuff", + config_set("tracing.filters", "compile_error", Filter), + ctrace_config:update(), + + FilterMod = ctrace_config:filter_module_name("compile_error"), + ?assertEqual(false, code:is_loaded(FilterMod)), + + AllMod = ctrace_config:filter_module_name(all), + ?assertMatch({file, _}, code:is_loaded(AllMod)). + + +config_set(Section, Key, Value) -> + PrevValue = config:get(Section, Key), + if Value == PrevValue -> ok; true -> + config:set(Section, Key, Value, false), + test_util:wait_other_value(fun() -> + config:get(Section, Key) + end, PrevValue) + end. diff --git a/src/ctrace/test/ctrace_dsl_test.erl b/src/ctrace/test/ctrace_dsl_test.erl new file mode 100644 index 000000000..601e6cd17 --- /dev/null +++ b/src/ctrace/test/ctrace_dsl_test.erl @@ -0,0 +1,123 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(ctrace_dsl_test). + + +-include_lib("eunit/include/eunit.hrl"). + + +simple_parse_and_compile_test() -> + Filter = "(#{'http.method' := Method}) when Method == get -> 1.0", + ctrace_dsl:compile("foo", Filter), + ?assertEqual(1.0, run_filter("foo", #{'http.method' => get})), + ?assertEqual(false, run_filter("foo", #{'httpd.method' => put})). + + +empty_map_test() -> + Filter = "(#{}) -> true", + ctrace_dsl:compile("foo", Filter), + ?assertEqual(true, run_filter("foo", #{})), + ?assertEqual(true, run_filter("foo", #{foo => bar})), + ?assertEqual(false, run_filter("foo", nil)). + + +return_false_test() -> + Filter = "(#{}) -> false", + ctrace_dsl:compile("foo", Filter), + ?assertEqual(false, run_filter("foo", #{})), + ?assertEqual(false, run_filter("foo", nil)). + + +return_float_test() -> + Filter = "(#{}) -> 0.2", + ctrace_dsl:compile("foo", Filter), + ?assertEqual(0.2, run_filter("foo", #{})), + ?assertEqual(false, run_filter("foo", nil)). + + +bad_filter_body_is_list_test() -> + Filter = "(#{}) -> []", + Error = "Unsupported return value '[]'", + ?assertThrow({error, Error}, ctrace_dsl:compile("foo", Filter)). + + +bad_filter_body_has_calls_test() -> + Filter = "(#{}) -> [module:function()]", + Error = "Unsupported return value '[module:function()]'", + ?assertThrow({error, Error}, ctrace_dsl:compile("foo", Filter)). + + +bad_arg_list_too_few_test() -> + Filter = "() -> true", + Error = "The arity of the filter function should be 1", + ?assertThrow({error, Error}, ctrace_dsl:compile("foo", Filter)). + + +bad_arg_list_too_many_test() -> + Filter = "(#{}, foo) -> true", + Error = "The arity of the filter function should be 1", + ?assertThrow({error, Error}, ctrace_dsl:compile("foo", Filter)). + + +bad_arg_type_test() -> + Filters = [ + "(atom) -> true", + "([atom]) -> true", + "(1) -> true", + "(1.0) -> true" + ], + Error = "The only argument of the filter should be map", + lists:foreach(fun(Filter) -> + ?assertThrow({error, Error}, ctrace_dsl:compile("foo", Filter)) + end, Filters). + + +bad_map_association_test() -> + Filter = "(#{foo => Var}) -> true", + Error = "Only #{field := Var} syntax is supported in the header", + ?assertThrow({error, Error}, ctrace_dsl:compile("foo", Filter)). + + +bad_field_variable_test() -> + Filter = "(#{Var := Val}) -> false", + Error = "Only atoms are supported as field names in the header", + ?assertThrow({error, Error}, ctrace_dsl:compile("foo", Filter)). + + +bad_field_match_test() -> + Filter = "(#{foo := 2}) -> true", + Error = "Only capitalized names are supported" + " as matching variables in the header", + ?assertThrow({error, Error}, ctrace_dsl:compile("foo", Filter)). + + +repeated_variable_test() -> + Filter = "(#{foo := Val, bar := Val}) -> true", + Error = "'Val' variable is already in use", + ?assertThrow({error, Error}, ctrace_dsl:compile("foo", Filter)). + + +code_coverage1_test() -> + Filter = "foo(#{}) -> bar", + Error = "Unknown shape of a filter function", + ?assertThrow({error, Error}, ctrace_dsl:compile("foo", Filter)). + + +code_coverage2_test() -> + Filter = "(#{}) -> true", + ?assertMatch([_ | _], ctrace_dsl:source("foo", Filter)). + + +run_filter(OperationId, Value) -> + ModName = ctrace_config:filter_module_name(OperationId), + ModName:match(Value). diff --git a/src/ctrace/test/ctrace_test.erl b/src/ctrace/test/ctrace_test.erl new file mode 100644 index 000000000..962f9aae3 --- /dev/null +++ b/src/ctrace/test/ctrace_test.erl @@ -0,0 +1,412 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(ctrace_test). + +-include_lib("eunit/include/eunit.hrl"). +-include_lib("ctrace/src/ctrace.hrl"). + + +-define(TDEF(A), {atom_to_list(A), fun A/0}). + + +ctrace_config_test_() -> + { + "Test ctrace", + { + setup, + fun setup/0, + fun cleanup/1, + [ + ?TDEF(is_enabled_cached), + ?TDEF(simple_with_span), + ?TDEF(with_span_exception), + ?TDEF(simple_start_finish_span), + ?TDEF(op_name_from_fun), + ?TDEF(skipped_when_disabled), + ?TDEF(include_or_skip_on_sampled), + ?TDEF(set_tags_on_start_span), + ?TDEF(set_time_on_start_span), + ?TDEF(skip_on_filtered), + ?TDEF(simple_child_span), + ?TDEF(update_tags), + ?TDEF(update_logs), + ?TDEF(current_span_getters), + ?TDEF(create_external_span), + ?TDEF(use_external_span) + ] + } + }. + + +setup() -> + Ctx = test_util:start_couch([ctrace]), + + config_set("tracing", "enabled", "true"), + + Filter = "(#{}) -> true", + config_set("tracing.filters", "all", Filter), + + ctrace_config:update(), + + MainReporter = passage_tracer_registry:get_reporter(?MAIN_TRACER), + + {MainReporter, Ctx}. + + +cleanup({MainReporter, Ctx}) -> + passage_tracer_registry:set_reporter(?MAIN_TRACER, MainReporter), + test_util:stop_couch(Ctx). + + +is_enabled_cached() -> + erase(?IS_ENABLED_KEY), + Result = ctrace:is_enabled(), + ?assertEqual(Result, get(?IS_ENABLED_KEY)), + ?assert(is_boolean(Result)), + + % Fake override to test that we're using the cached value + put(?IS_ENABLED_KEY, not Result), + ?assertEqual(not Result, ctrace:is_enabled()), + + % Revert to original to not mess with other tests + put(?IS_ENABLED_KEY, Result). + + +simple_with_span() -> + set_self_reporter(), + + Result = ctrace:with_span(zing, fun() -> + a_result + end), + + ?assertEqual(a_result, Result), + + receive + {span, Span} -> + ?assertEqual(zing, passage_span:get_operation_name(Span)) + end. + + +with_span_exception() -> + set_self_reporter(), + + Result = try + ctrace:with_span(zab, fun() -> + throw(foo) + end) + catch T:R -> + {T, R} + end, + + ?assertEqual({throw, foo}, Result), + + receive + {span, Span} -> + ?assertEqual(zab, passage_span:get_operation_name(Span)), + ?assertMatch( + [ + {#{ + 'error.kind' := throw, + event := error, + message := foo, + stack := [_ | _] + }, _TimeStamp} + ], + passage_span:get_logs(Span) + ) + end. + + +simple_start_finish_span() -> + set_self_reporter(), + + ctrace:start_span(foo), + ctrace:finish_span(), + + receive + {span, Span} -> + ?assertEqual(foo, passage_span:get_operation_name(Span)) + end. + + +op_name_from_fun() -> + set_self_reporter(), + + ctrace:start_span(fun ctrace:match/2), + ctrace:finish_span(), + + receive + {span, Span} -> + OpName = passage_span:get_operation_name(Span), + ?assertEqual('ctrace:match/2', OpName) + end. + + +skipped_when_disabled() -> + set_self_reporter(), + + ?assert(not ctrace:has_span()), + ctrace:start_span(foo), + ?assert(ctrace:has_span()), + ctrace:finish_span(), + ?assert(not ctrace:has_span()), + receive {span, _Span} -> ok end, + + IsEnabled = get(?IS_ENABLED_KEY), + try + put(?IS_ENABLED_KEY, false), + + ?assert(not ctrace:has_span()), + ctrace:start_span(foo), + ?assert(not ctrace:has_span()), + ctrace:finish_span(), + ?assert(not ctrace:has_span()) + after + put(?IS_ENABLED_KEY, IsEnabled) + end. + + +set_tags_on_start_span() -> + set_self_reporter(), + + Tags = #{foo => bar}, + ctrace:start_span(bang, [{tags, Tags}]), + ctrace:finish_span(), + + receive + {span, Span} -> + ?assertEqual(bang, passage_span:get_operation_name(Span)), + ?assertEqual(#{foo => bar}, passage_span:get_tags(Span)) + end. + + +set_time_on_start_span() -> + set_self_reporter(), + + Time = os:timestamp(), + timer:sleep(100), + ctrace:start_span(bang, [{time, Time}]), + ctrace:finish_span(), + + receive + {span, Span} -> + ?assertEqual(Time, passage_span:get_start_time(Span)) + end. + + +skip_on_filtered() -> + set_self_reporter(), + + config_set("tracing.filters", "do_skip", "(#{}) -> false"), + ctrace_config:update(), + + ?assert(not ctrace:has_span()), + ctrace:start_span(do_skip), + ?assert(not ctrace:has_span()), + ctrace:finish_span(), + ?assert(not ctrace:has_span()). + + +include_or_skip_on_sampled() -> + set_self_reporter(), + + config_set("tracing.filters", "sample", "(#{}) -> 0.0"), + ctrace_config:update(), + + ?assert(not ctrace:has_span()), + ctrace:start_span(sample), + ?assert(not ctrace:has_span()), + ctrace:finish_span(), + ?assert(not ctrace:has_span()), + + config_set("tracing.filters", "sample", "(#{}) -> 1.0"), + ctrace_config:update(), + + ?assert(not ctrace:has_span()), + ctrace:start_span(sample), + ?assert(ctrace:has_span()), + ctrace:finish_span(), + ?assert(not ctrace:has_span()), + + receive + {span, Span1} -> + ?assertEqual(sample, passage_span:get_operation_name(Span1)) + end, + + config_set("tracing.filters", "sample", "(#{}) -> 0.5"), + ctrace_config:update(), + + ?assert(not ctrace:has_span()), + ctrace:start_span(sample), + IsSampled = ctrace:has_span(), + ctrace:finish_span(), + ?assert(not ctrace:has_span()), + + if not IsSampled -> ok; true -> + receive + {span, Span2} -> + ?assertEqual( + sample, + passage_span:get_operation_name(Span2) + ) + end + end. + + +simple_child_span() -> + set_self_reporter(), + + ctrace:start_span(parent), + ctrace:start_span(child), + ctrace:finish_span(), + ctrace:finish_span(), + + receive + {span, CSpan} -> + ?assertEqual(child, passage_span:get_operation_name(CSpan)) + end, + + receive + {span, PSpan} -> + ?assertEqual(parent, passage_span:get_operation_name(PSpan)) + end. + + +update_tags() -> + set_self_reporter(), + + ctrace:start_span(foo, [{tags, #{foo => bar}}]), + ctrace:tag(#{bango => bongo}), + ctrace:finish_span(), + + receive + {span, Span} -> + ?assertEqual( + #{foo => bar, bango => bongo}, + passage_span:get_tags(Span) + ) + end. + + +update_logs() -> + set_self_reporter(), + + ctrace:start_span(foo), + ctrace:log(#{foo => bar}), + ctrace:finish_span(), + + receive + {span, Span1} -> + ?assertMatch( + [{#{foo := bar}, _TimeStamp}], + passage_span:get_logs(Span1) + ) + end, + + ctrace:start_span(foo), + ctrace:log(fun() -> + #{foo => baz} + end), + ctrace:finish_span(), + + receive + {span, Span2} -> + ?assertMatch( + [{#{foo := baz}, _TimeStamp}], + passage_span:get_logs(Span2) + ) + end. + + +current_span_getters() -> + ?assertEqual(false, ctrace:has_span()), + ?assertEqual(undefined, ctrace:tags()), + ?assertEqual(undefined, ctrace:refs()), + ?assertEqual(undefined, ctrace:operation_name()), + ?assertEqual(undefined, ctrace:trace_id()), + ?assertEqual(undefined, ctrace:span_id()), + ?assertEqual(undefined, ctrace:tracer()), + ?assertEqual(undefined, ctrace:context()), + + ctrace:start_span(parent), + ctrace:start_span(child, [{tags, #{foo => oof}}]), + + ?assertEqual(true, ctrace:has_span()), + ?assertEqual(#{foo => oof, origin => <<"parent">>}, ctrace:tags()), + ?assertMatch([{child_of, _} | _], ctrace:refs()), + ?assertEqual(child, ctrace:operation_name()), + ?assert(is_integer(ctrace:trace_id())), + ?assert(is_integer(ctrace:span_id())), + ?assertEqual(?MAIN_TRACER, ctrace:tracer()), + ?assertNotEqual(undefined, ctrace:context()), + + ctrace:finish_span(), + ctrace:finish_span(), + + receive + {span, CSpan} -> + ?assertEqual(child, passage_span:get_operation_name(CSpan)) + end, + + receive + {span, PSpan} -> + ?assertEqual(parent, passage_span:get_operation_name(PSpan)) + end. + + +create_external_span() -> + Span1 = ctrace:external_span(1, 2, 3), + Ctx1 = passage_span:get_context(Span1), + ?assertEqual(1, jaeger_passage_span_context:get_trace_id(Ctx1)), + ?assertEqual(2, jaeger_passage_span_context:get_span_id(Ctx1)), + + Span2 = ctrace:external_span(42, undefined, undefined), + Ctx2 = passage_span:get_context(Span2), + ?assertEqual(42, jaeger_passage_span_context:get_trace_id(Ctx2)), + ?assert(is_integer(jaeger_passage_span_context:get_span_id(Ctx2))). + + +use_external_span() -> + Parent = ctrace:external_span(1, 2, 3), + + ?assert(not ctrace:has_span()), + ctrace:start_span(foo, [{root, Parent}]), + ?assert(ctrace:has_span()), + ctrace:finish_span(), + ?assert(not ctrace:has_span()), + + receive + {span, Span} -> + Ctx = passage_span:get_context(Span), + TraceId = jaeger_passage_span_context:get_trace_id(Ctx), + ?assertEqual(1, TraceId) + end. + + +config_set(Section, Key, Value) -> + PrevValue = config:get(Section, Key), + if Value == PrevValue -> ok; true -> + config:set(Section, Key, Value, false), + test_util:wait_other_value(fun() -> + config:get(Section, Key) + end, PrevValue) + end. + + +set_self_reporter() -> + SelfReporter = passage_reporter_process:new(self(), span), + passage_tracer_registry:set_reporter(?MAIN_TRACER, SelfReporter), + test_util:wait_value(fun() -> + {ok, Result} = passage_tracer_registry:get_reporter(?MAIN_TRACER), + Result + end, SelfReporter). \ No newline at end of file -- cgit v1.2.1 From 0521bcaabcdbea6387b85c79be4d4fe424c053fc Mon Sep 17 00:00:00 2001 From: ILYA Khlopotov Date: Tue, 12 Nov 2019 16:21:36 +0000 Subject: Trace http endpoints --- src/chttpd/src/chttpd.app.src | 1 + src/chttpd/src/chttpd.erl | 114 +++++++++++++++++++++++++++++-- src/chttpd/src/chttpd_handlers.erl | 7 +- src/chttpd/src/chttpd_httpd_handlers.erl | 30 +++++++- 4 files changed, 146 insertions(+), 6 deletions(-) diff --git a/src/chttpd/src/chttpd.app.src b/src/chttpd/src/chttpd.app.src index 3526745df..af330e0df 100644 --- a/src/chttpd/src/chttpd.app.src +++ b/src/chttpd/src/chttpd.app.src @@ -26,6 +26,7 @@ couch_stats, config, couch, + ctrace, ets_lru, fabric ]}, diff --git a/src/chttpd/src/chttpd.erl b/src/chttpd/src/chttpd.erl index a15537f85..625e4eb55 100644 --- a/src/chttpd/src/chttpd.erl +++ b/src/chttpd/src/chttpd.erl @@ -259,6 +259,7 @@ handle_request_int(MochiReq) -> case after_request(HttpReq2, HttpResp) of #httpd_resp{status = ok, response = Resp} -> + span_ok(HttpResp), {ok, Resp}; #httpd_resp{status = aborted, reason = Reason} -> couch_log:error("Response abnormally terminated: ~p", [Reason]), @@ -266,6 +267,7 @@ handle_request_int(MochiReq) -> end. before_request(HttpReq) -> + ctrace:is_enabled() andalso start_span(HttpReq), try chttpd_stats:init(), chttpd_plugin:before_request(HttpReq) @@ -316,6 +318,8 @@ process_request(#httpd{mochi_req = MochiReq} = HttpReq) -> end. handle_req_after_auth(HandlerKey, HttpReq) -> + #httpd{user_ctx = #user_ctx{name = User}} = HttpReq, + ctrace:tag(#{user => User}), try HandlerFun = chttpd_handlers:url_handler(HandlerKey, fun chttpd_db:handle_request/1), @@ -1052,16 +1056,20 @@ send_error(#httpd{} = Req, Code, ErrorStr, ReasonStr) -> send_error(Req, Code, [], ErrorStr, ReasonStr, []). send_error(Req, Code, Headers, ErrorStr, ReasonStr, []) -> - send_json(Req, Code, Headers, + Return = send_json(Req, Code, Headers, {[{<<"error">>, ErrorStr}, - {<<"reason">>, ReasonStr}]}); + {<<"reason">>, ReasonStr}]}), + span_error(Code, ErrorStr, ReasonStr, []), + Return; send_error(Req, Code, Headers, ErrorStr, ReasonStr, Stack) -> log_error_with_stack_trace({ErrorStr, ReasonStr, Stack}), - send_json(Req, Code, [stack_trace_id(Stack) | Headers], + Return = send_json(Req, Code, [stack_trace_id(Stack) | Headers], {[{<<"error">>, ErrorStr}, {<<"reason">>, ReasonStr} | case Stack of [] -> []; _ -> [{<<"ref">>, stack_hash(Stack)}] end - ]}). + ]}), + span_error(Code, ErrorStr, ReasonStr, Stack), + Return. update_timeout_stats(<<"timeout">>, #httpd{requested_path_parts = PathParts}) -> update_timeout_stats(PathParts); @@ -1230,6 +1238,104 @@ maybe_trace_fdb("true") -> maybe_trace_fdb(_) -> ok. +start_span(Req) -> + #httpd{ + mochi_req = MochiReq, + begin_ts = Begin, + peer = Peer, + nonce = Nonce, + method = Method, + path_parts = PathParts + } = Req, + {OperationName, ExtraTags} = get_action(Req), + Tags = maps:merge(#{ + peer => Peer, + 'http.method' => Method, + nonce => Nonce, + 'http.url' => MochiReq:get(raw_path), + path_parts => PathParts, + 'span.kind' => <<"server">>, + component => <<"couchdb.chttpd">> + }, ExtraTags), + + ctrace:start_span(OperationName, [ + {tags, Tags}, + {time, Begin} + ] ++ maybe_root_span(MochiReq)). + +maybe_root_span(MochiReq) -> + case get_trace_headers(MochiReq) of + [undefined, _, _] -> + []; + [TraceId, SpanId, ParentSpanId] -> + Span = ctrace:external_span(TraceId, SpanId, ParentSpanId), + [{root, Span}] + end. + +parse_trace_id(undefined) -> + undefined; +parse_trace_id(Hex) -> + to_int(Hex, 32). + +parse_span_id(undefined) -> + undefined; +parse_span_id(Hex) -> + to_int(Hex, 16). + +to_int(Hex, N) when length(Hex) =:= N -> + try + list_to_integer(Hex, 16) + catch error:badarg -> + undefined + end. + +get_trace_headers(MochiReq) -> + case MochiReq:get_header_value("b3") of + undefined -> + [ + parse_trace_id(MochiReq:get_header_value("X-B3-TraceId")), + parse_span_id(MochiReq:get_header_value("X-B3-SpanId")), + parse_span_id(MochiReq:get_header_value("X-B3-ParentSpanId")) + ]; + Value -> + case binary:split(Value, <<"-">>, [global]) of + [TraceIdStr, SpanIdStr, _SampledStr, ParentSpanIdStr] -> + [ + parse_trace_id(TraceIdStr), + parse_span_id(SpanIdStr), + parse_span_id(ParentSpanIdStr) + ]; + _ -> + [undefined, undefined, undefined] + end + end. + +get_action(#httpd{} = Req) -> + try + chttpd_handlers:handler_info(Req) + catch Tag:Error -> + couch_log:error("Cannot set tracing action ~p:~p", [Tag, Error]), + {undefind, #{}} + end. + +span_ok(#httpd_resp{code = Code}) -> + ctrace:tag(#{ + error => false, + 'http.status_code' => Code + }), + ctrace:finish_span(). + +span_error(Code, ErrorStr, ReasonStr, Stack) -> + ctrace:tag(#{ + error => true, + 'http.status_code' => Code + }), + ctrace:log(#{ + 'error.kind' => ErrorStr, + message => ReasonStr, + stack => Stack + }), + ctrace:finish_span(). -ifdef(TEST). diff --git a/src/chttpd/src/chttpd_handlers.erl b/src/chttpd/src/chttpd_handlers.erl index 930563230..c07b2097b 100644 --- a/src/chttpd/src/chttpd_handlers.erl +++ b/src/chttpd/src/chttpd_handlers.erl @@ -15,7 +15,8 @@ -export([ url_handler/2, db_handler/2, - design_handler/2 + design_handler/2, + handler_info/1 ]). -define(SERVICE_ID, chttpd_handlers). @@ -35,6 +36,10 @@ db_handler(HandlerKey, DefaultFun) -> design_handler(HandlerKey, DefaultFun) -> select(collect(design_handler, [HandlerKey]), DefaultFun). +handler_info(HttpReq) -> + Default = {'unknown.unknown', #{}}, + select(collect(handler_info, [HttpReq]), Default). + %% ------------------------------------------------------------------ %% Internal Function Definitions %% ------------------------------------------------------------------ diff --git a/src/chttpd/src/chttpd_httpd_handlers.erl b/src/chttpd/src/chttpd_httpd_handlers.erl index 5e86ea87d..54d6dfce2 100644 --- a/src/chttpd/src/chttpd_httpd_handlers.erl +++ b/src/chttpd/src/chttpd_httpd_handlers.erl @@ -12,7 +12,10 @@ -module(chttpd_httpd_handlers). --export([url_handler/1, db_handler/1, design_handler/1]). +-export([url_handler/1, db_handler/1, design_handler/1, handler_info/1]). + +-include_lib("couch/include/couch_db.hrl"). + url_handler(<<>>) -> fun chttpd_misc:handle_welcome_req/1; url_handler(<<"favicon.ico">>) -> fun chttpd_misc:handle_favicon_req/1; @@ -44,3 +47,28 @@ design_handler(<<"_update">>) -> fun chttpd_show:handle_doc_update_req/3; design_handler(<<"_info">>) -> fun chttpd_db:handle_design_info_req/3; design_handler(<<"_rewrite">>) -> fun chttpd_rewrite:handle_rewrite_req/3; design_handler(_) -> no_match. + +%% TODO Populate in another PR +handler_info(#httpd{path_parts=[<<"_all_dbs">>], method=Method}) + when Method =:= 'HEAD' orelse Method =:= 'GET' -> + {'all-dbs.read', #{}}; + +handler_info(#httpd{path_parts=[<<"_session">>], method=Method}) + when Method =:= 'HEAD' orelse Method =:= 'GET' -> + {'session.read', #{}}; +handler_info(#httpd{path_parts=[<<"_session">>], method='POST'}) -> + {'session.write', #{}}; +handler_info(#httpd{path_parts=[<<"_session">>], method='DELETE'}) -> + {'session.delete', #{}}; + +handler_info(#httpd{path_parts=[_Db], method=Method}) + when Method =:= 'HEAD' orelse Method =:= 'GET' -> + {'database-info.read', #{}}; +handler_info(#httpd{path_parts=[_Db], method='POST'}) -> + {'document.write', #{}}; +handler_info(#httpd{path_parts=[_Db], method='PUT'}) -> + {'database.create', #{}}; +handler_info(#httpd{path_parts=[_Db], method='DELETE'}) -> + {'database.delete', #{}}; + +handler_info(_) -> no_match. -- cgit v1.2.1 From 8232be8e2107a7b182170e86c997bce67b3c1ca3 Mon Sep 17 00:00:00 2001 From: ILYA Khlopotov Date: Tue, 12 Nov 2019 16:21:55 +0000 Subject: Trace fdb transactions --- src/fabric/src/fabric.app.src | 1 + src/fabric/src/fabric2_fdb.erl | 47 ++++++++++++++++++++++------ src/fabric/test/fabric2_dir_prefix_tests.erl | 2 +- 3 files changed, 39 insertions(+), 11 deletions(-) diff --git a/src/fabric/src/fabric.app.src b/src/fabric/src/fabric.app.src index 77260f962..0538b19b4 100644 --- a/src/fabric/src/fabric.app.src +++ b/src/fabric/src/fabric.app.src @@ -23,6 +23,7 @@ config, couch_epi, couch, + ctrace, rexi, mem3, couch_log, diff --git a/src/fabric/src/fabric2_fdb.erl b/src/fabric/src/fabric2_fdb.erl index 0d741385c..fb2891be7 100644 --- a/src/fabric/src/fabric2_fdb.erl +++ b/src/fabric/src/fabric2_fdb.erl @@ -74,12 +74,15 @@ transactional(Fun) -> transactional(DbName, Options, Fun) when is_binary(DbName) -> - transactional(fun(Tx) -> - Fun(init_db(Tx, DbName, Options)) + with_span(Fun, #{'db.name' => DbName}, fun() -> + transactional(fun(Tx) -> + Fun(init_db(Tx, DbName, Options)) + end) end). transactional(#{tx := undefined} = Db, Fun) -> + DbName = maps:get(name, Db, undefined), try Db1 = refresh(Db), Reopen = maps:get(reopen, Db1, false), @@ -88,18 +91,25 @@ transactional(#{tx := undefined} = Db, Fun) -> true -> undefined; false -> maps:get(layer_prefix, Db2) end, - do_transaction(fun(Tx) -> - case Reopen of - true -> Fun(reopen(Db2#{tx => Tx})); - false -> Fun(Db2#{tx => Tx}) - end - end, LayerPrefix) + with_span(Fun, #{'db.name' => DbName}, fun() -> + do_transaction(fun(Tx) -> + case Reopen of + true -> Fun(reopen(Db2#{tx => Tx})); + false -> Fun(Db2#{tx => Tx}) + end + end, LayerPrefix) + end) catch throw:{?MODULE, reopen} -> - transactional(Db#{reopen => true}, Fun) + with_span('db.reopen', #{'db.name' => DbName}, fun() -> + transactional(Db#{reopen => true}, Fun) + end) end; transactional(#{tx := {erlfdb_transaction, _}} = Db, Fun) -> - Fun(Db). + DbName = maps:get(name, Db, undefined), + with_span(Fun, #{'db.name' => DbName}, fun() -> + Fun(Db) + end). do_transaction(Fun, LayerPrefix) when is_function(Fun, 1) -> @@ -1384,3 +1394,20 @@ run_on_commit_fun(Tx) -> Fun(), ok end. + +with_span(Operation, ExtraTags, Fun) -> + case ctrace:has_span() of + true -> + Tags = maps:merge(#{ + 'span.kind' => <<"client">>, + component => <<"couchdb.fabric">>, + 'db.instance' => fabric2_server:fdb_cluster(), + 'db.namespace' => fabric2_server:fdb_directory(), + 'db.type' => <<"fdb">>, + nonce => get(nonce), + pid => self() + }, ExtraTags), + ctrace:with_span(Operation, Tags, Fun); + false -> + Fun() + end. diff --git a/src/fabric/test/fabric2_dir_prefix_tests.erl b/src/fabric/test/fabric2_dir_prefix_tests.erl index c7bc8bba4..e4e78a338 100644 --- a/src/fabric/test/fabric2_dir_prefix_tests.erl +++ b/src/fabric/test/fabric2_dir_prefix_tests.erl @@ -28,7 +28,7 @@ dir_prefix_test_() -> % erlfdb, rexi and mem3 are all dependent apps for fabric. We make % sure to start them so when fabric is started during the test it % already has its dependencies - test_util:start_couch([erlfdb, rexi, mem3]) + test_util:start_couch([erlfdb, rexi, mem3, ctrace]) end, fun(Ctx) -> config:delete("fabric", "fdb_directory"), -- cgit v1.2.1 From 8163d998dbde575172cc4217f06e003b95cb45ad Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Thu, 21 Nov 2019 13:03:36 -0500 Subject: Implement node types The implementation follows the RFC [1] [1]: https://github.com/apache/couchdb-documentation/blob/master/rfcs/013-node-types.md --- src/chttpd/src/chttpd_app.erl | 4 +- src/chttpd/src/chttpd_sup.erl | 16 ++++-- src/couch_views/src/couch_views_app.erl | 4 +- src/couch_views/src/couch_views_sup.erl | 32 ++++++++---- src/fabric/src/fabric2_node_types.erl | 52 ++++++++++++++++++++ src/fabric/test/fabric2_node_types_tests.erl | 73 ++++++++++++++++++++++++++++ 6 files changed, 163 insertions(+), 18 deletions(-) create mode 100644 src/fabric/src/fabric2_node_types.erl create mode 100644 src/fabric/test/fabric2_node_types_tests.erl diff --git a/src/chttpd/src/chttpd_app.erl b/src/chttpd/src/chttpd_app.erl index d7a5aef86..770b78ef9 100644 --- a/src/chttpd/src/chttpd_app.erl +++ b/src/chttpd/src/chttpd_app.erl @@ -14,8 +14,8 @@ -behaviour(application). -export([start/2, stop/1]). -start(_Type, StartArgs) -> - chttpd_sup:start_link(StartArgs). +start(_Type, _StartArgs) -> + chttpd_sup:start_link(). stop(_State) -> ok. diff --git a/src/chttpd/src/chttpd_sup.erl b/src/chttpd/src/chttpd_sup.erl index d4bdb118c..8b51e6c40 100644 --- a/src/chttpd/src/chttpd_sup.erl +++ b/src/chttpd/src/chttpd_sup.erl @@ -18,17 +18,25 @@ -export([init/1]). --export([start_link/1]). +-export([start_link/0]). -export([handle_config_change/5, handle_config_terminate/3]). %% Helper macro for declaring children of supervisor -define(CHILD(I, Type), {I, {I, start_link, []}, permanent, 100, Type, [I]}). -start_link(Args) -> - supervisor:start_link({local,?MODULE}, ?MODULE, Args). +start_link() -> + Arg = case fabric2_node_types:is_type(api_frontend) of + true -> normal; + false -> disabled + end, + supervisor:start_link({local,?MODULE}, ?MODULE, Arg). -init([]) -> +init(disabled) -> + couch_log:notice("~p : api_frontend disabled", [?MODULE]), + {ok, {{one_for_one, 3, 10}, []}}; + +init(normal) -> Children = [ { config_listener_mon, diff --git a/src/couch_views/src/couch_views_app.erl b/src/couch_views/src/couch_views_app.erl index 5ede5ef85..7337d0580 100644 --- a/src/couch_views/src/couch_views_app.erl +++ b/src/couch_views/src/couch_views_app.erl @@ -23,8 +23,8 @@ ]). -start(_StartType, StartArgs) -> - couch_views_sup:start_link(StartArgs). +start(_StartType, _StartArgs) -> + couch_views_sup:start_link(). stop(_State) -> diff --git a/src/couch_views/src/couch_views_sup.erl b/src/couch_views/src/couch_views_sup.erl index 7650fdf14..7a72a1f33 100644 --- a/src/couch_views/src/couch_views_sup.erl +++ b/src/couch_views/src/couch_views_sup.erl @@ -18,7 +18,7 @@ -export([ - start_link/1 + start_link/0 ]). @@ -27,20 +27,32 @@ ]). -start_link(Args) -> - supervisor:start_link({local, ?MODULE}, ?MODULE, Args). +start_link() -> + Arg = case fabric2_node_types:is_type(view_indexing) of + true -> normal; + false -> builds_disabled + end, + supervisor:start_link({local, ?MODULE}, ?MODULE, Arg). -init([]) -> - Flags = #{ - strategy => one_for_one, - intensity => 1, - period => 5 - }, +init(normal) -> Children = [ #{ id => couch_views_server, start => {couch_views_server, start_link, []} } ], - {ok, {Flags, Children}}. + {ok, {flags(), Children}}; + +init(builds_disabled) -> + couch_log:notice("~p : view_indexing disabled", [?MODULE]), + couch_views_jobs:set_timeout(), + {ok, {flags(), []}}. + + +flags() -> + #{ + strategy => one_for_one, + intensity => 1, + period => 5 + }. diff --git a/src/fabric/src/fabric2_node_types.erl b/src/fabric/src/fabric2_node_types.erl new file mode 100644 index 000000000..110f04d15 --- /dev/null +++ b/src/fabric/src/fabric2_node_types.erl @@ -0,0 +1,52 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(fabric2_node_types). + + +-export([ + is_type/1 +]). + + +is_type(Type) when is_atom(Type) -> + case {from_os_env(Type), from_app_env(Type)} of + {V, _} when is_boolean(V) -> + V; + {undefined, V} when is_boolean(V) -> + V; + {undefined, undefined} -> + % When not defined anywhere assume `true`, that is by default a + % node will perform all the background tasks + true + end. + + +from_os_env(Type) when is_atom(Type) -> + StrType = erlang:atom_to_list(Type), + StrTypeUpper = string:to_upper(StrType), + case os:getenv("COUCHDB_NODE_TYPE_" ++ StrTypeUpper) of + false -> + undefined; + Str when is_list(Str) -> + case string:to_lower(Str) of + "false" -> false; + _ -> true + end + end. + + +from_app_env(Type) when is_atom(Type) -> + case application:get_env(fabric, node_types) of + undefined -> undefined; + {ok, Props} when is_list(Props) -> proplists:get_value(Type, Props) + end. diff --git a/src/fabric/test/fabric2_node_types_tests.erl b/src/fabric/test/fabric2_node_types_tests.erl new file mode 100644 index 000000000..ad400f98f --- /dev/null +++ b/src/fabric/test/fabric2_node_types_tests.erl @@ -0,0 +1,73 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(fabric2_node_types_tests). + + +-include_lib("eunit/include/eunit.hrl"). + + +-define(TDEF(A), {atom_to_list(A), fun A/0}). + + +node_types_test_() -> + { + "Test node types", + foreach, + fun() -> + os:putenv("COUCHDB_NODE_TYPE_FOO", "false"), + os:putenv("COUCHDB_NODE_TYPE_BAZ", "true"), + os:putenv("COUCHDB_NODE_TYPE_ZIG", ""), + % erlfdb, rexi and mem3 are all dependent apps for fabric. We make + % sure to start them so when fabric is started during the test it + % already has its dependencies + test_util:start_couch([erlfdb, rexi, mem3, ctrace]) + end, + fun(Ctx) -> + ok = application:stop(fabric), + test_util:stop_couch(Ctx), + application:unset_env(fabric, node_types), + os:unsetenv("COUCHDB_NODE_TYPE_FOO"), + os:unsetenv("COUCHDB_NODE_TYPE_BAZ"), + os:unsetenv("COUCHDB_NODE_TYPE_ZIG") + end, + [ + ?TDEF(basics), + ?TDEF(os_env_priority) + ] + }. + + +basics() -> + ok = application:start(fabric), + + % default is true for new types + ?assert(fabric2_node_types:is_type(some_new_node_type)), + + % defined in os env + ?assert(fabric2_node_types:is_type(baz)), + ?assert(not fabric2_node_types:is_type(foo)), + ?assert(fabric2_node_types:is_type(zig)), + + % defined in app env + application:set_env(fabric, node_types, [{zag, true}, {bam, false}]), + ?assert(fabric2_node_types:is_type(zag)), + ?assert(not fabric2_node_types:is_type(bam)). + + +os_env_priority() -> + ok = application:start(fabric), + + % os env takes precedence + application:set_env(fabric, node_types, [{foo, true}, {baz, false}]), + ?assert(not fabric2_node_types:is_type(foo)), + ?assert(fabric2_node_types:is_type(baz)). -- cgit v1.2.1 From 6ff0a112899a8a04d65fb3155b69540862be8c0a Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Thu, 21 Nov 2019 16:54:44 -0600 Subject: Add operation names for all HTTP endpoints This adds operation names to all valid HTTP end points. This covers all of `make elixir` except for seven requests that are testing specific error conditions in URL and Methods for various endpoints. --- src/chttpd/src/chttpd.erl | 6 +- src/chttpd/src/chttpd_handlers.erl | 18 +- src/chttpd/src/chttpd_httpd_handlers.erl | 454 ++++++++++++++++++++- .../src/global_changes_httpd_handlers.erl | 8 +- src/mango/src/mango_httpd_handlers.erl | 31 +- src/mem3/src/mem3_httpd_handlers.erl | 38 +- src/setup/src/setup_httpd_handlers.erl | 12 +- 7 files changed, 540 insertions(+), 27 deletions(-) diff --git a/src/chttpd/src/chttpd.erl b/src/chttpd/src/chttpd.erl index 625e4eb55..7fc9cffd4 100644 --- a/src/chttpd/src/chttpd.erl +++ b/src/chttpd/src/chttpd.erl @@ -1248,12 +1248,16 @@ start_span(Req) -> path_parts = PathParts } = Req, {OperationName, ExtraTags} = get_action(Req), + Path = case PathParts of + [] -> <<"">>; + [_ | _] -> filename:join(PathParts) + end, Tags = maps:merge(#{ peer => Peer, 'http.method' => Method, nonce => Nonce, 'http.url' => MochiReq:get(raw_path), - path_parts => PathParts, + path_parts => Path, 'span.kind' => <<"server">>, component => <<"couchdb.chttpd">> }, ExtraTags), diff --git a/src/chttpd/src/chttpd_handlers.erl b/src/chttpd/src/chttpd_handlers.erl index c07b2097b..17d2952b3 100644 --- a/src/chttpd/src/chttpd_handlers.erl +++ b/src/chttpd/src/chttpd_handlers.erl @@ -37,8 +37,24 @@ design_handler(HandlerKey, DefaultFun) -> select(collect(design_handler, [HandlerKey]), DefaultFun). handler_info(HttpReq) -> + #httpd{ + method = Method, + path_parts = PathParts + } = HttpReq, Default = {'unknown.unknown', #{}}, - select(collect(handler_info, [HttpReq]), Default). + try + select(collect(handler_info, [Method, PathParts, HttpReq]), Default) + catch Type:Reason -> + Stack = erlang:get_stacktrace(), + couch_log:error("~s :: handler_info failure for ~p : ~p:~p :: ~p", [ + ?MODULE, + get(nonce), + Type, + Reason, + Stack + ]), + Default + end. %% ------------------------------------------------------------------ %% Internal Function Definitions diff --git a/src/chttpd/src/chttpd_httpd_handlers.erl b/src/chttpd/src/chttpd_httpd_handlers.erl index 54d6dfce2..831c014b3 100644 --- a/src/chttpd/src/chttpd_httpd_handlers.erl +++ b/src/chttpd/src/chttpd_httpd_handlers.erl @@ -12,7 +12,7 @@ -module(chttpd_httpd_handlers). --export([url_handler/1, db_handler/1, design_handler/1, handler_info/1]). +-export([url_handler/1, db_handler/1, design_handler/1, handler_info/3]). -include_lib("couch/include/couch_db.hrl"). @@ -48,27 +48,439 @@ design_handler(<<"_info">>) -> fun chttpd_db:handle_design_info_req/3; design_handler(<<"_rewrite">>) -> fun chttpd_rewrite:handle_rewrite_req/3; design_handler(_) -> no_match. -%% TODO Populate in another PR -handler_info(#httpd{path_parts=[<<"_all_dbs">>], method=Method}) - when Method =:= 'HEAD' orelse Method =:= 'GET' -> - {'all-dbs.read', #{}}; -handler_info(#httpd{path_parts=[<<"_session">>], method=Method}) - when Method =:= 'HEAD' orelse Method =:= 'GET' -> +handler_info('GET', [], _) -> + {'welcome_message.read', #{}}; + +handler_info('GET', [<<"_active_tasks">>], _) -> + {'active_tasks.read', #{}}; + +handler_info('GET', [<<"_all_dbs">>], _) -> + {'all_dbs.read', #{}}; + +handler_info('POST', [<<"_dbs_info">>], _) -> + {'dbs_info.read', #{}}; + +handler_info('GET', [<<"_node">>, <<"_local">>], _) -> + {'node.name.read', #{}}; + +handler_info(Method, [<<"_node">>, <<"_local">> | Rest], HttpReq) -> + handler_info(Method, [<<"_node">>, node() | Rest], HttpReq); + +handler_info('GET', [<<"_node">>, Node, <<"_config">>], _) -> + {'node.config.all.read', #{node => Node}}; + +handler_info('GET', [<<"_node">>, Node, <<"_config">>, Section], _) -> + {'node.config.section.read', #{node => Node, 'config.section' => Section}}; + +handler_info('GET', [<<"_node">>, Node, <<"_config">>, Section, Key], _) -> + {'node.config.key.read', #{ + node => Node, + 'config.section' => Section, + 'config.key' => Key + }}; + +handler_info('PUT', [<<"_node">>, Node, <<"_config">>, Section, Key], _) -> + {'node.config.key.write', #{ + node => Node, + 'config.section' => Section, + 'config.key' => Key + }}; + +handler_info('DELETE', [<<"_node">>, Node, <<"_config">>, Section, Key], _) -> + {'node.config.key.delete', #{ + node => Node, + 'config.section' => Section, + 'config.key' => Key + }}; + +handler_info('GET', [<<"_node">>, Node, <<"_stats">> | Path], _) -> + {'node.stats.read', #{node => Node, 'stat.path' => Path}}; + +handler_info('GET', [<<"_node">>, Node, <<"_system">>], _) -> + {'node.system.read', #{node => Node}}; + +handler_info('POST', [<<"_node">>, Node, <<"_restart">>], _) -> + {'node.restart.execute', #{node => Node}}; + +handler_info('POST', [<<"_reload_query_servers">>], _) -> + {'query_servers.reload', #{}}; + +handler_info('POST', [<<"_replicate">>], _) -> + {'replication.create', #{}}; + +handler_info('GET', [<<"_scheduler">>, <<"jobs">>], _) -> + {'replication.jobs.read', #{}}; + +handler_info('GET', [<<"_scheduler">>, <<"jobs">>, JobId], _) -> + {'replication.job.read', #{'job.id' => JobId}}; + +handler_info('GET', [<<"_scheduler">>, <<"docs">>], _) -> + {'replication.docs.read', #{'db.name' => <<"_replicator">>}}; + +handler_info('GET', [<<"_scheduler">>, <<"docs">>, Db], _) -> + {'replication.docs.read', #{'db.name' => Db}}; + +handler_info('GET', [<<"_scheduler">>, <<"docs">>, Db, DocId], _) -> + {'replication.doc.read', #{'db.name' => Db, 'doc.id' => DocId}}; + +handler_info('GET', [<<"_scheduler">>, <<"docs">> | Path], _) -> + case lists:splitwith(fun(Elem) -> Elem /= <<"_replicator">> end, Path) of + {_, [<<"_replicator">>]} -> + {'replication.docs.read', #{ + 'db.name' => filename:join(Path) + }}; + {DbParts, [<<"_replicator">>, DocId]} -> + {'replication.doc.read', #{ + 'db.name' => filename:join(DbParts ++ [<<"_replicator">>]), + 'doc.id' => DocId + }}; + _ -> + no_match + end; + +handler_info('GET', [<<"_session">>], _) -> {'session.read', #{}}; -handler_info(#httpd{path_parts=[<<"_session">>], method='POST'}) -> - {'session.write', #{}}; -handler_info(#httpd{path_parts=[<<"_session">>], method='DELETE'}) -> + +handler_info('POST', [<<"_session">>], _) -> + {'session.create', #{}}; + +handler_info('DELETE', [<<"_session">>], _) -> {'session.delete', #{}}; -handler_info(#httpd{path_parts=[_Db], method=Method}) - when Method =:= 'HEAD' orelse Method =:= 'GET' -> - {'database-info.read', #{}}; -handler_info(#httpd{path_parts=[_Db], method='POST'}) -> - {'document.write', #{}}; -handler_info(#httpd{path_parts=[_Db], method='PUT'}) -> - {'database.create', #{}}; -handler_info(#httpd{path_parts=[_Db], method='DELETE'}) -> - {'database.delete', #{}}; - -handler_info(_) -> no_match. +handler_info('GET', [<<"_up">>], _) -> + {'health.read', #{}}; + +handler_info('GET', [<<"_utils">> | Path], _) -> + {'utils.read', #{'file.path' => filename:join(Path)}}; + +handler_info('GET', [<<"_uuids">>], _) -> + {'uuids.read', #{}}; + +handler_info('GET', [<<"favicon.ico">>], _) -> + {'favicon.ico.read', #{}}; + + +handler_info(Method, [<<"_", _/binary>> = Part| Rest], Req) -> + % Maybe bail here so that we don't trample over a + % different url_handler plugin. However, we continue + % on for known system databases. + DbName = case Part of + <<"_dbs">> -> '_dbs'; + <<"_global_changes">> -> '_global_changes'; + <<"_metadata">> -> '_metadata'; + <<"_nodes">> -> '_nodes'; + <<"_replicator">> -> '_replicator'; + <<"_users">> -> '_users'; + _ -> no_match + end, + if DbName == no_match -> no_match; true -> + handler_info(Method, [DbName | Rest], Req) + end; + +handler_info('GET', [Db], _) -> + {'db.info.read', #{'db.name' => Db}}; + +handler_info('PUT', [Db], _) -> + {'db.create', #{'db.name' => Db}}; + +handler_info('POST', [Db], _) -> + {'db.doc.write', #{'db.name' => Db}}; + +handler_info('DELETE', [Db], _) -> + {'db.delete', #{'db.name' => Db}}; + +handler_info(M, [Db, <<"_all_docs">>], _) when M == 'GET'; M == 'POST' -> + {'db.all_docs.read', #{'db.name' => Db}}; + +handler_info('POST', [Db, <<"_all_docs">>, <<"queries">>], _) -> + {'db.all_docs.read', #{'db.name' => Db, multi => true}}; + +handler_info('POST', [Db, <<"_bulk_docs">>], _) -> + {'db.docs.write', #{'db.name' => Db, bulk => true}}; + +handler_info('POST', [Db, <<"_bulk_get">>], _) -> + {'db.docs.read', #{'db.name' => Db, bulk => true}}; + +handler_info('GET', [Db, <<"_changes">>], _) -> + {'db.changes.read', #{'db.name' => Db}}; + +handler_info('POST', [Db, <<"_changes">>], _) -> + {'db.changes.read', #{'db.name' => Db}}; + +handler_info('POST', [Db, <<"_compact">>], _) -> + {'db.compact.execute', #{'db.name' => Db}}; + +handler_info('GET', [Db, <<"_design">>, Name], _) -> + {'db.design.doc.read', #{'db.name' => Db, 'design.id' => Name}}; + +handler_info('POST', [Db, <<"_design">>, Name], _) -> + {'db.design.doc.write', #{'db.name' => Db, 'design.id' => Name}}; + +handler_info('PUT', [Db, <<"_design">>, Name], _) -> + {'db.design.doc.write', #{'db.name' => Db, 'design.id' => Name}}; + +handler_info('COPY', [Db, <<"_design">>, Name], Req) -> + {'db.design.doc.write', #{ + 'db.name' => Db, + 'design.id' => get_copy_destination(Req), + 'copy.source.doc.id' => <<"_design/", Name/binary>> + }}; + +handler_info('DELETE', [Db, <<"_design">>, Name], _) -> + {'db.design.doc.delete', #{'db.name' => Db, 'design.id' => Name}}; + +handler_info('GET', [Db, <<"_design">>, Name, <<"_info">>], _) -> + {'db.design.info.read', #{'db.name' => Db, 'design.id' => Name}}; + +handler_info(M, [Db, <<"_design">>, Name, <<"_list">>, List, View], _) + when M == 'GET'; M == 'POST', M == 'OPTIONS' -> + {'db.design.list.read', #{ + 'db.name' => Db, + 'design.id' => Name, + 'design.list.name' => List, + 'design.view.name' => View + }}; + +handler_info(M, [Db, <<"_design">>, Name, <<"_list">>, List, Design, View], _) + when M == 'GET'; M == 'POST', M == 'OPTIONS' -> + {'db.design.list.read', #{ + 'db.name' => Db, + 'design.id' => Name, + 'design.list.name' => List, + 'design.view.source.id' => Design, + 'design.view.name' => View + }}; + +handler_info(_, [Db, <<"_design">>, Name, <<"_rewrite">> | Path], _) -> + {'db.design.rewrite.execute', #{ + 'db.name' => Db, + 'design.id' => Name, + 'rewrite.path' => filename:join(Path) + }}; + +handler_info(_, [Db, <<"_design">>, Name, <<"_show">>, Show, DocId], _) -> + {'db.design.show.execute', #{ + 'db.name' => Db, + 'design.id' => Name, + 'design.show.name' => Show, + 'design.show.doc.id' => DocId + }}; + +handler_info(_, [Db, <<"_design">>, Name, <<"_update">>, Update | Rest], _) -> + BaseTags = #{ + 'db.name' => Db, + 'design.id' => Name, + 'design.update.name' => Update + }, + Tags = case Rest of + [] -> + BaseTags; + _ -> + DocId = filename:join(Rest), + maps:put('design.update.doc.id', DocId, BaseTags) + end, + {'db.design.update.execute', Tags}; + +handler_info('POST', [Db, <<"_design">>, Name, <<"_view">>, View, <<"queries">>], _) -> + {'db.design.view.multi.read', #{ + 'db.name' => Db, + 'design.id' => Name, + 'design.view.name' => View + }}; + +handler_info(M, [Db, <<"_design">>, Name, <<"_view">>, View], _) + when M == 'GET'; M == 'POST' -> + {'db.design.view.read', #{ + 'db.name' => Db, + 'design.id' => Name, + 'design.view.name' => View + }}; + +handler_info(_, [_Db, <<"_design">>, _Name, <<"_", _/binary>> | _], _) -> + % Bail here so that we don't treat a plugin + % design handler in place of a design attachment + no_match; + +handler_info('GET', [Db, <<"_design">>, Name | Path], _) -> + {'db.design.doc.attachment.read', #{ + 'db.name' => Db, + 'design.id' => Name, + 'attachment.name' => filename:join(Path) + }}; + +handler_info('PUT', [Db, <<"_design">>, Name | Path], _) -> + {'db.design.doc.attachment.write', #{ + 'db.name' => Db, + 'design.id' => Name, + 'attachment.name' => filename:join(Path) + }}; + +handler_info('DELETE', [Db, <<"_design">>, Name | Path], _) -> + {'db.design.doc.attachment.delete', #{ + 'db.name' => Db, + 'design.id' => Name, + 'attachment.name' => filename:join(Path) + }}; + +handler_info(_, [Db, <<"_design/", Name/binary>> | Rest], Req) -> + % Recurse if someone sent us `_design%2Fname` + chttpd_handlers:handler_info(Req#httpd{ + path_parts = [Db, <<"_design">>, Name | Rest] + }); + +handler_info(M, [Db, <<"_design_docs">>], _) when M == 'GET'; M == 'POST' -> + {'db.design_docs.read', #{'db.name' => Db}}; + +handler_info('POST', [Db, <<"_design_docs">>, <<"queries">>], _) -> + {'db.design_docs.read', #{'db.name' => Db, multi => true}}; + +handler_info('POST', [Db, <<"_ensure_full_commit">>], _) -> + {'db.ensure_full_commit.execute', #{'db.name' => Db}}; + +handler_info('GET', [Db, <<"_local">>, Name], _) -> + {'db.local.doc.read', #{'db.name' => Db, 'local.id' => Name}}; + +handler_info('POST', [Db, <<"_local">>, Name], _) -> + {'db.local.doc.write', #{'db.name' => Db, 'local.id' => Name}}; + +handler_info('PUT', [Db, <<"_local">>, Name], _) -> + {'db.local.doc.write', #{'db.name' => Db, 'local.id' => Name}}; + +handler_info('COPY', [Db, <<"_local">>, Name], Req) -> + {'db.local.doc.write', #{ + 'db.name' => Db, + 'local.id' => get_copy_destination(Req), + 'copy.source.doc.id' => <<"_local/", Name/binary>> + }}; + +handler_info('DELETE', [Db, <<"_local">>, Name], _) -> + {'db.local.doc.delete', #{'db.name' => Db, 'local.id' => Name}}; + +handler_info(_, [Db, <<"_local">>, Name | _Path], _) -> + {'db.local.doc.invalid_attachment_req', #{ + 'db.name' => Db, + 'local.id' => Name + }}; + +handler_info(M, [Db, <<"_local_docs">>], _) when M == 'GET'; M == 'POST' -> + {'db.local_docs.read', #{'db.name' => Db}}; + +handler_info('POST', [Db, <<"_local_docs">>, <<"queries">>], _) -> + {'db.local_docs.read', #{'db.name' => Db, multi => true}}; + +handler_info('POST', [Db, <<"_missing_revs">>], _) -> + {'db.docs.missing_revs.execute', #{'db.name' => Db}}; + +handler_info('GET', [Db, <<"_partition">>, Partition], _) -> + {'db.partition.info.read', #{'db.name' => Db, partition => Partition}}; + +handler_info(_, [Db, <<"_partition">>, Partition | Rest], Req) -> + NewPath = case Rest of + [<<"_all_docs">> | _] -> + [Db | Rest]; + [<<"_index">> | _] -> + [Db | Rest]; + [<<"_find">> | _] -> + [Db | Rest]; + [<<"_explain">> | _] -> + [Db | Rest]; + [<<"_design">>, _Name, <<"_", _/binary>> | _] -> + [Db | Rest]; + _ -> + no_match + end, + if NewPath == no_match -> no_match; true -> + {OpName, Tags} = chttpd_handlers:handler_info(Req#httpd{ + path_parts = NewPath + }), + NewOpName = case atom_to_list(OpName) of + "db." ++ Name -> list_to_atom("db.partition." ++ Name); + Else -> list_to_atom(Else ++ ".partition") + end, + {NewOpName, maps:put(partition, Partition, Tags)} + end; + +handler_info('POST', [Db, <<"_purge">>], _) -> + {'db.docs.purge', #{'db.name' => Db}}; + +handler_info('GET', [Db, <<"_purged_infos_limit">>], _) -> + {'db.purged_infos_limit.read', #{'db.name' => Db}}; + +handler_info('PUT', [Db, <<"_purged_infos_limit">>], _) -> + {'db.purged_infos_limit.write', #{'db.name' => Db}}; + +handler_info('POST', [Db, <<"_revs_diff">>], _) -> + {'db.docs.revs_diff.execute', #{'db.name' => Db}}; + +handler_info('GET', [Db, <<"_revs_limit">>], _) -> + {'db.revs_limit.read', #{'db.name' => Db}}; + +handler_info('PUT', [Db, <<"_revs_limit">>], _) -> + {'db.revs_limit.write', #{'db.name' => Db}}; + +handler_info('GET', [Db, <<"_security">>], _) -> + {'db.security.read', #{'db.name' => Db}}; + +handler_info('PUT', [Db, <<"_security">>], _) -> + {'db.security.write', #{'db.name' => Db}}; + +handler_info(_, [Db, <<"_view_cleanup">>], _) -> + {'views.cleanup.execute', #{'db.name' => Db}}; + +handler_info(_, [_Db, <<"_", _/binary>> | _], _) -> + % Bail here for other possible db_handleres + no_match; + +handler_info('GET', [Db, DocId], _) -> + {'db.doc.read', #{'db.name' => Db, 'doc.id' => DocId}}; + +handler_info('POST', [Db, DocId], _) -> + {'db.doc.write', #{'db.name' => Db, 'design.id' => DocId}}; + +handler_info('PUT', [Db, DocId], _) -> + {'db.doc.write', #{'db.name' => Db, 'design.id' => DocId}}; + +handler_info('COPY', [Db, DocId], Req) -> + {'db.doc.write', #{ + 'db.name' => Db, + 'doc.id' => get_copy_destination(Req), + 'copy.source.doc.id' => DocId + }}; + +handler_info('DELETE', [Db, DocId], _) -> + {'db.doc.delete', #{'db.name' => Db, 'doc.id' => DocId}}; + +handler_info('GET', [Db, DocId | Path], _) -> + {'db.doc.attachment.read', #{ + 'db.name' => Db, + 'doc.id' => DocId, + 'attachment.name' => filename:join(Path) + }}; + +handler_info('PUT', [Db, DocId | Path], _) -> + {'db.doc.attachment.write', #{ + 'db.name' => Db, + 'doc.id' => DocId, + 'attachment.name' => filename:join(Path) + }}; + +handler_info('DELETE', [Db, DocId | Path], _) -> + {'db.doc.attachment.delete', #{ + 'db.name' => Db, + 'doc.id' => DocId, + 'attachment.name' => filename:join(Path) + }}; + +handler_info(_, _, _) -> + no_match. + + +get_copy_destination(Req) -> + try + {DocIdStr, _} = couch_httpd_db:parse_copy_destination_header(Req), + list_to_binary(mochiweb_util:unquote(DocIdStr)) + catch _:_ -> + unknown + end. + diff --git a/src/global_changes/src/global_changes_httpd_handlers.erl b/src/global_changes/src/global_changes_httpd_handlers.erl index b21a64b8f..94a50abc8 100644 --- a/src/global_changes/src/global_changes_httpd_handlers.erl +++ b/src/global_changes/src/global_changes_httpd_handlers.erl @@ -12,7 +12,7 @@ -module(global_changes_httpd_handlers). --export([url_handler/1, db_handler/1, design_handler/1]). +-export([url_handler/1, db_handler/1, design_handler/1, handler_info/3]). url_handler(<<"_db_updates">>) -> fun global_changes_httpd:handle_global_changes_req/1; url_handler(_) -> no_match. @@ -20,3 +20,9 @@ url_handler(_) -> no_match. db_handler(_) -> no_match. design_handler(_) -> no_match. + +handler_info('GET', [<<"_db_updates">>], _) -> + {'db_updates.read', #{}}; + +handler_info(_, _, _) -> + no_match. \ No newline at end of file diff --git a/src/mango/src/mango_httpd_handlers.erl b/src/mango/src/mango_httpd_handlers.erl index 80e5e277e..c1ddd6c4e 100644 --- a/src/mango/src/mango_httpd_handlers.erl +++ b/src/mango/src/mango_httpd_handlers.erl @@ -12,7 +12,7 @@ -module(mango_httpd_handlers). --export([url_handler/1, db_handler/1, design_handler/1]). +-export([url_handler/1, db_handler/1, design_handler/1, handler_info/3]). url_handler(_) -> no_match. @@ -22,3 +22,32 @@ db_handler(<<"_find">>) -> fun mango_httpd:handle_req/2; db_handler(_) -> no_match. design_handler(_) -> no_match. + +handler_info('GET', [Db, <<"_index">>], _) -> + {'db.mango.index.read', #{'db.name' => Db}}; + +handler_info('POST', [Db, <<"_index">>], _) -> + {'db.mango.index.create', #{'db.name' => Db}}; + +handler_info('POST', [Db, <<"_index">>, <<"_bulk_delete">>], _) -> + {'db.mango.index.delete', #{'db.name' => Db, multi => true}}; + +handler_info('DELETE', [Db, <<"_index">>, <<"_design">>, Name, Type, Idx], _) -> + {'db.mango.index.delete', #{ + 'db.name' => Db, + 'design.id' => Name, + 'index.type' => Type, + 'index.name' => Idx + }}; + +handler_info(M, [Db, <<"_index">>, <<"_design/", N/binary>>, T, I], R) -> + handler_info(M, [Db, <<"_index">>, <<"_design">>, N, T, I], R); + +handler_info('POST', [Db, <<"_explain">>], _) -> + {'db.mango.explain.execute', #{'db.name' => Db}}; + +handler_info('POST', [Db, <<"_find">>], _) -> + {'db.mango.find.execute', #{'db.name' => Db}}; + +handler_info(_, _, _) -> + no_match. \ No newline at end of file diff --git a/src/mem3/src/mem3_httpd_handlers.erl b/src/mem3/src/mem3_httpd_handlers.erl index 7dd6ab052..eeec1edf3 100644 --- a/src/mem3/src/mem3_httpd_handlers.erl +++ b/src/mem3/src/mem3_httpd_handlers.erl @@ -12,7 +12,7 @@ -module(mem3_httpd_handlers). --export([url_handler/1, db_handler/1, design_handler/1]). +-export([url_handler/1, db_handler/1, design_handler/1, handler_info/3]). url_handler(<<"_membership">>) -> fun mem3_httpd:handle_membership_req/1; url_handler(<<"_reshard">>) -> fun mem3_reshard_httpd:handle_reshard_req/1; @@ -23,3 +23,39 @@ db_handler(<<"_sync_shards">>) -> fun mem3_httpd:handle_sync_req/2; db_handler(_) -> no_match. design_handler(_) -> no_match. + +handler_info('GET', [<<"_membership">>], _) -> + {'cluster.membership.read', #{}}; + +handler_info('GET', [<<"_reshard">>], _) -> + {'reshard.summary.read', #{}}; + +handler_info('GET', [<<"_reshard">>, <<"state">>], _) -> + {'reshard.state.read', #{}}; + +handler_info('PUT', [<<"_reshard">>, <<"state">>], _) -> + {'reshard.state.write', #{}}; + +handler_info('GET', [<<"_reshard">>, <<"jobs">>], _) -> + {'reshard.jobs.read', #{}}; + +handler_info('POST', [<<"_reshard">>, <<"jobs">>], _) -> + {'reshard.jobs.create', #{}}; + +handler_info('GET', [<<"_reshard">>, <<"jobs">>, JobId], _) -> + {'reshard.job.read', #{'job.id' => JobId}}; + +handler_info('DELETE', [<<"_reshard">>, <<"jobs">>, JobId], _) -> + {'reshard.job.delete', #{'job.id' => JobId}}; + +handler_info('GET', [DbName, <<"_shards">>], _) -> + {'db.shards.read', #{'db.name' => DbName}}; + +handler_info('GET', [DbName, <<"_shards">>, DocId], _) -> + {'db.shards.read', #{'db.name' => DbName, 'doc.id' => DocId}}; + +handler_info('POST', [DbName, <<"_sync_shards">>], _) -> + {'db.shards.sync', #{'db.name' => DbName}}; + +handler_info(_, _, _) -> + no_match. diff --git a/src/setup/src/setup_httpd_handlers.erl b/src/setup/src/setup_httpd_handlers.erl index 994c217e8..e26fbc3c4 100644 --- a/src/setup/src/setup_httpd_handlers.erl +++ b/src/setup/src/setup_httpd_handlers.erl @@ -12,7 +12,7 @@ -module(setup_httpd_handlers). --export([url_handler/1, db_handler/1, design_handler/1]). +-export([url_handler/1, db_handler/1, design_handler/1, handler_info/3]). url_handler(<<"_cluster_setup">>) -> fun setup_httpd:handle_setup_req/1; url_handler(_) -> no_match. @@ -20,3 +20,13 @@ url_handler(_) -> no_match. db_handler(_) -> no_match. design_handler(_) -> no_match. + + +handler_info('GET', [<<"_cluster_setup">>], _) -> + {'cluster_setup.read', #{}}; + +handler_info('POST', [<<"_cluster_setup">>], _) -> + {'cluster_setup.write', #{}}; + +handler_info(_, _, _) -> + no_match. \ No newline at end of file -- cgit v1.2.1 From 5aeb21ce1d86c8e0e5c0c4b7bd8c188816a5de75 Mon Sep 17 00:00:00 2001 From: Eric Avdey Date: Fri, 22 Nov 2019 15:15:45 -0400 Subject: Change end-point /_up to check fdb connectivity --- src/chttpd/src/chttpd_misc.erl | 11 +++++------ test/elixir/test/basics_test.exs | 6 ++++++ 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/src/chttpd/src/chttpd_misc.erl b/src/chttpd/src/chttpd_misc.erl index 6e0d8cea2..f245875f2 100644 --- a/src/chttpd/src/chttpd_misc.erl +++ b/src/chttpd/src/chttpd_misc.erl @@ -291,12 +291,11 @@ handle_up_req(#httpd{method='GET'} = Req) -> "nolb" -> send_json(Req, 404, {[{status, nolb}]}); _ -> - {ok, {Status}} = mem3_seeds:get_status(), - case couch_util:get_value(status, Status) of - ok -> - send_json(Req, 200, {Status}); - seeding -> - send_json(Req, 404, {Status}) + try + fabric2_db:list_dbs([{limit, 0}]), + send_json(Req, 200, {[{status, ok}]}) + catch error:{timeout, _} -> + send_json(Req, 404, {[{status, backend_unavailable}]}) end end; diff --git a/test/elixir/test/basics_test.exs b/test/elixir/test/basics_test.exs index 21e05dfcf..f4f95689a 100644 --- a/test/elixir/test/basics_test.exs +++ b/test/elixir/test/basics_test.exs @@ -18,6 +18,12 @@ defmodule BasicsTest do assert Couch.get("/").body["couchdb"] == "Welcome", "Should say welcome" end + test "Ready endpoint" do + resp = Couch.get("/_up") + assert resp.status_code == 200 + assert resp.body["status"] == "ok" + end + @tag :with_db test "PUT on existing DB should return 412 instead of 500", context do db_name = context[:db_name] -- cgit v1.2.1 From f943ac10e223d765aada4b17ae90403cbc5b0a4f Mon Sep 17 00:00:00 2001 From: Garren Smith Date: Tue, 26 Nov 2019 16:27:50 +0200 Subject: Optimize view read latency when the view ready If the view is already built, read the view status and view in the same transaction. This will improve view read latency. --- src/couch_views/src/couch_views.erl | 46 +++++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 20 deletions(-) diff --git a/src/couch_views/src/couch_views.erl b/src/couch_views/src/couch_views.erl index 7c7588c67..322415b91 100644 --- a/src/couch_views/src/couch_views.erl +++ b/src/couch_views/src/couch_views.erl @@ -44,36 +44,42 @@ query(Db, DDoc, ViewName, Callback, Acc0, Args0) -> false -> ok end, - ok = maybe_update_view(Db, Mrst, Args3), - try - couch_views_reader:read(Db, Mrst, ViewName, Callback, Acc0, Args3) - after - UpdateAfter = Args3#mrargs.update == lazy, - if UpdateAfter == false -> ok; true -> - couch_views_jobs:build_view_async(Db, Mrst) - end + fabric2_fdb:transactional(Db, fun(TxDb) -> + ok = maybe_update_view(TxDb, Mrst, Args3), + read_view(TxDb, Mrst, ViewName, Callback, Acc0, Args3) + end) + catch throw:{build_view, WaitSeq} -> + couch_views_jobs:build_view(Db, Mrst, WaitSeq), + read_view(Db, Mrst, ViewName, Callback, Acc0, Args3) end. +read_view(Db, Mrst, ViewName, Callback, Acc0, Args) -> + fabric2_fdb:transactional(Db, fun(TxDb) -> + try + couch_views_reader:read(TxDb, Mrst, ViewName, Callback, Acc0, Args) + after + UpdateAfter = Args#mrargs.update == lazy, + if UpdateAfter == false -> ok; true -> + couch_views_jobs:build_view_async(TxDb, Mrst) + end + end + end). + + maybe_update_view(_Db, _Mrst, #mrargs{update = false}) -> ok; maybe_update_view(_Db, _Mrst, #mrargs{update = lazy}) -> ok; -maybe_update_view(Db, Mrst, _Args) -> - WaitSeq = fabric2_fdb:transactional(Db, fun(TxDb) -> - DbSeq = fabric2_db:get_update_seq(TxDb), - ViewSeq = couch_views_fdb:get_update_seq(TxDb, Mrst), - case DbSeq == ViewSeq of - true -> ready; - false -> DbSeq - end - end), - - if WaitSeq == ready -> ok; true -> - couch_views_jobs:build_view(Db, Mrst, WaitSeq) +maybe_update_view(TxDb, Mrst, _Args) -> + DbSeq = fabric2_db:get_update_seq(TxDb), + ViewSeq = couch_views_fdb:get_update_seq(TxDb, Mrst), + case DbSeq == ViewSeq of + true -> ok; + false -> throw({build_view, DbSeq}) end. -- cgit v1.2.1 From b971493c4d8299e5288e690ca346cbc6d963cb0a Mon Sep 17 00:00:00 2001 From: Garren Smith Date: Mon, 25 Nov 2019 15:20:29 +0200 Subject: Retry for failed indexes builds Retry building a failing index for a set number of retries. If it never completes, then return the error to the user. --- src/couch_views/src/couch_views_indexer.erl | 74 +++++++++++++++++++++++++++-- src/couch_views/src/couch_views_jobs.erl | 3 +- test/elixir/test/map_test.exs | 32 +++++++++++++ 3 files changed, 103 insertions(+), 6 deletions(-) diff --git a/src/couch_views/src/couch_views_indexer.erl b/src/couch_views/src/couch_views_indexer.erl index 7c05c1d60..75e4b368f 100644 --- a/src/couch_views/src/couch_views_indexer.erl +++ b/src/couch_views/src/couch_views_indexer.erl @@ -35,11 +35,13 @@ spawn_link() -> init() -> - {ok, Job, Data} = couch_jobs:accept(?INDEX_JOB_TYPE, #{}), + {ok, Job, Data0} = couch_jobs:accept(?INDEX_JOB_TYPE, #{}), + Data = upgrade_data(Data0), #{ <<"db_name">> := DbName, <<"ddoc_id">> := DDocId, - <<"sig">> := JobSig + <<"sig">> := JobSig, + <<"retries">> := Retries } = Data, {ok, Db} = try @@ -87,7 +89,63 @@ init() -> design_opts => Mrst#mrst.design_opts }, - update(Db, Mrst, State). + try + update(Db, Mrst, State) + catch + exit:normal -> + ok; + Error:Reason -> + NewRetry = Retries + 1, + RetryLimit = retry_limit(), + + case should_retry(NewRetry, RetryLimit, Reason) of + true -> + DataErr = Data#{<<"retries">> := NewRetry}, + % Set the last_seq to 0 so that it doesn't trigger a + % successful view build for anyone listening to the + % couch_views_jobs:wait_for_job + % Note this won't cause the view to rebuild from 0 again + StateErr = State#{job_data := DataErr, last_seq := <<"0">>}, + report_progress(StateErr, update); + false -> + NewData = add_error(Error, Reason, Data), + couch_jobs:finish(undefined, Job, NewData), + exit(normal) + end + end. + + +upgrade_data(Data) -> + case maps:is_key(<<"retries">>, Data) of + true -> Data; + false -> Data#{<<"retries">> =>0} + end. + + +% Transaction limit exceeded don't retry +should_retry(_, _, {erlfdb_error, 2101}) -> + false; + +should_retry(Retries, RetryLimit, _) when Retries < RetryLimit -> + true; + +should_retry(_, _, _) -> + false. + + +add_error(error, {erlfdb_error, Code}, Data) -> + CodeBin = couch_util:to_binary(Code), + CodeString = erlfdb:get_error_string(Code), + Data#{ + error => foundationdb_error, + reason => list_to_binary([CodeBin, <<"-">>, CodeString]) + }; + +add_error(Error, Reason, Data) -> + Data#{ + error => couch_util:to_binary(Error), + reason => couch_util:to_binary(Reason) + }. update(#{} = Db, Mrst0, State0) -> @@ -322,7 +380,8 @@ report_progress(State, UpdateType) -> #{ <<"db_name">> := DbName, <<"ddoc_id">> := DDocId, - <<"sig">> := Sig + <<"sig">> := Sig, + <<"retries">> := Retries } = JobData, % Reconstruct from scratch to remove any @@ -331,7 +390,8 @@ report_progress(State, UpdateType) -> <<"db_name">> => DbName, <<"ddoc_id">> => DDocId, <<"sig">> => Sig, - <<"view_seq">> => LastSeq + <<"view_seq">> => LastSeq, + <<"retries">> => Retries }, case UpdateType of @@ -356,3 +416,7 @@ report_progress(State, UpdateType) -> num_changes() -> config:get_integer("couch_views", "change_limit", 100). + + +retry_limit() -> + config:get_integer("couch_views", "retry_limit", 3). diff --git a/src/couch_views/src/couch_views_jobs.erl b/src/couch_views/src/couch_views_jobs.erl index 87e4fea6a..7e0ac9765 100644 --- a/src/couch_views/src/couch_views_jobs.erl +++ b/src/couch_views/src/couch_views_jobs.erl @@ -96,7 +96,8 @@ job_data(Db, Mrst) -> #{ db_name => fabric2_db:name(Db), ddoc_id => DDocId, - sig => fabric2_util:to_hex(Sig) + sig => fabric2_util:to_hex(Sig), + retries => 0 }. diff --git a/test/elixir/test/map_test.exs b/test/elixir/test/map_test.exs index fa1758767..bccd4173b 100644 --- a/test/elixir/test/map_test.exs +++ b/test/elixir/test/map_test.exs @@ -503,6 +503,38 @@ defmodule ViewMapTest do assert keys == ["bar"] end + test "send error for failed indexing", context do + db_name = context[:db_name] + + docs = [ + %{_id: "doc1", foo: "foo", bar: "bar"}, + %{ + _id: "_design/view1", + views: %{ + view: %{ + map: """ + function (doc) { + for (var i=0; i<10000; i++) { + emit({doc: doc._id + 1}, doc._id); + } + } + """ + } + } + } + ] + + resp = Couch.post("/#{db_name}/_bulk_docs", body: %{:docs => docs}) + assert resp.status_code == 201 + + url = "/#{db_name}/_design/view1/_view/view" + + resp = Couch.get(url, timeout: 500_000) + assert resp.status_code == 500 + %{:body => %{"error" => error}} = resp + assert error == "foundationdb_error" + end + def update_doc_value(db_name, id, value) do resp = Couch.get("/#{db_name}/#{id}") doc = convert(resp.body) -- cgit v1.2.1 From a59c953686cb3fc583c58ef27e8be5b0f1c9a394 Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Wed, 4 Dec 2019 13:13:26 -0600 Subject: Normalize fabric2 test suite This change ensures that all test names are visible and follows a single unified pattern for each test module. --- src/fabric/test/fabric2_changes_fold_tests.erl | 17 +++-- src/fabric/test/fabric2_db_crud_tests.erl | 16 ++-- src/fabric/test/fabric2_db_misc_tests.erl | 24 +++--- src/fabric/test/fabric2_db_security_tests.erl | 33 +++++---- src/fabric/test/fabric2_dir_prefix_tests.erl | 24 +++--- src/fabric/test/fabric2_doc_count_tests.erl | 13 ++-- src/fabric/test/fabric2_doc_crud_tests.erl | 85 +++++++++++----------- src/fabric/test/fabric2_doc_fold_tests.erl | 29 +++----- src/fabric/test/fabric2_fdb_tx_retry_tests.erl | 48 ++++++------ src/fabric/test/fabric2_local_doc_fold_tests.erl | 29 +++----- src/fabric/test/fabric2_node_types_tests.erl | 21 ++---- src/fabric/test/fabric2_rev_stemming.erl | 23 +++--- src/fabric/test/fabric2_test.hrl | 25 +++++++ src/fabric/test/fabric2_trace_db_create_tests.erl | 9 ++- src/fabric/test/fabric2_trace_db_delete_tests.erl | 7 +- src/fabric/test/fabric2_trace_db_open_tests.erl | 7 +- src/fabric/test/fabric2_trace_doc_create_tests.erl | 13 ++-- 17 files changed, 214 insertions(+), 209 deletions(-) create mode 100644 src/fabric/test/fabric2_test.hrl diff --git a/src/fabric/test/fabric2_changes_fold_tests.erl b/src/fabric/test/fabric2_changes_fold_tests.erl index 892b448b4..8a29bcb00 100644 --- a/src/fabric/test/fabric2_changes_fold_tests.erl +++ b/src/fabric/test/fabric2_changes_fold_tests.erl @@ -16,6 +16,7 @@ -include_lib("couch/include/couch_db.hrl"). -include_lib("couch/include/couch_eunit.hrl"). -include_lib("eunit/include/eunit.hrl"). +-include("fabric2_test.hrl"). -define(DOC_COUNT, 25). @@ -28,14 +29,14 @@ changes_fold_test_() -> setup, fun setup/0, fun cleanup/1, - {with, [ - fun fold_changes_basic/1, - fun fold_changes_since_now/1, - fun fold_changes_since_seq/1, - fun fold_changes_basic_rev/1, - fun fold_changes_since_now_rev/1, - fun fold_changes_since_seq_rev/1 - ]} + with([ + ?TDEF(fold_changes_basic), + ?TDEF(fold_changes_since_now), + ?TDEF(fold_changes_since_seq), + ?TDEF(fold_changes_basic_rev), + ?TDEF(fold_changes_since_now_rev), + ?TDEF(fold_changes_since_seq_rev) + ]) } }. diff --git a/src/fabric/test/fabric2_db_crud_tests.erl b/src/fabric/test/fabric2_db_crud_tests.erl index 24deeb2dc..cc44f7d6b 100644 --- a/src/fabric/test/fabric2_db_crud_tests.erl +++ b/src/fabric/test/fabric2_db_crud_tests.erl @@ -15,9 +15,7 @@ -include_lib("couch/include/couch_eunit.hrl"). -include_lib("eunit/include/eunit.hrl"). - - --define(TDEF(A), {atom_to_list(A), fun A/0}). +-include("fabric2_test.hrl"). crud_test_() -> @@ -27,24 +25,24 @@ crud_test_() -> setup, fun() -> test_util:start_couch([fabric]) end, fun test_util:stop_couch/1, - [ + with([ ?TDEF(create_db), ?TDEF(open_db), ?TDEF(delete_db), ?TDEF(list_dbs) - ] + ]) } }. -create_db() -> +create_db(_) -> DbName = ?tempdb(), ?assertMatch({ok, _}, fabric2_db:create(DbName, [])), ?assertEqual(true, ets:member(fabric2_server, DbName)), ?assertEqual({error, file_exists}, fabric2_db:create(DbName, [])). -open_db() -> +open_db(_) -> DbName = ?tempdb(), ?assertError(database_does_not_exist, fabric2_db:open(DbName, [])), @@ -59,7 +57,7 @@ open_db() -> ?assertMatch({ok, _}, fabric2_db:open(DbName, [])). -delete_db() -> +delete_db(_) -> DbName = ?tempdb(), ?assertError(database_does_not_exist, fabric2_db:delete(DbName, [])), @@ -72,7 +70,7 @@ delete_db() -> ?assertError(database_does_not_exist, fabric2_db:open(DbName, [])). -list_dbs() -> +list_dbs(_) -> DbName = ?tempdb(), AllDbs1 = fabric2_db:list_dbs(), diff --git a/src/fabric/test/fabric2_db_misc_tests.erl b/src/fabric/test/fabric2_db_misc_tests.erl index 913b6aa98..12fc3e50b 100644 --- a/src/fabric/test/fabric2_db_misc_tests.erl +++ b/src/fabric/test/fabric2_db_misc_tests.erl @@ -17,9 +17,7 @@ -include_lib("couch/include/couch_eunit.hrl"). -include_lib("eunit/include/eunit.hrl"). -include("fabric2.hrl"). - - --define(TDEF(A), {atom_to_list(A), fun A/1}). +-include("fabric2_test.hrl"). misc_test_() -> @@ -29,16 +27,16 @@ misc_test_() -> setup, fun setup/0, fun cleanup/1, - {with, [ - fun empty_db_info/1, - fun accessors/1, - fun set_revs_limit/1, - fun set_security/1, - fun is_system_db/1, - fun ensure_full_commit/1, - fun metadata_bump/1, - fun db_version_bump/1 - ]} + with([ + ?TDEF(empty_db_info), + ?TDEF(accessors), + ?TDEF(set_revs_limit), + ?TDEF(set_security), + ?TDEF(is_system_db), + ?TDEF(ensure_full_commit), + ?TDEF(metadata_bump), + ?TDEF(db_version_bump) + ]) } }. diff --git a/src/fabric/test/fabric2_db_security_tests.erl b/src/fabric/test/fabric2_db_security_tests.erl index 501545484..063979a3f 100644 --- a/src/fabric/test/fabric2_db_security_tests.erl +++ b/src/fabric/test/fabric2_db_security_tests.erl @@ -16,6 +16,7 @@ -include_lib("couch/include/couch_db.hrl"). -include_lib("couch/include/couch_eunit.hrl"). -include_lib("eunit/include/eunit.hrl"). +-include("fabric2_test.hrl"). security_test_() -> @@ -25,22 +26,22 @@ security_test_() -> setup, fun setup/0, fun cleanup/1, - {with, [ - fun check_is_admin/1, - fun check_is_not_admin/1, - fun check_is_admin_role/1, - fun check_is_not_admin_role/1, - fun check_is_member_name/1, - fun check_is_not_member_name/1, - fun check_is_member_role/1, - fun check_is_not_member_role/1, - fun check_admin_is_member/1, - fun check_is_member_of_public_db/1, - fun check_set_user_ctx/1, - fun check_forbidden/1, - fun check_fail_no_opts/1, - fun check_fail_name_null/1 - ]} + with([ + ?TDEF(check_is_admin), + ?TDEF(check_is_not_admin), + ?TDEF(check_is_admin_role), + ?TDEF(check_is_not_admin_role), + ?TDEF(check_is_member_name), + ?TDEF(check_is_not_member_name), + ?TDEF(check_is_member_role), + ?TDEF(check_is_not_member_role), + ?TDEF(check_admin_is_member), + ?TDEF(check_is_member_of_public_db), + ?TDEF(check_set_user_ctx), + ?TDEF(check_forbidden), + ?TDEF(check_fail_no_opts), + ?TDEF(check_fail_name_null) + ]) } }. diff --git a/src/fabric/test/fabric2_dir_prefix_tests.erl b/src/fabric/test/fabric2_dir_prefix_tests.erl index e4e78a338..75d68a80f 100644 --- a/src/fabric/test/fabric2_dir_prefix_tests.erl +++ b/src/fabric/test/fabric2_dir_prefix_tests.erl @@ -15,34 +15,34 @@ -include_lib("couch/include/couch_eunit.hrl"). -include_lib("eunit/include/eunit.hrl"). - - --define(TDEF(A), {atom_to_list(A), fun A/0}). +-include("fabric2_test.hrl"). dir_prefix_test_() -> { "Test couchdb fdb directory prefix", - foreach, + setup, fun() -> % erlfdb, rexi and mem3 are all dependent apps for fabric. We make % sure to start them so when fabric is started during the test it % already has its dependencies - test_util:start_couch([erlfdb, rexi, mem3, ctrace]) + test_util:start_couch([erlfdb, rexi, mem3, ctrace, fabric]) end, fun(Ctx) -> config:delete("fabric", "fdb_directory"), - ok = application:stop(fabric), test_util:stop_couch(Ctx) end, - [ + with([ ?TDEF(default_prefix), ?TDEF(custom_prefix) - ] + ]) }. -default_prefix() -> +default_prefix(_) -> + erase(fdb_directory), + ok = config:delete("fabric", "fdb_directory", false), + ok = application:stop(fabric), ok = application:start(fabric), ?assertEqual([<<"couchdb">>], fabric2_server:fdb_directory()), @@ -55,8 +55,10 @@ default_prefix() -> ?assertMatch({ok, _}, fabric2_db:create(DbName, [])). -custom_prefix() -> - ok = config:set("fabric", "fdb_directory", "couchdb_foo"), +custom_prefix(_) -> + erase(fdb_directory), + ok = config:set("fabric", "fdb_directory", "couchdb_foo", false), + ok = application:stop(fabric), ok = application:start(fabric), ?assertEqual([<<"couchdb_foo">>], fabric2_server:fdb_directory()), diff --git a/src/fabric/test/fabric2_doc_count_tests.erl b/src/fabric/test/fabric2_doc_count_tests.erl index 743ae7665..7aaf288f4 100644 --- a/src/fabric/test/fabric2_doc_count_tests.erl +++ b/src/fabric/test/fabric2_doc_count_tests.erl @@ -16,6 +16,7 @@ -include_lib("couch/include/couch_db.hrl"). -include_lib("couch/include/couch_eunit.hrl"). -include_lib("eunit/include/eunit.hrl"). +-include("fabric2_test.hrl"). -define(DOC_COUNT, 10). @@ -28,12 +29,12 @@ doc_count_test_() -> setup, fun setup/0, fun cleanup/1, - {with, [ - fun normal_docs/1, - fun replicated_docs/1, - fun design_docs/1, - fun local_docs/1 - ]} + with([ + ?TDEF(normal_docs), + ?TDEF(replicated_docs), + ?TDEF(design_docs), + ?TDEF(local_docs) + ]) } }. diff --git a/src/fabric/test/fabric2_doc_crud_tests.erl b/src/fabric/test/fabric2_doc_crud_tests.erl index a9085be77..184eb4a66 100644 --- a/src/fabric/test/fabric2_doc_crud_tests.erl +++ b/src/fabric/test/fabric2_doc_crud_tests.erl @@ -17,6 +17,7 @@ -include_lib("couch/include/couch_eunit.hrl"). -include_lib("eunit/include/eunit.hrl"). -include("fabric2.hrl"). +-include("fabric2_test.hrl"). doc_crud_test_() -> @@ -26,48 +27,48 @@ doc_crud_test_() -> setup, fun setup/0, fun cleanup/1, - {with, [ - fun open_missing_doc/1, - fun create_new_doc/1, - fun create_ddoc_basic/1, - fun create_ddoc_requires_admin/1, - fun create_ddoc_requires_validation/1, - fun create_ddoc_requires_compilation/1, - fun can_create_a_partitioned_ddoc/1, - fun update_doc_basic/1, - fun update_ddoc_basic/1, - fun update_doc_replicated/1, - fun update_doc_replicated_add_conflict/1, - fun update_doc_replicated_changes_winner/1, - fun update_doc_replicated_extension/1, - fun update_doc_replicate_existing_rev/1, - fun update_winning_conflict_branch/1, - fun update_non_winning_conflict_branch/1, - fun delete_doc_basic/1, - fun delete_changes_winner/1, - fun recreate_doc_basic/1, - fun conflict_on_create_new_with_rev/1, - fun conflict_on_update_with_no_rev/1, - fun conflict_on_create_as_deleted/1, - fun conflict_on_recreate_as_deleted/1, - fun conflict_on_extend_deleted/1, - fun open_doc_revs_basic/1, - fun open_doc_revs_all/1, - fun open_doc_revs_latest/1, - fun get_missing_revs_basic/1, - fun get_missing_revs_on_missing_doc/1, - fun open_missing_local_doc/1, - fun create_local_doc_basic/1, - fun update_local_doc_basic/1, - fun delete_local_doc_basic/1, - fun recreate_local_doc/1, - fun create_local_doc_bad_rev/1, - fun create_local_doc_random_rev/1, - fun create_a_large_local_doc/1, - fun create_2_large_local_docs/1, - fun local_doc_with_previous_encoding/1, - fun before_doc_update_skips_local_docs/1 - ]} + with([ + ?TDEF(open_missing_doc), + ?TDEF(create_new_doc), + ?TDEF(create_ddoc_basic), + ?TDEF(create_ddoc_requires_admin), + ?TDEF(create_ddoc_requires_validation), + ?TDEF(create_ddoc_requires_compilation), + ?TDEF(can_create_a_partitioned_ddoc), + ?TDEF(update_doc_basic), + ?TDEF(update_ddoc_basic), + ?TDEF(update_doc_replicated), + ?TDEF(update_doc_replicated_add_conflict), + ?TDEF(update_doc_replicated_changes_winner), + ?TDEF(update_doc_replicated_extension), + ?TDEF(update_doc_replicate_existing_rev), + ?TDEF(update_winning_conflict_branch), + ?TDEF(update_non_winning_conflict_branch), + ?TDEF(delete_doc_basic), + ?TDEF(delete_changes_winner), + ?TDEF(recreate_doc_basic), + ?TDEF(conflict_on_create_new_with_rev), + ?TDEF(conflict_on_update_with_no_rev), + ?TDEF(conflict_on_create_as_deleted), + ?TDEF(conflict_on_recreate_as_deleted), + ?TDEF(conflict_on_extend_deleted), + ?TDEF(open_doc_revs_basic), + ?TDEF(open_doc_revs_all), + ?TDEF(open_doc_revs_latest), + ?TDEF(get_missing_revs_basic), + ?TDEF(get_missing_revs_on_missing_doc), + ?TDEF(open_missing_local_doc), + ?TDEF(create_local_doc_basic), + ?TDEF(update_local_doc_basic), + ?TDEF(delete_local_doc_basic), + ?TDEF(recreate_local_doc), + ?TDEF(create_local_doc_bad_rev), + ?TDEF(create_local_doc_random_rev), + ?TDEF(create_a_large_local_doc), + ?TDEF(create_2_large_local_docs), + ?TDEF(local_doc_with_previous_encoding), + ?TDEF(before_doc_update_skips_local_docs) + ]) } }. diff --git a/src/fabric/test/fabric2_doc_fold_tests.erl b/src/fabric/test/fabric2_doc_fold_tests.erl index 3cb68bdac..6262a10fe 100644 --- a/src/fabric/test/fabric2_doc_fold_tests.erl +++ b/src/fabric/test/fabric2_doc_fold_tests.erl @@ -16,20 +16,11 @@ -include_lib("couch/include/couch_db.hrl"). -include_lib("couch/include/couch_eunit.hrl"). -include_lib("eunit/include/eunit.hrl"). +-include("fabric2_test.hrl"). -define(DOC_COUNT, 50). -%% eunit implementation of {with, Tests} doesn't detect test name correctly -with(Tests) -> - fun(ArgsTuple) -> - [{Name, ?_test(Fun(ArgsTuple))} || {Name, Fun} <- Tests] - ++ - [{Name, {timeout, Timeout, ?_test(Fun(ArgsTuple))}} || {Name, Timeout, Fun} <- Tests] - end. - --define(NAMED(A), {atom_to_list(A), fun A/1}). --define(WITH_TIMEOUT(Timeout, A), {atom_to_list(A), Timeout, fun A/1}). doc_fold_test_() -> { @@ -39,15 +30,15 @@ doc_fold_test_() -> fun setup/0, fun cleanup/1, with([ - ?NAMED(fold_docs_basic), - ?NAMED(fold_docs_rev), - ?NAMED(fold_docs_with_start_key), - ?NAMED(fold_docs_with_end_key), - ?NAMED(fold_docs_with_both_keys_the_same), - ?WITH_TIMEOUT(10000, fold_docs_with_different_keys), - ?NAMED(fold_docs_with_limit), - ?NAMED(fold_docs_with_skip), - ?NAMED(fold_docs_with_skip_and_limit) + ?TDEF(fold_docs_basic), + ?TDEF(fold_docs_rev), + ?TDEF(fold_docs_with_start_key), + ?TDEF(fold_docs_with_end_key), + ?TDEF(fold_docs_with_both_keys_the_same), + ?TDEF(fold_docs_with_different_keys, 10000), + ?TDEF(fold_docs_with_limit), + ?TDEF(fold_docs_with_skip), + ?TDEF(fold_docs_with_skip_and_limit) ]) } }. diff --git a/src/fabric/test/fabric2_fdb_tx_retry_tests.erl b/src/fabric/test/fabric2_fdb_tx_retry_tests.erl index c924ce52a..7fb0f21d0 100644 --- a/src/fabric/test/fabric2_fdb_tx_retry_tests.erl +++ b/src/fabric/test/fabric2_fdb_tx_retry_tests.erl @@ -14,12 +14,26 @@ -include_lib("eunit/include/eunit.hrl"). +-include("fabric2_test.hrl"). --define(TDEF(A), {atom_to_list(A), fun A/0}). +retry_test_() -> + { + setup, + fun setup/0, + fun cleanup/1, + with([ + ?TDEF(read_only_no_retry), + ?TDEF(read_only_commit_unknown_result), + ?TDEF(run_on_first_try), + ?TDEF(retry_when_commit_conflict), + ?TDEF(retry_when_txid_not_found), + ?TDEF(no_retry_when_txid_found) + ]) + }. -meck_setup() -> +setup() -> meck:new(erlfdb), meck:new(fabric2_txids), EnvSt = case application:get_env(fabric, db) of @@ -30,7 +44,7 @@ meck_setup() -> EnvSt. -meck_cleanup(EnvSt) -> +cleanup(EnvSt) -> case EnvSt of {ok, Db} -> application:set_env(fabric, db, Db); undefined -> application:unset_env(fabric, db) @@ -38,23 +52,7 @@ meck_cleanup(EnvSt) -> meck:unload(). -retry_test_() -> - { - foreach, - fun meck_setup/0, - fun meck_cleanup/1, - [ - ?TDEF(read_only_no_retry), - ?TDEF(read_only_commit_unknown_result), - ?TDEF(run_on_first_try), - ?TDEF(retry_when_commit_conflict), - ?TDEF(retry_when_txid_not_found), - ?TDEF(no_retry_when_txid_found) - ] - }. - - -read_only_no_retry() -> +read_only_no_retry(_) -> meck:expect(erlfdb, transactional, fun(_Db, UserFun) -> UserFun(not_a_real_transaction) end), @@ -72,7 +70,7 @@ read_only_no_retry() -> ?assert(meck:validate([erlfdb, fabric2_txids])). -read_only_commit_unknown_result() -> +read_only_commit_unknown_result(_) -> % Not 100% certain that this would ever actually % happen in the wild but might as well test that % we don't blow up if it does. @@ -93,7 +91,7 @@ read_only_commit_unknown_result() -> ?assert(meck:validate([erlfdb, fabric2_txids])). -run_on_first_try() -> +run_on_first_try(_) -> meck:expect(erlfdb, transactional, fun(_Db, UserFun) -> UserFun(not_a_real_transaction) end), @@ -113,7 +111,7 @@ run_on_first_try() -> ?assert(meck:validate([erlfdb, fabric2_txids])). -retry_when_commit_conflict() -> +retry_when_commit_conflict(_) -> meck:expect(erlfdb, transactional, fun(_Db, UserFun) -> UserFun(not_a_real_transaction) end), @@ -133,7 +131,7 @@ retry_when_commit_conflict() -> ?assert(meck:validate([erlfdb, fabric2_txids])). -retry_when_txid_not_found() -> +retry_when_txid_not_found(_) -> meck:expect(erlfdb, transactional, fun(_Db, UserFun) -> UserFun(not_a_real_transaction) end), @@ -157,7 +155,7 @@ retry_when_txid_not_found() -> ?assert(meck:validate([erlfdb, fabric2_txids])). -no_retry_when_txid_found() -> +no_retry_when_txid_found(_) -> meck:expect(erlfdb, transactional, fun(_Db, UserFun) -> UserFun(not_a_real_transaction) end), diff --git a/src/fabric/test/fabric2_local_doc_fold_tests.erl b/src/fabric/test/fabric2_local_doc_fold_tests.erl index 82203b433..e3ff0eb21 100644 --- a/src/fabric/test/fabric2_local_doc_fold_tests.erl +++ b/src/fabric/test/fabric2_local_doc_fold_tests.erl @@ -16,20 +16,11 @@ -include_lib("couch/include/couch_db.hrl"). -include_lib("couch/include/couch_eunit.hrl"). -include_lib("eunit/include/eunit.hrl"). +-include("fabric2_test.hrl"). -define(DOC_COUNT, 50). -%% eunit implementation of {with, Tests} doesn't detect test name correctly -with(Tests) -> - fun(ArgsTuple) -> - [{Name, ?_test(Fun(ArgsTuple))} || {Name, Fun} <- Tests] - ++ - [{Name, {timeout, Timeout, ?_test(Fun(ArgsTuple))}} || {Name, Timeout, Fun} <- Tests] - end. - --define(NAMED(A), {atom_to_list(A), fun A/1}). --define(WITH_TIMEOUT(Timeout, A), {atom_to_list(A), Timeout, fun A/1}). doc_fold_test_() -> { @@ -39,15 +30,15 @@ doc_fold_test_() -> fun setup/0, fun cleanup/1, with([ - ?NAMED(fold_local_docs_basic), - ?NAMED(fold_local_docs_rev), - ?NAMED(fold_local_docs_with_start_key), - ?NAMED(fold_local_docs_with_end_key), - ?NAMED(fold_local_docs_with_both_keys_the_same), - ?WITH_TIMEOUT(15000, fold_local_docs_with_different_keys), - ?NAMED(fold_local_docs_with_limit), - ?NAMED(fold_local_docs_with_skip), - ?NAMED(fold_local_docs_with_skip_and_limit) + ?TDEF(fold_local_docs_basic), + ?TDEF(fold_local_docs_rev), + ?TDEF(fold_local_docs_with_start_key), + ?TDEF(fold_local_docs_with_end_key), + ?TDEF(fold_local_docs_with_both_keys_the_same), + ?TDEF(fold_local_docs_with_different_keys, 15000), + ?TDEF(fold_local_docs_with_limit), + ?TDEF(fold_local_docs_with_skip), + ?TDEF(fold_local_docs_with_skip_and_limit) ]) } }. diff --git a/src/fabric/test/fabric2_node_types_tests.erl b/src/fabric/test/fabric2_node_types_tests.erl index ad400f98f..074afe86b 100644 --- a/src/fabric/test/fabric2_node_types_tests.erl +++ b/src/fabric/test/fabric2_node_types_tests.erl @@ -14,15 +14,13 @@ -include_lib("eunit/include/eunit.hrl"). - - --define(TDEF(A), {atom_to_list(A), fun A/0}). +-include("fabric2_test.hrl"). node_types_test_() -> { "Test node types", - foreach, + setup, fun() -> os:putenv("COUCHDB_NODE_TYPE_FOO", "false"), os:putenv("COUCHDB_NODE_TYPE_BAZ", "true"), @@ -30,26 +28,23 @@ node_types_test_() -> % erlfdb, rexi and mem3 are all dependent apps for fabric. We make % sure to start them so when fabric is started during the test it % already has its dependencies - test_util:start_couch([erlfdb, rexi, mem3, ctrace]) + test_util:start_couch([erlfdb, rexi, mem3, ctrace, fabric]) end, fun(Ctx) -> - ok = application:stop(fabric), test_util:stop_couch(Ctx), application:unset_env(fabric, node_types), os:unsetenv("COUCHDB_NODE_TYPE_FOO"), os:unsetenv("COUCHDB_NODE_TYPE_BAZ"), os:unsetenv("COUCHDB_NODE_TYPE_ZIG") end, - [ + with([ ?TDEF(basics), ?TDEF(os_env_priority) - ] + ]) }. -basics() -> - ok = application:start(fabric), - +basics(_) -> % default is true for new types ?assert(fabric2_node_types:is_type(some_new_node_type)), @@ -64,9 +59,7 @@ basics() -> ?assert(not fabric2_node_types:is_type(bam)). -os_env_priority() -> - ok = application:start(fabric), - +os_env_priority(_) -> % os env takes precedence application:set_env(fabric, node_types, [{foo, true}, {baz, false}]), ?assert(not fabric2_node_types:is_type(foo)), diff --git a/src/fabric/test/fabric2_rev_stemming.erl b/src/fabric/test/fabric2_rev_stemming.erl index 99e086e2a..62ce6901a 100644 --- a/src/fabric/test/fabric2_rev_stemming.erl +++ b/src/fabric/test/fabric2_rev_stemming.erl @@ -16,6 +16,7 @@ -include_lib("couch/include/couch_db.hrl"). -include_lib("couch/include/couch_eunit.hrl"). -include_lib("eunit/include/eunit.hrl"). +-include("fabric2_test.hrl"). doc_crud_test_() -> @@ -25,16 +26,16 @@ doc_crud_test_() -> setup, fun setup/0, fun cleanup/1, - {with, [ - fun update_doc/1, - fun update_doc_replicated_no_stemming/1, - fun update_doc_replicated_with_stemming/1, - fun update_doc_replicate_existing_rev/1, - fun update_winning_conflict_branch/1, - fun update_non_winning_conflict_branch/1, - fun delete_doc_basic/1, - fun recreate_doc_basic/1 - ]} + with([ + ?TDEF(update_doc), + ?TDEF(update_doc_replicated_no_stemming), + ?TDEF(update_doc_replicated_with_stemming), + ?TDEF(update_doc_replicate_existing_rev), + ?TDEF(update_winning_conflict_branch), + ?TDEF(update_non_winning_conflict_branch), + ?TDEF(delete_doc_basic), + ?TDEF(recreate_doc_basic) + ]) } }. @@ -192,7 +193,7 @@ recreate_doc_basic({Db, _}) -> deleted = true, body = {[{<<"state">>, 2}]} }, - {ok, {2, Rev2}} = fabric2_db:update_doc(Db, Doc2), + {ok, {2, _Rev2}} = fabric2_db:update_doc(Db, Doc2), Doc3 = Doc1#doc{ revs = {0, []}, deleted = false, diff --git a/src/fabric/test/fabric2_test.hrl b/src/fabric/test/fabric2_test.hrl new file mode 100644 index 000000000..a0532b360 --- /dev/null +++ b/src/fabric/test/fabric2_test.hrl @@ -0,0 +1,25 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-define(TDEF(Name), {atom_to_list(Name), fun Name/1}). +-define(TDEF(Name, Timeout), {atom_to_list(Name), Timeout, fun Name/1}). + + +with(Tests) -> + fun(ArgsTuple) -> + lists:map(fun + ({Name, Fun}) -> + {Name, ?_test(Fun(ArgsTuple))}; + ({Name, Timeout, Fun}) -> + {Name, {timeout, Timeout, ?_test(Fun(ArgsTuple))}} + end, Tests) + end. diff --git a/src/fabric/test/fabric2_trace_db_create_tests.erl b/src/fabric/test/fabric2_trace_db_create_tests.erl index 09cc86375..926219f6a 100644 --- a/src/fabric/test/fabric2_trace_db_create_tests.erl +++ b/src/fabric/test/fabric2_trace_db_create_tests.erl @@ -16,6 +16,7 @@ -include_lib("couch/include/couch_db.hrl"). -include_lib("couch/include/couch_eunit.hrl"). -include_lib("eunit/include/eunit.hrl"). +-include("fabric2_test.hrl"). trace_test_() -> @@ -25,9 +26,9 @@ trace_test_() -> setup, fun setup/0, fun cleanup/1, - [ - fun create_db/0 - ] + with([ + ?TDEF(create_db) + ]) } }. @@ -41,6 +42,6 @@ cleanup(Ctx) -> test_util:stop_couch(Ctx). -create_db() -> +create_db(_) -> put(erlfdb_trace, <<"create db">>), {ok, _Db} = fabric2_db:create(?tempdb(), [{user_ctx, ?ADMIN_USER}]). diff --git a/src/fabric/test/fabric2_trace_db_delete_tests.erl b/src/fabric/test/fabric2_trace_db_delete_tests.erl index ddbb2c858..ac92c5335 100644 --- a/src/fabric/test/fabric2_trace_db_delete_tests.erl +++ b/src/fabric/test/fabric2_trace_db_delete_tests.erl @@ -16,6 +16,7 @@ -include_lib("couch/include/couch_db.hrl"). -include_lib("couch/include/couch_eunit.hrl"). -include_lib("eunit/include/eunit.hrl"). +-include("fabric2_test.hrl"). trace_test_() -> @@ -25,9 +26,9 @@ trace_test_() -> setup, fun setup/0, fun cleanup/1, - {with, [ - fun delete_db/1 - ]} + with([ + ?TDEF(delete_db) + ]) } }. diff --git a/src/fabric/test/fabric2_trace_db_open_tests.erl b/src/fabric/test/fabric2_trace_db_open_tests.erl index 71e33019e..3602b50e1 100644 --- a/src/fabric/test/fabric2_trace_db_open_tests.erl +++ b/src/fabric/test/fabric2_trace_db_open_tests.erl @@ -16,6 +16,7 @@ -include_lib("couch/include/couch_db.hrl"). -include_lib("couch/include/couch_eunit.hrl"). -include_lib("eunit/include/eunit.hrl"). +-include("fabric2_test.hrl"). trace_test_() -> @@ -25,9 +26,9 @@ trace_test_() -> setup, fun setup/0, fun cleanup/1, - {with, [ - fun open_db/1 - ]} + with([ + ?TDEF(open_db) + ]) } }. diff --git a/src/fabric/test/fabric2_trace_doc_create_tests.erl b/src/fabric/test/fabric2_trace_doc_create_tests.erl index 1e0b47cec..888039d05 100644 --- a/src/fabric/test/fabric2_trace_doc_create_tests.erl +++ b/src/fabric/test/fabric2_trace_doc_create_tests.erl @@ -16,20 +16,21 @@ -include_lib("couch/include/couch_db.hrl"). -include_lib("couch/include/couch_eunit.hrl"). -include_lib("eunit/include/eunit.hrl"). +-include("fabric2_test.hrl"). -doc_crud_test_() -> +trace_doc_create_test_() -> { "Test document CRUD operations", { setup, fun setup/0, fun cleanup/1, - {with, [ - fun create_new_doc/1, - fun create_two_docs/1, - fun create_50_docs/1 - ]} + with([ + ?TDEF(create_new_doc), + ?TDEF(create_two_docs), + ?TDEF(create_50_docs) + ]) } }. -- cgit v1.2.1 From ebe14eccd293e971b4b024f930fbf2cea596425e Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Fri, 6 Dec 2019 14:40:55 -0600 Subject: Delete attachments when no longer referenced This fixes attachment handling to properly remove attachment data when it is no longer referenced by the document's revision tree. As implemented this accounts for the possibility that multiple revisions may reference a given attachment id. However, due to how the current revision tree removes revisions aggressively it's not currently possible for multiple leaf revisions to share an underlying attachment. This is because when attempting to use a stub attachment when replicating in a conflict we will encounter the `missing_stub` error because the previous shared revision has already been removed. --- src/fabric/include/fabric2.hrl | 5 +- src/fabric/src/fabric2_db.erl | 16 +- src/fabric/src/fabric2_fdb.erl | 126 +++++++++++-- src/fabric/src/fabric2_util.erl | 16 ++ src/fabric/test/fabric2_doc_att_tests.erl | 285 ++++++++++++++++++++++++++++++ 5 files changed, 428 insertions(+), 20 deletions(-) create mode 100644 src/fabric/test/fabric2_doc_att_tests.erl diff --git a/src/fabric/include/fabric2.hrl b/src/fabric/include/fabric2.hrl index 189995de2..b4dd084a2 100644 --- a/src/fabric/include/fabric2.hrl +++ b/src/fabric/include/fabric2.hrl @@ -37,12 +37,15 @@ -define(DB_ATTS, 23). -define(DB_VIEWS, 24). -define(DB_LOCAL_DOC_BODIES, 25). +-define(DB_ATT_NAMES, 26). % Versions --define(CURR_REV_FORMAT, 0). +% 0 - Initial implementation +% 1 - Added attachment hash +-define(CURR_REV_FORMAT, 1). % Misc constants diff --git a/src/fabric/src/fabric2_db.erl b/src/fabric/src/fabric2_db.erl index 88840e702..6d015df0e 100644 --- a/src/fabric/src/fabric2_db.erl +++ b/src/fabric/src/fabric2_db.erl @@ -1341,7 +1341,8 @@ update_doc_interactive(Db, Doc0, Future, _Options) -> #doc{ deleted = NewDeleted, - revs = {NewRevPos, [NewRev | NewRevPath]} + revs = {NewRevPos, [NewRev | NewRevPath]}, + atts = Atts } = Doc4 = stem_revisions(Db, Doc3), NewRevInfo = #{ @@ -1350,7 +1351,8 @@ update_doc_interactive(Db, Doc0, Future, _Options) -> rev_id => {NewRevPos, NewRev}, rev_path => NewRevPath, sequence => undefined, - branch_count => undefined + branch_count => undefined, + att_hash => fabric2_util:hash_atts(Atts) }, % Gather the list of possible winnig revisions @@ -1405,7 +1407,8 @@ update_doc_replicated(Db, Doc0, _Options) -> rev_id => {RevPos, Rev}, rev_path => RevPath, sequence => undefined, - branch_count => undefined + branch_count => undefined, + att_hash => <<>> }, AllRevInfos = fabric2_fdb:get_all_revs(Db, DocId), @@ -1444,6 +1447,9 @@ update_doc_replicated(Db, Doc0, _Options) -> PrevRevInfo = find_prev_revinfo(RevPos, LeafPath), Doc2 = prep_and_validate(Db, Doc1, PrevRevInfo), Doc3 = flush_doc_atts(Db, Doc2), + DocRevInfo2 = DocRevInfo1#{ + atts_hash => fabric2_util:hash_atts(Doc3#doc.atts) + }, % Possible winners are the previous winner and % the new DocRevInfo @@ -1453,9 +1459,9 @@ update_doc_replicated(Db, Doc0, _Options) -> end, {NewWinner0, NonWinner} = case Winner == PrevRevInfo of true -> - {DocRevInfo1, not_found}; + {DocRevInfo2, not_found}; false -> - [W, NW] = fabric2_util:sort_revinfos([Winner, DocRevInfo1]), + [W, NW] = fabric2_util:sort_revinfos([Winner, DocRevInfo2]), {W, NW} end, diff --git a/src/fabric/src/fabric2_fdb.erl b/src/fabric/src/fabric2_fdb.erl index fb2891be7..404460ed5 100644 --- a/src/fabric/src/fabric2_fdb.erl +++ b/src/fabric/src/fabric2_fdb.erl @@ -580,9 +580,40 @@ write_doc(#{} = Db0, Doc, NewWinner0, OldWinner, ToUpdate, ToRemove) -> #doc{ id = DocId, - deleted = Deleted + deleted = Deleted, + atts = Atts } = Doc, + % Doc body + + ok = write_doc_body(Db, Doc), + + % Attachment bookkeeping + + % If a document's attachments have changed we have to scan + % for any attachments that may need to be deleted. The check + % for `>= 2` is a bit subtle. The important point is that + % one of the revisions will be from the new document so we + % have to find at least one more beyond that to assert that + % the attachments have not changed. + AttHash = fabric2_util:hash_atts(Atts), + RevsToCheck = [NewWinner0] ++ ToUpdate ++ ToRemove, + AttHashCount = lists:foldl(fun(Att, Count) -> + #{att_hash := RevAttHash} = Att, + case RevAttHash == AttHash of + true -> Count + 1; + false -> Count + end + end, 0, RevsToCheck), + if + AttHashCount == length(RevsToCheck) -> + ok; + AttHashCount >= 2 -> + ok; + true -> + cleanup_attachments(Db, DocId, Doc, ToRemove) + end, + % Revision tree NewWinner = NewWinner0#{winner := true}, @@ -649,8 +680,6 @@ write_doc(#{} = Db0, Doc, NewWinner0, OldWinner, ToUpdate, ToRemove) -> % And all the rest... - ok = write_doc_body(Db, Doc), - IsDDoc = case Doc#doc.id of <> -> true; _ -> false @@ -755,6 +784,9 @@ write_attachment(#{} = Db, DocId, Data) when is_binary(Data) -> AttId = fabric2_util:uuid(), Chunks = chunkify_binary(Data), + IdKey = erlfdb_tuple:pack({?DB_ATT_NAMES, DocId, AttId}, DbPrefix), + ok = erlfdb:set(Tx, IdKey, <<>>), + lists:foldl(fun(Chunk, ChunkId) -> AttKey = erlfdb_tuple:pack({?DB_ATTS, DocId, AttId, ChunkId}, DbPrefix), ok = erlfdb:set(Tx, AttKey, Chunk), @@ -1014,16 +1046,71 @@ clear_doc_body(#{} = Db, DocId, #{} = RevInfo) -> ok = erlfdb:clear_range(Tx, StartKey, EndKey). +cleanup_attachments(Db, DocId, NewDoc, ToRemove) -> + #{ + tx := Tx, + db_prefix := DbPrefix + } = Db, + + RemoveRevs = lists:map(fun(#{rev_id := RevId}) -> RevId end, ToRemove), + + % Gather all known document revisions + {ok, DiskDocs} = fabric2_db:open_doc_revs(Db, DocId, all, []), + AllDocs = [{ok, NewDoc} | DiskDocs], + + % Get referenced attachment ids + ActiveIdSet = lists:foldl(fun({ok, Doc}, Acc) -> + #doc{ + revs = {Pos, [Rev | _]} + } = Doc, + case lists:member({Pos, Rev}, RemoveRevs) of + true -> + Acc; + false -> + lists:foldl(fun(Att, InnerAcc) -> + {loc, _Db, _DocId, AttId} = couch_att:fetch(data, Att), + sets:add_element(AttId, InnerAcc) + end, Acc, Doc#doc.atts) + end + end, sets:new(), AllDocs), + + AttPrefix = erlfdb_tuple:pack({?DB_ATT_NAMES, DocId}, DbPrefix), + Options = [{streaming_mode, want_all}], + Future = erlfdb:get_range_startswith(Tx, AttPrefix, Options), + + ExistingIdSet = lists:foldl(fun({K, _}, Acc) -> + {?DB_ATT_NAMES, DocId, AttId} = erlfdb_tuple:unpack(K, DbPrefix), + sets:add_element(AttId, Acc) + end, sets:new(), erlfdb:wait(Future)), + + AttsToRemove = sets:subtract(ExistingIdSet, ActiveIdSet), + + lists:foreach(fun(AttId) -> + IdKey = erlfdb_tuple:pack({?DB_ATT_NAMES, DocId, AttId}, DbPrefix), + erlfdb:clear(Tx, IdKey), + + ChunkKey = erlfdb_tuple:pack({?DB_ATTS, DocId, AttId}, DbPrefix), + erlfdb:clear_range_startswith(Tx, ChunkKey) + end, sets:to_list(AttsToRemove)). + + revinfo_to_fdb(Tx, DbPrefix, DocId, #{winner := true} = RevId) -> #{ deleted := Deleted, rev_id := {RevPos, Rev}, rev_path := RevPath, - branch_count := BranchCount + branch_count := BranchCount, + att_hash := AttHash } = RevId, VS = new_versionstamp(Tx), Key = {?DB_REVS, DocId, not Deleted, RevPos, Rev}, - Val = {?CURR_REV_FORMAT, VS, BranchCount, list_to_tuple(RevPath)}, + Val = { + ?CURR_REV_FORMAT, + VS, + BranchCount, + list_to_tuple(RevPath), + AttHash + }, KBin = erlfdb_tuple:pack(Key, DbPrefix), VBin = erlfdb_tuple:pack_vs(Val), {KBin, VBin, VS}; @@ -1032,38 +1119,49 @@ revinfo_to_fdb(_Tx, DbPrefix, DocId, #{} = RevId) -> #{ deleted := Deleted, rev_id := {RevPos, Rev}, - rev_path := RevPath + rev_path := RevPath, + att_hash := AttHash } = RevId, Key = {?DB_REVS, DocId, not Deleted, RevPos, Rev}, - Val = {?CURR_REV_FORMAT, list_to_tuple(RevPath)}, + Val = {?CURR_REV_FORMAT, list_to_tuple(RevPath), AttHash}, KBin = erlfdb_tuple:pack(Key, DbPrefix), VBin = erlfdb_tuple:pack(Val), {KBin, VBin, undefined}. -fdb_to_revinfo(Key, {?CURR_REV_FORMAT, _, _, _} = Val) -> +fdb_to_revinfo(Key, {?CURR_REV_FORMAT, _, _, _, _} = Val) -> {?DB_REVS, _DocId, NotDeleted, RevPos, Rev} = Key, - {_RevFormat, Sequence, BranchCount, RevPath} = Val, + {_RevFormat, Sequence, BranchCount, RevPath, AttHash} = Val, #{ winner => true, deleted => not NotDeleted, rev_id => {RevPos, Rev}, rev_path => tuple_to_list(RevPath), sequence => Sequence, - branch_count => BranchCount + branch_count => BranchCount, + att_hash => AttHash }; -fdb_to_revinfo(Key, {?CURR_REV_FORMAT, _} = Val) -> +fdb_to_revinfo(Key, {?CURR_REV_FORMAT, _, _} = Val) -> {?DB_REVS, _DocId, NotDeleted, RevPos, Rev} = Key, - {_RevFormat, RevPath} = Val, + {_RevFormat, RevPath, AttHash} = Val, #{ winner => false, deleted => not NotDeleted, rev_id => {RevPos, Rev}, rev_path => tuple_to_list(RevPath), sequence => undefined, - branch_count => undefined - }. + branch_count => undefined, + att_hash => AttHash + }; + +fdb_to_revinfo(Key, {0, Seq, BCount, RPath}) -> + Val = {?CURR_REV_FORMAT, Seq, BCount, RPath, <<>>}, + fdb_to_revinfo(Key, Val); + +fdb_to_revinfo(Key, {0, RPath}) -> + Val = {?CURR_REV_FORMAT, RPath, <<>>}, + fdb_to_revinfo(Key, Val). doc_to_fdb(Db, #doc{} = Doc) -> diff --git a/src/fabric/src/fabric2_util.erl b/src/fabric/src/fabric2_util.erl index 2b8e49ebf..4e2e2d76b 100644 --- a/src/fabric/src/fabric2_util.erl +++ b/src/fabric/src/fabric2_util.erl @@ -25,6 +25,8 @@ validate_security_object/1, + hash_atts/1, + dbname_ends_with/2, get_value/2, @@ -124,6 +126,20 @@ validate_json_list_of_strings(Member, Props) -> end. +hash_atts([]) -> + <<>>; + +hash_atts(Atts) -> + SortedAtts = lists:sort(fun(A, B) -> + couch_att:fetch(name, A) =< couch_att:fetch(name, B) + end, Atts), + Md5St = lists:foldl(fun(Att, Acc) -> + {loc, _Db, _DocId, AttId} = couch_att:fetch(data, Att), + couch_hash:md5_hash_update(Acc, AttId) + end, couch_hash:md5_hash_init(), SortedAtts), + couch_hash:md5_hash_final(Md5St). + + dbname_ends_with(#{} = Db, Suffix) -> dbname_ends_with(fabric2_db:name(Db), Suffix); diff --git a/src/fabric/test/fabric2_doc_att_tests.erl b/src/fabric/test/fabric2_doc_att_tests.erl new file mode 100644 index 000000000..331e1a4e8 --- /dev/null +++ b/src/fabric/test/fabric2_doc_att_tests.erl @@ -0,0 +1,285 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(fabric2_doc_att_tests). + + +-include_lib("couch/include/couch_db.hrl"). +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("eunit/include/eunit.hrl"). +-include("fabric2.hrl"). +-include("fabric2_test.hrl"). + + +doc_crud_test_() -> + { + "Test document CRUD operations", + { + setup, + fun setup/0, + fun cleanup/1, + with([ + ?TDEF(create_att), + ?TDEF(delete_att), + ?TDEF(multiple_atts), + ?TDEF(delete_one_att), + ?TDEF(large_att), + ?TDEF(att_on_conflict_isolation) + ]) + } + }. + + +setup() -> + Ctx = test_util:start_couch([fabric]), + {ok, Db} = fabric2_db:create(?tempdb(), [{user_ctx, ?ADMIN_USER}]), + {Db, Ctx}. + + +cleanup({Db, Ctx}) -> + ok = fabric2_db:delete(fabric2_db:name(Db), []), + test_util:stop_couch(Ctx). + + +create_att({Db, _}) -> + DocId = fabric2_util:uuid(), + Att1 = couch_att:new([ + {name, <<"foo.txt">>}, + {type, <<"application/octet-stream">>}, + {att_len, 6}, + {data, <<"foobar">>}, + {encoding, identity}, + {md5, <<>>} + ]), + Doc1 = #doc{ + id = DocId, + atts = [Att1] + }, + {ok, _} = fabric2_db:update_doc(Db, Doc1), + {ok, Doc2} = fabric2_db:open_doc(Db, DocId), + #doc{ + atts = [Att2] + } = Doc2, + {loc, _Db, DocId, AttId} = couch_att:fetch(data, Att2), + AttData = fabric2_db:read_attachment(Db, DocId, AttId), + ?assertEqual(<<"foobar">>, AttData), + + % Check that the raw keys exist + #{ + db_prefix := DbPrefix + } = Db, + IdKey = erlfdb_tuple:pack({?DB_ATT_NAMES, DocId, AttId}, DbPrefix), + AttKey = erlfdb_tuple:pack({?DB_ATTS, DocId, AttId}, DbPrefix), + + fabric2_fdb:transactional(fun(Tx) -> + IdVal = erlfdb:wait(erlfdb:get(Tx, IdKey)), + AttVals = erlfdb:wait(erlfdb:get_range_startswith(Tx, AttKey)), + + ?assertEqual(<<>>, IdVal), + ?assertMatch([{_, <<"foobar">>}], AttVals) + end). + + +delete_att({Db, _}) -> + DocId = fabric2_util:uuid(), + Att1 = couch_att:new([ + {name, <<"foo.txt">>}, + {type, <<"application/octet-stream">>}, + {att_len, 6}, + {data, <<"foobar">>}, + {encoding, identity}, + {md5, <<>>} + ]), + Doc1 = #doc{ + id = DocId, + atts = [Att1] + }, + {ok, _} = fabric2_db:update_doc(Db, Doc1), + {ok, Doc2} = fabric2_db:open_doc(Db, DocId), + #doc{ + atts = [Att2] + } = Doc2, + {loc, _Db, DocId, AttId} = couch_att:fetch(data, Att2), + + Doc3 = Doc2#doc{atts = []}, + {ok, _} = fabric2_db:update_doc(Db, Doc3), + + {ok, Doc4} = fabric2_db:open_doc(Db, DocId), + ?assertEqual([], Doc4#doc.atts), + + % Check that the raw keys were removed + #{ + db_prefix := DbPrefix + } = Db, + IdKey = erlfdb_tuple:pack({?DB_ATT_NAMES, DocId, AttId}, DbPrefix), + AttKey = erlfdb_tuple:pack({?DB_ATTS, DocId, AttId}, DbPrefix), + + fabric2_fdb:transactional(fun(Tx) -> + IdVal = erlfdb:wait(erlfdb:get(Tx, IdKey)), + AttVals = erlfdb:wait(erlfdb:get_range_startswith(Tx, AttKey)), + + ?assertEqual(not_found, IdVal), + ?assertMatch([], AttVals) + end). + + +multiple_atts({Db, _}) -> + DocId = fabric2_util:uuid(), + Atts = [ + mk_att(<<"foo.txt">>, <<"foobar">>), + mk_att(<<"bar.txt">>, <<"barfoo">>), + mk_att(<<"baz.png">>, <<"blargh">>) + ], + {ok, _} = create_doc(Db, DocId, Atts), + ?assertEqual( + #{ + <<"foo.txt">> => <<"foobar">>, + <<"bar.txt">> => <<"barfoo">>, + <<"baz.png">> => <<"blargh">> + }, + read_atts(Db, DocId) + ). + + +delete_one_att({Db, _}) -> + DocId = fabric2_util:uuid(), + Atts1 = [ + mk_att(<<"foo.txt">>, <<"foobar">>), + mk_att(<<"bar.txt">>, <<"barfoo">>), + mk_att(<<"baz.png">>, <<"blargh">>) + ], + {ok, RevId} = create_doc(Db, DocId, Atts1), + Atts2 = tl(Atts1), + {ok, _} = update_doc(Db, DocId, RevId, stubify(RevId, Atts2)), + ?assertEqual( + #{ + <<"bar.txt">> => <<"barfoo">>, + <<"baz.png">> => <<"blargh">> + }, + read_atts(Db, DocId) + ). + + +large_att({Db, _}) -> + DocId = fabric2_util:uuid(), + % Total size ~360,000 bytes + AttData = iolist_to_binary([ + <<"foobar">> || _ <- lists:seq(1, 60000) + ]), + Att1 = mk_att("long.txt", AttData), + {ok, _} = create_doc(Db, DocId, [Att1]), + ?assertEqual(#{"long.txt" => AttData}, read_atts(Db, DocId)), + + {ok, Doc} = fabric2_db:open_doc(Db, DocId), + #doc{atts = [Att2]} = Doc, + {loc, _Db, DocId, AttId} = couch_att:fetch(data, Att2), + + #{db_prefix := DbPrefix} = Db, + AttKey = erlfdb_tuple:pack({?DB_ATTS, DocId, AttId}, DbPrefix), + fabric2_fdb:transactional(fun(Tx) -> + AttVals = erlfdb:wait(erlfdb:get_range_startswith(Tx, AttKey)), + ?assertEqual(4, length(AttVals)) + end). + + +att_on_conflict_isolation({Db, _}) -> + DocId = fabric2_util:uuid(), + [PosRevA1, PosRevB1] = create_conflicts(Db, DocId, []), + Att = mk_att(<<"happy_goat.tiff">>, <<":D>">>), + {ok, PosRevA2} = update_doc(Db, DocId, PosRevA1, [Att]), + ?assertEqual( + #{<<"happy_goat.tiff">> => <<":D>">>}, + read_atts(Db, DocId, PosRevA2) + ), + ?assertEqual(#{}, read_atts(Db, DocId, PosRevB1)). + + +mk_att(Name, Data) -> + couch_att:new([ + {name, Name}, + {type, <<"application/octet-stream">>}, + {att_len, size(Data)}, + {data, Data}, + {encoding, identity}, + {md5, <<>>} + ]). + + +stubify(RevId, Atts) when is_list(Atts) -> + lists:map(fun(Att) -> + stubify(RevId, Att) + end, Atts); + +stubify({Pos, _Rev}, Att) -> + couch_att:store([ + {data, stub}, + {revpos, Pos} + ], Att). + + +create_doc(Db, DocId, Atts) -> + Doc = #doc{ + id = DocId, + atts = Atts + }, + fabric2_db:update_doc(Db, Doc). + + +update_doc(Db, DocId, {Pos, Rev}, Atts) -> + Doc = #doc{ + id = DocId, + revs = {Pos, [Rev]}, + atts = Atts + }, + fabric2_db:update_doc(Db, Doc). + + +create_conflicts(Db, DocId, Atts) -> + Base = #doc{ + id = DocId, + atts = Atts + }, + {ok, {_, Rev1} = PosRev} = fabric2_db:update_doc(Db, Base), + <> = fabric2_util:uuid(), + Doc1 = #doc{ + id = DocId, + revs = {2, [Rev2, Rev1]}, + atts = stubify(PosRev, Atts) + }, + Doc2 = #doc{ + id = DocId, + revs = {2, [Rev3, Rev1]}, + atts = stubify(PosRev, Atts) + }, + {ok, _} = fabric2_db:update_doc(Db, Doc1, [replicated_changes]), + {ok, _} = fabric2_db:update_doc(Db, Doc2, [replicated_changes]), + lists:reverse(lists:sort([{2, Rev2}, {2, Rev3}])). + + +read_atts(Db, DocId) -> + {ok, #doc{atts = Atts}} = fabric2_db:open_doc(Db, DocId), + atts_to_map(Db, DocId, Atts). + + +read_atts(Db, DocId, PosRev) -> + {ok, Docs} = fabric2_db:open_doc_revs(Db, DocId, [PosRev], []), + [{ok, #doc{atts = Atts}}] = Docs, + atts_to_map(Db, DocId, Atts). + + +atts_to_map(Db, DocId, Atts) -> + lists:foldl(fun(Att, Acc) -> + [Name, Data] = couch_att:fetch([name, data], Att), + {loc, _Db, DocId, AttId} = Data, + AttBin = fabric2_db:read_attachment(Db, DocId, AttId), + maps:put(Name, AttBin, Acc) + end, #{}, Atts). -- cgit v1.2.1 From 4e8b200406baf8514a195e1b59df41bbf23bea57 Mon Sep 17 00:00:00 2001 From: Jay Doane Date: Tue, 10 Dec 2019 16:49:38 -0800 Subject: Expiring cache This is a library for creating an FDB backed key value cache, where each entry has a `stale` and `expires` time associated with it. Once the current time exceeds the `expires` time, the entry is automatically removed. The `stale` time can be used to indicate that a refresh is necessary, while still returning a non-expired value. It is potentially useful for implementing e.g. caches to external systems of record, such as OAuth 2. --- rebar.config.script | 1 + src/couch_expiring_cache/README.md | 71 +++++++++++++ .../include/couch_expiring_cache.hrl | 17 +++ src/couch_expiring_cache/rebar.config | 14 +++ .../src/couch_expiring_cache.app.src | 27 +++++ .../src/couch_expiring_cache.erl | 56 ++++++++++ .../src/couch_expiring_cache_fdb.erl | 116 +++++++++++++++++++++ .../src/couch_expiring_cache_server.erl | 110 +++++++++++++++++++ .../test/couch_expiring_cache_tests.erl | 95 +++++++++++++++++ 9 files changed, 507 insertions(+) create mode 100644 src/couch_expiring_cache/README.md create mode 100644 src/couch_expiring_cache/include/couch_expiring_cache.hrl create mode 100644 src/couch_expiring_cache/rebar.config create mode 100644 src/couch_expiring_cache/src/couch_expiring_cache.app.src create mode 100644 src/couch_expiring_cache/src/couch_expiring_cache.erl create mode 100644 src/couch_expiring_cache/src/couch_expiring_cache_fdb.erl create mode 100644 src/couch_expiring_cache/src/couch_expiring_cache_server.erl create mode 100644 src/couch_expiring_cache/test/couch_expiring_cache_tests.erl diff --git a/rebar.config.script b/rebar.config.script index 2eeec988d..390c7792b 100644 --- a/rebar.config.script +++ b/rebar.config.script @@ -135,6 +135,7 @@ SubDirs = [ "src/dreyfus", "src/fabric", "src/couch_jobs", + "src/couch_expiring_cache", "src/global_changes", "src/ioq", "src/ken", diff --git a/src/couch_expiring_cache/README.md b/src/couch_expiring_cache/README.md new file mode 100644 index 000000000..2ab1699db --- /dev/null +++ b/src/couch_expiring_cache/README.md @@ -0,0 +1,71 @@ +# Couch Expiring Cache + +This is a library for creating an FDB backed key value cache, where +each entry has a `stale` and `expires` time associated with it. Once +the current time exceeds the `expires` time, the entry is +automatically removed. The `stale` time can be used to indicate that a +refresh is necessary, while still returning a non-expired value. It is +potentially useful for implementing e.g. caches to external systems of +record, such as OAuth 2. + +The data model is based on this [FDB forum discussion]( +https://forums.foundationdb.org/t/designing-key-value-expiration-in-fdb/156). + +``` +(?EXPIRING_CACHE, Name, ?PK, Key) := (Val, StaleTS, ExpireTS) +(?EXPIRING_CACHE, Name, ?EXP, ExpireTS, Key) := () +``` +where `Name` is a unique namespace for a particular use case. N.B. +that it's possible for cache data remain indefinitely in FDB when a +`Name` is changed or retired with unexpired entries. For such cases, +we provide `couch_expiring_cache_fdb:clear_all/1` to manually clean +up those entries. + +## Example + +Typical usage for this library is to create a separate behaviour +module for each `Name`, which internally starts a uniquely named +`couch_expiring_cache_server` to handle expiration and removal of +entries for that `Name`. For example, to cache authorization decisions +from an external source, one could implement a module like the +following: + +```erlang +-module(auth_fdb_decision_cache). + +-behaviour(couch_expiring_cache_server). + +-export([ + start_link/0 +]). + + +-define(CACHE_NAME, <<"auth-decision">>). + + +start_link() -> + Opts = #{ + cache_name => ?CACHE_NAME, + period => 1000, % clear expired entries every second + batch_size => 500, % clear at most 500 entries each period + max_jitter => 10 + }, + couch_expiring_cache_server:start_link(?MODULE, Opts). +``` + +## Modules + +* `couch_expiring_cache`: The API module, it contains functions for + inserting and looking up cache entries, which are simply + pass-throughs to `couch_expiring_cache_fdb`. + +* `couch_expiring_cache_fdb`: The module which interacts with FDB, in + addition to insertion and lookup functions, it also contains a + function to clear an expired range, which is called periodically + from instances of `couch_expiring_cache_server`. + +* `couch_expiring_cache_server`: An "abstract" gen_server, a specific + behaviour of this module should be created for each `Name`, which + can override the default expiration parameters. It periodically + removes expired cache entries using configurable parameters for + period, jitter, and batch size. diff --git a/src/couch_expiring_cache/include/couch_expiring_cache.hrl b/src/couch_expiring_cache/include/couch_expiring_cache.hrl new file mode 100644 index 000000000..78e6a8552 --- /dev/null +++ b/src/couch_expiring_cache/include/couch_expiring_cache.hrl @@ -0,0 +1,17 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-define(TIME_UNIT, millisecond). + +-type millisecond() :: non_neg_integer(). + +-type jtx() :: map() | undefined | tuple(). % copied from couch_jobs.hrl diff --git a/src/couch_expiring_cache/rebar.config b/src/couch_expiring_cache/rebar.config new file mode 100644 index 000000000..362c8785e --- /dev/null +++ b/src/couch_expiring_cache/rebar.config @@ -0,0 +1,14 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +{cover_enabled, true}. +{cover_print_enabled, true}. diff --git a/src/couch_expiring_cache/src/couch_expiring_cache.app.src b/src/couch_expiring_cache/src/couch_expiring_cache.app.src new file mode 100644 index 000000000..27d58ee0e --- /dev/null +++ b/src/couch_expiring_cache/src/couch_expiring_cache.app.src @@ -0,0 +1,27 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +{application, couch_expiring_cache, [ + {description, "CouchDB Expiring Cache"}, + {vsn, git}, + {registered, []}, + {applications, [ + kernel, + stdlib, + erlfdb, + config, + couch_log, + couch_stats, + couch_jobs, + fabric + ]} +]}. diff --git a/src/couch_expiring_cache/src/couch_expiring_cache.erl b/src/couch_expiring_cache/src/couch_expiring_cache.erl new file mode 100644 index 000000000..b26556e98 --- /dev/null +++ b/src/couch_expiring_cache/src/couch_expiring_cache.erl @@ -0,0 +1,56 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_expiring_cache). + +-export([ + insert/5, + insert/6, + lookup/2, + lookup/3 +]). + + +-include_lib("couch_expiring_cache/include/couch_expiring_cache.hrl"). + + +-spec insert(Name :: binary(), Key :: binary(), Value :: binary(), + StaleTS :: ?TIME_UNIT(), ExpiresTS :: ?TIME_UNIT()) -> ok. +insert(Name, Key, Value, StaleTS, ExpiresTS) + when is_binary(Name), is_binary(Key), is_binary(Value), + is_integer(StaleTS), is_integer(ExpiresTS) -> + insert(undefined, Name, Key, Value, StaleTS, ExpiresTS). + + +-spec insert(Tx :: jtx(), Name :: binary(), Key :: binary(), Value :: binary(), + StaleTS :: ?TIME_UNIT(), ExpiresTS :: ?TIME_UNIT()) -> ok. +insert(Tx, Name, Key, Value, StaleTS, ExpiresTS) + when is_binary(Name), is_binary(Key), is_binary(Value), + is_integer(StaleTS), is_integer(ExpiresTS) -> + couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(Tx), fun(JTx) -> + couch_expiring_cache_fdb:insert( + JTx, Name, Key, Value, StaleTS, ExpiresTS) + end). + + +-spec lookup(Name :: binary(), Key :: binary()) -> + not_found | {fresh, Val :: binary()} | {stale, Val :: binary()} | expired. +lookup(Name, Key) when is_binary(Name), is_binary(Key) -> + lookup(undefined, Name, Key). + + +-spec lookup(Tx :: jtx(), Name :: binary(), Key :: binary()) -> + not_found | {fresh, Val :: binary()} | {stale, Val :: binary()} | expired. +lookup(Tx, Name, Key) when is_binary(Name), is_binary(Key) -> + couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(Tx), fun(JTx) -> + couch_expiring_cache_fdb:lookup(JTx, Name, Key) + end). diff --git a/src/couch_expiring_cache/src/couch_expiring_cache_fdb.erl b/src/couch_expiring_cache/src/couch_expiring_cache_fdb.erl new file mode 100644 index 000000000..fa8508e14 --- /dev/null +++ b/src/couch_expiring_cache/src/couch_expiring_cache_fdb.erl @@ -0,0 +1,116 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_expiring_cache_fdb). + +-export([ + insert/6, + lookup/3, + clear_all/1, + clear_expired_range/3 +]). + + +-define(EXPIRING_CACHE, 53). % coordinate with fabric2.hrl +-define(PK, 1). +-define(EXP, 2). + + +-include_lib("couch_expiring_cache/include/couch_expiring_cache.hrl"). + + +% Data model +% see: https://forums.foundationdb.org/t/designing-key-value-expiration-in-fdb/156 +% +% (?EXPIRING_CACHE, Name, ?PK, Key) := (Val, StaleTS, ExpiresTS) +% (?EXPIRING_CACHE, Name, ?EXP, ExpiresTS, Key) := () + + +-spec insert(JTx :: jtx(), Name :: binary(), Key :: binary(), Value :: binary(), + StaleTS :: ?TIME_UNIT, ExpiresTS :: ?TIME_UNIT) -> ok. +insert(#{jtx := true} = JTx, Name, Key, Val, StaleTS, ExpiresTS) -> + #{tx := Tx, layer_prefix := LayerPrefix} = couch_jobs_fdb:get_jtx(JTx), + PK = primary_key(Name, Key, LayerPrefix), + PV = erlfdb_tuple:pack({Val, StaleTS, ExpiresTS}), + XK = expiry_key(ExpiresTS, Name, Key, LayerPrefix), + XV = erlfdb_tuple:pack({}), + ok = erlfdb:set(Tx, PK, PV), + ok = erlfdb:set(Tx, XK, XV). + + +-spec lookup(JTx :: jtx(), Name :: binary(), Key :: binary()) -> + not_found | {fresh, Val :: binary()} | {stale, Val :: binary()} | expired. +lookup(#{jtx := true} = JTx, Name, Key) -> + #{tx := Tx, layer_prefix := LayerPrefix} = couch_jobs_fdb:get_jtx(JTx), + PK = primary_key(Name, Key, LayerPrefix), + case erlfdb:wait(erlfdb:get(Tx, PK)) of + not_found -> + not_found; + Bin when is_binary(Bin) -> + {Val, StaleTS, ExpiresTS} = erlfdb_tuple:unpack(Bin), + Now = erlang:system_time(?TIME_UNIT), + if + Now < StaleTS -> {fresh, Val}; + Now < ExpiresTS -> {stale, Val}; + true -> expired + end + end. + + +-spec clear_all(Name :: binary()) -> + ok. +clear_all(Name) -> + fabric2_fdb:transactional(fun(Tx) -> + LayerPrefix = fabric2_fdb:get_dir(Tx), + NamePrefix = erlfdb_tuple:pack({?EXPIRING_CACHE, Name}, LayerPrefix), + erlfdb:clear_range_startswith(Tx, NamePrefix) + end). + + +-spec clear_expired_range(Name :: binary(), EndTS :: ?TIME_UNIT, + Limit :: non_neg_integer()) -> + OldestTS :: ?TIME_UNIT. +clear_expired_range(Name, EndTS, Limit) when Limit > 0 -> + fabric2_fdb:transactional(fun(Tx) -> + LayerPrefix = fabric2_fdb:get_dir(Tx), + ExpiresPrefix = erlfdb_tuple:pack( + {?EXPIRING_CACHE, Name, ?EXP}, LayerPrefix), + fabric2_fdb:fold_range({tx, Tx}, ExpiresPrefix, fun({K, _V}, Acc) -> + Unpacked = erlfdb_tuple:unpack(K, ExpiresPrefix), + couch_log:debug("~p clearing ~p", [?MODULE, Unpacked]), + {ExpiresTS, Key} = Unpacked, + clear_expired(Tx, ExpiresTS, Name, Key, LayerPrefix), + oldest_ts(ExpiresTS, Acc) + end, 0, [{end_key, EndTS}, {limit, Limit}]) + end). + + +%% Private + + +clear_expired(Tx, ExpiresTS, Name, Key, Prefix) -> + PK = primary_key(Name, Key, Prefix), + XK = expiry_key(ExpiresTS, Name, Key, Prefix), + ok = erlfdb:clear(Tx, PK), + ok = erlfdb:clear(Tx, XK). + + +oldest_ts(TS, 0) -> TS; % handle initial Acc = 0 case +oldest_ts(TS, OldestTS) -> min(TS, OldestTS). + + +primary_key(Name, Key, Prefix) -> + erlfdb_tuple:pack({?EXPIRING_CACHE, Name, ?PK, Key}, Prefix). + + +expiry_key(ExpiresTS, Name, Key, Prefix) -> + erlfdb_tuple:pack({?EXPIRING_CACHE, Name, ?EXP, ExpiresTS, Key}, Prefix). diff --git a/src/couch_expiring_cache/src/couch_expiring_cache_server.erl b/src/couch_expiring_cache/src/couch_expiring_cache_server.erl new file mode 100644 index 000000000..6f9dc1fd1 --- /dev/null +++ b/src/couch_expiring_cache/src/couch_expiring_cache_server.erl @@ -0,0 +1,110 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_expiring_cache_server). + +-behaviour(gen_server). + +-callback start_link() -> {ok, pid()} | ignore | {error, term()}. + +-export([ + start_link/2 +]). + +-export([ + init/1, + terminate/2, + handle_call/3, + handle_cast/2, + handle_info/2, + code_change/3 +]). + + +-define(DEFAULT_BATCH_SIZE, 1000). +-define(DEFAULT_PERIOD_MSEC, 5000). +-define(DEFAULT_MAX_JITTER_MSEC, 1000). + + +-include_lib("couch_expiring_cache/include/couch_expiring_cache.hrl"). + + +start_link(Name, Opts) when is_atom(Name) -> + gen_server:start_link({local, Name}, ?MODULE, Opts#{name => Name}, []). + + +init(Opts) -> + ?MODULE = ets:new(?MODULE, [named_table, public, {read_concurrency, true}]), + DefaultCacheName = atom_to_binary(maps:get(name, Opts), utf8), + Period = maps:get(period, Opts, ?DEFAULT_PERIOD_MSEC), + MaxJitter = maps:get(max_jitter, Opts, ?DEFAULT_MAX_JITTER_MSEC), + {ok, #{ + cache_name => maps:get(cache_name, Opts, DefaultCacheName), + batch_size => maps:get(batch_size, Opts, ?DEFAULT_BATCH_SIZE), + period => Period, + max_jitter => MaxJitter, + timer_ref => schedule_remove_expired(Period, MaxJitter), + oldest_ts => 0, + elapsed => 0, + largest_elapsed => 0, + lag => 0}}. + + +terminate(_, _) -> + ok. + + +handle_call(Msg, _From, St) -> + {stop, {bad_call, Msg}, {bad_call, Msg}, St}. + + +handle_cast(Msg, St) -> + {stop, {bad_cast, Msg}, St}. + + +handle_info(remove_expired, St) -> + #{ + cache_name := Name, + batch_size := BatchSize, + period := Period, + max_jitter := MaxJitter, + oldest_ts := OldestTS0, + largest_elapsed := LargestElapsed + } = St, + + NowTS = erlang:system_time(?TIME_UNIT), + OldestTS = max(OldestTS0, + couch_expiring_cache_fdb:clear_expired_range(Name, NowTS, BatchSize)), + Elapsed = erlang:system_time(?TIME_UNIT) - NowTS, + + {noreply, St#{ + timer_ref := schedule_remove_expired(Period, MaxJitter), + oldest_ts := OldestTS, + elapsed := Elapsed, + largest_elapsed := max(Elapsed, LargestElapsed), + lag := NowTS - OldestTS}}; + +handle_info(Msg, St) -> + {stop, {bad_info, Msg}, St}. + + +code_change(_OldVsn, St, _Extra) -> + {ok, St}. + + +%% Private + + +schedule_remove_expired(Timeout, MaxJitter) -> + Jitter = max(Timeout div 2, MaxJitter), + Wait = Timeout + rand:uniform(max(1, Jitter)), + erlang:send_after(Wait, self(), remove_expired). diff --git a/src/couch_expiring_cache/test/couch_expiring_cache_tests.erl b/src/couch_expiring_cache/test/couch_expiring_cache_tests.erl new file mode 100644 index 000000000..aeb1df6f0 --- /dev/null +++ b/src/couch_expiring_cache/test/couch_expiring_cache_tests.erl @@ -0,0 +1,95 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_expiring_cache_tests). + + +-include_lib("couch/include/couch_eunit.hrl"). + +-include_lib("couch_expiring_cache/include/couch_expiring_cache.hrl"). + + +-define(CACHE_NAME, <<"test">>). + + +start_link() -> + Opts = #{ + cache_name => ?CACHE_NAME, + period => 20, + max_jitter => 0 + }, + couch_expiring_cache_server:start_link(?MODULE, Opts). + + +couch_expiring_cache_basic_test_() -> + { + "Test expiring cache basics", + { + setup, + fun setup_couch/0, fun teardown_couch/1, + { + foreach, + fun setup/0, fun teardown/1, + [ + fun simple_lifecycle/1 + ] + } + } + }. + + +setup_couch() -> + test_util:start_couch([fabric, couch_jobs]). + + +teardown_couch(Ctx) -> + test_util:stop_couch(Ctx). + + +setup() -> + {ok, Pid} = start_link(), + true = unlink(Pid), + #{pid => Pid}. + + +teardown(#{pid := Pid}) -> + exit(Pid, kill). + + +simple_lifecycle(_) -> + ?_test(begin + Now = erlang:system_time(?TIME_UNIT), + StaleTS = Now + 100, + ExpiresTS = Now + 200, + Name = ?CACHE_NAME, + Key = <<"key">>, + Val = <<"val">>, + + ?assertEqual(ok, couch_expiring_cache_fdb:clear_all(Name)), + ?assertEqual(not_found, couch_expiring_cache:lookup(Name, Key)), + ?assertEqual(ok, + couch_expiring_cache:insert(Name, Key, Val, StaleTS, ExpiresTS)), + ?assertEqual({fresh, Val}, couch_expiring_cache:lookup(Name, Key)), + ok = wait_lookup(Name, Key, {stale, Val}), + ok = wait_lookup(Name, Key, expired), + ok = wait_lookup(Name, Key, not_found), + ?assertEqual(not_found, couch_expiring_cache:lookup(Name, Key)) + end). + + +wait_lookup(Name, Key, Expect) -> + test_util:wait(fun() -> + case couch_expiring_cache:lookup(Name, Key) of + Expect -> ok; + _ -> wait + end + end, _Timeout = 1000, _PollingInterval = 10). -- cgit v1.2.1 From f358971f6ff9673af289fcdb2bbc12c09ada5d3a Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Wed, 18 Dec 2019 13:49:03 -0500 Subject: Improve transaction name setting when tracing FDB transactions Previously the per-request nonce value was set as the transaction name and so in the trace logs multiple transactions ended up having the same `TransactionID` which was pretty confusing. To fix the issue, append a transaction ID to the name. The ID is guaranteed to be unique for the life of the VM node. --- src/fabric/src/fabric2_fdb.erl | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/fabric/src/fabric2_fdb.erl b/src/fabric/src/fabric2_fdb.erl index 404460ed5..6abe1f6de 100644 --- a/src/fabric/src/fabric2_fdb.erl +++ b/src/fabric/src/fabric2_fdb.erl @@ -118,7 +118,10 @@ do_transaction(Fun, LayerPrefix) when is_function(Fun, 1) -> erlfdb:transactional(Db, fun(Tx) -> case get(erlfdb_trace) of Name when is_binary(Name) -> - erlfdb:set_option(Tx, transaction_logging_enable, Name); + UId = erlang:unique_integer([positive]), + UIdBin = integer_to_binary(UId, 36), + TxId = <>, + erlfdb:set_option(Tx, transaction_logging_enable, TxId); _ -> ok end, -- cgit v1.2.1 From e6c4ca3a415cae873f13fd61fa19219985326502 Mon Sep 17 00:00:00 2001 From: ILYA Khlopotov Date: Tue, 14 Jan 2020 02:41:50 -0800 Subject: Add `external` tag to opentrace events This PR adds an ability to selectively enable opentracing for HTTP requests with X-B3-... headers. This is helpful in following cases: - tracing all requests with X-B3-... headers `all = (#{external := E}) when E == true -> true` - tracing all requests to specific database with X-B3-... headers ``` all = (#{external := E, 'db.name' := Db}) when E == true andalso Db == <<"foo">> -> true ``` - tracing requests to specific endpoint with X-B3-... headers ``` db.design.view.read = (#{external := E, 'design.id' := Name}) when E == true andalso Name == <<"bar">> -> true ``` I want to remind that we support following X-B3-... headers: - X-B3-TraceId - X-B3-SpanId - X-B3-ParentSpanId - B3 which is in the following format --<1 | 0>- --- src/chttpd/src/chttpd.erl | 12 +++++++----- src/ctrace/README.md | 3 +++ 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/src/chttpd/src/chttpd.erl b/src/chttpd/src/chttpd.erl index 7fc9cffd4..848fdcafb 100644 --- a/src/chttpd/src/chttpd.erl +++ b/src/chttpd/src/chttpd.erl @@ -1252,6 +1252,7 @@ start_span(Req) -> [] -> <<"">>; [_ | _] -> filename:join(PathParts) end, + {IsExternalSpan, RootOptions} = root_span_options(MochiReq), Tags = maps:merge(#{ peer => Peer, 'http.method' => Method, @@ -1259,21 +1260,22 @@ start_span(Req) -> 'http.url' => MochiReq:get(raw_path), path_parts => Path, 'span.kind' => <<"server">>, - component => <<"couchdb.chttpd">> + component => <<"couchdb.chttpd">>, + external => IsExternalSpan }, ExtraTags), ctrace:start_span(OperationName, [ {tags, Tags}, {time, Begin} - ] ++ maybe_root_span(MochiReq)). + ] ++ RootOptions). -maybe_root_span(MochiReq) -> +root_span_options(MochiReq) -> case get_trace_headers(MochiReq) of [undefined, _, _] -> - []; + {false, []}; [TraceId, SpanId, ParentSpanId] -> Span = ctrace:external_span(TraceId, SpanId, ParentSpanId), - [{root, Span}] + {true, [{root, Span}]} end. parse_trace_id(undefined) -> diff --git a/src/ctrace/README.md b/src/ctrace/README.md index 6e40b434c..3172f268b 100644 --- a/src/ctrace/README.md +++ b/src/ctrace/README.md @@ -146,7 +146,10 @@ and logged. ```ini [tracing.filters] +; trace all events ; all = (#{}) -> true +; trace all events with X-B3-... headers +; all = (#{external := External}) when External == true -> true ; database-info.read = (#{'http.method' := Method}) when Method == 'GET' -> true ; view.build = (#{'view.name' := Name}) when Name == "foo" -> 0.25 ``` -- cgit v1.2.1 From 9785ea5bb18e6eba0791bc4857e99ce9a4f3ba40 Mon Sep 17 00:00:00 2001 From: Garren Smith Date: Thu, 16 Jan 2020 15:29:08 +0200 Subject: Change map indexes to be stored in one row Changes map indexes to store the original key and value in a single FDB row. --- src/couch_views/include/couch_views.hrl | 3 - src/couch_views/src/couch_views_fdb.erl | 126 ++++------------------------- src/couch_views/src/couch_views_reader.erl | 2 +- 3 files changed, 17 insertions(+), 114 deletions(-) diff --git a/src/couch_views/include/couch_views.hrl b/src/couch_views/include/couch_views.hrl index 2e443ebc3..642431dfe 100644 --- a/src/couch_views/include/couch_views.hrl +++ b/src/couch_views/include/couch_views.hrl @@ -19,8 +19,5 @@ -define(VIEW_ROW_COUNT, 0). -define(VIEW_KV_SIZE, 1). --define(VIEW_ROW_KEY, 0). --define(VIEW_ROW_VALUE, 1). - % jobs api -define(INDEX_JOB_TYPE, <<"views">>). diff --git a/src/couch_views/src/couch_views_fdb.erl b/src/couch_views/src/couch_views_fdb.erl index 60ce30019..98cff46b2 100644 --- a/src/couch_views/src/couch_views_fdb.erl +++ b/src/couch_views/src/couch_views_fdb.erl @@ -95,31 +95,14 @@ fold_map_idx(TxDb, Sig, ViewId, Options, Callback, Acc0) -> MapIdxPrefix = map_idx_prefix(DbPrefix, Sig, ViewId), FoldAcc = #{ prefix => MapIdxPrefix, - sort_key => undefined, - docid => undefined, - dupe_id => undefined, callback => Callback, acc => Acc0 - }, - - {Fun, Acc} = case fabric2_util:get_value(dir, Options, fwd) of - fwd -> - FwdAcc = FoldAcc#{ - next => key, - key => undefined - }, - {fun fold_fwd/2, FwdAcc}; - rev -> - RevAcc = FoldAcc#{ - next => value, - value => undefined - }, - {fun fold_rev/2, RevAcc} - end, + }, + Fun = fun fold_fwd/2, #{ acc := Acc1 - } = fabric2_fdb:fold_range(TxDb, MapIdxPrefix, Fun, Acc, Options), + } = fabric2_fdb:fold_range(TxDb, MapIdxPrefix, Fun, FoldAcc, Options), Acc1. @@ -169,110 +152,34 @@ write_doc(TxDb, Sig, ViewIds, Doc) -> end, lists:zip(ViewIds, Results)). -% For each row in a map view there are two rows stored in -% FoundationDB: +% For each row in a map view we store the the key/value +% in FoundationDB: % -% `(EncodedSortKey, EncodedKey)` -% `(EncodedSortKey, EncodedValue)` +% `(EncodedSortKey, (EncodedKey, EncodedValue))` % % The difference between `EncodedSortKey` and `EndcodedKey` is % the use of `couch_util:get_sort_key/1` which turns UTF-8 % strings into binaries that are byte comparable. Given a sort % key binary we cannot recover the input so to return unmodified % user data we are forced to store the original. -% -% These two fold functions exist so that we can be fairly -% forceful on our assertions about which rows to see. Since -% when we're folding forward we'll see the key first. When -% `descending=true` and we're folding in reverse we'll see -% the value first. -fold_fwd({RowKey, EncodedOriginalKey}, #{next := key} = Acc) -> - #{ - prefix := Prefix - } = Acc, - - {{SortKey, DocId}, DupeId, ?VIEW_ROW_KEY} = - erlfdb_tuple:unpack(RowKey, Prefix), - Acc#{ - next := value, - key := couch_views_encoding:decode(EncodedOriginalKey), - sort_key := SortKey, - docid := DocId, - dupe_id := DupeId - }; - -fold_fwd({RowKey, EncodedValue}, #{next := value} = Acc) -> +fold_fwd({RowKey, PackedKeyValue}, Acc) -> #{ prefix := Prefix, - key := Key, - sort_key := SortKey, - docid := DocId, - dupe_id := DupeId, callback := UserCallback, acc := UserAcc0 } = Acc, - % We're asserting there that this row is paired - % correctly with the previous row by relying on - % a badmatch if any of these values don't match. - {{SortKey, DocId}, DupeId, ?VIEW_ROW_VALUE} = + {{_SortKey, DocId}, _DupeId} = erlfdb_tuple:unpack(RowKey, Prefix), + {EncodedOriginalKey, EncodedValue} = erlfdb_tuple:unpack(PackedKeyValue), Value = couch_views_encoding:decode(EncodedValue), - UserAcc1 = UserCallback(DocId, Key, Value, UserAcc0), - - Acc#{ - next := key, - key := undefined, - sort_key := undefined, - docid := undefined, - dupe_id := undefined, - acc := UserAcc1 - }. - - -fold_rev({RowKey, EncodedValue}, #{next := value} = Acc) -> - #{ - prefix := Prefix - } = Acc, - - {{SortKey, DocId}, DupeId, ?VIEW_ROW_VALUE} = - erlfdb_tuple:unpack(RowKey, Prefix), - Acc#{ - next := key, - value := couch_views_encoding:decode(EncodedValue), - sort_key := SortKey, - docid := DocId, - dupe_id := DupeId - }; - -fold_rev({RowKey, EncodedOriginalKey}, #{next := key} = Acc) -> - #{ - prefix := Prefix, - value := Value, - sort_key := SortKey, - docid := DocId, - dupe_id := DupeId, - callback := UserCallback, - acc := UserAcc0 - } = Acc, - - % We're asserting there that this row is paired - % correctly with the previous row by relying on - % a badmatch if any of these values don't match. - {{SortKey, DocId}, DupeId, ?VIEW_ROW_KEY} = - erlfdb_tuple:unpack(RowKey, Prefix), - Key = couch_views_encoding:decode(EncodedOriginalKey), + UserAcc1 = UserCallback(DocId, Key, Value, UserAcc0), Acc#{ - next := value, - value := undefined, - sort_key := undefined, - docid := undefined, - dupe_id := undefined, acc := UserAcc1 }. @@ -330,11 +237,10 @@ update_map_idx(TxDb, Sig, ViewId, DocId, ExistingKeys, NewRows) -> KVsToAdd = process_rows(NewRows), MapIdxPrefix = map_idx_prefix(DbPrefix, Sig, ViewId), - lists:foreach(fun({DupeId, Key1, Key2, Val}) -> - KK = map_idx_key(MapIdxPrefix, {Key1, DocId}, DupeId, ?VIEW_ROW_KEY), - VK = map_idx_key(MapIdxPrefix, {Key1, DocId}, DupeId, ?VIEW_ROW_VALUE), - ok = erlfdb:set(Tx, KK, Key2), - ok = erlfdb:set(Tx, VK, Val) + lists:foreach(fun({DupeId, Key1, Key2, EV}) -> + KK = map_idx_key(MapIdxPrefix, {Key1, DocId}, DupeId), + Val = erlfdb_tuple:pack({Key2, EV}), + ok = erlfdb:set(Tx, KK, Val) end, KVsToAdd). @@ -400,8 +306,8 @@ map_idx_prefix(DbPrefix, Sig, ViewId) -> erlfdb_tuple:pack(Key, DbPrefix). -map_idx_key(MapIdxPrefix, MapKey, DupeId, Type) -> - Key = {MapKey, DupeId, Type}, +map_idx_key(MapIdxPrefix, MapKey, DupeId) -> + Key = {MapKey, DupeId}, erlfdb_tuple:pack(Key, MapIdxPrefix). diff --git a/src/couch_views/src/couch_views_reader.erl b/src/couch_views/src/couch_views_reader.erl index 27671fb9c..76dbed11f 100644 --- a/src/couch_views/src/couch_views_reader.erl +++ b/src/couch_views/src/couch_views_reader.erl @@ -183,7 +183,7 @@ mrargs_to_fdb_options(Args) -> [ {dir, Direction}, - {limit, Limit * 2 + Skip * 2}, + {limit, Limit + Skip}, {streaming_mode, want_all} ] ++ StartKeyOpts ++ EndKeyOpts. -- cgit v1.2.1 From 2fb578df535e308e72b0b3d098b7ec330a403afb Mon Sep 17 00:00:00 2001 From: Jay Doane Date: Tue, 28 Jan 2020 19:06:07 -0800 Subject: Delete unused ets table creation This ets table was a holdover from when couch_expiring_cache was a non- library OTP application. It is unused, and would prevent multiple users of the library in the same project. --- src/couch_expiring_cache/src/couch_expiring_cache_server.erl | 1 - 1 file changed, 1 deletion(-) diff --git a/src/couch_expiring_cache/src/couch_expiring_cache_server.erl b/src/couch_expiring_cache/src/couch_expiring_cache_server.erl index 6f9dc1fd1..65e742bba 100644 --- a/src/couch_expiring_cache/src/couch_expiring_cache_server.erl +++ b/src/couch_expiring_cache/src/couch_expiring_cache_server.erl @@ -43,7 +43,6 @@ start_link(Name, Opts) when is_atom(Name) -> init(Opts) -> - ?MODULE = ets:new(?MODULE, [named_table, public, {read_concurrency, true}]), DefaultCacheName = atom_to_binary(maps:get(name, Opts), utf8), Period = maps:get(period, Opts, ?DEFAULT_PERIOD_MSEC), MaxJitter = maps:get(max_jitter, Opts, ?DEFAULT_MAX_JITTER_MSEC), -- cgit v1.2.1 From f62ac8ef881b5b695dc9edba85f33e6b1f65a293 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Wed, 29 Jan 2020 20:41:07 +0000 Subject: reserve search namespace --- src/fabric/include/fabric2.hrl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/fabric/include/fabric2.hrl b/src/fabric/include/fabric2.hrl index b4dd084a2..828a51b8f 100644 --- a/src/fabric/include/fabric2.hrl +++ b/src/fabric/include/fabric2.hrl @@ -38,6 +38,7 @@ -define(DB_VIEWS, 24). -define(DB_LOCAL_DOC_BODIES, 25). -define(DB_ATT_NAMES, 26). +-define(DB_SEARCH, 27). % Versions -- cgit v1.2.1 From 98a9d80eee3fdef6ef5f31b8bb064307abd03117 Mon Sep 17 00:00:00 2001 From: ILYA Khlopotov Date: Fri, 17 Jan 2020 09:37:54 -0800 Subject: Support jaeger http reporter --- rebar.config.script | 4 ++-- rel/overlay/etc/default.ini | 12 +++++++++--- src/ctrace/README.md | 18 ++++++++++++++++-- src/ctrace/src/ctrace_config.erl | 38 +++++++++++++++++++++++++++++--------- 4 files changed, 56 insertions(+), 16 deletions(-) diff --git a/rebar.config.script b/rebar.config.script index 390c7792b..d315c75ad 100644 --- a/rebar.config.script +++ b/rebar.config.script @@ -163,14 +163,14 @@ DepDescs = [ {folsom, "folsom", {tag, "CouchDB-0.8.3"}}, {hyper, "hyper", {tag, "CouchDB-2.2.0-6"}}, {ibrowse, "ibrowse", {tag, "CouchDB-4.0.1-1"}}, -{jaeger_passage, "jaeger-passage", {tag, "CouchDB-0.1.13-1"}}, +{jaeger_passage, "jaeger-passage", {tag, "CouchDB-0.1.14-1"}}, {jiffy, "jiffy", {tag, "CouchDB-1.0.4-1"}}, {local, "local", {tag, "0.2.1"}}, {mochiweb, "mochiweb", {tag, "v2.20.0"}}, {meck, "meck", {tag, "0.8.8"}}, {recon, "recon", {tag, "2.5.0"}}, {passage, "passage", {tag, "0.2.6"}}, -{thrift_protocol, "thrift-protocol", {tag, "0.1.3"}}, +{thrift_protocol, "thrift-protocol", {tag, "0.1.5"}}, %% TMP - Until this is moved to a proper Apache repo {erlfdb, "erlfdb", {branch, "master"}} diff --git a/rel/overlay/etc/default.ini b/rel/overlay/etc/default.ini index db69fe1b7..f2a81875c 100644 --- a/rel/overlay/etc/default.ini +++ b/rel/overlay/etc/default.ini @@ -649,13 +649,19 @@ compaction = false [tracing] ; ; Configuration settings for the `ctrace` OpenTracing -; API. -; +; API. There are two reporter which we support. +; - jaeger.thrift over udp +; - jaeger.thrift over http +; ## Common settings ; enabled = false ; true | false +; app_name = couchdb ; value to use for the `location.application` tag +; protocol = udp ; udp | http - which reporter to use +; ## jaeger.thrift over udp reporter ; thrift_format = compact ; compact | binary ; agent_host = 127.0.0.1 ; agent_port = 6831 -; app_name = couchdb ; value to use for the `location.application` tag +; ## jaeger.thrift over udp reporter +; endpoint = http://127.0.0.1:14268 [tracing.filters] ; diff --git a/src/ctrace/README.md b/src/ctrace/README.md index 3172f268b..4b0238b14 100644 --- a/src/ctrace/README.md +++ b/src/ctrace/README.md @@ -120,9 +120,23 @@ Configuration Traces are configured using standard CouchDB ini file based configuration. There is a global toggle `[tracing] enabled = true | false` that switches tracing on or off completely. The `[tracing]` section also includes -configuration for where to send trace data. +configuration for where to send trace data. There are two reporters which we +support. -An example `[tracing]` section +The thrift over udp reporter (this is the default) has following configuration +options: + +- protocol = udp +- thrift_format = compact | binary +- agent_host = 127.0.0.1 +- agent_port = 6831 + +The thrift over http has following options + +- protocol = http +- endpoint = http://127.0.0.1:14268 + +An example of `[tracing]` section ```ini [tracing] diff --git a/src/ctrace/src/ctrace_config.erl b/src/ctrace/src/ctrace_config.erl index bc2a3dff2..c63c77f1b 100644 --- a/src/ctrace/src/ctrace_config.erl +++ b/src/ctrace/src/ctrace_config.erl @@ -98,17 +98,37 @@ maybe_start_main_tracer(TracerId) -> start_main_tracer(TracerId) -> - Sampler = passage_sampler_all:new(), - Options = [ - {thrift_format, - list_to_atom(config:get("tracing", "thrift_format", "compact"))}, - {agent_host, config:get("tracing", "agent_host", "127.0.0.1")}, - {agent_port, config:get_integer("tracing", "agent_port", 6831)}, - {default_service_name, - list_to_atom(config:get("tracing", "app_name", "couchdb"))} - ], + MaxQueueLen = config:get_integer("tracing", "max_queue_len", 1024), + Sampler = jaeger_passage_sampler_queue_limit:new( + passage_sampler_all:new(), TracerId, MaxQueueLen), + ServiceName = list_to_atom(config:get("tracing", "app_name", "couchdb")), + + ProtocolOptions = case config:get("tracing", "protocol", "udp") of + "udp" -> + [ + {thrift_format, list_to_atom( + config:get("tracing", "thrift_format", "compact"))}, + {agent_host, + config:get("tracing", "agent_host", "127.0.0.1")}, + {agent_port, + config:get_integer("tracing", "agent_port", 6831)}, + {protocol, udp}, + {default_service_name, ServiceName} + ]; + "http" ++ _ -> + [ + {endpoint, + config:get("tracing", "endpoint", "http://127.0.0.1:14268")}, + {protocol, http}, + {http_client, fun http_client/5}, + {default_service_name, ServiceName} + ] + end, + Options = [{default_service_name, ServiceName}|ProtocolOptions], ok = jaeger_passage:start_tracer(TracerId, Sampler, Options). +http_client(Endpoint, Method, Headers, Body, _ReporterOptions) -> + ibrowse:send_req(Endpoint, Headers, Method, Body, []). compile_filter(OperationId, FilterDef) -> try -- cgit v1.2.1 From b4a7f6dc8c18f139ce2bc02691268eaf61a16e28 Mon Sep 17 00:00:00 2001 From: ILYA Khlopotov Date: Tue, 4 Feb 2020 09:29:33 -0800 Subject: fix b3 - Headers suppose to be strings --- src/chttpd/src/chttpd.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/chttpd/src/chttpd.erl b/src/chttpd/src/chttpd.erl index 848fdcafb..598436153 100644 --- a/src/chttpd/src/chttpd.erl +++ b/src/chttpd/src/chttpd.erl @@ -1304,7 +1304,7 @@ get_trace_headers(MochiReq) -> parse_span_id(MochiReq:get_header_value("X-B3-ParentSpanId")) ]; Value -> - case binary:split(Value, <<"-">>, [global]) of + case string:split(Value, "-", all) of [TraceIdStr, SpanIdStr, _SampledStr, ParentSpanIdStr] -> [ parse_trace_id(TraceIdStr), -- cgit v1.2.1 From 8bcf5667cc30071b61dd140e413f59e421624789 Mon Sep 17 00:00:00 2001 From: ILYA Khlopotov Date: Tue, 4 Feb 2020 09:30:10 -0800 Subject: Add basic test case for b3 fix --- src/chttpd/test/exunit/test_helper.exs | 2 + src/chttpd/test/exunit/tracing_test.exs | 101 ++++++++++++++++++++++++++++++++ 2 files changed, 103 insertions(+) create mode 100644 src/chttpd/test/exunit/test_helper.exs create mode 100644 src/chttpd/test/exunit/tracing_test.exs diff --git a/src/chttpd/test/exunit/test_helper.exs b/src/chttpd/test/exunit/test_helper.exs new file mode 100644 index 000000000..314050085 --- /dev/null +++ b/src/chttpd/test/exunit/test_helper.exs @@ -0,0 +1,2 @@ +ExUnit.configure(formatters: [JUnitFormatter, ExUnit.CLIFormatter]) +ExUnit.start() diff --git a/src/chttpd/test/exunit/tracing_test.exs b/src/chttpd/test/exunit/tracing_test.exs new file mode 100644 index 000000000..b50ef936e --- /dev/null +++ b/src/chttpd/test/exunit/tracing_test.exs @@ -0,0 +1,101 @@ +defmodule Couch.Test.OpenTracing do + use Couch.Test.ExUnit.Case + alias Couch.Test.Setup + alias Couch.Test.Setup.Step + alias Couch.Test.Utils + import Couch.DBTest, only: [retry_until: 1] + + defp create_admin(user_name, password) do + hashed = String.to_charlist(:couch_passwords.hash_admin_password(password)) + :config.set('admins', String.to_charlist(user_name), hashed, false) + end + + defp base_url() do + addr = :config.get('chttpd', 'bind_address', '127.0.0.1') + port = :mochiweb_socket_server.get(:chttpd, :port) + "http://#{addr}:#{port}" + end + + setup_all context do + test_ctx = :test_util.start_couch([:chttpd]) + :ok = create_admin("adm", "pass") + + Map.merge(context, %{ + base_url: base_url(), + user: "adm", + pass: "pass" + }) + end + + setup context do + db_name = Utils.random_name("db") + session = Couch.login(context.base_url, context.user, context.pass) + + on_exit(fn -> + delete_db(session, db_name) + end) + + create_db(session, db_name) + + Map.merge(context, %{ + db_name: db_name, + session: session + }) + end + + def create_db(session, db_name, opts \\ []) do + retry_until(fn -> + resp = Couch.Session.put(session, "/#{db_name}", opts) + assert resp.status_code in [201, 202] + assert resp.body == %{"ok" => true} + {:ok, resp} + end) + end + + def delete_db(session, db_name) do + retry_until(fn -> + resp = Couch.Session.delete(session, "/#{db_name}") + assert resp.status_code in [200, 202, 404] + {:ok, resp} + end) + end + + def create_doc(session, db_name, body) do + retry_until(fn -> + resp = Couch.Session.post(session, "/#{db_name}", body: body) + assert resp.status_code in [201, 202] + assert resp.body["ok"] + {:ok, resp} + end) + end + + defp trace_id() do + :couch_util.to_hex(:crypto.strong_rand_bytes(16)) + end + + defp span_id() do + :couch_util.to_hex(:crypto.strong_rand_bytes(8)) + end + + describe "Open Tracing" do + test "should return success with combined b3 header", ctx do + %{session: session, db_name: db_name} = ctx + doc = '{"mr": "rockoartischocko"}' + {:ok, _} = create_doc(session, db_name, doc) + + resp = + retry_until(fn -> + b3 = "#{trace_id()}-#{span_id()}-#{span_id()}" + + response = + Couch.Session.get(session, "/#{db_name}/_all_docs", headers: [b3: b3]) + + assert %HTTPotion.Response{} = response + response + end) + + assert resp.status_code == 200, "Expected 200, got: #{resp.status_code}" + assert length(resp.body["rows"]) == 1 + end + end +end -- cgit v1.2.1 From 712fe04d97ccdfd86cafbbe2379fbca2dc616c57 Mon Sep 17 00:00:00 2001 From: garren smith Date: Thu, 13 Feb 2020 12:13:42 +0200 Subject: Encode startkey/endkey for all_docs (#2538) * Encode startkey/endkey for all_docs Encodes the startkey/endkey so that if a startkey is not binary it will return the expected results. --- src/chttpd/src/chttpd_db.erl | 10 +++---- src/fabric/src/fabric2_util.erl | 13 +++++++++- test/elixir/test/all_docs_test.exs | 53 +++++++++++++++++++++++++++++++------- 3 files changed, 60 insertions(+), 16 deletions(-) diff --git a/src/chttpd/src/chttpd_db.erl b/src/chttpd/src/chttpd_db.erl index dbd52be40..3951fdb33 100644 --- a/src/chttpd/src/chttpd_db.erl +++ b/src/chttpd/src/chttpd_db.erl @@ -945,13 +945,13 @@ all_docs_view_opts(Args) -> EKey -> EKey end, StartKeyOpts = case StartKey of - <<_/binary>> -> [{start_key, StartKey}]; - undefined -> [] + undefined -> []; + _ -> [{start_key, fabric2_util:encode_all_doc_key(StartKey)}] end, EndKeyOpts = case {EndKey, Args#mrargs.inclusive_end} of - {<<_/binary>>, false} -> [{end_key_gt, EndKey}]; - {<<_/binary>>, true} -> [{end_key, EndKey}]; - {undefined, _} -> [] + {undefined, _} -> []; + {_, false} -> [{end_key_gt, fabric2_util:encode_all_doc_key(EndKey)}]; + {_, true} -> [{end_key, fabric2_util:encode_all_doc_key(EndKey)}] end, [ {dir, Args#mrargs.direction}, diff --git a/src/fabric/src/fabric2_util.erl b/src/fabric/src/fabric2_util.erl index 4e2e2d76b..2a940659e 100644 --- a/src/fabric/src/fabric2_util.erl +++ b/src/fabric/src/fabric2_util.erl @@ -33,7 +33,9 @@ get_value/3, to_hex/1, from_hex/1, - uuid/0 + uuid/0, + + encode_all_doc_key/1 ]). @@ -235,3 +237,12 @@ hex_to_nibble(N) -> uuid() -> to_hex(crypto:strong_rand_bytes(16)). + + +encode_all_doc_key(null) -> <<>>; +encode_all_doc_key(true) -> <<>>; +encode_all_doc_key(false) -> <<>>; +encode_all_doc_key(N) when is_number(N) -> <<>>; +encode_all_doc_key(B) when is_binary(B) -> B; +encode_all_doc_key(L) when is_list(L) -> <<255>>; +encode_all_doc_key({O}) when is_list(O) -> <<255>>. diff --git a/test/elixir/test/all_docs_test.exs b/test/elixir/test/all_docs_test.exs index acf4f390e..9501b3bec 100644 --- a/test/elixir/test/all_docs_test.exs +++ b/test/elixir/test/all_docs_test.exs @@ -41,11 +41,9 @@ defmodule AllDocsTest do assert resp["total_rows"] == length(rows) # Check _all_docs offset - retry_until(fn -> - resp = Couch.get("/#{db_name}/_all_docs", query: %{:startkey => "\"2\""}).body - assert resp["offset"] == :null - assert Enum.at(resp["rows"], 0)["key"] == "2" - end) + resp = Couch.get("/#{db_name}/_all_docs", query: %{:startkey => "\"2\""}).body + assert resp["offset"] == :null + assert Enum.at(resp["rows"], 0)["key"] == "2" # Confirm that queries may assume raw collation resp = @@ -73,11 +71,9 @@ defmodule AllDocsTest do changes = Couch.get("/#{db_name}/_changes").body["results"] assert length(changes) == 4 - retry_until(fn -> - deleted = Enum.filter(changes, fn row -> row["deleted"] end) - assert length(deleted) == 1 - assert hd(deleted)["id"] == "1" - end) + deleted = Enum.filter(changes, fn row -> row["deleted"] end) + assert length(deleted) == 1 + assert hd(deleted)["id"] == "1" # (remember old seq) orig_doc = Enum.find(changes, fn row -> row["id"] == "3" end) @@ -297,4 +293,41 @@ defmodule AllDocsTest do assert resp.status_code == 200 assert length(Map.get(resp, :body)["rows"]) == 1 end + + @tag :with_db + test "all_docs ordering", context do + db_name = context[:db_name] + + docs = [ + %{:_id => "a"}, + %{:_id => "m"}, + %{:_id => "z"} + ] + + resp = Couch.post("/#{db_name}/_bulk_docs", body: %{:docs => docs}) + Enum.each(resp.body, &assert(&1["ok"])) + + resp = Couch.get("/#{db_name}/_all_docs", query: %{:startkey => false}).body + rows = resp["rows"] + assert length(rows) === 3 + assert get_ids(resp) == ["a", "m", "z"] + + resp = Couch.get("/#{db_name}/_all_docs", query: %{:startkey => 0}).body + rows = resp["rows"] + assert length(rows) === 3 + assert get_ids(resp) == ["a", "m", "z"] + + resp = Couch.get("/#{db_name}/_all_docs", query: %{:startkey => "[1,2]"}).body + rows = resp["rows"] + assert length(rows) === 0 + + resp = Couch.get("/#{db_name}/_all_docs", query: %{:end_key => 0}).body + rows = resp["rows"] + assert length(rows) === 0 + end + + defp get_ids(resp) do + %{"rows" => rows} = resp + Enum.map(rows, fn row -> row["id"] end) + end end -- cgit v1.2.1 From e7ed6e951364314345c76049654ea7bad7638f29 Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Mon, 10 Feb 2020 12:42:22 -0600 Subject: Fix doc attachment tests Attachment names should be binaries --- src/fabric/test/fabric2_doc_att_tests.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/fabric/test/fabric2_doc_att_tests.erl b/src/fabric/test/fabric2_doc_att_tests.erl index 331e1a4e8..ac531e913 100644 --- a/src/fabric/test/fabric2_doc_att_tests.erl +++ b/src/fabric/test/fabric2_doc_att_tests.erl @@ -175,9 +175,9 @@ large_att({Db, _}) -> AttData = iolist_to_binary([ <<"foobar">> || _ <- lists:seq(1, 60000) ]), - Att1 = mk_att("long.txt", AttData), + Att1 = mk_att(<<"long.txt">>, AttData), {ok, _} = create_doc(Db, DocId, [Att1]), - ?assertEqual(#{"long.txt" => AttData}, read_atts(Db, DocId)), + ?assertEqual(#{<<"long.txt">> => AttData}, read_atts(Db, DocId)), {ok, Doc} = fabric2_db:open_doc(Db, DocId), #doc{atts = [Att2]} = Doc, -- cgit v1.2.1 From 1511b6d84ef8065ef4bbc64824a3f95ef067dff5 Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Tue, 3 Dec 2019 10:24:36 -0600 Subject: Track a database level view size rollup This way we can expose the total view size for a database in the dbinfo JSON blob. --- src/couch_views/src/couch_views_fdb.erl | 17 ++++++++++++++-- src/fabric/src/fabric2_fdb.erl | 36 ++++++++++++++++----------------- 2 files changed, 33 insertions(+), 20 deletions(-) diff --git a/src/couch_views/src/couch_views_fdb.erl b/src/couch_views/src/couch_views_fdb.erl index 98cff46b2..5edaa3a5f 100644 --- a/src/couch_views/src/couch_views_fdb.erl +++ b/src/couch_views/src/couch_views_fdb.erl @@ -272,8 +272,16 @@ update_kv_size(TxDb, Sig, ViewId, Increment) -> tx := Tx, db_prefix := DbPrefix } = TxDb, - Key = kv_size_key(DbPrefix, Sig, ViewId), - erlfdb:add(Tx, Key, Increment). + + % Track a view specific size for calls to + % GET /dbname/_design/doc/_info` + IdxKey = kv_size_key(DbPrefix, Sig, ViewId), + erlfdb:add(Tx, IdxKey, Increment), + + % Track a database level rollup for calls to + % GET /dbname + DbKey = db_kv_size_key(DbPrefix), + erlfdb:add(Tx, DbKey, Increment). seq_key(DbPrefix, Sig) -> @@ -291,6 +299,11 @@ kv_size_key(DbPrefix, Sig, ViewId) -> erlfdb_tuple:pack(Key, DbPrefix). +db_kv_size_key(DbPrefix) -> + Key = {?DB_STATS, <<"sizes">>, <<"views">>}, + erlfdb_tuple:pack(Key, DbPrefix). + + id_idx_key(DbPrefix, Sig, DocId, ViewId) -> Key = {?DB_VIEWS, Sig, ?VIEW_ID_RANGE, DocId, ViewId}, erlfdb_tuple:pack(Key, DbPrefix). diff --git a/src/fabric/src/fabric2_fdb.erl b/src/fabric/src/fabric2_fdb.erl index 6abe1f6de..8bfbb749a 100644 --- a/src/fabric/src/fabric2_fdb.erl +++ b/src/fabric/src/fabric2_fdb.erl @@ -174,11 +174,16 @@ create(#{} = Db0, Options) -> {?DB_STATS, <<"doc_del_count">>, ?uint2bin(0)}, {?DB_STATS, <<"doc_design_count">>, ?uint2bin(0)}, {?DB_STATS, <<"doc_local_count">>, ?uint2bin(0)}, - {?DB_STATS, <<"size">>, ?uint2bin(2)} + {?DB_STATS, <<"sizes">>, <<"external">>, ?uint2bin(2)}, + {?DB_STATS, <<"sizes">>, <<"views">>, ?uint2bin(0)} ], - lists:foreach(fun({P, K, V}) -> - Key = erlfdb_tuple:pack({P, K}, DbPrefix), - erlfdb:set(Tx, Key, V) + lists:foreach(fun + ({P, K, V}) -> + Key = erlfdb_tuple:pack({P, K}, DbPrefix), + erlfdb:set(Tx, Key, V); + ({P, S, K, V}) -> + Key = erlfdb_tuple:pack({P, S, K}, DbPrefix), + erlfdb:set(Tx, Key, V) end, Defaults), UserCtx = fabric2_util:get_value(user_ctx, Options, #user_ctx{}), @@ -348,26 +353,21 @@ get_info(#{} = Db) -> end, CProp = {update_seq, RawSeq}, - MProps = lists:flatmap(fun({K, V}) -> + MProps = lists:foldl(fun({K, V}, Acc) -> case erlfdb_tuple:unpack(K, DbPrefix) of {?DB_STATS, <<"doc_count">>} -> - [{doc_count, ?bin2uint(V)}]; + [{doc_count, ?bin2uint(V)} | Acc]; {?DB_STATS, <<"doc_del_count">>} -> - [{doc_del_count, ?bin2uint(V)}]; - {?DB_STATS, <<"size">>} -> + [{doc_del_count, ?bin2uint(V)} | Acc]; + {?DB_STATS, <<"sizes">>, Name} -> Val = ?bin2uint(V), - [ - {other, {[{data_size, Val}]}}, - {sizes, {[ - {active, 0}, - {external, Val}, - {file, 0} - ]}} - ]; + {_, {Sizes}} = lists:keyfind(sizes, 1, Acc), + NewSizes = lists:keystore(Name, 1, Sizes, {Name, Val}), + lists:keystore(sizes, 1, Acc, {sizes, {NewSizes}}); {?DB_STATS, _} -> - [] + Acc end - end, erlfdb:wait(MetaFuture)), + end, [{sizes, {[]}}], erlfdb:wait(MetaFuture)), [CProp | MProps]. -- cgit v1.2.1 From 1b79e11f15513c33ae4df858d0f471976e9c1b9b Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Tue, 3 Dec 2019 12:44:34 -0600 Subject: Implement async API for `fabric2_fdb:get_info/1` --- src/fabric/src/fabric2_fdb.erl | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/fabric/src/fabric2_fdb.erl b/src/fabric/src/fabric2_fdb.erl index 8bfbb749a..0e7cba859 100644 --- a/src/fabric/src/fabric2_fdb.erl +++ b/src/fabric/src/fabric2_fdb.erl @@ -29,6 +29,8 @@ list_dbs/4, get_info/1, + get_info_future/2, + get_info_wait/1, set_config/3, get_stat/2, @@ -333,7 +335,10 @@ get_info(#{} = Db) -> tx := Tx, db_prefix := DbPrefix } = ensure_current(Db), + get_info_wait(get_info_future(Tx, DbPrefix)). + +get_info_future(Tx, DbPrefix) -> {CStart, CEnd} = erlfdb_tuple:range({?DB_CHANGES}, DbPrefix), ChangesFuture = erlfdb:get_range(Tx, CStart, CEnd, [ {streaming_mode, exact}, @@ -344,6 +349,10 @@ get_info(#{} = Db) -> StatsPrefix = erlfdb_tuple:pack({?DB_STATS}, DbPrefix), MetaFuture = erlfdb:get_range_startswith(Tx, StatsPrefix), + {DbPrefix, ChangesFuture, MetaFuture}. + + +get_info_wait({DbPrefix, ChangesFuture, MetaFuture}) -> RawSeq = case erlfdb:wait(ChangesFuture) of [] -> vs_to_seq(fabric2_util:seq_zero_vs()); -- cgit v1.2.1 From bd69a01dbd903597b241d8f23e6fd7296d5d4ec6 Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Tue, 3 Dec 2019 12:45:36 -0600 Subject: Implement `fabric2_db:list_dbs_info/1,2,3` This API allows for listing all database info blobs in a single request. It accepts the same parameters as `_all_dbs` for controlling pagination of results and so on. --- src/fabric/src/fabric2_db.erl | 100 +++++++++++++++++++++++++----- src/fabric/src/fabric2_fdb.erl | 11 ++++ src/fabric/test/fabric2_db_crud_tests.erl | 31 ++++++++- 3 files changed, 126 insertions(+), 16 deletions(-) diff --git a/src/fabric/src/fabric2_db.erl b/src/fabric/src/fabric2_db.erl index 6d015df0e..17c899d27 100644 --- a/src/fabric/src/fabric2_db.erl +++ b/src/fabric/src/fabric2_db.erl @@ -22,6 +22,10 @@ list_dbs/1, list_dbs/3, + list_dbs_info/0, + list_dbs_info/1, + list_dbs_info/3, + check_is_admin/1, check_is_member/1, @@ -238,6 +242,46 @@ list_dbs(UserFun, UserAcc0, Options) -> end). +list_dbs_info() -> + list_dbs_info([]). + + +list_dbs_info(Options) -> + Callback = fun(Value, Acc) -> + NewAcc = case Value of + {meta, _} -> Acc; + {row, DbInfo} -> [DbInfo | Acc]; + complete -> Acc + end, + {ok, NewAcc} + end, + {ok, DbInfos} = list_dbs_info(Callback, [], Options), + {ok, lists:reverse(DbInfos)}. + + +list_dbs_info(UserFun, UserAcc0, Options) -> + FoldFun = fun(DbName, InfoFuture, {FutureQ, Count, Acc}) -> + NewFutureQ = queue:in({DbName, InfoFuture}, FutureQ), + drain_info_futures(NewFutureQ, Count + 1, UserFun, Acc) + end, + fabric2_fdb:transactional(fun(Tx) -> + try + UserAcc1 = maybe_stop(UserFun({meta, []}, UserAcc0)), + InitAcc = {queue:new(), 0, UserAcc1}, + {FinalFutureQ, _, UserAcc2} = fabric2_fdb:list_dbs_info( + Tx, + FoldFun, + InitAcc, + Options + ), + UserAcc3 = drain_all_info_futures(FinalFutureQ, UserFun, UserAcc2), + {ok, maybe_stop(UserFun(complete, UserAcc3))} + catch throw:{stop, FinalUserAcc} -> + {ok, FinalUserAcc} + end + end). + + is_admin(Db, {SecProps}) when is_list(SecProps) -> case fabric2_db_plugin:check_is_admin(Db) of true -> @@ -313,21 +357,7 @@ get_db_info(#{} = Db) -> DbProps = fabric2_fdb:transactional(Db, fun(TxDb) -> fabric2_fdb:get_info(TxDb) end), - - BaseProps = [ - {cluster, {[{n, 0}, {q, 0}, {r, 0}, {w, 0}]}}, - {compact_running, false}, - {data_size, 0}, - {db_name, name(Db)}, - {disk_format_version, 0}, - {disk_size, 0}, - {instance_start_time, <<"0">>}, - {purge_seq, 0} - ], - - {ok, lists:foldl(fun({Key, Val}, Acc) -> - lists:keystore(Key, 1, Acc, {Key, Val}) - end, BaseProps, DbProps)}. + {ok, make_db_info(name(Db), DbProps)}. get_del_doc_count(#{} = Db) -> @@ -944,6 +974,46 @@ maybe_add_sys_db_callbacks(Db) -> }. +make_db_info(DbName, Props) -> + BaseProps = [ + {cluster, {[{n, 0}, {q, 0}, {r, 0}, {w, 0}]}}, + {compact_running, false}, + {data_size, 0}, + {db_name, DbName}, + {disk_format_version, 0}, + {disk_size, 0}, + {instance_start_time, <<"0">>}, + {purge_seq, 0} + ], + + lists:foldl(fun({Key, Val}, Acc) -> + lists:keystore(Key, 1, Acc, {Key, Val}) + end, BaseProps, Props). + + +drain_info_futures(FutureQ, Count, _UserFun, Acc) when Count < 100 -> + {FutureQ, Count, Acc}; + +drain_info_futures(FutureQ, Count, UserFun, Acc) when Count >= 100 -> + {{value, {DbName, Future}}, RestQ} = queue:out(FutureQ), + InfoProps = fabric2_fdb:get_info_wait(Future), + DbInfo = make_db_info(DbName, InfoProps), + NewAcc = maybe_stop(UserFun({row, DbInfo}, Acc)), + {RestQ, Count - 1, NewAcc}. + + +drain_all_info_futures(FutureQ, UserFun, Acc) -> + case queue:out(FutureQ) of + {{value, {DbName, Future}}, RestQ} -> + InfoProps = fabric2_fdb:get_info_wait(Future), + DbInfo = make_db_info(DbName, InfoProps), + NewAcc = maybe_stop(UserFun({row, DbInfo}, Acc)), + drain_all_info_futures(RestQ, UserFun, NewAcc); + {empty, _} -> + Acc + end. + + new_revid(Db, Doc) -> #doc{ id = DocId, diff --git a/src/fabric/src/fabric2_fdb.erl b/src/fabric/src/fabric2_fdb.erl index 0e7cba859..99611b0a1 100644 --- a/src/fabric/src/fabric2_fdb.erl +++ b/src/fabric/src/fabric2_fdb.erl @@ -27,6 +27,7 @@ get_dir/1, list_dbs/4, + list_dbs_info/4, get_info/1, get_info_future/2, @@ -330,6 +331,16 @@ list_dbs(Tx, Callback, AccIn, Options) -> end, AccIn, Options). +list_dbs_info(Tx, Callback, AccIn, Options) -> + LayerPrefix = get_dir(Tx), + Prefix = erlfdb_tuple:pack({?ALL_DBS}, LayerPrefix), + fold_range({tx, Tx}, Prefix, fun({DbNameKey, DbPrefix}, Acc) -> + {DbName} = erlfdb_tuple:unpack(DbNameKey, Prefix), + InfoFuture = get_info_future(Tx, DbPrefix), + Callback(DbName, InfoFuture, Acc) + end, AccIn, Options). + + get_info(#{} = Db) -> #{ tx := Tx, diff --git a/src/fabric/test/fabric2_db_crud_tests.erl b/src/fabric/test/fabric2_db_crud_tests.erl index cc44f7d6b..80525513a 100644 --- a/src/fabric/test/fabric2_db_crud_tests.erl +++ b/src/fabric/test/fabric2_db_crud_tests.erl @@ -29,7 +29,8 @@ crud_test_() -> ?TDEF(create_db), ?TDEF(open_db), ?TDEF(delete_db), - ?TDEF(list_dbs) + ?TDEF(list_dbs), + ?TDEF(list_dbs_info) ]) } }. @@ -84,3 +85,31 @@ list_dbs(_) -> ?assertEqual(ok, fabric2_db:delete(DbName, [])), AllDbs3 = fabric2_db:list_dbs(), ?assert(not lists:member(DbName, AllDbs3)). + + +list_dbs_info(_) -> + DbName = ?tempdb(), + {ok, AllDbInfos1} = fabric2_db:list_dbs_info(), + + ?assert(is_list(AllDbInfos1)), + ?assert(not is_db_info_member(DbName, AllDbInfos1)), + + ?assertMatch({ok, _}, fabric2_db:create(DbName, [])), + {ok, AllDbInfos2} = fabric2_db:list_dbs_info(), + ?assert(is_db_info_member(DbName, AllDbInfos2)), + + ?assertEqual(ok, fabric2_db:delete(DbName, [])), + {ok, AllDbInfos3} = fabric2_db:list_dbs_info(), + ?assert(not is_db_info_member(DbName, AllDbInfos3)). + + +is_db_info_member(_, []) -> + false; + +is_db_info_member(DbName, [DbInfo | RestInfos]) -> + case lists:keyfind(db_name, 1, DbInfo) of + {db_name, DbName} -> + true; + _E -> + is_db_info_member(DbName, RestInfos) + end. -- cgit v1.2.1 From 4248ef769384f0987d2f25de239efdfb98686284 Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Tue, 3 Dec 2019 13:44:35 -0600 Subject: Support `GET /_dbs_info` endpoint Previously only `POST` with a list of keys was supported. The new `GET` support just dumps all database info blobs in a single ordered response. --- src/chttpd/src/chttpd_misc.erl | 49 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 48 insertions(+), 1 deletion(-) diff --git a/src/chttpd/src/chttpd_misc.erl b/src/chttpd/src/chttpd_misc.erl index f245875f2..ca1e58ad2 100644 --- a/src/chttpd/src/chttpd_misc.erl +++ b/src/chttpd/src/chttpd_misc.erl @@ -162,6 +162,37 @@ all_dbs_callback({error, Reason}, #vacc{resp=Resp0}=Acc) -> {ok, Resp1} = chttpd:send_delayed_error(Resp0, Reason), {ok, Acc#vacc{resp=Resp1}}. +handle_dbs_info_req(#httpd{method = 'GET'} = Req) -> + ok = chttpd:verify_is_server_admin(Req), + + #mrargs{ + start_key = StartKey, + end_key = EndKey, + direction = Dir, + limit = Limit, + skip = Skip + } = couch_mrview_http:parse_params(Req, undefined), + + Options = [ + {start_key, StartKey}, + {end_key, EndKey}, + {dir, Dir}, + {limit, Limit}, + {skip, Skip} + ], + + % TODO: Figure out if we can't calculate a valid + % ETag for this request. \xFFmetadataVersion won't + % work as we don't bump versions on size changes + + {ok, Resp} = chttpd:start_delayed_json_response(Req, 200, []), + Callback = fun dbs_info_callback/2, + Acc = #vacc{req = Req, resp = Resp}, + {ok, Resp} = fabric2_db:list_dbs_info(Callback, Acc, Options), + case is_record(Resp, vacc) of + true -> {ok, Resp#vacc.resp}; + _ -> {ok, Resp} + end; handle_dbs_info_req(#httpd{method='POST', user_ctx=UserCtx}=Req) -> chttpd:validate_ctype(Req, "application/json"), Props = chttpd:json_body_obj(Req), @@ -193,7 +224,23 @@ handle_dbs_info_req(#httpd{method='POST', user_ctx=UserCtx}=Req) -> send_chunk(Resp, "]"), chttpd:end_json_response(Resp); handle_dbs_info_req(Req) -> - send_method_not_allowed(Req, "POST"). + send_method_not_allowed(Req, "GET,HEAD,POST"). + +dbs_info_callback({meta, _Meta}, #vacc{resp = Resp0} = Acc) -> + {ok, Resp1} = chttpd:send_delayed_chunk(Resp0, "["), + {ok, Acc#vacc{resp = Resp1}}; +dbs_info_callback({row, Props}, #vacc{resp = Resp0} = Acc) -> + Prepend = couch_mrview_http:prepend_val(Acc), + Chunk = [Prepend, ?JSON_ENCODE({Props})], + {ok, Resp1} = chttpd:send_delayed_chunk(Resp0, Chunk), + {ok, Acc#vacc{prepend = ",", resp = Resp1}}; +dbs_info_callback(complete, #vacc{resp = Resp0} = Acc) -> + {ok, Resp1} = chttpd:send_delayed_chunk(Resp0, "]"), + {ok, Resp2} = chttpd:end_delayed_json_response(Resp1), + {ok, Acc#vacc{resp = Resp2}}; +dbs_info_callback({error, Reason}, #vacc{resp = Resp0} = Acc) -> + {ok, Resp1} = chttpd:send_delayed_error(Resp0, Reason), + {ok, Acc#vacc{resp = Resp1}}. handle_task_status_req(#httpd{method='GET'}=Req) -> ok = chttpd:verify_is_server_admin(Req), -- cgit v1.2.1 From f297fe312b0b416ed875fbd4c62a97d801dd2613 Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Fri, 31 Jan 2020 14:58:41 -0500 Subject: Implement mult-transactional iterators for _changes feeds Previously changes feeds would fail if they streamed data for more than five seconds. This was because of the FoundationDB's transaction time limit. After the timeout fired, an 1007 (transaction_too_long) error was raised, and transaction was retried. The emitted changes feed would often crash or simple hang because the HTTP state would be garbled as response data was re-sent over the same socket stream again. To fix the issue introduce a new `{restart_tx, true}` option for `fold_range/4`. This option sets up a new transaction to continue iterating over the range from where the last one left off. To avoid data being resent in the response stream, user callback functions must first read all the data they plan on sending during that callback, send it out, and then after that it must not do any more db reads so as not to trigger a `transaction_too_old` error. --- src/fabric/include/fabric2.hrl | 4 + src/fabric/src/fabric2_db.erl | 7 +- src/fabric/src/fabric2_fdb.erl | 174 +++++++++++++++--- src/fabric/test/fabric2_changes_fold_tests.erl | 241 +++++++++++++++++++++---- src/fabric/test/fabric2_test.hrl | 8 + 5 files changed, 364 insertions(+), 70 deletions(-) diff --git a/src/fabric/include/fabric2.hrl b/src/fabric/include/fabric2.hrl index 828a51b8f..d07a73793 100644 --- a/src/fabric/include/fabric2.hrl +++ b/src/fabric/include/fabric2.hrl @@ -57,6 +57,10 @@ -define(PDICT_TX_ID_KEY, '$fabric_tx_id'). -define(PDICT_TX_RES_KEY, '$fabric_tx_result'). -define(PDICT_ON_COMMIT_FUN, '$fabric_on_commit_fun'). +-define(PDICT_FOLD_ACC_STATE, '$fabric_fold_acc_state'). + +% Let's keep these in ascending order +-define(TRANSACTION_TOO_OLD, 1007). -define(COMMIT_UNKNOWN_RESULT, 1021). diff --git a/src/fabric/src/fabric2_db.erl b/src/fabric/src/fabric2_db.erl index 17c899d27..3349722ad 100644 --- a/src/fabric/src/fabric2_db.erl +++ b/src/fabric/src/fabric2_db.erl @@ -872,6 +872,11 @@ fold_changes(Db, SinceSeq, UserFun, UserAcc, Options) -> _ -> fwd end, + RestartTx = case fabric2_util:get_value(restart_tx, Options) of + undefined -> [{restart_tx, true}]; + _AlreadySet -> [] + end, + StartKey = get_since_seq(TxDb, Dir, SinceSeq), EndKey = case Dir of rev -> fabric2_util:seq_zero_vs(); @@ -880,7 +885,7 @@ fold_changes(Db, SinceSeq, UserFun, UserAcc, Options) -> FoldOpts = [ {start_key, StartKey}, {end_key, EndKey} - ] ++ Options, + ] ++ RestartTx ++ Options, {ok, fabric2_fdb:fold_range(TxDb, Prefix, fun({K, V}, Acc) -> {SeqVS} = erlfdb_tuple:unpack(K, Prefix), diff --git a/src/fabric/src/fabric2_fdb.erl b/src/fabric/src/fabric2_fdb.erl index 99611b0a1..00bb4855a 100644 --- a/src/fabric/src/fabric2_fdb.erl +++ b/src/fabric/src/fabric2_fdb.erl @@ -72,6 +72,23 @@ -include("fabric2.hrl"). +-define(MAX_FOLD_RANGE_RETRIES, 3). + + +-record(fold_acc, { + db, + restart_tx, + start_key, + end_key, + limit, + skip, + retries, + base_opts, + user_fun, + user_acc +}). + + transactional(Fun) -> do_transaction(Fun, undefined). @@ -835,25 +852,49 @@ get_last_change(#{} = Db) -> end. -fold_range(#{} = Db, RangePrefix, Callback, Acc, Options) -> - #{ - tx := Tx - } = ensure_current(Db), - fold_range({tx, Tx}, RangePrefix, Callback, Acc, Options); - -fold_range({tx, Tx}, RangePrefix, UserCallback, UserAcc, Options) -> - case fabric2_util:get_value(limit, Options) of - 0 -> - % FoundationDB treats a limit of 0 as unlimited - % so we have to guard for that here. - UserAcc; - _ -> - {Start, End, Skip, FoldOpts} = get_fold_opts(RangePrefix, Options), - Callback = fun fold_range_cb/2, - Acc = {skip, Skip, UserCallback, UserAcc}, - {skip, _, UserCallback, OutAcc} = - erlfdb:fold_range(Tx, Start, End, Callback, Acc, FoldOpts), - OutAcc +fold_range(TxOrDb, RangePrefix, UserFun, UserAcc, Options) -> + {Db, Tx} = case TxOrDb of + {tx, TxObj} -> + {undefined, TxObj}; + #{} = DbObj -> + DbObj1 = #{tx := TxObj} = ensure_current(DbObj), + {DbObj1, TxObj} + end, + % FoundationDB treats a limit 0 of as unlimited so we guard against it + case fabric2_util:get_value(limit, Options) of 0 -> UserAcc; _ -> + FAcc = get_fold_acc(Db, RangePrefix, UserFun, UserAcc, Options), + try + fold_range(Tx, FAcc) + after + erase(?PDICT_FOLD_ACC_STATE) + end + end. + + +fold_range(Tx, FAcc) -> + #fold_acc{ + start_key = Start, + end_key = End, + limit = Limit, + base_opts = BaseOpts, + restart_tx = DoRestart + } = FAcc, + case DoRestart of false -> ok; true -> + ok = erlfdb:set_option(Tx, disallow_writes) + end, + Opts = [{limit, Limit} | BaseOpts], + Callback = fun fold_range_cb/2, + try + #fold_acc{ + user_acc = FinalUserAcc + } = erlfdb:fold_range(Tx, Start, End, Callback, FAcc, Opts), + FinalUserAcc + catch error:{erlfdb_error, ?TRANSACTION_TOO_OLD} when DoRestart -> + % Possibly handle cluster_version_changed and future_version as well to + % continue iteration instead fallback to transactional and retrying + % from the beginning which is bound to fail when streaming data out to a + % socket. + fold_range(Tx, restart_fold(Tx, FAcc)) end. @@ -1297,7 +1338,9 @@ chunkify_binary(Data) -> end. -get_fold_opts(RangePrefix, Options) -> +get_fold_acc(Db, RangePrefix, UserCallback, UserAcc, Options) + when is_map(Db) orelse Db =:= undefined -> + Reverse = case fabric2_util:get_value(dir, Options) of rev -> true; _ -> false @@ -1362,8 +1405,8 @@ get_fold_opts(RangePrefix, Options) -> end, Limit = case fabric2_util:get_value(limit, Options) of - L when is_integer(L), L >= 0 -> [{limit, L + Skip}]; - undefined -> [] + L when is_integer(L), L >= 0 -> L + Skip; + undefined -> 0 end, TargetBytes = case fabric2_util:get_value(target_bytes, Options) of @@ -1381,21 +1424,68 @@ get_fold_opts(RangePrefix, Options) -> B when is_boolean(B) -> [{snapshot, B}] end, - OutOpts = [{reverse, Reverse}] - ++ Limit + BaseOpts = [{reverse, Reverse}] ++ TargetBytes ++ StreamingMode ++ Snapshot, - {StartKey3, EndKey3, Skip, OutOpts}. + RestartTx = fabric2_util:get_value(restart_tx, Options, false), + + #fold_acc{ + db = Db, + start_key = StartKey3, + end_key = EndKey3, + skip = Skip, + limit = Limit, + retries = 0, + base_opts = BaseOpts, + restart_tx = RestartTx, + user_fun = UserCallback, + user_acc = UserAcc + }. + +fold_range_cb({K, V}, #fold_acc{} = Acc) -> + #fold_acc{ + skip = Skip, + limit = Limit, + user_fun = UserFun, + user_acc = UserAcc, + base_opts = Opts + } = Acc, + Acc1 = case Skip =:= 0 of + true -> + UserAcc1 = UserFun({K, V}, UserAcc), + Acc#fold_acc{limit = max(0, Limit - 1), user_acc = UserAcc1}; + false -> + Acc#fold_acc{skip = Skip - 1, limit = Limit - 1} + end, + Acc2 = case fabric2_util:get_value(reverse, Opts, false) of + true -> Acc1#fold_acc{end_key = erlfdb_key:last_less_or_equal(K)}; + false -> Acc1#fold_acc{start_key = erlfdb_key:first_greater_than(K)} + end, + put(?PDICT_FOLD_ACC_STATE, Acc2), + Acc2. -fold_range_cb(KV, {skip, 0, Callback, Acc}) -> - NewAcc = Callback(KV, Acc), - {skip, 0, Callback, NewAcc}; -fold_range_cb(_KV, {skip, N, Callback, Acc}) when is_integer(N), N > 0 -> - {skip, N - 1, Callback, Acc}. +restart_fold(Tx, #fold_acc{} = Acc) -> + erase(?PDICT_CHECKED_MD_IS_CURRENT), + % Not actually committing anyting so we skip on-commit handlers here. Those + % are usually to refresh db handles in the cache. If the iterator runs for + % a while it might be inserting a stale handle in there anyway. + erase({?PDICT_ON_COMMIT_FUN, Tx}), + + ok = erlfdb:reset(Tx), + + case {erase(?PDICT_FOLD_ACC_STATE), Acc#fold_acc.retries} of + {#fold_acc{db = Db} = Acc1, _} -> + Acc1#fold_acc{db = check_db_instance(Db), retries = 0}; + {undefined, Retries} when Retries < ?MAX_FOLD_RANGE_RETRIES -> + Db = check_db_instance(Acc#fold_acc.db), + Acc#fold_acc{db = Db, retries = Retries + 1}; + {undefined, _} -> + error(fold_range_not_progressing) + end. get_db_handle() -> @@ -1435,6 +1525,30 @@ ensure_current(#{} = Db0, CheckDbVersion) -> end. +check_db_instance(undefined) -> + undefined; + +check_db_instance(#{} = Db) -> + require_transaction(Db), + case check_metadata_version(Db) of + {current, Db1} -> + Db1; + {stale, Db1} -> + #{ + tx := Tx, + uuid := UUID, + name := DbName, + layer_prefix := LayerPrefix + } = Db1, + DbPrefix = erlfdb_tuple:pack({?DBS, DbName}, LayerPrefix), + UUIDKey = erlfdb_tuple:pack({?DB_CONFIG, <<"uuid">>}, DbPrefix), + case erlfdb:wait(erlfdb:get(Tx, UUIDKey)) of + UUID -> Db1; + _ -> error(database_does_not_exist) + end + end. + + is_transaction_applied(Tx) -> is_commit_unknown_result() andalso has_transaction_id() diff --git a/src/fabric/test/fabric2_changes_fold_tests.erl b/src/fabric/test/fabric2_changes_fold_tests.erl index 8a29bcb00..fddf1802b 100644 --- a/src/fabric/test/fabric2_changes_fold_tests.erl +++ b/src/fabric/test/fabric2_changes_fold_tests.erl @@ -21,28 +21,55 @@ -define(DOC_COUNT, 25). +-define(PDICT_ERROR_IN_FOLD_RANGE, '$fabric2_error_in_fold_range'). +-define(PDICT_ERROR_IN_USER_FUN, '$fabric2_error_throw_in_user_fun'). + changes_fold_test_() -> { "Test changes fold operations", { setup, - fun setup/0, - fun cleanup/1, - with([ - ?TDEF(fold_changes_basic), - ?TDEF(fold_changes_since_now), - ?TDEF(fold_changes_since_seq), - ?TDEF(fold_changes_basic_rev), - ?TDEF(fold_changes_since_now_rev), - ?TDEF(fold_changes_since_seq_rev) - ]) + fun setup_all/0, + fun teardown_all/1, + { + foreach, + fun setup/0, + fun cleanup/1, + [ + ?TDEF_FE(fold_changes_basic), + ?TDEF_FE(fold_changes_since_now), + ?TDEF_FE(fold_changes_since_seq), + ?TDEF_FE(fold_changes_basic_rev), + ?TDEF_FE(fold_changes_since_now_rev), + ?TDEF_FE(fold_changes_since_seq_rev), + ?TDEF_FE(fold_changes_basic_tx_too_long), + ?TDEF_FE(fold_changes_reverse_tx_too_long), + ?TDEF_FE(fold_changes_tx_too_long_with_single_row_emits), + ?TDEF_FE(fold_changes_since_seq_tx_too_long), + ?TDEF_FE(fold_changes_not_progressing) + ] + } } }. -setup() -> +setup_all() -> Ctx = test_util:start_couch([fabric]), + meck:new(erlfdb, [passthrough]), + Ctx. + + +teardown_all(Ctx) -> + meck:unload(), + test_util:stop_couch(Ctx). + + +setup() -> + meck:expect(erlfdb, fold_range, fun(Tx, Start, End, Callback, Acc, Opts) -> + maybe_tx_too_long(?PDICT_ERROR_IN_FOLD_RANGE), + meck:passthrough([Tx, Start, End, Callback, Acc, Opts]) + end), {ok, Db} = fabric2_db:create(?tempdb(), [{user_ctx, ?ADMIN_USER}]), Rows = lists:map(fun(Val) -> DocId = fabric2_util:uuid(), @@ -59,57 +86,193 @@ setup() -> rev_id => RevId } end, lists:seq(1, ?DOC_COUNT)), - {Db, Rows, Ctx}. + {Db, Rows}. -cleanup({Db, _DocIdRevs, Ctx}) -> - ok = fabric2_db:delete(fabric2_db:name(Db), []), - test_util:stop_couch(Ctx). +cleanup({Db, _DocIdRevs}) -> + reset_error_counts(), + ok = fabric2_db:delete(fabric2_db:name(Db), []). -fold_changes_basic({Db, DocRows, _}) -> - {ok, Rows} = fabric2_db:fold_changes(Db, 0, fun fold_fun/2, []), - ?assertEqual(lists:reverse(DocRows), Rows). +fold_changes_basic({Db, DocRows}) -> + ?assertEqual(lists:reverse(DocRows), changes(Db)). -fold_changes_since_now({Db, _, _}) -> - {ok, Rows} = fabric2_db:fold_changes(Db, now, fun fold_fun/2, []), - ?assertEqual([], Rows). +fold_changes_since_now({Db, _}) -> + ?assertEqual([], changes(Db, now, [])). -fold_changes_since_seq({_, [], _}) -> +fold_changes_since_seq({_, []}) -> ok; -fold_changes_since_seq({Db, [Row | RestRows], _}) -> +fold_changes_since_seq({Db, [Row | RestRows]}) -> #{sequence := Since} = Row, - {ok, Rows} = fabric2_db:fold_changes(Db, Since, fun fold_fun/2, []), - ?assertEqual(lists:reverse(RestRows), Rows), - fold_changes_since_seq({Db, RestRows, nil}). + ?assertEqual(lists:reverse(RestRows), changes(Db, Since, [])), + fold_changes_since_seq({Db, RestRows}). -fold_changes_basic_rev({Db, _, _}) -> - Opts = [{dir, rev}], - {ok, Rows} = fabric2_db:fold_changes(Db, 0, fun fold_fun/2, [], Opts), - ?assertEqual([], Rows). +fold_changes_basic_rev({Db, _}) -> + ?assertEqual([], changes(Db, 0, [{dir, rev}])). -fold_changes_since_now_rev({Db, DocRows, _}) -> - Opts = [{dir, rev}], - {ok, Rows} = fabric2_db:fold_changes(Db, now, fun fold_fun/2, [], Opts), - ?assertEqual(DocRows, Rows). +fold_changes_since_now_rev({Db, DocRows}) -> + ?assertEqual(DocRows, changes(Db, now, [{dir, rev}])). -fold_changes_since_seq_rev({_, [], _}) -> +fold_changes_since_seq_rev({_, []}) -> ok; -fold_changes_since_seq_rev({Db, DocRows, _}) -> +fold_changes_since_seq_rev({Db, DocRows}) -> #{sequence := Since} = lists:last(DocRows), Opts = [{dir, rev}], - {ok, Rows} = fabric2_db:fold_changes(Db, Since, fun fold_fun/2, [], Opts), - ?assertEqual(DocRows, Rows), + ?assertEqual(DocRows, changes(Db, Since, Opts)), RestRows = lists:sublist(DocRows, length(DocRows) - 1), - fold_changes_since_seq_rev({Db, RestRows, nil}). + fold_changes_since_seq_rev({Db, RestRows}). + + +fold_changes_basic_tx_too_long({Db, DocRows0}) -> + DocRows = lists:reverse(DocRows0), + + tx_too_long_errors(0, 1), + ?assertEqual(DocRows, changes(Db)), + + tx_too_long_errors(1, 0), + ?assertEqual(DocRows, changes(Db)), + + % Blow up in user fun but after emitting one row successfully. + tx_too_long_errors({1, 1}, 0), + ?assertEqual(DocRows, changes(Db)), + + % Blow up before last document + tx_too_long_errors({?DOC_COUNT - 1, 1}, 0), + ?assertEqual(DocRows, changes(Db)), + + % Emit one value, then blow up in user function and then blow up twice in + % fold_range. But it is not enough to stop the iteration. + tx_too_long_errors({1, 1}, {1, 2}), + ?assertEqual(DocRows, changes(Db)). + + +fold_changes_reverse_tx_too_long({Db, DocRows}) -> + Opts = [{dir, rev}], + + tx_too_long_errors(0, 1), + ?assertEqual([], changes(Db, 0, Opts)), + + tx_too_long_errors(1, 0), + ?assertEqual([], changes(Db, 0, Opts)), + + tx_too_long_errors(1, 0), + ?assertEqual(DocRows, changes(Db, now, Opts)), + + tx_too_long_errors(1, 0), + ?assertEqual(DocRows, changes(Db, now, Opts)), + + % Blow up in user fun but after emitting one row successfully. + tx_too_long_errors({1, 1}, 0), + ?assertEqual(DocRows, changes(Db, now, Opts)), + + % Blow up before last document + tx_too_long_errors({?DOC_COUNT - 1, 1}, 0), + ?assertEqual(DocRows, changes(Db, now, Opts)), + + % Emit value, blow up in user function, and twice in fold_range + tx_too_long_errors({1, 1}, {1, 2}), + ?assertEqual(DocRows, changes(Db, now, Opts)). + + +fold_changes_tx_too_long_with_single_row_emits({Db, DocRows0}) -> + % This test does a few basic operations while forcing erlfdb range fold to + % emit a single row at a time, thus forcing it to use continuations while + % also inducing tx errors + Opts = [{target_bytes, 1}], + DocRows = lists:reverse(DocRows0), + + tx_too_long_errors(0, 1), + ?assertEqual(DocRows, changes(Db, 0, Opts)), + + tx_too_long_errors(1, 0), + ?assertEqual(DocRows, changes(Db, 0, Opts)), + + % Blow up in user fun but after emitting one row successfully. + tx_too_long_errors({1, 1}, 0), + ?assertEqual(DocRows, changes(Db, 0, Opts)), + + % Blow up before last document + tx_too_long_errors({?DOC_COUNT - 1, 1}, 0), + ?assertEqual(DocRows, changes(Db, 0, Opts)). + + +fold_changes_since_seq_tx_too_long({Db, Rows}) -> + % Blow up after after a successful emit, then twice + % in range fold call. Also re-use already existing basic + % fold_changes_since_seq test function. + tx_too_long_errors({1, 1}, {1, 2}), + fold_changes_since_seq({Db, Rows}). + + +fold_changes_not_progressing({Db, _}) -> + % Fail in first fold range call. + tx_too_long_errors(5, 0), + ?assertError(fold_range_not_progressing, changes(Db)), + + % Fail in first user fun call. + tx_too_long_errors(0, 5), + ?assertError(fold_range_not_progressing, changes(Db)), + + % Blow up in last user fun call + tx_too_long_errors({?DOC_COUNT - 1, 5}, 0), + ?assertError(fold_range_not_progressing, changes(Db)), + + % Blow up in user function after one success. + tx_too_long_errors({1, 5}, 0), + ?assertError(fold_range_not_progressing, changes(Db)), + + % Emit value, blow up in user function, then keep blowing up in fold_range. + tx_too_long_errors({1, 1}, {1, 4}), + ?assertError(fold_range_not_progressing, changes(Db)). fold_fun(#{} = Change, Acc) -> + maybe_tx_too_long(?PDICT_ERROR_IN_USER_FUN), {ok, [Change | Acc]}. + + +tx_too_long_errors(UserFunCount, FoldErrors) when is_integer(UserFunCount) -> + tx_too_long_errors({0, UserFunCount}, FoldErrors); + +tx_too_long_errors(UserFunErrors, FoldCount) when is_integer(FoldCount) -> + tx_too_long_errors(UserFunErrors, {0, FoldCount}); + +tx_too_long_errors({UserFunSkip, UserFunCount}, {FoldSkip, FoldCount}) -> + reset_error_counts(), + put(?PDICT_ERROR_IN_USER_FUN, {UserFunSkip, UserFunCount}), + put(?PDICT_ERROR_IN_FOLD_RANGE, {FoldSkip, FoldCount}). + + +reset_error_counts() -> + erase(?PDICT_ERROR_IN_FOLD_RANGE), + erase(?PDICT_ERROR_IN_USER_FUN). + + +changes(Db) -> + changes(Db, 0, []). + + +changes(Db, Since, Opts) -> + {ok, Rows} = fabric2_db:fold_changes(Db, Since, fun fold_fun/2, [], Opts), + Rows. + + +maybe_tx_too_long(Key) -> + case get(Key) of + {Skip, Count} when is_integer(Skip), Skip > 0 -> + put(Key, {Skip - 1, Count}); + {0, Count} when is_integer(Count), Count > 0 -> + put(Key, {0, Count - 1}), + error({erlfdb_error, 1007}); + {0, 0} -> + ok; + undefined -> + ok + end. diff --git a/src/fabric/test/fabric2_test.hrl b/src/fabric/test/fabric2_test.hrl index a0532b360..9239096fc 100644 --- a/src/fabric/test/fabric2_test.hrl +++ b/src/fabric/test/fabric2_test.hrl @@ -10,9 +10,17 @@ % License for the specific language governing permissions and limitations under % the License. + +% Some test modules do not use with, so squash the unused fun compiler warning +-compile([{nowarn_unused_function, [{with, 1}]}]). + + -define(TDEF(Name), {atom_to_list(Name), fun Name/1}). -define(TDEF(Name, Timeout), {atom_to_list(Name), Timeout, fun Name/1}). +-define(TDEF_FE(Name), fun(Arg) -> {atom_to_list(Name), ?_test(Name(Arg))} end). +-define(TDEF_FE(Name, Timeout), fun(Arg) -> {atom_to_list(Name), {timeout, Timeout, ?_test(Name(Arg))}} end). + with(Tests) -> fun(ArgsTuple) -> -- cgit v1.2.1 From cad91ad448fead74dd0f0aa83aed8515b865278f Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Sat, 15 Feb 2020 12:27:25 -0500 Subject: Use {restart_tx, false} option in view index builder changes feed Index builder performs writes in the same transaction as the changes feed so we can't use iterators as they disable writes. --- src/couch_views/src/couch_views_indexer.erl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/couch_views/src/couch_views_indexer.erl b/src/couch_views/src/couch_views_indexer.erl index 75e4b368f..31cd8e6f1 100644 --- a/src/couch_views/src/couch_views_indexer.erl +++ b/src/couch_views/src/couch_views_indexer.erl @@ -211,7 +211,8 @@ fold_changes(State) -> } = State, Fun = fun process_changes/2, - fabric2_db:fold_changes(TxDb, SinceSeq, Fun, State, [{limit, Limit}]). + Opts = [{limit, Limit}, {restart_tx, false}], + fabric2_db:fold_changes(TxDb, SinceSeq, Fun, State, Opts). process_changes(Change, Acc) -> -- cgit v1.2.1 From f4a9e60eee99dd9d4f87153a32c88b78788b49c2 Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Sat, 15 Feb 2020 12:15:54 -0600 Subject: Remove attachment headers field I accidentally ported part of the old couch_att test suite into an actual "feature" that's not actually accessible through any API. --- src/couch/src/couch_att.erl | 24 +++++++----------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/src/couch/src/couch_att.erl b/src/couch/src/couch_att.erl index 2c3336291..837170c99 100644 --- a/src/couch/src/couch_att.erl +++ b/src/couch/src/couch_att.erl @@ -88,8 +88,7 @@ md5 := binary() | undefined, revpos := non_neg_integer(), data := data_prop_type(), - encoding := identity | gzip | undefined, - headers := [{binary(), binary()}] | undefined + encoding := identity | gzip | undefined }. @@ -102,8 +101,7 @@ new() -> md5 => undefined, revpos => 0, data => undefined, - encoding => undefined, - headers => undefined + encoding => undefined }. @@ -203,8 +201,7 @@ to_disk_term(Att) -> fetch(disk_len, Att), fetch(revpos, Att), fetch(md5, Att), - fetch(encoding, Att), - fetch(headers, Att) + fetch(encoding, Att) }}. @@ -217,8 +214,7 @@ from_disk_term(#{} = Db, DocId, {?CURRENT_ATT_FORMAT, Props}) -> DiskLen, RevPos, Md5, - Encoding, - Headers + Encoding } = Props, new([ {name, Name}, @@ -228,8 +224,7 @@ from_disk_term(#{} = Db, DocId, {?CURRENT_ATT_FORMAT, Props}) -> {disk_len, DiskLen}, {revpos, RevPos}, {md5, Md5}, - {encoding, Encoding}, - {headers, Headers} + {encoding, Encoding} ]). @@ -329,8 +324,7 @@ to_json(Att, OutputData, DataToFollow, ShowEncoding) -> att_len := AttLen, revpos := RevPos, md5 := Md5, - encoding := Encoding, - headers := Headers + encoding := Encoding } = Att, Props = [ {<<"content_type">>, Type}, @@ -361,11 +355,7 @@ to_json(Att, OutputData, DataToFollow, ShowEncoding) -> true -> [] end, - HeadersProp = case Headers of - undefined -> []; - Headers -> [{<<"headers">>, Headers}] - end, - {Name, {Props ++ DigestProp ++ DataProps ++ EncodingProps ++ HeadersProp}}. + {Name, {Props ++ DigestProp ++ DataProps ++ EncodingProps}}. flush(Db, DocId, Att1) -> -- cgit v1.2.1 From 53518b69b348b3c02501043e4582cb537deab30d Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Wed, 4 Dec 2019 11:38:48 -0600 Subject: Track the size of data stored in a database This tracks the number of bytes that would be required to store the contents of a database as flat files on disk. Currently the following items are tracked: * Doc ids * Revisions * Doc body as JSON * Attachment names * Attachment type * Attachment length * Attachment md5s * Attachment headers * Local doc id * Local doc revision * Local doc bodies --- src/couch/src/couch_att.erl | 15 +++ src/fabric/include/fabric2.hrl | 7 +- src/fabric/src/fabric2_db.erl | 11 ++- src/fabric/src/fabric2_fdb.erl | 144 ++++++++++++++++++++++++----- src/fabric/src/fabric2_util.erl | 52 +++++++++++ src/fabric/test/fabric2_doc_crud_tests.erl | 5 +- 6 files changed, 205 insertions(+), 29 deletions(-) diff --git a/src/couch/src/couch_att.erl b/src/couch/src/couch_att.erl index 837170c99..d41ab5bf2 100644 --- a/src/couch/src/couch_att.erl +++ b/src/couch/src/couch_att.erl @@ -27,6 +27,7 @@ ]). -export([ + external_size/1, size_info/1, to_disk_term/1, from_disk_term/3 @@ -177,6 +178,20 @@ merge_stubs([], _, Merged) -> {ok, lists:reverse(Merged)}. +external_size(Att) -> + NameSize = size(fetch(name, Att)), + TypeSize = case fetch(type, Att) of + undefined -> 0; + Type -> size(Type) + end, + AttSize = fetch(att_len, Att), + Md5Size = case fetch(md5, Att) of + undefined -> 0; + Md5 -> size(Md5) + end, + NameSize + TypeSize + AttSize + Md5Size. + + size_info([]) -> {ok, []}; size_info(Atts) -> diff --git a/src/fabric/include/fabric2.hrl b/src/fabric/include/fabric2.hrl index d07a73793..b1bd30629 100644 --- a/src/fabric/include/fabric2.hrl +++ b/src/fabric/include/fabric2.hrl @@ -45,8 +45,13 @@ % 0 - Initial implementation % 1 - Added attachment hash +% 2 - Added size information --define(CURR_REV_FORMAT, 1). +-define(CURR_REV_FORMAT, 2). + +% 0 - Adding local doc versions + +-define(CURR_LDOC_FORMAT, 0). % Misc constants diff --git a/src/fabric/src/fabric2_db.erl b/src/fabric/src/fabric2_db.erl index 3349722ad..b0f7849e2 100644 --- a/src/fabric/src/fabric2_db.erl +++ b/src/fabric/src/fabric2_db.erl @@ -1422,12 +1422,14 @@ update_doc_interactive(Db, Doc0, Future, _Options) -> NewRevInfo = #{ winner => undefined, + exists => false, deleted => NewDeleted, rev_id => {NewRevPos, NewRev}, rev_path => NewRevPath, sequence => undefined, branch_count => undefined, - att_hash => fabric2_util:hash_atts(Atts) + att_hash => fabric2_util:hash_atts(Atts), + rev_size => fabric2_util:rev_size(Doc4) }, % Gather the list of possible winnig revisions @@ -1478,12 +1480,14 @@ update_doc_replicated(Db, Doc0, _Options) -> DocRevInfo0 = #{ winner => undefined, + exists => false, deleted => Deleted, rev_id => {RevPos, Rev}, rev_path => RevPath, sequence => undefined, branch_count => undefined, - att_hash => <<>> + att_hash => <<>>, + rev_size => null }, AllRevInfos = fabric2_fdb:get_all_revs(Db, DocId), @@ -1523,7 +1527,8 @@ update_doc_replicated(Db, Doc0, _Options) -> Doc2 = prep_and_validate(Db, Doc1, PrevRevInfo), Doc3 = flush_doc_atts(Db, Doc2), DocRevInfo2 = DocRevInfo1#{ - atts_hash => fabric2_util:hash_atts(Doc3#doc.atts) + atts_hash => fabric2_util:hash_atts(Doc3#doc.atts), + rev_size => fabric2_util:rev_size(Doc3) }, % Possible winners are the previous winner and diff --git a/src/fabric/src/fabric2_fdb.erl b/src/fabric/src/fabric2_fdb.erl index 00bb4855a..e51b8de5d 100644 --- a/src/fabric/src/fabric2_fdb.erl +++ b/src/fabric/src/fabric2_fdb.erl @@ -36,6 +36,7 @@ get_stat/2, incr_stat/3, + incr_stat/4, get_all_revs/2, get_winning_revs/3, @@ -471,6 +472,19 @@ incr_stat(#{} = Db, StatKey, Increment) when is_integer(Increment) -> erlfdb:add(Tx, Key, Increment). +incr_stat(_Db, _Section, _Key, 0) -> + ok; + +incr_stat(#{} = Db, Section, Key, Increment) when is_integer(Increment) -> + #{ + tx := Tx, + db_prefix := DbPrefix + } = ensure_current(Db), + + BinKey = erlfdb_tuple:pack({?DB_STATS, Section, Key}, DbPrefix), + erlfdb:add(Tx, BinKey, Increment). + + get_all_revs(#{} = Db, DocId) -> #{ tx := Tx, @@ -590,6 +604,15 @@ get_local_doc(#{} = Db0, <> = DocId) -> get_local_doc_rev(_Db0, <> = DocId, Val) -> case Val of + <<255, RevBin/binary>> -> + % Versioned local docs + try + case erlfdb_tuple:unpack(RevBin) of + {?CURR_LDOC_FORMAT, Rev, _Size} -> Rev + end + catch _:_ -> + erlang:error({invalid_local_doc_rev, DocId, Val}) + end; <<131, _/binary>> -> % Compatibility clause for an older encoding format try binary_to_term(Val, [safe]) of @@ -656,7 +679,9 @@ write_doc(#{} = Db0, Doc, NewWinner0, OldWinner, ToUpdate, ToRemove) -> % Revision tree - NewWinner = NewWinner0#{winner := true}, + NewWinner = NewWinner0#{ + winner := true + }, NewRevId = maps:get(rev_id, NewWinner), {WKey, WVal, WinnerVS} = revinfo_to_fdb(Tx, DbPrefix, DocId, NewWinner), @@ -718,7 +743,7 @@ write_doc(#{} = Db0, Doc, NewWinner0, OldWinner, ToUpdate, ToRemove) -> NewSeqVal = erlfdb_tuple:pack({DocId, Deleted, NewRevId}), erlfdb:set_versionstamped_key(Tx, NewSeqKey, NewSeqVal), - % And all the rest... + % Bump db version on design doc changes IsDDoc = case Doc#doc.id of <> -> true; @@ -729,6 +754,8 @@ write_doc(#{} = Db0, Doc, NewWinner0, OldWinner, ToUpdate, ToRemove) -> bump_db_version(Db) end, + % Update our document counts + case UpdateStatus of created -> if not IsDDoc -> ok; true -> @@ -755,6 +782,11 @@ write_doc(#{} = Db0, Doc, NewWinner0, OldWinner, ToUpdate, ToRemove) -> ok end, + % Update database size + AddSize = sum_add_rev_sizes([NewWinner | ToUpdate]), + RemSize = sum_rem_rev_sizes(ToRemove), + incr_stat(Db, <<"sizes">>, <<"external">>, AddSize - RemSize), + ok. @@ -766,11 +798,18 @@ write_local_doc(#{} = Db0, Doc) -> Id = Doc#doc.id, - {LDocKey, LDocVal, Rows} = local_doc_to_fdb(Db, Doc), + {LDocKey, LDocVal, NewSize, Rows} = local_doc_to_fdb(Db, Doc), - WasDeleted = case erlfdb:wait(erlfdb:get(Tx, LDocKey)) of - <<_/binary>> -> false; - not_found -> true + {WasDeleted, PrevSize} = case erlfdb:wait(erlfdb:get(Tx, LDocKey)) of + <<255, RevBin/binary>> -> + case erlfdb_tuple:unpack(RevBin) of + {?CURR_LDOC_FORMAT, _Rev, Size} -> + {false, Size} + end; + <<_/binary>> -> + {false, 0}; + not_found -> + {true, 0} end, BPrefix = erlfdb_tuple:pack({?DB_LOCAL_DOC_BODIES, Id}, DbPrefix), @@ -796,6 +835,8 @@ write_local_doc(#{} = Db0, Doc) -> ok end, + incr_stat(Db, <<"sizes">>, <<"external">>, NewSize - PrevSize), + ok. @@ -1086,9 +1127,10 @@ write_doc_body(#{} = Db0, #doc{} = Doc) -> tx := Tx } = Db = ensure_current(Db0), + Rows = doc_to_fdb(Db, Doc), lists:foreach(fun({Key, Value}) -> ok = erlfdb:set(Tx, Key, Value) - end, doc_to_fdb(Db, Doc)). + end, Rows). clear_doc_body(_Db, _DocId, not_found) -> @@ -1164,7 +1206,8 @@ revinfo_to_fdb(Tx, DbPrefix, DocId, #{winner := true} = RevId) -> rev_id := {RevPos, Rev}, rev_path := RevPath, branch_count := BranchCount, - att_hash := AttHash + att_hash := AttHash, + rev_size := RevSize } = RevId, VS = new_versionstamp(Tx), Key = {?DB_REVS, DocId, not Deleted, RevPos, Rev}, @@ -1173,7 +1216,8 @@ revinfo_to_fdb(Tx, DbPrefix, DocId, #{winner := true} = RevId) -> VS, BranchCount, list_to_tuple(RevPath), - AttHash + AttHash, + RevSize }, KBin = erlfdb_tuple:pack(Key, DbPrefix), VBin = erlfdb_tuple:pack_vs(Val), @@ -1184,39 +1228,44 @@ revinfo_to_fdb(_Tx, DbPrefix, DocId, #{} = RevId) -> deleted := Deleted, rev_id := {RevPos, Rev}, rev_path := RevPath, - att_hash := AttHash + att_hash := AttHash, + rev_size := RevSize } = RevId, Key = {?DB_REVS, DocId, not Deleted, RevPos, Rev}, - Val = {?CURR_REV_FORMAT, list_to_tuple(RevPath), AttHash}, + Val = {?CURR_REV_FORMAT, list_to_tuple(RevPath), AttHash, RevSize}, KBin = erlfdb_tuple:pack(Key, DbPrefix), VBin = erlfdb_tuple:pack(Val), {KBin, VBin, undefined}. -fdb_to_revinfo(Key, {?CURR_REV_FORMAT, _, _, _, _} = Val) -> +fdb_to_revinfo(Key, {?CURR_REV_FORMAT, _, _, _, _, _} = Val) -> {?DB_REVS, _DocId, NotDeleted, RevPos, Rev} = Key, - {_RevFormat, Sequence, BranchCount, RevPath, AttHash} = Val, + {_RevFormat, Sequence, BranchCount, RevPath, AttHash, RevSize} = Val, #{ winner => true, + exists => true, deleted => not NotDeleted, rev_id => {RevPos, Rev}, rev_path => tuple_to_list(RevPath), sequence => Sequence, branch_count => BranchCount, - att_hash => AttHash + att_hash => AttHash, + rev_size => RevSize }; -fdb_to_revinfo(Key, {?CURR_REV_FORMAT, _, _} = Val) -> +fdb_to_revinfo(Key, {?CURR_REV_FORMAT, _, _, _} = Val) -> {?DB_REVS, _DocId, NotDeleted, RevPos, Rev} = Key, - {_RevFormat, RevPath, AttHash} = Val, + {_RevFormat, RevPath, AttHash, RevSize} = Val, #{ winner => false, + exists => true, deleted => not NotDeleted, rev_id => {RevPos, Rev}, rev_path => tuple_to_list(RevPath), sequence => undefined, branch_count => undefined, - att_hash => AttHash + att_hash => AttHash, + rev_size => RevSize }; fdb_to_revinfo(Key, {0, Seq, BCount, RPath}) -> @@ -1225,6 +1274,14 @@ fdb_to_revinfo(Key, {0, Seq, BCount, RPath}) -> fdb_to_revinfo(Key, {0, RPath}) -> Val = {?CURR_REV_FORMAT, RPath, <<>>}, + fdb_to_revinfo(Key, Val); + +fdb_to_revinfo(Key, {1, Seq, BCount, RPath, AttHash}) -> + Val = {?CURR_REV_FORMAT, Seq, BCount, RPath, AttHash, 0}, + fdb_to_revinfo(Key, Val); + +fdb_to_revinfo(Key, {1, RPath, AttHash}) -> + Val = {?CURR_REV_FORMAT, RPath, AttHash, 0}, fdb_to_revinfo(Key, Val). @@ -1244,11 +1301,13 @@ doc_to_fdb(Db, #doc{} = Doc) -> DiskAtts = lists:map(fun couch_att:to_disk_term/1, Atts), Value = term_to_binary({Body, DiskAtts, Deleted}, [{minor_version, 1}]), + Chunks = chunkify_binary(Value), {Rows, _} = lists:mapfoldl(fun(Chunk, ChunkId) -> Key = erlfdb_tuple:pack({?DB_DOCS, Id, Start, Rev, ChunkId}, DbPrefix), {{Key, Chunk}, ChunkId + 1} - end, 0, chunkify_binary(Value)), + end, 0, Chunks), + Rows. @@ -1299,8 +1358,17 @@ local_doc_to_fdb(Db, #doc{} = Doc) -> {{K, Chunk}, ChunkId + 1} end, 0, chunkify_binary(BVal)), - {Key, StoreRev, Rows}. + NewSize = fabric2_util:ldoc_size(Doc), + RawValue = erlfdb_tuple:pack({?CURR_LDOC_FORMAT, StoreRev, NewSize}), + + % Prefix our tuple encoding to make upgrades easier + Value = <<255, RawValue/binary>>, + {Key, Value, NewSize, Rows}. + + +fdb_to_local_doc(_Db, _DocId, not_found, []) -> + {not_found, missing}; fdb_to_local_doc(_Db, DocId, <<131, _/binary>> = Val, []) -> % This is an upgrade clause for the old encoding. We allow reading the old @@ -1313,18 +1381,48 @@ fdb_to_local_doc(_Db, DocId, <<131, _/binary>> = Val, []) -> body = Body }; -fdb_to_local_doc(_Db, _DocId, not_found, []) -> - {not_found, missing}; +fdb_to_local_doc(_Db, DocId, <<255, RevBin/binary>>, Rows) when is_list(Rows) -> + Rev = case erlfdb_tuple:unpack(RevBin) of + {?CURR_LDOC_FORMAT, Rev0, _Size} -> Rev0 + end, -fdb_to_local_doc(_Db, DocId, Rev, Rows) when is_list(Rows), is_binary(Rev) -> BodyBin = iolist_to_binary(Rows), Body = binary_to_term(BodyBin, [safe]), + #doc{ id = DocId, revs = {0, [Rev]}, deleted = false, body = Body - }. + }; + +fdb_to_local_doc(Db, DocId, RawRev, Rows) -> + BaseRev = erlfdb_tuple:pack({?CURR_LDOC_FORMAT, RawRev, 0}), + Rev = <<255, BaseRev/binary>>, + fdb_to_local_doc(Db, DocId, Rev, Rows). + + +sum_add_rev_sizes(RevInfos) -> + lists:foldl(fun(RI, Acc) -> + #{ + exists := Exists, + rev_size := Size + } = RI, + case Exists of + true -> Acc; + false -> Size + Acc + end + end, 0, RevInfos). + + +sum_rem_rev_sizes(RevInfos) -> + lists:foldl(fun(RI, Acc) -> + #{ + exists := true, + rev_size := Size + } = RI, + Size + Acc + end, 0, RevInfos). chunkify_binary(Data) -> diff --git a/src/fabric/src/fabric2_util.erl b/src/fabric/src/fabric2_util.erl index 2a940659e..a4faf3987 100644 --- a/src/fabric/src/fabric2_util.erl +++ b/src/fabric/src/fabric2_util.erl @@ -17,6 +17,8 @@ revinfo_to_revs/1, revinfo_to_path/1, sort_revinfos/1, + rev_size/1, + ldoc_size/1, seq_zero_vs/0, seq_max_vs/0, @@ -80,6 +82,56 @@ rev_sort_key(#{} = RevInfo) -> {not Deleted, RevPos, Rev}. +rev_size(#doc{} = Doc) -> + #doc{ + id = Id, + revs = Revs, + body = Body, + atts = Atts + } = Doc, + + {Start, Rev} = case Revs of + {0, []} -> {0, <<>>}; + {N, [RevId | _]} -> {N, RevId} + end, + + lists:sum([ + size(Id), + size(erlfdb_tuple:pack({Start})), + size(Rev), + 1, % FDB tuple encoding of booleans for deleted flag is 1 byte + couch_ejson_size:encoded_size(Body), + lists:foldl(fun(Att, Acc) -> + couch_att:external_size(Att) + Acc + end, 0, Atts) + ]). + + +ldoc_size(#doc{id = <<"_local/", _/binary>>} = Doc) -> + #doc{ + id = Id, + revs = {0, [Rev]}, + deleted = Deleted, + body = Body + } = Doc, + + StoreRev = case Rev of + _ when is_integer(Rev) -> integer_to_binary(Rev); + _ when is_binary(Rev) -> Rev + end, + + case Deleted of + true -> + 0; + false -> + lists:sum([ + size(Id), + size(StoreRev), + couch_ejson_size:encoded_size(Body) + ]) + end. + + seq_zero_vs() -> {versionstamp, 0, 0, 0}. diff --git a/src/fabric/test/fabric2_doc_crud_tests.erl b/src/fabric/test/fabric2_doc_crud_tests.erl index 184eb4a66..46cd4fcfd 100644 --- a/src/fabric/test/fabric2_doc_crud_tests.erl +++ b/src/fabric/test/fabric2_doc_crud_tests.erl @@ -884,11 +884,12 @@ local_doc_with_previous_encoding({Db, _}) -> ?assertEqual(NewBody, Doc3#doc.body), % Old doc now has only the rev number in it - OldDocBin = fabric2_fdb:transactional(Db, fun(TxDb) -> + <<255, OldDocBin/binary>> = fabric2_fdb:transactional(Db, fun(TxDb) -> #{tx := Tx} = TxDb, erlfdb:wait(erlfdb:get(Tx, Key)) end), - ?assertEqual(<<"2">> , OldDocBin). + Unpacked = erlfdb_tuple:unpack(OldDocBin), + ?assertMatch({?CURR_LDOC_FORMAT, <<"2">>, _}, Unpacked). before_doc_update_skips_local_docs({Db0, _}) -> -- cgit v1.2.1 From 9cad194dc4d57484e407524583c056714ded2696 Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Thu, 13 Feb 2020 15:27:20 -0600 Subject: Add tests for database size tracking --- src/fabric/test/fabric2_db_size_tests.erl | 918 +++++++++++++++++++++++++++++ src/fabric/test/fabric2_doc_size_tests.erl | 320 ++++++++++ 2 files changed, 1238 insertions(+) create mode 100644 src/fabric/test/fabric2_db_size_tests.erl create mode 100644 src/fabric/test/fabric2_doc_size_tests.erl diff --git a/src/fabric/test/fabric2_db_size_tests.erl b/src/fabric/test/fabric2_db_size_tests.erl new file mode 100644 index 000000000..0bb9c7a8e --- /dev/null +++ b/src/fabric/test/fabric2_db_size_tests.erl @@ -0,0 +1,918 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(fabric2_db_size_tests). + +-export([ + random_body/0 +]). + +-include_lib("couch/include/couch_db.hrl"). +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("eunit/include/eunit.hrl"). +-include("fabric2_test.hrl"). + + +db_size_test_() -> + { + "Test database size calculations", + { + setup, + fun setup/0, + fun cleanup/1, + with([ + ?TDEF(new_doc), + ?TDEF(replicate_new_doc), + ?TDEF(edit_doc), + ?TDEF(delete_doc), + ?TDEF(create_conflict), + ?TDEF(replicate_new_winner), + ?TDEF(replicate_deep_deleted), + ?TDEF(delete_winning_revision), + ?TDEF(delete_conflict_revision), + ?TDEF(replicate_existing_revision), + ?TDEF(replicate_shared_history), + ?TDEF(create_doc_with_attachment), + ?TDEF(add_attachment_in_update), + ?TDEF(add_second_attachment), + ?TDEF(delete_attachment), + ?TDEF(delete_one_attachment), + ?TDEF(delete_all_attachments), + ?TDEF(re_add_attachment), + ?TDEF(update_and_remove_attachment), + ?TDEF(replicate_new_doc_with_attachment), + ?TDEF(replicate_remove_attachment), + ?TDEF(replicate_stub_attachment), + ?TDEF(replicate_stub_and_new_attachment), + ?TDEF(replicate_new_att_to_winner), + ?TDEF(replicate_change_att_to_winner), + ?TDEF(replicate_rem_att_from_winner), + ?TDEF(replicate_stub_to_winner), + ?TDEF(replicate_new_att_to_conflict), + ?TDEF(replicate_change_att_to_conflict), + ?TDEF(replicate_rem_att_from_conflict), + ?TDEF(replicate_stub_to_conflict), + ?TDEF(create_local_doc), + ?TDEF(update_local_doc), + ?TDEF(delete_local_doc), + ?TDEF(recreate_local_doc) + ]) + } + }. + + +setup() -> + Ctx = test_util:start_couch([fabric]), + {ok, Db} = fabric2_db:create(?tempdb(), [{user_ctx, ?ADMIN_USER}]), + {Db, Ctx}. + + +cleanup({Db, Ctx}) -> + ok = fabric2_db:delete(fabric2_db:name(Db), []), + test_util:stop_couch(Ctx). + + +new_doc({Db, _}) -> + check(Db, [ + {create, #{tgt => rev1}} + ]). + + +replicate_new_doc({Db, _}) -> + check(Db, [ + {replicate, #{tgt => rev1}} + ]). + + +edit_doc({Db, _}) -> + check(Db, [ + {create, #{tgt => rev1}}, + {update, #{src => rev1, tgt => rev2}} + ]). + + +delete_doc({Db, _}) -> + check(Db, [ + {create, #{tgt => rev1}}, + {delete, #{src => rev1, tgt => rev2}} + ]). + + +create_conflict({Db, _}) -> + check(Db, [ + {create, #{tgt => rev1}}, + {replicate, #{tgt => rev2}} + ]). + + +replicate_new_winner({Db, _}) -> + check(Db, [ + {create, #{tgt => rev1}}, + {replicate, #{tgt => rev2, depth => 3}} + ]). + + +replicate_deep_deleted({Db, _}) -> + check(Db, [ + {create, #{tgt => rev1, depth => 2}}, + {replicate, #{tgt => rev2, depth => 5, deleted => true}} + ]). + + +delete_winning_revision({Db, _}) -> + check(Db, [ + {create, #{tgt => rev1}}, + {replicate, #{tgt => rev2}}, + {delete, #{src => {winner, [rev1, rev2]}, tgt => rev3}} + ]). + + +delete_conflict_revision({Db, _}) -> + check(Db, [ + {create, #{tgt => rev1}}, + {replicate, #{tgt => rev2}}, + {delete, #{src => {conflict, [rev1, rev2]}, tgt => rev3}} + ]). + + +replicate_existing_revision({Db, _}) -> + check(Db, [ + {create, #{tgt => rev1}}, + {replicate, #{src => rev1, tgt => rev2, depth => 0}} + ]). + + +replicate_shared_history({Db, _}) -> + check(Db, [ + {create, #{tgt => rev1, depth => 5}}, + {update, #{src => rev1, tgt => rev2, depth => 5}}, + {replicate, #{ + src => rev1, + src_exists => false, + tgt => rev3, + depth => 5 + }} + ]). + + +create_doc_with_attachment({Db, _}) -> + check(Db, [ + {mk_att, #{tgt => att1}}, + {create, #{tgt => rev1, atts => [att1]}} + ]). + + +add_attachment_in_update({Db, _}) -> + check(Db, [ + {mk_att, #{tgt => att1}}, + {create, #{tgt => rev1}}, + {update, #{src => rev1, tgt => rev2, atts => [att1]}} + ]). + + +add_second_attachment({Db, _}) -> + check(Db, [ + {mk_att, #{tgt => att1}}, + {mk_att, #{tgt => att2}}, + {create, #{tgt => rev1, atts => [att1]}}, + {update, #{src => rev1, tgt => rev2, atts => [att1, att2]}} + ]). + + +delete_attachment({Db, _}) -> + check(Db, [ + {mk_att, #{tgt => att1}}, + {create, #{tgt => rev1, atts => [att1]}}, + {update, #{src => rev1, tgt => rev2}} + ]). + + +delete_one_attachment({Db, _}) -> + check(Db, [ + {mk_att, #{tgt => att1}}, + {mk_att, #{tgt => att2}}, + {mk_att, #{tgt => att3, stub => att1, revpos => 1}}, + {create, #{tgt => rev1, atts => [att1, att2]}}, + {update, #{src => rev1, tgt => rev2, atts => [att3]}} + ]). + + +delete_all_attachments({Db, _}) -> + check(Db, [ + {mk_att, #{tgt => att1}}, + {mk_att, #{tgt => att2}}, + {create, #{tgt => rev1, atts => [att1, att2]}}, + {update, #{src => rev1, tgt => rev2, atts => []}} + ]). + + +re_add_attachment({Db, _}) -> + check(Db, [ + {mk_att, #{tgt => att1}}, + {create, #{tgt => rev1, atts => [att1]}}, + {update, #{src => rev1, tgt => rev2}}, + {update, #{src => rev2, tgt => rev3, atts => [att1]}} + ]). + + +update_and_remove_attachment({Db, _}) -> + check(Db, [ + {mk_att, #{tgt => att1}}, + {mk_att, #{tgt => att2}}, + {mk_att, #{tgt => att3, stub => att1, revpos => 1}}, + {mk_att, #{tgt => att4}}, + {create, #{tgt => rev1, atts => [att1, att2]}}, + {update, #{src => rev1, tgt => rev2, atts => [att3, att4]}} + ]). + + +replicate_new_doc_with_attachment({Db, _}) -> + check(Db, [ + {mk_att, #{tgt => att1}}, + {replicate, #{tgt => rev1, atts => [att1]}} + ]). + + +replicate_remove_attachment({Db, _}) -> + check(Db, [ + {mk_att, #{tgt => att1}}, + {create, #{tgt => rev1, atts => [att1]}}, + {replicate, #{src => rev1, tgt => rev2}} + ]). + + +replicate_stub_attachment({Db, _}) -> + check(Db, [ + {mk_att, #{tgt => att1}}, + {mk_att, #{tgt => att2, stub => att1, revpos => 1}}, + {create, #{tgt => rev1, atts => [att1]}}, + {replicate, #{src => rev1, tgt => rev2, atts => [att2]}} + ]). + + +replicate_stub_and_new_attachment({Db, _}) -> + check(Db, [ + {mk_att, #{tgt => att1}}, + {mk_att, #{tgt => att2, stub => att1, revpos => 1}}, + {mk_att, #{tgt => att3}}, + {create, #{tgt => rev1, atts => [att1]}}, + {replicate, #{src => rev1, tgt => rev2, atts => [att2, att3]}} + ]). + + +replicate_new_att_to_winner({Db, _}) -> + check(Db, [ + {mk_att, #{tgt => att1}}, + {create, #{tgt => rev1}}, + {replicate, #{tgt => rev2}}, + {replicate, #{ + src => {winner, [rev1, rev2]}, + tgt => rev3, + atts => [att1]} + } + ]). + + +replicate_change_att_to_winner({Db, _}) -> + check(Db, [ + {mk_att, #{tgt => att1}}, + {mk_att, #{tgt => att2}}, + {create, #{tgt => rev1, atts => [att1]}}, + {replicate, #{tgt => rev2, atts => [att1]}}, + {replicate, #{ + src => {winner, [rev1, rev2]}, + tgt => rev3, + atts => [att2]} + } + ]). + + +replicate_rem_att_from_winner({Db, _}) -> + check(Db, [ + {mk_att, #{tgt => att1}}, + {create, #{tgt => rev1, atts => [att1]}}, + {replicate, #{tgt => rev2, atts => [att1]}}, + {replicate, #{src => {winner, [rev1, rev2]}, tgt => rev3}} + ]). + + +replicate_stub_to_winner({Db, _}) -> + check(Db, [ + {mk_att, #{tgt => att1}}, + {mk_att, #{tgt => att2, stub => att1, revpos => 1}}, + {create, #{tgt => rev1, atts => [att1]}}, + {replicate, #{tgt => rev2, atts => [att1]}}, + {replicate, #{ + src => {winner, [rev1, rev2]}, + tgt => rev3, + atts => [att2]}} + ]). + + +replicate_new_att_to_conflict({Db, _}) -> + check(Db, [ + {mk_att, #{tgt => att1}}, + {create, #{tgt => rev1}}, + {replicate, #{tgt => rev2}}, + {replicate, #{ + src => {conflict, [rev1, rev2]}, + tgt => rev3, + atts => [att1]} + } + ]). + + +replicate_change_att_to_conflict({Db, _}) -> + check(Db, [ + {mk_att, #{tgt => att1}}, + {mk_att, #{tgt => att2}}, + {create, #{tgt => rev1, atts => [att1]}}, + {replicate, #{tgt => rev2, atts => [att1]}}, + {replicate, #{ + src => {conflict, [rev1, rev2]}, + tgt => rev3, + atts => [att2]} + } + ]). + + +replicate_rem_att_from_conflict({Db, _}) -> + check(Db, [ + {mk_att, #{tgt => att1}}, + {create, #{tgt => rev1, atts => [att1]}}, + {replicate, #{tgt => rev2, atts => [att1]}}, + {replicate, #{src => {conflict, [rev1, rev2]}, tgt => rev3}} + ]). + + +replicate_stub_to_conflict({Db, _}) -> + check(Db, [ + {mk_att, #{tgt => att1}}, + {mk_att, #{tgt => att2, stub => att1, revpos => 1}}, + {create, #{tgt => rev1, atts => [att1]}}, + {replicate, #{tgt => rev2, atts => [att1]}}, + {replicate, #{ + src => {conflict, [rev1, rev2]}, + tgt => rev3, + atts => [att2]}} + ]). + + +create_local_doc({Db, _}) -> + check(Db, #{local => true}, [ + {create, #{tgt => rev1}} + ]). + + +update_local_doc({Db, _}) -> + check(Db, #{local => true}, [ + {create, #{tgt => rev1}}, + {update, #{src => rev1, tgt => rev2}} + ]). + + +delete_local_doc({Db, _}) -> + check(Db, #{local => true}, [ + {create, #{tgt => rev1}}, + {update, #{src => rev1, tgt => rev2}}, + {delete, #{src => rev2, tgt => rev3}} + ]). + + +recreate_local_doc({Db, _}) -> + check(Db, #{local => true}, [ + {create, #{tgt => rev1}}, + {update, #{src => rev1, tgt => rev2}}, + {delete, #{src => rev2, tgt => rev3}}, + {create, #{tgt => rev4}} + ]). + + +check(Db, Actions) -> + check(Db, #{}, Actions). + + +check(Db, CheckOpts, Actions) -> + DocId = case maps:get(local, CheckOpts, false) of + true -> + Base = couch_uuids:random(), + <<"_local/", Base/binary>>; + false -> + couch_uuids:random() + end, + InitSt = #{ + doc_id => DocId, + revs => #{}, + atts => #{}, + size => db_size(Db) + }, + lists:foldl(fun({Action, Opts}, StAcc) -> + case Action of + create -> create_doc(Db, Opts, StAcc); + update -> update_doc(Db, Opts, StAcc); + delete -> delete_doc(Db, Opts, StAcc); + replicate -> replicate_doc(Db, Opts, StAcc); + mk_att -> make_attachment(Opts, StAcc); + log_state -> log_state(Opts, StAcc) + end + end, InitSt, Actions). + + +create_doc(Db, Opts, St) -> + #{ + doc_id := DocId, + revs := Revs, + atts := Atts, + size := InitDbSize + } = St, + + ?assert(maps:is_key(tgt, Opts)), + + Tgt = maps:get(tgt, Opts), + AttKeys = maps:get(atts, Opts, []), + Depth = maps:get(depth, Opts, 1), + + ?assert(not maps:is_key(Tgt, Revs)), + lists:foreach(fun(AttKey) -> + ?assert(maps:is_key(AttKey, Atts)) + end, AttKeys), + ?assert(Depth >= 1), + + AttRecords = lists:map(fun(AttKey) -> + maps:get(AttKey, Atts) + end, AttKeys), + + InitDoc = #doc{id = DocId}, + FinalDoc = lists:foldl(fun(Iter, Doc0) -> + #doc{ + revs = {_OldStart, OldRevs} + } = Doc1 = randomize_doc(Doc0), + Doc2 = if Iter < Depth -> Doc1; true -> + Doc1#doc{atts = AttRecords} + end, + {ok, {Pos, Rev}} = fabric2_db:update_doc(Db, Doc2), + Doc2#doc{revs = {Pos, [Rev | OldRevs]}} + end, InitDoc, lists:seq(1, Depth)), + + FinalDocSize = doc_size(FinalDoc), + FinalDbSize = db_size(Db), + + ?assertEqual(FinalDbSize - InitDbSize, FinalDocSize), + + store_rev(Db, St, FinalDbSize, Tgt, FinalDoc). + + +update_doc(Db, Opts, St) -> + #{ + doc_id := DocId, + revs := Revs, + atts := Atts, + size := InitDbSize + } = St, + + IsLocal = case DocId of + <<"_local/", _/binary>> -> true; + _ -> false + end, + + ?assert(maps:is_key(src, Opts)), + ?assert(maps:is_key(tgt, Opts)), + + Src = pick_rev(Revs, maps:get(src, Opts)), + Tgt = maps:get(tgt, Opts), + AttKeys = maps:get(atts, Opts, []), + Depth = maps:get(depth, Opts, 1), + + ?assert(maps:is_key(Src, Revs)), + ?assert(not maps:is_key(Tgt, Revs)), + lists:foreach(fun(AttKey) -> + ?assert(maps:is_key(AttKey, Atts)) + end, AttKeys), + ?assert(Depth >= 1), + + AttRecords = lists:map(fun(AttKey) -> + maps:get(AttKey, Atts) + end, AttKeys), + + InitDoc = maps:get(Src, Revs), + FinalDoc = lists:foldl(fun(Iter, Doc0) -> + #doc{ + revs = {_OldStart, OldRevs} + } = Doc1 = randomize_doc(Doc0), + Doc2 = if Iter < Depth -> Doc1; true -> + Doc1#doc{atts = AttRecords} + end, + {ok, {Pos, Rev}} = fabric2_db:update_doc(Db, Doc2), + case IsLocal of + true -> Doc2#doc{revs = {Pos, [Rev]}}; + false -> Doc2#doc{revs = {Pos, [Rev | OldRevs]}} + end + end, InitDoc, lists:seq(1, Depth)), + + InitDocSize = doc_size(InitDoc), + FinalDocSize = doc_size(FinalDoc), + FinalDbSize = db_size(Db), + + ?assertEqual(FinalDbSize - InitDbSize, FinalDocSize - InitDocSize), + + store_rev(Db, St, FinalDbSize, Tgt, FinalDoc). + + +delete_doc(Db, Opts, St) -> + #{ + doc_id := DocId, + revs := Revs, + size := InitDbSize + } = St, + + IsLocal = case DocId of + <<"_local/", _/binary>> -> true; + _ -> false + end, + + ?assert(maps:is_key(src, Opts)), + ?assert(maps:is_key(tgt, Opts)), + + Src = pick_rev(Revs, maps:get(src, Opts)), + Tgt = maps:get(tgt, Opts), + + ?assert(maps:is_key(Src, Revs)), + ?assert(not maps:is_key(Tgt, Revs)), + + InitDoc = maps:get(Src, Revs), + #doc{ + revs = {_OldStart, OldRevs} + } = UpdateDoc = randomize_deleted_doc(InitDoc), + + {ok, {Pos, Rev}} = fabric2_db:update_doc(Db, UpdateDoc), + + FinalDoc = case IsLocal of + true -> UpdateDoc#doc{revs = {Pos, [Rev]}}; + false -> UpdateDoc#doc{revs = {Pos, [Rev | OldRevs]}} + end, + + InitDocSize = doc_size(InitDoc), + FinalDocSize = doc_size(FinalDoc), + FinalDbSize = db_size(Db), + + ?assertEqual(FinalDbSize - InitDbSize, FinalDocSize - InitDocSize), + + store_rev(Db, St, FinalDbSize, Tgt, FinalDoc). + + +replicate_doc(Db, Opts, St) -> + #{ + doc_id := DocId, + revs := Revs, + atts := Atts, + size := InitDbSize + } = St, + + ?assert(maps:is_key(tgt, Opts)), + + Src = pick_rev(Revs, maps:get(src, Opts, undefined)), + SrcExists = maps:get(src_exists, Opts, true), + Tgt = maps:get(tgt, Opts), + Deleted = maps:get(deleted, Opts, false), + AttKeys = maps:get(atts, Opts, []), + Depth = maps:get(depth, Opts, 1), + + if Src == undefined -> ok; true -> + ?assert(maps:is_key(Src, Revs)) + end, + ?assert(not maps:is_key(Tgt, Revs)), + ?assert(is_boolean(Deleted)), + lists:foreach(fun(AttKey) -> + ?assert(maps:is_key(AttKey, Atts)) + end, AttKeys), + ?assert(Depth >= 0), + + if Depth > 0 -> ok; true -> + ?assert(length(AttKeys) == 0) + end, + + InitDoc = maps:get(Src, Revs, #doc{id = DocId}), + NewRevsDoc = lists:foldl(fun(_, Doc0) -> + #doc{ + revs = {RevStart, RevIds} + } = Doc0, + NewRev = crypto:strong_rand_bytes(16), + Doc0#doc{ + revs = {RevStart + 1, [NewRev | RevIds]} + } + end, InitDoc, lists:seq(1, Depth)), + + FinalDoc = if NewRevsDoc == InitDoc -> NewRevsDoc; true -> + UpdateDoc = case Deleted of + true -> randomize_deleted_doc(NewRevsDoc); + false -> randomize_doc(NewRevsDoc) + end, + #doc{ + revs = {RevPos, _} + } = UpdateDoc, + AttRecords = lists:map(fun(AttKey) -> + BaseAtt = maps:get(AttKey, Atts), + case couch_att:fetch(data, BaseAtt) of + stub -> BaseAtt; + <<_/binary>> -> couch_att:store(revpos, RevPos, BaseAtt) + end + end, AttKeys), + UpdateDoc#doc{atts = AttRecords} + end, + + try + {ok, _} = fabric2_db:update_doc(Db, FinalDoc, [replicated_changes]) + catch throw:{missing_stub, _} -> + log_state(#{}, St), + ?debugFmt("Replicated: ~p~n", [FinalDoc]), + ?assert(false) + end, + + InitDocSize = doc_size(InitDoc), + FinalDocSize = doc_size(FinalDoc), + FinalDbSize = db_size(Db), + + SizeChange = case {Src, SrcExists} of + {undefined, _} -> FinalDocSize; + {_, false} -> FinalDocSize; + {_, _} -> FinalDocSize - InitDocSize + end, + ?assertEqual(FinalDbSize - InitDbSize, SizeChange), + + store_rev(Db, St, FinalDbSize, Tgt, FinalDoc). + + +make_attachment(Opts, St) -> + #{ + atts := Atts + } = St, + + ?assert(maps:is_key(tgt, Opts)), + + Tgt = maps:get(tgt, Opts), + Stub = maps:get(stub, Opts, undefined), + RevPos = maps:get(revpos, Opts, undefined), + NameRaw = maps:get(name, Opts, undefined), + + ?assert(not maps:is_key(Tgt, Atts)), + if Stub == undefined -> ok; true -> + ?assert(maps:is_key(Stub, Atts)) + end, + ?assert(RevPos == undefined orelse RevPos >= 0), + + Name = if + NameRaw == undefined -> undefined; + is_atom(NameRaw) -> atom_to_binary(NameRaw, utf8); + is_binary(NameRaw) -> NameRaw; + is_list(NameRaw) -> list_to_binary(NameRaw) + end, + + Att0 = case Stub of + undefined -> + random_attachment(Name); + _ -> + SrcAtt = maps:get(Stub, Atts), + couch_att:store(data, stub, SrcAtt) + end, + Att1 = if RevPos == undefined -> Att0; true -> + couch_att:store(revpos, RevPos, Att0) + end, + + St#{atts := maps:put(Tgt, Att1, Atts)}. + + +log_state(_Opts, St) -> + #{ + doc_id := DocId, + revs := Revs, + atts := Atts, + size := DbSize + } = St, + + ?debugFmt("~nDocId: ~p~n", [DocId]), + ?debugFmt("Db Size: ~p~n~n", [DbSize]), + + RevKeys = maps:keys(Revs), + lists:foreach(fun(RevKey) -> + #doc{ + id = RevDocId, + revs = {Pos, [Rev | RestRevs]}, + body = Body, + deleted = Deleted, + atts = DocAtts, + meta = Meta + } = Doc = maps:get(RevKey, Revs), + ?debugFmt("Doc: ~p (~p)~n", [RevKey, doc_size(Doc)]), + ?debugFmt("Id: ~p~n", [RevDocId]), + ?debugFmt("Rev: ~p ~w~n", [Pos, Rev]), + lists:foreach(fun(R) -> + ?debugFmt(" ~p~n", [R]) + end, RestRevs), + ?debugFmt("Deleted: ~p~n", [Deleted]), + ?debugFmt("Atts:~n", []), + lists:foreach(fun(Att) -> + ?debugFmt(" ~p~n", [Att]) + end, DocAtts), + ?debugFmt("Body: ~p~n", [Body]), + ?debugFmt("Meta: ~p~n", [Meta]), + ?debugFmt("~n", []) + end, lists:sort(RevKeys)), + + AttKeys = maps:keys(Atts), + ?debugFmt("~n~nAtts:~n", []), + lists:foreach(fun(AttKey) -> + Att = maps:get(AttKey, Atts), + ?debugFmt("Att: ~p (~p)~n", [AttKey, couch_att:external_size(Att)]), + ?debugFmt(" ~p~n", [Att]) + end, lists:sort(AttKeys)), + + St. + + +pick_rev(_Revs, Rev) when is_atom(Rev) -> + Rev; +pick_rev(Revs, {Op, RevList}) when Op == winner; Op == conflict -> + ChooseFrom = lists:map(fun(Rev) -> + #doc{ + revs = {S, [R | _]}, + deleted = Deleted + } = maps:get(Rev, Revs), + #{ + deleted => Deleted, + rev_id => {S, R}, + name => Rev + } + end, RevList), + Sorted = fabric2_util:sort_revinfos(ChooseFrom), + RetRev = case Op of + winner -> hd(Sorted); + conflict -> choose(tl(Sorted)) + end, + maps:get(name, RetRev). + + +store_rev(Db, St, DbSize, Tgt, #doc{id = <<"_local/", _/binary>>} = Doc) -> + DbDoc = case fabric2_db:open_doc(Db, Doc#doc.id) of + {ok, Found} -> Found; + {not_found, _} -> not_found + end, + store_rev(St, DbSize, Tgt, DbDoc); + +store_rev(Db, St, DbSize, Tgt, #doc{} = Doc) -> + #doc{ + id = DocId, + revs = {Pos, [Rev | _]} + } = Doc, + RevId = {Pos, Rev}, + {ok, [{ok, DbDoc}]} = fabric2_db:open_doc_revs(Db, DocId, [RevId], []), + store_rev(St, DbSize, Tgt, DbDoc). + + +store_rev(St, DbSize, Tgt, Doc) -> + #{ + revs := Revs + } = St, + ?assert(not maps:is_key(Tgt, Revs)), + St#{ + revs := maps:put(Tgt, Doc, Revs), + size := DbSize + }. + + +randomize_doc(#doc{} = Doc) -> + Doc#doc{ + deleted = false, + body = random_body() + }. + + +randomize_deleted_doc(Doc) -> + NewDoc = case rand:uniform() < 0.05 of + true -> randomize_doc(Doc); + false -> Doc#doc{body = {[]}} + end, + NewDoc#doc{deleted = true}. + + +db_size(Info) when is_list(Info) -> + {sizes, {Sizes}} = lists:keyfind(sizes, 1, Info), + {<<"external">>, External} = lists:keyfind(<<"external">>, 1, Sizes), + External; +db_size(Db) when is_map(Db) -> + {ok, Info} = fabric2_db:get_db_info(Db), + db_size(Info). + + +doc_size(#doc{id = <<"_local/", _/binary>>} = Doc) -> + fabric2_util:ldoc_size(Doc); +doc_size(#doc{} = Doc) -> + fabric2_util:rev_size(Doc). + + +-define(MAX_JSON_ELEMENTS, 5). +-define(MAX_STRING_LEN, 10). +-define(MAX_INT, 4294967296). + + +random_body() -> + Elems = rand:uniform(?MAX_JSON_ELEMENTS), + {Obj, _} = random_json_object(Elems), + Obj. + + +random_json(MaxElems) -> + case choose([object, array, terminal]) of + object -> random_json_object(MaxElems); + array -> random_json_array(MaxElems); + terminal -> {random_json_terminal(), MaxElems} + end. + + +random_json_object(MaxElems) -> + NumKeys = rand:uniform(MaxElems + 1) - 1, + {Props, RemElems} = lists:mapfoldl(fun(_, Acc1) -> + {Value, Acc2} = random_json(Acc1), + {{random_json_string(), Value}, Acc2} + end, MaxElems - NumKeys, lists:seq(1, NumKeys)), + {{Props}, RemElems}. + + +random_json_array(MaxElems) -> + NumItems = rand:uniform(MaxElems + 1) - 1, + lists:mapfoldl(fun(_, Acc1) -> + random_json(Acc1) + end, MaxElems - NumItems, lists:seq(1, NumItems)). + + +random_json_terminal() -> + case choose([null, true, false, number, string]) of + null -> null; + true -> true; + false -> false; + number -> random_json_number(); + string -> random_json_string() + end. + + +random_json_number() -> + AbsValue = case choose([integer, double]) of + integer -> rand:uniform(?MAX_INT); + double -> rand:uniform() * rand:uniform() + end, + case choose([pos, neg]) of + pos -> AbsValue; + neg -> -1 * AbsValue + end. + + +random_json_string() -> + random_string(0, ?MAX_STRING_LEN). + + +random_attachment(undefined) -> + random_attachment(random_string(1, 32)); + +random_attachment(Name) when is_binary(Name) -> + Type = random_string(1, 32), + Data = random_string(1, 512), + Md5 = erlang:md5(Data), + couch_att:new([ + {name, Name}, + {type, Type}, + {att_len, size(Data)}, + {data, Data}, + {encoding, identity}, + {md5, Md5} + ]). + + +random_string(MinLen, MaxLen) -> + Alphabet = [ + $a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m, + $n, $o, $p, $q, $r, $s, $t, $u, $v, $w, $x, $y, $z, + $A, $B, $C, $D, $E, $F, $G, $H, $I, $J, $K, $L, $M, + $N, $O, $P, $Q, $R, $S, $T, $U, $V, $W, $Y, $X, $Z, + $1, $2, $3, $4, $5, $6, $7, $8, $9, $0, + $!, $@, $#, $$, $%, $^, $&, $*, $(, $), + $ , ${, $}, $[, $], $", $', $-, $_, $+, $=, $,, $., + $\x{1}, $\x{a2}, $\x{20ac}, $\x{10348} + ], + Len = MinLen + rand:uniform(MaxLen - MinLen) - 1, + Str = lists:map(fun(_) -> + choose(Alphabet) + end, lists:seq(1, Len)), + unicode:characters_to_binary(Str). + + +choose(Options) -> + Pos = rand:uniform(length(Options)), + lists:nth(Pos, Options). diff --git a/src/fabric/test/fabric2_doc_size_tests.erl b/src/fabric/test/fabric2_doc_size_tests.erl new file mode 100644 index 000000000..1e3dca4f6 --- /dev/null +++ b/src/fabric/test/fabric2_doc_size_tests.erl @@ -0,0 +1,320 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(fabric2_doc_size_tests). + + +-include_lib("couch/include/couch_db.hrl"). +-include_lib("eunit/include/eunit.hrl"). + + +% Doc body size calculations +% ID: size(Doc#doc.id) +% Rev: size(erlfdb_tuple:encode(Start)) + size(Rev) % where Rev is usually 16 +% Deleted: 1 % (binary value is one byte) +% Body: couch_ejson_size:external_size(Body) % Where empty is {} which is 2) + + +-define(NUM_RANDOM_TESTS, 1000). + + +-define(DOC_IDS, [ + {0, <<>>}, + {1, <<"a">>}, + {3, <<"foo">>}, + {6, <<"foobar">>}, + {32, <<"af196ae095631b020eedf8f69303e336">>} +]). + +-define(REV_STARTS, [ + {1, 0}, + {2, 1}, + {2, 255}, + {3, 256}, + {3, 65535}, + {4, 65536}, + {4, 16777215}, + {5, 16777216}, + {5, 4294967295}, + {6, 4294967296}, + {6, 1099511627775}, + {7, 1099511627776}, + {7, 281474976710655}, + {8, 281474976710656}, + {8, 72057594037927935}, + {9, 72057594037927936}, + {9, 18446744073709551615}, + + % The jump from 9 to 11 bytes is because when we + % spill over into the bigint range of 9-255 + % bytes we have an extra byte that encodes the + % length of the bigint. + {11, 18446744073709551616} +]). + +-define(REVS, [ + {0, <<>>}, + {8, <<"foobarba">>}, + {16, <<"foobarbazbambang">>} +]). + +-define(DELETED, [ + {1, true}, + {1, false} +]). + +-define(BODIES, [ + {2, {[]}}, + {13, {[{<<"foo">>, <<"bar">>}]}}, + {28, {[{<<"b">>, <<"a">>}, {<<"c">>, [true, null, []]}]}} +]). + +-define(ATT_NAMES, [ + {5, <<"a.txt">>}, + {7, <<"foo.csv">>}, + {29, <<"a-longer-name-for-example.bat">>} +]). + +-define(ATT_TYPES, [ + {24, <<"application/octet-stream">>}, + {10, <<"text/plain">>}, + {9, <<"image/png">>} +]). + +-define(ATT_BODIES, [ + {0, <<>>}, + {1, <<"g">>}, + {6, <<"foobar">>}, + {384, << + "xlasdjfsapoiewrposdlfadfuaducvwerwlkdsfljdfusfsd" + "xlasdjfsapoiewrposdlfadfuaducvwerwlkdsfljdfusfsd" + "xlasdjfsapoiewrposdlfadfuaducvwerwlkdsfljdfusfsd" + "xlasdjfsapoiewrposdlfadfuaducvwerwlkdsfljdfusfsd" + "xlasdjfsapoiewrposdlfadfuaducvwerwlkdsfljdfusfsd" + "xlasdjfsapoiewrposdlfadfuaducvwerwlkdsfljdfusfsd" + "xlasdjfsapoiewrposdlfadfuaducvwerwlkdsfljdfusfsd" + "xlasdjfsapoiewrposdlfadfuaducvwerwlkdsfljdfusfsd" + >>} +]). + +-define(LDOC_IDS, [ + {8, <<"_local/a">>}, + {10, <<"_local/foo">>}, + {13, <<"_local/foobar">>}, + {39, <<"_local/af196ae095631b020eedf8f69303e336">>} +]). + +-define(LDOC_REVS, [ + {1, <<"0">>}, + {2, <<"10">>}, + {3, <<"100">>}, + {4, <<"1000">>}, + {5, <<"10000">>}, + {6, <<"100000">>}, + {7, <<"1000000">>} +]). + + +empty_doc_test() -> + ?assertEqual(4, fabric2_util:rev_size(#doc{})). + + +docid_size_test() -> + lists:foreach(fun({Size, DocId}) -> + ?assertEqual(4 + Size, fabric2_util:rev_size(#doc{id = DocId})) + end, ?DOC_IDS). + + +rev_size_test() -> + lists:foreach(fun({StartSize, Start}) -> + lists:foreach(fun({RevSize, Rev}) -> + Doc = #doc{ + revs = {Start, [Rev]} + }, + ?assertEqual(3 + StartSize + RevSize, fabric2_util:rev_size(Doc)) + end, ?REVS) + end, ?REV_STARTS). + + +deleted_size_test() -> + lists:foreach(fun({Size, Deleted}) -> + ?assertEqual(3 + Size, fabric2_util:rev_size(#doc{deleted = Deleted})) + end, ?DELETED). + + +body_size_test() -> + lists:foreach(fun({Size, Body}) -> + ?assertEqual(2 + Size, fabric2_util:rev_size(#doc{body = Body})) + end, ?BODIES). + + +att_names_test() -> + lists:foreach(fun({Size, AttName}) -> + Att = mk_att(AttName, <<>>, <<>>, false), + Doc = #doc{atts = [Att]}, + ?assertEqual(4 + Size, fabric2_util:rev_size(Doc)) + end, ?ATT_NAMES). + + +att_types_test() -> + lists:foreach(fun({Size, AttType}) -> + Att = mk_att(<<"foo">>, AttType, <<>>, false), + Doc = #doc{atts = [Att]}, + ?assertEqual(7 + Size, fabric2_util:rev_size(Doc)) + end, ?ATT_TYPES). + + +att_bodies_test() -> + lists:foreach(fun({Size, AttBody}) -> + Att1 = mk_att(<<"foo">>, <<>>, AttBody, false), + Doc1 = #doc{atts = [Att1]}, + ?assertEqual(7 + Size, fabric2_util:rev_size(Doc1)), + + Att2 = mk_att(<<"foo">>, <<>>, AttBody, true), + Doc2 = #doc{atts = [Att2]}, + ?assertEqual(7 + 16 + Size, fabric2_util:rev_size(Doc2)) + end, ?ATT_BODIES). + + +local_doc_ids_test() -> + lists:foreach(fun({Size, LDocId}) -> + ?assertEqual(3 + Size, fabric2_util:ldoc_size(mk_ldoc(LDocId, 0))) + end, ?LDOC_IDS). + + +local_doc_revs_test() -> + lists:foreach(fun({Size, Rev}) -> + Doc = mk_ldoc(<<"_local/foo">>, Rev), + ?assertEqual(12 + Size, fabric2_util:ldoc_size(Doc)) + end, ?LDOC_REVS). + + +local_doc_bodies_test() -> + lists:foreach(fun({Size, Body}) -> + Doc = mk_ldoc(<<"_local/foo">>, 0, Body), + ?assertEqual(11 + Size, fabric2_util:ldoc_size(Doc)) + end, ?BODIES). + + +doc_combinatorics_test() -> + Elements = [ + {?DOC_IDS, fun(Doc, DocId) -> Doc#doc{id = DocId} end}, + {?REV_STARTS, fun(Doc, RevStart) -> + #doc{revs = {_, RevIds}} = Doc, + Doc#doc{revs = {RevStart, RevIds}} + end}, + {?REVS, fun(Doc, Rev) -> + #doc{revs = {Start, _}} = Doc, + Doc#doc{revs = {Start, [Rev]}} + end}, + {?DELETED, fun(Doc, Deleted) -> Doc#doc{deleted = Deleted} end}, + {?BODIES, fun(Doc, Body) -> Doc#doc{body = Body} end} + ], + doc_combine(Elements, 0, #doc{}). + + +doc_combine([], TotalSize, Doc) -> + ?assertEqual(TotalSize, fabric2_util:rev_size(Doc)); + +doc_combine([{Elems, UpdateFun} | Rest], TotalSize, Doc) -> + lists:foreach(fun({Size, Elem}) -> + doc_combine(Rest, TotalSize + Size, UpdateFun(Doc, Elem)) + end, Elems). + + +local_doc_combinatorics_test() -> + Elements = [ + {?LDOC_IDS, fun(Doc, DocId) -> Doc#doc{id = DocId} end}, + {?LDOC_REVS, fun(Doc, Rev) -> Doc#doc{revs = {0, [Rev]}} end}, + {?BODIES, fun(Doc, Body) -> Doc#doc{body = Body} end} + ], + local_doc_combine(Elements, 0, #doc{}). + + +local_doc_combine([], TotalSize, Doc) -> + ?assertEqual(TotalSize, fabric2_util:ldoc_size(Doc)); + +local_doc_combine([{Elems, UpdateFun} | Rest], TotalSize, Doc) -> + lists:foreach(fun({Size, Elem}) -> + local_doc_combine(Rest, TotalSize + Size, UpdateFun(Doc, Elem)) + end, Elems). + + +random_docs_test() -> + lists:foreach(fun(_) -> + {DocIdSize, DocId} = choose(?DOC_IDS), + {RevStartSize, RevStart} = choose(?REV_STARTS), + {RevSize, Rev} = choose(?REVS), + {DeletedSize, Deleted} = choose(?DELETED), + {BodySize, Body} = choose(?BODIES), + NumAtts = choose([0, 1, 2, 5]), + {Atts, AttSize} = lists:mapfoldl(fun(_, Acc) -> + {S, A} = random_att(), + {A, Acc + S} + end, 0, lists:seq(1, NumAtts)), + Doc = #doc{ + id = DocId, + revs = {RevStart, [Rev]}, + deleted = Deleted, + body = Body, + atts = Atts + }, + Expect = lists:sum([ + DocIdSize, + RevStartSize, + RevSize, + DeletedSize, + BodySize, + AttSize + ]), + ?assertEqual(Expect, fabric2_util:rev_size(Doc)) + end, lists:seq(1, ?NUM_RANDOM_TESTS)). + + +random_att() -> + {NameSize, Name} = choose(?ATT_NAMES), + {TypeSize, Type} = choose(?ATT_TYPES), + {BodySize, Body} = choose(?ATT_BODIES), + {Md5Size, AddMd5} = choose([{0, false}, {16, true}]), + AttSize = lists:sum([NameSize, TypeSize, BodySize, Md5Size]), + {AttSize, mk_att(Name, Type, Body, AddMd5)}. + + +mk_att(Name, Type, Data, AddMd5) -> + Md5 = if not AddMd5 -> <<>>; true -> + erlang:md5(Data) + end, + couch_att:new([ + {name, Name}, + {type, Type}, + {att_len, size(Data)}, + {data, Data}, + {encoding, identity}, + {md5, Md5} + ]). + + +mk_ldoc(DocId, Rev) -> + mk_ldoc(DocId, Rev, {[]}). + + +mk_ldoc(DocId, Rev, Body) -> + #doc{ + id = DocId, + revs = {0, [Rev]}, + body = Body + }. + + +choose(Options) -> + Pos = rand:uniform(length(Options)), + lists:nth(Pos, Options). -- cgit v1.2.1 From 86d87cdd99e3e98d2466207a0aecb301d52e0afd Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Fri, 14 Feb 2020 11:16:19 -0600 Subject: Convert versionstamps to binaries Versionstamp sequences should always be binaries when retrieved from a rev info map. --- src/couch/src/couch_doc.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/couch/src/couch_doc.erl b/src/couch/src/couch_doc.erl index 073006c77..7224921d4 100644 --- a/src/couch/src/couch_doc.erl +++ b/src/couch/src/couch_doc.erl @@ -388,7 +388,7 @@ rev_info({#{} = RevInfo, {Pos, [RevId | _]}}) -> #rev_info{ deleted = Deleted, body_sp = undefined, - seq = Sequence, + seq = fabric2_fdb:vs_to_seq(Sequence), rev = {Pos, RevId} }. -- cgit v1.2.1 From 193c342d3850487066cda4c6bdaf87fa704f6ca4 Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Fri, 14 Feb 2020 11:17:15 -0600 Subject: Test coverage: list_dbs and list_dbs_info --- src/fabric/test/fabric2_db_crud_tests.erl | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/src/fabric/test/fabric2_db_crud_tests.erl b/src/fabric/test/fabric2_db_crud_tests.erl index 80525513a..943b55f3f 100644 --- a/src/fabric/test/fabric2_db_crud_tests.erl +++ b/src/fabric/test/fabric2_db_crud_tests.erl @@ -30,7 +30,10 @@ crud_test_() -> ?TDEF(open_db), ?TDEF(delete_db), ?TDEF(list_dbs), - ?TDEF(list_dbs_info) + ?TDEF(list_dbs_user_fun), + ?TDEF(list_dbs_user_fun_partial), + ?TDEF(list_dbs_info), + ?TDEF(list_dbs_info_partial) ]) } }. @@ -87,6 +90,26 @@ list_dbs(_) -> ?assert(not lists:member(DbName, AllDbs3)). +list_dbs_user_fun(_) -> + ?assertMatch({ok, _}, fabric2_db:create(?tempdb(), [])), + + UserFun = fun(Row, Acc) -> {ok, [Row | Acc]} end, + {ok, UserAcc} = fabric2_db:list_dbs(UserFun, [], []), + + Base = lists:foldl(fun(DbName, Acc) -> + [{row, [{id, DbName}]} | Acc] + end, [{meta, []}], fabric2_db:list_dbs()), + Expect = lists:reverse(Base, [complete]), + + ?assertEqual(Expect, lists:reverse(UserAcc)). + + +list_dbs_user_fun_partial(_) -> + UserFun = fun(Row, Acc) -> {stop, [Row | Acc]} end, + {ok, UserAcc} = fabric2_db:list_dbs(UserFun, [], []), + ?assertEqual([{meta, []}], UserAcc). + + list_dbs_info(_) -> DbName = ?tempdb(), {ok, AllDbInfos1} = fabric2_db:list_dbs_info(), @@ -103,6 +126,12 @@ list_dbs_info(_) -> ?assert(not is_db_info_member(DbName, AllDbInfos3)). +list_dbs_info_partial(_) -> + UserFun = fun(Row, Acc) -> {stop, [Row | Acc]} end, + {ok, UserAcc} = fabric2_db:list_dbs_info(UserFun, [], []), + ?assertEqual([{meta, []}], UserAcc). + + is_db_info_member(_, []) -> false; -- cgit v1.2.1 From 8ebebcf1c1ef501b5e6f612d42676014aa3b8314 Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Fri, 14 Feb 2020 11:17:52 -0600 Subject: Test coverage: get_full_doc_info --- src/fabric/test/fabric2_db_misc_tests.erl | 87 +++++++++++++++++++++++++++++++ 1 file changed, 87 insertions(+) diff --git a/src/fabric/test/fabric2_db_misc_tests.erl b/src/fabric/test/fabric2_db_misc_tests.erl index 12fc3e50b..f1ee8a893 100644 --- a/src/fabric/test/fabric2_db_misc_tests.erl +++ b/src/fabric/test/fabric2_db_misc_tests.erl @@ -33,6 +33,11 @@ misc_test_() -> ?TDEF(set_revs_limit), ?TDEF(set_security), ?TDEF(is_system_db), + ?TDEF(get_doc_info), + ?TDEF(get_doc_info_not_found), + ?TDEF(get_full_doc_info), + ?TDEF(get_full_doc_info_not_found), + ?TDEF(get_full_doc_infos), ?TDEF(ensure_full_commit), ?TDEF(metadata_bump), ?TDEF(db_version_bump) @@ -109,6 +114,88 @@ is_system_db({DbName, Db, _}) -> ?assertEqual(false, fabric2_db:is_system_db_name(<<"foo/bar">>)). +get_doc_info({_, Db, _}) -> + DocId = couch_uuids:random(), + InsertDoc = #doc{ + id = DocId, + body = {[{<<"foo">>, true}]} + }, + {ok, {Pos, Rev}} = fabric2_db:update_doc(Db, InsertDoc, []), + + DI = fabric2_db:get_doc_info(Db, DocId), + ?assert(is_record(DI, doc_info)), + #doc_info{ + id = DIDocId, + high_seq = HighSeq, + revs = Revs + } = DI, + + ?assertEqual(DocId, DIDocId), + ?assert(is_binary(HighSeq)), + ?assertMatch([#rev_info{}], Revs), + + [#rev_info{ + rev = DIRev, + seq = Seq, + deleted = Deleted, + body_sp = BodySp + }] = Revs, + + ?assertEqual({Pos, Rev}, DIRev), + ?assert(is_binary(Seq)), + ?assert(not Deleted), + ?assertMatch(undefined, BodySp). + + +get_doc_info_not_found({_, Db, _}) -> + DocId = couch_uuids:random(), + ?assertEqual(not_found, fabric2_db:get_doc_info(Db, DocId)). + + +get_full_doc_info({_, Db, _}) -> + DocId = couch_uuids:random(), + InsertDoc = #doc{ + id = DocId, + body = {[{<<"foo">>, true}]} + }, + {ok, {Pos, Rev}} = fabric2_db:update_doc(Db, InsertDoc, []), + FDI = fabric2_db:get_full_doc_info(Db, DocId), + + ?assert(is_record(FDI, full_doc_info)), + #full_doc_info{ + id = FDIDocId, + update_seq = UpdateSeq, + deleted = Deleted, + rev_tree = RevTree, + sizes = SizeInfo + } = FDI, + + ?assertEqual(DocId, FDIDocId), + ?assert(is_binary(UpdateSeq)), + ?assert(not Deleted), + ?assertMatch([{Pos, {Rev, _, []}}], RevTree), + ?assertEqual(#size_info{}, SizeInfo). + + +get_full_doc_info_not_found({_, Db, _}) -> + DocId = couch_uuids:random(), + ?assertEqual(not_found, fabric2_db:get_full_doc_info(Db, DocId)). + + +get_full_doc_infos({_, Db, _}) -> + DocIds = lists:map(fun(_) -> + DocId = couch_uuids:random(), + Doc = #doc{id = DocId}, + {ok, _} = fabric2_db:update_doc(Db, Doc, []), + DocId + end, lists:seq(1, 5)), + + FDIs = fabric2_db:get_full_doc_infos(Db, DocIds), + lists:zipwith(fun(DocId, FDI) -> + ?assertEqual(DocId, FDI#full_doc_info.id) + end, DocIds, FDIs). + + ensure_full_commit({_, Db, _}) -> ?assertEqual({ok, 0}, fabric2_db:ensure_full_commit(Db)), ?assertEqual({ok, 0}, fabric2_db:ensure_full_commit(Db, 5)). -- cgit v1.2.1 From e946ed3981ab91ec7a5725f72ff7ae9e1a47e981 Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Fri, 14 Feb 2020 12:42:28 -0600 Subject: Test coverage: validate_dbname, validate_docid --- src/fabric/test/fabric2_db_misc_tests.erl | 75 +++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) diff --git a/src/fabric/test/fabric2_db_misc_tests.erl b/src/fabric/test/fabric2_db_misc_tests.erl index f1ee8a893..c48ee2c73 100644 --- a/src/fabric/test/fabric2_db_misc_tests.erl +++ b/src/fabric/test/fabric2_db_misc_tests.erl @@ -33,6 +33,8 @@ misc_test_() -> ?TDEF(set_revs_limit), ?TDEF(set_security), ?TDEF(is_system_db), + ?TDEF(validate_dbname), + ?TDEF(validate_doc_ids), ?TDEF(get_doc_info), ?TDEF(get_doc_info_not_found), ?TDEF(get_full_doc_info), @@ -54,6 +56,7 @@ setup() -> cleanup({_DbName, Db, Ctx}) -> + meck:unload(), ok = fabric2_db:delete(fabric2_db:name(Db), []), test_util:stop_couch(Ctx). @@ -114,6 +117,78 @@ is_system_db({DbName, Db, _}) -> ?assertEqual(false, fabric2_db:is_system_db_name(<<"foo/bar">>)). +validate_dbname(_) -> + Tests = [ + {ok, <<"foo">>}, + {ok, "foo"}, + {ok, <<"_replicator">>}, + {error, illegal_database_name, <<"Foo">>}, + {error, illegal_database_name, <<"foo|bar">>}, + {error, illegal_database_name, <<"Foo">>}, + {error, database_name_too_long, << + "0123456789012345678901234567890123456789" + "0123456789012345678901234567890123456789" + "0123456789012345678901234567890123456789" + "0123456789012345678901234567890123456789" + "0123456789012345678901234567890123456789" + "0123456789012345678901234567890123456789" + >>} + ], + CheckFun = fun + ({ok, DbName}) -> + ?assertEqual(ok, fabric2_db:validate_dbname(DbName)); + ({error, Reason, DbName}) -> + Expect = {error, {Reason, DbName}}, + ?assertEqual(Expect, fabric2_db:validate_dbname(DbName)) + end, + lists:foreach(CheckFun, Tests). + + +validate_doc_ids(_) -> + % Basic test with default max infinity length + ?assertEqual(ok, fabric2_db:validate_docid(<<"foo">>)), + + Tests = [ + {ok, <<"_local/foo">>}, + {ok, <<"_design/foo">>}, + {ok, <<"0123456789012345">>}, + {illegal_docid, <<"">>}, + {illegal_docid, <<"_design/">>}, + {illegal_docid, <<"_local/">>}, + {illegal_docid, <<"01234567890123456">>}, + {illegal_docid, <<16#FF>>}, + {illegal_docid, <<"_bad">>}, + {illegal_docid, null} + ], + CheckFun = fun + ({ok, DocId}) -> + ?assertEqual(ok, fabric2_db:validate_docid(DocId)); + ({illegal_docid, DocId}) -> + ?assertThrow({illegal_docid, _}, fabric2_db:validate_docid(DocId)) + end, + + try + meck:new(config, [passthrough]), + meck:expect( + config, + get, + ["couchdb", "max_document_id_length", "infinity"], + "16" + ), + lists:foreach(CheckFun, Tests), + + % Check that fabric2_db_plugin can't allow for + % underscore prefixed dbs + meck:new(fabric2_db_plugin, [passthrough]), + meck:expect(fabric2_db_plugin, validate_docid, ['_'], true), + ?assertEqual(ok, fabric2_db:validate_docid(<<"_wheee">>)) + after + % Unloading within the test as the config mock + % interferes with the db version bump test. + meck:unload() + end. + + get_doc_info({_, Db, _}) -> DocId = couch_uuids:random(), InsertDoc = #doc{ -- cgit v1.2.1 From 78d7a9fd5e91dbd3d5f3b64b637802abb214dc90 Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Fri, 14 Feb 2020 14:48:48 -0600 Subject: Test coverage: apply_open_doc_opts --- src/fabric/test/fabric2_doc_crud_tests.erl | 99 +++++++++++++++++++++++++++++- 1 file changed, 98 insertions(+), 1 deletion(-) diff --git a/src/fabric/test/fabric2_doc_crud_tests.erl b/src/fabric/test/fabric2_doc_crud_tests.erl index 46cd4fcfd..ce3757d55 100644 --- a/src/fabric/test/fabric2_doc_crud_tests.erl +++ b/src/fabric/test/fabric2_doc_crud_tests.erl @@ -67,7 +67,8 @@ doc_crud_test_() -> ?TDEF(create_a_large_local_doc), ?TDEF(create_2_large_local_docs), ?TDEF(local_doc_with_previous_encoding), - ?TDEF(before_doc_update_skips_local_docs) + ?TDEF(before_doc_update_skips_local_docs), + ?TDEF(open_doc_opts) ]) } }. @@ -911,3 +912,99 @@ before_doc_update_skips_local_docs({Db0, _}) -> ?assertEqual({[]}, LDoc2#doc.body), ?assertEqual({[<<"bdu_was_here">>, true]}, Doc2#doc.body). + + +open_doc_opts({Db, _}) -> + % Build out state so that we can exercise each doc + % open option. This requires a live revision with + % an attachment, a conflict, and a deleted conflict. + DocId = couch_uuids:random(), + Att1 = couch_att:new([ + {name, <<"foo.txt">>}, + {type, <<"application/octet-stream">>}, + {att_len, 6}, + {data, <<"foobar">>}, + {encoding, identity}, + {md5, <<>>} + ]), + Doc1A = #doc{ + id = DocId, + atts = [Att1] + }, + {ok, {Pos1, Rev1A}} = fabric2_db:update_doc(Db, Doc1A), + Att2 = couch_att:store([ + {data, stub}, + {revpos, 1} + ], Att1), + Doc1B = Doc1A#doc{ + revs = {Pos1, [Rev1A]}, + atts = [Att2] + }, + {ok, {Pos2, Rev1B}} = fabric2_db:update_doc(Db, Doc1B), + + Rev2 = crypto:strong_rand_bytes(16), + Rev3 = crypto:strong_rand_bytes(16), + Rev4 = crypto:strong_rand_bytes(16), + + % Create a live conflict + Doc2 = #doc{ + id = DocId, + revs = {1, [Rev2]} + }, + {ok, _} = fabric2_db:update_doc(Db, Doc2, [replicated_changes]), + + % Create a deleted conflict + Doc3 = #doc{ + id = DocId, + revs = {1, [Rev3]} + }, + {ok, _} = fabric2_db:update_doc(Db, Doc3, [replicated_changes]), + Doc4 = #doc{ + id = DocId, + revs = {2, [Rev4, Rev3]}, + deleted = true + }, + {ok, _} = fabric2_db:update_doc(Db, Doc4, [replicated_changes]), + + OpenOpts1 = [ + revs_info, + conflicts, + deleted_conflicts, + local_seq, + {atts_since, [{Pos1, Rev1A}]} + ], + {ok, OpenedDoc1} = fabric2_db:open_doc(Db, DocId, OpenOpts1), + + #doc{ + id = DocId, + revs = {2, [Rev1B, Rev1A]}, + atts = [Att3], + meta = Meta + } = OpenedDoc1, + ?assertEqual(stub, couch_att:fetch(data, Att3)), + ?assertEqual( + {revs_info, Pos2, [{Rev1B, available}, {Rev1A, missing}]}, + lists:keyfind(revs_info, 1, Meta) + ), + ?assertEqual( + {conflicts, [{1, Rev2}]}, + lists:keyfind(conflicts, 1, Meta) + ), + ?assertEqual( + {deleted_conflicts, [{2, Rev4}]}, + lists:keyfind(deleted_conflicts, 1, Meta) + ), + ?assertMatch({_, <<_/binary>>}, lists:keyfind(local_seq, 1, Meta)), + + % Empty atts_since list + {ok, OpenedDoc2} = fabric2_db:open_doc(Db, DocId, [{atts_since, []}]), + #doc{atts = [Att4]} = OpenedDoc2, + ?assertNotEqual(stub, couch_att:fetch(data, Att4)), + + % Missing ancestor + Rev5 = crypto:strong_rand_bytes(16), + OpenOpts2 = [{atts_since, [{5, Rev5}]}], + {ok, OpenedDoc3} = fabric2_db:open_doc(Db, DocId, OpenOpts2), + #doc{atts = [Att5]} = OpenedDoc3, + ?assertNotEqual(stub, couch_att:fetch(data, Att5)). + -- cgit v1.2.1 From 293d3d51779a786452d2c4fd9e72ca9db6224e62 Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Mon, 17 Feb 2020 16:34:50 -0500 Subject: Re-use changes feed main transaction when including docs Previously each doc was read in a separate transaction. It turns out that size limits do not apply to read-only transactions so we don't have to worry about that here. Also transaction restart are already implemented so we don't have to worry about timeout either. --- src/chttpd/src/chttpd_db.erl | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/chttpd/src/chttpd_db.erl b/src/chttpd/src/chttpd_db.erl index 3951fdb33..50a9effdb 100644 --- a/src/chttpd/src/chttpd_db.erl +++ b/src/chttpd/src/chttpd_db.erl @@ -86,13 +86,17 @@ handle_request(#httpd{path_parts=[DbName|RestParts],method=Method}=Req)-> handle_changes_req(#httpd{method='POST'}=Req, Db) -> chttpd:validate_ctype(Req, "application/json"), - handle_changes_req1(Req, Db); + fabric2_fdb:transactional(Db, fun(TxDb) -> + handle_changes_req_tx(Req, TxDb) + end); handle_changes_req(#httpd{method='GET'}=Req, Db) -> - handle_changes_req1(Req, Db); + fabric2_fdb:transactional(Db, fun(TxDb) -> + handle_changes_req_tx(Req, TxDb) + end); handle_changes_req(#httpd{path_parts=[_,<<"_changes">>]}=Req, _Db) -> send_method_not_allowed(Req, "GET,POST,HEAD"). -handle_changes_req1(#httpd{}=Req, Db) -> +handle_changes_req_tx(#httpd{}=Req, Db) -> ChangesArgs = parse_changes_query(Req), ChangesFun = chttpd_changes:handle_db_changes(ChangesArgs, Req, Db), Max = chttpd:chunked_response_buffer_size(), -- cgit v1.2.1 From e0bdb7ede9f62ca23729d21b24cc4b57d1be8a3c Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Tue, 18 Feb 2020 11:53:42 -0500 Subject: Handle spurious 1009 (future_version) errors in couch_jobs pending We already handle them in couch_jobs_type_monitor so let's do it in `couch_jobs:wait_pending` as well. Recent fixes in FDB 6.2 didn't completely fix the issue and ther are still spurious 1009 errors dumped in the logs. They seem to be benign as far as couch_jobs operation goes as type monitor code already showed, so let's not pollute the logs with them. --- src/couch_jobs/src/couch_jobs.erl | 5 +++++ src/couch_jobs/src/couch_jobs_type_monitor.erl | 2 +- src/fabric/include/fabric2.hrl | 1 + 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/couch_jobs/src/couch_jobs.erl b/src/couch_jobs/src/couch_jobs.erl index d469ed41a..c134f5ac5 100644 --- a/src/couch_jobs/src/couch_jobs.erl +++ b/src/couch_jobs/src/couch_jobs.erl @@ -296,6 +296,8 @@ accept_loop(Type, NoSched, MaxSchedTime, Timeout) -> case wait_pending(PendingWatch, MaxSchedTime, Timeout) of {error, not_found} -> {error, not_found}; + retry -> + accept_loop(Type, NoSched, MaxSchedTime, Timeout); ok -> accept_loop(Type, NoSched, MaxSchedTime, Timeout) end @@ -318,6 +320,9 @@ wait_pending(PendingWatch, MaxSTime, UserTimeout) -> erlfdb:wait(PendingWatch, [{timeout, Timeout}]), ok catch + error:{erlfdb_error, ?FUTURE_VERSION} -> + erlfdb:cancel(PendingWatch, [flush]), + retry; error:{timeout, _} -> erlfdb:cancel(PendingWatch, [flush]), {error, not_found} diff --git a/src/couch_jobs/src/couch_jobs_type_monitor.erl b/src/couch_jobs/src/couch_jobs_type_monitor.erl index 562a866da..04ad60acc 100644 --- a/src/couch_jobs/src/couch_jobs_type_monitor.erl +++ b/src/couch_jobs/src/couch_jobs_type_monitor.erl @@ -55,7 +55,7 @@ loop(#st{vs = VS, timeout = Timeout} = St) -> try erlfdb:wait(Watch, [{timeout, Timeout}]) catch - error:{erlfdb_error, 1009} -> + error:{erlfdb_error, ?FUTURE_VERSION} -> erlfdb:cancel(Watch, [flush]), ok; error:{timeout, _} -> diff --git a/src/fabric/include/fabric2.hrl b/src/fabric/include/fabric2.hrl index b1bd30629..f526d7b34 100644 --- a/src/fabric/include/fabric2.hrl +++ b/src/fabric/include/fabric2.hrl @@ -66,6 +66,7 @@ % Let's keep these in ascending order -define(TRANSACTION_TOO_OLD, 1007). +-define(FUTURE_VERSION, 1009). -define(COMMIT_UNKNOWN_RESULT, 1021). -- cgit v1.2.1 From bab35666a0223f184e2cc32bbc592584d05b0917 Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Tue, 18 Feb 2020 17:20:15 -0500 Subject: Let couch_jobs use its own metadata key Previously, if the metadata key is bumped in a transaction, the same transaction could not be used to add jobs with `couch_jobs`. That's because metadata is a versionstamped value, and when set, it cannot be read back until that transaction has committed. In `fabric2_fdb` there is a process dict key that is set which declares that metadata was already read, which happens before any db update, however `couch_jobs` uses it's own caching mechanism and doesn't know about that pdict key. Ideally we'd implement a single `couch_fdb` module to be shared between `couch_jobs` and `fabric2_db` but until then it maybe simpler to just let `couch_jobs` use its own metadata key. This way, it doesn't get invalidated or bumped every time dbs get recreated or design docs are updated. The only time it would be bumped is if the FDB layer prefix changed at runtime. --- src/couch_jobs/src/couch_jobs.hrl | 1 + src/couch_jobs/src/couch_jobs_fdb.erl | 27 +++++++++++++++++++++++---- src/couch_jobs/test/couch_jobs_tests.erl | 22 +++++++++++++++++++++- 3 files changed, 45 insertions(+), 5 deletions(-) diff --git a/src/couch_jobs/src/couch_jobs.hrl b/src/couch_jobs/src/couch_jobs.hrl index 055bf091c..bb561b136 100644 --- a/src/couch_jobs/src/couch_jobs.hrl +++ b/src/couch_jobs/src/couch_jobs.hrl @@ -35,6 +35,7 @@ -define(ACTIVITY, 6). +-define(COUCH_JOBS_MD_VERSION, <<"couch_jobs_md_version">>). -define(COUCH_JOBS_EVENT, '$couch_jobs_event'). -define(COUCH_JOBS_CURRENT, '$couch_jobs_current'). -define(UNDEFINED_MAX_SCHEDULED_TIME, 1 bsl 36). diff --git a/src/couch_jobs/src/couch_jobs_fdb.erl b/src/couch_jobs/src/couch_jobs_fdb.erl index a08b78fc1..a81a313d8 100644 --- a/src/couch_jobs/src/couch_jobs_fdb.erl +++ b/src/couch_jobs/src/couch_jobs_fdb.erl @@ -46,7 +46,10 @@ tx/2, get_job/2, - get_jobs/0 + get_jobs/0, + + bump_metadata_version/0, + bump_metadata_version/1 ]). @@ -485,6 +488,19 @@ get_jobs() -> end). +% Call this function if the top level "couchdb" FDB directory layer +% changes. +% +bump_metadata_version() -> + fabric2_fdb:transactional(fun(Tx) -> + bump_metadata_version(Tx) + end). + + +bump_metadata_version(Tx) -> + erlfdb:set_versionstamped_value(Tx, ?COUCH_JOBS_MD_VERSION, <<0:112>>). + + % Private helper functions maybe_enqueue(#{jtx := true} = JTx, Type, JobId, STime, Resubmit, Data) -> @@ -617,7 +633,6 @@ init_jtx(undefined) -> init_jtx({erlfdb_transaction, _} = Tx) -> LayerPrefix = fabric2_fdb:get_dir(Tx), Jobs = erlfdb_tuple:pack({?JOBS}, LayerPrefix), - Version = erlfdb:wait(erlfdb:get(Tx, ?METADATA_VERSION_KEY)), % layer_prefix, md_version and tx here match db map fields in fabric2_fdb % but we also assert that this is a job transaction using the jtx => true % field @@ -626,7 +641,7 @@ init_jtx({erlfdb_transaction, _} = Tx) -> tx => Tx, layer_prefix => LayerPrefix, jobs_path => Jobs, - md_version => Version + md_version => get_metadata_version(Tx) }. @@ -641,13 +656,17 @@ ensure_current(#{jtx := true, tx := Tx} = JTx) -> end. +get_metadata_version({erlfdb_transaction, _} = Tx) -> + erlfdb:wait(erlfdb:get_ss(Tx, ?COUCH_JOBS_MD_VERSION)). + + update_current(#{tx := Tx, md_version := Version} = JTx) -> case get_md_version_age(Version) of Age when Age =< ?MD_VERSION_MAX_AGE_SEC -> % Looked it up not too long ago. Avoid looking it up to frequently JTx; _ -> - case erlfdb:wait(erlfdb:get(Tx, ?METADATA_VERSION_KEY)) of + case get_metadata_version(Tx) of Version -> update_md_version_timestamp(Version), JTx; diff --git a/src/couch_jobs/test/couch_jobs_tests.erl b/src/couch_jobs/test/couch_jobs_tests.erl index a7e085e40..62a75c83e 100644 --- a/src/couch_jobs/test/couch_jobs_tests.erl +++ b/src/couch_jobs/test/couch_jobs_tests.erl @@ -54,7 +54,8 @@ couch_jobs_basic_test_() -> fun enqueue_inactive/1, fun remove_running_job/1, fun check_get_jobs/1, - fun use_fabric_transaction_object/1 + fun use_fabric_transaction_object/1, + fun metadata_version_bump/1 ] } } @@ -604,3 +605,22 @@ use_fabric_transaction_object(#{t1 := T1, j1 := J1, dbname := DbName}) -> ok = couch_jobs:remove(#{tx => undefined}, T1, J1), ok = fabric2_db:delete(DbName, []) end). + + +metadata_version_bump(_) -> + ?_test(begin + JTx1 = couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(), fun(Tx) -> Tx end), + ?assertMatch(#{md_version := not_found}, JTx1), + + ets:delete_all_objects(couch_jobs_fdb), + couch_jobs_fdb:bump_metadata_version(), + JTx2 = couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(), fun(Tx) -> Tx end), + ?assertMatch(#{md_version := Bin} when is_binary(Bin), JTx2), + + ets:delete_all_objects(couch_jobs_fdb), + couch_jobs_fdb:bump_metadata_version(), + JTx3 = couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(), fun(Tx) -> Tx end), + OldMdv = maps:get(md_version, JTx2), + NewMdv = maps:get(md_version, JTx3), + ?assert(NewMdv > OldMdv) + end). -- cgit v1.2.1 From c95d8c15584ce240f8d498a8faca2af5893bc06e Mon Sep 17 00:00:00 2001 From: Jay Doane Date: Thu, 20 Feb 2020 22:06:43 -0800 Subject: Improve validate_dbname test It's possible for other couch_epi plugins to interfere with this test, so mock `couch_epi:decide/5` to always return `no_decision`. --- src/fabric/test/fabric2_db_misc_tests.erl | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/fabric/test/fabric2_db_misc_tests.erl b/src/fabric/test/fabric2_db_misc_tests.erl index c48ee2c73..42a63e2f9 100644 --- a/src/fabric/test/fabric2_db_misc_tests.erl +++ b/src/fabric/test/fabric2_db_misc_tests.erl @@ -141,7 +141,15 @@ validate_dbname(_) -> Expect = {error, {Reason, DbName}}, ?assertEqual(Expect, fabric2_db:validate_dbname(DbName)) end, - lists:foreach(CheckFun, Tests). + try + % Don't allow epi plugins to interfere with test results + meck:new(couch_epi, [passthrough]), + meck:expect(couch_epi, decide, 5, no_decision), + lists:foreach(CheckFun, Tests) + after + % Unload within the test to minimize interference with other tests + meck:unload() + end. validate_doc_ids(_) -> -- cgit v1.2.1 From aa3b738537f1203ab25025a0d3c179f7d74be185 Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Fri, 21 Feb 2020 10:59:03 -0500 Subject: Add 410 status code to stats_descriptions We started to emit that in CouchDB 4.x for temporary views and possibly other endpoints. --- src/couch/priv/stats_descriptions.cfg | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/couch/priv/stats_descriptions.cfg b/src/couch/priv/stats_descriptions.cfg index 7c8fd94cb..fd6468ffa 100644 --- a/src/couch/priv/stats_descriptions.cfg +++ b/src/couch/priv/stats_descriptions.cfg @@ -230,6 +230,10 @@ {type, counter}, {desc, <<"number of HTTP 409 Conflict responses">>} ]}. +{[couchdb, httpd_status_codes, 410], [ + {type, counter}, + {desc, <<"number of HTTP 410 Gone responses">>} +]}. {[couchdb, httpd_status_codes, 412], [ {type, counter}, {desc, <<"number of HTTP 412 Precondition Failed responses">>} -- cgit v1.2.1 From 2e13cffa24042f54ed4b7cd522020c7afc0df48f Mon Sep 17 00:00:00 2001 From: ILYA Khlopotov Date: Fri, 21 Feb 2020 09:25:49 -0800 Subject: Support setting base_url in Couch test helper (take 2) The reason why previous attempt failed is because it overrode [important logic in `process_url/2`](https://github.com/myfreeweb/httpotion/blob/v3.1.2/lib/httpotion.ex#L34:L35): ``` def process_url(url, options) do process_url(url) |> prepend_protocol |> append_query_string(options) end ``` This PR fixes the problem by adding the `prepend_protocol` and `append_query_string`. It also refactor the way base_url is passed around. --- src/chttpd/test/exunit/tracing_test.exs | 2 +- test/elixir/lib/couch.ex | 44 ++++++++++++++++++++++----------- test/elixir/test/auth_cache_test.exs | 2 +- 3 files changed, 32 insertions(+), 16 deletions(-) diff --git a/src/chttpd/test/exunit/tracing_test.exs b/src/chttpd/test/exunit/tracing_test.exs index b50ef936e..f66fb87a2 100644 --- a/src/chttpd/test/exunit/tracing_test.exs +++ b/src/chttpd/test/exunit/tracing_test.exs @@ -29,7 +29,7 @@ defmodule Couch.Test.OpenTracing do setup context do db_name = Utils.random_name("db") - session = Couch.login(context.base_url, context.user, context.pass) + session = Couch.login(context.user, context.pass, base_url: context.base_url) on_exit(fn -> delete_db(session, db_name) diff --git a/test/elixir/lib/couch.ex b/test/elixir/lib/couch.ex index 3aef07f01..ed5862331 100644 --- a/test/elixir/lib/couch.ex +++ b/test/elixir/lib/couch.ex @@ -3,7 +3,7 @@ defmodule Couch.Session do CouchDB session helpers. """ - defstruct [:cookie, :error] + defstruct [:cookie, :error, :base_url] def new(cookie, error \\ "") do %Couch.Session{cookie: cookie, error: error} @@ -42,12 +42,12 @@ defmodule Couch.Session do # if the need arises. def go(%Couch.Session{} = sess, method, url, opts) do - opts = Keyword.merge(opts, cookie: sess.cookie) + opts = Keyword.merge(opts, cookie: sess.cookie, base_url: sess.base_url) Couch.request(method, url, opts) end def go!(%Couch.Session{} = sess, method, url, opts) do - opts = Keyword.merge(opts, cookie: sess.cookie) + opts = Keyword.merge(opts, cookie: sess.cookie, base_url: sess.base_url) Couch.request!(method, url, opts) end end @@ -71,9 +71,10 @@ defmodule Couch do url end - def process_url(url) do - base_url = System.get_env("EX_COUCH_URL") || "http://127.0.0.1:15984" - base_url <> url + def process_url(url, options) do + (Keyword.get(options, :base_url) <> url) + |> prepend_protocol + |> append_query_string(options) end def process_request_headers(headers, _body, options) do @@ -96,10 +97,13 @@ defmodule Couch do end def process_options(options) do + base_url = System.get_env("EX_COUCH_URL") || "http://127.0.0.1:15984" + options = Keyword.put_new(options, :base_url, base_url) + options - |> set_auth_options() - |> set_inactivity_timeout() - |> set_request_timeout() + |> set_auth_options() + |> set_inactivity_timeout() + |> set_request_timeout() end def process_request_body(body) do @@ -161,17 +165,29 @@ defmodule Couch do login(user, pass) end - def login(user, pass, expect \\ :success) do - resp = Couch.post("/_session", body: %{:username => user, :password => pass}) + def login(user, pass, options \\ []) do + options = options |> Enum.into(%{}) + + base_url = + Map.get_lazy(options, :base_url, fn -> + System.get_env("EX_COUCH_URL") || "http://127.0.0.1:15984" + end) + + resp = + Couch.post( + "/_session", + body: %{:username => user, :password => pass}, + base_url: base_url + ) - if expect == :success do + if Map.get(options, :expect, :success) == :success do true = resp.body["ok"] cookie = resp.headers[:"set-cookie"] [token | _] = String.split(cookie, ";") - %Couch.Session{cookie: token} + %Couch.Session{cookie: token, base_url: base_url} else true = Map.has_key?(resp.body, "error") - %Couch.Session{error: resp.body["error"]} + %Couch.Session{error: resp.body["error"], base_url: base_url} end end end diff --git a/test/elixir/test/auth_cache_test.exs b/test/elixir/test/auth_cache_test.exs index 2ba396de7..5582b2f96 100644 --- a/test/elixir/test/auth_cache_test.exs +++ b/test/elixir/test/auth_cache_test.exs @@ -56,7 +56,7 @@ defmodule AuthCacheTest do end defp login_fail(user, password) do - resp = Couch.login(user, password, :fail) + resp = Couch.login(user, password, expect: :fail) assert resp.error, "Login error is expected." end -- cgit v1.2.1 From 47b6b3378221ed52a71469826bd87eca98010fc0 Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Fri, 21 Feb 2020 21:49:14 -0500 Subject: Do not use the ddoc cache to load _changes filter design documents Since we started re-using the main changes feed transaction to open the docs https://github.com/apache/couchdb/commit/1bceb552594af404961e4ab8e6f88cffa1548f69, we also started to pass the transactional db handle to the ddoc cache. However ddoc cache uses a `spawn_monitor` to open docs, and since our transaction objects are tied to the owner process the open was failing with a `badarg` error from erlfdb. This could be seen by running the replication elixir tests: ``` make elixir tests=test/elixir/test/replication_test.exs:214 ``` --- src/chttpd/src/chttpd_changes.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/chttpd/src/chttpd_changes.erl b/src/chttpd/src/chttpd_changes.erl index b2084fab6..3a13f81e4 100644 --- a/src/chttpd/src/chttpd_changes.erl +++ b/src/chttpd/src/chttpd_changes.erl @@ -309,7 +309,7 @@ check_fields(_Fields) -> open_ddoc(Db, DDocId) -> - case ddoc_cache:open_doc(Db, DDocId) of + case fabric2_db:open_doc(Db, DDocId, [ejson_body, ?ADMIN_CTX]) of {ok, _} = Resp -> Resp; Else -> throw(Else) end. -- cgit v1.2.1 From 42062b6b2de1e00f41a9ba528653d2f06fcc62f9 Mon Sep 17 00:00:00 2001 From: Jay Doane Date: Tue, 18 Feb 2020 21:40:32 -0800 Subject: Add fdb_to_revinfo compatibility with 5-tuple Val Currently, a 5-tuple `Val` parameter crashes fdb_to_revinfo with a function_clause error. This adds a compatible function clause, and also a catchall clause to log an error and throw an informative exception. --- src/fabric/src/fabric2_fdb.erl | 26 +++++++++++++++++++++- src/fabric/test/fabric2_db_misc_tests.erl | 37 +++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+), 1 deletion(-) diff --git a/src/fabric/src/fabric2_fdb.erl b/src/fabric/src/fabric2_fdb.erl index e51b8de5d..c18122533 100644 --- a/src/fabric/src/fabric2_fdb.erl +++ b/src/fabric/src/fabric2_fdb.erl @@ -38,6 +38,8 @@ incr_stat/3, incr_stat/4, + fdb_to_revinfo/2, + get_all_revs/2, get_winning_revs/3, get_winning_revs_future/3, @@ -1253,6 +1255,21 @@ fdb_to_revinfo(Key, {?CURR_REV_FORMAT, _, _, _, _, _} = Val) -> rev_size => RevSize }; +fdb_to_revinfo(Key, {?CURR_REV_FORMAT, _, _, _, _} = Val) -> + {?DB_REVS, _DocId, NotDeleted, RevPos, Rev} = Key, + {_RevFormat, Sequence, RevSize, RevPath, AttHash} = Val, + #{ + winner => true, + exists => true, + deleted => not NotDeleted, + rev_id => {RevPos, Rev}, + rev_path => tuple_to_list(RevPath), + sequence => Sequence, + branch_count => undefined, + att_hash => AttHash, + rev_size => RevSize + }; + fdb_to_revinfo(Key, {?CURR_REV_FORMAT, _, _, _} = Val) -> {?DB_REVS, _DocId, NotDeleted, RevPos, Rev} = Key, {_RevFormat, RevPath, AttHash, RevSize} = Val, @@ -1282,7 +1299,14 @@ fdb_to_revinfo(Key, {1, Seq, BCount, RPath, AttHash}) -> fdb_to_revinfo(Key, {1, RPath, AttHash}) -> Val = {?CURR_REV_FORMAT, RPath, AttHash, 0}, - fdb_to_revinfo(Key, Val). + fdb_to_revinfo(Key, Val); + +fdb_to_revinfo(Key, Val) -> + couch_log:error( + "~p:fdb_to_revinfo unsupported val format " + "rev_format=~p key_size=~p val_size=~p", + [?MODULE, element(1, Val), tuple_size(Key), tuple_size(Val)]), + throw({unsupported_data_format, fdb_to_revinfo_val}). doc_to_fdb(Db, #doc{} = Doc) -> diff --git a/src/fabric/test/fabric2_db_misc_tests.erl b/src/fabric/test/fabric2_db_misc_tests.erl index 42a63e2f9..7c88b4ac4 100644 --- a/src/fabric/test/fabric2_db_misc_tests.erl +++ b/src/fabric/test/fabric2_db_misc_tests.erl @@ -333,3 +333,40 @@ db_version_bump({DbName, _, _}) -> % Check that db handle in the cache got the new metadata version ?assertMatch(#{db_version := NewDbVersion}, Db2). + + +fdb_to_revinfo_test_() -> + { + "Test fdb_to_revinfo compatibility", + { + setup, + fun() -> ok end, + fun(_) -> ok end, + with([ + ?TDEF(fdb_to_revinfo) + ]) + } + }. + + +fdb_to_revinfo(_) -> + Sequence = {versionstamp, 10873034897377, 0, 0}, + Rev = <<60,84,174,140,210,120,192,18,100,148,9,181,129,165,248,92>>, + Key = {20, <<"d491280e-feab-42ce-909e-a7287d7b078-bluemix">>, true, 1, Rev}, + FiveTupleVal = {2, Sequence, 1, {}, <<>>}, + Expect = #{ + att_hash => <<>>, + branch_count => undefined, + deleted => false, + exists => true, + rev_id => + {1, <<60,84,174,140,210,120,192,18,100,148,9,181,129,165,248,92>>}, + rev_path => [], + rev_size => 1, + sequence => Sequence, + winner => true + }, + ?assertEqual(Expect, fabric2_fdb:fdb_to_revinfo(Key, FiveTupleVal)), + ?assertThrow({unsupported_data_format, fdb_to_revinfo_val}, + fabric2_fdb:fdb_to_revinfo({bad}, {input})), + ok. -- cgit v1.2.1 From d2a670bb4affe965dc806e6f7dac4adffd0648bf Mon Sep 17 00:00:00 2001 From: Jay Doane Date: Thu, 20 Feb 2020 01:41:33 -0800 Subject: Fix bug in fdb_to_revinfo compatibility clauses A bug was introduced in https://github.com/apache/couchdb/commit/eb1a09e114dafc55fa9511d477b9ada4350134eb#diff-32274bcb4785f432811085d2e08c3580L1227 when `CURR_REV_FORMAT` was bumped from `1->2`, but `?CURR_REV_FORMAT` in compatibility clauses going from format version `0->1` were left unchanged, when those values of `?CURR_REV_FORMAT` should have changed to `1`. This fixes the compatibility clauses for rev format versions from `0->1` --- src/fabric/src/fabric2_fdb.erl | 32 +++++--------------------- src/fabric/test/fabric2_db_misc_tests.erl | 37 ------------------------------- 2 files changed, 5 insertions(+), 64 deletions(-) diff --git a/src/fabric/src/fabric2_fdb.erl b/src/fabric/src/fabric2_fdb.erl index c18122533..ed4371a55 100644 --- a/src/fabric/src/fabric2_fdb.erl +++ b/src/fabric/src/fabric2_fdb.erl @@ -38,8 +38,6 @@ incr_stat/3, incr_stat/4, - fdb_to_revinfo/2, - get_all_revs/2, get_winning_revs/3, get_winning_revs_future/3, @@ -1255,21 +1253,6 @@ fdb_to_revinfo(Key, {?CURR_REV_FORMAT, _, _, _, _, _} = Val) -> rev_size => RevSize }; -fdb_to_revinfo(Key, {?CURR_REV_FORMAT, _, _, _, _} = Val) -> - {?DB_REVS, _DocId, NotDeleted, RevPos, Rev} = Key, - {_RevFormat, Sequence, RevSize, RevPath, AttHash} = Val, - #{ - winner => true, - exists => true, - deleted => not NotDeleted, - rev_id => {RevPos, Rev}, - rev_path => tuple_to_list(RevPath), - sequence => Sequence, - branch_count => undefined, - att_hash => AttHash, - rev_size => RevSize - }; - fdb_to_revinfo(Key, {?CURR_REV_FORMAT, _, _, _} = Val) -> {?DB_REVS, _DocId, NotDeleted, RevPos, Rev} = Key, {_RevFormat, RevPath, AttHash, RevSize} = Val, @@ -1286,27 +1269,22 @@ fdb_to_revinfo(Key, {?CURR_REV_FORMAT, _, _, _} = Val) -> }; fdb_to_revinfo(Key, {0, Seq, BCount, RPath}) -> - Val = {?CURR_REV_FORMAT, Seq, BCount, RPath, <<>>}, + Val = {1, Seq, BCount, RPath, <<>>}, fdb_to_revinfo(Key, Val); fdb_to_revinfo(Key, {0, RPath}) -> - Val = {?CURR_REV_FORMAT, RPath, <<>>}, + Val = {1, RPath, <<>>}, fdb_to_revinfo(Key, Val); fdb_to_revinfo(Key, {1, Seq, BCount, RPath, AttHash}) -> + % Don't forget to change ?CURR_REV_FORMAT to 2 here when it increments Val = {?CURR_REV_FORMAT, Seq, BCount, RPath, AttHash, 0}, fdb_to_revinfo(Key, Val); fdb_to_revinfo(Key, {1, RPath, AttHash}) -> + % Don't forget to change ?CURR_REV_FORMAT to 2 here when it increments Val = {?CURR_REV_FORMAT, RPath, AttHash, 0}, - fdb_to_revinfo(Key, Val); - -fdb_to_revinfo(Key, Val) -> - couch_log:error( - "~p:fdb_to_revinfo unsupported val format " - "rev_format=~p key_size=~p val_size=~p", - [?MODULE, element(1, Val), tuple_size(Key), tuple_size(Val)]), - throw({unsupported_data_format, fdb_to_revinfo_val}). + fdb_to_revinfo(Key, Val). doc_to_fdb(Db, #doc{} = Doc) -> diff --git a/src/fabric/test/fabric2_db_misc_tests.erl b/src/fabric/test/fabric2_db_misc_tests.erl index 7c88b4ac4..42a63e2f9 100644 --- a/src/fabric/test/fabric2_db_misc_tests.erl +++ b/src/fabric/test/fabric2_db_misc_tests.erl @@ -333,40 +333,3 @@ db_version_bump({DbName, _, _}) -> % Check that db handle in the cache got the new metadata version ?assertMatch(#{db_version := NewDbVersion}, Db2). - - -fdb_to_revinfo_test_() -> - { - "Test fdb_to_revinfo compatibility", - { - setup, - fun() -> ok end, - fun(_) -> ok end, - with([ - ?TDEF(fdb_to_revinfo) - ]) - } - }. - - -fdb_to_revinfo(_) -> - Sequence = {versionstamp, 10873034897377, 0, 0}, - Rev = <<60,84,174,140,210,120,192,18,100,148,9,181,129,165,248,92>>, - Key = {20, <<"d491280e-feab-42ce-909e-a7287d7b078-bluemix">>, true, 1, Rev}, - FiveTupleVal = {2, Sequence, 1, {}, <<>>}, - Expect = #{ - att_hash => <<>>, - branch_count => undefined, - deleted => false, - exists => true, - rev_id => - {1, <<60,84,174,140,210,120,192,18,100,148,9,181,129,165,248,92>>}, - rev_path => [], - rev_size => 1, - sequence => Sequence, - winner => true - }, - ?assertEqual(Expect, fabric2_fdb:fdb_to_revinfo(Key, FiveTupleVal)), - ?assertThrow({unsupported_data_format, fdb_to_revinfo_val}, - fabric2_fdb:fdb_to_revinfo({bad}, {input})), - ok. -- cgit v1.2.1 From a35af0f4716dc2e7a2e49cca4f5c4e33516c8b82 Mon Sep 17 00:00:00 2001 From: Jay Doane Date: Thu, 20 Feb 2020 15:09:49 -0800 Subject: Add fdb_to_revinfo version compatibility unit test This test adds coverage for the 4 fdb_to_revinfo compatibility clauses, and will help ensure any additional future clauses will not break backwards compatibility. Module coverage without this test: fabric2_fdb : 92% Module coverage with this test: fabric2_fdb : 94% --- src/fabric/src/fabric2_fdb.erl | 44 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/src/fabric/src/fabric2_fdb.erl b/src/fabric/src/fabric2_fdb.erl index ed4371a55..c34b33cbc 100644 --- a/src/fabric/src/fabric2_fdb.erl +++ b/src/fabric/src/fabric2_fdb.erl @@ -1746,3 +1746,47 @@ with_span(Operation, ExtraTags, Fun) -> false -> Fun() end. + + +-ifdef(TEST). +-include_lib("eunit/include/eunit.hrl"). + +fdb_to_revinfo_version_compatibility_test() -> + DocId = <<"doc_id">>, + FirstRevFormat = 0, + RevPos = 1, + Rev = <<60,84,174,140,210,120,192,18,100,148,9,181,129,165,248,92>>, + RevPath = {}, + NotDeleted = true, + Sequence = {versionstamp, 10873034897377, 0, 0}, + BranchCount = 1, + + KeyWinner = {?DB_REVS, DocId, NotDeleted, RevPos, Rev}, + ValWinner = {FirstRevFormat, Sequence, BranchCount, RevPath}, + ExpectedWinner = expected( + true, BranchCount, NotDeleted, RevPos, Rev, RevPath, Sequence), + ?assertEqual(ExpectedWinner, fdb_to_revinfo(KeyWinner, ValWinner)), + + KeyLoser = {?DB_REVS, DocId, NotDeleted, RevPos, Rev}, + ValLoser = {FirstRevFormat, RevPath}, + ExpectedLoser = expected( + false, undefined, NotDeleted, RevPos, Rev, RevPath, undefined), + ?assertEqual(ExpectedLoser, fdb_to_revinfo(KeyLoser, ValLoser)), + ok. + + +expected(Winner, BranchCount, NotDeleted, RevPos, Rev, RevPath, Sequence) -> + #{ + att_hash => <<>>, + branch_count => BranchCount, + deleted => not NotDeleted, + exists => true, + rev_id => {RevPos, Rev}, + rev_path => tuple_to_list(RevPath), + rev_size => 0, + sequence => Sequence, + winner => Winner + }. + + +-endif. -- cgit v1.2.1 From f30c83c49240c9042b31ef275a5166a08c5f36f7 Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Mon, 2 Mar 2020 09:15:28 -0600 Subject: Fix catching of Jiffy error exceptions The Reason term in the exception is no longer wrapped with an `{error, Reason}` tuple since its now an `error` exception. --- src/couch_jobs/src/couch_jobs_fdb.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/couch_jobs/src/couch_jobs_fdb.erl b/src/couch_jobs/src/couch_jobs_fdb.erl index a81a313d8..e59387ee1 100644 --- a/src/couch_jobs/src/couch_jobs_fdb.erl +++ b/src/couch_jobs/src/couch_jobs_fdb.erl @@ -400,7 +400,7 @@ encode_data(#{} = JobData) -> throw:{error, Error} -> % legacy clause since new versions of jiffy raise error instead error({json_encoding_error, Error}); - error:{error, Error} -> + error:Error -> error({json_encoding_error, Error}) end. -- cgit v1.2.1 From bdd0578b998e1fec2ae2345d6aa85350b24f899b Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Mon, 2 Mar 2020 09:31:53 -0600 Subject: Temporary shortcut to running the fdb test suite --- Makefile | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Makefile b/Makefile index b5f6441ee..2f5df90b9 100644 --- a/Makefile +++ b/Makefile @@ -158,6 +158,11 @@ else subdirs=$(shell ls src) endif +.PHONY: check-fdb +check-fdb: + make eunit apps=couch_eval,couch_expiring_cache,ctrace,couch_jobs,couch_views,fabric + make elixir tests=test/elixir/test/basics_test.exs,test/elixir/test/replication_test.exs,test/elixir/test/map_test.exs,test/elixir/test/all_docs_test.exs,test/elixir/test/bulk_docs_test.exs + .PHONY: eunit # target: eunit - Run EUnit tests, use EUNIT_OPTS to provide custom options eunit: export BUILDDIR = $(shell pwd) -- cgit v1.2.1 From 86ec2f3be092f6b6c1a4aa5757c87ea32da28d9c Mon Sep 17 00:00:00 2001 From: Juanjo Rodriguez Date: Mon, 2 Mar 2020 22:35:46 +0100 Subject: Port _design_docs tests, design_options and design_paths from js to elixir (#2596) * Port _design_docs tests from js to elixir * Port design_options and design_paths tests from js to elixir --- test/elixir/README.md | 5 +- test/elixir/test/design_docs_query_test.exs | 273 ++++++++++++++++++++++++++++ test/elixir/test/design_docs_test.exs | 108 ----------- test/elixir/test/design_options_test.exs | 74 ++++++++ test/elixir/test/design_paths_test.exs | 76 ++++++++ test/javascript/tests/design_docs_query.js | 2 + test/javascript/tests/design_options.js | 3 +- test/javascript/tests/design_paths.js | 1 + 8 files changed, 431 insertions(+), 111 deletions(-) create mode 100644 test/elixir/test/design_docs_query_test.exs delete mode 100644 test/elixir/test/design_docs_test.exs create mode 100644 test/elixir/test/design_options_test.exs create mode 100644 test/elixir/test/design_paths_test.exs diff --git a/test/elixir/README.md b/test/elixir/README.md index ee087c0b8..1fc0ce630 100644 --- a/test/elixir/README.md +++ b/test/elixir/README.md @@ -43,8 +43,9 @@ X means done, - means partially - [X] Port cookie_auth.js - [X] Port copy_doc.js - [ ] Port design_docs.js - - [ ] Port design_options.js - - [ ] Port design_paths.js + - [X] Port design_docs_query.js + - [X] Port design_options.js + - [X] Port design_paths.js - [X] Port erlang_views.js - [X] Port etags_head.js - [ ] ~~Port etags_views.js~~ (skipped in js test suite) diff --git a/test/elixir/test/design_docs_query_test.exs b/test/elixir/test/design_docs_query_test.exs new file mode 100644 index 000000000..b439a2e02 --- /dev/null +++ b/test/elixir/test/design_docs_query_test.exs @@ -0,0 +1,273 @@ +defmodule DesignDocsQueryTest do + use CouchTestCase + + @moduletag :design_docs + + @moduledoc """ + Test CouchDB /{db}/_design_docs + """ + + setup_all do + db_name = random_db_name() + {:ok, _} = create_db(db_name) + on_exit(fn -> delete_db(db_name) end) + + bulk_save(db_name, make_docs(1..5)) + + Enum.each(1..5, fn x -> create_ddoc(db_name, x) end) + + {:ok, [db_name: db_name]} + end + + defp create_ddoc(db_name, idx) do + ddoc = %{ + _id: "_design/ddoc0#{idx}", + views: %{ + testing: %{ + map: "function(){emit(1,1)}" + } + } + } + + create_doc(db_name, ddoc) + end + + test "query _design_docs (GET with no parameters)", context do + db_name = context[:db_name] + resp = Couch.get("/#{db_name}/_design_docs") + assert resp.status_code == 200, "standard get should be 200" + assert resp.body["total_rows"] == 5, "total_rows mismatch" + assert length(resp.body["rows"]) == 5, "amount of rows mismatch" + end + + test "query _design_docs with single key", context do + db_name = context[:db_name] + resp = Couch.get("/#{db_name}/_design_docs?key=\"_design/ddoc03\"") + + assert resp.status_code == 200, "standard get should be 200" + assert length(resp.body["rows"]) == 1, "amount of rows mismatch" + assert Enum.at(resp.body["rows"], 0)["key"] == "_design/ddoc03" + end + + test "query _design_docs with multiple key", context do + resp = + Couch.get( + "/#{context[:db_name]}/_design_docs", + query: %{ + :keys => "[\"_design/ddoc02\", \"_design/ddoc03\"]" + } + ) + + assert resp.status_code == 200 + assert length(Map.get(resp, :body)["rows"]) == 2 + end + + test "POST with empty body", context do + resp = + Couch.post( + "/#{context[:db_name]}/_design_docs", + body: %{} + ) + + assert resp.status_code == 200 + assert length(Map.get(resp, :body)["rows"]) == 5 + end + + test "POST with keys and limit", context do + resp = + Couch.post( + "/#{context[:db_name]}/_design_docs", + body: %{ + :keys => ["_design/ddoc02", "_design/ddoc03"], + :limit => 1 + } + ) + + assert resp.status_code == 200 + assert length(Map.get(resp, :body)["rows"]) == 1 + end + + test "POST with query parameter and JSON body", context do + resp = + Couch.post( + "/#{context[:db_name]}/_design_docs", + query: %{ + :limit => 1 + }, + body: %{ + :keys => ["_design/ddoc02", "_design/ddoc03"] + } + ) + + assert resp.status_code == 200 + assert length(Map.get(resp, :body)["rows"]) == 1 + end + + test "POST edge case with colliding parameters - query takes precedence", context do + resp = + Couch.post( + "/#{context[:db_name]}/_design_docs", + query: %{ + :limit => 0 + }, + body: %{ + :keys => ["_design/ddoc02", "_design/ddoc03"], + :limit => 2 + } + ) + + assert resp.status_code == 200 + assert Enum.empty?(Map.get(resp, :body)["rows"]) + end + + test "query _design_docs descending=true", context do + db_name = context[:db_name] + resp = Couch.get("/#{db_name}/_design_docs?descending=true") + + assert resp.status_code == 200, "standard get should be 200" + assert length(resp.body["rows"]) == 5, "amount of rows mismatch" + assert Enum.at(resp.body["rows"], 0)["key"] == "_design/ddoc05" + end + + test "query _design_docs descending=false", context do + db_name = context[:db_name] + resp = Couch.get("/#{db_name}/_design_docs?descending=false") + + assert resp.status_code == 200, "standard get should be 200" + assert length(resp.body["rows"]) == 5, "amount of rows mismatch" + assert Enum.at(resp.body["rows"], 0)["key"] == "_design/ddoc01" + end + + test "query _design_docs end_key", context do + db_name = context[:db_name] + resp = Couch.get("/#{db_name}/_design_docs?end_key=\"_design/ddoc03\"") + + assert resp.status_code == 200, "standard get should be 200" + assert length(resp.body["rows"]) == 3, "amount of rows mismatch" + assert Enum.at(resp.body["rows"], 2)["key"] == "_design/ddoc03" + end + + test "query _design_docs endkey", context do + db_name = context[:db_name] + resp = Couch.get("/#{db_name}/_design_docs?endkey=\"_design/ddoc03\"") + + assert resp.status_code == 200, "standard get should be 200" + assert length(resp.body["rows"]) == 3, "amount of rows mismatch" + assert Enum.at(resp.body["rows"], 2)["key"] == "_design/ddoc03" + end + + test "query _design_docs start_key", context do + db_name = context[:db_name] + resp = Couch.get("/#{db_name}/_design_docs?start_key=\"_design/ddoc03\"") + + assert resp.status_code == 200, "standard get should be 200" + assert length(resp.body["rows"]) == 3, "amount of rows mismatch" + assert Enum.at(resp.body["rows"], 0)["key"] == "_design/ddoc03" + end + + test "query _design_docs startkey", context do + db_name = context[:db_name] + resp = Couch.get("/#{db_name}/_design_docs?startkey=\"_design/ddoc03\"") + + assert resp.status_code == 200, "standard get should be 200" + assert length(resp.body["rows"]) == 3, "amount of rows mismatch" + assert Enum.at(resp.body["rows"], 0)["key"] == "_design/ddoc03" + end + + test "query _design_docs end_key inclusive_end=true", context do + db_name = context[:db_name] + + resp = + Couch.get("/#{db_name}/_design_docs", + query: [end_key: "\"_design/ddoc03\"", inclusive_end: true] + ) + + assert resp.status_code == 200, "standard get should be 200" + assert length(resp.body["rows"]) == 3, "amount of rows mismatch" + assert Enum.at(resp.body["rows"], 2)["key"] == "_design/ddoc03" + end + + test "query _design_docs end_key inclusive_end=false", context do + db_name = context[:db_name] + + resp = + Couch.get("/#{db_name}/_design_docs", + query: [end_key: "\"_design/ddoc03\"", inclusive_end: false] + ) + + assert resp.status_code == 200, "standard get should be 200" + assert length(resp.body["rows"]) == 2, "amount of rows mismatch" + assert Enum.at(resp.body["rows"], 1)["key"] == "_design/ddoc02" + end + + test "query _design_docs end_key inclusive_end=false descending", context do + db_name = context[:db_name] + + resp = + Couch.get("/#{db_name}/_design_docs", + query: [end_key: "\"_design/ddoc03\"", inclusive_end: false, descending: true] + ) + + assert resp.status_code == 200, "standard get should be 200" + assert length(resp.body["rows"]) == 2, "amount of rows mismatch" + assert Enum.at(resp.body["rows"], 1)["key"] == "_design/ddoc04" + end + + test "query _design_docs end_key limit", context do + db_name = context[:db_name] + + resp = + Couch.get("/#{db_name}/_design_docs", + query: [end_key: "\"_design/ddoc05\"", limit: 2] + ) + + assert resp.status_code == 200, "standard get should be 200" + assert length(resp.body["rows"]) == 2, "amount of rows mismatch" + assert Enum.at(resp.body["rows"], 1)["key"] == "_design/ddoc02" + end + + test "query _design_docs end_key skip", context do + db_name = context[:db_name] + + resp = + Couch.get("/#{db_name}/_design_docs", + query: [end_key: "\"_design/ddoc05\"", skip: 2] + ) + + assert resp.status_code == 200, "standard get should be 200" + assert length(resp.body["rows"]) == 3, "amount of rows mismatch" + assert Enum.at(resp.body["rows"], 0)["key"] == "_design/ddoc03" + assert Enum.at(resp.body["rows"], 2)["key"] == "_design/ddoc05" + end + + test "query _design_docs update_seq", context do + db_name = context[:db_name] + + resp = + Couch.get("/#{db_name}/_design_docs", + query: [end_key: "\"_design/ddoc05\"", update_seq: true] + ) + + assert resp.status_code == 200, "standard get should be 200" + assert Map.has_key?(resp.body, "update_seq") + end + + test "query _design_docs post with keys", context do + db_name = context[:db_name] + + resp = + Couch.post("/#{db_name}/_design_docs", + headers: ["Content-Type": "application/json"], + body: %{keys: ["_design/ddoc02", "_design/ddoc03"]} + ) + + keys = + resp.body["rows"] + |> Enum.map(fn p -> p["key"] end) + + assert resp.status_code == 200, "standard get should be 200" + assert length(resp.body["rows"]) == 2, "amount of rows mismatch" + assert Enum.member?(keys, "_design/ddoc03") + assert Enum.member?(keys, "_design/ddoc02") + end +end diff --git a/test/elixir/test/design_docs_test.exs b/test/elixir/test/design_docs_test.exs deleted file mode 100644 index ed0a0dfb5..000000000 --- a/test/elixir/test/design_docs_test.exs +++ /dev/null @@ -1,108 +0,0 @@ -defmodule DesignDocsTest do - use CouchTestCase - - @moduletag :design_docs - - @moduledoc """ - Test CouchDB /{db}/_design_docs - """ - - setup_all do - db_name = random_db_name() - {:ok, _} = create_db(db_name) - on_exit(fn -> delete_db(db_name) end) - - {:ok, _} = create_doc( - db_name, - %{ - _id: "_design/foo", - bar: "baz" - } - ) - - {:ok, _} = create_doc( - db_name, - %{ - _id: "_design/foo2", - bar: "baz2" - } - ) - - {:ok, [db_name: db_name]} - end - - test "GET with no parameters", context do - resp = Couch.get( - "/#{context[:db_name]}/_design_docs" - ) - - assert resp.status_code == 200 - assert length(Map.get(resp, :body)["rows"]) == 2 - end - - test "GET with multiple keys", context do - resp = Couch.get( - "/#{context[:db_name]}/_design_docs", - query: %{ - :keys => "[\"_design/foo\", \"_design/foo2\"]", - } - ) - - assert resp.status_code == 200 - assert length(Map.get(resp, :body)["rows"]) == 2 - end - - test "POST with empty body", context do - resp = Couch.post( - "/#{context[:db_name]}/_design_docs", - body: %{} - ) - - assert resp.status_code == 200 - assert length(Map.get(resp, :body)["rows"]) == 2 - end - - test "POST with keys and limit", context do - resp = Couch.post( - "/#{context[:db_name]}/_design_docs", - body: %{ - :keys => ["_design/foo", "_design/foo2"], - :limit => 1 - } - ) - - assert resp.status_code == 200 - assert length(Map.get(resp, :body)["rows"]) == 1 - end - - test "POST with query parameter and JSON body", context do - resp = Couch.post( - "/#{context[:db_name]}/_design_docs", - query: %{ - :limit => 1 - }, - body: %{ - :keys => ["_design/foo", "_design/foo2"] - } - ) - - assert resp.status_code == 200 - assert length(Map.get(resp, :body)["rows"]) == 1 - end - - test "POST edge case with colliding parameters - query takes precedence", context do - resp = Couch.post( - "/#{context[:db_name]}/_design_docs", - query: %{ - :limit => 0 - }, - body: %{ - :keys => ["_design/foo", "_design/foo2"], - :limit => 2 - } - ) - - assert resp.status_code == 200 - assert Enum.empty?(Map.get(resp, :body)["rows"]) - end -end diff --git a/test/elixir/test/design_options_test.exs b/test/elixir/test/design_options_test.exs new file mode 100644 index 000000000..95a938e38 --- /dev/null +++ b/test/elixir/test/design_options_test.exs @@ -0,0 +1,74 @@ +defmodule DesignOptionsTest do + use CouchTestCase + + @moduletag :design_docs + + @moduledoc """ + Test CouchDB design documents options include_design and local_seq + """ + @tag :with_db + test "design doc options - include_desing=true", context do + db_name = context[:db_name] + + create_test_view(db_name, "_design/fu", %{include_design: true}) + + resp = Couch.get("/#{db_name}/_design/fu/_view/data") + assert resp.status_code == 200 + assert length(Map.get(resp, :body)["rows"]) == 1 + assert Enum.at(resp.body["rows"], 0)["value"] == "_design/fu" + end + + @tag :with_db + test "design doc options - include_desing=false", context do + db_name = context[:db_name] + + create_test_view(db_name, "_design/bingo", %{include_design: false}) + + resp = Couch.get("/#{db_name}/_design/bingo/_view/data") + assert resp.status_code == 200 + assert Enum.empty?(Map.get(resp, :body)["rows"]) + end + + @tag :with_db + test "design doc options - include_design default value", context do + db_name = context[:db_name] + + create_test_view(db_name, "_design/bango", %{}) + + resp = Couch.get("/#{db_name}/_design/bango/_view/data") + assert resp.status_code == 200 + assert Enum.empty?(Map.get(resp, :body)["rows"]) + end + + @tag :with_db + test "design doc options - local_seq=true", context do + db_name = context[:db_name] + + create_test_view(db_name, "_design/fu", %{include_design: true, local_seq: true}) + create_doc(db_name, %{}) + resp = Couch.get("/#{db_name}/_design/fu/_view/with_seq") + + row_with_key = + resp.body["rows"] + |> Enum.filter(fn p -> p["key"] != :null end) + + assert length(row_with_key) == 2 + end + + defp create_test_view(db_name, id, options) do + map = "function (doc) {emit(null, doc._id);}" + withseq = "function(doc) {emit(doc._local_seq, null)}" + + design_doc = %{ + _id: id, + language: "javascript", + options: options, + views: %{ + data: %{map: map}, + with_seq: %{map: withseq} + } + } + + create_doc(db_name, design_doc) + end +end diff --git a/test/elixir/test/design_paths_test.exs b/test/elixir/test/design_paths_test.exs new file mode 100644 index 000000000..b3e10c165 --- /dev/null +++ b/test/elixir/test/design_paths_test.exs @@ -0,0 +1,76 @@ +defmodule DesignPathTest do + use CouchTestCase + + @moduletag :design_docs + + @moduledoc """ + Test CouchDB design documents path + """ + @tag :with_db + test "design doc path", context do + db_name = context[:db_name] + ddoc_path_test(db_name) + end + + @tag :with_db_name + test "design doc path with slash in db name", context do + db_name = URI.encode_www_form(context[:db_name] <> "/with_slashes") + create_db(db_name) + ddoc_path_test(db_name) + end + + defp ddoc_path_test(db_name) do + create_test_view(db_name, "_design/test") + + resp = Couch.get("/#{db_name}/_design/test") + assert resp.body["_id"] == "_design/test" + + resp = + Couch.get(Couch.process_url("/#{db_name}/_design%2Ftest"), + follow_redirects: true + ) + + assert resp.body["_id"] == "_design/test" + + resp = Couch.get("/#{db_name}/_design/test/_view/testing") + assert Enum.empty?(Map.get(resp, :body)["rows"]) + + design_doc2 = %{ + _id: "_design/test2", + views: %{ + testing: %{ + map: "function(){emit(1,1)}" + } + } + } + + resp = Couch.put("/#{db_name}/_design/test2", body: design_doc2) + assert resp.status_code == 201 + + resp = Couch.get("/#{db_name}/_design/test2") + assert resp.body["_id"] == "_design/test2" + + resp = + Couch.get(Couch.process_url("/#{db_name}/_design%2Ftest2"), + follow_redirects: true + ) + + assert resp.body["_id"] == "_design/test2" + + resp = Couch.get("/#{db_name}/_design/test2/_view/testing") + assert Enum.empty?(Map.get(resp, :body)["rows"]) + end + + defp create_test_view(db_name, id) do + design_doc = %{ + _id: id, + views: %{ + testing: %{ + map: "function(){emit(1,1)}" + } + } + } + + create_doc(db_name, design_doc) + end +end diff --git a/test/javascript/tests/design_docs_query.js b/test/javascript/tests/design_docs_query.js index 07e6577ab..2aefe49b4 100644 --- a/test/javascript/tests/design_docs_query.js +++ b/test/javascript/tests/design_docs_query.js @@ -11,6 +11,8 @@ // the License. couchTests.design_docs_query = function(debug) { + return console.log('done in test/elixir/test/design_docs_query_test.exs'); + var db_name = get_random_db_name(); var db = new CouchDB(db_name, {"X-Couch-Full-Commit":"false"}); db.createDb(); diff --git a/test/javascript/tests/design_options.js b/test/javascript/tests/design_options.js index cc2571f6b..d3f8594d4 100644 --- a/test/javascript/tests/design_options.js +++ b/test/javascript/tests/design_options.js @@ -11,6 +11,7 @@ // the License. couchTests.design_options = function(debug) { + return console.log('done in test/elixir/test/design_options.exs'); var db_name = get_random_db_name(); var db = new CouchDB(db_name, {"X-Couch-Full-Commit":"false"}); db.createDb(); @@ -36,7 +37,7 @@ couchTests.design_options = function(debug) { T(db.save(designDoc).ok); // should work for temp views - // no more there on cluster - pointless test + // no more there on cluster - pointless test //var rows = db.query(map, null, {options:{include_design: true}}).rows; //T(rows.length == 1); //T(rows[0].value == "_design/fu"); diff --git a/test/javascript/tests/design_paths.js b/test/javascript/tests/design_paths.js index 6e816991a..b85426acf 100644 --- a/test/javascript/tests/design_paths.js +++ b/test/javascript/tests/design_paths.js @@ -11,6 +11,7 @@ // the License. couchTests.design_paths = function(debug) { + return console.log('done in test/elixir/test/design_paths.exs'); if (debug) debugger; var db_name = get_random_db_name() var dbNames = [db_name, db_name + "/with_slashes"]; -- cgit v1.2.1 From c9175af16b267fc3ec43859a5a2aba101ffb148d Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Tue, 3 Mar 2020 14:41:46 -0500 Subject: Fix couch_views type in wait_for_job view_sig -> view_seq --- src/couch_views/src/couch_views_jobs.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/couch_views/src/couch_views_jobs.erl b/src/couch_views/src/couch_views_jobs.erl index 7e0ac9765..937146ce8 100644 --- a/src/couch_views/src/couch_views_jobs.erl +++ b/src/couch_views/src/couch_views_jobs.erl @@ -53,7 +53,7 @@ wait_for_job(JobId, UpdateSeq) -> wait_for_job(JobId, Subscription, UpdateSeq); {ok, finished, Data} -> case Data of - #{<<"view_sig">> := ViewSeq} when ViewSeq >= UpdateSeq -> + #{<<"view_seq">> := ViewSeq} when ViewSeq >= UpdateSeq -> ok; _ -> retry -- cgit v1.2.1 From ec3cf2000c3bbf9e7340480def66f35b43d86ca1 Mon Sep 17 00:00:00 2001 From: Jay Doane Date: Fri, 21 Feb 2020 13:51:42 -0800 Subject: Clean up mango test dbs After mango python tests are run, a bunch of dbs are typically left around, e.g. `mango_test_048b290b574d4039981893097ab71912` This deletes those test dbs after they are no longer in use. (cherry picked from commit e05e3cdc8d16d88e7c7af8fbcc4b671b81ac2693) --- src/mango/test/mango.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/mango/test/mango.py b/src/mango/test/mango.py index 03cb85f48..1212b732f 100644 --- a/src/mango/test/mango.py +++ b/src/mango/test/mango.py @@ -309,6 +309,10 @@ class DbPerClass(unittest.TestCase): klass.db = Database(random_db_name()) klass.db.create(q=1, n=1) + @classmethod + def tearDownClass(klass): + klass.db.delete() + def setUp(self): self.db = self.__class__.db -- cgit v1.2.1 From deca8686107b2c15bfd507c092b318d985833d7b Mon Sep 17 00:00:00 2001 From: Jay Doane Date: Fri, 21 Feb 2020 22:46:50 -0800 Subject: Clean up mango test user docs Tests based on class `UsersDbTests` don't clean up the user docs it puts in the `_users` db. This uses the classmethod `tearDownClass` to delete those docs. (cherry picked from commit 3d559eb14fd709662d3eb5cda8afe9a45687c3b1) --- src/mango/test/mango.py | 4 ++++ src/mango/test/user_docs.py | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/src/mango/test/mango.py b/src/mango/test/mango.py index 1212b732f..e78160f57 100644 --- a/src/mango/test/mango.py +++ b/src/mango/test/mango.py @@ -299,6 +299,10 @@ class UsersDbTests(unittest.TestCase): klass.db = Database("_users") user_docs.setup_users(klass.db) + @classmethod + def tearDownClass(klass): + user_docs.teardown_users(klass.db) + def setUp(self): self.db = self.__class__.db diff --git a/src/mango/test/user_docs.py b/src/mango/test/user_docs.py index 8f0ed2e04..316ca7841 100644 --- a/src/mango/test/user_docs.py +++ b/src/mango/test/user_docs.py @@ -59,6 +59,10 @@ def setup_users(db, **kwargs): db.save_docs(copy.deepcopy(USERS_DOCS)) +def teardown_users(db): + [db.delete_doc(doc['_id']) for doc in USERS_DOCS] + + def setup(db, index_type="view", **kwargs): db.recreate() db.save_docs(copy.deepcopy(DOCS)) -- cgit v1.2.1 From 6ec8c714c1c46379e3df2ccce0ca8990e764c8eb Mon Sep 17 00:00:00 2001 From: Jay Doane Date: Tue, 25 Feb 2020 22:06:19 -0800 Subject: Add coverage to Mango eunit tests --- src/mango/rebar.config | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 src/mango/rebar.config diff --git a/src/mango/rebar.config b/src/mango/rebar.config new file mode 100644 index 000000000..e0d18443b --- /dev/null +++ b/src/mango/rebar.config @@ -0,0 +1,2 @@ +{cover_enabled, true}. +{cover_print_enabled, true}. -- cgit v1.2.1 From db3aa0f53bb132a4988772f02c7b30152e12328b Mon Sep 17 00:00:00 2001 From: Jay Doane Date: Tue, 3 Mar 2020 15:53:20 -0800 Subject: Improve mango markdown This primarily wraps long lines and removes trailing whitespace in the README.md and TODO.md files. In `test/README.md`, it updates the default admin username and password used by `dev/run`. --- src/mango/README.md | 328 +++++++++++++++++++++++++++++++++++------------ src/mango/TODO.md | 19 ++- src/mango/test/README.md | 17 ++- 3 files changed, 274 insertions(+), 90 deletions(-) diff --git a/src/mango/README.md b/src/mango/README.md index 4c4bb60a6..7cec1af35 100644 --- a/src/mango/README.md +++ b/src/mango/README.md @@ -7,18 +7,37 @@ A MongoDB inspired query language interface for Apache CouchDB. Motivation ---------- -Mango provides a single HTTP API endpoint that accepts JSON bodies via HTTP POST. These bodies provide a set of instructions that will be handled with the results being returned to the client in the same order as they were specified. The general principle of this API is to be simple to implement on the client side while providing users a more natural conversion to Apache CouchDB than would otherwise exist using the standard RESTful HTTP interface that already exists. +Mango provides a single HTTP API endpoint that accepts JSON bodies via +HTTP POST. These bodies provide a set of instructions that will be +handled with the results being returned to the client in the same +order as they were specified. The general principle of this API is to +be simple to implement on the client side while providing users a more +natural conversion to Apache CouchDB than would otherwise exist using +the standard RESTful HTTP interface that already exists. Actions ------- -The general API exposes a set of actions that are similar to what MongoDB exposes (although not all of MongoDB's API is supported). These are meant to be loosely and obviously inspired by MongoDB but without too much attention to maintaining the exact behavior. - -Each action is specified as a JSON object with a number of keys that affect the behavior. Each action object has at least one field named "action" which must -have a string value indicating the action to be performed. For each action there are zero or more fields that will affect behavior. Some of these fields are required and some are optional. - -For convenience, the HTTP API will accept a JSON body that is either a single JSON object which specifies a single action or a JSON array that specifies a list of actions that will then be invoked serially. While multiple commands can be batched into a single HTTP request, there are no guarantees about atomicity or isolation for a batch of commands. +The general API exposes a set of actions that are similar to what +MongoDB exposes (although not all of MongoDB's API is +supported). These are meant to be loosely and obviously inspired by +MongoDB but without too much attention to maintaining the exact +behavior. + +Each action is specified as a JSON object with a number of keys that +affect the behavior. Each action object has at least one field named +"action" which must have a string value indicating the action to be +performed. For each action there are zero or more fields that will +affect behavior. Some of these fields are required and some are +optional. + +For convenience, the HTTP API will accept a JSON body that is either a +single JSON object which specifies a single action or a JSON array +that specifies a list of actions that will then be invoked +serially. While multiple commands can be batched into a single HTTP +request, there are no guarantees about atomicity or isolation for a +batch of commands. Activating Query on a cluster -------------------------------------------- @@ -32,24 +51,36 @@ rpc:multicall(config, set, ["native_query_servers", "query", "{mango_native_proc HTTP API ======== -This API adds a single URI endpoint to the existing CouchDB HTTP API. Creating databases, authentication, Map/Reduce views, etc are all still supported exactly as currently document. No existing behavior is changed. +This API adds a single URI endpoint to the existing CouchDB HTTP +API. Creating databases, authentication, Map/Reduce views, etc are all +still supported exactly as currently document. No existing behavior is +changed. -The endpoint added is for the URL pattern `/dbname/_query` and has the following characteristics: +The endpoint added is for the URL pattern `/dbname/_query` and has the +following characteristics: * The only HTTP method supported is `POST`. * The request `Content-Type` must be `application/json`. * The response status code will either be `200`, `4XX`, or `5XX` * The response `Content-Type` will be `application/json` * The response `Transfer-Encoding` will be `chunked`. -* The response is a single JSON object or array that matches to the single command or list of commands that exist in the request. +* The response is a single JSON object or array that matches to the + single command or list of commands that exist in the request. -This is intended to be a significantly simpler use of HTTP than the current APIs. This is motivated by the fact that this entire API is aimed at customers who are not as savvy at HTTP or non-relational document stores. Once a customer is comfortable using this API we hope to expose any other "power features" through the existing HTTP API and its adherence to HTTP semantics. +This is intended to be a significantly simpler use of HTTP than the +current APIs. This is motivated by the fact that this entire API is +aimed at customers who are not as savvy at HTTP or non-relational +document stores. Once a customer is comfortable using this API we hope +to expose any other "power features" through the existing HTTP API and +its adherence to HTTP semantics. Supported Actions ================= -This is a list of supported actions that Mango understands. For the time being it is limited to the four normal CRUD actions plus one meta action to create indices on the database. +This is a list of supported actions that Mango understands. For the +time being it is limited to the four normal CRUD actions plus one meta +action to create indices on the database. insert ------ @@ -62,9 +93,15 @@ Keys: * docs - The JSON document to insert * w (optional) (default: 2) - An integer > 0 for the write quorum size -If the provided document or documents do not contain an "\_id" field one will be added using an automatically generated UUID. +If the provided document or documents do not contain an "\_id" field +one will be added using an automatically generated UUID. -It is more performant to specify multiple documents in the "docs" field than it is to specify multiple independent insert actions. Each insert action is submitted as a single bulk update (ie, \_bulk\_docs in CouchDB terminology). This, however, does not make any guarantees on the isolation or atomicity of the bulk operation. It is merely a performance benefit. +It is more performant to specify multiple documents in the "docs" +field than it is to specify multiple independent insert actions. Each +insert action is submitted as a single bulk update (ie, \_bulk\_docs +in CouchDB terminology). This, however, does not make any guarantees +on the isolation or atomicity of the bulk operation. It is merely a +performance benefit. find @@ -76,18 +113,41 @@ Keys: * action - "find" * selector - JSON object following selector syntax, described below -* limit (optional) (default: 25) - integer >= 0, Limit the number of rows returned -* skip (optional) (default: 0) - integer >= 0, Skip the specified number of rows -* sort (optional) (default: []) - JSON array following sort syntax, described below -* fields (optional) (default: null) - JSON array following the field syntax, described below -* r (optional) (default: 1) - By default a find will return the document that was found when traversing the index. Optionally there can be a quorum read for each document using `r` as the read quorum. This is obviously less performant than using the document local to the index. -* conflicts (optional) (default: false) - boolean, whether or not to include information about any existing conflicts for the document. - -The important thing to note about the find command is that it must execute over a generated index. If a selector is provided that cannot be satisfied using an existing index the list of basic indices that could be used will be returned. - -For the most part, indices are generated in response to the "create\_index" action (described below) although there are two special indices that can be used as well. The "\_id" is automatically indexed and is similar to every other index. There is also a special "\_seq" index to retrieve documents in the order of their update sequence. - -Its also quite possible to generate a query that can't be satisfied by any index. In this case an error will be returned stating that fact. Generally speaking the easiest way to stumble onto this is to attempt to OR two separate fields which would require a complete table scan. In the future I expect to support these more complicated queries using an extended indexing API (which deviates from the current MongoDB model a bit). +* limit (optional) (default: 25) - integer >= 0, Limit the number of + rows returned +* skip (optional) (default: 0) - integer >= 0, Skip the specified + number of rows +* sort (optional) (default: []) - JSON array following sort syntax, + described below +* fields (optional) (default: null) - JSON array following the field + syntax, described below +* r (optional) (default: 1) - By default a find will return the + document that was found when traversing the index. Optionally there + can be a quorum read for each document using `r` as the read + quorum. This is obviously less performant than using the document + local to the index. +* conflicts (optional) (default: false) - boolean, whether or not to + include information about any existing conflicts for the document. + +The important thing to note about the find command is that it must +execute over a generated index. If a selector is provided that cannot +be satisfied using an existing index the list of basic indices that +could be used will be returned. + +For the most part, indices are generated in response to the +"create\_index" action (described below) although there are two +special indices that can be used as well. The "\_id" is automatically +indexed and is similar to every other index. There is also a special +"\_seq" index to retrieve documents in the order of their update +sequence. + +Its also quite possible to generate a query that can't be satisfied by +any index. In this case an error will be returned stating that +fact. Generally speaking the easiest way to stumble onto this is to +attempt to OR two separate fields which would require a complete table +scan. In the future I expect to support these more complicated queries +using an extended indexing API (which deviates from the current +MongoDB model a bit). update @@ -100,15 +160,24 @@ Keys: * action - "update" * selector - JSON object following selector syntax, described below * update - JSON object following update syntax, described below -* upsert - (optional) (default: false) - boolean, Whether or not to create a new document if the selector does not match any documents in the database -* limit (optional) (default: 1) - integer > 0, How many documents returned from the selector should be modified. Currently has a maximum value of 100 -* sort - (optional) (default: []) - JSON array following sort syntax, described below +* upsert - (optional) (default: false) - boolean, Whether or not to + create a new document if the selector does not match any documents + in the database +* limit (optional) (default: 1) - integer > 0, How many documents + returned from the selector should be modified. Currently has a + maximum value of 100 +* sort - (optional) (default: []) - JSON array following sort syntax, + described below * r (optional) (default: 1) - integer > 0, read quorum constant * w (optional) (default: 2) - integer > 0, write quorum constant -Updates are fairly straightforward other than to mention that the selector (like find) must be satisifiable using an existing index. +Updates are fairly straightforward other than to mention that the +selector (like find) must be satisifiable using an existing index. -On the update field, if the provided JSON object has one or more update operator (described below) then the operation is applied onto the existing document (if one exists) else the entire contents are replaced with exactly the value of the `update` field. +On the update field, if the provided JSON object has one or more +update operator (described below) then the operation is applied onto +the existing document (if one exists) else the entire contents are +replaced with exactly the value of the `update` field. delete @@ -120,15 +189,24 @@ Keys: * action - "delete" * selector - JSON object following selector syntax, described below -* force (optional) (default: false) - Delete all conflicted versions of the document as well -* limit - (optional) (default: 1) - integer > 0, How many documents to delete from the database. Currently has a maximum value of 100 -* sort - (optional) (default: []) - JSON array following sort syntax, described below +* force (optional) (default: false) - Delete all conflicted versions + of the document as well +* limit - (optional) (default: 1) - integer > 0, How many documents to + delete from the database. Currently has a maximum value of 100 +* sort - (optional) (default: []) - JSON array following sort syntax, + described below * r (optional) (default: 1) - integer > 1, read quorum constant * w (optional) (default: 2) - integer > 0, write quorum constant -Deletes behave quite similarly to update except they attempt to remove documents from the database. Its important to note that if a document has conflicts it may "appear" that delete's aren't having an effect. This is because the delete operation by default only removes a single revision. Specify `"force":true` if you would like to attempt to delete all live revisions. +Deletes behave quite similarly to update except they attempt to remove +documents from the database. Its important to note that if a document +has conflicts it may "appear" that delete's aren't having an +effect. This is because the delete operation by default only removes a +single revision. Specify `"force":true` if you would like to attempt +to delete all live revisions. -If you wish to delete a specific revision of the document, you can specify it in the selector using the special "\_rev" field. +If you wish to delete a specific revision of the document, you can +specify it in the selector using the special "\_rev" field. create\_index @@ -140,17 +218,43 @@ Keys: * action - "create\_index" * index - JSON array following sort syntax, described below -* type (optional) (default: "json") - string, specifying the index type to create. Currently only "json" indexes are supported but in the future we will provide full-text indexes as well as Geo spatial indexes -* name (optional) - string, optionally specify a name for the index. If a name is not provided one will be automatically generated -* ddoc (optional) - Indexes can be grouped into design documents underneath the hood for efficiency. This is an advanced feature. Don't specify a design document here unless you know the consequences of index invalidation. By default each index is placed in its own separate design document for isolation. - -Anytime an operation is required to locate a document in the database it is required that an index must exist that can be used to locate it. By default the only two indices that exist are for the document "\_id" and the special "\_seq" index. - -Indices are created in the background. If you attempt to create an index on a large database and then immediately utilize it, the request may block for a considerable amount of time before the request completes. - -Indices can specify multiple fields to index simultaneously. This is roughly analogous to a compound index in SQL with the corresponding tradeoffs. For instance, an index may contain the (ordered set of) fields "foo", "bar", and "baz". If a selector specifying "bar" is received, it can not be answered. Although if a selector specifying "foo" and "bar" is received, it can be answered more efficiently than if there were only an index on "foo" and "bar" independently. - -NB: while the index allows the ability to specify sort directions these are currently not supported. The sort direction must currently be specified as "asc" in the JSON. [INTERNAL]: This will require that we patch the view engine as well as the cluster coordinators in Fabric to follow the specified sort orders. The concepts are straightforward but the implementation may need some thought to fit into the current shape of things. +* type (optional) (default: "json") - string, specifying the index + type to create. Currently only "json" indexes are supported but in + the future we will provide full-text indexes as well as Geo spatial + indexes +* name (optional) - string, optionally specify a name for the + index. If a name is not provided one will be automatically generated +* ddoc (optional) - Indexes can be grouped into design documents + underneath the hood for efficiency. This is an advanced + feature. Don't specify a design document here unless you know the + consequences of index invalidation. By default each index is placed + in its own separate design document for isolation. + +Anytime an operation is required to locate a document in the database +it is required that an index must exist that can be used to locate +it. By default the only two indices that exist are for the document +"\_id" and the special "\_seq" index. + +Indices are created in the background. If you attempt to create an +index on a large database and then immediately utilize it, the request +may block for a considerable amount of time before the request +completes. + +Indices can specify multiple fields to index simultaneously. This is +roughly analogous to a compound index in SQL with the corresponding +tradeoffs. For instance, an index may contain the (ordered set of) +fields "foo", "bar", and "baz". If a selector specifying "bar" is +received, it can not be answered. Although if a selector specifying +"foo" and "bar" is received, it can be answered more efficiently than +if there were only an index on "foo" and "bar" independently. + +NB: while the index allows the ability to specify sort directions +these are currently not supported. The sort direction must currently +be specified as "asc" in the JSON. [INTERNAL]: This will require that +we patch the view engine as well as the cluster coordinators in Fabric +to follow the specified sort orders. The concepts are straightforward +but the implementation may need some thought to fit into the current +shape of things. list\_indexes @@ -172,9 +276,13 @@ Keys: * action - "delete\_index" * name - string, the index to delete -* design\_doc - string, the design doc id from which to delete the index. For auto-generated index names and design docs, you can retrieve this information from the `list\_indexes` action +* design\_doc - string, the design doc id from which to delete the + index. For auto-generated index names and design docs, you can + retrieve this information from the `list\_indexes` action -Indexes require resources to maintain. If you find that an index is no longer necessary then it can be beneficial to remove it from the database. +Indexes require resources to maintain. If you find that an index is no +longer necessary then it can be beneficial to remove it from the +database. describe\_selector @@ -186,36 +294,55 @@ Keys: * action - "describe\_selector" * selector - JSON object in selector syntax, described below -* extended (optional) (default: false) - Show information on what existing indexes could be used with this selector +* extended (optional) (default: false) - Show information on what + existing indexes could be used with this selector -This is a useful debugging utility that will show how a given selector is normalized before execution as well as information on what indexes could be used to satisfy it. +This is a useful debugging utility that will show how a given selector +is normalized before execution as well as information on what indexes +could be used to satisfy it. -If `"extended": true` is included then the list of existing indices that could be used for this selector are also returned. +If `"extended": true` is included then the list of existing indices +that could be used for this selector are also returned. JSON Syntax Descriptions ======================== -This API uses a few defined JSON structures for various operations. Here we'll describe each in detail. +This API uses a few defined JSON structures for various +operations. Here we'll describe each in detail. Selector Syntax --------------- -The Mango query language is expressed as a JSON object describing documents of interest. Within this structure it is also possible to express conditional logic using specially named fields. This is inspired by and intended to maintain a fairly close parity to the existing MongoDB behavior. +The Mango query language is expressed as a JSON object describing +documents of interest. Within this structure it is also possible to +express conditional logic using specially named fields. This is +inspired by and intended to maintain a fairly close parity to the +existing MongoDB behavior. As an example, the simplest selector for Mango might look something like such: +```json {"_id": "Paul"} +``` -Which would match the document named "Paul" (if one exists). Extending this example using other fields might look like such: +Which would match the document named "Paul" (if one exists). Extending +this example using other fields might look like such: +```json {"_id": "Paul", "location": "Boston"} +``` -This would match a document named "Paul" *AND* having a "location" value of "Boston". Seeing as though I'm sitting in my basement in Omaha, this is unlikely. +This would match a document named "Paul" *AND* having a "location" +value of "Boston". Seeing as though I'm sitting in my basement in +Omaha, this is unlikely. -There are two special syntax elements for the object keys in a selector. The first is that the period (full stop, or simply `.`) character denotes subfields in a document. For instance, here are two equivalent examples: +There are two special syntax elements for the object keys in a +selector. The first is that the period (full stop, or simply `.`) +character denotes subfields in a document. For instance, here are two +equivalent examples: {"location": {"city": "Omaha"}} {"location.city": "Omaha"} @@ -224,26 +351,36 @@ If the object's key contains the period it could be escaped with backslash, i.e. {"location\\.city": "Omaha"} -Note that the double backslash here is necessary to encode an actual single backslash. +Note that the double backslash here is necessary to encode an actual +single backslash. -The second important syntax element is the use of a dollar sign (`$`) prefix to denote operators. For example: +The second important syntax element is the use of a dollar sign (`$`) +prefix to denote operators. For example: {"age": {"$gt": 21}} In this example, we have created the boolean expression `age > 21`. -There are two core types of operators in the selector syntax: combination operators and condition operators. In general, combination operators contain groups of condition operators. We'll describe the list of each below. +There are two core types of operators in the selector syntax: +combination operators and condition operators. In general, combination +operators contain groups of condition operators. We'll describe the +list of each below. ### Implicit Operators -For the most part every operator must be of the form `{"$operator": argument}`. Though there are two implicit operators for selectors. +For the most part every operator must be of the form `{"$operator": +argument}`. Though there are two implicit operators for selectors. -First, any JSON object that is not the argument to a condition operator is an implicit `$and` operator on each field. For instance, these two examples are identical: +First, any JSON object that is not the argument to a condition +operator is an implicit `$and` operator on each field. For instance, +these two examples are identical: {"foo": "bar", "baz": true} {"$and": [{"foo": {"$eq": "bar"}}, {"baz": {"$eq": true}}]} -And as shown, any field that contains a JSON value that has no operators in it is an equality condition. For instance, these are equivalent: +And as shown, any field that contains a JSON value that has no +operators in it is an equality condition. For instance, these are +equivalent: {"foo": "bar"} {"foo": {"$eq": "bar"}} @@ -260,9 +397,12 @@ Although, the previous example would actually be normalized internally to this: ### Combination Operators -These operators are responsible for combining groups of condition operators. Most familiar are the standard boolean operators plus a few extra for working with JSON arrays. +These operators are responsible for combining groups of condition +operators. Most familiar are the standard boolean operators plus a few +extra for working with JSON arrays. -Each of the combining operators take a single argument that is either a condition operator or an array of condition operators. +Each of the combining operators take a single argument that is either +a condition operator or an array of condition operators. The list of combining characters: @@ -276,7 +416,13 @@ The list of combining characters: ### Condition Operators -Condition operators are specified on a per field basis and apply to the value indexed for that field. For instance, the basic "$eq" operator matches when the indexed field is equal to its argument. There is currently support for the basic equality and inequality operators as well as a number of meta operators. Some of these operators will accept any JSON argument while some require a specific JSON formatted argument. Each is noted below. +Condition operators are specified on a per field basis and apply to +the value indexed for that field. For instance, the basic "$eq" +operator matches when the indexed field is equal to its +argument. There is currently support for the basic equality and +inequality operators as well as a number of meta operators. Some of +these operators will accept any JSON argument while some require a +specific JSON formatted argument. Each is noted below. The list of conditional arguments: @@ -291,19 +437,28 @@ The list of conditional arguments: Object related operators -* "$exists" - boolean, check whether the field exists or not regardless of its value +* "$exists" - boolean, check whether the field exists or not + regardless of its value * "$type" - string, check the document field's type Array related operators -* "$in" - array of JSON values, the document field must exist in the list provided -* "$nin" - array of JSON values, the document field must not exist in the list provided -* "$size" - integer, special condition to match the length of an array field in a document. Non-array fields cannot match this condition. +* "$in" - array of JSON values, the document field must exist in the + list provided +* "$nin" - array of JSON values, the document field must not exist in + the list provided +* "$size" - integer, special condition to match the length of an array + field in a document. Non-array fields cannot match this condition. Misc related operators -* "$mod" - [Divisor, Remainder], where Divisor and Remainder are both positive integers (ie, greater than 0). Matches documents where (field % Divisor == Remainder) is true. This is false for any non-integer field -* "$regex" - string, a regular expression pattern to match against the document field. Only matches when the field is a string value and matches the supplied matches +* "$mod" - [Divisor, Remainder], where Divisor and Remainder are both + positive integers (ie, greater than 0). Matches documents where + (field % Divisor == Remainder) is true. This is false for any + non-integer field +* "$regex" - string, a regular expression pattern to match against the + document field. Only matches when the field is a string value and + matches the supplied matches Update Syntax @@ -315,19 +470,30 @@ Need to describe the syntax for update operators. Sort Syntax ----------- -The sort syntax is a basic array of field name and direction pairs. It looks like such: +The sort syntax is a basic array of field name and direction pairs. It +looks like such: [{field1: dir1} | ...] -Where field1 can be any field (dotted notation is available for sub-document fields) and dir1 can be "asc" or "desc". +Where field1 can be any field (dotted notation is available for +sub-document fields) and dir1 can be "asc" or "desc". -Note that it is highly recommended that you specify a single key per object in your sort ordering so that the order is not dependent on the combination of JSON libraries between your application and the internals of Mango's indexing engine. +Note that it is highly recommended that you specify a single key per +object in your sort ordering so that the order is not dependent on the +combination of JSON libraries between your application and the +internals of Mango's indexing engine. Fields Syntax ------------- -When retrieving documents from the database you can specify that only a subset of the fields are returned. This allows you to limit your results strictly to the parts of the document that are interesting for the local application logic. The fields returned are specified as an array. Unlike MongoDB only the fields specified are included, there is no automatic inclusion of the "\_id" or other metadata fields when a field list is included. +When retrieving documents from the database you can specify that only +a subset of the fields are returned. This allows you to limit your +results strictly to the parts of the document that are interesting for +the local application logic. The fields returned are specified as an +array. Unlike MongoDB only the fields specified are included, there is +no automatic inclusion of the "\_id" or other metadata fields when a +field list is included. A trivial example: @@ -344,16 +510,20 @@ POST /dbname/\_find Issue a query. -Request body is a JSON object that has the selector and the various options like limit/skip etc. Or we could post the selector and put the other options into the query string. Though I'd probably prefer to have it all in the body for consistency. +Request body is a JSON object that has the selector and the various +options like limit/skip etc. Or we could post the selector and put the +other options into the query string. Though I'd probably prefer to +have it all in the body for consistency. -Response is streamed out like a view. +Response is streamed out like a view. POST /dbname/\_index -------------------------- Request body contains the index definition. -Response body is empty and the result is returned as the status code (200 OK -> created, 3something for exists). +Response body is empty and the result is returned as the status code +(200 OK -> created, 3something for exists). GET /dbname/\_index ------------------------- diff --git a/src/mango/TODO.md b/src/mango/TODO.md index ce2d85f3d..95055dd06 100644 --- a/src/mango/TODO.md +++ b/src/mango/TODO.md @@ -1,9 +1,18 @@ -* Patch the view engine to do alternative sorts. This will include both the lower level couch\_view* modules as well as the fabric coordinators. +* Patch the view engine to do alternative sorts. This will include + both the lower level couch\_view* modules as well as the fabric + coordinators. -* Patch the view engine so we can specify options when returning docs from cursors. We'll want this so that we can delete specific revisions from a document. +* Patch the view engine so we can specify options when returning docs + from cursors. We'll want this so that we can delete specific + revisions from a document. -* Need to figure out how to do raw collation on some indices because at -least the _id index uses it forcefully. +* Need to figure out how to do raw collation on some indices because + at least the _id index uses it forcefully. -* Add lots more to the update API. Mongo appears to be missing some pretty obvious easy functionality here. Things like managing values doing things like multiplying numbers, or common string mutations would be obvious examples. Also it could be interesting to add to the language so that you can do conditional updates based on other document attributes. Definitely not a V1 endeavor. \ No newline at end of file +* Add lots more to the update API. Mongo appears to be missing some + pretty obvious easy functionality here. Things like managing values + doing things like multiplying numbers, or common string mutations + would be obvious examples. Also it could be interesting to add to + the language so that you can do conditional updates based on other + document attributes. Definitely not a V1 endeavor. diff --git a/src/mango/test/README.md b/src/mango/test/README.md index 509e32e47..9eae278b1 100644 --- a/src/mango/test/README.md +++ b/src/mango/test/README.md @@ -11,7 +11,7 @@ To run these, do this in the Mango top level directory: $ venv/bin/nosetests To run an individual test suite: - nosetests --nocapture test/12-use-correct-index.py + nosetests --nocapture test/12-use-correct-index.py To run the tests with text index support: MANGO_TEXT_INDEXES=1 nosetests --nocapture test @@ -22,8 +22,13 @@ Test configuration The following environment variables can be used to configure the test fixtures: - * `COUCH_HOST` - root url (including port) of the CouchDB instance to run the tests against. Default is `"http://127.0.0.1:15984"`. - * `COUCH_USER` - CouchDB username (with admin premissions). Default is `"testuser"`. - * `COUCH_PASSWORD` - CouchDB password. Default is `"testpass"`. - * `COUCH_AUTH_HEADER` - Optional Authorization header value. If specified, this is used instead of basic authentication with the username/password variables above. - * `MANGO_TEXT_INDEXES` - Set to `"1"` to run the tests only applicable to text indexes. + * `COUCH_HOST` - root url (including port) of the CouchDB instance to + run the tests against. Default is `"http://127.0.0.1:15984"`. + * `COUCH_USER` - CouchDB username (with admin premissions). Default + is `"adm"`. + * `COUCH_PASSWORD` - CouchDB password. Default is `"pass"`. + * `COUCH_AUTH_HEADER` - Optional Authorization header value. If + specified, this is used instead of basic authentication with the + username/password variables above. + * `MANGO_TEXT_INDEXES` - Set to `"1"` to run the tests only + applicable to text indexes. -- cgit v1.2.1 From ee150f4251e00e88c00dc38b387c7ee4482f0bf9 Mon Sep 17 00:00:00 2001 From: Garren Smith Date: Tue, 25 Feb 2020 10:39:45 +0200 Subject: standardize couch_views_indexer_tests --- src/couch_views/test/couch_views_indexer_test.erl | 27 ++++++++++------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/src/couch_views/test/couch_views_indexer_test.erl b/src/couch_views/test/couch_views_indexer_test.erl index 20ad0dc6b..3070cc0a1 100644 --- a/src/couch_views/test/couch_views_indexer_test.erl +++ b/src/couch_views/test/couch_views_indexer_test.erl @@ -16,10 +16,7 @@ -include_lib("couch/include/couch_db.hrl"). -include_lib("couch/include/couch_eunit.hrl"). -include_lib("couch_mrview/include/couch_mrview.hrl"). - - --define(I_HEART_EUNIT(Tests), [{with, [T]} || T <- Tests]). - +-include_lib("fabric/test/fabric2_test.hrl"). indexer_test_() -> { @@ -32,17 +29,17 @@ indexer_test_() -> foreach, fun foreach_setup/0, fun foreach_teardown/1, - ?I_HEART_EUNIT([ - fun indexed_empty_db/1, - fun indexed_single_doc/1, - fun updated_docs_are_reindexed/1, - fun updated_docs_without_changes_are_reindexed/1, - fun deleted_docs_not_indexed/1, - fun deleted_docs_are_unindexed/1, - fun multipe_docs_with_same_key/1, - fun multipe_keys_from_same_doc/1, - fun multipe_identical_keys_from_same_doc/1 - ]) + [ + with([?TDEF(indexed_empty_db)]), + with([?TDEF(indexed_single_doc)]), + with([?TDEF(updated_docs_are_reindexed)]), + with([?TDEF(updated_docs_without_changes_are_reindexed)]), + with([?TDEF(deleted_docs_not_indexed)]), + with([?TDEF(deleted_docs_are_unindexed)]), + with([?TDEF(multipe_docs_with_same_key)]), + with([?TDEF(multipe_keys_from_same_doc)]), + with([?TDEF(multipe_identical_keys_from_same_doc)]) + ] } } }. -- cgit v1.2.1 From 4bd68d1e03653a0b44ee69818be8e39b511d110d Mon Sep 17 00:00:00 2001 From: Garren Smith Date: Mon, 20 Jan 2020 12:08:06 +0200 Subject: Add Key/Value size limit for map indexes --- rel/overlay/etc/default.ini | 4 + src/couch_views/src/couch_views_fdb.erl | 104 +++++++++++---- src/couch_views/src/couch_views_indexer.erl | 4 +- src/couch_views/test/couch_views_indexer_test.erl | 154 ++++++++++++++++++++-- 4 files changed, 227 insertions(+), 39 deletions(-) diff --git a/rel/overlay/etc/default.ini b/rel/overlay/etc/default.ini index f2a81875c..4c978b29c 100644 --- a/rel/overlay/etc/default.ini +++ b/rel/overlay/etc/default.ini @@ -272,6 +272,10 @@ iterations = 10 ; iterations for password hashing ; Settings for view indexing [couch_views] ; max_workers = 100 +; The maximum allowed key size emitted from a view for a document (in bytes) +; key_size_limit = 8000 +; The maximum allowed value size emitted from a view for a document (in bytes) +; value_size_limit = 64000 ; CSP (Content Security Policy) Support for _utils [csp] diff --git a/src/couch_views/src/couch_views_fdb.erl b/src/couch_views/src/couch_views_fdb.erl index 5edaa3a5f..47196f7dc 100644 --- a/src/couch_views/src/couch_views_fdb.erl +++ b/src/couch_views/src/couch_views_fdb.erl @@ -32,6 +32,8 @@ -define(LIST_VALUE, 0). -define(JSON_VALUE, 1). -define(VALUE, 2). +-define(MAX_KEY_SIZE_LIMIT, 8000). +-define(MAX_VALUE_SIZE_LIMIT, 64000). -include("couch_views.hrl"). @@ -107,7 +109,7 @@ fold_map_idx(TxDb, Sig, ViewId, Options, Callback, Acc0) -> Acc1. -write_doc(TxDb, Sig, _ViewIds, #{deleted := true} = Doc) -> +write_doc(TxDb, Sig, _Views, #{deleted := true} = Doc) -> #{ id := DocId } = Doc, @@ -115,13 +117,11 @@ write_doc(TxDb, Sig, _ViewIds, #{deleted := true} = Doc) -> ExistingViewKeys = get_view_keys(TxDb, Sig, DocId), clear_id_idx(TxDb, Sig, DocId), - lists:foreach(fun({ViewId, TotalKeys, TotalSize, UniqueKeys}) -> - clear_map_idx(TxDb, Sig, ViewId, DocId, UniqueKeys), - update_row_count(TxDb, Sig, ViewId, -TotalKeys), - update_kv_size(TxDb, Sig, ViewId, -TotalSize) + lists:foreach(fun(ExistingViewKey) -> + remove_doc_from_idx(TxDb, Sig, DocId, ExistingViewKey) end, ExistingViewKeys); -write_doc(TxDb, Sig, ViewIds, Doc) -> +write_doc(TxDb, Sig, Views, Doc) -> #{ id := DocId, results := Results @@ -130,26 +130,54 @@ write_doc(TxDb, Sig, ViewIds, Doc) -> ExistingViewKeys = get_view_keys(TxDb, Sig, DocId), clear_id_idx(TxDb, Sig, DocId), - - lists:foreach(fun({ViewId, NewRows}) -> - update_id_idx(TxDb, Sig, ViewId, DocId, NewRows), - - ExistingKeys = case lists:keyfind(ViewId, 1, ExistingViewKeys) of - {ViewId, TotalRows, TotalSize, EKeys} -> - RowChange = length(NewRows) - TotalRows, - SizeChange = calculate_row_size(NewRows) - TotalSize, - update_row_count(TxDb, Sig, ViewId, RowChange), - update_kv_size(TxDb, Sig, ViewId, SizeChange), - EKeys; - false -> - RowChange = length(NewRows), - SizeChange = calculate_row_size(NewRows), - update_row_count(TxDb, Sig, ViewId, RowChange), - update_kv_size(TxDb, Sig, ViewId, SizeChange), - [] - end, - update_map_idx(TxDb, Sig, ViewId, DocId, ExistingKeys, NewRows) - end, lists:zip(ViewIds, Results)). + lists:foreach(fun({View, NewRows}) -> + #mrview{ + map_names = MNames, + id_num = ViewId + } = View, + + try + NewRowSize = calculate_row_size(NewRows), + update_id_idx(TxDb, Sig, ViewId, DocId, NewRows), + + ExistingKeys = case lists:keyfind(ViewId, 1, ExistingViewKeys) of + {ViewId, TotalRows, TotalSize, EKeys} -> + RowChange = length(NewRows) - TotalRows, + SizeChange = NewRowSize - TotalSize, + update_row_count(TxDb, Sig, ViewId, RowChange), + update_kv_size(TxDb, Sig, ViewId, SizeChange), + EKeys; + false -> + RowChange = length(NewRows), + SizeChange = NewRowSize, + update_row_count(TxDb, Sig, ViewId, RowChange), + update_kv_size(TxDb, Sig, ViewId, SizeChange), + [] + end, + update_map_idx(TxDb, Sig, ViewId, DocId, ExistingKeys, NewRows) + catch + throw:{size_exceeded, Type} -> + case lists:keyfind(ViewId, 1, ExistingViewKeys) of + false -> + ok; + ExistingViewKey -> + remove_doc_from_idx(TxDb, Sig, DocId, ExistingViewKey) + end, + #{ + name := DbName + } = TxDb, + couch_log:error("Db `~s` Doc `~s` exceeded the ~s size " + "for view `~s` and was not indexed.", + [DbName, DocId, Type, MNames]) + end + end, lists:zip(Views, Results)). + + +remove_doc_from_idx(TxDb, Sig, DocId, {ViewId, TotalKeys, TotalSize, + UniqueKeys}) -> + clear_map_idx(TxDb, Sig, ViewId, DocId, UniqueKeys), + update_row_count(TxDb, Sig, ViewId, -TotalKeys), + update_kv_size(TxDb, Sig, ViewId, -TotalSize). % For each row in a map view we store the the key/value @@ -352,6 +380,28 @@ process_rows(Rows) -> calculate_row_size(Rows) -> + KeyLimit = key_size_limit(), + ValLimit = value_size_limit(), + lists:foldl(fun({K, V}, Acc) -> - Acc + erlang:external_size(K) + erlang:external_size(V) + KeySize = erlang:external_size(K), + ValSize = erlang:external_size(V), + + if KeySize =< KeyLimit -> ok; true -> + throw({size_exceeded, key}) + end, + + if ValSize =< ValLimit -> ok; true -> + throw({size_exceeded, value}) + end, + + Acc + KeySize + ValSize end, 0, Rows). + + +key_size_limit() -> + config:get_integer("couch_views", "key_size_limit", ?MAX_KEY_SIZE_LIMIT). + + +value_size_limit() -> + config:get_integer("couch_views", "value_size_limit", ?MAX_VALUE_SIZE_LIMIT). diff --git a/src/couch_views/src/couch_views_indexer.erl b/src/couch_views/src/couch_views_indexer.erl index 31cd8e6f1..0a57a70ee 100644 --- a/src/couch_views/src/couch_views_indexer.erl +++ b/src/couch_views/src/couch_views_indexer.erl @@ -297,10 +297,8 @@ write_docs(TxDb, Mrst, Docs, State) -> last_seq := LastSeq } = State, - ViewIds = [View#mrview.id_num || View <- Views], - lists:foreach(fun(Doc) -> - couch_views_fdb:write_doc(TxDb, Sig, ViewIds, Doc) + couch_views_fdb:write_doc(TxDb, Sig, Views, Doc) end, Docs), couch_views_fdb:set_update_seq(TxDb, Sig, LastSeq). diff --git a/src/couch_views/test/couch_views_indexer_test.erl b/src/couch_views/test/couch_views_indexer_test.erl index 3070cc0a1..17adc42ec 100644 --- a/src/couch_views/test/couch_views_indexer_test.erl +++ b/src/couch_views/test/couch_views_indexer_test.erl @@ -30,15 +30,17 @@ indexer_test_() -> fun foreach_setup/0, fun foreach_teardown/1, [ - with([?TDEF(indexed_empty_db)]), - with([?TDEF(indexed_single_doc)]), - with([?TDEF(updated_docs_are_reindexed)]), - with([?TDEF(updated_docs_without_changes_are_reindexed)]), - with([?TDEF(deleted_docs_not_indexed)]), - with([?TDEF(deleted_docs_are_unindexed)]), - with([?TDEF(multipe_docs_with_same_key)]), - with([?TDEF(multipe_keys_from_same_doc)]), - with([?TDEF(multipe_identical_keys_from_same_doc)]) + ?TDEF_FE(indexed_empty_db), + ?TDEF_FE(indexed_single_doc), + ?TDEF_FE(updated_docs_are_reindexed), + ?TDEF_FE(updated_docs_without_changes_are_reindexed), + ?TDEF_FE(deleted_docs_not_indexed), + ?TDEF_FE(deleted_docs_are_unindexed), + ?TDEF_FE(multipe_docs_with_same_key), + ?TDEF_FE(multipe_keys_from_same_doc), + ?TDEF_FE(multipe_identical_keys_from_same_doc), + ?TDEF_FE(handle_size_key_limits), + ?TDEF_FE(handle_size_value_limits) ] } } @@ -65,6 +67,7 @@ foreach_setup() -> foreach_teardown(Db) -> + meck:unload(), ok = fabric2_db:delete(fabric2_db:name(Db), []). @@ -385,6 +388,113 @@ multipe_identical_keys_from_same_doc(Db) -> ], Out). +handle_size_key_limits(Db) -> + ok = meck:new(config, [passthrough]), + ok = meck:expect(config, get_integer, fun(Section, Key, Default) -> + case Section == "couch_views" andalso Key == "key_size_limit" of + true -> 15; + _ -> Default + end + end), + + DDoc = create_ddoc(multi_emit_key_limit), + Docs = [doc(1)] ++ [doc(2)], + + {ok, _} = fabric2_db:update_docs(Db, [DDoc | Docs], []), + + {ok, Out} = couch_views:query( + Db, + DDoc, + <<"map_fun1">>, + fun fold_fun/2, + [], + #mrargs{} + ), + + ?assertEqual([ + row(<<"1">>, 1, 1) + ], Out), + + {ok, Doc} = fabric2_db:open_doc(Db, <<"2">>), + Doc2 = Doc#doc { + body = {[{<<"val">>,3}]} + }, + {ok, _} = fabric2_db:update_doc(Db, Doc2), + + {ok, Out1} = couch_views:query( + Db, + DDoc, + <<"map_fun1">>, + fun fold_fun/2, + [], + #mrargs{} + ), + + ?assertEqual([ + row(<<"1">>, 1, 1), + row(<<"2">>, 3, 3) + ], Out1). + + +handle_size_value_limits(Db) -> + ok = meck:new(config, [passthrough]), + ok = meck:expect(config, get_integer, fun(Section, _, Default) -> + case Section of + "couch_views" -> 15; + _ -> Default + end + end), + + DDoc = create_ddoc(multi_emit_key_limit), + Docs = [doc(1, 2)] ++ [doc(2, 3)], + + {ok, _} = fabric2_db:update_docs(Db, [DDoc | Docs], []), + + {ok, Out} = couch_views:query( + Db, + DDoc, + <<"map_fun2">>, + fun fold_fun/2, + [], + #mrargs{} + ), + + ?assertEqual([ + row(<<"1">>, 2, 2), + row(<<"2">>, 3, 3), + row(<<"1">>, 22, 2), + row(<<"2">>, 23, 3) + ], Out), + + + {ok, Doc} = fabric2_db:open_doc(Db, <<"1">>), + Doc2 = Doc#doc { + body = {[{<<"val">>,1}]} + }, + {ok, _} = fabric2_db:update_doc(Db, Doc2), + + {ok, Out1} = couch_views:query( + Db, + DDoc, + <<"map_fun2">>, + fun fold_fun/2, + [], + #mrargs{} + ), + + ?assertEqual([ + row(<<"2">>, 3, 3), + row(<<"2">>, 23, 3) + ], Out1). + + +row(Id, Key, Value) -> + {row, [ + {id, Id}, + {key, Key}, + {value, Value} + ]}. + fold_fun({meta, _Meta}, Acc) -> {ok, Acc}; fold_fun({row, _} = Row, Acc) -> @@ -440,6 +550,32 @@ create_ddoc(multi_emit_same) -> {<<"map">>, <<"function(doc) {}">>} ]}} ]}} + ]}); + +create_ddoc(multi_emit_key_limit) -> + couch_doc:from_json_obj({[ + {<<"_id">>, <<"_design/bar">>}, + {<<"views">>, {[ + {<<"map_fun1">>, {[ + {<<"map">>, <<"function(doc) { " + "if (doc.val === 2) { " + "emit('a very long string to be limited', doc.val);" + "} else {" + "emit(doc.val, doc.val)" + "}" + "}">>} + ]}}, + {<<"map_fun2">>, {[ + {<<"map">>, <<"function(doc) { " + "emit(doc.val + 20, doc.val);" + "if (doc.val === 1) { " + "emit(doc.val, 'a very long string to be limited');" + "} else {" + "emit(doc.val, doc.val)" + "}" + "}">>} + ]}} + ]}} ]}). -- cgit v1.2.1 From 735b3f67b31c55753b618f14a60c644dc088fdda Mon Sep 17 00:00:00 2001 From: Jay Doane Date: Tue, 3 Mar 2020 22:14:55 -0800 Subject: Enable fdb feature --- src/fabric/src/fabric2_sup.erl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/fabric/src/fabric2_sup.erl b/src/fabric/src/fabric2_sup.erl index 402474c32..2510b13bb 100644 --- a/src/fabric/src/fabric2_sup.erl +++ b/src/fabric/src/fabric2_sup.erl @@ -29,6 +29,7 @@ start_link(Args) -> init([]) -> + config:enable_feature(fdb), Flags = {one_for_one, 1, 5}, Children = [ { -- cgit v1.2.1 From 2fe1666c590d98298e2b57b079d47394e74a9bca Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Wed, 4 Mar 2020 15:28:23 -0600 Subject: Clean up view size limit enforcement Both a performance and style cleanup. This reduces the number of ets calls by roughly `2 * 2 * $num_docs * $num_views`. The first factor of two is because this avoids re-calculating the size twice for both the id and map indexes. The second factor of two is because we have to lookup two different settings. Stylistically this also moves the logic out of the fdb modules. Keeping key/value structure and update logic is already pretty complicated so we should avoid mixing external application logic into those modules as much as possible for our sanity. The behavior is slightly changed vs the original patch as well. Originally only the view that contained the errant key or value was affected. However, pre-existing behavior where a document fails to be mapped correctly resulted in it being excluded from all view indexes defined in the design document. That behavior has been restored. --- src/couch_views/src/couch_views_fdb.erl | 114 ++++++---------------- src/couch_views/src/couch_views_indexer.erl | 54 +++++++++- src/couch_views/test/couch_views_indexer_test.erl | 12 +-- 3 files changed, 86 insertions(+), 94 deletions(-) diff --git a/src/couch_views/src/couch_views_fdb.erl b/src/couch_views/src/couch_views_fdb.erl index 47196f7dc..f68dafc41 100644 --- a/src/couch_views/src/couch_views_fdb.erl +++ b/src/couch_views/src/couch_views_fdb.erl @@ -32,8 +32,6 @@ -define(LIST_VALUE, 0). -define(JSON_VALUE, 1). -define(VALUE, 2). --define(MAX_KEY_SIZE_LIMIT, 8000). --define(MAX_VALUE_SIZE_LIMIT, 64000). -include("couch_views.hrl"). @@ -109,7 +107,7 @@ fold_map_idx(TxDb, Sig, ViewId, Options, Callback, Acc0) -> Acc1. -write_doc(TxDb, Sig, _Views, #{deleted := true} = Doc) -> +write_doc(TxDb, Sig, _ViewIds, #{deleted := true} = Doc) -> #{ id := DocId } = Doc, @@ -117,67 +115,40 @@ write_doc(TxDb, Sig, _Views, #{deleted := true} = Doc) -> ExistingViewKeys = get_view_keys(TxDb, Sig, DocId), clear_id_idx(TxDb, Sig, DocId), - lists:foreach(fun(ExistingViewKey) -> - remove_doc_from_idx(TxDb, Sig, DocId, ExistingViewKey) + lists:foreach(fun({ViewId, TotalKeys, TotalSize, UniqueKeys}) -> + clear_map_idx(TxDb, Sig, ViewId, DocId, UniqueKeys), + update_row_count(TxDb, Sig, ViewId, -TotalKeys), + update_kv_size(TxDb, Sig, ViewId, -TotalSize) end, ExistingViewKeys); -write_doc(TxDb, Sig, Views, Doc) -> +write_doc(TxDb, Sig, ViewIds, Doc) -> #{ id := DocId, - results := Results + results := Results, + kv_sizes := KVSizes } = Doc, ExistingViewKeys = get_view_keys(TxDb, Sig, DocId), clear_id_idx(TxDb, Sig, DocId), - lists:foreach(fun({View, NewRows}) -> - #mrview{ - map_names = MNames, - id_num = ViewId - } = View, - - try - NewRowSize = calculate_row_size(NewRows), - update_id_idx(TxDb, Sig, ViewId, DocId, NewRows), - - ExistingKeys = case lists:keyfind(ViewId, 1, ExistingViewKeys) of - {ViewId, TotalRows, TotalSize, EKeys} -> - RowChange = length(NewRows) - TotalRows, - SizeChange = NewRowSize - TotalSize, - update_row_count(TxDb, Sig, ViewId, RowChange), - update_kv_size(TxDb, Sig, ViewId, SizeChange), - EKeys; - false -> - RowChange = length(NewRows), - SizeChange = NewRowSize, - update_row_count(TxDb, Sig, ViewId, RowChange), - update_kv_size(TxDb, Sig, ViewId, SizeChange), - [] - end, - update_map_idx(TxDb, Sig, ViewId, DocId, ExistingKeys, NewRows) - catch - throw:{size_exceeded, Type} -> - case lists:keyfind(ViewId, 1, ExistingViewKeys) of - false -> - ok; - ExistingViewKey -> - remove_doc_from_idx(TxDb, Sig, DocId, ExistingViewKey) - end, - #{ - name := DbName - } = TxDb, - couch_log:error("Db `~s` Doc `~s` exceeded the ~s size " - "for view `~s` and was not indexed.", - [DbName, DocId, Type, MNames]) - end - end, lists:zip(Views, Results)). - - -remove_doc_from_idx(TxDb, Sig, DocId, {ViewId, TotalKeys, TotalSize, - UniqueKeys}) -> - clear_map_idx(TxDb, Sig, ViewId, DocId, UniqueKeys), - update_row_count(TxDb, Sig, ViewId, -TotalKeys), - update_kv_size(TxDb, Sig, ViewId, -TotalSize). + + lists:foreach(fun({ViewId, NewRows, KVSize}) -> + update_id_idx(TxDb, Sig, ViewId, DocId, NewRows, KVSize), + + ExistingKeys = case lists:keyfind(ViewId, 1, ExistingViewKeys) of + {ViewId, TotalRows, TotalSize, EKeys} -> + RowChange = length(NewRows) - TotalRows, + update_row_count(TxDb, Sig, ViewId, RowChange), + update_kv_size(TxDb, Sig, ViewId, KVSize - TotalSize), + EKeys; + false -> + RowChange = length(NewRows), + update_row_count(TxDb, Sig, ViewId, RowChange), + update_kv_size(TxDb, Sig, ViewId, KVSize), + [] + end, + update_map_idx(TxDb, Sig, ViewId, DocId, ExistingKeys, NewRows) + end, lists:zip3(ViewIds, Results, KVSizes)). % For each row in a map view we store the the key/value @@ -234,7 +205,7 @@ clear_map_idx(TxDb, Sig, ViewId, DocId, ViewKeys) -> end, ViewKeys). -update_id_idx(TxDb, Sig, ViewId, DocId, NewRows) -> +update_id_idx(TxDb, Sig, ViewId, DocId, NewRows, KVSize) -> #{ tx := Tx, db_prefix := DbPrefix @@ -243,8 +214,7 @@ update_id_idx(TxDb, Sig, ViewId, DocId, NewRows) -> Unique = lists:usort([K || {K, _V} <- NewRows]), Key = id_idx_key(DbPrefix, Sig, DocId, ViewId), - RowSize = calculate_row_size(NewRows), - Val = couch_views_encoding:encode([length(NewRows), RowSize, Unique]), + Val = couch_views_encoding:encode([length(NewRows), KVSize, Unique]), ok = erlfdb:set(Tx, Key, Val). @@ -377,31 +347,3 @@ process_rows(Rows) -> end, {0, []}, Vals1), Labeled ++ DAcc end, [], Grouped). - - -calculate_row_size(Rows) -> - KeyLimit = key_size_limit(), - ValLimit = value_size_limit(), - - lists:foldl(fun({K, V}, Acc) -> - KeySize = erlang:external_size(K), - ValSize = erlang:external_size(V), - - if KeySize =< KeyLimit -> ok; true -> - throw({size_exceeded, key}) - end, - - if ValSize =< ValLimit -> ok; true -> - throw({size_exceeded, value}) - end, - - Acc + KeySize + ValSize - end, 0, Rows). - - -key_size_limit() -> - config:get_integer("couch_views", "key_size_limit", ?MAX_KEY_SIZE_LIMIT). - - -value_size_limit() -> - config:get_integer("couch_views", "value_size_limit", ?MAX_VALUE_SIZE_LIMIT). diff --git a/src/couch_views/src/couch_views_indexer.erl b/src/couch_views/src/couch_views_indexer.erl index 0a57a70ee..83f7e2851 100644 --- a/src/couch_views/src/couch_views_indexer.erl +++ b/src/couch_views/src/couch_views_indexer.erl @@ -29,6 +29,8 @@ % TODO: % * Handle timeouts of transaction and other errors +-define(KEY_SIZE_LIMIT, 8000). +-define(VALUE_SIZE_LIMIT, 64000). spawn_link() -> proc_lib:spawn_link(?MODULE, init, []). @@ -297,8 +299,13 @@ write_docs(TxDb, Mrst, Docs, State) -> last_seq := LastSeq } = State, - lists:foreach(fun(Doc) -> - couch_views_fdb:write_doc(TxDb, Sig, Views, Doc) + ViewIds = [View#mrview.id_num || View <- Views], + KeyLimit = key_size_limit(), + ValLimit = value_size_limit(), + + lists:foreach(fun(Doc0) -> + Doc1 = calculate_kv_sizes(Mrst, Doc0, KeyLimit, ValLimit), + couch_views_fdb:write_doc(TxDb, Sig, ViewIds, Doc1) end, Docs), couch_views_fdb:set_update_seq(TxDb, Sig, LastSeq). @@ -368,6 +375,41 @@ start_query_server(#mrst{} = Mrst) -> Mrst. +calculate_kv_sizes(Mrst, Doc, KeyLimit, ValLimit) -> + #mrst{ + db_name = DbName, + idx_name = IdxName + } = Mrst, + #{ + results := Results + } = Doc, + try + KVSizes = lists:map(fun(ViewRows) -> + lists:foldl(fun({K, V}, Acc) -> + KeySize = erlang:external_size(K), + ValSize = erlang:external_size(V), + + if KeySize =< KeyLimit -> ok; true -> + throw({size_error, key}) + end, + + if ValSize =< ValLimit -> ok; true -> + throw({size_error, value}) + end, + + Acc + KeySize + ValSize + end, 0, ViewRows) + end, Results), + Doc#{kv_sizes => KVSizes} + catch throw:{size_error, Type} -> + #{id := DocId} = Doc, + Fmt = "View ~s size error for docid `~s`, excluded from indexing " + "in db `~s` for design doc `~s`", + couch_log:error(Fmt, [Type, DocId, DbName, IdxName]), + Doc#{deleted := true, results := [], kv_sizes => []} + end. + + report_progress(State, UpdateType) -> #{ tx_db := TxDb, @@ -419,3 +461,11 @@ num_changes() -> retry_limit() -> config:get_integer("couch_views", "retry_limit", 3). + + +key_size_limit() -> + config:get_integer("couch_views", "key_size_limit", ?KEY_SIZE_LIMIT). + + +value_size_limit() -> + config:get_integer("couch_views", "value_size_limit", ?VALUE_SIZE_LIMIT). diff --git a/src/couch_views/test/couch_views_indexer_test.erl b/src/couch_views/test/couch_views_indexer_test.erl index 17adc42ec..9482fdd85 100644 --- a/src/couch_views/test/couch_views_indexer_test.erl +++ b/src/couch_views/test/couch_views_indexer_test.erl @@ -398,7 +398,7 @@ handle_size_key_limits(Db) -> end), DDoc = create_ddoc(multi_emit_key_limit), - Docs = [doc(1)] ++ [doc(2)], + Docs = [doc(1, 2)] ++ [doc(2, 1)], {ok, _} = fabric2_db:update_docs(Db, [DDoc | Docs], []), @@ -412,12 +412,12 @@ handle_size_key_limits(Db) -> ), ?assertEqual([ - row(<<"1">>, 1, 1) + row(<<"1">>, 2, 2) ], Out), {ok, Doc} = fabric2_db:open_doc(Db, <<"2">>), Doc2 = Doc#doc { - body = {[{<<"val">>,3}]} + body = {[{<<"val">>, 2}]} }, {ok, _} = fabric2_db:update_doc(Db, Doc2), @@ -431,8 +431,8 @@ handle_size_key_limits(Db) -> ), ?assertEqual([ - row(<<"1">>, 1, 1), - row(<<"2">>, 3, 3) + row(<<"1">>, 2, 2), + row(<<"2">>, 2, 2) ], Out1). @@ -558,7 +558,7 @@ create_ddoc(multi_emit_key_limit) -> {<<"views">>, {[ {<<"map_fun1">>, {[ {<<"map">>, <<"function(doc) { " - "if (doc.val === 2) { " + "if (doc.val === 1) { " "emit('a very long string to be limited', doc.val);" "} else {" "emit(doc.val, doc.val)" -- cgit v1.2.1 From 377a3d3903bf2166f5e4b95d90f0a237672b812e Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Mon, 2 Mar 2020 13:39:00 -0600 Subject: Refactor view index layout This moves index meta information to its own key space so that we can scan all existing indexes efficiently. --- src/couch_views/include/couch_views.hrl | 16 ++++++++++------ src/couch_views/src/couch_views_fdb.erl | 23 +++++++++++++++-------- 2 files changed, 25 insertions(+), 14 deletions(-) diff --git a/src/couch_views/include/couch_views.hrl b/src/couch_views/include/couch_views.hrl index 642431dfe..c40bb0212 100644 --- a/src/couch_views/include/couch_views.hrl +++ b/src/couch_views/include/couch_views.hrl @@ -10,14 +10,18 @@ % License for the specific language governing permissions and limitations under % the License. -% indexing +% Index info/data subspaces +-define(VIEW_INFO, 0). +-define(VIEW_DATA, 1). + +% Index info keys -define(VIEW_UPDATE_SEQ, 0). --define(VIEW_ID_INFO, 1). --define(VIEW_ID_RANGE, 2). --define(VIEW_MAP_RANGE, 3). +-define(VIEW_ROW_COUNT, 1). +-define(VIEW_KV_SIZE, 2). --define(VIEW_ROW_COUNT, 0). --define(VIEW_KV_SIZE, 1). +% Data keys +-define(VIEW_ID_RANGE, 0). +-define(VIEW_MAP_RANGE, 1). % jobs api -define(INDEX_JOB_TYPE, <<"views">>). diff --git a/src/couch_views/src/couch_views_fdb.erl b/src/couch_views/src/couch_views_fdb.erl index f68dafc41..f2ac01bf3 100644 --- a/src/couch_views/src/couch_views_fdb.erl +++ b/src/couch_views/src/couch_views_fdb.erl @@ -249,7 +249,7 @@ get_view_keys(TxDb, Sig, DocId) -> } = TxDb, {Start, End} = id_idx_range(DbPrefix, Sig, DocId), lists:map(fun({K, V}) -> - {?DB_VIEWS, Sig, ?VIEW_ID_RANGE, DocId, ViewId} = + {?DB_VIEWS, ?VIEW_DATA, Sig, ?VIEW_ID_RANGE, DocId, ViewId} = erlfdb_tuple:unpack(K, DbPrefix), [TotalKeys, TotalSize, UniqueKeys] = couch_views_encoding:decode(V), {ViewId, TotalKeys, TotalSize, UniqueKeys} @@ -283,17 +283,17 @@ update_kv_size(TxDb, Sig, ViewId, Increment) -> seq_key(DbPrefix, Sig) -> - Key = {?DB_VIEWS, Sig, ?VIEW_UPDATE_SEQ}, + Key = {?DB_VIEWS, ?VIEW_INFO, ?VIEW_UPDATE_SEQ, Sig}, erlfdb_tuple:pack(Key, DbPrefix). row_count_key(DbPrefix, Sig, ViewId) -> - Key = {?DB_VIEWS, Sig, ?VIEW_ID_INFO, ViewId, ?VIEW_ROW_COUNT}, + Key = {?DB_VIEWS, ?VIEW_INFO, ?VIEW_ROW_COUNT, Sig, ViewId}, erlfdb_tuple:pack(Key, DbPrefix). kv_size_key(DbPrefix, Sig, ViewId) -> - Key = {?DB_VIEWS, Sig, ?VIEW_ID_INFO, ViewId, ?VIEW_KV_SIZE}, + Key = {?DB_VIEWS, ?VIEW_INFO, ?VIEW_KV_SIZE, Sig, ViewId}, erlfdb_tuple:pack(Key, DbPrefix). @@ -303,17 +303,17 @@ db_kv_size_key(DbPrefix) -> id_idx_key(DbPrefix, Sig, DocId, ViewId) -> - Key = {?DB_VIEWS, Sig, ?VIEW_ID_RANGE, DocId, ViewId}, + Key = {?DB_VIEWS, ?VIEW_DATA, Sig, ?VIEW_ID_RANGE, DocId, ViewId}, erlfdb_tuple:pack(Key, DbPrefix). id_idx_range(DbPrefix, Sig, DocId) -> - Key = {?DB_VIEWS, Sig, ?VIEW_ID_RANGE, DocId}, + Key = {?DB_VIEWS, ?VIEW_DATA, Sig, ?VIEW_ID_RANGE, DocId}, erlfdb_tuple:range(Key, DbPrefix). map_idx_prefix(DbPrefix, Sig, ViewId) -> - Key = {?DB_VIEWS, Sig, ?VIEW_MAP_RANGE, ViewId}, + Key = {?DB_VIEWS, ?VIEW_DATA, Sig, ?VIEW_MAP_RANGE, ViewId}, erlfdb_tuple:pack(Key, DbPrefix). @@ -324,7 +324,14 @@ map_idx_key(MapIdxPrefix, MapKey, DupeId) -> map_idx_range(DbPrefix, Sig, ViewId, MapKey, DocId) -> Encoded = couch_views_encoding:encode(MapKey, key), - Key = {?DB_VIEWS, Sig, ?VIEW_MAP_RANGE, ViewId, {Encoded, DocId}}, + Key = { + ?DB_VIEWS, + ?VIEW_DATA, + Sig, + ?VIEW_MAP_RANGE, + ViewId, + {Encoded, DocId} + }, erlfdb_tuple:range(Key, DbPrefix). -- cgit v1.2.1 From 69fc02bf17545f37c1aad0847552ecefbdc8e539 Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Wed, 4 Mar 2020 09:54:53 -0600 Subject: Fix handling of duplicate keys If a map function emits duplicate keys for a document this stores multiple rows in the map index differentiated by a `DupeId` counter. Previously we were attempting to save some work avoiding clearing ranges for keys that would be overwritten. However, if a document update causes fewer duplicates to be emitted for the same key we left orphaned rows in the index. --- src/couch_views/src/couch_views_fdb.erl | 5 +-- src/couch_views/test/couch_views_indexer_test.erl | 52 +++++++++++++++++++++++ 2 files changed, 53 insertions(+), 4 deletions(-) diff --git a/src/couch_views/src/couch_views_fdb.erl b/src/couch_views/src/couch_views_fdb.erl index f2ac01bf3..98257f308 100644 --- a/src/couch_views/src/couch_views_fdb.erl +++ b/src/couch_views/src/couch_views_fdb.erl @@ -224,13 +224,10 @@ update_map_idx(TxDb, Sig, ViewId, DocId, ExistingKeys, NewRows) -> db_prefix := DbPrefix } = TxDb, - Unique = lists:usort([K || {K, _V} <- NewRows]), - - KeysToRem = ExistingKeys -- Unique, lists:foreach(fun(RemKey) -> {Start, End} = map_idx_range(DbPrefix, Sig, ViewId, RemKey, DocId), ok = erlfdb:clear_range(Tx, Start, End) - end, KeysToRem), + end, ExistingKeys), KVsToAdd = process_rows(NewRows), MapIdxPrefix = map_idx_prefix(DbPrefix, Sig, ViewId), diff --git a/src/couch_views/test/couch_views_indexer_test.erl b/src/couch_views/test/couch_views_indexer_test.erl index 9482fdd85..c3f704911 100644 --- a/src/couch_views/test/couch_views_indexer_test.erl +++ b/src/couch_views/test/couch_views_indexer_test.erl @@ -39,6 +39,7 @@ indexer_test_() -> ?TDEF_FE(multipe_docs_with_same_key), ?TDEF_FE(multipe_keys_from_same_doc), ?TDEF_FE(multipe_identical_keys_from_same_doc), + ?TDEF_FE(fewer_multipe_identical_keys_from_same_doc), ?TDEF_FE(handle_size_key_limits), ?TDEF_FE(handle_size_value_limits) ] @@ -388,6 +389,53 @@ multipe_identical_keys_from_same_doc(Db) -> ], Out). +fewer_multipe_identical_keys_from_same_doc(Db) -> + DDoc = create_ddoc(multi_emit_same), + Doc0 = #doc{ + id = <<"0">>, + body = {[{<<"val">>, 1}, {<<"extra">>, 3}]} + }, + + {ok, _} = fabric2_db:update_doc(Db, DDoc, []), + {ok, {Pos, Rev}} = fabric2_db:update_doc(Db, Doc0, []), + + {ok, Out1} = couch_views:query( + Db, + DDoc, + <<"map_fun1">>, + fun fold_fun/2, + [], + #mrargs{} + ), + + ?assertEqual([ + row(<<"0">>, 1, 1), + row(<<"0">>, 1, 2), + row(<<"0">>, 1, 3) + ], Out1), + + Doc1 = #doc{ + id = <<"0">>, + revs = {Pos, [Rev]}, + body = {[{<<"val">>, 1}]} + }, + {ok, _} = fabric2_db:update_doc(Db, Doc1, []), + + {ok, Out2} = couch_views:query( + Db, + DDoc, + <<"map_fun1">>, + fun fold_fun/2, + [], + #mrargs{} + ), + + ?assertEqual([ + row(<<"0">>, 1, 1), + row(<<"0">>, 1, 2) + ], Out2). + + handle_size_key_limits(Db) -> ok = meck:new(config, [passthrough]), ok = meck:expect(config, get_integer, fun(Section, Key, Default) -> @@ -495,6 +543,7 @@ row(Id, Key, Value) -> {value, Value} ]}. + fold_fun({meta, _Meta}, Acc) -> {ok, Acc}; fold_fun({row, _} = Row, Acc) -> @@ -544,6 +593,9 @@ create_ddoc(multi_emit_same) -> {<<"map">>, <<"function(doc) { " "emit(doc.val, doc.val * 2); " "emit(doc.val, doc.val); " + "if(doc.extra) {" + " emit(doc.val, doc.extra);" + "}" "}">>} ]}}, {<<"map_fun2">>, {[ -- cgit v1.2.1 From e6f6e2d6baf91521e3a756f6d42db4f42fcee8c9 Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Wed, 4 Mar 2020 13:10:02 -0600 Subject: Don't store `{0,0,[]}` for views with new results This saves us a key/value pair for any document that is not included as part of a view. --- src/couch_views/src/couch_views_fdb.erl | 8 ++++++++ src/couch_views/test/couch_views_indexer_test.erl | 4 ++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/couch_views/src/couch_views_fdb.erl b/src/couch_views/src/couch_views_fdb.erl index 98257f308..a0224b2b8 100644 --- a/src/couch_views/src/couch_views_fdb.erl +++ b/src/couch_views/src/couch_views_fdb.erl @@ -205,6 +205,14 @@ clear_map_idx(TxDb, Sig, ViewId, DocId, ViewKeys) -> end, ViewKeys). +update_id_idx(TxDb, Sig, ViewId, DocId, [], _KVSize) -> + #{ + tx := Tx, + db_prefix := DbPrefix + } = TxDb, + Key = id_idx_key(DbPrefix, Sig, DocId, ViewId), + ok = erlfdb:clear(Tx, Key); + update_id_idx(TxDb, Sig, ViewId, DocId, NewRows, KVSize) -> #{ tx := Tx, diff --git a/src/couch_views/test/couch_views_indexer_test.erl b/src/couch_views/test/couch_views_indexer_test.erl index c3f704911..cd5b2b0bf 100644 --- a/src/couch_views/test/couch_views_indexer_test.erl +++ b/src/couch_views/test/couch_views_indexer_test.erl @@ -161,7 +161,7 @@ updated_docs_are_reindexed(Db) -> Sig = Mrst#mrst.sig, fabric2_fdb:transactional(Db, fun(TxDb) -> ?assertMatch( - [{0, 1, _, [1]}, {1, 0, 0, []}], + [{0, 1, _, [1]}], couch_views_fdb:get_view_keys(TxDb, Sig, <<"0">>) ) end). @@ -217,7 +217,7 @@ updated_docs_without_changes_are_reindexed(Db) -> Sig = Mrst#mrst.sig, fabric2_fdb:transactional(Db, fun(TxDb) -> ?assertMatch( - [{0, 1, _, [0]}, {1, 0, 0, []}], + [{0, 1, _, [0]}], couch_views_fdb:get_view_keys(TxDb, Sig, <<"0">>) ) end). -- cgit v1.2.1 From 97458c3e906a40949619779caa2c6bc3324e632d Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Mon, 2 Mar 2020 16:53:18 -0600 Subject: Test size calculations for view rows These tests exercise that logic when updating rows in a map index. It works by generating two states and a transition between those states. That is, the set of rows that exist in the index, and the set of rows that exist after an update along with how many rows should be kept in common. This is a fairly exhaustive search of the state space in these transitions. A full run takes approximately 60s on my laptop so I have limited the set of tests to a random selection of 100 single and multi transition tests (i.e., updating either one or five documents in a single view build iteration). --- src/couch_views/src/couch_views_indexer.erl | 5 + src/couch_views/test/couch_views_size_test.erl | 564 +++++++++++++++++++++++++ 2 files changed, 569 insertions(+) create mode 100644 src/couch_views/test/couch_views_size_test.erl diff --git a/src/couch_views/src/couch_views_indexer.erl b/src/couch_views/src/couch_views_indexer.erl index 83f7e2851..fb732a698 100644 --- a/src/couch_views/src/couch_views_indexer.erl +++ b/src/couch_views/src/couch_views_indexer.erl @@ -21,6 +21,11 @@ init/0 ]). +-ifdef(TEST). +-compile(export_all). +-compile(nowarn_export_all). +-endif. + -include("couch_views.hrl"). -include_lib("couch/include/couch_db.hrl"). -include_lib("couch_mrview/include/couch_mrview.hrl"). diff --git a/src/couch_views/test/couch_views_size_test.erl b/src/couch_views/test/couch_views_size_test.erl new file mode 100644 index 000000000..18fa9e628 --- /dev/null +++ b/src/couch_views/test/couch_views_size_test.erl @@ -0,0 +1,564 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_views_size_test). + +-include_lib("eunit/include/eunit.hrl"). +-include_lib("couch/include/couch_db.hrl"). +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("couch_mrview/include/couch_mrview.hrl"). +-include_lib("fabric/include/fabric2.hrl"). +-include_lib("couch_views/include/couch_views.hrl"). + +% N.B., we should move to couch_ejson_size instead +% of erlang:external_size +% +% to calculate view size: +% total = 0 +% for (fdb_k, fdb_v) in VIEW_MAP_RANGE: +% {EncUserKey, EncUserval} = erlfdb_tuple:unpack(fdb_v), +% UserKey = couch_views_encoding:decode(EncUserKey), +% UserVal = couch_views_encoding:decode(EncUserVal), +% total += erlang:external_size(UserKey), +% total += erlang:external_size(UserVal) +% +% Our goal in checking the size calculations is that we cover +% as much of the possible key mutation space as possible while +% not relying on fuzzing out the edge cases. Conceptually we have +% two sets of keys E and U. E is keys as currently exist in the +% view, and U is the new set of keys corresponding to an update. +% +% Both sets E and U have the same possible set of state variables: +% +% 1. N unique keys, where 0 =< N =< infinity +% 2. D keys with duplicates, where 0 =< D =< N, +% 3. R repeats for each member of D, for 2 =< R =< infinity +% +% Given two sets S1 and S2, we then have a set of transition variables: +% +% 1. deltaN - shared unique keys, where 0 =< deltaN =< N +% 2. deltaD - shared duplicates, where 0 =< deltaD =< N +% 3. deltaR - shared repeats for each D, where 2 =< deltaR =< infinity +% +% To search our state transition space, we can create two functions to +% first define our start and end states, and for each transition we have +% a function that defines the shared overlap between states. +% +% Given a list of transitions are checks then become simple in that +% we can iterate over each transition checking that our index is valid +% after each one. Index validation will purely look at the existing +% state of the index in fdb and validate correctness. + +-define(NUM_SINGLE_TESTS, 100). +-define(NUM_MULTI_TESTS, 100). + +-define(N_DOMAIN, [0, 1, 2, 5]). +-define(D_DOMAIN, [0, 1, 2, 5]). +-define(R_DOMAIN, [2, 4]). + +-define(DELTA_N_DOMAIN, [0, 1, 2, 5]). +-define(DELTA_D_DOMAIN, [0, 1, 2, 5]). +-define(DELTA_R_DOMAIN, [1, 2, 4]). + + +generate_sets() -> + permute(?N_DOMAIN, ?D_DOMAIN, ?R_DOMAIN, fun(N, D, R) -> + % We can't have more duplicates than total keys + case D > N of + true -> throw(skip); + false -> ok + end, + + % Only include one of the repeat values + % for our zero sets + case D == 0 of + true when R == 2 -> ok; + true -> throw(skip); + false -> ok + end, + + % Replace R with a sentinel value for sanity + % when there are no dupes to have repeats + ActualR = if D == 0 -> 0; true -> R end, + + {N, D, ActualR} + end). + + +generate_transitions() -> + Sets = generate_sets(), + Pairs = [{Set1, Set2} || Set1 <- Sets, Set2 <- Sets], + lists:flatmap(fun({{N1, D1, _R1} = S1, {N2, D2, _R2} = S2}) -> + Filter = fun(DeltaN, DeltaD, DeltaR) -> + % Can't share more keys than the smaller of the + % two sets + case DeltaN > min(N1, N2) of + true -> throw(skip); + false -> ok + end, + + % For DeltaD == 0, all combinations of DeltaD and + % DeltaR are equivalent tests + case DeltaN == 0 of + true when DeltaD == 0, DeltaR == 1 -> ok; + true -> throw(skip); + false -> ok + end, + + % Can't share more dupes than exist in either set + % or the total number of shared keys + case DeltaD > min(D1, D2) orelse DeltaD > DeltaN of + true -> throw(skip); + false -> ok + end, + + % For DeltaD == 0, all DeltaR correspond to the + % same test so only include one instance + case DeltaD == 0 of + true when DeltaR == 1 -> ok; + true -> throw(skip); + false -> ok + end, + + % If we have more non-repeated keys in our + % transition than there's "room" for in the target + % set it isn't a valid test case. + TransitionNonRepeats = DeltaN - DeltaD, + TargetNonRepeats = N2 - D2, + case TransitionNonRepeats > TargetNonRepeats of + true -> throw(skip); + false -> ok + end, + + {S1, S2, {DeltaN, DeltaD, DeltaR}} + end, + permute(?DELTA_N_DOMAIN, ?DELTA_D_DOMAIN, ?DELTA_R_DOMAIN, Filter) + end, Pairs). + + +permute(NList, DList, RList, Filter) -> + % Technically we could call into Filter in each + % outer loops to conditionally skip inner loops. + % If someone comes along looking to speed up the + % fixture setup time, this would likely be an + % easy win. + lists:foldl(fun(N, NAcc) -> + lists:foldl(fun(D, DAcc) -> + lists:foldl(fun(R, RAcc) -> + try + [Filter(N, D, R) | RAcc] + catch throw:skip -> + RAcc + end + end, DAcc, RList) + end, NAcc, DList) + end, [], NList). + + +row_transition_test_() -> + { + "Test view size tracking", + { + setup, + fun setup/0, + fun cleanup/1, + fun create_transition_tests/1 + } + }. + + +setup() -> + Ctx = test_util:start_couch([ + fabric, + couch_jobs, + couch_js, + couch_views + ]), + {ok, Db} = fabric2_db:create(?tempdb(), [{user_ctx, ?ADMIN_USER}]), + {Ctx, Db}. + + +cleanup({Ctx, Db}) -> + ok = fabric2_db:delete(fabric2_db:name(Db), []), + test_util:stop_couch(Ctx). + + +create_transition_tests({_Ctx, Db}) -> + Transitions = generate_transitions(), + Single = lists:flatmap(fun(T) -> + Name = lists:flatten(io_lib:format("single ~s", [tname(T)])), + [{Name, fun() -> check_single_transition(Db, T) end}] + end, lists:sort(Transitions)), + Multi = lists:flatmap(fun(T) -> + Name = lists:flatten(io_lib:format("multi ~s", [tname(T)])), + [{Name, fun() -> check_multi_transition(Db, T) end}] + end, lists:sort(group(shuffle(Transitions)))), + subset(?NUM_SINGLE_TESTS, Single) ++ subset(?NUM_MULTI_TESTS, Multi). + + +check_single_transition(Db, {Set1, Set2, Transition}) -> + clear_views(Db), + InitKVs = init_set(Set1, [a, b, c, d, e]), + CommonKVs = reduce_set(Transition, InitKVs), + FinalKVs = fill_set(Set2, CommonKVs, [v, w, x, y, z]), + {InitJSONKVs, Bindings} = unlabel(InitKVs, #{}), + {FinalJSONKVs, _} = unlabel(FinalKVs, Bindings), + + Sig = couch_uuids:random(), + DocId = couch_uuids:random(), + + fabric2_fdb:transactional(Db, fun(TxDb) -> + write_docs(TxDb, Sig, [make_doc(DocId, InitJSONKVs)]) + end), + + fabric2_fdb:transactional(Db, fun(TxDb) -> + write_docs(TxDb, Sig, [make_doc(DocId, FinalJSONKVs)]) + end), + + validate_index(Db, Sig, #{DocId => FinalJSONKVs}). + + +check_multi_transition(Db, Transitions) -> + clear_views(Db), + + {Docs, IdMap} = lists:mapfoldl(fun({Set1, Set2, Transition}, IdMapAcc) -> + DocId = couch_uuids:random(), + InitKVs = init_set(Set1, [a, b, c, d, e]), + CommonKVs = reduce_set(Transition, InitKVs), + FinalKVs = fill_set(Set2, CommonKVs, [v, w, x, y, z]), + {InitJSONKVs, Bindings} = unlabel(InitKVs, #{}), + {FinalJSONKVs, _} = unlabel(FinalKVs, Bindings), + InitDoc = make_doc(DocId, InitJSONKVs), + FinalDoc = make_doc(DocId, FinalJSONKVs), + {{InitDoc, FinalDoc}, maps:put(DocId, FinalJSONKVs, IdMapAcc)} + end, #{}, Transitions), + + {InitDocs, FinalDocs} = lists:unzip(Docs), + + Sig = couch_uuids:random(), + + fabric2_fdb:transactional(Db, fun(TxDb) -> + write_docs(TxDb, Sig, InitDocs) + end), + + fabric2_fdb:transactional(Db, fun(TxDb) -> + write_docs(TxDb, Sig, FinalDocs) + end), + + validate_index(Db, Sig, IdMap). + + +clear_views(Db) -> + fabric2_fdb:transactional(Db, fun(TxDb) -> + #{ + tx := Tx, + db_prefix := DbPrefix + } = TxDb, + {Start, End} = erlfdb_tuple:range({?DB_VIEWS}, DbPrefix), + erlfdb:clear_range(Tx, Start, End), + + GlobalKey = {?DB_STATS, <<"sizes">>, <<"views">>}, + BinGlobalKey = erlfdb_tuple:pack(GlobalKey, DbPrefix), + erlfdb:set(Tx, BinGlobalKey, ?uint2bin(0)) + end). + + +write_docs(TxDb, Sig, Docs) -> + Mrst = #mrst{ + sig = Sig, + views = [#mrview{ + id_num = 1 + }] + }, + IdxState = #{ + last_seq => <<"foo">> + }, + couch_views_indexer:write_docs(TxDb, Mrst, Docs, IdxState). + + +validate_index(Db, Sig, JSONRows) -> + #{ + db_prefix := DbPrefix + } = Db, + Rows = fabric2_fdb:transactional(Db, fun(TxDb) -> + #{ + tx := Tx + } = TxDb, + {Start, End} = erlfdb_tuple:range({?DB_VIEWS}, DbPrefix), + erlfdb:get_range(Tx, Start, End) + end), + + InitAcc = #{ + row_count => 0, + kv_size => 0, + ids => #{}, + rows => [] + }, + + MapData = lists:foldl(fun({Key, Value}, Acc) -> + case erlfdb_tuple:unpack(Key, DbPrefix) of + {?DB_VIEWS, ?VIEW_INFO, ?VIEW_UPDATE_SEQ, Sig} -> + ?assertEqual(<<"foo">>, Value), + Acc; + {?DB_VIEWS, ?VIEW_INFO, ?VIEW_ROW_COUNT, Sig, 1} -> + maps:put(row_count, ?bin2uint(Value), Acc); + {?DB_VIEWS, ?VIEW_INFO, ?VIEW_KV_SIZE, Sig, 1} -> + maps:put(kv_size, ?bin2uint(Value), Acc); + {?DB_VIEWS, ?VIEW_DATA, Sig, ?VIEW_ID_RANGE, DocId, 1} -> + [ + TotalKeys, TotalSize, UniqueKeys + ] = couch_views_encoding:decode(Value), + maps:update_with(ids, fun(Ids) -> + false = maps:is_key(DocId, Ids), + maps:put(DocId, {TotalKeys, TotalSize, UniqueKeys}, Ids) + end, Acc); + {?DB_VIEWS, ?VIEW_DATA, Sig, ?VIEW_MAP_RANGE, 1, MapKey, _DupeId} -> + {EncKey, DocId} = MapKey, + {UserKey, UserVal} = erlfdb_tuple:unpack(Value), + + UserJsonKey = couch_views_encoding:decode(UserKey), + UserJsonVal = couch_views_encoding:decode(UserVal), + + ?assertEqual( + EncKey, + couch_views_encoding:encode(UserJsonKey, key) + ), + + maps:update_with(rows, fun(RAcc) -> + [{DocId, UserJsonKey, UserJsonVal} | RAcc] + end, Acc) + end + end, InitAcc, Rows), + + #{ + row_count := RowCount, + kv_size := KVSize, + ids := MapIds, + rows := MapRows + } = MapData, + + SumFun = fun(_DocId, {TotalKVs, TotalSize, _UniqueKeys}, {KVAcc, SAcc}) -> + {KVAcc + TotalKVs, SAcc + TotalSize} + end, + {SumKVCount, SumKVSize} = maps:fold(SumFun, {0, 0}, MapIds), + ?assertEqual(RowCount, length(MapRows)), + ?assertEqual(RowCount, SumKVCount), + ?assertEqual(KVSize, SumKVSize), + ?assert(KVSize >= 0), + + fabric2_fdb:transactional(Db, fun(TxDb) -> + GlobalSize = get_global_size(TxDb), + ?assertEqual(KVSize, GlobalSize), + + ViewSize = couch_views_fdb:get_kv_size(TxDb, #mrst{sig = Sig}, 1), + ?assertEqual(KVSize, ViewSize) + end), + + % Compare our raw JSON rows to what was indexed + IdsFromJSONRows = maps:fold(fun(DocId, DocRows, IdAcc) -> + FinalAcc = lists:foldl(fun({JsonKey, JsonVal}, {CAcc, SAcc, UAcc}) -> + KeySize = erlang:external_size(JsonKey), + ValSize = erlang:external_size(JsonVal), + NewUnique = lists:usort([JsonKey | UAcc]), + {CAcc + 1, SAcc + KeySize + ValSize, NewUnique} + end, {0, 0, []}, DocRows), + if FinalAcc == {0, 0, []} -> IdAcc; true -> + maps:put(DocId, FinalAcc, IdAcc) + end + end, #{}, JSONRows), + ?assertEqual(MapIds, IdsFromJSONRows), + + % Compare the found id entries to our row data + IdsFromMapRows = lists:foldl(fun({DocId, JsonKey, JsonVal}, Acc) -> + KeySize = erlang:external_size(JsonKey), + ValSize = erlang:external_size(JsonVal), + Default = {1, KeySize + ValSize, [JsonKey]}, + maps:update_with(DocId, fun({TotalKVs, TotalSize, UniqueKeys}) -> + NewUnique = lists:usort([JsonKey | UniqueKeys]), + {TotalKVs + 1, TotalSize + KeySize + ValSize, NewUnique} + end, Default, Acc) + end, #{}, MapRows), + ?assertEqual(MapIds, IdsFromMapRows). + + +make_doc(DocId, []) -> + case rand:uniform() < 0.5 of + true -> + #{ + id => DocId, + deleted => true, + results => [[]] + }; + false -> + #{ + id => DocId, + deleted => false, + results => [[]] + } + end; +make_doc(DocId, Results) -> + #{ + id => DocId, + deleted => false, + results => [Results] + }. + + +get_global_size(TxDb) -> + #{ + tx := Tx, + db_prefix := DbPrefix + } = TxDb, + GlobalKey = {?DB_STATS, <<"sizes">>, <<"views">>}, + BinGlobalKey = erlfdb_tuple:pack(GlobalKey, DbPrefix), + ?bin2uint(erlfdb:wait(erlfdb:get(Tx, BinGlobalKey))). + + +init_set({N, D, R}, Labels) -> + {Dupes, RestLabels} = fill_keys(D, Labels, []), + {Unique, _} = fill_keys(N - D, RestLabels, []), + % Sanity assertions + N = length(Unique) + length(Dupes), + D = length(Dupes), + {Unique, [{Key, R} || Key <- Dupes]}. + + +reduce_set({DeltaN, DeltaD, DeltaR}, {Unique, Dupes}) -> + NewDupes = lists:sublist(Dupes, DeltaD), + NewUnique = lists:sublist(Unique, DeltaN - DeltaD), + {NewUnique, [{Key, DeltaR} || {Key, _} <- NewDupes]}. + + +fill_set({N, D, R}, {Unique, Dupes}, Labels) -> + AddDupes = D - length(Dupes), + {NewDupes, RestLabels} = fill_keys(AddDupes, Labels, Dupes), + + AddUnique = N - length(Unique) - length(NewDupes), + {NewUnique, _} = fill_keys(AddUnique, RestLabels, Unique), + % Sanity assertions + N = length(NewUnique) + length(NewDupes), + D = length(NewDupes), + {NewUnique, lists:map(fun(Dupe) -> + case Dupe of + {_, _} -> Dupe; + A when is_atom(A) -> {A, R} + end + end, NewDupes)}. + + +fill_keys(0, Labels, Acc) -> + {Acc, Labels}; +fill_keys(Count, [Label | RestLabels], Acc) when Count > 0 -> + fill_keys(Count - 1, RestLabels, [Label | Acc]). + + +unlabel({Unique, Dupes}, Bindings) -> + lists:foldl(fun(Item, {KVAcc, BindingsAcc}) -> + {KVs, NewBindingsAcc} = unlabel_item(Item, BindingsAcc), + {KVs ++ KVAcc, NewBindingsAcc} + end, {[], Bindings}, Unique ++ Dupes). + + +unlabel_item(Label, Bindings) when is_atom(Label) -> + NewBindings = maybe_bind(Label, Bindings), + KV = maps:get(Label, NewBindings), + {[KV], NewBindings}; +unlabel_item({Label, Count}, Bindings) when is_atom(Label), is_integer(Count) -> + NewBindings = maybe_bind(Label, Bindings), + {K, _} = KV = maps:get(Label, NewBindings), + ToAdd = lists:map(fun(_) -> + {K, gen_value()} + end, lists:seq(1, Count - 1)), + {[KV | ToAdd], NewBindings}. + + +maybe_bind(Label, Bindings) -> + case maps:is_key(Label, Bindings) of + true -> + case rand:uniform() < 0.5 of + true -> + rebind(Label, Bindings); + false -> + Bindings + end; + false -> + bind(Label, Bindings) + end. + + +bind(Label, Bindings) -> + maps:put(Label, {gen_key(), gen_value()}, Bindings). + + +rebind(Label, Bindings) -> + {Key, _} = maps:get(Label, Bindings), + maps:put(Label, {Key, gen_value()}, Bindings). + + +gen_key() -> + Unique = couch_uuids:random(), + case rand:uniform() of + N when N < 0.2 -> + [Unique, true, rand:uniform()]; + N when N < 0.4 -> + {[{Unique, true}, {<<"foo">>, [<<"bar">>, null, 1, {[]}]}]}; + _ -> + Unique + end. + + +gen_value() -> + case rand:uniform() of + N when N < 0.2 -> + [false, rand:uniform(), {[]}]; + N when N < 0.4 -> + {[{<<"a">>, 1}, {<<"b">>, 2}]}; + N when N < 0.6 -> + rand:uniform(100); + N when N < 0.8 -> + rand:uniform(); + _ -> + 1 + end. + + +group(Items) -> + case length(Items) > 5 of + true -> + {Group, Rest} = lists:split(5, Items), + [lists:sort(Group) | group(Rest)]; + false when Items == [] -> + []; + false -> + [lists:sort(Items)] + end. + + +shuffle(Items) -> + Tagged = [{rand:uniform(), I} || I <- Items], + Sorted = lists:sort(Tagged), + [I || {_T, I} <- Sorted]. + + +subset(Count, Items) -> + Random = shuffle(Items), + Take = lists:sublist(Random, Count), + lists:sort(Take). + + +tname([]) -> + []; +tname([Transition | RestTransitions]) -> + [tname(Transition) | tname(RestTransitions)]; +tname({{N1, D1, R1}, {N2, D2, R2}, {DN, DD, DR}}) -> + io_lib:format("~b~b~b~b~b~b~b~b~b", [N1, D1, R1, N2, D2, R2, DN, DD, DR]). + -- cgit v1.2.1 From 528e02ff5fb280df8a3419d26c7af2c5757c1093 Mon Sep 17 00:00:00 2001 From: Dan Barbarito Date: Sun, 8 Mar 2020 23:23:26 -0400 Subject: move "users_db_security_editable" to the correct location --- rel/overlay/etc/default.ini | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/rel/overlay/etc/default.ini b/rel/overlay/etc/default.ini index 246c17307..2676ef530 100644 --- a/rel/overlay/etc/default.ini +++ b/rel/overlay/etc/default.ini @@ -73,6 +73,9 @@ default_engine = couch ; on startup if not present. ;single_node = false +; Allow edits on the _security object in the user db. By default, it's disabled. +users_db_security_editable = false + [purge] ; Allowed maximum number of documents in one purge request ;max_document_id_number = 100 @@ -84,9 +87,6 @@ default_engine = couch ; document. Default is 24 hours. ;index_lag_warn_seconds = 86400 -; Allow edits on the _security object in the user db. By default, it's disabled. -users_db_security_editable = false - [couchdb_engines] ; The keys in this section are the filename extension that ; the specified engine module will use. This is important so -- cgit v1.2.1 From 17ce741c7cfcb14df7a8f24d38a59a7bf302dd5d Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Sat, 7 Mar 2020 23:53:22 -0500 Subject: Optimize resubmitting pending jobs Previously even when the scheduled time was the same the job was still deleted and re-inserted into the pending queue. Now we perform the re-enqueing operation only if the scheduled time has changed. So if the whole operation is run in its own transaction, the transaction will now become a read-only transaction. This optimization should come in handy with the indexing auto-builder, for example, where multiple nodes might try to re-enqueue the job and then only the first would succeed, and the rest will perform a quick one row read and not do any writes. --- src/couch_jobs/src/couch_jobs_fdb.erl | 11 +++++++++++ src/couch_jobs/test/couch_jobs_tests.erl | 11 +++++++++++ 2 files changed, 22 insertions(+) diff --git a/src/couch_jobs/src/couch_jobs_fdb.erl b/src/couch_jobs/src/couch_jobs_fdb.erl index e59387ee1..8c1ab7ac5 100644 --- a/src/couch_jobs/src/couch_jobs_fdb.erl +++ b/src/couch_jobs/src/couch_jobs_fdb.erl @@ -225,6 +225,17 @@ resubmit(#{jtx := true} = JTx0, #{job := true} = Job, NewSTime) -> data => Data }, {ok, Job1}; + pending when STime == OldSTime -> + % If pending and scheduled time doesn't change avoid generating + % un-necessary writes by removing and re-adding the jobs into the + % pending queue. + Job1 = Job#{ + stime => STime, + seq => ?PENDING_SEQ, + state => pending, + data => Data + }, + {ok, Job1}; pending -> JV1 = JV#jv{seq = ?PENDING_SEQ, stime = STime}, set_job_val(Tx, Key, JV1), diff --git a/src/couch_jobs/test/couch_jobs_tests.erl b/src/couch_jobs/test/couch_jobs_tests.erl index 62a75c83e..9d8e2df50 100644 --- a/src/couch_jobs/test/couch_jobs_tests.erl +++ b/src/couch_jobs/test/couch_jobs_tests.erl @@ -35,6 +35,7 @@ couch_jobs_basic_test_() -> [ fun add_remove_pending/1, fun add_remove_errors/1, + fun add_with_the_same_scheduled_time/1, fun get_job_data_and_state/1, fun resubmit_as_job_creator/1, fun type_timeouts_and_server/1, @@ -159,6 +160,16 @@ add_remove_errors(#{t1 := T, j1 := J}) -> end). +add_with_the_same_scheduled_time(#{t1 := T, j1 := J}) -> + ?_test(begin + ?assertEqual(ok, couch_jobs:add(?TX, T, J, #{})), + fabric2_fdb:transactional(fun(Tx) -> + ?assertEqual(ok, couch_jobs:add(Tx, T, J, #{})), + ?assert(erlfdb:is_read_only(Tx)) + end) + end). + + resubmit_as_job_creator(#{t1 := T, j1 := J}) -> ?_test(begin Data = #{<<"x">> => 42}, -- cgit v1.2.1 From 02ca72ba34eb1768a631f12e34022464cf70278f Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Sun, 8 Mar 2020 00:03:15 -0500 Subject: Implement a simple index auto-updater The main logic is as follows: - After doc update transaction(s) are completed in the `fabric2_db` module, call the `fabric2_index:db_updated(Db)` function. - `fabric2_index:db_updated(Db)` inserts a `{DbName, Timestamp}` tuple into one of the sharded ets tables. The tuple is inserted using `ets:insert_new`, which ensures only the first entry succeeds and others will be ignored, until that entry is cleared. - Each ets table in `fabric2_index` has a simple monitor process that periodically scans that table. If it finds databases which have been updated, it notifies all the indices which have registered with `fabric2_index` to build indices. There are config settings to disable index auto-updating and to adjust the delay interval, and the resolution. The interval specifies how long to wait since the first time the db was modified. The resolution interval specifies how often to check the ets tables. Just like in the original ken, design documents can have an `"autoupdate": false` option to disable auto-updating that design document only. --- rel/files/eunit.ini | 6 +- rel/overlay/etc/default.ini | 10 ++ src/fabric/src/fabric2_db.erl | 4 + src/fabric/src/fabric2_index.erl | 222 +++++++++++++++++++++++ src/fabric/src/fabric2_sup.erl | 8 + src/fabric/test/fabric2_index_tests.erl | 304 ++++++++++++++++++++++++++++++++ 6 files changed, 553 insertions(+), 1 deletion(-) create mode 100644 src/fabric/src/fabric2_index.erl create mode 100644 src/fabric/test/fabric2_index_tests.erl diff --git a/rel/files/eunit.ini b/rel/files/eunit.ini index 361ea6669..2b73ab307 100644 --- a/rel/files/eunit.ini +++ b/rel/files/eunit.ini @@ -35,4 +35,8 @@ level = info [replicator] ; disable jitter to reduce test run times -startup_jitter = 0 \ No newline at end of file +startup_jitter = 0 + +[fabric] +; disable index auto-updater to avoid interfering with some of the tests +index_updater_enabled = false diff --git a/rel/overlay/etc/default.ini b/rel/overlay/etc/default.ini index 4c978b29c..749cdd27f 100644 --- a/rel/overlay/etc/default.ini +++ b/rel/overlay/etc/default.ini @@ -226,6 +226,16 @@ port = 6984 ; should have a matching directory prefix in order to read and write the same ; data. Changes to this value take effect only on node start-up. ;fdb_directory = couchdb +; +; Enable or disable index auto-updater +;index_autoupdater_enabled = true +; +; How long to wait from the first db update event until index building is +; triggered. +;index_autoupdater_delay_msec = 60000 +; +; How often to check if databases may need their indices updated. +;index_autoupdater_resolution_msec = 10000 ; [rexi] ; buffer_count = 2000 diff --git a/src/fabric/src/fabric2_db.erl b/src/fabric/src/fabric2_db.erl index b0f7849e2..791282f63 100644 --- a/src/fabric/src/fabric2_db.erl +++ b/src/fabric/src/fabric2_db.erl @@ -733,6 +733,10 @@ update_docs(Db, Docs0, Options) -> end) end, Docs1) end, + + % Notify index builder + fabric2_index:db_updated(name(Db)), + % Convert errors Resps1 = lists:map(fun(Resp) -> case Resp of diff --git a/src/fabric/src/fabric2_index.erl b/src/fabric/src/fabric2_index.erl new file mode 100644 index 000000000..938210514 --- /dev/null +++ b/src/fabric/src/fabric2_index.erl @@ -0,0 +1,222 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(fabric2_index). + + +-behaviour(gen_server). + + +-export([ + register_index/1, + db_updated/1, + start_link/0 +]). + +-export([ + init/1, + terminate/2, + handle_call/3, + handle_cast/2, + handle_info/2, + code_change/3 +]). + + +-include_lib("couch/include/couch_db.hrl"). + + +-callback build_indices(Db :: map(), DDocs :: list(#doc{})) -> + [{ok, JobId::binary()} | {error, any()}]. + + +-define(SHARDS, 32). +-define(DEFAULT_DELAY_MSEC, 60000). +-define(DEFAULT_RESOLUTION_MSEC, 10000). + + +register_index(Mod) when is_atom(Mod) -> + Indices = lists:usort([Mod | registrations()]), + application:set_env(fabric, indices, Indices). + + +db_updated(DbName) when is_binary(DbName) -> + Table = table(erlang:phash2(DbName) rem ?SHARDS), + ets:insert_new(Table, {DbName, now_msec()}). + + +start_link() -> + gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). + + +init(_) -> + lists:foreach(fun(T) -> + spawn_link(fun() -> process_loop(T) end) + end, create_tables()), + {ok, nil}. + + +terminate(_M, _St) -> + ok. + + +handle_call(Msg, _From, St) -> + {stop, {bad_call, Msg}, {bad_call, Msg}, St}. + + +handle_cast(Msg, St) -> + {stop, {bad_cast, Msg}, St}. + + +handle_info(Msg, St) -> + {stop, {bad_info, Msg}, St}. + + +code_change(_OldVsn, St, _Extra) -> + {ok, St}. + + +create_tables() -> + Opts = [ + named_table, + public, + {write_concurrency, true}, + {read_concurrency, true} + ], + Tables = [table(N) || N <- lists:seq(0, ?SHARDS - 1)], + [ets:new(T, Opts) || T <- Tables]. + + +table(Id) when is_integer(Id), Id >= 0 andalso Id < ?SHARDS -> + list_to_atom("fabric2_index_" ++ integer_to_list(Id)). + + +process_loop(Table) -> + Now = now_msec(), + Delay = delay_msec(), + Since = Now - Delay, + case is_enabled() of + true -> + process_updates(Table, Since), + clean_stale(Table, Since); + false -> + clean_stale(Table, Now) + end, + Resolution = resolution_msec(), + Jitter = rand:uniform(1 + Resolution div 2), + timer:sleep(Resolution + Jitter), + process_loop(Table). + + +clean_stale(Table, Since) -> + Head = {'_', '$1'}, + Guard = {'<', '$1', Since}, + % Monotonic is not strictly monotonic, so we process items using `=<` but + % clean with `<` in case there was an update with the same timestamp after + % we started processing already at that timestamp. + ets:select_delete(Table, [{Head, [Guard], [true]}]). + + +process_updates(Table, Since) -> + Head = {'$1', '$2'}, + Guard = {'=<', '$2', Since}, + case ets:select(Table, [{Head, [Guard], ['$1']}], 25) of + '$end_of_table' -> ok; + {Match, Cont} -> process_updates_iter(Match, Cont) + end. + + +process_updates_iter([], Cont) -> + case ets:select(Cont) of + '$end_of_table' -> ok; + {Match, Cont1} -> process_updates_iter(Match, Cont1) + end; + +process_updates_iter([Db | Rest], Cont) -> + try + process_db(Db) + catch + error:database_does_not_exist -> + ok; + Tag:Reason -> + Stack = erlang:get_stacktrace(), + LogMsg = "~p failed to build indices for `~s` ~p:~p ~p", + couch_log:error(LogMsg, [?MODULE, Db, Tag, Reason, Stack]) + end, + process_updates_iter(Rest, Cont). + + +build_indices(_Db, []) -> + []; + +build_indices(Db, DDocs) -> + lists:flatmap(fun(Mod) -> + Mod:build_indices(Db, DDocs) + end, registrations()). + + +registrations() -> + application:get_env(fabric, indices, []). + + +process_db(DbName) when is_binary(DbName) -> + {ok, Db} = fabric2_db:open(DbName, [?ADMIN_CTX]), + fabric2_fdb:transactional(Db, fun(TxDb) -> + DDocs1 = get_design_docs(TxDb), + DDocs2 = lists:filter(fun should_update/1, DDocs1), + DDocs3 = shuffle(DDocs2), + build_indices(TxDb, DDocs3) + end). + + +get_design_docs(Db) -> + Callback = fun + ({meta, _}, Acc) -> {ok, Acc}; + (complete, Acc) -> {ok, Acc}; + ({row, Row}, Acc) -> {ok, [get_doc(Db, Row) | Acc]} + end, + {ok, DDocs} = fabric2_db:fold_design_docs(Db, Callback, [], []), + DDocs. + + +get_doc(Db, Row) -> + {_, DocId} = lists:keyfind(id, 1, Row), + {ok, #doc{deleted = false} = Doc} = fabric2_db:open_doc(Db, DocId, []), + Doc. + + +should_update(#doc{body = {Props}}) -> + couch_util:get_value(<<"autoupdate">>, Props, true). + + +shuffle(Items) -> + Tagged = [{rand:uniform(), I} || I <- Items], + Sorted = lists:sort(Tagged), + [I || {_T, I} <- Sorted]. + + +now_msec() -> + erlang:monotonic_time(millisecond). + + +is_enabled() -> + config:get_boolean("fabric", "index_updater_enabled", true). + + +delay_msec() -> + config:get_integer("fabric", "index_updater_delay_msec", + ?DEFAULT_DELAY_MSEC). + + +resolution_msec() -> + config:get_integer("fabric", "index_updater_resolution_msec", + ?DEFAULT_RESOLUTION_MSEC). diff --git a/src/fabric/src/fabric2_sup.erl b/src/fabric/src/fabric2_sup.erl index 2510b13bb..e8201b4ee 100644 --- a/src/fabric/src/fabric2_sup.erl +++ b/src/fabric/src/fabric2_sup.erl @@ -47,6 +47,14 @@ init([]) -> 5000, worker, [fabric2_server] + }, + { + fabric2_index, + {fabric2_index, start_link, []}, + permanent, + 5000, + worker, + [fabric2_index] } ], ChildrenWithEpi = couch_epi:register_service(fabric2_epi, Children), diff --git a/src/fabric/test/fabric2_index_tests.erl b/src/fabric/test/fabric2_index_tests.erl new file mode 100644 index 000000000..3fc8a5b18 --- /dev/null +++ b/src/fabric/test/fabric2_index_tests.erl @@ -0,0 +1,304 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(fabric2_index_tests). + + +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("eunit/include/eunit.hrl"). +-include_lib("couch/include/couch_db.hrl"). +-include("fabric2_test.hrl"). + + +% Should match fabric2_index define +-define(SHARDS, 32). + + +index_test_() -> + { + "Test fabric indexing module", + { + setup, + fun setup/0, + fun cleanup/1, + with([ + ?TDEF(register_index_works), + ?TDEF(single_update), + ?TDEF(multiple_updates), + ?TDEF(skip_db_if_no_ddocs), + ?TDEF(ignore_deleted_dbs), + ?TDEF(check_gen_server_messages) + ]) + } + }. + + +index_process_cleanup_test_() -> + { + "Test fabric process cleanup in indexing module", + { + foreach, + fun setup/0, + fun cleanup/1, + [ + ?TDEF_FE(updater_processes_start), + ?TDEF_FE(updater_processes_stop), + ?TDEF_FE(indexing_can_be_disabled), + ?TDEF_FE(handle_indexer_blowing_up) + ] + } + }. + + +setup() -> + meck:new(config, [passthrough]), + meck:expect(config, get_integer, fun + ("fabric", "index_updater_delay_msec", _) -> 200; + ("fabric", "index_updater_resolution_msec", _) -> 100; + + (_, _, Default) -> Default + end), + meck:expect(config, get_boolean, fun + ("fabric", "index_updater_enabled", _) -> true; + (_, _, Default) -> Default + end), + + Indices = application:get_env(fabric, indices, []), + + Ctx = test_util:start_couch([fabric]), + + % Db1 has a valid design doc, a deleted one and one with "autoupdate":false + {ok, Db1} = fabric2_db:create(?tempdb(), [?ADMIN_CTX]), + {_, _} = create_doc(Db1, <<"_design/doc1">>), + + DDocId2 = <<"_design/doc2">>, + {DDocId2, {Pos, Rev}} = create_doc(Db1, DDocId2), + Delete2 = #doc{id = DDocId2, revs = {Pos, [Rev]}, deleted = true}, + {ok, _} = fabric2_db:update_doc(Db1, Delete2), + + NoAutoUpdate = {[{<<"autoupdate">>, false}]}, + {_, _} = create_doc(Db1, <<"_design/doc3">>, NoAutoUpdate), + + % Db2 doesn't have any desig documents + {ok, Db2} = fabric2_db:create(?tempdb(), [?ADMIN_CTX]), + + #{db1 => Db1, db2 => Db2, ctx => Ctx, indices => Indices}. + + +cleanup(#{db1 := Db1, db2 := Db2, ctx := Ctx, indices := Indices}) -> + catch fabric2_db:delete(fabric2_db:name(Db1), []), + catch fabric2_db:delete(fabric2_db:name(Db2), []), + + test_util:stop_couch(Ctx), + application:set_env(fabric, indices, Indices), + + meck:unload(). + + +register_index_works(_) -> + reset_callbacks(), + + Mod1 = fabric2_test_callback1, + fabric2_index:register_index(Mod1), + Indices1 = application:get_env(fabric, indices, []), + ?assertEqual([Mod1], Indices1), + + Mod2 = fabric2_test_callback2, + fabric2_index:register_index(Mod2), + Indices2 = application:get_env(fabric, indices, []), + ?assertEqual(lists:sort([Mod1, Mod2]), lists:sort(Indices2)). + + +single_update(#{db1 := Db}) -> + reset_callbacks(), + + Mod = fabric2_test_callback3, + setup_callback(Mod), + create_doc(Db), + + meck:wait(Mod, build_indices, 2, 2000), + ?assertEqual(1, meck:num_calls(Mod, build_indices, 2)). + + +multiple_updates(#{db1 := Db}) -> + reset_callbacks(), + + Mod = fabric2_test_callback4, + setup_callback(Mod), + create_docs(Db, 10), + + % should be called at least once + meck:wait(Mod, build_indices, 2, 2000), + + % Maybe called another time or two at most + timer:sleep(500), + ?assert(meck:num_calls(Mod, build_indices, 2) =< 3). + + +skip_db_if_no_ddocs(#{db2 := Db}) -> + reset_callbacks(), + + Mod = fabric2_test_callback5, + setup_callback(Mod), + create_doc(Db), + + timer:sleep(500), + ?assertEqual(0, meck:num_calls(Mod, build_indices, 2)). + + +ignore_deleted_dbs(#{}) -> + reset_callbacks(), + + Mod = fabric2_test_callback6, + setup_callback(Mod), + lists:foreach(fun(_) -> + RandomDbName = fabric2_util:uuid(), + fabric2_index:db_updated(RandomDbName) + end, lists:seq(1, 10000)), + + test_util:wait(fun() -> + case table_sizes() =:= 0 of + true -> ok; + false -> wait + end + end, 5000). + + +check_gen_server_messages(#{}) -> + CallExpect = {stop, {bad_call, foo}, {bad_call, foo}, baz}, + CastExpect = {stop, {bad_cast, foo}, bar}, + InfoExpect = {stop, {bad_info, foo}, bar}, + ?assertEqual(CallExpect, fabric2_index:handle_call(foo, bar, baz)), + ?assertEqual(CastExpect, fabric2_index:handle_cast(foo, bar)), + ?assertEqual(InfoExpect, fabric2_index:handle_info(foo, bar)), + ?assertEqual(ok, fabric2_index:terminate(shutdown, nil)), + ?assertEqual({ok, nil}, fabric2_index:code_change(v0, nil, extra)). + + +updater_processes_start(#{}) -> + Pid = whereis(fabric2_index), + ?assert(is_process_alive(Pid)), + lists:map(fun(N) -> + ?assertEqual(tid(N), ets:info(tid(N), name)) + end, lists:seq(0, ?SHARDS - 1)). + + +updater_processes_stop(#{}) -> + Refs = lists:map(fun(N) -> + Pid = ets:info(tid(N), owner), + ?assert(is_process_alive(Pid)), + monitor(process, Pid) + end, lists:seq(0, ?SHARDS - 1)), + + % We stop but don't restart fabric after this as we're running in a foreach + % test list where app restart happens after each test. + application:stop(fabric), + + lists:foreach(fun(Ref) -> + receive + {'DOWN', Ref, _, _, _} -> ok + after 3000 -> + ?assert(false) + end + end, Refs). + + +indexing_can_be_disabled(#{db1 := Db}) -> + Mod = fabric2_test_callback7, + setup_callback(Mod), + + meck:expect(config, get_boolean, fun + ("fabric", "index_updater_enabled", _) -> false; + (_, _, Default) -> Default + end), + + create_doc(Db), + timer:sleep(500), + ?assertEqual(0, meck:num_calls(Mod, build_indices, 2)), + + meck:expect(config, get_boolean, fun + ("fabric", "index_updater_enabled", _) -> true; + (_, _, Default) -> Default + end), + + create_doc(Db), + meck:wait(Mod, build_indices, 2, 2000). + + +handle_indexer_blowing_up(#{db1 := Db}) -> + Mod = fabric2_test_callback8, + setup_callback(Mod), + meck:expect(Mod, build_indices, fun(_, _) -> error(bad_index) end), + + MainPid = whereis(fabric2_index), + WPids1 = [ets:info(tid(N), owner) || N <- lists:seq(0, ?SHARDS - 1)], + + create_doc(Db), + meck:wait(Mod, build_indices, 2, 2000), + + ?assert(is_process_alive(MainPid)), + + WPids2 = [ets:info(tid(N), owner) || N <- lists:seq(0, ?SHARDS - 1)], + ?assertEqual(lists:sort(WPids1), lists:sort(WPids2)), + ?assert(lists:all(fun(Pid) -> is_process_alive(Pid) end, WPids2)). + + +% Utility functions + +setup_callback(Mod) -> + catch meck:unload(Mod), + meck:new(Mod, [non_strict]), + meck:expect(Mod, build_indices, 2, []), + fabric2_index:register_index(Mod). + + +reset_callbacks() -> + Mods = application:get_env(fabric, indices, []), + application:set_env(fabric, indices, []), + lists:foreach(fun(M) -> + catch meck:reset(M), + catch meck:unload(M) + end, Mods). + + +tid(Id) when is_integer(Id) -> + TableName = "fabric2_index_" ++ integer_to_list(Id), + list_to_existing_atom(TableName). + + +table_sizes() -> + Sizes = [ets:info(tid(N), size) || N <- lists:seq(0, ?SHARDS - 1)], + lists:sum(Sizes). + + +create_docs(Db, Count) -> + lists:map(fun(_) -> + {DocId, _RevStr} = create_doc(Db), + DocId + end, lists:seq(1, Count)). + + +create_doc(Db) -> + create_doc(Db, fabric2_util:uuid()). + + +create_doc(Db, DocId) -> + create_doc(Db, DocId, {[]}). + + +create_doc(Db, DocId, Body) -> + Doc = #doc{ + id = DocId, + body = Body + }, + {ok, {Pos, Rev}} = fabric2_db:update_doc(Db, Doc, []), + {DocId, {Pos, Rev}}. -- cgit v1.2.1 From 132cf7a9ba797a1cf228c39ccf6448a683a934d0 Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Sun, 8 Mar 2020 00:38:48 -0500 Subject: Enable index auto-updating for couch_views * Register with the fabric2_index module * Provide a build_indices/2 callback * In the callback attempt to parse an `#mrst{}` indexing context and then trigger an indexing job. We don't wait for the job to finish but instead rely on `couch_job`'s max worker limit to keep concurrency in check. --- src/couch_views/src/couch_views.erl | 21 ++++++++++++++++++++- src/couch_views/src/couch_views_sup.erl | 8 ++++++++ src/couch_views/test/couch_views_indexer_test.erl | 18 +++++++++++++++++- 3 files changed, 45 insertions(+), 2 deletions(-) diff --git a/src/couch_views/src/couch_views.erl b/src/couch_views/src/couch_views.erl index 322415b91..58cfb2467 100644 --- a/src/couch_views/src/couch_views.erl +++ b/src/couch_views/src/couch_views.erl @@ -12,8 +12,15 @@ -module(couch_views). + +-behavior(fabric2_index). + + -export([ - query/6 + query/6, + + % fabric2_index behavior + build_indices/2 ]). @@ -55,6 +62,18 @@ query(Db, DDoc, ViewName, Callback, Acc0, Args0) -> end. +build_indices(#{} = Db, DDocs) when is_list(DDocs) -> + DbName = fabric2_db:name(Db), + lists:filtermap(fun(DDoc) -> + try couch_views_util:ddoc_to_mrst(DbName, DDoc) of + {ok, #mrst{} = Mrst} -> + {true, couch_views_jobs:build_view_async(Db, Mrst)} + catch _:_ -> + false + end + end, DDocs). + + read_view(Db, Mrst, ViewName, Callback, Acc0, Args) -> fabric2_fdb:transactional(Db, fun(TxDb) -> try diff --git a/src/couch_views/src/couch_views_sup.erl b/src/couch_views/src/couch_views_sup.erl index 7a72a1f33..2a40f0a79 100644 --- a/src/couch_views/src/couch_views_sup.erl +++ b/src/couch_views/src/couch_views_sup.erl @@ -28,6 +28,7 @@ start_link() -> + ok = register_views_index(), Arg = case fabric2_node_types:is_type(view_indexing) of true -> normal; false -> builds_disabled @@ -50,6 +51,13 @@ init(builds_disabled) -> {ok, {flags(), []}}. +register_views_index() -> + case fabric2_node_types:is_type(api_frontend) of + true -> fabric2_index:register_index(couch_views); + false -> ok + end. + + flags() -> #{ strategy => one_for_one, diff --git a/src/couch_views/test/couch_views_indexer_test.erl b/src/couch_views/test/couch_views_indexer_test.erl index cd5b2b0bf..02a12e788 100644 --- a/src/couch_views/test/couch_views_indexer_test.erl +++ b/src/couch_views/test/couch_views_indexer_test.erl @@ -41,7 +41,8 @@ indexer_test_() -> ?TDEF_FE(multipe_identical_keys_from_same_doc), ?TDEF_FE(fewer_multipe_identical_keys_from_same_doc), ?TDEF_FE(handle_size_key_limits), - ?TDEF_FE(handle_size_value_limits) + ?TDEF_FE(handle_size_value_limits), + ?TDEF_FE(index_autoupdater_callback) ] } } @@ -536,6 +537,21 @@ handle_size_value_limits(Db) -> ], Out1). +index_autoupdater_callback(Db) -> + DDoc = create_ddoc(), + Doc1 = doc(0), + {ok, _} = fabric2_db:update_doc(Db, DDoc, []), + {ok, _} = fabric2_db:update_doc(Db, Doc1, []), + + DbSeq = fabric2_db:get_update_seq(Db), + + Result = couch_views:build_indices(Db, [DDoc]), + ?assertMatch([{ok, <<_/binary>>}], Result), + [{ok, JobId}] = Result, + + ?assertEqual(ok, couch_views_jobs:wait_for_job(JobId, DbSeq)). + + row(Id, Key, Value) -> {row, [ {id, Id}, -- cgit v1.2.1 From 6b5985fa4e0abc787698cb458050203c01250adc Mon Sep 17 00:00:00 2001 From: Garren Smith Date: Mon, 9 Mar 2020 16:10:24 +0200 Subject: Fix bug in reverse folding with startkey_docid Fixes an issue where the first k/v was skipped if the startkey_docid was included. --- src/fabric/src/fabric2_fdb.erl | 3 +++ test/elixir/test/map_test.exs | 16 ++++++++++++++++ 2 files changed, 19 insertions(+) diff --git a/src/fabric/src/fabric2_fdb.erl b/src/fabric/src/fabric2_fdb.erl index c34b33cbc..b4a4fd6a2 100644 --- a/src/fabric/src/fabric2_fdb.erl +++ b/src/fabric/src/fabric2_fdb.erl @@ -1470,6 +1470,9 @@ get_fold_acc(Db, RangePrefix, UserCallback, UserAcc, Options) EndKey2 = case EndKey1 of undefined -> <>; + EK2 when Reverse -> + PackedEK = erlfdb_tuple:pack({EK2}, RangePrefix), + <>; EK2 -> erlfdb_tuple:pack({EK2}, RangePrefix) end, diff --git a/test/elixir/test/map_test.exs b/test/elixir/test/map_test.exs index bccd4173b..d2a79449f 100644 --- a/test/elixir/test/map_test.exs +++ b/test/elixir/test/map_test.exs @@ -535,6 +535,22 @@ defmodule ViewMapTest do assert error == "foundationdb_error" end + test "descending=true query with startkey_docid", context do + db_name = context[:db_name] + + url = "/#{db_name}/_design/map/_view/some" + + resp = + Couch.get(url, + query: %{descending: true, startkey: 8, startkey_docid: "doc-id-8", limit: 3} + ) + + ids = get_ids(resp) + + assert resp.status_code == 200 + assert ids == ["doc-id-8", "doc-id-7", "doc-id-6"] + end + def update_doc_value(db_name, id, value) do resp = Couch.get("/#{db_name}/#{id}") doc = convert(resp.body) -- cgit v1.2.1 From 640e39caa7ec4124d75b1cb9132bf774b6edef86 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Tue, 10 Mar 2020 16:22:38 +0000 Subject: Create LICENSE --- LICENSE | 176 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 176 insertions(+) create mode 100644 LICENSE diff --git a/LICENSE b/LICENSE new file mode 100644 index 000000000..d9a10c0d8 --- /dev/null +++ b/LICENSE @@ -0,0 +1,176 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS -- cgit v1.2.1 From 1457c2826eceed7cd6244e1130c6f9c499fd2bb3 Mon Sep 17 00:00:00 2001 From: garren smith Date: Tue, 10 Mar 2020 19:34:33 +0200 Subject: couch_jobs resubmit updates job data (#2649) * couch_jobs resubmit updates job data When a job is either pending or finished and the job is resubmitted with new data the job data is updated. --- src/couch_jobs/src/couch_jobs.erl | 8 ++++ src/couch_jobs/src/couch_jobs_fdb.erl | 25 ++++++++++--- src/couch_jobs/test/couch_jobs_tests.erl | 63 ++++++++++++++++++++++++++++++++ 3 files changed, 90 insertions(+), 6 deletions(-) diff --git a/src/couch_jobs/src/couch_jobs.erl b/src/couch_jobs/src/couch_jobs.erl index c134f5ac5..d9ea0fbfa 100644 --- a/src/couch_jobs/src/couch_jobs.erl +++ b/src/couch_jobs/src/couch_jobs.erl @@ -27,6 +27,7 @@ finish/3, resubmit/2, resubmit/3, + resubmit/4, is_resubmitted/1, update/2, update/3, @@ -151,6 +152,13 @@ resubmit(Tx, #{jlock := <<_/binary>>} = Job, SchedTime) -> end). +-spec resubmit(jtx(), job(), scheduled_time(), job_data()) -> {ok, job()} | {error, any()}. +resubmit(Tx, #{jlock := <<_/binary>>} = Job, SchedTime, Data) -> + couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(Tx), fun(JTx) -> + couch_jobs_fdb:resubmit(JTx, Job, SchedTime, Data) + end). + + -spec is_resubmitted(job()) -> true | false. is_resubmitted(#{job := true} = Job) -> maps:get(resubmit, Job, false). diff --git a/src/couch_jobs/src/couch_jobs_fdb.erl b/src/couch_jobs/src/couch_jobs_fdb.erl index 8c1ab7ac5..4c8cd9f37 100644 --- a/src/couch_jobs/src/couch_jobs_fdb.erl +++ b/src/couch_jobs/src/couch_jobs_fdb.erl @@ -23,6 +23,7 @@ accept/4, finish/3, resubmit/3, + resubmit/4, update/3, set_type_timeout/3, @@ -98,7 +99,7 @@ add(#{jtx := true} = JTx0, Type, JobId, Data, STime) -> Key = job_key(JTx, Job), case erlfdb:wait(erlfdb:get(Tx, Key)) of <<_/binary>> -> - {ok, Job1} = resubmit(JTx, Job, STime), + {ok, Job1} = resubmit(JTx, Job, STime, Data), #{seq := Seq, state := State, data := Data1} = Job1, {ok, State, Seq, Data1}; not_found -> @@ -205,8 +206,11 @@ finish(#{jtx := true} = JTx0, #{jlock := <<_/binary>>} = Job, Data) when {error, halt} end. +resubmit(JTx0, Job, NewSTime) -> + resubmit(JTx0, Job, NewSTime, undefined). -resubmit(#{jtx := true} = JTx0, #{job := true} = Job, NewSTime) -> + +resubmit(#{jtx := true} = JTx0, #{job := true} = Job, NewSTime, NewData) -> #{tx := Tx} = JTx = get_jtx(JTx0), #{type := Type, id := JobId} = Job, Key = job_key(JTx, Job), @@ -218,11 +222,12 @@ resubmit(#{jtx := true} = JTx0, #{job := true} = Job, NewSTime) -> end, case job_state(JLock, Seq) of finished -> - ok = maybe_enqueue(JTx, Type, JobId, STime, true, Data), + ok = maybe_enqueue(JTx, Type, JobId, STime, true, NewData), + NewData1 = update_job_data(Data, NewData), Job1 = Job#{ seq => ?PENDING_SEQ, state => pending, - data => Data + data => NewData1 }, {ok, Job1}; pending when STime == OldSTime -> @@ -237,15 +242,16 @@ resubmit(#{jtx := true} = JTx0, #{job := true} = Job, NewSTime) -> }, {ok, Job1}; pending -> - JV1 = JV#jv{seq = ?PENDING_SEQ, stime = STime}, + JV1 = JV#jv{seq = ?PENDING_SEQ, stime = STime, data = NewData}, set_job_val(Tx, Key, JV1), couch_jobs_pending:remove(JTx, Type, JobId, OldSTime), couch_jobs_pending:enqueue(JTx, Type, STime, JobId), + NewData1 = update_job_data(Data, NewData), Job1 = Job#{ stime => STime, seq => ?PENDING_SEQ, state => pending, - data => Data + data => NewData1 }, {ok, Job1}; running -> @@ -705,3 +711,10 @@ get_md_version_age(Version) -> update_md_version_timestamp(Version) -> Ts = erlang:system_time(second), ets:insert(?MODULE, {?MD_TIMESTAMP_ETS_KEY, Version, Ts}). + + +update_job_data(Data, undefined) -> + Data; + +update_job_data(_Data, NewData) -> + NewData. diff --git a/src/couch_jobs/test/couch_jobs_tests.erl b/src/couch_jobs/test/couch_jobs_tests.erl index 9d8e2df50..af95eebe6 100644 --- a/src/couch_jobs/test/couch_jobs_tests.erl +++ b/src/couch_jobs/test/couch_jobs_tests.erl @@ -47,7 +47,12 @@ couch_jobs_basic_test_() -> fun accept_blocking/1, fun job_processor_update/1, fun resubmit_enqueues_job/1, + fun resubmit_finished_updates_job_data/1, + fun resubmit_running_does_not_update_job_data/1, fun resubmit_custom_schedtime/1, + fun add_pending_updates_job_data/1, + fun add_finished_updates_job_data/1, + fun add_running_does_not_update_job_data/1, fun accept_max_schedtime/1, fun accept_no_schedule/1, fun subscribe/1, @@ -426,6 +431,30 @@ resubmit_enqueues_job(#{t1 := T, j1 := J}) -> end). +resubmit_finished_updates_job_data(#{t1 := T, j1 := J}) -> + ?_test(begin + Data1 = #{<<"test">> => 1}, + Data2 = #{<<"test">> => 2}, + ok = couch_jobs:add(?TX, T, J, Data1), + {ok, Job1, #{}} = couch_jobs:accept(T), + ?assertEqual(ok, couch_jobs:finish(?TX, Job1)), + ?assertMatch({ok, _}, couch_jobs:resubmit(?TX, Job1, 6, Data2)), + ?assertMatch({ok, _, Data2}, couch_jobs:accept(T)) + end). + + +resubmit_running_does_not_update_job_data(#{t1 := T, j1 := J}) -> + ?_test(begin + Data1 = #{<<"test">> => 1}, + Data2 = #{<<"test">> => 2}, + ok = couch_jobs:add(?TX, T, J, Data1), + {ok, Job1, #{}} = couch_jobs:accept(T), + ?assertMatch({ok, _}, couch_jobs:resubmit(?TX, Job1, 6, Data2)), + ?assertEqual(ok, couch_jobs:finish(?TX, Job1)), + ?assertMatch({ok, _, Data1}, couch_jobs:accept(T)) + end). + + resubmit_custom_schedtime(#{t1 := T, j1 := J}) -> ?_test(begin ?assertEqual(ok, couch_jobs:add(?TX, T, J, #{}, 7)), @@ -436,6 +465,40 @@ resubmit_custom_schedtime(#{t1 := T, j1 := J}) -> end). +add_pending_updates_job_data(#{t1 := T, j1 := J}) -> + ?_test(begin + Data1 = #{<<"test">> => 1}, + Data2 = #{<<"test">> => 2}, + ok = couch_jobs:add(?TX, T, J, Data1), + ?assertEqual(ok, couch_jobs:add(?TX, T, J, Data2, 6)), + ?assertMatch({ok, _, Data2}, couch_jobs:accept(T)) + end). + + +add_finished_updates_job_data(#{t1 := T, j1 := J}) -> + ?_test(begin + Data1 = #{<<"test">> => 1}, + Data2 = #{<<"test">> => 2}, + ok = couch_jobs:add(?TX, T, J, Data1), + {ok, Job1, #{}} = couch_jobs:accept(T), + ?assertEqual(ok, couch_jobs:finish(?TX, Job1)), + ?assertEqual(ok, couch_jobs:add(?TX, T, J, Data2, 6)), + ?assertMatch({ok, _, Data2}, couch_jobs:accept(T)) + end). + + +add_running_does_not_update_job_data(#{t1 := T, j1 := J}) -> + ?_test(begin + Data1 = #{<<"test">> => 1}, + Data2 = #{<<"test">> => 2}, + ok = couch_jobs:add(?TX, T, J, Data1), + {ok, Job1, #{}} = couch_jobs:accept(T), + ?assertEqual(ok, couch_jobs:add(?TX, T, J, Data2, 6)), + ?assertEqual(ok, couch_jobs:finish(?TX, Job1)), + ?assertMatch({ok, _, Data1}, couch_jobs:accept(T)) + end). + + accept_max_schedtime(#{t1 := T, j1 := J1, j2 := J2}) -> ?_test(begin ok = couch_jobs:add(?TX, T, J1, #{}, 5000), -- cgit v1.2.1 From b00814e58ba601a83b676c336ce2f5d82744a535 Mon Sep 17 00:00:00 2001 From: Jay Doane Date: Wed, 11 Mar 2020 01:36:59 -0700 Subject: Enable code coverage --- rebar.config | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 rebar.config diff --git a/rebar.config b/rebar.config new file mode 100644 index 000000000..e0d18443b --- /dev/null +++ b/rebar.config @@ -0,0 +1,2 @@ +{cover_enabled, true}. +{cover_print_enabled, true}. -- cgit v1.2.1 From 258ec665f5f1bcadcd8b2e87deaef72960a9684d Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Wed, 11 Mar 2020 11:27:48 -0400 Subject: Fix _changes with a _view filter Use `fabric2_db:is_clustered/1` instead of `couch_db:is_clustered` --- src/chttpd/src/chttpd_changes.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/chttpd/src/chttpd_changes.erl b/src/chttpd/src/chttpd_changes.erl index 3a13f81e4..45c7d57b9 100644 --- a/src/chttpd/src/chttpd_changes.erl +++ b/src/chttpd/src/chttpd_changes.erl @@ -169,7 +169,7 @@ configure_filter("_view", Style, Req, Db) -> [DName, VName] -> {ok, DDoc} = open_ddoc(Db, <<"_design/", DName/binary>>), check_member_exists(DDoc, [<<"views">>, VName]), - case couch_db:is_clustered(Db) of + case fabric2_db:is_clustered(Db) of true -> DIR = fabric_util:doc_id_and_rev(DDoc), {fetch, view, Style, DIR, VName}; -- cgit v1.2.1 From c9a9bf086498ae89b3d283e178efce599f00286f Mon Sep 17 00:00:00 2001 From: Alessio Biancalana Date: Wed, 11 Mar 2020 18:42:32 +0100 Subject: Upgrade Credo to 1.3.0 --- mix.exs | 2 +- mix.lock | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/mix.exs b/mix.exs index 29c81fa49..bd78c30d5 100644 --- a/mix.exs +++ b/mix.exs @@ -68,7 +68,7 @@ defmodule CouchDBTest.Mixfile do {:jiffy, path: Path.expand("src/jiffy", __DIR__)}, {:ibrowse, path: Path.expand("src/ibrowse", __DIR__), override: true, compile: false}, - {:credo, "~> 1.2.0", only: [:dev, :test, :integration], runtime: false} + {:credo, "~> 1.3.0", only: [:dev, :test, :integration], runtime: false} ] end diff --git a/mix.lock b/mix.lock index c03e11f64..e7460a3d6 100644 --- a/mix.lock +++ b/mix.lock @@ -1,7 +1,7 @@ %{ "bunt": {:hex, :bunt, "0.2.0", "951c6e801e8b1d2cbe58ebbd3e616a869061ddadcc4863d0a2182541acae9a38", [:mix], [], "hexpm", "7af5c7e09fe1d40f76c8e4f9dd2be7cebd83909f31fee7cd0e9eadc567da8353"}, "certifi": {:hex, :certifi, "2.5.1", "867ce347f7c7d78563450a18a6a28a8090331e77fa02380b4a21962a65d36ee5", [:rebar3], [{:parse_trans, "~>3.3", [hex: :parse_trans, repo: "hexpm", optional: false]}], "hexpm", "805abd97539caf89ec6d4732c91e62ba9da0cda51ac462380bbd28ee697a8c42"}, - "credo": {:hex, :credo, "1.2.2", "f57faf60e0a12b0ba9fd4bad07966057fde162b33496c509b95b027993494aab", [:mix], [{:bunt, "~> 0.2.0", [hex: :bunt, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}], "hexpm", "8f2623cd8c895a6f4a55ef10f3fdf6a55a9ca7bef09676bd835551687bf8a740"}, + "credo": {:hex, :credo, "1.3.0", "37699fefdbe1b0480a5a6b73f259207e9cd7ad5e492277e22c2179bcb226a67b", [:mix], [{:bunt, "~> 0.2.0", [hex: :bunt, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}], "hexpm", "8036b9226e4440d3ebce3931505e407b8d59fc95975f574c26337812e8de2a86"}, "excoveralls": {:hex, :excoveralls, "0.12.1", "a553c59f6850d0aff3770e4729515762ba7c8e41eedde03208182a8dc9d0ce07", [:mix], [{:hackney, "~> 1.0", [hex: :hackney, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}], "hexpm", "5c1f717066a299b1b732249e736c5da96bb4120d1e55dc2e6f442d251e18a812"}, "hackney": {:hex, :hackney, "1.15.2", "07e33c794f8f8964ee86cebec1a8ed88db5070e52e904b8f12209773c1036085", [:rebar3], [{:certifi, "2.5.1", [hex: :certifi, repo: "hexpm", optional: false]}, {:idna, "6.0.0", [hex: :idna, repo: "hexpm", optional: false]}, {:metrics, "1.0.1", [hex: :metrics, repo: "hexpm", optional: false]}, {:mimerl, "~>1.1", [hex: :mimerl, repo: "hexpm", optional: false]}, {:ssl_verify_fun, "1.1.5", [hex: :ssl_verify_fun, repo: "hexpm", optional: false]}], "hexpm", "e0100f8ef7d1124222c11ad362c857d3df7cb5f4204054f9f0f4a728666591fc"}, "httpotion": {:hex, :httpotion, "3.1.3", "fdaf1e16b9318dcb722de57e75ac368c93d4c6e3c9125f93e960f953a750fb77", [:mix], [{:ibrowse, "== 4.4.0", [hex: :ibrowse, repo: "hexpm", optional: false]}], "hexpm", "e420172ef697a0f1f4dc40f89a319d5a3aad90ec51fa424f08c115f04192ae43"}, -- cgit v1.2.1 From ddeb2d127e0fa53a42fb2f6ce6adac802cb83ab6 Mon Sep 17 00:00:00 2001 From: Alessio Biancalana Date: Wed, 11 Mar 2020 18:42:44 +0100 Subject: Add new rules to .credo.exs --- .credo.exs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/.credo.exs b/.credo.exs index bd26f407c..64d281e5e 100644 --- a/.credo.exs +++ b/.credo.exs @@ -110,6 +110,7 @@ {Credo.Check.Readability.StringSigils, []}, {Credo.Check.Readability.TrailingBlankLine, []}, {Credo.Check.Readability.TrailingWhiteSpace, []}, + {Credo.Check.Readability.UnnecessaryAliasExpansion, []}, {Credo.Check.Readability.VariableNames, []}, # @@ -130,6 +131,7 @@ excluded_functions: [] ]}, {Credo.Check.Refactor.UnlessWithElse, []}, + {Credo.Check.Refactor.WithClauses, []}, # ## Warnings @@ -138,7 +140,8 @@ {Credo.Check.Warning.ExpensiveEmptyEnumCheck, []}, {Credo.Check.Warning.IExPry, []}, {Credo.Check.Warning.IoInspect, []}, - {Credo.Check.Warning.LazyLogging, false}, # Disabled since not compatible with Elixir > 1.9 + {Credo.Check.Warning.LazyLogging, false}, # Disabled since not compatible with Elixir > 1.9 + {Credo.Check.Warning.MixEnv, []}, {Credo.Check.Warning.OperationOnSameValues, []}, {Credo.Check.Warning.OperationWithConstantResult, []}, {Credo.Check.Warning.RaiseInsideRescue, []}, @@ -150,10 +153,12 @@ {Credo.Check.Warning.UnusedRegexOperation, []}, {Credo.Check.Warning.UnusedStringOperation, []}, {Credo.Check.Warning.UnusedTupleOperation, []}, + {Credo.Check.Warning.UnsafeExec, []}, # # Controversial and experimental checks (opt-in, just remove `, false`) # + {Credo.Check.Readability.StrictModuleLayout, false}, {Credo.Check.Consistency.MultiAliasImportRequireUse, false}, {Credo.Check.Design.DuplicatedCode, false}, {Credo.Check.Readability.Specs, false}, -- cgit v1.2.1 From 1794e146c8b3283c77fb549f75afbc96a92d62be Mon Sep 17 00:00:00 2001 From: Jay Doane Date: Wed, 11 Mar 2020 11:58:56 -0700 Subject: Handle malformed tokens with jiffy 1.x Recent changes in how `jiffy:decode/1` handles malformed JSON has caused `jwtf:decode/3` to fail to properly return a bad request 400 response for some malformed tokens. First, this changes the name of the function to `decode_b64url_json/1`, indicating that it decodes something that has been first been JSON encoded, and then base64url encoded. More substantially, it wraps both the base64url and jiffy decoding in a try/catch block, since both can throw errors, while the former can also return an error tuple. Tests have been added to ensure all code paths are covered. --- src/jwtf.erl | 24 +++++++++++++++--------- test/jwtf_tests.erl | 24 ++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 9 deletions(-) diff --git a/src/jwtf.erl b/src/jwtf.erl index c6cc78433..8e58e0897 100644 --- a/src/jwtf.erl +++ b/src/jwtf.erl @@ -74,7 +74,7 @@ decode(EncodedToken, Checks, KS) -> try [Header, Payload, Signature] = split(EncodedToken), validate(Header, Payload, Signature, Checks, KS), - {ok, decode_json(Payload)} + {ok, decode_b64url_json(Payload)} catch throw:Error -> {error, Error} @@ -102,10 +102,10 @@ verification_algorithm(Alg) -> validate(Header0, Payload0, Signature, Checks, KS) -> - Header1 = props(decode_json(Header0)), + Header1 = props(decode_b64url_json(Header0)), validate_header(Header1, Checks), - Payload1 = props(decode_json(Payload0)), + Payload1 = props(decode_b64url_json(Payload0)), validate_payload(Payload1, Checks), Alg = prop(<<"alg">>, Header1), @@ -269,14 +269,20 @@ split(EncodedToken) -> end. -decode_json(Encoded) -> - case b64url:decode(Encoded) of - {error, Reason} -> - throw({bad_request, Reason}); - Decoded -> - jiffy:decode(Decoded) +decode_b64url_json(B64UrlEncoded) -> + try + case b64url:decode(B64UrlEncoded) of + {error, Reason} -> + throw({bad_request, Reason}); + JsonEncoded -> + jiffy:decode(JsonEncoded) + end + catch + error:Error -> + throw({bad_request, Error}) end. + props({Props}) -> Props; diff --git a/test/jwtf_tests.erl b/test/jwtf_tests.erl index 527bc327f..dcebe5f40 100644 --- a/test/jwtf_tests.erl +++ b/test/jwtf_tests.erl @@ -35,6 +35,30 @@ jwt_io_pubkey() -> public_key:pem_entry_decode(PEMEntry). +b64_badarg_test() -> + Encoded = <<"0.0.0">>, + ?assertEqual({error, {bad_request,badarg}}, + jwtf:decode(Encoded, [], nil)). + + +b64_bad_block_test() -> + Encoded = <<" aGVsbG8. aGVsbG8. aGVsbG8">>, + ?assertEqual({error, {bad_request,{bad_block,0}}}, + jwtf:decode(Encoded, [], nil)). + + +invalid_json_test() -> + Encoded = <<"fQ.fQ.fQ">>, + ?assertEqual({error, {bad_request,{1,invalid_json}}}, + jwtf:decode(Encoded, [], nil)). + + +truncated_json_test() -> + Encoded = <<"ew.ew.ew">>, + ?assertEqual({error, {bad_request,{2,truncated_json}}}, + jwtf:decode(Encoded, [], nil)). + + missing_typ_test() -> Encoded = encode({[]}, []), ?assertEqual({error, {bad_request,<<"Missing typ header parameter">>}}, -- cgit v1.2.1 From bf989eb281ae97052c7b555d68f93f46cf42aa19 Mon Sep 17 00:00:00 2001 From: Jay Doane Date: Wed, 11 Mar 2020 12:32:32 -0700 Subject: Handle spurious erlfdb future Seeing log errors like the following: CRASH REPORT Process epep_fdb_decision_cache (<0.633.0>) with 0 neighbors exited with reason: {bad_info,{#Ref<0.2506675824.3127640065.49278>,ready}} at gen_server:handle_common_reply/8(line:726) <= proc_lib:init_p_do_apply/3(line:247); initial_call: {couch_expiring_cache_server,init,['Argument__1']}, ancestors: [epep_sup,<0.596.0>], message_queue_len: 0, messages: [], links: [<0.614.0>], dictionary: [{rand_seed,{#{bits => 58,jump => #Fun,next => #Fun<..>,...},...}},...], trap_exit: false, status: running, heap_size: 2586, stack_size: 27, reductions: 7493102 This should handle those errors, and prevent the crashes. --- src/couch_expiring_cache/src/couch_expiring_cache_server.erl | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/couch_expiring_cache/src/couch_expiring_cache_server.erl b/src/couch_expiring_cache/src/couch_expiring_cache_server.erl index 65e742bba..99d386485 100644 --- a/src/couch_expiring_cache/src/couch_expiring_cache_server.erl +++ b/src/couch_expiring_cache/src/couch_expiring_cache_server.erl @@ -92,6 +92,14 @@ handle_info(remove_expired, St) -> largest_elapsed := max(Elapsed, LargestElapsed), lag := NowTS - OldestTS}}; + +handle_info({Ref, ready}, St) when is_reference(Ref) -> + % Prevent crashing server and application + LogMsg = "~p : spurious erlfdb future ready message ~p", + couch_log:error(LogMsg, [?MODULE, Ref]), + {noreply, St}; + + handle_info(Msg, St) -> {stop, {bad_info, Msg}, St}. -- cgit v1.2.1 From 27abf0e67c082518e41c264f01cc6540ef7204a6 Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Wed, 11 Mar 2020 16:43:22 -0500 Subject: Send correct seq values for filtered changes If a filtered changes feed hit a rewind we would send a bare `integer()` value for the Seq. If this was used again during a rewind it causes a competely rewind to zero due to not having the `node()` and UUID `binary()` values to calculate a new start seq. --- src/fabric/src/fabric_rpc.erl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/fabric/src/fabric_rpc.erl b/src/fabric/src/fabric_rpc.erl index 7b688b2b9..a67dcd148 100644 --- a/src/fabric/src/fabric_rpc.erl +++ b/src/fabric/src/fabric_rpc.erl @@ -515,7 +515,8 @@ changes_enumerator(DocInfo, Acc) -> [] -> ChangesRow = {no_pass, [ {pending, Pending-1}, - {seq, Seq}]}; + {seq, {Seq, uuid(Db), couch_db:owner_of(Epochs, Seq)}} + ]}; Results -> Opts = if Conflicts -> [conflicts | DocOptions]; true -> DocOptions end, ChangesRow = {change, [ -- cgit v1.2.1 From af2eb048cb8f8ebf4b529795f984697d0ed760c5 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Thu, 12 Mar 2020 08:45:06 +0000 Subject: Set cookie domain when DELETE'ing Closes #2655 --- src/couch/src/couch_httpd_auth.erl | 3 ++- src/couch/test/eunit/couchdb_cookie_domain_tests.erl | 13 ++++++++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/src/couch/src/couch_httpd_auth.erl b/src/couch/src/couch_httpd_auth.erl index 5e4450301..43ecda958 100644 --- a/src/couch/src/couch_httpd_auth.erl +++ b/src/couch/src/couch_httpd_auth.erl @@ -365,7 +365,8 @@ handle_session_req(#httpd{method='GET', user_ctx=UserCtx}=Req, _AuthModule) -> end; % logout by deleting the session handle_session_req(#httpd{method='DELETE'}=Req, _AuthModule) -> - Cookie = mochiweb_cookies:cookie("AuthSession", "", [{path, "/"}] ++ cookie_scheme(Req)), + Cookie = mochiweb_cookies:cookie("AuthSession", "", [{path, "/"}] ++ + cookie_domain() ++ cookie_scheme(Req)), {Code, Headers} = case couch_httpd:qs_value(Req, "next", nil) of nil -> {200, [Cookie]}; diff --git a/src/couch/test/eunit/couchdb_cookie_domain_tests.erl b/src/couch/test/eunit/couchdb_cookie_domain_tests.erl index e66ab31e6..c46352f35 100755 --- a/src/couch/test/eunit/couchdb_cookie_domain_tests.erl +++ b/src/couch/test/eunit/couchdb_cookie_domain_tests.erl @@ -43,7 +43,8 @@ cookie_test_() -> fun({ok, Url, ContentType, Payload, _}) -> [ should_set_cookie_domain(Url, ContentType, Payload), - should_not_set_cookie_domain(Url, ContentType, Payload) + should_not_set_cookie_domain(Url, ContentType, Payload), + should_delete_cookie_domain(Url, ContentType, Payload) ] end } @@ -67,3 +68,13 @@ should_not_set_cookie_domain(Url, ContentType, Payload) -> Cookie = proplists:get_value("Set-Cookie", Headers), ?assertEqual(0, string:str(Cookie, "; Domain=")) end). + +should_delete_cookie_domain(Url, ContentType, Payload) -> + ?_test(begin + ok = config:set("couch_httpd_auth", "cookie_domain", + "example.com", false), + {ok, Code, Headers, _} = test_request:delete(Url, ContentType, Payload), + ?assertEqual(200, Code), + Cookie = proplists:get_value("Set-Cookie", Headers), + ?assert(string:str(Cookie, "; Domain=example.com") > 0) + end). -- cgit v1.2.1 From 919f75c344c9737b42d36ee3403e1ae0620c5606 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Thu, 12 Mar 2020 11:58:00 +0000 Subject: add jwtf to release --- rebar.config.script | 1 + rel/reltool.config | 2 ++ 2 files changed, 3 insertions(+) diff --git a/rebar.config.script b/rebar.config.script index 1dcad566c..408ad3d48 100644 --- a/rebar.config.script +++ b/rebar.config.script @@ -132,6 +132,7 @@ SubDirs = [ "src/fabric", "src/global_changes", "src/ioq", + "src/jwtf", "src/ken", "src/mango", "src/rexi", diff --git a/rel/reltool.config b/rel/reltool.config index 5285504ba..796019298 100644 --- a/rel/reltool.config +++ b/rel/reltool.config @@ -51,6 +51,7 @@ ibrowse, ioq, jiffy, + jwtf, ken, khash, mango, @@ -110,6 +111,7 @@ {app, ibrowse, [{incl_cond, include}]}, {app, ioq, [{incl_cond, include}]}, {app, jiffy, [{incl_cond, include}]}, + {app, jwtf, [{incl_cond, include}]}, {app, ken, [{incl_cond, include}]}, {app, khash, [{incl_cond, include}]}, {app, mango, [{incl_cond, include}]}, -- cgit v1.2.1 From 0db143a7a86927cd503baf14e8d56ac922590b46 Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Thu, 12 Mar 2020 13:40:20 -0400 Subject: Handle transaction timeouts in list_dbs and list_dbs_info Previously those endpoints would break when transactions time-out and are retried. To fix it we re-use the mechanism from changes feeds. There is a longer discussion about this on the mailing list: https://lists.apache.org/thread.html/r02cee7045cac4722e1682bb69ba0ec791f5cce025597d0099fb34033%40%3Cdev.couchdb.apache.org%3E --- src/fabric/src/fabric2_fdb.erl | 12 ++- src/fabric/test/fabric2_db_crud_tests.erl | 164 +++++++++++++++++++++++++++--- 2 files changed, 162 insertions(+), 14 deletions(-) diff --git a/src/fabric/src/fabric2_fdb.erl b/src/fabric/src/fabric2_fdb.erl index b4a4fd6a2..8bc87926d 100644 --- a/src/fabric/src/fabric2_fdb.erl +++ b/src/fabric/src/fabric2_fdb.erl @@ -340,7 +340,11 @@ get_dir(Tx) -> erlfdb_directory:get_name(CouchDB). -list_dbs(Tx, Callback, AccIn, Options) -> +list_dbs(Tx, Callback, AccIn, Options0) -> + Options = case fabric2_util:get_value(restart_tx, Options0) of + undefined -> [{restart_tx, true} | Options0]; + _AlreadySet -> Options0 + end, LayerPrefix = get_dir(Tx), Prefix = erlfdb_tuple:pack({?ALL_DBS}, LayerPrefix), fold_range({tx, Tx}, Prefix, fun({K, _V}, Acc) -> @@ -349,7 +353,11 @@ list_dbs(Tx, Callback, AccIn, Options) -> end, AccIn, Options). -list_dbs_info(Tx, Callback, AccIn, Options) -> +list_dbs_info(Tx, Callback, AccIn, Options0) -> + Options = case fabric2_util:get_value(restart_tx, Options0) of + undefined -> [{restart_tx, true} | Options0]; + _AlreadySet -> Options0 + end, LayerPrefix = get_dir(Tx), Prefix = erlfdb_tuple:pack({?ALL_DBS}, LayerPrefix), fold_range({tx, Tx}, Prefix, fun({DbNameKey, DbPrefix}, Acc) -> diff --git a/src/fabric/test/fabric2_db_crud_tests.erl b/src/fabric/test/fabric2_db_crud_tests.erl index 943b55f3f..6323a22bd 100644 --- a/src/fabric/test/fabric2_db_crud_tests.erl +++ b/src/fabric/test/fabric2_db_crud_tests.erl @@ -18,27 +18,61 @@ -include("fabric2_test.hrl"). +-define(PDICT_ERROR_IN_FOLD_RANGE, '$fabric2_error_in_fold_range'). +-define(PDICT_ERROR_IN_USER_FUN, '$fabric2_error_throw_in_user_fun'). + + crud_test_() -> { "Test database CRUD operations", { setup, - fun() -> test_util:start_couch([fabric]) end, - fun test_util:stop_couch/1, - with([ - ?TDEF(create_db), - ?TDEF(open_db), - ?TDEF(delete_db), - ?TDEF(list_dbs), - ?TDEF(list_dbs_user_fun), - ?TDEF(list_dbs_user_fun_partial), - ?TDEF(list_dbs_info), - ?TDEF(list_dbs_info_partial) - ]) + fun setup_all/0, + fun teardown_all/1, + { + foreach, + fun setup/0, + fun cleanup/1, + [ + ?TDEF_FE(create_db), + ?TDEF_FE(open_db), + ?TDEF_FE(delete_db), + ?TDEF_FE(list_dbs), + ?TDEF_FE(list_dbs_user_fun), + ?TDEF_FE(list_dbs_user_fun_partial), + ?TDEF_FE(list_dbs_info), + ?TDEF_FE(list_dbs_info_partial), + ?TDEF_FE(list_dbs_tx_too_long), + ?TDEF_FE(list_dbs_info_tx_too_long) + ] + } } }. +setup_all() -> + Ctx = test_util:start_couch([fabric]), + meck:new(erlfdb, [passthrough]), + Ctx. + + +teardown_all(Ctx) -> + meck:unload(), + test_util:stop_couch(Ctx). + + +setup() -> + meck:expect(erlfdb, fold_range, fun(Tx, Start, End, Callback, Acc, Opts) -> + maybe_tx_too_long(?PDICT_ERROR_IN_FOLD_RANGE), + meck:passthrough([Tx, Start, End, Callback, Acc, Opts]) + end), + ok. + + +cleanup(_) -> + reset_error_counts(). + + create_db(_) -> DbName = ?tempdb(), ?assertMatch({ok, _}, fabric2_db:create(DbName, [])), @@ -132,6 +166,81 @@ list_dbs_info_partial(_) -> ?assertEqual([{meta, []}], UserAcc). +list_dbs_tx_too_long(_) -> + DbName1 = ?tempdb(), + DbName2 = ?tempdb(), + ?assertMatch({ok, _}, fabric2_db:create(DbName1, [])), + ?assertMatch({ok, _}, fabric2_db:create(DbName2, [])), + + UserFun = fun(Row, Acc) -> + maybe_tx_too_long(?PDICT_ERROR_IN_USER_FUN), + {ok, [Row, Acc]} + end, + + % Get get expected output without any transactions timing out + Dbs = fabric2_db:list_dbs(UserFun, [], []), + + % Blow up in fold range + tx_too_long_errors(0, 1), + ?assertEqual(Dbs, fabric2_db:list_dbs(UserFun, [], [])), + + % Blow up in fold_range after emitting one row + tx_too_long_errors(0, {1, 1}), + ?assertEqual(Dbs, fabric2_db:list_dbs(UserFun, [], [])), + + % Blow up in user fun + tx_too_long_errors(1, 0), + ?assertEqual(Dbs, fabric2_db:list_dbs(UserFun, [], [])), + + % Blow up in user fun after emitting one row + tx_too_long_errors({1, 1}, 0), + ?assertEqual(Dbs, fabric2_db:list_dbs(UserFun, [], [])), + + % Blow up in in user fun and fold range + tx_too_long_errors(1, {1, 1}), + ?assertEqual(Dbs, fabric2_db:list_dbs(UserFun, [], [])), + + ok = fabric2_db:delete(DbName1, []), + ok = fabric2_db:delete(DbName2, []). + + +list_dbs_info_tx_too_long(_) -> + DbName1 = ?tempdb(), + DbName2 = ?tempdb(), + ?assertMatch({ok, _}, fabric2_db:create(DbName1, [])), + ?assertMatch({ok, _}, fabric2_db:create(DbName2, [])), + + UserFun = fun(Row, Acc) -> + maybe_tx_too_long(?PDICT_ERROR_IN_USER_FUN), + {ok, [Row, Acc]} + end, + + {ok, DbInfos} = fabric2_db:list_dbs_info(UserFun, [], []), + + % Blow up in fold range + tx_too_long_errors(0, 1), + ?assertEqual({ok, DbInfos}, fabric2_db:list_dbs_info(UserFun, [], [])), + + % Blow up in fold_range after emitting one row + tx_too_long_errors(0, {1, 1}), + ?assertEqual({ok, DbInfos}, fabric2_db:list_dbs_info(UserFun, [], [])), + + % Blow up in user fun + tx_too_long_errors(1, 0), + ?assertEqual({ok, DbInfos}, fabric2_db:list_dbs_info(UserFun, [], [])), + + % Blow up in user fun after emitting one row + tx_too_long_errors({1, 1}, 0), + ?assertEqual({ok, DbInfos}, fabric2_db:list_dbs_info(UserFun, [], [])), + + % Blow up in in user fun and fold range + tx_too_long_errors(1, {1, 1}), + ?assertEqual({ok, DbInfos}, fabric2_db:list_dbs_info(UserFun, [], [])), + + ok = fabric2_db:delete(DbName1, []), + ok = fabric2_db:delete(DbName2, []). + + is_db_info_member(_, []) -> false; @@ -142,3 +251,34 @@ is_db_info_member(DbName, [DbInfo | RestInfos]) -> _E -> is_db_info_member(DbName, RestInfos) end. + + +tx_too_long_errors(UserFunCount, FoldErrors) when is_integer(UserFunCount) -> + tx_too_long_errors({0, UserFunCount}, FoldErrors); + +tx_too_long_errors(UserFunErrors, FoldCount) when is_integer(FoldCount) -> + tx_too_long_errors(UserFunErrors, {0, FoldCount}); + +tx_too_long_errors({UserFunSkip, UserFunCount}, {FoldSkip, FoldCount}) -> + reset_error_counts(), + put(?PDICT_ERROR_IN_USER_FUN, {UserFunSkip, UserFunCount}), + put(?PDICT_ERROR_IN_FOLD_RANGE, {FoldSkip, FoldCount}). + + +reset_error_counts() -> + erase(?PDICT_ERROR_IN_FOLD_RANGE), + erase(?PDICT_ERROR_IN_USER_FUN). + + +maybe_tx_too_long(Key) -> + case get(Key) of + {Skip, Count} when is_integer(Skip), Skip > 0 -> + put(Key, {Skip - 1, Count}); + {0, Count} when is_integer(Count), Count > 0 -> + put(Key, {0, Count - 1}), + error({erlfdb_error, 1007}); + {0, 0} -> + ok; + undefined -> + ok + end. -- cgit v1.2.1 From 39b9cc7e741f6b3b9a1f08e7aff8f3e9d0b14325 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Fri, 13 Mar 2020 10:33:13 +0000 Subject: Enhance alg check The "alg" check can now take list of algorithms that are supported, which must be from the valid list of algorithms. --- src/jwtf/src/jwtf.erl | 7 ++++--- src/jwtf/test/jwtf_tests.erl | 12 +++++++++++- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/src/jwtf/src/jwtf.erl b/src/jwtf/src/jwtf.erl index 8e58e0897..0bdc0aa1a 100644 --- a/src/jwtf/src/jwtf.erl +++ b/src/jwtf/src/jwtf.erl @@ -139,10 +139,11 @@ validate_alg(Props, Checks) -> case {Required, Alg} of {undefined, _} -> ok; - {true, undefined} -> + {Required, undefined} when Required /= undefined -> throw({bad_request, <<"Missing alg header parameter">>}); - {true, Alg} -> - case lists:member(Alg, valid_algorithms()) of + {Required, Alg} when Required == true; is_list(Required) -> + AllowedAlg = if Required == true -> true; true -> lists:member(Alg, Required) end, + case AllowedAlg andalso lists:member(Alg, valid_algorithms()) of true -> ok; false -> diff --git a/src/jwtf/test/jwtf_tests.erl b/src/jwtf/test/jwtf_tests.erl index dcebe5f40..222bb4792 100644 --- a/src/jwtf/test/jwtf_tests.erl +++ b/src/jwtf/test/jwtf_tests.erl @@ -82,6 +82,16 @@ invalid_alg_test() -> ?assertEqual({error, {bad_request,<<"Invalid alg header parameter">>}}, jwtf:decode(Encoded, [alg], nil)). +not_allowed_alg_test() -> + Encoded = encode({[{<<"alg">>, <<"HS256">>}]}, []), + ?assertEqual({error, {bad_request,<<"Invalid alg header parameter">>}}, + jwtf:decode(Encoded, [{alg, [<<"RS256">>]}], nil)). + +reject_unknown_alg_test() -> + Encoded = encode({[{<<"alg">>, <<"NOPE">>}]}, []), + ?assertEqual({error, {bad_request,<<"Invalid alg header parameter">>}}, + jwtf:decode(Encoded, [{alg, [<<"NOPE">>]}], nil)). + missing_iss_test() -> Encoded = encode(valid_header(), {[]}), @@ -176,7 +186,7 @@ hs256_test() -> "6MTAwMDAwMDAwMDAwMDAsImtpZCI6ImJhciJ9.iS8AH11QHHlczkBn" "Hl9X119BYLOZyZPllOVhSBZ4RZs">>, KS = fun(<<"HS256">>, <<"123456">>) -> <<"secret">> end, - Checks = [{iss, <<"https://foo.com">>}, iat, exp, typ, alg, kid], + Checks = [{iss, <<"https://foo.com">>}, iat, exp, typ, {alg, [<<"HS256">>]}, kid], ?assertMatch({ok, _}, catch jwtf:decode(EncodedToken, Checks, KS)). -- cgit v1.2.1 From 0cd9c26b201ff626f794058b94ed06e496f5baa3 Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Fri, 13 Mar 2020 18:28:51 -0400 Subject: Refactor some of the "tx_too_old" tests * There was a good amount of duplication between `_db_crud_tests` and `_changes_fold_tests`, so make a common test utility module so both suites can use. * Clean up test names. Previously some were named `tx_too_long` but since the official FDB error is `transaction_too_old` rename them to match a bit better. * `list_dbs_info` implementation queue of 100 futures to parallelize fetching. So its test was update to create more than 100 dbs. Creating 100 dbs took about 3 seconds so add a small parallel map (pmap) utility function to help with that. --- src/fabric/src/fabric2_util.erl | 49 ++++++++++- src/fabric/test/fabric2_changes_fold_tests.erl | 103 ++++++++--------------- src/fabric/test/fabric2_db_crud_tests.erl | 112 +++++++++++-------------- src/fabric/test/fabric2_test_util.erl | 76 +++++++++++++++++ 4 files changed, 205 insertions(+), 135 deletions(-) create mode 100644 src/fabric/test/fabric2_test_util.erl diff --git a/src/fabric/src/fabric2_util.erl b/src/fabric/src/fabric2_util.erl index a4faf3987..46f9abeef 100644 --- a/src/fabric/src/fabric2_util.erl +++ b/src/fabric/src/fabric2_util.erl @@ -37,7 +37,10 @@ from_hex/1, uuid/0, - encode_all_doc_key/1 + encode_all_doc_key/1, + + pmap/2, + pmap/3 ]). @@ -298,3 +301,47 @@ encode_all_doc_key(N) when is_number(N) -> <<>>; encode_all_doc_key(B) when is_binary(B) -> B; encode_all_doc_key(L) when is_list(L) -> <<255>>; encode_all_doc_key({O}) when is_list(O) -> <<255>>. + + +pmap(Fun, Args) -> + pmap(Fun, Args, []). + + +pmap(Fun, Args, Opts) -> + Refs = lists:map(fun(Arg) -> + {_, Ref} = spawn_monitor(fun() -> exit(pmap_exec(Fun, Arg)) end), + Ref + end, Args), + Timeout = fabric2_util:get_value(timeout, Opts, 5000), + lists:map(fun(Ref) -> + receive + {'DOWN', Ref, _, _, {'$res', Res}} -> + Res; + {'DOWN', Ref, _, _, {'$err', Tag, Reason, Stack}} -> + erlang:raise(Tag, Reason, Stack) + after Timeout -> + error({pmap_timeout, Timeout}) + end + end, Refs). + + +% OTP_RELEASE is defined in OTP 21+ only +-ifdef(OTP_RELEASE). + +pmap_exec(Fun, Arg) -> + try + {'$res', Fun(Arg)} + catch Tag:Reason:Stack -> + {'$err', Tag, Reason, Stack} + end. + +-else. + +pmap_exec(Fun, Arg) -> + try + {'$res', Fun(Arg)} + catch Tag:Reason -> + {'$err', Tag, Reason, erlang:get_stacktrace()} + end. + +-endif. diff --git a/src/fabric/test/fabric2_changes_fold_tests.erl b/src/fabric/test/fabric2_changes_fold_tests.erl index fddf1802b..8541d973c 100644 --- a/src/fabric/test/fabric2_changes_fold_tests.erl +++ b/src/fabric/test/fabric2_changes_fold_tests.erl @@ -21,9 +21,6 @@ -define(DOC_COUNT, 25). --define(PDICT_ERROR_IN_FOLD_RANGE, '$fabric2_error_in_fold_range'). --define(PDICT_ERROR_IN_USER_FUN, '$fabric2_error_throw_in_user_fun'). - changes_fold_test_() -> { @@ -43,10 +40,10 @@ changes_fold_test_() -> ?TDEF_FE(fold_changes_basic_rev), ?TDEF_FE(fold_changes_since_now_rev), ?TDEF_FE(fold_changes_since_seq_rev), - ?TDEF_FE(fold_changes_basic_tx_too_long), - ?TDEF_FE(fold_changes_reverse_tx_too_long), - ?TDEF_FE(fold_changes_tx_too_long_with_single_row_emits), - ?TDEF_FE(fold_changes_since_seq_tx_too_long), + ?TDEF_FE(fold_changes_basic_tx_too_old), + ?TDEF_FE(fold_changes_reverse_tx_too_old), + ?TDEF_FE(fold_changes_tx_too_old_with_single_row_emits), + ?TDEF_FE(fold_changes_since_seq_tx_too_old), ?TDEF_FE(fold_changes_not_progressing) ] } @@ -66,10 +63,7 @@ teardown_all(Ctx) -> setup() -> - meck:expect(erlfdb, fold_range, fun(Tx, Start, End, Callback, Acc, Opts) -> - maybe_tx_too_long(?PDICT_ERROR_IN_FOLD_RANGE), - meck:passthrough([Tx, Start, End, Callback, Acc, Opts]) - end), + fabric2_test_util:tx_too_old_mock_erlfdb(), {ok, Db} = fabric2_db:create(?tempdb(), [{user_ctx, ?ADMIN_USER}]), Rows = lists:map(fun(Val) -> DocId = fabric2_util:uuid(), @@ -90,7 +84,7 @@ setup() -> cleanup({Db, _DocIdRevs}) -> - reset_error_counts(), + fabric2_test_util:tx_too_old_reset_errors(), ok = fabric2_db:delete(fabric2_db:name(Db), []). @@ -130,131 +124,114 @@ fold_changes_since_seq_rev({Db, DocRows}) -> fold_changes_since_seq_rev({Db, RestRows}). -fold_changes_basic_tx_too_long({Db, DocRows0}) -> +fold_changes_basic_tx_too_old({Db, DocRows0}) -> DocRows = lists:reverse(DocRows0), - tx_too_long_errors(0, 1), + fabric2_test_util:tx_too_old_setup_errors(0, 1), ?assertEqual(DocRows, changes(Db)), - tx_too_long_errors(1, 0), + fabric2_test_util:tx_too_old_setup_errors(1, 0), ?assertEqual(DocRows, changes(Db)), % Blow up in user fun but after emitting one row successfully. - tx_too_long_errors({1, 1}, 0), + fabric2_test_util:tx_too_old_setup_errors({1, 1}, 0), ?assertEqual(DocRows, changes(Db)), % Blow up before last document - tx_too_long_errors({?DOC_COUNT - 1, 1}, 0), + fabric2_test_util:tx_too_old_setup_errors({?DOC_COUNT - 1, 1}, 0), ?assertEqual(DocRows, changes(Db)), % Emit one value, then blow up in user function and then blow up twice in % fold_range. But it is not enough to stop the iteration. - tx_too_long_errors({1, 1}, {1, 2}), + fabric2_test_util:tx_too_old_setup_errors({1, 1}, {1, 2}), ?assertEqual(DocRows, changes(Db)). -fold_changes_reverse_tx_too_long({Db, DocRows}) -> +fold_changes_reverse_tx_too_old({Db, DocRows}) -> Opts = [{dir, rev}], - tx_too_long_errors(0, 1), + fabric2_test_util:tx_too_old_setup_errors(0, 1), ?assertEqual([], changes(Db, 0, Opts)), - tx_too_long_errors(1, 0), + fabric2_test_util:tx_too_old_setup_errors(1, 0), ?assertEqual([], changes(Db, 0, Opts)), - tx_too_long_errors(1, 0), + fabric2_test_util:tx_too_old_setup_errors(1, 0), ?assertEqual(DocRows, changes(Db, now, Opts)), - tx_too_long_errors(1, 0), + fabric2_test_util:tx_too_old_setup_errors(1, 0), ?assertEqual(DocRows, changes(Db, now, Opts)), % Blow up in user fun but after emitting one row successfully. - tx_too_long_errors({1, 1}, 0), + fabric2_test_util:tx_too_old_setup_errors({1, 1}, 0), ?assertEqual(DocRows, changes(Db, now, Opts)), % Blow up before last document - tx_too_long_errors({?DOC_COUNT - 1, 1}, 0), + fabric2_test_util:tx_too_old_setup_errors({?DOC_COUNT - 1, 1}, 0), ?assertEqual(DocRows, changes(Db, now, Opts)), % Emit value, blow up in user function, and twice in fold_range - tx_too_long_errors({1, 1}, {1, 2}), + fabric2_test_util:tx_too_old_setup_errors({1, 1}, {1, 2}), ?assertEqual(DocRows, changes(Db, now, Opts)). -fold_changes_tx_too_long_with_single_row_emits({Db, DocRows0}) -> +fold_changes_tx_too_old_with_single_row_emits({Db, DocRows0}) -> % This test does a few basic operations while forcing erlfdb range fold to % emit a single row at a time, thus forcing it to use continuations while % also inducing tx errors Opts = [{target_bytes, 1}], DocRows = lists:reverse(DocRows0), - tx_too_long_errors(0, 1), + fabric2_test_util:tx_too_old_setup_errors(0, 1), ?assertEqual(DocRows, changes(Db, 0, Opts)), - tx_too_long_errors(1, 0), + fabric2_test_util:tx_too_old_setup_errors(1, 0), ?assertEqual(DocRows, changes(Db, 0, Opts)), % Blow up in user fun but after emitting one row successfully. - tx_too_long_errors({1, 1}, 0), + fabric2_test_util:tx_too_old_setup_errors({1, 1}, 0), ?assertEqual(DocRows, changes(Db, 0, Opts)), % Blow up before last document - tx_too_long_errors({?DOC_COUNT - 1, 1}, 0), + fabric2_test_util:tx_too_old_setup_errors({?DOC_COUNT - 1, 1}, 0), ?assertEqual(DocRows, changes(Db, 0, Opts)). -fold_changes_since_seq_tx_too_long({Db, Rows}) -> +fold_changes_since_seq_tx_too_old({Db, Rows}) -> % Blow up after after a successful emit, then twice % in range fold call. Also re-use already existing basic % fold_changes_since_seq test function. - tx_too_long_errors({1, 1}, {1, 2}), + fabric2_test_util:tx_too_old_setup_errors({1, 1}, {1, 2}), fold_changes_since_seq({Db, Rows}). fold_changes_not_progressing({Db, _}) -> % Fail in first fold range call. - tx_too_long_errors(5, 0), + fabric2_test_util:tx_too_old_setup_errors(5, 0), ?assertError(fold_range_not_progressing, changes(Db)), % Fail in first user fun call. - tx_too_long_errors(0, 5), + fabric2_test_util:tx_too_old_setup_errors(0, 5), ?assertError(fold_range_not_progressing, changes(Db)), % Blow up in last user fun call - tx_too_long_errors({?DOC_COUNT - 1, 5}, 0), + fabric2_test_util:tx_too_old_setup_errors({?DOC_COUNT - 1, 5}, 0), ?assertError(fold_range_not_progressing, changes(Db)), % Blow up in user function after one success. - tx_too_long_errors({1, 5}, 0), + fabric2_test_util:tx_too_old_setup_errors({1, 5}, 0), ?assertError(fold_range_not_progressing, changes(Db)), % Emit value, blow up in user function, then keep blowing up in fold_range. - tx_too_long_errors({1, 1}, {1, 4}), + fabric2_test_util:tx_too_old_setup_errors({1, 1}, {1, 4}), ?assertError(fold_range_not_progressing, changes(Db)). fold_fun(#{} = Change, Acc) -> - maybe_tx_too_long(?PDICT_ERROR_IN_USER_FUN), + fabric2_test_util:tx_too_old_raise_in_user_fun(), {ok, [Change | Acc]}. -tx_too_long_errors(UserFunCount, FoldErrors) when is_integer(UserFunCount) -> - tx_too_long_errors({0, UserFunCount}, FoldErrors); - -tx_too_long_errors(UserFunErrors, FoldCount) when is_integer(FoldCount) -> - tx_too_long_errors(UserFunErrors, {0, FoldCount}); - -tx_too_long_errors({UserFunSkip, UserFunCount}, {FoldSkip, FoldCount}) -> - reset_error_counts(), - put(?PDICT_ERROR_IN_USER_FUN, {UserFunSkip, UserFunCount}), - put(?PDICT_ERROR_IN_FOLD_RANGE, {FoldSkip, FoldCount}). - - -reset_error_counts() -> - erase(?PDICT_ERROR_IN_FOLD_RANGE), - erase(?PDICT_ERROR_IN_USER_FUN). - - changes(Db) -> changes(Db, 0, []). @@ -262,17 +239,3 @@ changes(Db) -> changes(Db, Since, Opts) -> {ok, Rows} = fabric2_db:fold_changes(Db, Since, fun fold_fun/2, [], Opts), Rows. - - -maybe_tx_too_long(Key) -> - case get(Key) of - {Skip, Count} when is_integer(Skip), Skip > 0 -> - put(Key, {Skip - 1, Count}); - {0, Count} when is_integer(Count), Count > 0 -> - put(Key, {0, Count - 1}), - error({erlfdb_error, 1007}); - {0, 0} -> - ok; - undefined -> - ok - end. diff --git a/src/fabric/test/fabric2_db_crud_tests.erl b/src/fabric/test/fabric2_db_crud_tests.erl index 6323a22bd..c0a65ebd8 100644 --- a/src/fabric/test/fabric2_db_crud_tests.erl +++ b/src/fabric/test/fabric2_db_crud_tests.erl @@ -18,10 +18,6 @@ -include("fabric2_test.hrl"). --define(PDICT_ERROR_IN_FOLD_RANGE, '$fabric2_error_in_fold_range'). --define(PDICT_ERROR_IN_USER_FUN, '$fabric2_error_throw_in_user_fun'). - - crud_test_() -> { "Test database CRUD operations", @@ -42,8 +38,8 @@ crud_test_() -> ?TDEF_FE(list_dbs_user_fun_partial), ?TDEF_FE(list_dbs_info), ?TDEF_FE(list_dbs_info_partial), - ?TDEF_FE(list_dbs_tx_too_long), - ?TDEF_FE(list_dbs_info_tx_too_long) + ?TDEF_FE(list_dbs_tx_too_old), + ?TDEF_FE(list_dbs_info_tx_too_old) ] } } @@ -62,15 +58,11 @@ teardown_all(Ctx) -> setup() -> - meck:expect(erlfdb, fold_range, fun(Tx, Start, End, Callback, Acc, Opts) -> - maybe_tx_too_long(?PDICT_ERROR_IN_FOLD_RANGE), - meck:passthrough([Tx, Start, End, Callback, Acc, Opts]) - end), - ok. + fabric2_test_util:tx_too_old_mock_erlfdb(). cleanup(_) -> - reset_error_counts(). + fabric2_test_util:tx_too_old_reset_errors(). create_db(_) -> @@ -166,14 +158,14 @@ list_dbs_info_partial(_) -> ?assertEqual([{meta, []}], UserAcc). -list_dbs_tx_too_long(_) -> +list_dbs_tx_too_old(_) -> DbName1 = ?tempdb(), DbName2 = ?tempdb(), ?assertMatch({ok, _}, fabric2_db:create(DbName1, [])), ?assertMatch({ok, _}, fabric2_db:create(DbName2, [])), UserFun = fun(Row, Acc) -> - maybe_tx_too_long(?PDICT_ERROR_IN_USER_FUN), + fabric2_test_util:tx_too_old_raise_in_user_fun(), {ok, [Row, Acc]} end, @@ -181,64 +173,87 @@ list_dbs_tx_too_long(_) -> Dbs = fabric2_db:list_dbs(UserFun, [], []), % Blow up in fold range - tx_too_long_errors(0, 1), + fabric2_test_util:tx_too_old_setup_errors(0, 1), ?assertEqual(Dbs, fabric2_db:list_dbs(UserFun, [], [])), % Blow up in fold_range after emitting one row - tx_too_long_errors(0, {1, 1}), + fabric2_test_util:tx_too_old_setup_errors(0, {1, 1}), ?assertEqual(Dbs, fabric2_db:list_dbs(UserFun, [], [])), % Blow up in user fun - tx_too_long_errors(1, 0), + fabric2_test_util:tx_too_old_setup_errors(2, 2), ?assertEqual(Dbs, fabric2_db:list_dbs(UserFun, [], [])), % Blow up in user fun after emitting one row - tx_too_long_errors({1, 1}, 0), + fabric2_test_util:tx_too_old_setup_errors({1, 1}, 0), ?assertEqual(Dbs, fabric2_db:list_dbs(UserFun, [], [])), % Blow up in in user fun and fold range - tx_too_long_errors(1, {1, 1}), + fabric2_test_util:tx_too_old_setup_errors(1, {1, 1}), ?assertEqual(Dbs, fabric2_db:list_dbs(UserFun, [], [])), ok = fabric2_db:delete(DbName1, []), ok = fabric2_db:delete(DbName2, []). -list_dbs_info_tx_too_long(_) -> - DbName1 = ?tempdb(), - DbName2 = ?tempdb(), - ?assertMatch({ok, _}, fabric2_db:create(DbName1, [])), - ?assertMatch({ok, _}, fabric2_db:create(DbName2, [])), +list_dbs_info_tx_too_old(_) -> + % list_dbs_info uses a queue of 100 futures to fetch db infos in parallel + % so create more than 100 dbs so make sure we have 100+ dbs in our test + + DbCount = 101, + DbNames = fabric2_util:pmap(fun(_) -> + DbName = ?tempdb(), + ?assertMatch({ok, _}, fabric2_db:create(DbName, [])), + DbName + end, lists:seq(1, DbCount)), UserFun = fun(Row, Acc) -> - maybe_tx_too_long(?PDICT_ERROR_IN_USER_FUN), + fabric2_test_util:tx_too_old_raise_in_user_fun(), {ok, [Row, Acc]} end, + % This is the expected return with no tx timeouts {ok, DbInfos} = fabric2_db:list_dbs_info(UserFun, [], []), - % Blow up in fold range - tx_too_long_errors(0, 1), + % Blow up in fold range on the first call + fabric2_test_util:tx_too_old_setup_errors(0, 1), ?assertEqual({ok, DbInfos}, fabric2_db:list_dbs_info(UserFun, [], [])), % Blow up in fold_range after emitting one row - tx_too_long_errors(0, {1, 1}), + fabric2_test_util:tx_too_old_setup_errors(0, {1, 1}), + ?assertEqual({ok, DbInfos}, fabric2_db:list_dbs_info(UserFun, [], [])), + + % Blow up in fold_range after emitting 99 rows + fabric2_test_util:tx_too_old_setup_errors(0, {DbCount - 2, 1}), + ?assertEqual({ok, DbInfos}, fabric2_db:list_dbs_info(UserFun, [], [])), + + % Blow up in fold_range after emitting 100 rows + fabric2_test_util:tx_too_old_setup_errors(0, {DbCount - 1, 1}), ?assertEqual({ok, DbInfos}, fabric2_db:list_dbs_info(UserFun, [], [])), % Blow up in user fun - tx_too_long_errors(1, 0), + fabric2_test_util:tx_too_old_setup_errors(1, 0), ?assertEqual({ok, DbInfos}, fabric2_db:list_dbs_info(UserFun, [], [])), % Blow up in user fun after emitting one row - tx_too_long_errors({1, 1}, 0), + fabric2_test_util:tx_too_old_setup_errors({1, 1}, 0), + ?assertEqual({ok, DbInfos}, fabric2_db:list_dbs_info(UserFun, [], [])), + + % Blow up in user fun after emitting 99 rows + fabric2_test_util:tx_too_old_setup_errors({DbCount - 2, 1}, 0), + ?assertEqual({ok, DbInfos}, fabric2_db:list_dbs_info(UserFun, [], [])), + + % Blow up in user fun after emitting 100 rows + fabric2_test_util:tx_too_old_setup_errors({DbCount - 1, 1}, 0), ?assertEqual({ok, DbInfos}, fabric2_db:list_dbs_info(UserFun, [], [])), % Blow up in in user fun and fold range - tx_too_long_errors(1, {1, 1}), + fabric2_test_util:tx_too_old_setup_errors(1, {1, 1}), ?assertEqual({ok, DbInfos}, fabric2_db:list_dbs_info(UserFun, [], [])), - ok = fabric2_db:delete(DbName1, []), - ok = fabric2_db:delete(DbName2, []). + fabric2_util:pmap(fun(DbName) -> + ?assertEqual(ok, fabric2_db:delete(DbName, [])) + end, DbNames). is_db_info_member(_, []) -> @@ -251,34 +266,3 @@ is_db_info_member(DbName, [DbInfo | RestInfos]) -> _E -> is_db_info_member(DbName, RestInfos) end. - - -tx_too_long_errors(UserFunCount, FoldErrors) when is_integer(UserFunCount) -> - tx_too_long_errors({0, UserFunCount}, FoldErrors); - -tx_too_long_errors(UserFunErrors, FoldCount) when is_integer(FoldCount) -> - tx_too_long_errors(UserFunErrors, {0, FoldCount}); - -tx_too_long_errors({UserFunSkip, UserFunCount}, {FoldSkip, FoldCount}) -> - reset_error_counts(), - put(?PDICT_ERROR_IN_USER_FUN, {UserFunSkip, UserFunCount}), - put(?PDICT_ERROR_IN_FOLD_RANGE, {FoldSkip, FoldCount}). - - -reset_error_counts() -> - erase(?PDICT_ERROR_IN_FOLD_RANGE), - erase(?PDICT_ERROR_IN_USER_FUN). - - -maybe_tx_too_long(Key) -> - case get(Key) of - {Skip, Count} when is_integer(Skip), Skip > 0 -> - put(Key, {Skip - 1, Count}); - {0, Count} when is_integer(Count), Count > 0 -> - put(Key, {0, Count - 1}), - error({erlfdb_error, 1007}); - {0, 0} -> - ok; - undefined -> - ok - end. diff --git a/src/fabric/test/fabric2_test_util.erl b/src/fabric/test/fabric2_test_util.erl new file mode 100644 index 000000000..acbe252b1 --- /dev/null +++ b/src/fabric/test/fabric2_test_util.erl @@ -0,0 +1,76 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(fabric2_test_util). + + +-export([ + tx_too_old_mock_erlfdb/0, + tx_too_old_setup_errors/2, + tx_too_old_reset_errors/0, + tx_too_old_raise_in_user_fun/0 +]). + + +-define(PDICT_ERROR_IN_FOLD_RANGE, '$fabric2_error_in_fold_range'). +-define(PDICT_ERROR_IN_USER_FUN, '$fabric2_error_throw_in_user_fun'). + + +% Set of function to test scenarios where the FDB throws transaction_too_long +% (1007) errors. The general pattern is to call tx_too_old_mock_erlfdb() in +% setup. Then, before tests call tx_too_old_setup_errors(UserErrs, FoldErrs) +% which will set how and when the error will be thrown. + +tx_too_old_mock_erlfdb() -> + meck:expect(erlfdb, fold_range, fun(Tx, Start, End, Callback, Acc, Opts) -> + MockFun = fun(Row, InnerAcc) -> + maybe_tx_too_old(?PDICT_ERROR_IN_FOLD_RANGE), + Callback(Row, InnerAcc) + end, + meck:passthrough([Tx, Start, End, MockFun, Acc, Opts]) + end). + + +tx_too_old_setup_errors(UserCnt, FoldErrs) when is_integer(UserCnt) -> + tx_too_old_setup_errors({0, UserCnt}, FoldErrs); + +tx_too_old_setup_errors(UserErrs, FoldCnt) when is_integer(FoldCnt) -> + tx_too_old_setup_errors(UserErrs, {0, FoldCnt}); + +tx_too_old_setup_errors({UserSkip, UserCnt}, {FoldSkip, FoldCnt}) -> + put(?PDICT_ERROR_IN_USER_FUN, {UserSkip, UserCnt}), + put(?PDICT_ERROR_IN_FOLD_RANGE, {FoldSkip, FoldCnt}). + + +tx_too_old_reset_errors() -> + erase(?PDICT_ERROR_IN_FOLD_RANGE), + erase(?PDICT_ERROR_IN_USER_FUN). + + +tx_too_old_raise_in_user_fun() -> + maybe_tx_too_old(?PDICT_ERROR_IN_USER_FUN). + + +% Private functions + +maybe_tx_too_old(Key) -> + case get(Key) of + {Skip, Count} when is_integer(Skip), Skip > 0 -> + put(Key, {Skip - 1, Count}); + {0, Count} when is_integer(Count), Count > 0 -> + put(Key, {0, Count - 1}), + error({erlfdb_error, 1007}); + {0, 0} -> + ok; + undefined -> + ok + end. -- cgit v1.2.1 From 1f54b1419342c5182a5fd17863020e08137e479d Mon Sep 17 00:00:00 2001 From: Juanjo Rodriguez Date: Mon, 16 Mar 2020 16:21:57 +0100 Subject: Port elixir proxyauth tests from js to elixir (#2660) * Add support for specify a custom config file for CouchDB startup during testing * Port proxyauth test from js to elixir --- Makefile | 5 +- dev/run | 27 ++++++ src/mango/test/user_docs.py | 2 +- test/elixir/README.md | 2 +- test/elixir/lib/couch.ex | 4 +- test/elixir/test/config/test-config.ini | 2 + test/elixir/test/proxyauth_test.exs | 163 ++++++++++++++++++++++++++++++++ test/elixir/test/users_db_test.exs | 3 +- test/javascript/tests/proxyauth.js | 51 +++++----- 9 files changed, 227 insertions(+), 32 deletions(-) create mode 100644 test/elixir/test/config/test-config.ini create mode 100644 test/elixir/test/proxyauth_test.exs diff --git a/Makefile b/Makefile index e229ee55b..7d56dd1ab 100644 --- a/Makefile +++ b/Makefile @@ -223,7 +223,10 @@ python-black-update: .venv/bin/black elixir: export MIX_ENV=integration elixir: export COUCHDB_TEST_ADMIN_PARTY_OVERRIDE=1 elixir: elixir-init elixir-check-formatted elixir-credo devclean - @dev/run "$(TEST_OPTS)" -a adm:pass -n 1 --enable-erlang-views --no-eval 'mix test --trace --exclude without_quorum_test --exclude with_quorum_test $(EXUNIT_OPTS)' + @dev/run "$(TEST_OPTS)" -a adm:pass -n 1 \ + --enable-erlang-views \ + --locald-config test/elixir/test/config/test-config.ini \ + --no-eval 'mix test --trace --exclude without_quorum_test --exclude with_quorum_test $(EXUNIT_OPTS)' .PHONY: elixir-init elixir-init: MIX_ENV=test diff --git a/dev/run b/dev/run index a96817d83..573c80c9b 100755 --- a/dev/run +++ b/dev/run @@ -211,6 +211,14 @@ def get_args_parser(): default=None, help="Extra arguments to pass to beam process", ) + parser.add_option( + "-l", + "--locald-config", + dest="locald_configs", + action="append", + default=[], + help="Path to config to place in 'local.d'. Can be repeated", + ) return parser @@ -238,6 +246,7 @@ def setup_context(opts, args): "reset_logs": True, "procs": [], "auto_ports": opts.auto_ports, + "locald_configs": opts.locald_configs, } @@ -279,9 +288,24 @@ def setup_configs(ctx): "_default": "", } write_config(ctx, node, env) + write_locald_configs(ctx, node, env) generate_haproxy_config(ctx) +def write_locald_configs(ctx, node, env): + for locald_config in ctx["locald_configs"]: + config_src = os.path.join(ctx["rootdir"], locald_config) + if os.path.exists(config_src): + config_filename = os.path.basename(config_src) + config_tgt = os.path.join( + ctx["devdir"], "lib", node, "etc", "local.d", config_filename + ) + with open(config_src) as handle: + content = handle.read() + with open(config_tgt, "w") as handle: + handle.write(content) + + def generate_haproxy_config(ctx): haproxy_config = os.path.join(ctx["devdir"], "lib", "haproxy.cfg") template = os.path.join(ctx["rootdir"], "rel", "haproxy.cfg") @@ -382,6 +406,8 @@ def write_config(ctx, node, env): with open(tgt, "w") as handle: handle.write(content) + ensure_dir_exists(etc_tgt, "local.d") + def boot_haproxy(ctx): if not ctx["with_haproxy"]: @@ -580,6 +606,7 @@ def boot_node(ctx, node): "-couch_ini", os.path.join(node_etcdir, "default.ini"), os.path.join(node_etcdir, "local.ini"), + os.path.join(node_etcdir, "local.d"), "-reltool_config", os.path.join(reldir, "reltool.config"), "-parent_pid", diff --git a/src/mango/test/user_docs.py b/src/mango/test/user_docs.py index 316ca7841..617b430c7 100644 --- a/src/mango/test/user_docs.py +++ b/src/mango/test/user_docs.py @@ -60,7 +60,7 @@ def setup_users(db, **kwargs): def teardown_users(db): - [db.delete_doc(doc['_id']) for doc in USERS_DOCS] + [db.delete_doc(doc["_id"]) for doc in USERS_DOCS] def setup(db, index_type="view", **kwargs): diff --git a/test/elixir/README.md b/test/elixir/README.md index 1fc0ce630..2806cfb7a 100644 --- a/test/elixir/README.md +++ b/test/elixir/README.md @@ -60,7 +60,7 @@ X means done, - means partially - [X] Port lots_of_docs.js - [ ] Port method_override.js - [X] Port multiple_rows.js - - [ ] Port proxyauth.js + - [X] Port proxyauth.js - [ ] Port purge.js - [ ] Port reader_acl.js - [ ] Port recreate_doc.js diff --git a/test/elixir/lib/couch.ex b/test/elixir/lib/couch.ex index 3aef07f01..7819299cc 100644 --- a/test/elixir/lib/couch.ex +++ b/test/elixir/lib/couch.ex @@ -127,8 +127,8 @@ defmodule Couch do def set_auth_options(options) do if Keyword.get(options, :cookie) == nil do headers = Keyword.get(options, :headers, []) - - if headers[:basic_auth] != nil or headers[:authorization] != nil do + if headers[:basic_auth] != nil or headers[:authorization] != nil + or List.keymember?(headers, :"X-Auth-CouchDB-UserName", 0) do options else username = System.get_env("EX_USERNAME") || "adm" diff --git a/test/elixir/test/config/test-config.ini b/test/elixir/test/config/test-config.ini new file mode 100644 index 000000000..72a13a707 --- /dev/null +++ b/test/elixir/test/config/test-config.ini @@ -0,0 +1,2 @@ +[chttpd] +authentication_handlers = {chttpd_auth, proxy_authentication_handler}, {chttpd_auth, cookie_authentication_handler}, {chttpd_auth, default_authentication_handler} diff --git a/test/elixir/test/proxyauth_test.exs b/test/elixir/test/proxyauth_test.exs new file mode 100644 index 000000000..6f2d49a53 --- /dev/null +++ b/test/elixir/test/proxyauth_test.exs @@ -0,0 +1,163 @@ +defmodule ProxyAuthTest do + use CouchTestCase + + @moduletag :authentication + + @tag :with_db + test "proxy auth with secret", context do + db_name = context[:db_name] + + design_doc = %{ + _id: "_design/test", + language: "javascript", + shows: %{ + welcome: """ + function(doc,req) { + return "Welcome " + req.userCtx["name"]; + } + """, + role: """ + function(doc, req) { + return req.userCtx['roles'][0]; + } + """ + } + } + + {:ok, _} = create_doc(db_name, design_doc) + + users_db_name = random_db_name() + create_db(users_db_name) + + secret = generate_secret(64) + + server_config = [ + %{ + :section => "chttpd_auth", + :key => "authentication_db", + :value => users_db_name + }, + %{ + :section => "couch_httpd_auth", + :key => "proxy_use_secret", + :value => "true" + }, + %{ + :section => "couch_httpd_auth", + :key => "secret", + :value => secret + } + ] + + run_on_modified_server(server_config, fn -> + test_fun(db_name, users_db_name, secret) + end) + delete_db(users_db_name) + end + + defp generate_secret(len) do + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/" + |> String.splitter("", trim: true) + |> Enum.take_random(len) + |> Enum.join("") + end + + defp hex_hmac_sha1(secret, message) do + signature = :crypto.hmac(:sha, secret, message) + Base.encode16(signature, case: :lower) + end + + def test_fun(db_name, users_db_name, secret) do + user = prepare_user_doc(name: "couch@apache.org", password: "test") + create_doc(users_db_name, user) + + resp = + Couch.get("/_session", + headers: [authorization: "Basic Y291Y2hAYXBhY2hlLm9yZzp0ZXN0"] + ) + + assert resp.body["userCtx"]["name"] == "couch@apache.org" + assert resp.body["info"]["authenticated"] == "default" + + headers = [ + "X-Auth-CouchDB-UserName": "couch@apache.org", + "X-Auth-CouchDB-Roles": "test", + "X-Auth-CouchDB-Token": hex_hmac_sha1(secret, "couch@apache.org") + ] + resp = Couch.get("/#{db_name}/_design/test/_show/welcome", headers: headers) + assert resp.body == "Welcome couch@apache.org" + + resp = Couch.get("/#{db_name}/_design/test/_show/role", headers: headers) + assert resp.body == "test" + end + + @tag :with_db + test "proxy auth without secret", context do + db_name = context[:db_name] + + design_doc = %{ + _id: "_design/test", + language: "javascript", + shows: %{ + welcome: """ + function(doc,req) { + return "Welcome " + req.userCtx["name"]; + } + """, + role: """ + function(doc, req) { + return req.userCtx['roles'][0]; + } + """ + } + } + + {:ok, _} = create_doc(db_name, design_doc) + + users_db_name = random_db_name() + create_db(users_db_name) + + server_config = [ + %{ + :section => "chttpd_auth", + :key => "authentication_db", + :value => users_db_name + }, + %{ + :section => "couch_httpd_auth", + :key => "proxy_use_secret", + :value => "false" + } + ] + + run_on_modified_server(server_config, fn -> + test_fun_no_secret(db_name, users_db_name) + end) + + delete_db(users_db_name) + end + + def test_fun_no_secret(db_name, users_db_name) do + user = prepare_user_doc(name: "couch@apache.org", password: "test") + create_doc(users_db_name, user) + + resp = + Couch.get("/_session", + headers: [authorization: "Basic Y291Y2hAYXBhY2hlLm9yZzp0ZXN0"] + ) + + assert resp.body["userCtx"]["name"] == "couch@apache.org" + assert resp.body["info"]["authenticated"] == "default" + + headers = [ + "X-Auth-CouchDB-UserName": "couch@apache.org", + "X-Auth-CouchDB-Roles": "test" + ] + + resp = Couch.get("/#{db_name}/_design/test/_show/welcome", headers: headers) + assert resp.body == "Welcome couch@apache.org" + + resp = Couch.get("/#{db_name}/_design/test/_show/role", headers: headers) + assert resp.body == "test" + end +end diff --git a/test/elixir/test/users_db_test.exs b/test/elixir/test/users_db_test.exs index 71ab2f7e7..1d34d8c9e 100644 --- a/test/elixir/test/users_db_test.exs +++ b/test/elixir/test/users_db_test.exs @@ -147,7 +147,8 @@ defmodule UsersDbTest do assert resp.body["userCtx"]["name"] == "jchris@apache.org" assert resp.body["info"]["authenticated"] == "default" assert resp.body["info"]["authentication_db"] == @users_db_name - assert resp.body["info"]["authentication_handlers"] == ["cookie", "default"] + assert Enum.member?(resp.body["info"]["authentication_handlers"], "cookie") + assert Enum.member?(resp.body["info"]["authentication_handlers"], "default") resp = Couch.get( diff --git a/test/javascript/tests/proxyauth.js b/test/javascript/tests/proxyauth.js index cc75faaf3..a91f28c32 100644 --- a/test/javascript/tests/proxyauth.js +++ b/test/javascript/tests/proxyauth.js @@ -9,12 +9,11 @@ // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the // License for the specific language governing permissions and limitations under // the License. - - - + +couchTests.elixir = true; couchTests.proxyauth = function(debug) { // this test proxy authentification handler - + return console.log('done in test/elixir/test/proxyauth_test.exs'); var users_db_name = get_random_db_name(); var usersDb = new CouchDB(users_db_name, {"X-Couch-Full-Commit":"false"}); usersDb.createDb(); @@ -22,9 +21,9 @@ couchTests.proxyauth = function(debug) { var db_name = get_random_db_name(); var db = new CouchDB(db_name, {"X-Couch-Full-Commit":"false"}); db.createDb(); - + if (debug) debugger; - + // Simple secret key generator function generateSecret(length) { var tab = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; @@ -34,16 +33,16 @@ couchTests.proxyauth = function(debug) { } return secret; } - + var secret = generateSecret(64); - + function TestFun() { - + var benoitcUserDoc = CouchDB.prepareUserDoc({ name: "benoitc@apache.org" }, "test"); T(usersDb.save(benoitcUserDoc).ok); - + T(CouchDB.session().userCtx.name == null); // test that you can use basic auth aginst the users db @@ -54,20 +53,20 @@ couchTests.proxyauth = function(debug) { }); T(s.userCtx.name == "benoitc@apache.org"); T(s.info.authenticated == "default"); - + CouchDB.logout(); -/* XXX: None of the rest of this is supported yet in 2.0 +/* XXX: None of the rest of this is supported yet in 2.0 var headers = { "X-Auth-CouchDB-UserName": "benoitc@apache.org", "X-Auth-CouchDB-Roles": "test", "X-Auth-CouchDB-Token": hex_hmac_sha1(secret, "benoitc@apache.org") }; - + var designDoc = { _id:"_design/test", language: "javascript", - + shows: { "welcome": stringFun(function(doc,req) { return "Welcome " + req.userCtx["name"]; @@ -79,53 +78,53 @@ couchTests.proxyauth = function(debug) { }; db.save(designDoc); - + var req = CouchDB.request("GET", "/" + db_name + "/_design/test/_show/welcome", {headers: headers}); T(req.responseText == "Welcome benoitc@apache.org", req.responseText); - + req = CouchDB.request("GET", "/" + db_name + "/_design/test/_show/role", {headers: headers}); T(req.responseText == "test"); - + var xhr = CouchDB.request("PUT", "/_node/node1@127.0.0.1/_config/couch_httpd_auth/proxy_use_secret",{ body : JSON.stringify("true"), headers: {"X-Couch-Persist": "false"} }); T(xhr.status == 200); - + req = CouchDB.request("GET", "/" + db_name + "/_design/test/_show/welcome", {headers: headers}); T(req.responseText == "Welcome benoitc@apache.org"); - + req = CouchDB.request("GET", "/" + db_name + "/_design/test/_show/role", {headers: headers}); T(req.responseText == "test"); */ } - + run_on_modified_server( [{section: "httpd", key: "authentication_handlers", value:"{chttpd_auth, proxy_authentication_handler}, {chttpd_auth, default_authentication_handler}"}, {section: "chttpd_auth", - key: "authentication_db", + key: "authentication_db", value: users_db_name}, {section: "chttpd_auth", - key: "secret", + key: "secret", value: secret}, {section: "chttpd_auth", - key: "x_auth_username", + key: "x_auth_username", value: "X-Auth-CouchDB-UserName"}, {section: "chttpd_auth", - key: "x_auth_roles", + key: "x_auth_roles", value: "X-Auth-CouchDB-Roles"}, {section: "chttpd_auth", - key: "x_auth_token", + key: "x_auth_token", value: "X-Auth-CouchDB-Token"}, {section: "chttpd_auth", - key: "proxy_use_secret", + key: "proxy_use_secret", value: "false"}], TestFun ); -- cgit v1.2.1 From ff6cef663afe4665f85d2e5cfb458d2bd1a16caf Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Mon, 16 Mar 2020 16:44:18 +0000 Subject: Throw if an unknown check is passed to jwtf:decode --- src/jwtf/src/jwtf.erl | 19 +++++++++++++++++++ src/jwtf/test/jwtf_tests.erl | 4 ++++ 2 files changed, 23 insertions(+) diff --git a/src/jwtf/src/jwtf.erl b/src/jwtf/src/jwtf.erl index 0bdc0aa1a..b558bdc63 100644 --- a/src/jwtf/src/jwtf.erl +++ b/src/jwtf/src/jwtf.erl @@ -35,6 +35,16 @@ {<<"HS384">>, {hmac, sha384}}, {<<"HS512">>, {hmac, sha512}}]). +-define(CHECKS, [ + alg, + exp, + iat, + iss, + kid, + nbf, + sig, + typ]). + % @doc encode % Encode the JSON Header and Claims using Key and Alg obtained from Header @@ -102,6 +112,7 @@ verification_algorithm(Alg) -> validate(Header0, Payload0, Signature, Checks, KS) -> + validate_checks(Checks), Header1 = props(decode_b64url_json(Header0)), validate_header(Header1, Checks), @@ -112,6 +123,14 @@ validate(Header0, Payload0, Signature, Checks, KS) -> Key = key(Header1, Checks, KS), verify(Alg, Header0, Payload0, Signature, Key). +validate_checks(Checks) when is_list(Checks) -> + UnknownChecks = proplists:get_keys(Checks) -- ?CHECKS, + case UnknownChecks of + [] -> + ok; + UnknownChecks -> + error({unknown_checks, UnknownChecks}) + end. validate_header(Props, Checks) -> validate_typ(Props, Checks), diff --git a/src/jwtf/test/jwtf_tests.erl b/src/jwtf/test/jwtf_tests.erl index 222bb4792..e445e5fc9 100644 --- a/src/jwtf/test/jwtf_tests.erl +++ b/src/jwtf/test/jwtf_tests.erl @@ -178,6 +178,10 @@ malformed_token_test() -> ?assertEqual({error, {bad_request, <<"Malformed token">>}}, jwtf:decode(<<"a.b.c.d">>, [], nil)). +unknown_check_test() -> + ?assertError({unknown_checks, [bar, foo]}, + jwtf:decode(<<"a.b.c">>, [exp, foo, iss, bar, exp], nil)). + %% jwt.io generated hs256_test() -> -- cgit v1.2.1 From f28a1ad439ffd1a2df3a715a2b696c0f4886bddf Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Mon, 16 Mar 2020 18:15:20 -0400 Subject: A few minor cleanups in fabric2_db_crud_tests * Made a silly error building the accumulator list: `[Row, Acc]` -> `[Row | Acc]` * Left some debugging code in `list_dbs_tx_too_old` test The test was supposed to setup only 1 failure in the user callback --- src/fabric/test/fabric2_db_crud_tests.erl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/fabric/test/fabric2_db_crud_tests.erl b/src/fabric/test/fabric2_db_crud_tests.erl index c0a65ebd8..205994267 100644 --- a/src/fabric/test/fabric2_db_crud_tests.erl +++ b/src/fabric/test/fabric2_db_crud_tests.erl @@ -166,7 +166,7 @@ list_dbs_tx_too_old(_) -> UserFun = fun(Row, Acc) -> fabric2_test_util:tx_too_old_raise_in_user_fun(), - {ok, [Row, Acc]} + {ok, [Row | Acc]} end, % Get get expected output without any transactions timing out @@ -181,7 +181,7 @@ list_dbs_tx_too_old(_) -> ?assertEqual(Dbs, fabric2_db:list_dbs(UserFun, [], [])), % Blow up in user fun - fabric2_test_util:tx_too_old_setup_errors(2, 2), + fabric2_test_util:tx_too_old_setup_errors(1, 0), ?assertEqual(Dbs, fabric2_db:list_dbs(UserFun, [], [])), % Blow up in user fun after emitting one row @@ -209,7 +209,7 @@ list_dbs_info_tx_too_old(_) -> UserFun = fun(Row, Acc) -> fabric2_test_util:tx_too_old_raise_in_user_fun(), - {ok, [Row, Acc]} + {ok, [Row | Acc]} end, % This is the expected return with no tx timeouts -- cgit v1.2.1 From 3bac80479bd57535e5748dd4e6c3bc53d1ef7bb0 Mon Sep 17 00:00:00 2001 From: Garren Smith Date: Mon, 16 Mar 2020 16:21:33 +0200 Subject: add _conflicts field to docs for views --- src/couch_views/src/couch_views_indexer.erl | 10 ++++++++- src/fabric/src/fabric2_db.erl | 4 +++- test/elixir/test/map_test.exs | 35 +++++++++++++++++++++++++++++ 3 files changed, 47 insertions(+), 2 deletions(-) diff --git a/src/couch_views/src/couch_views_indexer.erl b/src/couch_views/src/couch_views_indexer.erl index fb732a698..04dbcf815 100644 --- a/src/couch_views/src/couch_views_indexer.erl +++ b/src/couch_views/src/couch_views_indexer.erl @@ -348,7 +348,15 @@ fetch_docs(Db, Changes) -> ChangesWithDocs = lists:map(fun (BodyFuture) -> {Id, RevInfo, Change} = maps:get(BodyFuture, BodyState), Doc = fabric2_fdb:get_doc_body_wait(Db, Id, RevInfo, BodyFuture), - Change#{doc => Doc} + + BranchCount = maps:get(branch_count, RevInfo, 1), + Doc1 = if BranchCount == 1 -> Doc; true -> + RevConflicts = fabric2_fdb:get_all_revs(Db, Id), + {ok, DocWithConflicts} = fabric2_db:apply_open_doc_opts(Doc, + RevConflicts, [conflicts]), + DocWithConflicts + end, + Change#{doc => Doc1} end, erlfdb:wait_for_all(BodyFutures)), % This combines the deleted changes with the changes that contain docs diff --git a/src/fabric/src/fabric2_db.erl b/src/fabric/src/fabric2_db.erl index 791282f63..4d65f306f 100644 --- a/src/fabric/src/fabric2_db.erl +++ b/src/fabric/src/fabric2_db.erl @@ -126,7 +126,9 @@ validate_dbname/1, %% make_doc/5, - new_revid/2 + new_revid/2, + + apply_open_doc_opts/3 ]). diff --git a/test/elixir/test/map_test.exs b/test/elixir/test/map_test.exs index d2a79449f..84325659d 100644 --- a/test/elixir/test/map_test.exs +++ b/test/elixir/test/map_test.exs @@ -551,6 +551,41 @@ defmodule ViewMapTest do assert ids == ["doc-id-8", "doc-id-7", "doc-id-6"] end + test "_conflict is supported", context do + db_name = context[:db_name] + conflict = %{ + :_id => "doc-id-1", + :value => 10, + :some => "field", + :group => false, + :_rev => "1-7cc2eea421141064893681a1582148d8" + } + ddoc = %{ + _id: "_design/conflicts", + views: %{ + view: %{ + map: """ + function (doc) { + if (!doc._conflicts) { + return; + } + emit(doc._id, doc._conflicts); + } + """ + } + } + } + + resp = Couch.post("/#{db_name}/_bulk_docs", body: %{:docs => [ddoc]}) + assert resp.status_code == 201 + resp = Couch.post("/#{db_name}/_bulk_docs", body: %{:docs => [conflict], :new_edits => false}) + assert resp.status_code == 201 + + url = "/#{db_name}/_design/conflicts/_view/view" + resp = Couch.get(url) + assert get_ids(resp) == ["doc-id-1"] + end + def update_doc_value(db_name, id, value) do resp = Couch.get("/#{db_name}/#{id}") doc = convert(resp.body) -- cgit v1.2.1 From 0cafb178488e77e94b29607d278aae22ffb98187 Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Mon, 16 Mar 2020 13:50:34 -0400 Subject: Handle transaction cancelled errors in list_dbs_info/3 `list_dbs_info/3` maintains a queue of up to 100 futures which are used to concurrently fetch data. Previously, if the transaction was reset, and the accumulator inside the fold may have had futures from a previous transaction, which have gotten their results yet, they threw a transaction_canceled (1025) error. To fix this, if we're in a read-only transaction, we return the tx object in the opaque db info record. Then, if `erlfdb:wait/1` throws a transaction canceled error, we re-fetch the future from the now restarted transaction. Potentially, the transaction may also time-out while the futures queues is drained after the main range fold has finished already. Handle that case by reseting the transaction and then re-fetching the futures. To avoid an infinite loop we allow up to 2 retries only. This approach is not the most optimal but simpler as it hides the complexity inside the fabric2_fdb module where we already handle these conditions. It means that every 5 or so seconds we might have to refetch less than 100 extra futures from the queue (as some or all may have gotten their results back already). --- src/fabric/include/fabric2.hrl | 1 + src/fabric/src/fabric2_fdb.erl | 96 ++++++++++++++++++++++--------- src/fabric/test/fabric2_db_crud_tests.erl | 88 +++++++++++++++++++++++++++- 3 files changed, 157 insertions(+), 28 deletions(-) diff --git a/src/fabric/include/fabric2.hrl b/src/fabric/include/fabric2.hrl index f526d7b34..a4f68bdf6 100644 --- a/src/fabric/include/fabric2.hrl +++ b/src/fabric/include/fabric2.hrl @@ -68,6 +68,7 @@ -define(TRANSACTION_TOO_OLD, 1007). -define(FUTURE_VERSION, 1009). -define(COMMIT_UNKNOWN_RESULT, 1021). +-define(TRANSACTION_CANCELLED, 1025). -define(BINARY_CHUNK_SIZE, 100000). diff --git a/src/fabric/src/fabric2_fdb.erl b/src/fabric/src/fabric2_fdb.erl index 8bc87926d..403b5bb53 100644 --- a/src/fabric/src/fabric2_fdb.erl +++ b/src/fabric/src/fabric2_fdb.erl @@ -89,6 +89,14 @@ user_acc }). +-record(info_future, { + tx, + db_prefix, + changes_future, + meta_future, + retries = 0 +}). + transactional(Fun) -> do_transaction(Fun, undefined). @@ -386,36 +394,37 @@ get_info_future(Tx, DbPrefix) -> StatsPrefix = erlfdb_tuple:pack({?DB_STATS}, DbPrefix), MetaFuture = erlfdb:get_range_startswith(Tx, StatsPrefix), - {DbPrefix, ChangesFuture, MetaFuture}. + % Save the tx object only if it's read-only as we might retry to get the + % future again after the tx was reset + SaveTx = case erlfdb:get_writes_allowed(Tx) of + true -> undefined; + false -> Tx + end, + #info_future{ + tx = SaveTx, + db_prefix = DbPrefix, + changes_future = ChangesFuture, + meta_future = MetaFuture + }. -get_info_wait({DbPrefix, ChangesFuture, MetaFuture}) -> - RawSeq = case erlfdb:wait(ChangesFuture) of - [] -> - vs_to_seq(fabric2_util:seq_zero_vs()); - [{SeqKey, _}] -> - {?DB_CHANGES, SeqVS} = erlfdb_tuple:unpack(SeqKey, DbPrefix), - vs_to_seq(SeqVS) - end, - CProp = {update_seq, RawSeq}, - MProps = lists:foldl(fun({K, V}, Acc) -> - case erlfdb_tuple:unpack(K, DbPrefix) of - {?DB_STATS, <<"doc_count">>} -> - [{doc_count, ?bin2uint(V)} | Acc]; - {?DB_STATS, <<"doc_del_count">>} -> - [{doc_del_count, ?bin2uint(V)} | Acc]; - {?DB_STATS, <<"sizes">>, Name} -> - Val = ?bin2uint(V), - {_, {Sizes}} = lists:keyfind(sizes, 1, Acc), - NewSizes = lists:keystore(Name, 1, Sizes, {Name, Val}), - lists:keystore(sizes, 1, Acc, {sizes, {NewSizes}}); - {?DB_STATS, _} -> - Acc - end - end, [{sizes, {[]}}], erlfdb:wait(MetaFuture)), +get_info_wait(#info_future{tx = Tx, retries = Retries} = Future) + when Tx =:= undefined orelse Retries >= 2 -> + get_info_wait_int(Future); - [CProp | MProps]. +get_info_wait(#info_future{tx = Tx, retries = Retries} = Future) -> + try + get_info_wait_int(Future) + catch + error:{erlfdb_error, ?TRANSACTION_CANCELLED} -> + Future1 = get_info_future(Tx, Future#info_future.db_prefix), + get_info_wait(Future1#info_future{retries = Retries + 1}); + error:{erlfdb_error, ?TRANSACTION_TOO_OLD} -> + ok = erlfdb:reset(Tx), + Future1 = get_info_future(Tx, Future#info_future.db_prefix), + get_info_wait(Future1#info_future{retries = Retries + 1}) + end. load_config(#{} = Db) -> @@ -1759,6 +1768,41 @@ with_span(Operation, ExtraTags, Fun) -> end. +get_info_wait_int(#info_future{} = InfoFuture) -> + #info_future{ + db_prefix = DbPrefix, + changes_future = ChangesFuture, + meta_future = MetaFuture + } = InfoFuture, + + RawSeq = case erlfdb:wait(ChangesFuture) of + [] -> + vs_to_seq(fabric2_util:seq_zero_vs()); + [{SeqKey, _}] -> + {?DB_CHANGES, SeqVS} = erlfdb_tuple:unpack(SeqKey, DbPrefix), + vs_to_seq(SeqVS) + end, + CProp = {update_seq, RawSeq}, + + MProps = lists:foldl(fun({K, V}, Acc) -> + case erlfdb_tuple:unpack(K, DbPrefix) of + {?DB_STATS, <<"doc_count">>} -> + [{doc_count, ?bin2uint(V)} | Acc]; + {?DB_STATS, <<"doc_del_count">>} -> + [{doc_del_count, ?bin2uint(V)} | Acc]; + {?DB_STATS, <<"sizes">>, Name} -> + Val = ?bin2uint(V), + {_, {Sizes}} = lists:keyfind(sizes, 1, Acc), + NewSizes = lists:keystore(Name, 1, Sizes, {Name, Val}), + lists:keystore(sizes, 1, Acc, {sizes, {NewSizes}}); + {?DB_STATS, _} -> + Acc + end + end, [{sizes, {[]}}], erlfdb:wait(MetaFuture)), + + [CProp | MProps]. + + -ifdef(TEST). -include_lib("eunit/include/eunit.hrl"). diff --git a/src/fabric/test/fabric2_db_crud_tests.erl b/src/fabric/test/fabric2_db_crud_tests.erl index 205994267..a82afb54d 100644 --- a/src/fabric/test/fabric2_db_crud_tests.erl +++ b/src/fabric/test/fabric2_db_crud_tests.erl @@ -18,6 +18,9 @@ -include("fabric2_test.hrl"). +-define(PDICT_RAISE_IN_ERLFDB_WAIT, '$pdict_raise_in_erlfdb_wait'). + + crud_test_() -> { "Test database CRUD operations", @@ -39,7 +42,9 @@ crud_test_() -> ?TDEF_FE(list_dbs_info), ?TDEF_FE(list_dbs_info_partial), ?TDEF_FE(list_dbs_tx_too_old), - ?TDEF_FE(list_dbs_info_tx_too_old) + ?TDEF_FE(list_dbs_info_tx_too_old), + ?TDEF_FE(get_info_wait_retry_on_tx_too_old), + ?TDEF_FE(get_info_wait_retry_on_tx_abort) ] } } @@ -62,7 +67,9 @@ setup() -> cleanup(_) -> - fabric2_test_util:tx_too_old_reset_errors(). + fabric2_test_util:tx_too_old_reset_errors(), + reset_fail_erfdb_wait(), + meck:reset([erlfdb]). create_db(_) -> @@ -256,6 +263,83 @@ list_dbs_info_tx_too_old(_) -> end, DbNames). +get_info_wait_retry_on_tx_too_old(_) -> + DbName = ?tempdb(), + ?assertMatch({ok, _}, fabric2_db:create(DbName, [])), + + {ok, Db} = fabric2_db:open(DbName, []), + + fabric2_fdb:transactional(Db, fun(TxDb) -> + #{ + tx := Tx, + db_prefix := DbPrefix + } = TxDb, + + % Simulate being in a list_dbs_info callback + ok = erlfdb:set_option(Tx, disallow_writes), + + InfoF = fabric2_fdb:get_info_future(Tx, DbPrefix), + {info_future, _, _, ChangesF, _, _} = InfoF, + + raise_in_erlfdb_wait(ChangesF, {erlfdb_error, 1007}, 3), + ?assertError({erlfdb_error, 1007}, fabric2_fdb:get_info_wait(InfoF)), + + raise_in_erlfdb_wait(ChangesF, {erlfdb_error, 1007}, 2), + ?assertMatch([{_, _} | _], fabric2_fdb:get_info_wait(InfoF)), + + ?assertEqual(ok, fabric2_db:delete(DbName, [])) + end). + + +get_info_wait_retry_on_tx_abort(_)-> + DbName = ?tempdb(), + ?assertMatch({ok, _}, fabric2_db:create(DbName, [])), + + {ok, Db} = fabric2_db:open(DbName, []), + + fabric2_fdb:transactional(Db, fun(TxDb) -> + #{ + tx := Tx, + db_prefix := DbPrefix + } = TxDb, + + % Simulate being in a list_dbs_info callback + ok = erlfdb:set_option(Tx, disallow_writes), + + InfoF = fabric2_fdb:get_info_future(Tx, DbPrefix), + {info_future, _, _, ChangesF, _, _} = InfoF, + + raise_in_erlfdb_wait(ChangesF, {erlfdb_error, 1025}, 3), + ?assertError({erlfdb_error, 1025}, fabric2_fdb:get_info_wait(InfoF)), + + raise_in_erlfdb_wait(ChangesF, {erlfdb_error, 1025}, 2), + ?assertMatch([{_, _} | _], fabric2_fdb:get_info_wait(InfoF)), + + ?assertEqual(ok, fabric2_db:delete(DbName, [])) + end). + + +reset_fail_erfdb_wait() -> + erase(?PDICT_RAISE_IN_ERLFDB_WAIT), + meck:expect(erlfdb, wait, fun(F) -> meck:passthrough([F]) end). + + +raise_in_erlfdb_wait(Future, Error, Count) -> + put(?PDICT_RAISE_IN_ERLFDB_WAIT, Count), + meck:expect(erlfdb, wait, fun + (F) when F =:= Future -> + case get(?PDICT_RAISE_IN_ERLFDB_WAIT) of + N when is_integer(N), N > 0 -> + put(?PDICT_RAISE_IN_ERLFDB_WAIT, N - 1), + error(Error); + _ -> + meck:passthrough([F]) + end; + (F) -> + meck:passthrough([F]) + end). + + is_db_info_member(_, []) -> false; -- cgit v1.2.1 From 28ffba4611934cabcf68929abcf32656d60b91c0 Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Tue, 17 Mar 2020 12:50:20 -0400 Subject: Handle transaction timeouts in _all_docs Previously transactions could time-out, retry and re-emit the same data. Use the same mechanism as the _list_dbs and _changes feeds to fix it. Additional detail in the mailing list discussion: https://lists.apache.org/thread.html/r02cee7045cac4722e1682bb69ba0ec791f5cce025597d0099fb34033%40%3Cdev.couchdb.apache.org%3E --- src/chttpd/src/chttpd_db.erl | 3 +- src/fabric/test/fabric2_doc_fold_tests.erl | 117 ++++++++++++++++++++++------- 2 files changed, 90 insertions(+), 30 deletions(-) diff --git a/src/chttpd/src/chttpd_db.erl b/src/chttpd/src/chttpd_db.erl index 50a9effdb..b7a149b09 100644 --- a/src/chttpd/src/chttpd_db.erl +++ b/src/chttpd/src/chttpd_db.erl @@ -867,7 +867,8 @@ all_docs_view(Req, Db, Keys, OP) -> send_all_docs(Db, #mrargs{keys = undefined} = Args, VAcc0) -> - Opts = all_docs_view_opts(Args), + Opts0 = all_docs_view_opts(Args), + Opts = Opts0 ++ [{restart_tx, true}], NS = couch_util:get_value(namespace, Opts), FoldFun = case NS of <<"_all_docs">> -> fold_docs; diff --git a/src/fabric/test/fabric2_doc_fold_tests.erl b/src/fabric/test/fabric2_doc_fold_tests.erl index 6262a10fe..7c95dd372 100644 --- a/src/fabric/test/fabric2_doc_fold_tests.erl +++ b/src/fabric/test/fabric2_doc_fold_tests.erl @@ -27,25 +27,38 @@ doc_fold_test_() -> "Test document fold operations", { setup, - fun setup/0, - fun cleanup/1, - with([ - ?TDEF(fold_docs_basic), - ?TDEF(fold_docs_rev), - ?TDEF(fold_docs_with_start_key), - ?TDEF(fold_docs_with_end_key), - ?TDEF(fold_docs_with_both_keys_the_same), - ?TDEF(fold_docs_with_different_keys, 10000), - ?TDEF(fold_docs_with_limit), - ?TDEF(fold_docs_with_skip), - ?TDEF(fold_docs_with_skip_and_limit) - ]) + fun setup_all/0, + fun teardown_all/1, + { + foreach, + fun setup/0, + fun cleanup/1, + [ + ?TDEF_FE(fold_docs_basic), + ?TDEF_FE(fold_docs_rev), + ?TDEF_FE(fold_docs_with_start_key), + ?TDEF_FE(fold_docs_with_end_key), + ?TDEF_FE(fold_docs_with_both_keys_the_same), + ?TDEF_FE(fold_docs_with_different_keys, 10000), + ?TDEF_FE(fold_docs_with_limit), + ?TDEF_FE(fold_docs_with_skip), + ?TDEF_FE(fold_docs_with_skip_and_limit), + ?TDEF_FE(fold_docs_tx_too_old) + ] + } } }. +setup_all() -> + test_util:start_couch([fabric]). + + +teardown_all(Ctx) -> + test_util:stop_couch(Ctx). + + setup() -> - Ctx = test_util:start_couch([fabric]), {ok, Db} = fabric2_db:create(?tempdb(), [{user_ctx, ?ADMIN_USER}]), DocIdRevs = lists:map(fun(Val) -> DocId = fabric2_util:uuid(), @@ -56,38 +69,39 @@ setup() -> {ok, Rev} = fabric2_db:update_doc(Db, Doc, []), {DocId, {[{rev, couch_doc:rev_to_str(Rev)}]}} end, lists:seq(1, ?DOC_COUNT)), - {Db, lists:sort(DocIdRevs), Ctx}. + fabric2_test_util:tx_too_old_mock_erlfdb(), + {Db, lists:sort(DocIdRevs)}. -cleanup({Db, _DocIdRevs, Ctx}) -> - ok = fabric2_db:delete(fabric2_db:name(Db), []), - test_util:stop_couch(Ctx). +cleanup({Db, _DocIdRevs}) -> + fabric2_test_util:tx_too_old_reset_errors(), + ok = fabric2_db:delete(fabric2_db:name(Db), []). -fold_docs_basic({Db, DocIdRevs, _}) -> +fold_docs_basic({Db, DocIdRevs}) -> {ok, {?DOC_COUNT, Rows}} = fabric2_db:fold_docs(Db, fun fold_fun/2, []), ?assertEqual(DocIdRevs, lists:reverse(Rows)). -fold_docs_rev({Db, DocIdRevs, _}) -> +fold_docs_rev({Db, DocIdRevs}) -> Opts = [{dir, rev}], {ok, {?DOC_COUNT, Rows}} = fabric2_db:fold_docs(Db, fun fold_fun/2, [], Opts), ?assertEqual(DocIdRevs, Rows). -fold_docs_with_start_key({Db, DocIdRevs, _}) -> +fold_docs_with_start_key({Db, DocIdRevs}) -> {StartKey, _} = hd(DocIdRevs), Opts = [{start_key, StartKey}], {ok, {?DOC_COUNT, Rows}} = fabric2_db:fold_docs(Db, fun fold_fun/2, [], Opts), ?assertEqual(DocIdRevs, lists:reverse(Rows)), if length(DocIdRevs) == 1 -> ok; true -> - fold_docs_with_start_key({Db, tl(DocIdRevs), nil}) + fold_docs_with_start_key({Db, tl(DocIdRevs)}) end. -fold_docs_with_end_key({Db, DocIdRevs, _}) -> +fold_docs_with_end_key({Db, DocIdRevs}) -> RevDocIdRevs = lists:reverse(DocIdRevs), {EndKey, _} = hd(RevDocIdRevs), Opts = [{end_key, EndKey}], @@ -95,24 +109,24 @@ fold_docs_with_end_key({Db, DocIdRevs, _}) -> fabric2_db:fold_docs(Db, fun fold_fun/2, [], Opts), ?assertEqual(RevDocIdRevs, Rows), if length(DocIdRevs) == 1 -> ok; true -> - fold_docs_with_end_key({Db, lists:reverse(tl(RevDocIdRevs)), nil}) + fold_docs_with_end_key({Db, lists:reverse(tl(RevDocIdRevs))}) end. -fold_docs_with_both_keys_the_same({Db, DocIdRevs, _}) -> +fold_docs_with_both_keys_the_same({Db, DocIdRevs}) -> lists:foreach(fun({DocId, _} = Row) -> check_all_combos(Db, DocId, DocId, [Row]) end, DocIdRevs). -fold_docs_with_different_keys({Db, DocIdRevs, _}) -> +fold_docs_with_different_keys({Db, DocIdRevs}) -> lists:foreach(fun(_) -> {StartKey, EndKey, Rows} = pick_range(DocIdRevs), check_all_combos(Db, StartKey, EndKey, Rows) end, lists:seq(1, 500)). -fold_docs_with_limit({Db, DocIdRevs, _}) -> +fold_docs_with_limit({Db, DocIdRevs}) -> lists:foreach(fun(Limit) -> Opts1 = [{limit, Limit}], {ok, {?DOC_COUNT, Rows1}} = @@ -129,7 +143,7 @@ fold_docs_with_limit({Db, DocIdRevs, _}) -> end, lists:seq(0, 51)). -fold_docs_with_skip({Db, DocIdRevs, _}) -> +fold_docs_with_skip({Db, DocIdRevs}) -> lists:foreach(fun(Skip) -> Opts1 = [{skip, Skip}], {ok, {?DOC_COUNT, Rows1}} = @@ -151,13 +165,57 @@ fold_docs_with_skip({Db, DocIdRevs, _}) -> end, lists:seq(0, 51)). -fold_docs_with_skip_and_limit({Db, DocIdRevs, _}) -> +fold_docs_with_skip_and_limit({Db, DocIdRevs}) -> lists:foreach(fun(_) -> check_skip_and_limit(Db, [], DocIdRevs), check_skip_and_limit(Db, [{dir, rev}], lists:reverse(DocIdRevs)) end, lists:seq(1, 100)). +fold_docs_tx_too_old({Db, _DocIdRevs}) -> + {ok, Expected} = fabric2_db:fold_docs(Db, fun fold_fun/2, []), + + FoldDocsFun = fun() -> + fabric2_db:fold_docs(Db, fun fold_fun/2, [], [{restart_tx, true}]) + end, + + % Blow up in fold range on the first call + fabric2_test_util:tx_too_old_setup_errors(0, 1), + ?assertEqual({ok, Expected}, FoldDocsFun()), + + % Blow up in fold_range after emitting one row + fabric2_test_util:tx_too_old_setup_errors(0, {1, 1}), + ?assertEqual({ok, Expected}, FoldDocsFun()), + + % Blow up in fold_range after emitting 48 rows + fabric2_test_util:tx_too_old_setup_errors(0, {?DOC_COUNT - 2, 1}), + ?assertEqual({ok, Expected}, FoldDocsFun()), + + % Blow up in fold_range after emitting 49 rows + fabric2_test_util:tx_too_old_setup_errors(0, {?DOC_COUNT - 1, 1}), + ?assertEqual({ok, Expected}, FoldDocsFun()), + + % Blow up in user fun + fabric2_test_util:tx_too_old_setup_errors(1, 0), + ?assertEqual({ok, Expected}, FoldDocsFun()), + + % Blow up in user fun after emitting one row + fabric2_test_util:tx_too_old_setup_errors({1, 1}, 0), + ?assertEqual({ok, Expected}, FoldDocsFun()), + + % Blow up in user fun after emitting 48 rows + fabric2_test_util:tx_too_old_setup_errors({?DOC_COUNT - 2, 1}, 0), + ?assertEqual({ok, Expected}, FoldDocsFun()), + + % Blow up in user fun after emitting 49 rows + fabric2_test_util:tx_too_old_setup_errors({?DOC_COUNT - 1, 1}, 0), + ?assertEqual({ok, Expected}, FoldDocsFun()), + + % Blow up in in user fun and fold range + fabric2_test_util:tx_too_old_setup_errors(1, {1, 1}), + ?assertEqual({ok, Expected}, FoldDocsFun()). + + check_all_combos(Db, StartKey, EndKey, Rows) -> Opts1 = make_opts(fwd, StartKey, EndKey, true), {ok, {?DOC_COUNT, Rows1}} = @@ -280,6 +338,7 @@ fold_fun({meta, Meta}, _Acc) -> Total = fabric2_util:get_value(total, Meta), {ok, {Total, []}}; fold_fun({row, Row}, {Total, Rows}) -> + fabric2_test_util:tx_too_old_raise_in_user_fun(), RowId = fabric2_util:get_value(id, Row), RowId = fabric2_util:get_value(key, Row), RowRev = fabric2_util:get_value(value, Row), -- cgit v1.2.1 From 301db96fd7a9ab12f0fd787ee6cfeadf6c212c50 Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Tue, 17 Mar 2020 16:53:35 -0400 Subject: Reformat and simplify couch_views_indexer_test module * Use the `row/3` helper function in a few more places * Make a `run_query/3` function to shorten all the query calls * A few minor emilio suggesions (whitespace, comma issues, ...) --- src/couch_views/test/couch_views_indexer_test.erl | 304 +++++----------------- 1 file changed, 64 insertions(+), 240 deletions(-) diff --git a/src/couch_views/test/couch_views_indexer_test.erl b/src/couch_views/test/couch_views_indexer_test.erl index 02a12e788..5475cf68e 100644 --- a/src/couch_views/test/couch_views_indexer_test.erl +++ b/src/couch_views/test/couch_views_indexer_test.erl @@ -18,6 +18,11 @@ -include_lib("couch_mrview/include/couch_mrview.hrl"). -include_lib("fabric/test/fabric2_test.hrl"). + +-define(MAP_FUN1, <<"map_fun1">>). +-define(MAP_FUN2, <<"map_fun2">>). + + indexer_test_() -> { "Test view indexing", @@ -75,18 +80,8 @@ foreach_teardown(Db) -> indexed_empty_db(Db) -> DDoc = create_ddoc(), - {ok, _} = fabric2_db:update_doc(Db, DDoc, []), - {ok, Out} = couch_views:query( - Db, - DDoc, - <<"map_fun1">>, - fun fold_fun/2, - [], - #mrargs{} - ), - - ?assertEqual([], Out). + ?assertEqual({ok, []}, run_query(Db, DDoc, ?MAP_FUN1)). indexed_single_doc(Db) -> @@ -96,20 +91,9 @@ indexed_single_doc(Db) -> {ok, _} = fabric2_db:update_doc(Db, DDoc, []), {ok, _} = fabric2_db:update_doc(Db, Doc1, []), - {ok, Out} = couch_views:query( - Db, - DDoc, - <<"map_fun1">>, - fun fold_fun/2, - [], - #mrargs{} - ), + {ok, Out} = run_query(Db, DDoc, ?MAP_FUN1), - ?assertEqual([{row, [ - {id, <<"0">>}, - {key, 0}, - {value, 0} - ]}], Out). + ?assertEqual([row(<<"0">>, 0, 0)], Out). updated_docs_are_reindexed(Db) -> @@ -119,20 +103,9 @@ updated_docs_are_reindexed(Db) -> {ok, _} = fabric2_db:update_doc(Db, DDoc, []), {ok, {Pos, Rev}} = fabric2_db:update_doc(Db, Doc1, []), - {ok, Out1} = couch_views:query( - Db, - DDoc, - <<"map_fun1">>, - fun fold_fun/2, - [], - #mrargs{} - ), - - ?assertEqual([{row, [ - {id, <<"0">>}, - {key, 0}, - {value, 0} - ]}], Out1), + {ok, Out1} = run_query(Db, DDoc, ?MAP_FUN1), + + ?assertEqual([row(<<"0">>, 0, 0)], Out1), Doc2 = Doc1#doc{ revs = {Pos, [Rev]}, @@ -140,20 +113,9 @@ updated_docs_are_reindexed(Db) -> }, {ok, _} = fabric2_db:update_doc(Db, Doc2, []), - {ok, Out2} = couch_views:query( - Db, - DDoc, - <<"map_fun1">>, - fun fold_fun/2, - [], - #mrargs{} - ), - - ?assertEqual([{row, [ - {id, <<"0">>}, - {key, 1}, - {value, 1} - ]}], Out2), + {ok, Out2} = run_query(Db, DDoc, ?MAP_FUN1), + + ?assertEqual([row(<<"0">>, 1, 1)], Out2), % Check that our id index is updated properly % as well. @@ -175,20 +137,9 @@ updated_docs_without_changes_are_reindexed(Db) -> {ok, _} = fabric2_db:update_doc(Db, DDoc, []), {ok, {Pos, Rev}} = fabric2_db:update_doc(Db, Doc1, []), - {ok, Out1} = couch_views:query( - Db, - DDoc, - <<"map_fun1">>, - fun fold_fun/2, - [], - #mrargs{} - ), - - ?assertEqual([{row, [ - {id, <<"0">>}, - {key, 0}, - {value, 0} - ]}], Out1), + {ok, Out1} = run_query(Db, DDoc, ?MAP_FUN1), + + ?assertEqual([row(<<"0">>, 0, 0)], Out1), Doc2 = Doc1#doc{ revs = {Pos, [Rev]}, @@ -196,20 +147,9 @@ updated_docs_without_changes_are_reindexed(Db) -> }, {ok, _} = fabric2_db:update_doc(Db, Doc2, []), - {ok, Out2} = couch_views:query( - Db, - DDoc, - <<"map_fun1">>, - fun fold_fun/2, - [], - #mrargs{} - ), - - ?assertEqual([{row, [ - {id, <<"0">>}, - {key, 0}, - {value, 0} - ]}], Out2), + {ok, Out2} = run_query(Db, DDoc, ?MAP_FUN1), + + ?assertEqual([row(<<"0">>, 0, 0)], Out2), % Check fdb directly to make sure we've also % removed the id idx keys properly. @@ -237,16 +177,7 @@ deleted_docs_not_indexed(Db) -> }, {ok, _} = fabric2_db:update_doc(Db, Doc2, []), - {ok, Out} = couch_views:query( - Db, - DDoc, - <<"map_fun1">>, - fun fold_fun/2, - [], - #mrargs{} - ), - - ?assertEqual([], Out). + ?assertEqual({ok, []}, run_query(Db, DDoc, ?MAP_FUN1)). deleted_docs_are_unindexed(Db) -> @@ -256,20 +187,8 @@ deleted_docs_are_unindexed(Db) -> {ok, _} = fabric2_db:update_doc(Db, DDoc, []), {ok, {Pos, Rev}} = fabric2_db:update_doc(Db, Doc1, []), - {ok, Out1} = couch_views:query( - Db, - DDoc, - <<"map_fun1">>, - fun fold_fun/2, - [], - #mrargs{} - ), - - ?assertEqual([{row, [ - {id, <<"0">>}, - {key, 0}, - {value, 0} - ]}], Out1), + {ok, Out1} = run_query(Db, DDoc, ?MAP_FUN1), + ?assertEqual([row(<<"0">>, 0, 0)], Out1), Doc2 = Doc1#doc{ revs = {Pos, [Rev]}, @@ -278,16 +197,7 @@ deleted_docs_are_unindexed(Db) -> }, {ok, _} = fabric2_db:update_doc(Db, Doc2, []), - {ok, Out2} = couch_views:query( - Db, - DDoc, - <<"map_fun1">>, - fun fold_fun/2, - [], - #mrargs{} - ), - - ?assertEqual([], Out2), + ?assertEqual({ok, []}, run_query(Db, DDoc, ?MAP_FUN1)), % Check fdb directly to make sure we've also % removed the id idx keys properly. @@ -307,27 +217,12 @@ multipe_docs_with_same_key(Db) -> {ok, _} = fabric2_db:update_doc(Db, DDoc, []), {ok, _} = fabric2_db:update_docs(Db, [Doc1, Doc2], []), - {ok, Out} = couch_views:query( - Db, - DDoc, - <<"map_fun1">>, - fun fold_fun/2, - [], - #mrargs{} - ), + {ok, Out} = run_query(Db, DDoc, ?MAP_FUN1), ?assertEqual([ - {row, [ - {id, <<"0">>}, - {key, 1}, - {value, 1} - ]}, - {row, [ - {id, <<"1">>}, - {key, 1}, - {value, 1} - ]} - ], Out). + row(<<"0">>, 1, 1), + row(<<"1">>, 1, 1) + ], Out). multipe_keys_from_same_doc(Db) -> @@ -337,27 +232,12 @@ multipe_keys_from_same_doc(Db) -> {ok, _} = fabric2_db:update_doc(Db, DDoc, []), {ok, _} = fabric2_db:update_doc(Db, Doc, []), - {ok, Out} = couch_views:query( - Db, - DDoc, - <<"map_fun1">>, - fun fold_fun/2, - [], - #mrargs{} - ), + {ok, Out} = run_query(Db, DDoc, ?MAP_FUN1), ?assertEqual([ - {row, [ - {id, <<"0">>}, - {key, 1}, - {value, 1} - ]}, - {row, [ - {id, <<"0">>}, - {key, <<"0">>}, - {value, <<"0">>} - ]} - ], Out). + row(<<"0">>, 1, 1), + row(<<"0">>, <<"0">>, <<"0">>) + ], Out). multipe_identical_keys_from_same_doc(Db) -> @@ -367,27 +247,12 @@ multipe_identical_keys_from_same_doc(Db) -> {ok, _} = fabric2_db:update_doc(Db, DDoc, []), {ok, _} = fabric2_db:update_doc(Db, Doc, []), - {ok, Out} = couch_views:query( - Db, - DDoc, - <<"map_fun1">>, - fun fold_fun/2, - [], - #mrargs{} - ), + {ok, Out} = run_query(Db, DDoc, ?MAP_FUN1), ?assertEqual([ - {row, [ - {id, <<"0">>}, - {key, 1}, - {value, 1} - ]}, - {row, [ - {id, <<"0">>}, - {key, 1}, - {value, 2} - ]} - ], Out). + row(<<"0">>, 1, 1), + row(<<"0">>, 1, 2) + ], Out). fewer_multipe_identical_keys_from_same_doc(Db) -> @@ -400,20 +265,13 @@ fewer_multipe_identical_keys_from_same_doc(Db) -> {ok, _} = fabric2_db:update_doc(Db, DDoc, []), {ok, {Pos, Rev}} = fabric2_db:update_doc(Db, Doc0, []), - {ok, Out1} = couch_views:query( - Db, - DDoc, - <<"map_fun1">>, - fun fold_fun/2, - [], - #mrargs{} - ), + {ok, Out1} = run_query(Db, DDoc, ?MAP_FUN1), ?assertEqual([ - row(<<"0">>, 1, 1), - row(<<"0">>, 1, 2), - row(<<"0">>, 1, 3) - ], Out1), + row(<<"0">>, 1, 1), + row(<<"0">>, 1, 2), + row(<<"0">>, 1, 3) + ], Out1), Doc1 = #doc{ id = <<"0">>, @@ -422,19 +280,12 @@ fewer_multipe_identical_keys_from_same_doc(Db) -> }, {ok, _} = fabric2_db:update_doc(Db, Doc1, []), - {ok, Out2} = couch_views:query( - Db, - DDoc, - <<"map_fun1">>, - fun fold_fun/2, - [], - #mrargs{} - ), + {ok, Out2} = run_query(Db, DDoc, ?MAP_FUN1), ?assertEqual([ - row(<<"0">>, 1, 1), - row(<<"0">>, 1, 2) - ], Out2). + row(<<"0">>, 1, 1), + row(<<"0">>, 1, 2) + ], Out2). handle_size_key_limits(Db) -> @@ -451,18 +302,9 @@ handle_size_key_limits(Db) -> {ok, _} = fabric2_db:update_docs(Db, [DDoc | Docs], []), - {ok, Out} = couch_views:query( - Db, - DDoc, - <<"map_fun1">>, - fun fold_fun/2, - [], - #mrargs{} - ), + {ok, Out} = run_query(Db, DDoc, ?MAP_FUN1), - ?assertEqual([ - row(<<"1">>, 2, 2) - ], Out), + ?assertEqual([row(<<"1">>, 2, 2)], Out), {ok, Doc} = fabric2_db:open_doc(Db, <<"2">>), Doc2 = Doc#doc { @@ -470,14 +312,7 @@ handle_size_key_limits(Db) -> }, {ok, _} = fabric2_db:update_doc(Db, Doc2), - {ok, Out1} = couch_views:query( - Db, - DDoc, - <<"map_fun1">>, - fun fold_fun/2, - [], - #mrargs{} - ), + {ok, Out1} = run_query(Db, DDoc, ?MAP_FUN1), ?assertEqual([ row(<<"1">>, 2, 2), @@ -499,14 +334,7 @@ handle_size_value_limits(Db) -> {ok, _} = fabric2_db:update_docs(Db, [DDoc | Docs], []), - {ok, Out} = couch_views:query( - Db, - DDoc, - <<"map_fun2">>, - fun fold_fun/2, - [], - #mrargs{} - ), + {ok, Out} = run_query(Db, DDoc, ?MAP_FUN2), ?assertEqual([ row(<<"1">>, 2, 2), @@ -515,21 +343,13 @@ handle_size_value_limits(Db) -> row(<<"2">>, 23, 3) ], Out), - {ok, Doc} = fabric2_db:open_doc(Db, <<"1">>), - Doc2 = Doc#doc { - body = {[{<<"val">>,1}]} + Doc2 = Doc#doc{ + body = {[{<<"val">>, 1}]} }, {ok, _} = fabric2_db:update_doc(Db, Doc2), - {ok, Out1} = couch_views:query( - Db, - DDoc, - <<"map_fun2">>, - fun fold_fun/2, - [], - #mrargs{} - ), + {ok, Out1} = run_query(Db, DDoc, ?MAP_FUN2), ?assertEqual([ row(<<"2">>, 3, 3), @@ -576,10 +396,10 @@ create_ddoc(simple) -> couch_doc:from_json_obj({[ {<<"_id">>, <<"_design/bar">>}, {<<"views">>, {[ - {<<"map_fun1">>, {[ + {?MAP_FUN1, {[ {<<"map">>, <<"function(doc) {emit(doc.val, doc.val);}">>} ]}}, - {<<"map_fun2">>, {[ + {?MAP_FUN2, {[ {<<"map">>, <<"function(doc) {}">>} ]}} ]}} @@ -589,13 +409,13 @@ create_ddoc(multi_emit_different) -> couch_doc:from_json_obj({[ {<<"_id">>, <<"_design/bar">>}, {<<"views">>, {[ - {<<"map_fun1">>, {[ + {?MAP_FUN1, {[ {<<"map">>, <<"function(doc) { " "emit(doc._id, doc._id); " "emit(doc.val, doc.val); " "}">>} ]}}, - {<<"map_fun2">>, {[ + {?MAP_FUN2, {[ {<<"map">>, <<"function(doc) {}">>} ]}} ]}} @@ -605,7 +425,7 @@ create_ddoc(multi_emit_same) -> couch_doc:from_json_obj({[ {<<"_id">>, <<"_design/bar">>}, {<<"views">>, {[ - {<<"map_fun1">>, {[ + {?MAP_FUN1, {[ {<<"map">>, <<"function(doc) { " "emit(doc.val, doc.val * 2); " "emit(doc.val, doc.val); " @@ -614,7 +434,7 @@ create_ddoc(multi_emit_same) -> "}" "}">>} ]}}, - {<<"map_fun2">>, {[ + {?MAP_FUN2, {[ {<<"map">>, <<"function(doc) {}">>} ]}} ]}} @@ -624,7 +444,7 @@ create_ddoc(multi_emit_key_limit) -> couch_doc:from_json_obj({[ {<<"_id">>, <<"_design/bar">>}, {<<"views">>, {[ - {<<"map_fun1">>, {[ + {?MAP_FUN1, {[ {<<"map">>, <<"function(doc) { " "if (doc.val === 1) { " "emit('a very long string to be limited', doc.val);" @@ -633,7 +453,7 @@ create_ddoc(multi_emit_key_limit) -> "}" "}">>} ]}}, - {<<"map_fun2">>, {[ + {?MAP_FUN2, {[ {<<"map">>, <<"function(doc) { " "emit(doc.val + 20, doc.val);" "if (doc.val === 1) { " @@ -656,3 +476,7 @@ doc(Id, Val) -> {<<"_id">>, list_to_binary(integer_to_list(Id))}, {<<"val">>, Val} ]}). + + +run_query(#{} = Db, DDoc, <<_/binary>> = View) -> + couch_views:query(Db, DDoc, View, fun fold_fun/2, [], #mrargs{}). -- cgit v1.2.1 From 41be92bb94bc2b5bdc4070d3f48a6e27179ca154 Mon Sep 17 00:00:00 2001 From: Russell Branca Date: Wed, 18 Mar 2020 13:02:50 -0700 Subject: Add additional get_doc spans --- src/fabric/src/fabric2_fdb.erl | 43 +++++++++++++++++++++++++----------------- 1 file changed, 26 insertions(+), 17 deletions(-) diff --git a/src/fabric/src/fabric2_fdb.erl b/src/fabric/src/fabric2_fdb.erl index 403b5bb53..6f9373936 100644 --- a/src/fabric/src/fabric2_fdb.erl +++ b/src/fabric/src/fabric2_fdb.erl @@ -503,24 +503,30 @@ incr_stat(#{} = Db, Section, Key, Increment) when is_integer(Increment) -> get_all_revs(#{} = Db, DocId) -> - #{ - tx := Tx, - db_prefix := DbPrefix - } = ensure_current(Db), - - Prefix = erlfdb_tuple:pack({?DB_REVS, DocId}, DbPrefix), - Options = [{streaming_mode, want_all}], - Future = erlfdb:get_range_startswith(Tx, Prefix, Options), - lists:map(fun({K, V}) -> - Key = erlfdb_tuple:unpack(K, DbPrefix), - Val = erlfdb_tuple:unpack(V), - fdb_to_revinfo(Key, Val) - end, erlfdb:wait(Future)). + DbName = maps:get(name, Db, undefined), + with_span('db.get_all_revs', #{'db.name' => DbName, 'doc.id' => DocId}, fun() -> + #{ + tx := Tx, + db_prefix := DbPrefix + } = ensure_current(Db), + + Prefix = erlfdb_tuple:pack({?DB_REVS, DocId}, DbPrefix), + Options = [{streaming_mode, want_all}], + Future = erlfdb:get_range_startswith(Tx, Prefix, Options), + lists:map(fun({K, V}) -> + Key = erlfdb_tuple:unpack(K, DbPrefix), + Val = erlfdb_tuple:unpack(V), + fdb_to_revinfo(Key, Val) + end, erlfdb:wait(Future)) + end). get_winning_revs(Db, DocId, NumRevs) -> - Future = get_winning_revs_future(Db, DocId, NumRevs), - get_winning_revs_wait(Db, Future). + DbName = maps:get(name, Db, undefined), + with_span('db.get_winning_revs', #{'db.name' => DbName, 'doc.id' => DocId}, fun() -> + Future = get_winning_revs_future(Db, DocId, NumRevs), + get_winning_revs_wait(Db, Future) + end). get_winning_revs_future(#{} = Db, DocId, NumRevs) -> @@ -566,8 +572,11 @@ get_non_deleted_rev(#{} = Db, DocId, RevId) -> get_doc_body(Db, DocId, RevInfo) -> - Future = get_doc_body_future(Db, DocId, RevInfo), - get_doc_body_wait(Db, DocId, RevInfo, Future). + DbName = maps:get(name, Db, undefined), + with_span('db.get_doc_body', #{'db.name' => DbName, 'doc.id' => DocId}, fun() -> + Future = get_doc_body_future(Db, DocId, RevInfo), + get_doc_body_wait(Db, DocId, RevInfo, Future) + end). get_doc_body_future(#{} = Db, DocId, RevInfo) -> -- cgit v1.2.1 From 032934f3764c9e1ae2f8f359cf039349bf56cf86 Mon Sep 17 00:00:00 2001 From: Alexander Trauzzi Date: Thu, 19 Mar 2020 05:43:47 -0500 Subject: Feature - Add JWT support (#2648) Add JWT Authentication Handler Co-authored-by: Robert Newson Co-authored-by: Joan Touzet --- rel/overlay/etc/default.ini | 10 +++++++++ src/chttpd/src/chttpd_auth.erl | 4 ++++ src/couch/src/couch_httpd_auth.erl | 26 ++++++++++++++++++++++ test/elixir/test/config/test-config.ini | 2 +- test/elixir/test/jwtauth_test.exs | 39 +++++++++++++++++++++++++++++++++ 5 files changed, 80 insertions(+), 1 deletion(-) create mode 100644 test/elixir/test/jwtauth_test.exs diff --git a/rel/overlay/etc/default.ini b/rel/overlay/etc/default.ini index 2676ef530..82a56590f 100644 --- a/rel/overlay/etc/default.ini +++ b/rel/overlay/etc/default.ini @@ -134,10 +134,20 @@ max_db_number_for_dbs_info_req = 100 ; authentication_handlers = {chttpd_auth, cookie_authentication_handler}, {chttpd_auth, default_authentication_handler} ; uncomment the next line to enable proxy authentication ; authentication_handlers = {chttpd_auth, proxy_authentication_handler}, {chttpd_auth, cookie_authentication_handler}, {chttpd_auth, default_authentication_handler} +; uncomment the next line to enable JWT authentication +; authentication_handlers = {chttpd_auth, jwt_authentication_handler}, {chttpd_auth, cookie_authentication_handler}, {chttpd_auth, default_authentication_handler} ; prevent non-admins from accessing /_all_dbs ; admin_only_all_dbs = true +;[jwt_auth] +; Symmetric secret to be used when checking JWT token signatures +; secret = +; List of claims to validate +; required_claims = exp +; List of algorithms to accept during checks +; allowed_algorithms = HS256 + [couch_peruser] ; If enabled, couch_peruser ensures that a private per-user database ; exists for each document in _users. These databases are writable only diff --git a/src/chttpd/src/chttpd_auth.erl b/src/chttpd/src/chttpd_auth.erl index 607f09a8a..1b6d16eb3 100644 --- a/src/chttpd/src/chttpd_auth.erl +++ b/src/chttpd/src/chttpd_auth.erl @@ -18,6 +18,7 @@ -export([default_authentication_handler/1]). -export([cookie_authentication_handler/1]). -export([proxy_authentication_handler/1]). +-export([jwt_authentication_handler/1]). -export([party_mode_handler/1]). -export([handle_session_req/1]). @@ -51,6 +52,9 @@ cookie_authentication_handler(Req) -> proxy_authentication_handler(Req) -> couch_httpd_auth:proxy_authentication_handler(Req). +jwt_authentication_handler(Req) -> + couch_httpd_auth:jwt_authentication_handler(Req). + party_mode_handler(#httpd{method='POST', path_parts=[<<"_session">>]} = Req) -> % See #1947 - users should always be able to attempt a login Req#httpd{user_ctx=#user_ctx{}}; diff --git a/src/couch/src/couch_httpd_auth.erl b/src/couch/src/couch_httpd_auth.erl index 43ecda958..7c55f390e 100644 --- a/src/couch/src/couch_httpd_auth.erl +++ b/src/couch/src/couch_httpd_auth.erl @@ -31,6 +31,8 @@ -export([cookie_auth_cookie/4, cookie_scheme/1]). -export([maybe_value/3]). +-export([jwt_authentication_handler/1]). + -import(couch_httpd, [header_value/2, send_json/2,send_json/4, send_method_not_allowed/2]). -compile({no_auto_import,[integer_to_binary/1, integer_to_binary/2]}). @@ -186,6 +188,30 @@ proxy_auth_user(Req) -> end end. +jwt_authentication_handler(Req) -> + case {config:get("jwt_auth", "secret"), header_value(Req, "Authorization")} of + {Secret, "Bearer " ++ Jwt} when Secret /= undefined -> + RequiredClaims = get_configured_claims(), + AllowedAlgorithms = get_configured_algorithms(), + case jwtf:decode(?l2b(Jwt), [{alg, AllowedAlgorithms} | RequiredClaims], fun(_,_) -> Secret end) of + {ok, {Claims}} -> + case lists:keyfind(<<"sub">>, 1, Claims) of + false -> throw({unauthorized, <<"Token missing sub claim.">>}); + {_, User} -> Req#httpd{user_ctx=#user_ctx{ + name=User + }} + end; + {error, Reason} -> + throw({unauthorized, Reason}) + end; + {_, _} -> Req + end. + +get_configured_algorithms() -> + re:split(config:get("jwt_auth", "allowed_algorithms", "HS256"), "\s*,\s*", [{return, binary}]). + +get_configured_claims() -> + lists:usort(re:split(config:get("jwt_auth", "required_claims", ""), "\s*,\s*", [{return, binary}])). cookie_authentication_handler(Req) -> cookie_authentication_handler(Req, couch_auth_cache). diff --git a/test/elixir/test/config/test-config.ini b/test/elixir/test/config/test-config.ini index 72a13a707..1980139d1 100644 --- a/test/elixir/test/config/test-config.ini +++ b/test/elixir/test/config/test-config.ini @@ -1,2 +1,2 @@ [chttpd] -authentication_handlers = {chttpd_auth, proxy_authentication_handler}, {chttpd_auth, cookie_authentication_handler}, {chttpd_auth, default_authentication_handler} +authentication_handlers = {chttpd_auth, jwt_authentication_handler}, {chttpd_auth, proxy_authentication_handler}, {chttpd_auth, cookie_authentication_handler}, {chttpd_auth, default_authentication_handler} diff --git a/test/elixir/test/jwtauth_test.exs b/test/elixir/test/jwtauth_test.exs new file mode 100644 index 000000000..2e78ee989 --- /dev/null +++ b/test/elixir/test/jwtauth_test.exs @@ -0,0 +1,39 @@ +defmodule JwtAuthTest do + use CouchTestCase + + @moduletag :authentication + + test "jwt auth with secret", _context do + + secret = "zxczxc12zxczxc12" + + server_config = [ + %{ + :section => "jwt_auth", + :key => "secret", + :value => secret + } + ] + + run_on_modified_server(server_config, fn -> + test_fun() + end) + end + + def test_fun() do + resp = Couch.get("/_session", + headers: [authorization: "Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJjb3VjaEBhcGFjaGUub3JnIn0.KYHmGXWj0HNHzZCjfOfsIfZWdguEBSn31jUdDUA9118"] + ) + + assert resp.body["userCtx"]["name"] == "couch@apache.org" + assert resp.body["info"]["authenticated"] == "jwt" + end + + test "jwt auth without secret", _context do + + resp = Couch.get("/_session") + + assert resp.body["userCtx"]["name"] == "adm" + assert resp.body["info"]["authenticated"] == "default" + end +end -- cgit v1.2.1 From cb3c7723cc877890c810f4e2b4d10326bdb3e72c Mon Sep 17 00:00:00 2001 From: Juanjo Rodriguez Date: Thu, 19 Mar 2020 21:27:30 +0100 Subject: Port design_docs tests from js to elixir (#2641) --- test/elixir/README.md | 2 +- test/elixir/test/design_docs_test.exs | 479 ++++++++++++++++++++++++++++++++++ test/javascript/tests/design_docs.js | 2 + 3 files changed, 482 insertions(+), 1 deletion(-) create mode 100644 test/elixir/test/design_docs_test.exs diff --git a/test/elixir/README.md b/test/elixir/README.md index 2806cfb7a..53b56a2af 100644 --- a/test/elixir/README.md +++ b/test/elixir/README.md @@ -42,7 +42,7 @@ X means done, - means partially - [X] Port conflicts.js - [X] Port cookie_auth.js - [X] Port copy_doc.js - - [ ] Port design_docs.js + - [X] Port design_docs.js - [X] Port design_docs_query.js - [X] Port design_options.js - [X] Port design_paths.js diff --git a/test/elixir/test/design_docs_test.exs b/test/elixir/test/design_docs_test.exs new file mode 100644 index 000000000..258f5f72f --- /dev/null +++ b/test/elixir/test/design_docs_test.exs @@ -0,0 +1,479 @@ +defmodule DesignDocsTest do + use CouchTestCase + + @moduletag :design_docs + + @design_doc %{ + _id: "_design/test", + language: "javascript", + autoupdate: false, + whatever: %{ + stringzone: "exports.string = 'plankton';", + commonjs: %{ + whynot: """ + exports.test = require('../stringzone'); + exports.foo = require('whatever/stringzone'); + """, + upper: """ + exports.testing = require('./whynot').test.string.toUpperCase()+ + module.id+require('./whynot').foo.string + """, + circular_one: "require('./circular_two'); exports.name = 'One';", + circular_two: "require('./circular_one'); exports.name = 'Two';" + }, + # paths relative to parent + idtest1: %{ + a: %{ + b: %{d: "module.exports = require('../c/e').id;"}, + c: %{e: "exports.id = module.id;"} + } + }, + # multiple paths relative to parent + idtest2: %{ + a: %{ + b: %{d: "module.exports = require('../../a/c/e').id;"}, + c: %{e: "exports.id = module.id;"} + } + }, + # paths relative to module + idtest3: %{ + a: %{ + b: "module.exports = require('./c/d').id;", + c: %{ + d: "module.exports = require('./e');", + e: "exports.id = module.id;" + } + } + }, + # paths relative to module and parent + idtest4: %{ + a: %{ + b: "module.exports = require('../a/./c/d').id;", + c: %{ + d: "module.exports = require('./e');", + e: "exports.id = module.id;" + } + } + }, + # paths relative to root + idtest5: %{ + a: "module.exports = require('whatever/idtest5/b').id;", + b: "exports.id = module.id;" + } + }, + views: %{ + all_docs_twice: %{ + map: """ + function(doc) { + emit(doc.integer, null); + emit(doc.integer, null); + } + """ + }, + no_docs: %{ + map: """ + function(doc) {} + """ + }, + single_doc: %{ + map: """ + function(doc) { + if (doc._id === "1") { + emit(1, null); + } + } + """ + }, + summate: %{ + map: """ + function(doc) { + emit(doc.integer, doc.integer); + } + """, + reduce: """ + function(keys, values) { + return sum(values); + } + """ + }, + summate2: %{ + map: """ + function(doc) { + emit(doc.integer, doc.integer); + } + """, + reduce: """ + function(keys, values) { + return sum(values); + } + """ + }, + huge_src_and_results: %{ + map: """ + function(doc) { + if (doc._id === "1") { + emit("#{String.duplicate("a", 16)}", null); + } + } + """, + reduce: """ + function(keys, values) { + return "#{String.duplicate("a", 16)}"; + } + """ + }, + lib: %{ + baz: "exports.baz = 'bam';", + foo: %{ + foo: "exports.foo = 'bar';", + boom: "exports.boom = 'ok';", + zoom: "exports.zoom = 'yeah';" + } + }, + commonjs: %{ + map: """ + function(doc) { + emit(null, require('views/lib/foo/boom').boom); + } + """ + } + }, + shows: %{ + simple: """ + function() { + return 'ok'; + } + """, + requirey: """ + function() { + var lib = require('whatever/commonjs/upper'); + return lib.testing; + } + """, + circular: """ + function() { + var lib = require('whatever/commonjs/upper'); + return JSON.stringify(this); + } + """, + circular_require: """ + function() { + return require('whatever/commonjs/circular_one').name; + } + """, + idtest1: """ + function() { + return require('whatever/idtest1/a/b/d'); + } + """, + idtest2: """ + function() { + return require('whatever/idtest2/a/b/d'); + } + """, + idtest3: """ + function() { + return require('whatever/idtest3/a/b'); + } + """, + idtest4: """ + function() { + return require('whatever/idtest4/a/b'); + } + """, + idtest5: """ + function() { + return require('whatever/idtest5/a'); + } + """ + } + } + + setup_all do + db_name = random_db_name() + {:ok, _} = create_db(db_name) + on_exit(fn -> delete_db(db_name) end) + + {:ok, _} = create_doc(db_name, @design_doc) + {:ok, _} = create_doc(db_name, %{}) + {:ok, [db_name: db_name]} + end + + test "consistent _rev for design docs", context do + resp = Couch.get("/#{context[:db_name]}/_design/test") + assert resp.status_code == 200 + first_db_rev = resp.body["_rev"] + + second_db_name = random_db_name() + create_db(second_db_name) + {:ok, resp2} = create_doc(second_db_name, @design_doc) + assert first_db_rev == resp2.body["rev"] + end + + test "commonjs require", context do + db_name = context[:db_name] + resp = Couch.get("/#{db_name}/_design/test/_show/requirey") + assert resp.status_code == 200 + assert resp.body == "PLANKTONwhatever/commonjs/upperplankton" + + resp = Couch.get("/#{db_name}/_design/test/_show/circular") + assert resp.status_code == 200 + + result = + resp.body + |> IO.iodata_to_binary() + |> :jiffy.decode([:return_maps]) + + assert result["language"] == "javascript" + end + + test "circular commonjs dependencies", context do + db_name = context[:db_name] + resp = Couch.get("/#{db_name}/_design/test/_show/circular_require") + assert resp.status_code == 200 + assert resp.body == "One" + end + + test "module id values are as expected", context do + db_name = context[:db_name] + + check_id_value(db_name, "idtest1", "whatever/idtest1/a/c/e") + check_id_value(db_name, "idtest2", "whatever/idtest2/a/c/e") + check_id_value(db_name, "idtest3", "whatever/idtest3/a/c/e") + check_id_value(db_name, "idtest4", "whatever/idtest4/a/c/e") + check_id_value(db_name, "idtest5", "whatever/idtest5/b") + end + + defp check_id_value(db_name, id, expected) do + resp = Couch.get("/#{db_name}/_design/test/_show/#{id}") + assert resp.status_code == 200 + assert resp.body == expected + end + + @tag :with_db + test "test that we get correct design doc info back", context do + db_name = context[:db_name] + {:ok, _} = create_doc(db_name, @design_doc) + + resp = Couch.get("/#{db_name}/_design/test/_info") + prev_view_sig = resp.body["view_index"]["signature"] + prev_view_size = resp.body["view_index"]["sizes"]["file"] + + num_docs = 500 + bulk_save(db_name, make_docs(1..(num_docs + 1))) + + Couch.get("/#{db_name}/_design/test/_view/summate", query: [stale: "ok"]) + + for _x <- 0..1 do + resp = Couch.get("/#{db_name}/_design/test/_info") + assert resp.body["name"] == "test" + assert resp.body["view_index"]["sizes"]["file"] == prev_view_size + assert resp.body["view_index"]["compact_running"] == false + assert resp.body["view_index"]["signature"] == prev_view_sig + end + end + + test "commonjs in map functions", context do + db_name = context[:db_name] + + resp = Couch.get("/#{db_name}/_design/test/_view/commonjs", query: [limit: 1]) + assert resp.status_code == 200 + assert Enum.at(resp.body["rows"], 0)["value"] == "ok" + end + + test "_all_docs view returns correctly with keys", context do + db_name = context[:db_name] + + resp = + Couch.get("/#{db_name}/_all_docs", + query: [startkey: :jiffy.encode("_design"), endkey: :jiffy.encode("_design0")] + ) + + assert length(resp.body["rows"]) == 1 + end + + @tag :with_db + test "all_docs_twice", context do + db_name = context[:db_name] + {:ok, _} = create_doc(db_name, @design_doc) + + num_docs = 500 + bulk_save(db_name, make_docs(1..(2 * num_docs))) + + for _x <- 0..1 do + test_all_docs_twice(db_name, num_docs) + end + end + + defp test_all_docs_twice(db_name, num_docs) do + resp = Couch.get("/#{db_name}/_design/test/_view/all_docs_twice") + assert resp.status_code == 200 + rows = resp.body["rows"] + + for x <- 0..num_docs do + assert Map.get(Enum.at(rows, 2 * x), "key") == x + 1 + assert Map.get(Enum.at(rows, 2 * x + 1), "key") == x + 1 + end + + resp = Couch.get("/#{db_name}/_design/test/_view/no_docs") + assert resp.body["total_rows"] == 0 + + resp = Couch.get("/#{db_name}/_design/test/_view/single_doc") + assert resp.body["total_rows"] == 1 + end + + @tag :with_db + test "language not specified, Javascript is implied", context do + db_name = context[:db_name] + bulk_save(db_name, make_docs(1..2)) + + design_doc_2 = %{ + _id: "_design/test2", + views: %{ + single_doc: %{ + map: """ + function(doc) { + if (doc._id === "1") { + emit(1, null); + } + } + """ + } + } + } + + {:ok, _} = create_doc(db_name, design_doc_2) + + resp = Couch.get("/#{db_name}/_design/test2/_view/single_doc") + assert resp.status_code == 200 + assert length(resp.body["rows"]) == 1 + end + + @tag :with_db + test "startkey and endkey", context do + db_name = context[:db_name] + {:ok, _} = create_doc(db_name, @design_doc) + + num_docs = 500 + bulk_save(db_name, make_docs(1..(2 * num_docs))) + + resp = Couch.get("/#{db_name}/_design/test/_view/summate") + assert Enum.at(resp.body["rows"], 0)["value"] == summate(num_docs * 2) + + resp = + Couch.get("/#{db_name}/_design/test/_view/summate", + query: [startkey: 4, endkey: 4] + ) + + assert Enum.at(resp.body["rows"], 0)["value"] == 4 + + resp = + Couch.get("/#{db_name}/_design/test/_view/summate", + query: [startkey: 4, endkey: 5] + ) + + assert Enum.at(resp.body["rows"], 0)["value"] == 9 + + resp = + Couch.get("/#{db_name}/_design/test/_view/summate", + query: [startkey: 4, endkey: 6] + ) + + assert Enum.at(resp.body["rows"], 0)["value"] == 15 + + # test start_key and end_key aliases + resp = + Couch.get("/#{db_name}/_design/test/_view/summate", + query: [start_key: 4, end_key: 6] + ) + + assert Enum.at(resp.body["rows"], 0)["value"] == 15 + + # Verify that a shared index (view def is an exact copy of "summate") + # does not confuse the reduce stage + resp = + Couch.get("/#{db_name}/_design/test/_view/summate2", + query: [startkey: 4, endkey: 6] + ) + + assert Enum.at(resp.body["rows"], 0)["value"] == 15 + + for x <- 0..Integer.floor_div(num_docs, 60) do + resp = + Couch.get("/#{db_name}/_design/test/_view/summate", + query: [startkey: x * 30, endkey: num_docs - x * 30] + ) + + assert Enum.at(resp.body["rows"], 0)["value"] == + summate(num_docs - x * 30) - summate(x * 30 - 1) + end + end + + defp summate(n) do + (n + 1) * (n / 2) + end + + @tag :with_db + test "design doc deletion", context do + db_name = context[:db_name] + {:ok, resp} = create_doc(db_name, @design_doc) + + del_resp = + Couch.delete("/#{db_name}/#{resp.body["id"]}", query: [rev: resp.body["rev"]]) + + assert del_resp.status_code == 200 + + resp = Couch.get("/#{db_name}/#{resp.body["id"]}") + assert resp.status_code == 404 + + resp = Couch.get("/#{db_name}/_design/test/_view/no_docs") + assert resp.status_code == 404 + end + + @tag :with_db + test "validate doc update", context do + db_name = context[:db_name] + + # COUCHDB-1227 - if a design document is deleted, by adding a "_deleted" + # field with the boolean value true, its validate_doc_update functions + # should no longer have effect. + + ddoc = %{ + _id: "_design/test", + language: "javascript", + validate_doc_update: """ + function(newDoc, oldDoc, userCtx, secObj) { + if (newDoc.value % 2 == 0) { + throw({forbidden: "dont like even numbers"}); + } + return true; + } + """ + } + + {:ok, resp_ddoc} = create_doc(db_name, ddoc) + + resp = + Couch.post("/#{db_name}", + body: %{_id: "doc1", value: 4} + ) + + assert resp.status_code == 403 + assert resp.body["reason"] == "dont like even numbers" + + ddoc_resp = Couch.get("/#{db_name}/#{resp_ddoc.body["id"]}") + + ddoc = + ddoc_resp.body + |> Map.put("_deleted", true) + + del_resp = + Couch.post("/#{db_name}", + body: ddoc + ) + + assert del_resp.status_code in [201, 202] + + {:ok, _} = create_doc(db_name, %{_id: "doc1", value: 4}) + end +end diff --git a/test/javascript/tests/design_docs.js b/test/javascript/tests/design_docs.js index 55e592a18..dd2d0e307 100644 --- a/test/javascript/tests/design_docs.js +++ b/test/javascript/tests/design_docs.js @@ -10,7 +10,9 @@ // License for the specific language governing permissions and limitations under // the License. +couchTests.elixir = true; couchTests.design_docs = function(debug) { + return console.log('done in test/elixir/test/design_docs.exs'); var db_name = get_random_db_name(); var db_name_a = get_random_db_name(); var db = new CouchDB(db_name, {"X-Couch-Full-Commit":"false"}); -- cgit v1.2.1 From 996587d943853a681bc73d94db6648aa7f57d271 Mon Sep 17 00:00:00 2001 From: Alessio Biancalana Date: Fri, 20 Mar 2020 12:30:27 +0100 Subject: Upgrade Credo to 1.3.1 --- mix.exs | 2 +- mix.lock | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/mix.exs b/mix.exs index bd78c30d5..2e4a7aa85 100644 --- a/mix.exs +++ b/mix.exs @@ -68,7 +68,7 @@ defmodule CouchDBTest.Mixfile do {:jiffy, path: Path.expand("src/jiffy", __DIR__)}, {:ibrowse, path: Path.expand("src/ibrowse", __DIR__), override: true, compile: false}, - {:credo, "~> 1.3.0", only: [:dev, :test, :integration], runtime: false} + {:credo, "~> 1.3.1", only: [:dev, :test, :integration], runtime: false} ] end diff --git a/mix.lock b/mix.lock index e7460a3d6..29151a77e 100644 --- a/mix.lock +++ b/mix.lock @@ -1,13 +1,13 @@ %{ "bunt": {:hex, :bunt, "0.2.0", "951c6e801e8b1d2cbe58ebbd3e616a869061ddadcc4863d0a2182541acae9a38", [:mix], [], "hexpm", "7af5c7e09fe1d40f76c8e4f9dd2be7cebd83909f31fee7cd0e9eadc567da8353"}, "certifi": {:hex, :certifi, "2.5.1", "867ce347f7c7d78563450a18a6a28a8090331e77fa02380b4a21962a65d36ee5", [:rebar3], [{:parse_trans, "~>3.3", [hex: :parse_trans, repo: "hexpm", optional: false]}], "hexpm", "805abd97539caf89ec6d4732c91e62ba9da0cda51ac462380bbd28ee697a8c42"}, - "credo": {:hex, :credo, "1.3.0", "37699fefdbe1b0480a5a6b73f259207e9cd7ad5e492277e22c2179bcb226a67b", [:mix], [{:bunt, "~> 0.2.0", [hex: :bunt, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}], "hexpm", "8036b9226e4440d3ebce3931505e407b8d59fc95975f574c26337812e8de2a86"}, + "credo": {:hex, :credo, "1.3.1", "082e8d9268a489becf8e7aa75671a7b9088b1277cd6c1b13f40a55554b3f5126", [:mix], [{:bunt, "~> 0.2.0", [hex: :bunt, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}], "hexpm", "0da816ed52fa520b9ea0e5d18a0d3ca269e0bd410b1174d88d8abd94be6cce3c"}, "excoveralls": {:hex, :excoveralls, "0.12.1", "a553c59f6850d0aff3770e4729515762ba7c8e41eedde03208182a8dc9d0ce07", [:mix], [{:hackney, "~> 1.0", [hex: :hackney, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}], "hexpm", "5c1f717066a299b1b732249e736c5da96bb4120d1e55dc2e6f442d251e18a812"}, "hackney": {:hex, :hackney, "1.15.2", "07e33c794f8f8964ee86cebec1a8ed88db5070e52e904b8f12209773c1036085", [:rebar3], [{:certifi, "2.5.1", [hex: :certifi, repo: "hexpm", optional: false]}, {:idna, "6.0.0", [hex: :idna, repo: "hexpm", optional: false]}, {:metrics, "1.0.1", [hex: :metrics, repo: "hexpm", optional: false]}, {:mimerl, "~>1.1", [hex: :mimerl, repo: "hexpm", optional: false]}, {:ssl_verify_fun, "1.1.5", [hex: :ssl_verify_fun, repo: "hexpm", optional: false]}], "hexpm", "e0100f8ef7d1124222c11ad362c857d3df7cb5f4204054f9f0f4a728666591fc"}, "httpotion": {:hex, :httpotion, "3.1.3", "fdaf1e16b9318dcb722de57e75ac368c93d4c6e3c9125f93e960f953a750fb77", [:mix], [{:ibrowse, "== 4.4.0", [hex: :ibrowse, repo: "hexpm", optional: false]}], "hexpm", "e420172ef697a0f1f4dc40f89a319d5a3aad90ec51fa424f08c115f04192ae43"}, "ibrowse": {:hex, :ibrowse, "4.4.0", "2d923325efe0d2cb09b9c6a047b2835a5eda69d8a47ed6ff8bc03628b764e991", [:rebar3], [], "hexpm"}, "idna": {:hex, :idna, "6.0.0", "689c46cbcdf3524c44d5f3dde8001f364cd7608a99556d8fbd8239a5798d4c10", [:rebar3], [{:unicode_util_compat, "0.4.1", [hex: :unicode_util_compat, repo: "hexpm", optional: false]}], "hexpm", "4bdd305eb64e18b0273864920695cb18d7a2021f31a11b9c5fbcd9a253f936e2"}, - "jason": {:hex, :jason, "1.1.2", "b03dedea67a99223a2eaf9f1264ce37154564de899fd3d8b9a21b1a6fd64afe7", [:mix], [{:decimal, "~> 1.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm", "fdf843bca858203ae1de16da2ee206f53416bbda5dc8c9e78f43243de4bc3afe"}, + "jason": {:hex, :jason, "1.2.0", "10043418c42d2493d0ee212d3fddd25d7ffe484380afad769a0a38795938e448", [:mix], [{:decimal, "~> 1.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm", "116747dbe057794c3a3e4e143b7c8390b29f634e16c78a7f59ba75bfa6852e7f"}, "jiffy": {:hex, :jiffy, "0.15.2", "de266c390111fd4ea28b9302f0bc3d7472468f3b8e0aceabfbefa26d08cd73b7", [:rebar3], [], "hexpm"}, "junit_formatter": {:hex, :junit_formatter, "3.0.0", "13950d944dbd295da7d8cc4798b8faee808a8bb9b637c88069954eac078ac9da", [:mix], [], "hexpm", "d77b7b9a1601185b18dfe7682b27c46d5d12721f12fdc75180a6fc573b4e64b1"}, "metrics": {:hex, :metrics, "1.0.1", "25f094dea2cda98213cecc3aeff09e940299d950904393b2a29d191c346a8486", [:rebar3], [], "hexpm", "69b09adddc4f74a40716ae54d140f93beb0fb8978d8636eaded0c31b6f099f16"}, -- cgit v1.2.1 From 11dee528b46f8c4619bdffeffc27536b9d9c2fcf Mon Sep 17 00:00:00 2001 From: Alessio Biancalana Date: Fri, 20 Mar 2020 15:14:24 +0100 Subject: Ignore unused string variable inside utf8 test case --- test/elixir/test/utf8_test.exs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/elixir/test/utf8_test.exs b/test/elixir/test/utf8_test.exs index ad78080ae..0e4d8b875 100644 --- a/test/elixir/test/utf8_test.exs +++ b/test/elixir/test/utf8_test.exs @@ -29,7 +29,7 @@ defmodule UTF8Test do texts |> Enum.with_index() - |> Enum.each(fn {string, index} -> + |> Enum.each(fn {_, index} -> resp = Couch.get("/#{db_name}/#{index}") %{"_id" => id, "text" => text} = resp.body assert resp.status_code == 200 -- cgit v1.2.1 From 3248ebcccf0a0895780d0241445c98de72789d67 Mon Sep 17 00:00:00 2001 From: Juanjo Rodriguez Date: Sat, 21 Mar 2020 01:17:14 +0100 Subject: Port http, method_override and jsonp tests into elixir test suite (#2646) --- test/elixir/README.md | 6 +- test/elixir/lib/couch/db_test.ex | 4 +- test/elixir/test/http_test.exs | 81 +++++++++++++++++++++ test/elixir/test/jsonp_test.exs | 116 ++++++++++++++++++++++++++++++ test/elixir/test/method_override_test.exs | 55 ++++++++++++++ test/javascript/tests/changes.js | 6 +- test/javascript/tests/http.js | 3 +- test/javascript/tests/jsonp.js | 2 + test/javascript/tests/method_override.js | 2 + 9 files changed, 267 insertions(+), 8 deletions(-) create mode 100644 test/elixir/test/http_test.exs create mode 100644 test/elixir/test/jsonp_test.exs create mode 100644 test/elixir/test/method_override_test.exs diff --git a/test/elixir/README.md b/test/elixir/README.md index 53b56a2af..b2ffbc047 100644 --- a/test/elixir/README.md +++ b/test/elixir/README.md @@ -50,15 +50,15 @@ X means done, - means partially - [X] Port etags_head.js - [ ] ~~Port etags_views.js~~ (skipped in js test suite) - [X] Port form_submit.js - - [ ] Port http.js + - [X] Port http.js - [X] Port invalid_docids.js - - [ ] Port jsonp.js + - [X] Port jsonp.js - [X] Port large_docs.js - [ ] Port list_views.js - [X] Port lorem_b64.txt - [X] Port lorem.txt - [X] Port lots_of_docs.js - - [ ] Port method_override.js + - [X] Port method_override.js - [X] Port multiple_rows.js - [X] Port proxyauth.js - [ ] Port purge.js diff --git a/test/elixir/lib/couch/db_test.ex b/test/elixir/lib/couch/db_test.ex index 0a091c667..47a067652 100644 --- a/test/elixir/lib/couch/db_test.ex +++ b/test/elixir/lib/couch/db_test.ex @@ -399,8 +399,8 @@ defmodule Couch.DBTest do Enum.each(setting.nodes, fn node_value -> node = elem(node_value, 0) value = elem(node_value, 1) - - if value == ~s(""\\n) do + + if value == ~s(""\\n) or value == "" or value == nil do resp = Couch.delete( "/_node/#{node}/_config/#{setting.section}/#{setting.key}", diff --git a/test/elixir/test/http_test.exs b/test/elixir/test/http_test.exs new file mode 100644 index 000000000..09d743060 --- /dev/null +++ b/test/elixir/test/http_test.exs @@ -0,0 +1,81 @@ +defmodule HttpTest do + use CouchTestCase + + @moduletag :http + + @tag :with_db + test "location header", context do + db_name = context[:db_name] + resp = Couch.put("/#{db_name}/test", body: %{}) + db_url = Couch.process_url("/" <> db_name) + assert resp.headers.hdrs["location"] == db_url <> "/test" + end + + @tag :with_db + test "location header should include X-Forwarded-Host", context do + db_name = context[:db_name] + + resp = + Couch.put("/#{db_name}/test2", + body: %{}, + headers: ["X-Forwarded-Host": "mysite.com"] + ) + + assert resp.headers.hdrs["location"] == "http://mysite.com/#{db_name}/test2" + end + + @tag :with_db + test "location header should include custom header", context do + db_name = context[:db_name] + + server_config = [ + %{ + :section => "httpd", + :key => "x_forwarded_host", + :value => "X-Host" + } + ] + + run_on_modified_server(server_config, fn -> + resp = + Couch.put("/#{db_name}/test3", + body: %{}, + headers: ["X-Host": "mysite2.com"] + ) + + assert resp.headers.hdrs["location"] == "http://mysite2.com/#{db_name}/test3" + end) + end + + @tag :with_db + test "COUCHDB-708: newlines document names", context do + db_name = context[:db_name] + + resp = + Couch.put("/#{db_name}/docid%0A/attachment.txt", + body: %{}, + headers: ["Content-Type": "text/plain;charset=utf-8"] + ) + + db_url = Couch.process_url("/" <> db_name) + assert resp.headers.hdrs["location"] == db_url <> "/docid%0A/attachment.txt" + + resp = + Couch.put("/#{db_name}/docidtest%0A", + body: %{}, + headers: ["Content-Type": "text/plain;charset=utf-8"] + ) + + db_url = Couch.process_url("/" <> db_name) + assert resp.headers.hdrs["location"] == db_url <> "/docidtest%0A" + + resp = + Couch.post("/#{db_name}/", + body: %{_id: "docidtestpost%0A"}, + headers: ["Content-Type": "application/json"] + ) + + db_url = Couch.process_url("/" <> db_name) + assert resp.headers.hdrs["location"] == db_url <> "/docidtestpost%250A" + end +end diff --git a/test/elixir/test/jsonp_test.exs b/test/elixir/test/jsonp_test.exs new file mode 100644 index 000000000..3fdc2ba5f --- /dev/null +++ b/test/elixir/test/jsonp_test.exs @@ -0,0 +1,116 @@ +defmodule JsonpTest do + use CouchTestCase + + @moduletag :jsonp + + @tag :with_db + test "jsonp not configured callbacks", context do + db_name = context[:db_name] + {:ok, _} = create_doc(db_name, %{_id: "0", a: 0, b: 0}) + + resp = Couch.get("/#{db_name}/0?callback=jsonp_no_chunk") + assert resp.status_code == 200 + assert resp.headers.hdrs["content-type"] == "application/json" + end + + @tag :with_db + test "jsonp unchunked callbacks", context do + db_name = context[:db_name] + + server_config = [ + %{ + :section => "httpd", + :key => "allow_jsonp", + :value => "true" + } + ] + + {:ok, create_resp} = create_doc(db_name, %{_id: "0", a: 0, b: 0}) + + run_on_modified_server(server_config, fn -> + resp = Couch.get("/#{db_name}/0?callback=jsonp_no_chunk") + + assert resp.status_code == 200 + assert resp.headers.hdrs["content-type"] == "application/javascript" + + {callback_fun, callback_param} = parse_callback(resp.body) + + assert callback_fun == "jsonp_no_chunk" + assert create_resp.body["id"] == callback_param["_id"] + assert create_resp.body["rev"] == callback_param["_rev"] + + resp = Couch.get("/#{db_name}/0?callback=jsonp_no_chunk\"") + assert resp.status_code == 400 + end) + end + + @tag :with_db + test "jsonp chunked callbacks", context do + db_name = context[:db_name] + + server_config = [ + %{ + :section => "httpd", + :key => "allow_jsonp", + :value => "true" + } + ] + + design_doc = %{ + _id: "_design/test", + language: "javascript", + views: %{ + all_docs: %{map: "function(doc) {if(doc.a) emit(null, doc.a);}"} + } + } + + {:ok, _} = create_doc(db_name, design_doc) + {:ok, _} = create_doc(db_name, %{_id: "0", a: 0, b: 0}) + {:ok, _} = create_doc(db_name, %{_id: "1", a: 1, b: 1}) + + run_on_modified_server(server_config, fn -> + resp = Couch.get("/#{db_name}/_design/test/_view/all_docs?callback=jsonp_chunk") + assert resp.status_code == 200 + assert resp.headers.hdrs["content-type"] == "application/javascript" + + {callback_fun, callback_param} = parse_callback(resp.body) + + assert callback_fun == "jsonp_chunk" + assert callback_param["total_rows"] == 1 + + resp = Couch.get("/#{db_name}/_design/test/_view/all_docs?callback=jsonp_chunk'") + assert resp.status_code == 400 + + resp = Couch.get("/#{db_name}/_changes?callback=jsonp_chunk") + assert resp.status_code == 200 + assert resp.headers.hdrs["content-type"] == "application/javascript" + + {callback_fun, callback_param} = parse_callback(resp.body) + assert callback_fun == "jsonp_chunk" + assert length(callback_param["results"]) == 3 + + end) + end + + defp parse_callback(msg) do + captures = Regex.scan(~r/\/\* CouchDB \*\/(\w+)\((.*)\)/s, msg) + + callback_fun = + captures + |> Enum.map(fn p -> Enum.at(p, 1) end) + |> Enum.at(0) + + param = + captures + |> Enum.map(fn p -> Enum.at(p, 2) end) + |> Enum.filter(fn p -> String.trim(p) != "" end) + |> Enum.map(fn p -> + p + |> IO.iodata_to_binary() + |> :jiffy.decode([:return_maps]) + end) + |> Enum.at(0) + + {callback_fun, param} + end +end diff --git a/test/elixir/test/method_override_test.exs b/test/elixir/test/method_override_test.exs new file mode 100644 index 000000000..c67fe3966 --- /dev/null +++ b/test/elixir/test/method_override_test.exs @@ -0,0 +1,55 @@ +defmodule MethodOverrideTest do + use CouchTestCase + + @moduletag :http + + @moduledoc """ + Allow broken HTTP clients to fake a full method vocabulary with an + X-HTTP-METHOD-OVERRIDE header + """ + + @tag :with_db + test "method override PUT", context do + db_name = context[:db_name] + + resp = + Couch.post("/#{db_name}/fnord", + body: %{bob: "connie"}, + headers: ["X-HTTP-Method-Override": "PUT"] + ) + + assert resp.status_code == 201 + + resp = Couch.get("/#{db_name}/fnord") + assert resp.body["bob"] == "connie" + end + + @tag :with_db + test "method override DELETE", context do + db_name = context[:db_name] + {:ok, resp} = create_doc(db_name, %{_id: "fnord", bob: "connie"}) + + resp = + Couch.post("/#{db_name}/fnord?rev=#{resp.body["rev"]}", + headers: ["X-HTTP-Method-Override": "DELETE"] + ) + + assert resp.status_code == 200 + + resp = Couch.get("/#{db_name}/fnord") + assert resp.status_code == 404 + end + + @tag :with_db + test "Method Override is ignored when original Method isn't POST", context do + db_name = context[:db_name] + + resp = + Couch.get("/#{db_name}/fnord2", + body: %{bob: "connie"}, + headers: ["X-HTTP-Method-Override": "PUT"] + ) + + assert resp.status_code == 404 + end +end diff --git a/test/javascript/tests/changes.js b/test/javascript/tests/changes.js index d98e37cc8..338c1571c 100644 --- a/test/javascript/tests/changes.js +++ b/test/javascript/tests/changes.js @@ -9,15 +9,17 @@ // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the // License for the specific language governing permissions and limitations under // the License. - +couchTests.elixir = true; + function jsonp(obj) { - return console.log('done in test/elixir/test/changes_test.exs and changes_async_test.exs'); T(jsonp_flag == 0); T(obj.results.length == 1 && obj.last_seq == 1, "jsonp"); jsonp_flag = 1; } couchTests.changes = function(debug) { + return console.log('done in test/elixir/test/changes_test.exs and changes_async_test.exs'); + var db; if (debug) debugger; diff --git a/test/javascript/tests/http.js b/test/javascript/tests/http.js index c78177897..bc35921e1 100644 --- a/test/javascript/tests/http.js +++ b/test/javascript/tests/http.js @@ -9,8 +9,9 @@ // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the // License for the specific language governing permissions and limitations under // the License. - +couchTests.elixir = true; couchTests.http = function(debug) { + return console.log('done in test/elixir/test/http_test.exs'); var db_name = get_random_db_name(); var db = new CouchDB(db_name, {"X-Couch-Full-Commit":"false"}); diff --git a/test/javascript/tests/jsonp.js b/test/javascript/tests/jsonp.js index 1013c9eba..f34fdc9c5 100644 --- a/test/javascript/tests/jsonp.js +++ b/test/javascript/tests/jsonp.js @@ -9,6 +9,7 @@ // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the // License for the specific language governing permissions and limitations under // the License. +couchTests.elixir = true; // Verify callbacks ran var jsonp_flag = 0; @@ -28,6 +29,7 @@ function jsonp_chunk(doc) { // Do some jsonp tests. couchTests.jsonp = function(debug) { + return console.log('done in test/elixir/test/jsonp_test.exs'); var db_name = get_random_db_name(); var db = new CouchDB(db_name, {"X-Couch-Full-Commit":"false"}); db.createDb(); diff --git a/test/javascript/tests/method_override.js b/test/javascript/tests/method_override.js index fa3e5e88f..94d798f96 100644 --- a/test/javascript/tests/method_override.js +++ b/test/javascript/tests/method_override.js @@ -11,7 +11,9 @@ // the License. // Allow broken HTTP clients to fake a full method vocabulary with an X-HTTP-METHOD-OVERRIDE header +couchTests.elixir = true; couchTests.method_override = function(debug) { + return console.log('done in test/elixir/test/method_override_test.exs'); var result = JSON.parse(CouchDB.request("GET", "/").responseText); T(result.couchdb == "Welcome"); -- cgit v1.2.1 From e520294c7ee3f55c3e8cc7d528ff37a5a93c800f Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Fri, 20 Mar 2020 23:14:48 -0400 Subject: Fix database re-creation Previously it was possible for a database to be re-created while a `Db` handle was open and the `Db` handle would continue operating on the new db without any error. To avoid that situation ensure instance UUID is explicitly checked during open and reopen calls. This includes checking it after the metadata is loaded in `fabric2_fdb:open/2` and when fetching the handle from the cache. Also, create a `{uuid, UUID}` option to allow specifying a particular instance UUID when opening a database. If that instance doesn't exist raise a `database_does_not_exist` error. --- src/fabric/src/fabric2_db.erl | 3 ++- src/fabric/src/fabric2_fdb.erl | 29 +++++++++++++++++++++++++--- src/fabric/src/fabric2_server.erl | 12 +++++++----- src/fabric/test/fabric2_db_crud_tests.erl | 32 +++++++++++++++++++++++++++++++ src/fabric/test/fabric2_db_misc_tests.erl | 5 +++-- 5 files changed, 70 insertions(+), 11 deletions(-) diff --git a/src/fabric/src/fabric2_db.erl b/src/fabric/src/fabric2_db.erl index 4d65f306f..129dea2d7 100644 --- a/src/fabric/src/fabric2_db.erl +++ b/src/fabric/src/fabric2_db.erl @@ -178,7 +178,8 @@ create(DbName, Options) -> open(DbName, Options) -> - case fabric2_server:fetch(DbName) of + UUID = fabric2_util:get_value(uuid, Options), + case fabric2_server:fetch(DbName, UUID) of #{} = Db -> Db1 = maybe_set_user_ctx(Db, Options), {ok, require_member_check(Db1)}; diff --git a/src/fabric/src/fabric2_fdb.erl b/src/fabric/src/fabric2_fdb.erl index 6f9373936..5c72a1726 100644 --- a/src/fabric/src/fabric2_fdb.erl +++ b/src/fabric/src/fabric2_fdb.erl @@ -255,6 +255,9 @@ open(#{} = Db0, Options) -> UserCtx = fabric2_util:get_value(user_ctx, Options, #user_ctx{}), Options1 = lists:keydelete(user_ctx, 1, Options), + UUID = fabric2_util:get_value(uuid, Options1), + Options2 = lists:keydelete(uuid, 1, Options1), + Db2 = Db1#{ db_prefix => DbPrefix, db_version => DbVersion, @@ -271,16 +274,28 @@ open(#{} = Db0, Options) -> before_doc_update => undefined, after_doc_read => undefined, - db_options => Options1 + db_options => Options2 }, Db3 = load_config(Db2), + case {UUID, Db3} of + {undefined, _} -> ok; + {<<_/binary>>, #{uuid := UUID}} -> ok; + {<<_/binary>>, #{uuid := _}} -> erlang:error(database_does_not_exist) + end, + load_validate_doc_funs(Db3). -refresh(#{tx := undefined, name := DbName, md_version := OldVer} = Db) -> - case fabric2_server:fetch(DbName) of +refresh(#{tx := undefined} = Db) -> + #{ + name := DbName, + uuid := UUID, + md_version := OldVer + } = Db, + + case fabric2_server:fetch(DbName, UUID) of % Relying on these assumptions about the `md_version` value: % - It is bumped every time `db_version` is bumped % - Is a versionstamp, so we can check which one is newer @@ -304,12 +319,20 @@ reopen(#{} = OldDb) -> #{ tx := Tx, name := DbName, + uuid := UUID, db_options := Options, user_ctx := UserCtx, security_fun := SecurityFun } = OldDb, Options1 = lists:keystore(user_ctx, 1, Options, {user_ctx, UserCtx}), NewDb = open(init_db(Tx, DbName, Options1), Options1), + + % Check if database was re-created + case maps:get(uuid, NewDb) of + UUID -> ok; + _OtherUUID -> error(database_does_not_exist) + end, + NewDb#{security_fun := SecurityFun}. diff --git a/src/fabric/src/fabric2_server.erl b/src/fabric/src/fabric2_server.erl index b1c38ef55..1de60f798 100644 --- a/src/fabric/src/fabric2_server.erl +++ b/src/fabric/src/fabric2_server.erl @@ -17,7 +17,7 @@ -export([ start_link/0, - fetch/1, + fetch/2, store/1, remove/1, fdb_directory/0, @@ -48,10 +48,12 @@ start_link() -> gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). -fetch(DbName) when is_binary(DbName) -> - case ets:lookup(?MODULE, DbName) of - [{DbName, #{} = Db}] -> Db; - [] -> undefined +fetch(DbName, UUID) when is_binary(DbName) -> + case {UUID, ets:lookup(?MODULE, DbName)} of + {_, []} -> undefined; + {undefined, [{DbName, #{} = Db}]} -> Db; + {<<_/binary>>, [{DbName, #{uuid := UUID} = Db}]} -> Db; + {<<_/binary>>, [{DbName, #{} = _Db}]} -> undefined end. diff --git a/src/fabric/test/fabric2_db_crud_tests.erl b/src/fabric/test/fabric2_db_crud_tests.erl index a82afb54d..f409389d6 100644 --- a/src/fabric/test/fabric2_db_crud_tests.erl +++ b/src/fabric/test/fabric2_db_crud_tests.erl @@ -36,6 +36,7 @@ crud_test_() -> ?TDEF_FE(create_db), ?TDEF_FE(open_db), ?TDEF_FE(delete_db), + ?TDEF_FE(recreate_db), ?TDEF_FE(list_dbs), ?TDEF_FE(list_dbs_user_fun), ?TDEF_FE(list_dbs_user_fun_partial), @@ -107,6 +108,37 @@ delete_db(_) -> ?assertError(database_does_not_exist, fabric2_db:open(DbName, [])). +recreate_db(_) -> + DbName = ?tempdb(), + ?assertMatch({ok, _}, fabric2_db:create(DbName, [])), + + {ok, Db1} = fabric2_db:open(DbName, []), + + ?assertEqual(ok, fabric2_db:delete(DbName, [])), + ?assertMatch({ok, _}, fabric2_db:create(DbName, [])), + + ?assertError(database_does_not_exist, fabric2_db:get_db_info(Db1)), + + ?assertEqual(ok, fabric2_db:delete(DbName, [])), + ?assertMatch({ok, _}, fabric2_db:create(DbName, [])), + + {ok, Db2} = fabric2_db:open(DbName, []), + + CurOpts = [{uuid, fabric2_db:get_uuid(Db2)}], + ?assertMatch({ok, #{}}, fabric2_db:open(DbName, CurOpts)), + + % Remove from cache to force it to open through fabric2_fdb:open + fabric2_server:remove(DbName), + ?assertMatch({ok, #{}}, fabric2_db:open(DbName, CurOpts)), + + BadOpts = [{uuid, fabric2_util:uuid()}], + ?assertError(database_does_not_exist, fabric2_db:open(DbName, BadOpts)), + + % Remove from cache to force it to open through fabric2_fdb:open + fabric2_server:remove(DbName), + ?assertError(database_does_not_exist, fabric2_db:open(DbName, BadOpts)). + + list_dbs(_) -> DbName = ?tempdb(), AllDbs1 = fabric2_db:list_dbs(), diff --git a/src/fabric/test/fabric2_db_misc_tests.erl b/src/fabric/test/fabric2_db_misc_tests.erl index 42a63e2f9..fe0ae9faa 100644 --- a/src/fabric/test/fabric2_db_misc_tests.erl +++ b/src/fabric/test/fabric2_db_misc_tests.erl @@ -302,7 +302,8 @@ metadata_bump({DbName, _, _}) -> {ok, _} = fabric2_db:get_db_info(Db), % Check that db handle in the cache got the new metadata version - ?assertMatch(#{md_version := NewMDVersion}, fabric2_server:fetch(DbName)). + CachedDb = fabric2_server:fetch(DbName, undefined), + ?assertMatch(#{md_version := NewMDVersion}, CachedDb). db_version_bump({DbName, _, _}) -> @@ -326,7 +327,7 @@ db_version_bump({DbName, _, _}) -> {ok, _} = fabric2_db:get_db_info(Db), % After previous operation, the cache should have been cleared - ?assertMatch(undefined, fabric2_server:fetch(DbName)), + ?assertMatch(undefined, fabric2_server:fetch(DbName, undefined)), % Call open again and check that we have the latest db version {ok, Db2} = fabric2_db:open(DbName, [{user_ctx, ?ADMIN_USER}]), -- cgit v1.2.1 From a008c6d0b5da269d519bd2a6ef3aa6e9aa22abc2 Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Sat, 21 Mar 2020 19:31:28 -0400 Subject: Add a fabric doc fold test when db is re-created Check that on a transaction restarts `database_does_not_exist` error is thrown properly if database was re-created. Also we forgot to properly unload the mocked erlfdb module in `tx_too_old_mock_erlfdb/0` so we make sure to do that, otherwise it has a chance of messing up subsequent tests. --- src/fabric/test/fabric2_doc_fold_tests.erl | 33 +++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/src/fabric/test/fabric2_doc_fold_tests.erl b/src/fabric/test/fabric2_doc_fold_tests.erl index 7c95dd372..0695b450b 100644 --- a/src/fabric/test/fabric2_doc_fold_tests.erl +++ b/src/fabric/test/fabric2_doc_fold_tests.erl @@ -43,7 +43,8 @@ doc_fold_test_() -> ?TDEF_FE(fold_docs_with_limit), ?TDEF_FE(fold_docs_with_skip), ?TDEF_FE(fold_docs_with_skip_and_limit), - ?TDEF_FE(fold_docs_tx_too_old) + ?TDEF_FE(fold_docs_tx_too_old), + ?TDEF_FE(fold_docs_db_recreated) ] } } @@ -69,12 +70,14 @@ setup() -> {ok, Rev} = fabric2_db:update_doc(Db, Doc, []), {DocId, {[{rev, couch_doc:rev_to_str(Rev)}]}} end, lists:seq(1, ?DOC_COUNT)), + meck:new(erlfdb, [passthrough]), fabric2_test_util:tx_too_old_mock_erlfdb(), {Db, lists:sort(DocIdRevs)}. cleanup({Db, _DocIdRevs}) -> fabric2_test_util:tx_too_old_reset_errors(), + meck:unload(), ok = fabric2_db:delete(fabric2_db:name(Db), []). @@ -216,6 +219,34 @@ fold_docs_tx_too_old({Db, _DocIdRevs}) -> ?assertEqual({ok, Expected}, FoldDocsFun()). +fold_docs_db_recreated({Db, _DocIdRevs}) -> + DbName = fabric2_db:name(Db), + + RecreateDb = fun() -> + ok = fabric2_db:delete(DbName, []), + {ok, _} = fabric2_db:create(DbName, []) + end, + + FoldFun = fun + ({meta, _}, Acc) -> + {ok, Acc}; + ({row, Row}, Acc) -> + fabric2_test_util:tx_too_old_raise_in_user_fun(), + % After meta and one row emitted, recreate the db + case length(Acc) =:= 1 of + true -> RecreateDb(); + false -> ok + end, + {ok, [Row | Acc]}; + (complete, Acc) -> + {ok, Acc} + end, + % Blow up in user fun after emitting two rows + fabric2_test_util:tx_too_old_setup_errors({2, 1}, 0), + ?assertError(database_does_not_exist, fabric2_db:fold_docs(Db, FoldFun, + [], [{restart_tx, true}])). + + check_all_combos(Db, StartKey, EndKey, Rows) -> Opts1 = make_opts(fwd, StartKey, EndKey, true), {ok, {?DOC_COUNT, Rows1}} = -- cgit v1.2.1 From f6a4f8ee787a9418c527f952b785ed293be806af Mon Sep 17 00:00:00 2001 From: Juanjo Rodriguez Date: Sat, 21 Mar 2020 19:50:28 +0100 Subject: Fix ported to elixir tag --- test/javascript/tests/design_docs_query.js | 2 +- test/javascript/tests/design_options.js | 2 +- test/javascript/tests/design_paths.js | 2 +- test/javascript/tests/erlang_views.js | 2 +- test/javascript/tests/form_submit.js | 1 + 5 files changed, 5 insertions(+), 4 deletions(-) diff --git a/test/javascript/tests/design_docs_query.js b/test/javascript/tests/design_docs_query.js index 2aefe49b4..7b4b612c0 100644 --- a/test/javascript/tests/design_docs_query.js +++ b/test/javascript/tests/design_docs_query.js @@ -9,7 +9,7 @@ // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the // License for the specific language governing permissions and limitations under // the License. - +couchTests.elixir = true; couchTests.design_docs_query = function(debug) { return console.log('done in test/elixir/test/design_docs_query_test.exs'); diff --git a/test/javascript/tests/design_options.js b/test/javascript/tests/design_options.js index d3f8594d4..aaab39e5b 100644 --- a/test/javascript/tests/design_options.js +++ b/test/javascript/tests/design_options.js @@ -9,7 +9,7 @@ // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the // License for the specific language governing permissions and limitations under // the License. - +couchTests.elixir = true; couchTests.design_options = function(debug) { return console.log('done in test/elixir/test/design_options.exs'); var db_name = get_random_db_name(); diff --git a/test/javascript/tests/design_paths.js b/test/javascript/tests/design_paths.js index b85426acf..e1d64ea77 100644 --- a/test/javascript/tests/design_paths.js +++ b/test/javascript/tests/design_paths.js @@ -9,7 +9,7 @@ // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the // License for the specific language governing permissions and limitations under // the License. - +couchTests.elixir = true; couchTests.design_paths = function(debug) { return console.log('done in test/elixir/test/design_paths.exs'); if (debug) debugger; diff --git a/test/javascript/tests/erlang_views.js b/test/javascript/tests/erlang_views.js index 9b15e1043..140925f58 100644 --- a/test/javascript/tests/erlang_views.js +++ b/test/javascript/tests/erlang_views.js @@ -9,7 +9,7 @@ // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the // License for the specific language governing permissions and limitations under // the License. - +couchTests.elixir = true; couchTests.erlang_views = function(debug) { var db_name = get_random_db_name(); var db = new CouchDB(db_name, {"X-Couch-Full-Commit":"false"}); diff --git a/test/javascript/tests/form_submit.js b/test/javascript/tests/form_submit.js index 617686543..f8dd2baf0 100644 --- a/test/javascript/tests/form_submit.js +++ b/test/javascript/tests/form_submit.js @@ -11,6 +11,7 @@ // the License. // Do some basic tests. +couchTests.elixir = true; couchTests.form_submit = function(debug) { return console.log('done in test/elixir/test/form_summit_test.exs'); -- cgit v1.2.1 From 3fba9ff5bea733b4c722d2bb4f924afe416f6e35 Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Sat, 21 Mar 2020 20:00:17 -0400 Subject: Fix couch_jobs after recent db re-creation commit The e520294c7ee3f55c3e8cc7d528ff37a5a93c800f commit inadvertently changed the `fabric2_fdb:refresh/1` head matching to accept db instances with no names. `couch_jobs` uses those but they are not cached in fabric2_server. So here we return to the previous matching rule where contexts without names don't get refreshed from the cache. --- src/fabric/src/fabric2_fdb.erl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/fabric/src/fabric2_fdb.erl b/src/fabric/src/fabric2_fdb.erl index 5c72a1726..f5f7bec83 100644 --- a/src/fabric/src/fabric2_fdb.erl +++ b/src/fabric/src/fabric2_fdb.erl @@ -288,9 +288,10 @@ open(#{} = Db0, Options) -> load_validate_doc_funs(Db3). -refresh(#{tx := undefined} = Db) -> +% Match on `name` in the function head since some non-fabric2 db +% objects might not have names and so they don't get cached +refresh(#{tx := undefined, name := DbName} = Db) -> #{ - name := DbName, uuid := UUID, md_version := OldVer } = Db, -- cgit v1.2.1 From 8074a32f173b683a902d5c5f92115d434b3be262 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Thu, 19 Mar 2020 10:54:40 +0000 Subject: no need to deduplicate this list --- src/couch/src/couch_httpd_auth.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/couch/src/couch_httpd_auth.erl b/src/couch/src/couch_httpd_auth.erl index 7c55f390e..6b85a02cc 100644 --- a/src/couch/src/couch_httpd_auth.erl +++ b/src/couch/src/couch_httpd_auth.erl @@ -211,7 +211,7 @@ get_configured_algorithms() -> re:split(config:get("jwt_auth", "allowed_algorithms", "HS256"), "\s*,\s*", [{return, binary}]). get_configured_claims() -> - lists:usort(re:split(config:get("jwt_auth", "required_claims", ""), "\s*,\s*", [{return, binary}])). + re:split(config:get("jwt_auth", "required_claims", ""), "\s*,\s*", [{return, binary}]). cookie_authentication_handler(Req) -> cookie_authentication_handler(Req, couch_auth_cache). -- cgit v1.2.1 From bb86d0478412e525e810abbb4cecbdd32c6d3e11 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Thu, 19 Mar 2020 16:03:16 +0000 Subject: generate JWT token ourselves --- mix.exs | 2 ++ test/elixir/test/jwtauth_test.exs | 9 +++++---- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/mix.exs b/mix.exs index 2e4a7aa85..bab22f12f 100644 --- a/mix.exs +++ b/mix.exs @@ -65,7 +65,9 @@ defmodule CouchDBTest.Mixfile do {:junit_formatter, "~> 3.0", only: [:dev, :test, :integration]}, {:httpotion, ">= 3.1.3", only: [:dev, :test, :integration], runtime: false}, {:excoveralls, "~> 0.12", only: :test}, + {:b64url, path: Path.expand("src/b64url", __DIR__)}, {:jiffy, path: Path.expand("src/jiffy", __DIR__)}, + {:jwtf, path: Path.expand("src/jwtf", __DIR__)}, {:ibrowse, path: Path.expand("src/ibrowse", __DIR__), override: true, compile: false}, {:credo, "~> 1.3.1", only: [:dev, :test, :integration], runtime: false} diff --git a/test/elixir/test/jwtauth_test.exs b/test/elixir/test/jwtauth_test.exs index 2e78ee989..9f2074ccf 100644 --- a/test/elixir/test/jwtauth_test.exs +++ b/test/elixir/test/jwtauth_test.exs @@ -3,7 +3,7 @@ defmodule JwtAuthTest do @moduletag :authentication - test "jwt auth with secret", _context do + test "jwt auth with HS256 secret", _context do secret = "zxczxc12zxczxc12" @@ -16,13 +16,14 @@ defmodule JwtAuthTest do ] run_on_modified_server(server_config, fn -> - test_fun() + test_fun("HS256", secret) end) end - def test_fun() do + def test_fun(alg, key) do + {:ok, token} = :jwtf.encode({[{"alg", alg}, {"typ", "JWT"}]}, {[{"sub", "couch@apache.org"}]}, key) resp = Couch.get("/_session", - headers: [authorization: "Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJjb3VjaEBhcGFjaGUub3JnIn0.KYHmGXWj0HNHzZCjfOfsIfZWdguEBSn31jUdDUA9118"] + headers: [authorization: "Bearer #{token}"] ) assert resp.body["userCtx"]["name"] == "couch@apache.org" -- cgit v1.2.1 From 5c77ef0b9cf3be98db3da692527e4c8726b2fc78 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Thu, 19 Mar 2020 16:16:05 +0000 Subject: test all variants of jwt hmac --- test/elixir/test/jwtauth_test.exs | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/test/elixir/test/jwtauth_test.exs b/test/elixir/test/jwtauth_test.exs index 9f2074ccf..aee14b3c5 100644 --- a/test/elixir/test/jwtauth_test.exs +++ b/test/elixir/test/jwtauth_test.exs @@ -3,7 +3,7 @@ defmodule JwtAuthTest do @moduletag :authentication - test "jwt auth with HS256 secret", _context do + test "jwt auth with HMAC secret", _context do secret = "zxczxc12zxczxc12" @@ -12,12 +12,17 @@ defmodule JwtAuthTest do :section => "jwt_auth", :key => "secret", :value => secret + }, + %{ + :section => "jwt_auth", + :key => "allowed_algorithms", + :value => "HS256, HS384, HS512" } ] - run_on_modified_server(server_config, fn -> - test_fun("HS256", secret) - end) + run_on_modified_server(server_config, fn -> test_fun("HS256", secret) end) + run_on_modified_server(server_config, fn -> test_fun("HS384", secret) end) + run_on_modified_server(server_config, fn -> test_fun("HS512", secret) end) end def test_fun(alg, key) do -- cgit v1.2.1 From db21eda6f423e34944344ead346d63a4350918d4 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Thu, 19 Mar 2020 19:06:23 +0000 Subject: support RSA for JWT auth --- rel/overlay/etc/default.ini | 16 ++++++++++++-- src/couch/src/couch_httpd_auth.erl | 21 ++++++++++++++---- test/elixir/test/jwtauth_test.exs | 44 ++++++++++++++++++++++++++++++++++++-- 3 files changed, 73 insertions(+), 8 deletions(-) diff --git a/rel/overlay/etc/default.ini b/rel/overlay/etc/default.ini index 82a56590f..25daa4813 100644 --- a/rel/overlay/etc/default.ini +++ b/rel/overlay/etc/default.ini @@ -141,12 +141,24 @@ max_db_number_for_dbs_info_req = 100 ; admin_only_all_dbs = true ;[jwt_auth] -; Symmetric secret to be used when checking JWT token signatures -; secret = ; List of claims to validate ; required_claims = exp ; List of algorithms to accept during checks ; allowed_algorithms = HS256 +; +; [jwt_keys] +; Configure at least one key here if using the JWT auth handler. +; If your JWT tokens do not include a "kid" attribute, use "_default" +; as the config key, otherwise use the kid as the config key. +; Examples +; _default = aGVsbG8= +; foo = aGVsbG8= +; The config values can represent symmetric and asymmetrics keys. +; For symmetrics keys, the value is base64 encoded; +; _default = aGVsbG8= # base64-encoded form of "hello" +; For asymmetric keys, the value is the PEM encoding of the public +; key with newlines replaced with the escape sequence \n. +; foo = -----BEGIN PUBLIC KEY-----\nMHYwEAYHKoZIzj0CAQYFK4EEACIDYgAEDsr0lz/Dg3luarb+Kua0Wcj9WrfR23os\nwHzakglb8GhWRDn+oZT0Bt/26sX8uB4/ij9PEOLHPo+IHBtX4ELFFVr5GTzlqcJe\nyctaTDd1OOAPXYuc67EWtGZ3pDAzztRs\n-----END PUBLIC KEY-----\n\n [couch_peruser] ; If enabled, couch_peruser ensures that a private per-user database diff --git a/src/couch/src/couch_httpd_auth.erl b/src/couch/src/couch_httpd_auth.erl index 6b85a02cc..62fc694e1 100644 --- a/src/couch/src/couch_httpd_auth.erl +++ b/src/couch/src/couch_httpd_auth.erl @@ -189,11 +189,11 @@ proxy_auth_user(Req) -> end. jwt_authentication_handler(Req) -> - case {config:get("jwt_auth", "secret"), header_value(Req, "Authorization")} of - {Secret, "Bearer " ++ Jwt} when Secret /= undefined -> + case header_value(Req, "Authorization") of + "Bearer " ++ Jwt -> RequiredClaims = get_configured_claims(), AllowedAlgorithms = get_configured_algorithms(), - case jwtf:decode(?l2b(Jwt), [{alg, AllowedAlgorithms} | RequiredClaims], fun(_,_) -> Secret end) of + case jwtf:decode(?l2b(Jwt), [{alg, AllowedAlgorithms} | RequiredClaims], fun jwt_keystore/2) of {ok, {Claims}} -> case lists:keyfind(<<"sub">>, 1, Claims) of false -> throw({unauthorized, <<"Token missing sub claim.">>}); @@ -204,7 +204,7 @@ jwt_authentication_handler(Req) -> {error, Reason} -> throw({unauthorized, Reason}) end; - {_, _} -> Req + _ -> Req end. get_configured_algorithms() -> @@ -213,6 +213,19 @@ get_configured_algorithms() -> get_configured_claims() -> re:split(config:get("jwt_auth", "required_claims", ""), "\s*,\s*", [{return, binary}]). +jwt_keystore(Alg, undefined) -> + jwt_keystore(Alg, "_default"); +jwt_keystore(Alg, KID) -> + Key = config:get("jwt_keys", KID), + case jwtf:verification_algorithm(Alg) of + {hmac, _} -> + Key; + {public_key, _} -> + BinKey = ?l2b(string:replace(Key, "\\n", "\n", all)), + [PEMEntry] = public_key:pem_decode(BinKey), + public_key:pem_entry_decode(PEMEntry) + end. + cookie_authentication_handler(Req) -> cookie_authentication_handler(Req, couch_auth_cache). diff --git a/test/elixir/test/jwtauth_test.exs b/test/elixir/test/jwtauth_test.exs index aee14b3c5..6b3da9a71 100644 --- a/test/elixir/test/jwtauth_test.exs +++ b/test/elixir/test/jwtauth_test.exs @@ -9,8 +9,8 @@ defmodule JwtAuthTest do server_config = [ %{ - :section => "jwt_auth", - :key => "secret", + :section => "jwt_keys", + :key => "_default", :value => secret }, %{ @@ -25,8 +25,48 @@ defmodule JwtAuthTest do run_on_modified_server(server_config, fn -> test_fun("HS512", secret) end) end + defmodule RSA do + require Record + Record.defrecord :public, :RSAPublicKey, + Record.extract(:RSAPublicKey, from_lib: "public_key/include/public_key.hrl") + Record.defrecord :private, :RSAPrivateKey, + Record.extract(:RSAPrivateKey, from_lib: "public_key/include/public_key.hrl") + end + + test "jwt auth with RSA secret", _context do + require JwtAuthTest.RSA + + private_key = :public_key.generate_key({:rsa, 2048, 17}) + public_key = RSA.public( + modulus: RSA.private(private_key, :modulus), + publicExponent: RSA.private(private_key, :publicExponent)) + + public_pem = :public_key.pem_encode( + [:public_key.pem_entry_encode( + :SubjectPublicKeyInfo, public_key)]) + public_pem = String.replace(public_pem, "\n", "\\n") + + server_config = [ + %{ + :section => "jwt_keys", + :key => "_default", + :value => public_pem + }, + %{ + :section => "jwt_auth", + :key => "allowed_algorithms", + :value => "RS256, RS384, RS512" + } + ] + + run_on_modified_server(server_config, fn -> test_fun("RS256", private_key) end) + run_on_modified_server(server_config, fn -> test_fun("RS384", private_key) end) + run_on_modified_server(server_config, fn -> test_fun("RS512", private_key) end) + end + def test_fun(alg, key) do {:ok, token} = :jwtf.encode({[{"alg", alg}, {"typ", "JWT"}]}, {[{"sub", "couch@apache.org"}]}, key) + resp = Couch.get("/_session", headers: [authorization: "Bearer #{token}"] ) -- cgit v1.2.1 From 623ae9acbed5f60244cde30fc969e0ffb2792abf Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Fri, 20 Mar 2020 11:19:44 +0000 Subject: add EC tests --- test/elixir/test/jwtauth_test.exs | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/test/elixir/test/jwtauth_test.exs b/test/elixir/test/jwtauth_test.exs index 6b3da9a71..a8f9c50e0 100644 --- a/test/elixir/test/jwtauth_test.exs +++ b/test/elixir/test/jwtauth_test.exs @@ -64,6 +64,44 @@ defmodule JwtAuthTest do run_on_modified_server(server_config, fn -> test_fun("RS512", private_key) end) end + defmodule EC do + require Record + Record.defrecord :point, :ECPoint, + Record.extract(:ECPoint, from_lib: "public_key/include/public_key.hrl") + Record.defrecord :private, :ECPrivateKey, + Record.extract(:ECPrivateKey, from_lib: "public_key/include/public_key.hrl") + end + + test "jwt auth with EC secret", _context do + require JwtAuthTest.EC + + private_key = :public_key.generate_key({:namedCurve, :secp384r1}) + point = EC.point(point: EC.private(private_key, :publicKey)) + public_key = {point, EC.private(private_key, :parameters)} + + public_pem = :public_key.pem_encode( + [:public_key.pem_entry_encode( + :SubjectPublicKeyInfo, public_key)]) + public_pem = String.replace(public_pem, "\n", "\\n") + + server_config = [ + %{ + :section => "jwt_keys", + :key => "_default", + :value => public_pem + }, + %{ + :section => "jwt_auth", + :key => "allowed_algorithms", + :value => "ES256, ES384, ES512" + } + ] + + run_on_modified_server(server_config, fn -> test_fun("ES256", private_key) end) + run_on_modified_server(server_config, fn -> test_fun("ES384", private_key) end) + run_on_modified_server(server_config, fn -> test_fun("ES512", private_key) end) + end + def test_fun(alg, key) do {:ok, token} = :jwtf.encode({[{"alg", alg}, {"typ", "JWT"}]}, {[{"sub", "couch@apache.org"}]}, key) -- cgit v1.2.1 From c1e7c5ac2c754a342fb5fd7dc6473c1630ce422c Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Fri, 20 Mar 2020 12:32:16 +0000 Subject: Create in-memory cache of JWT keys Decoding RSA and EC keys is a little expensive and we don't want to do it for every single request. Add a cache that is invalidated on config change. --- src/couch/src/couch_httpd_auth.erl | 15 +---- src/jwtf/src/jwtf.app.src | 2 + src/jwtf/src/jwtf_app.erl | 28 +++++++++ src/jwtf/src/jwtf_keystore.erl | 118 +++++++++++++++++++++++++++++++++++++ src/jwtf/src/jwtf_sup.erl | 38 ++++++++++++ 5 files changed, 187 insertions(+), 14 deletions(-) create mode 100644 src/jwtf/src/jwtf_app.erl create mode 100644 src/jwtf/src/jwtf_keystore.erl create mode 100644 src/jwtf/src/jwtf_sup.erl diff --git a/src/couch/src/couch_httpd_auth.erl b/src/couch/src/couch_httpd_auth.erl index 62fc694e1..86d583c56 100644 --- a/src/couch/src/couch_httpd_auth.erl +++ b/src/couch/src/couch_httpd_auth.erl @@ -193,7 +193,7 @@ jwt_authentication_handler(Req) -> "Bearer " ++ Jwt -> RequiredClaims = get_configured_claims(), AllowedAlgorithms = get_configured_algorithms(), - case jwtf:decode(?l2b(Jwt), [{alg, AllowedAlgorithms} | RequiredClaims], fun jwt_keystore/2) of + case jwtf:decode(?l2b(Jwt), [{alg, AllowedAlgorithms} | RequiredClaims], fun jwtf_keystore:get/2) of {ok, {Claims}} -> case lists:keyfind(<<"sub">>, 1, Claims) of false -> throw({unauthorized, <<"Token missing sub claim.">>}); @@ -213,19 +213,6 @@ get_configured_algorithms() -> get_configured_claims() -> re:split(config:get("jwt_auth", "required_claims", ""), "\s*,\s*", [{return, binary}]). -jwt_keystore(Alg, undefined) -> - jwt_keystore(Alg, "_default"); -jwt_keystore(Alg, KID) -> - Key = config:get("jwt_keys", KID), - case jwtf:verification_algorithm(Alg) of - {hmac, _} -> - Key; - {public_key, _} -> - BinKey = ?l2b(string:replace(Key, "\\n", "\n", all)), - [PEMEntry] = public_key:pem_decode(BinKey), - public_key:pem_entry_decode(PEMEntry) - end. - cookie_authentication_handler(Req) -> cookie_authentication_handler(Req, couch_auth_cache). diff --git a/src/jwtf/src/jwtf.app.src b/src/jwtf/src/jwtf.app.src index 304bb9e0a..24081bf6f 100644 --- a/src/jwtf/src/jwtf.app.src +++ b/src/jwtf/src/jwtf.app.src @@ -18,10 +18,12 @@ kernel, stdlib, b64url, + config, crypto, jiffy, public_key ]}, + {mod, {jwtf_app, []}}, {env,[]}, {modules, []}, {maintainers, []}, diff --git a/src/jwtf/src/jwtf_app.erl b/src/jwtf/src/jwtf_app.erl new file mode 100644 index 000000000..bd708e2a3 --- /dev/null +++ b/src/jwtf/src/jwtf_app.erl @@ -0,0 +1,28 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(jwtf_app). + +-behaviour(application). + +%% Application callbacks +-export([start/2, stop/1]). + +%% =================================================================== +%% Application callbacks +%% =================================================================== + +start(_StartType, _StartArgs) -> + jwtf_sup:start_link(). + +stop(_State) -> + ok. diff --git a/src/jwtf/src/jwtf_keystore.erl b/src/jwtf/src/jwtf_keystore.erl new file mode 100644 index 000000000..82df54e5b --- /dev/null +++ b/src/jwtf/src/jwtf_keystore.erl @@ -0,0 +1,118 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(jwtf_keystore). +-behaviour(gen_server). +-behaviour(config_listener). + +% public api. +-export([ + get/2, + start_link/0 +]). + +% gen_server api. +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, + code_change/3, terminate/2]). + +% config_listener api +-export([handle_config_change/5, handle_config_terminate/3]). + +% public functions + +get(Alg, undefined) -> + get(Alg, "_default"); + +get(Alg, KID) when is_binary(KID) -> + get(Alg, binary_to_list(KID)); + +get(Alg, KID) -> + case ets:lookup(?MODULE, KID) of + [] -> + Key = get_from_config(Alg, KID), + ok = gen_server:call(?MODULE, {set, KID, Key}), + Key; + [{KID, Key}] -> + Key + end. + + +start_link() -> + gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). + +% gen_server functions + +init(_) -> + ok = config:listen_for_changes(?MODULE, nil), + ets:new(?MODULE, [public, named_table]), + {ok, nil}. + + +handle_call({set, KID, Key}, _From, State) -> + true = ets:insert(?MODULE, {KID, Key}), + {reply, ok, State}. + + +handle_cast({delete, KID}, State) -> + true = ets:delete(?MODULE, KID), + {noreply, State}; + +handle_cast(_Msg, State) -> + {noreply, State}. + + +handle_info(restart_config_listener, State) -> + ok = config:listen_for_changes(?MODULE, nil), + {noreply, State}; + +handle_info(_Msg, State) -> + {noreply, State}. + + +terminate(_Reason, _State) -> + ok. + + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + + +% config listener callback + +handle_config_change("jwt_keys", KID, _Value, _, _) -> + {ok, gen_server:cast(?MODULE, {delete, KID})}; + +handle_config_change(_, _, _, _, _) -> + {ok, nil}. + +handle_config_terminate(_Server, stop, _State) -> + ok; + +handle_config_terminate(_Server, _Reason, _State) -> + erlang:send_after(100, whereis(?MODULE), restart_config_listener). + +% private functions + +get_from_config(Alg, KID) -> + case config:get("jwt_keys", KID) of + undefined -> + throw({bad_request, <<"Unknown kid">>}); + Key -> + case jwtf:verification_algorithm(Alg) of + {hmac, _} -> + list_to_binary(Key); + {public_key, _} -> + BinKey = iolist_to_binary(string:replace(Key, "\\n", "\n", all)), + [PEMEntry] = public_key:pem_decode(BinKey), + public_key:pem_entry_decode(PEMEntry) + end + end. diff --git a/src/jwtf/src/jwtf_sup.erl b/src/jwtf/src/jwtf_sup.erl new file mode 100644 index 000000000..6f44808de --- /dev/null +++ b/src/jwtf/src/jwtf_sup.erl @@ -0,0 +1,38 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(jwtf_sup). + +-behaviour(supervisor). + +%% API +-export([start_link/0]). + +%% Supervisor callbacks +-export([init/1]). + +%% Helper macro for declaring children of supervisor +-define(CHILD(I, Type), {I, {I, start_link, []}, permanent, 5000, Type, [I]}). + +%% =================================================================== +%% API functions +%% =================================================================== + +start_link() -> + supervisor:start_link({local, ?MODULE}, ?MODULE, []). + +%% =================================================================== +%% Supervisor callbacks +%% =================================================================== + +init([]) -> + {ok, { {one_for_one, 5, 10}, [?CHILD(jwtf_keystore, worker)]} }. -- cgit v1.2.1 From dc88e3623f839246028b722dbe3b4235c27dc69e Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Fri, 20 Mar 2020 13:43:33 +0000 Subject: throw Reason directly so we send good http error responses --- src/couch/src/couch_httpd_auth.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/couch/src/couch_httpd_auth.erl b/src/couch/src/couch_httpd_auth.erl index 86d583c56..f5387d18f 100644 --- a/src/couch/src/couch_httpd_auth.erl +++ b/src/couch/src/couch_httpd_auth.erl @@ -202,7 +202,7 @@ jwt_authentication_handler(Req) -> }} end; {error, Reason} -> - throw({unauthorized, Reason}) + throw(Reason) end; _ -> Req end. -- cgit v1.2.1 From 16b3c8d6e1c39e2bd0b0bb8524e3b28ce5457973 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Fri, 20 Mar 2020 19:07:51 +0000 Subject: base64 the symmetric jwt keys --- src/jwtf/src/jwtf_keystore.erl | 2 +- test/elixir/test/jwtauth_test.exs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/jwtf/src/jwtf_keystore.erl b/src/jwtf/src/jwtf_keystore.erl index 82df54e5b..2f2f24744 100644 --- a/src/jwtf/src/jwtf_keystore.erl +++ b/src/jwtf/src/jwtf_keystore.erl @@ -109,7 +109,7 @@ get_from_config(Alg, KID) -> Key -> case jwtf:verification_algorithm(Alg) of {hmac, _} -> - list_to_binary(Key); + base64:decode(Key); {public_key, _} -> BinKey = iolist_to_binary(string:replace(Key, "\\n", "\n", all)), [PEMEntry] = public_key:pem_decode(BinKey), diff --git a/test/elixir/test/jwtauth_test.exs b/test/elixir/test/jwtauth_test.exs index a8f9c50e0..3f26e1eaf 100644 --- a/test/elixir/test/jwtauth_test.exs +++ b/test/elixir/test/jwtauth_test.exs @@ -11,7 +11,7 @@ defmodule JwtAuthTest do %{ :section => "jwt_keys", :key => "_default", - :value => secret + :value => :base64.encode(secret) }, %{ :section => "jwt_auth", -- cgit v1.2.1 From 8a5f48b1abc90c195cfef147424e912fbf838f44 Mon Sep 17 00:00:00 2001 From: Alessio Biancalana Date: Sun, 22 Mar 2020 12:51:48 +0100 Subject: Fix missing apexes in test/elixir/README.md --- test/elixir/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/elixir/README.md b/test/elixir/README.md index b2ffbc047..4a6e52a8e 100644 --- a/test/elixir/README.md +++ b/test/elixir/README.md @@ -120,8 +120,8 @@ Bellow we present a few use cases where code-generation is really helpful. ## How to write ExUnit tests -1. Create new file in test/exunit/ directory (the file name should match *_test.exs) -2. In case it is a first file in the directory create test_helper.exs (look at src/couch/test/exunit/test_helper.exs to get an idea) +1. Create new file in test/exunit/ directory (the file name should match `*_test.exs`) +2. In case it is a first file in the directory create `test_helper.exs` (look at `src/couch/test/exunit/test_helper.exs` to get an idea) 3. define test module which does `use Couch.Test.ExUnit.Case` 4. Define test cases in the module -- cgit v1.2.1 From f81f117033422c463d6230569973c9c70a1d2565 Mon Sep 17 00:00:00 2001 From: Tony Sun Date: Mon, 23 Mar 2020 10:43:34 -0700 Subject: add info endpoint for fdb stored views (#2706) * add info endpoint This commit adds the info endpoint for design docs stored in fdb. --- src/chttpd/src/chttpd_db.erl | 2 +- src/couch_views/src/couch_views.erl | 52 ++++++++- src/couch_views/test/couch_views_info_test.erl | 154 +++++++++++++++++++++++++ 3 files changed, 205 insertions(+), 3 deletions(-) create mode 100644 src/couch_views/test/couch_views_info_test.erl diff --git a/src/chttpd/src/chttpd_db.erl b/src/chttpd/src/chttpd_db.erl index b7a149b09..dea992c23 100644 --- a/src/chttpd/src/chttpd_db.erl +++ b/src/chttpd/src/chttpd_db.erl @@ -368,7 +368,7 @@ bad_action_req(#httpd{path_parts=[_, _, Name|FileNameParts]}=Req, Db, _DDoc) -> handle_design_info_req(#httpd{method='GET'}=Req, Db, #doc{} = DDoc) -> [_, _, Name, _] = Req#httpd.path_parts, - {ok, GroupInfoList} = fabric:get_view_group_info(Db, DDoc), + {ok, GroupInfoList} = couch_views:get_info(Db, DDoc), send_json(Req, 200, {[ {name, Name}, {view_index, {GroupInfoList}} diff --git a/src/couch_views/src/couch_views.erl b/src/couch_views/src/couch_views.erl index 58cfb2467..2268052f8 100644 --- a/src/couch_views/src/couch_views.erl +++ b/src/couch_views/src/couch_views.erl @@ -20,10 +20,11 @@ query/6, % fabric2_index behavior - build_indices/2 + build_indices/2, + get_info/2 ]). - +-include("couch_views.hrl"). -include_lib("couch_mrview/include/couch_mrview.hrl"). @@ -74,6 +75,45 @@ build_indices(#{} = Db, DDocs) when is_list(DDocs) -> end, DDocs). +get_info(Db, DDoc) -> + DbName = fabric2_db:name(Db), + {ok, Mrst} = couch_views_util:ddoc_to_mrst(DbName, DDoc), + Sig = fabric2_util:to_hex(Mrst#mrst.sig), + JobId = <>, + {UpdateSeq, DataSize, Status0} = fabric2_fdb:transactional(Db, fun(TxDb) -> + #{ + tx := Tx + } = TxDb, + Seq = couch_views_fdb:get_update_seq(TxDb, Mrst), + DataSize = get_total_view_size(TxDb, Mrst), + Status = couch_jobs:get_job_state(Tx, ?INDEX_JOB_TYPE, JobId), + {Seq, DataSize, Status} + end), + Status1 = case Status0 of + pending -> true; + running -> true; + _ -> false + end, + UpdateOptions = get_update_options(Mrst), + {ok, [ + {language, Mrst#mrst.language}, + {signature, Sig}, + {sizes, {[ + {active, DataSize} + ]}}, + {update_seq, UpdateSeq}, + {updater_running, Status1}, + {update_options, UpdateOptions} + ]}. + + +get_total_view_size(TxDb, Mrst) -> + ViewIds = [View#mrview.id_num || View <- Mrst#mrst.views], + lists:foldl(fun (ViewId, Total) -> + Total + couch_views_fdb:get_kv_size(TxDb, Mrst, ViewId) + end, 0, ViewIds). + + read_view(Db, Mrst, ViewName, Callback, Acc0, Args) -> fabric2_fdb:transactional(Db, fun(TxDb) -> try @@ -163,3 +203,11 @@ view_cmp(SK, SKD, EK, EKD) -> PackedSK = erlfdb_tuple:pack({BinSK, SKD}), PackedEK = erlfdb_tuple:pack({BinEK, EKD}), PackedSK =< PackedEK. + + +get_update_options(#mrst{design_opts = Opts}) -> + IncDesign = couch_util:get_value(<<"include_design">>, Opts, false), + LocalSeq = couch_util:get_value(<<"local_seq">>, Opts, false), + UpdateOptions = if IncDesign -> [include_design]; true -> [] end + ++ if LocalSeq -> [local_seq]; true -> [] end, + [atom_to_binary(O, latin1) || O <- UpdateOptions]. diff --git a/src/couch_views/test/couch_views_info_test.erl b/src/couch_views/test/couch_views_info_test.erl new file mode 100644 index 000000000..777cdb3dc --- /dev/null +++ b/src/couch_views/test/couch_views_info_test.erl @@ -0,0 +1,154 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_views_info_test). + + +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("couch/include/couch_db.hrl"). +-include_lib("couch_mrview/include/couch_mrview.hrl"). +-include_lib("fabric/test/fabric2_test.hrl"). + + +-define(MAP_FUN1, <<"map_fun1">>). + + +setup() -> + Ctx = test_util:start_couch([ + fabric, + couch_jobs, + couch_js, + couch_views + ]), + Ctx. + + +cleanup(Ctx) -> + test_util:stop_couch(Ctx). + + +foreach_setup() -> + {ok, Db} = fabric2_db:create(?tempdb(), [{user_ctx, ?ADMIN_USER}]), + DDoc = create_ddoc(), + Doc1 = doc(0, 1), + + {ok, _} = fabric2_db:update_doc(Db, DDoc, []), + {ok, _} = fabric2_db:update_doc(Db, Doc1, []), + + run_query(Db, DDoc, ?MAP_FUN1), + {ok, Info} = couch_views:get_info(Db, DDoc), + {Db, Info}. + + +foreach_teardown({Db, _}) -> + meck:unload(), + ok = fabric2_db:delete(fabric2_db:name(Db), []). + + +views_info_test_() -> + { + "Views index info test", + { + setup, + fun setup/0, + fun cleanup/1, + { + foreach, + fun foreach_setup/0, + fun foreach_teardown/1, + [ + fun sig_is_binary/1, + fun language_is_js/1, + fun update_seq_is_binary/1, + fun updater_running_is_boolean/1, + fun active_size_is_non_neg_int/1, + fun update_opts_is_bin_list/1 + ] + } + } + }. + + +sig_is_binary({_, Info}) -> + ?_assert(is_binary(prop(signature, Info))). + + +language_is_js({_, Info}) -> + ?_assertEqual(<<"javascript">>, prop(language, Info)). + + +active_size_is_non_neg_int({_, Info}) -> + ?_assert(check_non_neg_int([sizes, active], Info)). + + +updater_running_is_boolean({_, Info}) -> + ?_assert(is_boolean(prop(updater_running, Info))). + + +update_seq_is_binary({_, Info}) -> + ?_assert(is_binary(prop(update_seq, Info))). + + +update_opts_is_bin_list({_, Info}) -> + Opts = prop(update_options, Info), + ?_assert(is_list(Opts) andalso + (Opts == [] orelse lists:all([is_binary(B) || B <- Opts]))). + + +check_non_neg_int(Key, Info) -> + Size = prop(Key, Info), + is_integer(Size) andalso Size >= 0. + + +prop(Key, {Props}) when is_list(Props) -> + prop(Key, Props); + +prop([Key], Info) -> + prop(Key, Info); + +prop([Key | Rest], Info) -> + prop(Rest, prop(Key, Info)); + +prop(Key, Info) when is_atom(Key), is_list(Info) -> + couch_util:get_value(Key, Info). + + +create_ddoc() -> + couch_doc:from_json_obj({[ + {<<"_id">>, <<"_design/bar">>}, + {<<"views">>, {[ + {?MAP_FUN1, {[ + {<<"map">>, <<"function(doc) {emit(doc.val, doc.val);}">>} + ]}} + ]}} + ]}). + + +doc(Id, Val) -> + couch_doc:from_json_obj({[ + {<<"_id">>, list_to_binary(integer_to_list(Id))}, + {<<"val">>, Val} + ]}). + + +fold_fun({meta, _Meta}, Acc) -> + {ok, Acc}; + +fold_fun({row, _} = Row, Acc) -> + {ok, [Row | Acc]}; + +fold_fun(complete, Acc) -> + {ok, lists:reverse(Acc)}. + + +run_query(#{} = Db, DDoc, <<_/binary>> = View) -> + couch_views:query(Db, DDoc, View, fun fold_fun/2, [], #mrargs{}). -- cgit v1.2.1 From 5c52904c2d12e9b75450ed82aebfefc1b6100884 Mon Sep 17 00:00:00 2001 From: Joan Touzet Date: Mon, 23 Mar 2020 14:48:59 -0400 Subject: Ensure clean PATH for Windows couchdb.cmd (#2708) --- rel/files/couchdb.cmd.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rel/files/couchdb.cmd.in b/rel/files/couchdb.cmd.in index 2504f8c60..df9944196 100644 --- a/rel/files/couchdb.cmd.in +++ b/rel/files/couchdb.cmd.in @@ -23,7 +23,7 @@ FOR /F "tokens=2" %%G IN ("%START_ERL%") DO SET APP_VSN=%%G set BINDIR=%ROOTDIR%/erts-%ERTS_VSN%/bin set EMU=beam set PROGNAME=%~n0 -set PATH=%PATH%;%COUCHDB_BIN_DIR% +set PATH=%COUCHDB_BIN_DIR%;%SystemRoot%\system32;%SystemRoot%;%SystemRoot%\System32\Wbem;%SYSTEMROOT%\System32\WindowsPowerShell\v1.0\ IF NOT DEFINED COUCHDB_QUERY_SERVER_JAVASCRIPT SET COUCHDB_QUERY_SERVER_JAVASCRIPT={{prefix}}/bin/couchjs {{prefix}}/share/server/main.js IF NOT DEFINED COUCHDB_QUERY_SERVER_COFFEESCRIPT SET COUCHDB_QUERY_SERVER_COFFEESCRIPT={{prefix}}/bin/couchjs {{prefix}}/share/server/main-coffee.js -- cgit v1.2.1 From 2c704acc78c4f9f7cf8bb40420be5b8c915d39b9 Mon Sep 17 00:00:00 2001 From: jiangph Date: Mon, 23 Mar 2020 20:03:43 +0800 Subject: set DbPrefix with value allocated with erlfdb_hca Previously we are using the DbName to set DbPrefix for clarity. In order to support soft-deletion while providing efficient value for DbPrefix allocation, we use value allocated with erlfdb_hca for DbPrefix. --- src/fabric/include/fabric2.hrl | 1 + src/fabric/src/fabric2_fdb.erl | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/fabric/include/fabric2.hrl b/src/fabric/include/fabric2.hrl index a4f68bdf6..0c0757567 100644 --- a/src/fabric/include/fabric2.hrl +++ b/src/fabric/include/fabric2.hrl @@ -21,6 +21,7 @@ -define(CLUSTER_CONFIG, 0). -define(ALL_DBS, 1). +-define(DB_HCA, 2). -define(DBS, 15). -define(TX_IDS, 255). diff --git a/src/fabric/src/fabric2_fdb.erl b/src/fabric/src/fabric2_fdb.erl index f5f7bec83..14a649d50 100644 --- a/src/fabric/src/fabric2_fdb.erl +++ b/src/fabric/src/fabric2_fdb.erl @@ -177,10 +177,10 @@ create(#{} = Db0, Options) -> layer_prefix := LayerPrefix } = Db = ensure_current(Db0, false), - % Eventually DbPrefix will be HCA allocated. For now - % we're just using the DbName so that debugging is easier. DbKey = erlfdb_tuple:pack({?ALL_DBS, DbName}, LayerPrefix), - DbPrefix = erlfdb_tuple:pack({?DBS, DbName}, LayerPrefix), + HCA = erlfdb_hca:create(erlfdb_tuple:pack({?DB_HCA}, LayerPrefix)), + AllocPrefix = erlfdb_hca:allocate(HCA, Tx), + DbPrefix = erlfdb_tuple:pack({?DBS, AllocPrefix}, LayerPrefix), erlfdb:set(Tx, DbKey, DbPrefix), % This key is responsible for telling us when something in -- cgit v1.2.1 From 1890168af11fec4dff6126991d29a4eedb793ca9 Mon Sep 17 00:00:00 2001 From: Alexander Trauzzi Date: Tue, 24 Mar 2020 12:28:07 -0500 Subject: Add support for roles to be obtained from JWTs. (#2694) Add support for roles to be obtained from JWTs --- src/couch/src/couch_httpd_auth.erl | 3 ++- test/elixir/test/jwtauth_test.exs | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/couch/src/couch_httpd_auth.erl b/src/couch/src/couch_httpd_auth.erl index f5387d18f..4ad205255 100644 --- a/src/couch/src/couch_httpd_auth.erl +++ b/src/couch/src/couch_httpd_auth.erl @@ -198,7 +198,8 @@ jwt_authentication_handler(Req) -> case lists:keyfind(<<"sub">>, 1, Claims) of false -> throw({unauthorized, <<"Token missing sub claim.">>}); {_, User} -> Req#httpd{user_ctx=#user_ctx{ - name=User + name = User, + roles = couch_util:get_value(<<"roles">>, Claims, []) }} end; {error, Reason} -> diff --git a/test/elixir/test/jwtauth_test.exs b/test/elixir/test/jwtauth_test.exs index 3f26e1eaf..dc3d27df4 100644 --- a/test/elixir/test/jwtauth_test.exs +++ b/test/elixir/test/jwtauth_test.exs @@ -103,13 +103,14 @@ defmodule JwtAuthTest do end def test_fun(alg, key) do - {:ok, token} = :jwtf.encode({[{"alg", alg}, {"typ", "JWT"}]}, {[{"sub", "couch@apache.org"}]}, key) + {:ok, token} = :jwtf.encode({[{"alg", alg}, {"typ", "JWT"}]}, {[{"sub", "couch@apache.org"}, {"roles", ["testing"]}]}, key) resp = Couch.get("/_session", headers: [authorization: "Bearer #{token}"] ) assert resp.body["userCtx"]["name"] == "couch@apache.org" + assert resp.body["userCtx"]["roles"] == ["testing"] assert resp.body["info"]["authenticated"] == "jwt" end -- cgit v1.2.1 From ed83bf95de6c3eb4bf82eec5243112244f1d8277 Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Mon, 23 Mar 2020 14:27:32 -0500 Subject: Implement fabric2_db:get_design_docs/1 This is a more efficient method to get all of the design documents than relying on fabric2_db:fold_docs which doesn't load doc bodies in parallel. --- src/fabric/src/fabric2_db.erl | 32 ++++- src/fabric/test/fabric2_get_design_docs_tests.erl | 138 ++++++++++++++++++++++ 2 files changed, 167 insertions(+), 3 deletions(-) create mode 100644 src/fabric/test/fabric2_get_design_docs_tests.erl diff --git a/src/fabric/src/fabric2_db.erl b/src/fabric/src/fabric2_db.erl index 129dea2d7..ca9f037ec 100644 --- a/src/fabric/src/fabric2_db.erl +++ b/src/fabric/src/fabric2_db.erl @@ -80,9 +80,7 @@ get_full_doc_info/2, get_full_doc_infos/2, get_missing_revs/2, - %% get_design_doc/2, - %% get_design_docs/1, - %% get_design_doc_count/1, + get_design_docs/1, %% get_purge_infos/2, %% get_minimum_purge_seq/1, @@ -657,6 +655,34 @@ get_missing_revs(Db, JsonIdRevs) -> {ok, AllMissing}. +get_design_docs(Db) -> + fabric2_fdb:transactional(Db, fun(TxDb) -> + #{ + db_prefix := DbPrefix + } = TxDb, + + Prefix = erlfdb_tuple:pack({?DB_ALL_DOCS}, DbPrefix), + Options = set_design_doc_keys([]), + FoldFun = fun({Key, Val}, Acc) -> + {DocId} = erlfdb_tuple:unpack(Key, Prefix), + RevId = erlfdb_tuple:unpack(Val), + Rev = #{ + rev_id => RevId, + rev_path => [] + }, + Future = fabric2_fdb:get_doc_body_future(TxDb, DocId, Rev), + [{DocId, Rev, Future} | Acc] + end, + Futures = fabric2_fdb:fold_range(TxDb, Prefix, FoldFun, [], Options), + + % Using foldl instead of map means that the design + % docs come out in sorted order. + lists:foldl(fun({DocId, Rev, Future}, Acc) -> + [fabric2_fdb:get_doc_body_wait(TxDb, DocId, Rev, Future) | Acc] + end, [], Futures) + end). + + validate_docid(<<"">>) -> throw({illegal_docid, <<"Document id must not be empty">>}); validate_docid(<<"_design/">>) -> diff --git a/src/fabric/test/fabric2_get_design_docs_tests.erl b/src/fabric/test/fabric2_get_design_docs_tests.erl new file mode 100644 index 000000000..eb227835c --- /dev/null +++ b/src/fabric/test/fabric2_get_design_docs_tests.erl @@ -0,0 +1,138 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(fabric2_get_design_docs_tests). + + +-include_lib("couch/include/couch_db.hrl"). +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("eunit/include/eunit.hrl"). +-include("fabric2_test.hrl"). + + +get_design_docs_test_() -> + { + "Test get_design_docs", + { + setup, + fun setup_all/0, + fun cleanup_all/1, + { + foreach, + fun setup/0, + fun cleanup/1, + [ + ?TDEF_FE(empty_db), + ?TDEF_FE(get_one), + ?TDEF_FE(get_two), + ?TDEF_FE(get_many), + ?TDEF_FE(get_many_with_regular_docs), + ?TDEF_FE(dont_return_deleted_ddocs) + ] + } + } + }. + + +setup_all() -> + test_util:start_couch([fabric]). + + +cleanup_all(Ctx) -> + test_util:stop_couch(Ctx). + + +setup() -> + {ok, Db} = fabric2_db:create(?tempdb(), [{user_ctx, ?ADMIN_USER}]), + Db. + + +cleanup(Db) -> + ok = fabric2_db:delete(fabric2_db:name(Db), []). + + +empty_db(Db) -> + DDocs = fabric2_db:get_design_docs(Db), + ?assertEqual([], DDocs). + + +get_one(Db) -> + DDoc = create_ddoc(Db, <<"foo">>), + DDocs = fabric2_db:get_design_docs(Db), + ?assertEqual([DDoc], DDocs). + + +get_two(Db) -> + DDoc1 = create_ddoc(Db, <<"foo">>), + DDoc2 = create_ddoc(Db, <<"bar">>), + DDocs = fabric2_db:get_design_docs(Db), + % DDocs come back sorted + ?assertEqual([DDoc2, DDoc1], DDocs). + + +get_many(Db) -> + DDocsIn = lists:map(fun(Seq) -> + Id = io_lib:format("~2..0b", [Seq]), + create_ddoc(Db, iolist_to_binary(Id)) + end, lists:seq(1, 10)), + DDocsOut = fabric2_db:get_design_docs(Db), + ?assertEqual(DDocsIn, DDocsOut). + + +get_many_with_regular_docs(Db) -> + RegularIds = [ + <<"0">>, + <<"012aCb">>, + <<"Another_doc">>, + <<"Znother_doc">>, + <<"a_doc_as_well">>, + <<"zebra_doc">> + ], + lists:foreach(fun(DocId) -> + create_doc(Db, DocId) + end, RegularIds), + DDocsIn = lists:map(fun(Seq) -> + Id = io_lib:format("~2..0b", [Seq]), + create_ddoc(Db, iolist_to_binary(Id)) + end, lists:seq(1, 10)), + DDocsOut = fabric2_db:get_design_docs(Db), + ?assertEqual(DDocsIn, DDocsOut). + + +dont_return_deleted_ddocs(Db) -> + DDocsIn = lists:flatmap(fun(Seq) -> + Id = io_lib:format("~2..0b", [Seq]), + DDoc = create_ddoc(Db, iolist_to_binary(Id)), + case Seq rem 2 == 0 of + true -> + delete_ddoc(Db, DDoc), + []; + false -> + [DDoc] + end + end, lists:seq(1, 10)), + DDocsOut = fabric2_db:get_design_docs(Db), + ?assertEqual(DDocsIn, DDocsOut). + + +create_ddoc(Db, Id) -> + create_doc(Db, <<"_design/", Id/binary>>). + + +delete_ddoc(Db, DDoc) -> + {ok, _} = fabric2_db:update_doc(Db, DDoc#doc{deleted = true}). + + +create_doc(Db, Id) -> + Doc = #doc{id = Id}, + {ok, {Pos, Rev}} = fabric2_db:update_doc(Db, Doc), + Doc#doc{revs = {Pos, [Rev]}}. -- cgit v1.2.1 From 3523c817c903a4fb033a19808d63514754b77194 Mon Sep 17 00:00:00 2001 From: Alexander Trauzzi Date: Tue, 24 Mar 2020 14:38:20 -0500 Subject: Rename the claim used for roles to be more CouchDB specific. --- src/couch/src/couch_httpd_auth.erl | 2 +- test/elixir/test/jwtauth_test.exs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/couch/src/couch_httpd_auth.erl b/src/couch/src/couch_httpd_auth.erl index 4ad205255..43fb4161c 100644 --- a/src/couch/src/couch_httpd_auth.erl +++ b/src/couch/src/couch_httpd_auth.erl @@ -199,7 +199,7 @@ jwt_authentication_handler(Req) -> false -> throw({unauthorized, <<"Token missing sub claim.">>}); {_, User} -> Req#httpd{user_ctx=#user_ctx{ name = User, - roles = couch_util:get_value(<<"roles">>, Claims, []) + roles = couch_util:get_value(<<"_couchdb.roles">>, Claims, []) }} end; {error, Reason} -> diff --git a/test/elixir/test/jwtauth_test.exs b/test/elixir/test/jwtauth_test.exs index dc3d27df4..de5b3e65d 100644 --- a/test/elixir/test/jwtauth_test.exs +++ b/test/elixir/test/jwtauth_test.exs @@ -103,7 +103,7 @@ defmodule JwtAuthTest do end def test_fun(alg, key) do - {:ok, token} = :jwtf.encode({[{"alg", alg}, {"typ", "JWT"}]}, {[{"sub", "couch@apache.org"}, {"roles", ["testing"]}]}, key) + {:ok, token} = :jwtf.encode({[{"alg", alg}, {"typ", "JWT"}]}, {[{"sub", "couch@apache.org"}, {"_couchdb.roles", ["testing"]}]}, key) resp = Couch.get("/_session", headers: [authorization: "Bearer #{token}"] -- cgit v1.2.1 From 49dbb6af0305d0dc10cb0abec3732b2aa0b29993 Mon Sep 17 00:00:00 2001 From: Juanjo Rodriguez Date: Mon, 23 Mar 2020 00:14:30 +0100 Subject: Port purge.js into elixir test suite --- test/elixir/README.md | 2 +- test/elixir/test/purge_test.exs | 168 ++++++++++++++++++++++++++++++++++++++++ test/javascript/tests/purge.js | 2 +- 3 files changed, 170 insertions(+), 2 deletions(-) create mode 100644 test/elixir/test/purge_test.exs diff --git a/test/elixir/README.md b/test/elixir/README.md index 4a6e52a8e..4c81dd59e 100644 --- a/test/elixir/README.md +++ b/test/elixir/README.md @@ -61,7 +61,7 @@ X means done, - means partially - [X] Port method_override.js - [X] Port multiple_rows.js - [X] Port proxyauth.js - - [ ] Port purge.js + - [X] Port purge.js - [ ] Port reader_acl.js - [ ] Port recreate_doc.js - [X] Port reduce_builtin.js diff --git a/test/elixir/test/purge_test.exs b/test/elixir/test/purge_test.exs new file mode 100644 index 000000000..3920b3f26 --- /dev/null +++ b/test/elixir/test/purge_test.exs @@ -0,0 +1,168 @@ +defmodule PurgeTest do + use CouchTestCase + + @moduletag :purge + + @tag :with_db + test "purge documents", context do + db_name = context[:db_name] + + design_doc = %{ + _id: "_design/test", + language: "javascript", + views: %{ + all_docs_twice: %{ + map: "function(doc) { emit(doc.integer, null); emit(doc.integer, null) }" + }, + single_doc: %{ + map: "function(doc) { if (doc._id == \"1\") { emit(1, null) }}" + } + } + } + + {:ok, _} = create_doc(db_name, design_doc) + + num_docs = 10 + bulk_save(db_name, make_docs(1..(num_docs + 1))) + + test_all_docs_twice(db_name, num_docs, 1) + + info = info(db_name) + + doc1 = open_doc(db_name, 1) + doc2 = open_doc(db_name, 2) + + resp = + Couch.post("/#{db_name}/_purge", + body: %{"1": [doc1["_rev"]], "2": [doc2["_rev"]]} + ) + + assert resp.status_code == 201 + result = resp.body + + assert Enum.at(result["purged"]["1"], 0) == doc1["_rev"] + assert Enum.at(result["purged"]["2"], 0) == doc2["_rev"] + + open_doc(db_name, 1, 404) + open_doc(db_name, 2, 404) + + purged_info = info(db_name) + + assert purged_info["purge_seq"] != info["purge_seq"] + + test_all_docs_twice(db_name, num_docs, 0, 2) + + # purge sequences are preserved after compaction (COUCHDB-1021) + resp = Couch.post("/#{db_name}/_compact") + assert resp.status_code == 202 + + retry_until(fn -> + info(db_name)["compact_running"] == false + end) + + compacted_info = info(db_name) + assert compacted_info["purge_seq"] == purged_info["purge_seq"] + + # purge documents twice in a row without loading views + # (causes full view rebuilds) + + doc3 = open_doc(db_name, 3) + doc4 = open_doc(db_name, 4) + + resp = + Couch.post("/#{db_name}/_purge", + body: %{"3": [doc3["_rev"]]} + ) + + assert resp.status_code == 201 + + resp = + Couch.post("/#{db_name}/_purge", + body: %{"4": [doc4["_rev"]]} + ) + + assert resp.status_code == 201 + + test_all_docs_twice(db_name, num_docs, 0, 4) + end + + @tag :with_db + test "COUCHDB-1065", context do + db_name_a = context[:db_name] + db_name_b = random_db_name() + {:ok, _} = create_db(db_name_b) + + {:ok, doc_a_resp} = create_doc(db_name_a, %{_id: "test", a: 1}) + {:ok, doc_b_resp} = create_doc(db_name_b, %{_id: "test", a: 2}) + replicate(db_name_a, db_name_b) + + open_rev(db_name_b, "test", doc_a_resp.body["rev"], 200) + open_rev(db_name_b, "test", doc_b_resp.body["rev"], 200) + + resp = + Couch.post("/#{db_name_b}/_purge", + body: %{test: [doc_a_resp.body["rev"]]} + ) + + assert resp.status_code == 201 + + open_rev(db_name_b, "test", doc_a_resp.body["rev"], 404) + + resp = + Couch.post("/#{db_name_b}/_purge", + body: %{test: [doc_b_resp.body["rev"]]} + ) + + assert resp.status_code == 201 + + open_rev(db_name_b, "test", doc_b_resp.body["rev"], 404) + + resp = + Couch.post("/#{db_name_b}/_purge", + body: %{test: [doc_a_resp.body["rev"], doc_b_resp.body["rev"]]} + ) + + assert resp.status_code == 201 + + delete_db(db_name_b) + end + + def replicate(src, tgt, options \\ []) do + defaults = [headers: [], body: %{}, timeout: 30_000] + options = defaults |> Keyword.merge(options) |> Enum.into(%{}) + + %{body: body} = options + body = [source: src, target: tgt] |> Enum.into(body) + options = Map.put(options, :body, body) + + resp = Couch.post("/_replicate", Enum.to_list(options)) + assert HTTPotion.Response.success?(resp), "#{inspect(resp)}" + resp.body + end + + defp open_doc(db_name, id, expect \\ 200) do + resp = Couch.get("/#{db_name}/#{id}") + assert resp.status_code == expect + resp.body + end + + defp open_rev(db_name, id, rev, expect) do + resp = Couch.get("/#{db_name}/#{id}?rev=#{rev}") + assert resp.status_code == expect + resp.body + end + + defp test_all_docs_twice(db_name, num_docs, sigle_doc_expect, offset \\ 0) do + resp = Couch.get("/#{db_name}/_design/test/_view/all_docs_twice") + assert resp.status_code == 200 + rows = resp.body["rows"] + + for x <- 0..(num_docs - offset) do + assert Map.get(Enum.at(rows, 2 * x), "key") == x + offset + 1 + assert Map.get(Enum.at(rows, 2 * x + 1), "key") == x + offset + 1 + end + + resp = Couch.get("/#{db_name}/_design/test/_view/single_doc") + assert resp.body["total_rows"] == sigle_doc_expect + end +end diff --git a/test/javascript/tests/purge.js b/test/javascript/tests/purge.js index 0c11d9ad8..15fd63710 100644 --- a/test/javascript/tests/purge.js +++ b/test/javascript/tests/purge.js @@ -9,7 +9,7 @@ // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the // License for the specific language governing permissions and limitations under // the License. - +couchTests.elixir = true; couchTests.purge = function(debug) { var db_name = get_random_db_name(); var db = new CouchDB(db_name, {"X-Couch-Full-Commit":"false"}); -- cgit v1.2.1 From e26d99ee80e473cbfbe7ee0347cd3d518df3cb3c Mon Sep 17 00:00:00 2001 From: Alessio Biancalana Date: Sun, 22 Mar 2020 22:26:57 +0100 Subject: Port view_pagination integration test to elixir test suite --- test/elixir/README.md | 2 +- test/elixir/test/view_pagination_test.exs | 189 ++++++++++++++++++++++++++++++ test/javascript/tests/view_pagination.js | 2 + 3 files changed, 192 insertions(+), 1 deletion(-) create mode 100644 test/elixir/test/view_pagination_test.exs diff --git a/test/elixir/README.md b/test/elixir/README.md index 4c81dd59e..453614700 100644 --- a/test/elixir/README.md +++ b/test/elixir/README.md @@ -108,7 +108,7 @@ X means done, - means partially - [ ] Port view_multi_key_design.js - [ ] Port view_multi_key_temp.js - [ ] Port view_offsets.js - - [ ] Port view_pagination.js + - [X] Port view_pagination.js - [ ] Port view_sandboxing.js - [ ] Port view_update_seq.js diff --git a/test/elixir/test/view_pagination_test.exs b/test/elixir/test/view_pagination_test.exs new file mode 100644 index 000000000..322b653cb --- /dev/null +++ b/test/elixir/test/view_pagination_test.exs @@ -0,0 +1,189 @@ +defmodule ViewPaginationTest do + use CouchTestCase + + @moduletag :view_pagination + + @moduledoc """ + Integration tests for pagination. + This is a port of the view_pagination.js test suite. + """ + + @tag :with_db + test "basic view pagination", context do + db_name = context[:db_name] + + docs = make_docs(0..99) + bulk_save(db_name, docs) + + query_function = "function(doc) { emit(doc.integer, null); }" + + 0..99 + |> Enum.filter(fn number -> rem(number, 10) === 0 end) + |> Enum.each(fn i -> + query_options = %{"startkey" => i, "startkey_docid" => i, limit: 10} + result = query(db_name, query_function, nil, query_options) + assert result["total_rows"] === length(docs) + assert length(result["rows"]) === 10 + assert result["offset"] === i + Enum.each(0..9, &assert(Enum.at(result["rows"], &1)["key"] === &1 + i)) + end) + end + + @tag :with_db + test "aliases start_key and start_key_doc_id should work", context do + db_name = context[:db_name] + + docs = make_docs(0..99) + bulk_save(db_name, docs) + + query_function = "function(doc) { emit(doc.integer, null); }" + + 0..99 + |> Enum.filter(fn number -> rem(number, 10) === 0 end) + |> Enum.each(fn i -> + query_options = %{"start_key" => i, "start_key_docid" => i, limit: 10} + result = query(db_name, query_function, nil, query_options) + assert result["total_rows"] === length(docs) + assert length(result["rows"]) === 10 + assert result["offset"] === i + Enum.each(0..9, &assert(Enum.at(result["rows"], &1)["key"] === &1 + i)) + end) + end + + @tag :with_db + test "descending view pagination", context do + db_name = context[:db_name] + + docs = make_docs(0..99) + bulk_save(db_name, docs) + + query_function = "function(doc) { emit(doc.integer, null); }" + + 100..0 + |> Enum.filter(fn number -> rem(number, 10) === 0 end) + |> Enum.map(&(&1 - 1)) + |> Enum.filter(&(&1 > 0)) + |> Enum.each(fn i -> + query_options = %{ + "startkey" => i, + "startkey_docid" => i, + limit: 10, + descending: true + } + + result = query(db_name, query_function, nil, query_options) + assert result["total_rows"] === length(docs) + assert length(result["rows"]) === 10 + assert result["offset"] === length(docs) - i - 1 + Enum.each(0..9, &assert(Enum.at(result["rows"], &1)["key"] === i - &1)) + end) + end + + @tag :with_db + test "descending=false parameter should just be ignored", context do + db_name = context[:db_name] + + docs = make_docs(0..99) + bulk_save(db_name, docs) + + query_function = "function(doc) { emit(doc.integer, null); }" + + 0..99 + |> Enum.filter(fn number -> rem(number, 10) === 0 end) + |> Enum.each(fn i -> + query_options = %{ + "start_key" => i, + "start_key_docid" => i, + limit: 10, + descending: false + } + + result = query(db_name, query_function, nil, query_options) + assert result["total_rows"] === length(docs) + assert length(result["rows"]) === 10 + assert result["offset"] === i + Enum.each(0..9, &assert(Enum.at(result["rows"], &1)["key"] === &1 + i)) + end) + end + + @tag :with_db + test "endkey document id", context do + db_name = context[:db_name] + + docs = make_docs(0..99) + bulk_save(db_name, docs) + + query_function = "function(doc) { emit(null, null); }" + + query_options = %{ + "startkey" => :null, + "startkey_docid" => 1, + "endkey" => :null, + "endkey_docid" => 40, + } + + result = query(db_name, query_function, nil, query_options) + test_end_key_doc_id(result, docs) + end + + @tag :with_db + test "endkey document id, but with end_key_doc_id alias", context do + db_name = context[:db_name] + + docs = make_docs(0..99) + bulk_save(db_name, docs) + + query_function = "function(doc) { emit(null, null); }" + + query_options = %{ + "start_key" => :null, + "start_key_doc_id" => 1, + "end_key" => :null, + "end_key_doc_id" => 40, + } + + result = query(db_name, query_function, nil, query_options) + test_end_key_doc_id(result, docs) + end + + defp test_end_key_doc_id(query_result, docs) do + assert length(query_result["rows"]) === 35 + assert query_result["total_rows"] === length(docs) + assert query_result["offset"] === 1 + assert Enum.at(query_result["rows"], 0)["id"] === "1" + assert Enum.at(query_result["rows"], 1)["id"] === "10" + assert Enum.at(query_result["rows"], 2)["id"] === "11" + assert Enum.at(query_result["rows"], 3)["id"] === "12" + assert Enum.at(query_result["rows"], 4)["id"] === "13" + assert Enum.at(query_result["rows"], 5)["id"] === "14" + assert Enum.at(query_result["rows"], 6)["id"] === "15" + assert Enum.at(query_result["rows"], 7)["id"] === "16" + assert Enum.at(query_result["rows"], 8)["id"] === "17" + assert Enum.at(query_result["rows"], 9)["id"] === "18" + assert Enum.at(query_result["rows"], 10)["id"] === "19" + assert Enum.at(query_result["rows"], 11)["id"] === "2" + assert Enum.at(query_result["rows"], 12)["id"] === "20" + assert Enum.at(query_result["rows"], 13)["id"] === "21" + assert Enum.at(query_result["rows"], 14)["id"] === "22" + assert Enum.at(query_result["rows"], 15)["id"] === "23" + assert Enum.at(query_result["rows"], 16)["id"] === "24" + assert Enum.at(query_result["rows"], 17)["id"] === "25" + assert Enum.at(query_result["rows"], 18)["id"] === "26" + assert Enum.at(query_result["rows"], 19)["id"] === "27" + assert Enum.at(query_result["rows"], 20)["id"] === "28" + assert Enum.at(query_result["rows"], 21)["id"] === "29" + assert Enum.at(query_result["rows"], 22)["id"] === "3" + assert Enum.at(query_result["rows"], 23)["id"] === "30" + assert Enum.at(query_result["rows"], 24)["id"] === "31" + assert Enum.at(query_result["rows"], 25)["id"] === "32" + assert Enum.at(query_result["rows"], 26)["id"] === "33" + assert Enum.at(query_result["rows"], 27)["id"] === "34" + assert Enum.at(query_result["rows"], 28)["id"] === "35" + assert Enum.at(query_result["rows"], 29)["id"] === "36" + assert Enum.at(query_result["rows"], 30)["id"] === "37" + assert Enum.at(query_result["rows"], 31)["id"] === "38" + assert Enum.at(query_result["rows"], 32)["id"] === "39" + assert Enum.at(query_result["rows"], 33)["id"] === "4" + assert Enum.at(query_result["rows"], 34)["id"] === "40" + end +end diff --git a/test/javascript/tests/view_pagination.js b/test/javascript/tests/view_pagination.js index df5390eb3..6da5f8d48 100644 --- a/test/javascript/tests/view_pagination.js +++ b/test/javascript/tests/view_pagination.js @@ -10,6 +10,8 @@ // License for the specific language governing permissions and limitations under // the License. +couchTests.elixir = true; + couchTests.view_pagination = function(debug) { var db_name = get_random_db_name(); var db = new CouchDB(db_name, {"X-Couch-Full-Commit":"false"}); -- cgit v1.2.1 From 0f27bf5949b4489f4e47516c1018ee8fcac1f305 Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Tue, 24 Mar 2020 18:07:07 -0400 Subject: Improve fabric2_events * Avoid a cause clause error in after 0 when the database is deleted * Handle db re-creation by checking the instance UUID during fabric2_db:open/2 Since we added a few extra arguments switch to use a map as the State --- src/fabric/src/fabric2_events.erl | 52 +++++++++++++++++++-------- src/fabric/test/fabric2_db_misc_tests.erl | 59 ++++++++++++++++++++++++++++++- 2 files changed, 95 insertions(+), 16 deletions(-) diff --git a/src/fabric/src/fabric2_events.erl b/src/fabric/src/fabric2_events.erl index 094ca2fdb..e1198243a 100644 --- a/src/fabric/src/fabric2_events.erl +++ b/src/fabric/src/fabric2_events.erl @@ -19,17 +19,24 @@ ]). -export([ - init/5, - poll/5 + init/2, + poll/1 ]). -include_lib("couch/include/couch_db.hrl"). -link_listener(Mod, Fun, St, Options) -> - DbName = fabric2_util:get_value(dbname, Options), - Pid = spawn_link(?MODULE, init, [self(), DbName, Mod, Fun, St]), +link_listener(Mod, Fun, Acc, Options) -> + State = #{ + dbname => fabric2_util:get_value(dbname, Options), + uuid => fabric2_util:get_value(uuid, Options, undefined), + timeout => fabric2_util:get_value(timeout, Options, 1000), + mod => Mod, + callback => Fun, + acc => Acc + }, + Pid = spawn_link(?MODULE, init, [self(), State]), receive {Pid, initialized} -> ok end, @@ -40,29 +47,40 @@ stop_listener(Pid) -> Pid ! stop_listening. -init(Parent, DbName, Mod, Fun, St) -> +init(Parent, #{dbname := DbName} = State) -> {ok, Db} = fabric2_db:open(DbName, [?ADMIN_CTX]), Since = fabric2_db:get_update_seq(Db), erlang:monitor(process, Parent), Parent ! {self(), initialized}, - poll(DbName, Since, Mod, Fun, St). + poll(State#{since => Since}). -poll(DbName, Since, Mod, Fun, St) -> +poll(#{} = State) -> + #{ + dbname := DbName, + uuid := DbUUID, + timeout := Timeout, + since := Since, + mod := Mod, + callback := Fun, + acc := Acc + } = State, {Resp, NewSince} = try - case fabric2_db:open(DbName, [?ADMIN_CTX]) of + Opts = [?ADMIN_CTX, {uuid, DbUUID}], + case fabric2_db:open(DbName, Opts) of {ok, Db} -> case fabric2_db:get_update_seq(Db) of Since -> - {{ok, St}, Since}; + {{ok, Acc}, Since}; Other -> - {Mod:Fun(DbName, updated, St), Other} + {Mod:Fun(DbName, updated, Acc), Other} end; Error -> exit(Error) end catch error:database_does_not_exist -> - Mod:Fun(DbName, deleted, St) + Mod:Fun(DbName, deleted, Acc), + {{stop, ok}, Since} end, receive stop_listening -> @@ -71,9 +89,13 @@ poll(DbName, Since, Mod, Fun, St) -> ok after 0 -> case Resp of - {ok, NewSt} -> - timer:sleep(1000), - ?MODULE:poll(DbName, NewSince, Mod, Fun, NewSt); + {ok, NewAcc} -> + timer:sleep(Timeout), + NewState = State#{ + since := NewSince, + acc := NewAcc + }, + ?MODULE:poll(NewState); {stop, _} -> ok end diff --git a/src/fabric/test/fabric2_db_misc_tests.erl b/src/fabric/test/fabric2_db_misc_tests.erl index fe0ae9faa..19599823e 100644 --- a/src/fabric/test/fabric2_db_misc_tests.erl +++ b/src/fabric/test/fabric2_db_misc_tests.erl @@ -13,6 +13,12 @@ -module(fabric2_db_misc_tests). +% Used in events_listener test +-export([ + event_listener_callback/3 +]). + + -include_lib("couch/include/couch_db.hrl"). -include_lib("couch/include/couch_eunit.hrl"). -include_lib("eunit/include/eunit.hrl"). @@ -42,7 +48,8 @@ misc_test_() -> ?TDEF(get_full_doc_infos), ?TDEF(ensure_full_commit), ?TDEF(metadata_bump), - ?TDEF(db_version_bump) + ?TDEF(db_version_bump), + ?TDEF(events_listener) ]) } }. @@ -334,3 +341,53 @@ db_version_bump({DbName, _, _}) -> % Check that db handle in the cache got the new metadata version ?assertMatch(#{db_version := NewDbVersion}, Db2). + + +events_listener({DbName, Db, _}) -> + Opts = [ + {dbname, DbName}, + {uuid, fabric2_db:get_uuid(Db)}, + {timeout, 100} + ], + + Fun = event_listener_callback, + {ok, Pid} = fabric2_events:link_listener(?MODULE, Fun, self(), Opts), + unlink(Pid), + Ref = monitor(process, Pid), + + NextEvent = fun(Timeout) -> + receive + {Pid, Evt} when is_pid(Pid) -> Evt; + {'DOWN', Ref, _, _, normal} -> exited_normal + after Timeout -> + timeout + end + end, + + Doc1 = #doc{id = couch_uuids:random()}, + {ok, _} = fabric2_db:update_doc(Db, Doc1, []), + ?assertEqual(updated, NextEvent(1000)), + + % Just one update, then expect a timeout + ?assertEqual(timeout, NextEvent(500)), + + Doc2 = #doc{id = couch_uuids:random()}, + {ok, _} = fabric2_db:update_doc(Db, Doc2, []), + ?assertEqual(updated, NextEvent(1000)), + + % Process is still alive + ?assert(is_process_alive(Pid)), + + % Recreate db + ok = fabric2_db:delete(DbName, [?ADMIN_CTX]), + {ok, _} = fabric2_db:create(DbName, [?ADMIN_CTX]), + ?assertEqual(deleted, NextEvent(1000)), + + % After db is deleted or re-created listener should die + ?assertEqual(exited_normal, NextEvent(1000)). + + +% Callback for event_listener function +event_listener_callback(_DbName, Event, TestPid) -> + TestPid ! {self(), Event}, + {ok, TestPid}. -- cgit v1.2.1 From e06c5f360ad3b217591267eb2b05e305899b8887 Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Mon, 23 Mar 2020 16:19:16 -0400 Subject: Handle db re-creation in view indexing Add the db instance id to indexing job data. During indexing ensure the database is opened with the `{uuid, DbUUID}` option. After that any stale db reads in `update/3` will throw the `database_does_not_exist` error. In addition, when the indexing job is re-submitted in `build_view_async/2`, check if it contains a reference to an old db instance id and replace the job. That has to happen since couch_jobs doesn't overwrite job data for running jobs. --- src/couch_views/src/couch_views_indexer.erl | 52 +++++----- src/couch_views/src/couch_views_jobs.erl | 15 ++- src/couch_views/test/couch_views_indexer_test.erl | 111 +++++++++++++++++++++- 3 files changed, 152 insertions(+), 26 deletions(-) diff --git a/src/couch_views/src/couch_views_indexer.erl b/src/couch_views/src/couch_views_indexer.erl index 04dbcf815..b41d0679b 100644 --- a/src/couch_views/src/couch_views_indexer.erl +++ b/src/couch_views/src/couch_views_indexer.erl @@ -46,45 +46,35 @@ init() -> Data = upgrade_data(Data0), #{ <<"db_name">> := DbName, + <<"db_uuid">> := DbUUID, <<"ddoc_id">> := DDocId, <<"sig">> := JobSig, <<"retries">> := Retries } = Data, {ok, Db} = try - fabric2_db:open(DbName, [?ADMIN_CTX]) + fabric2_db:open(DbName, [?ADMIN_CTX, {uuid, DbUUID}]) catch error:database_does_not_exist -> - couch_jobs:finish(undefined, Job, Data#{ - error => db_deleted, - reason => "Database was deleted" - }), - exit(normal) + fail_job(Job, Data, db_deleted, "Database was deleted") end, {ok, DDoc} = case fabric2_db:open_doc(Db, DDocId) of {ok, DDoc0} -> {ok, DDoc0}; {not_found, _} -> - couch_jobs:finish(undefined, Job, Data#{ - error => ddoc_deleted, - reason => "Design document was deleted" - }), - exit(normal) + fail_job(Job, Data, ddoc_deleted, "Design document was deleted") end, {ok, Mrst} = couch_views_util:ddoc_to_mrst(DbName, DDoc), HexSig = fabric2_util:to_hex(Mrst#mrst.sig), - if HexSig == JobSig -> ok; true -> - couch_jobs:finish(undefined, Job, Data#{ - error => sig_changed, - reason => <<"Design document was modified">> - }), - exit(normal) + if HexSig == JobSig -> ok; true -> + fail_job(Job, Data, sig_changed, "Design document was modified") end, State = #{ tx_db => undefined, + db_uuid => DbUUID, db_seq => undefined, view_seq => undefined, last_seq => undefined, @@ -101,6 +91,8 @@ init() -> catch exit:normal -> ok; + error:database_does_not_exist -> + fail_job(Job, Data, db_deleted, "Database was deleted"); Error:Reason -> NewRetry = Retries + 1, RetryLimit = retry_limit(), @@ -115,18 +107,22 @@ init() -> StateErr = State#{job_data := DataErr, last_seq := <<"0">>}, report_progress(StateErr, update); false -> - NewData = add_error(Error, Reason, Data), - couch_jobs:finish(undefined, Job, NewData), - exit(normal) + fail_job(Job, Data, Error, Reason) end end. upgrade_data(Data) -> - case maps:is_key(<<"retries">>, Data) of - true -> Data; - false -> Data#{<<"retries">> =>0} - end. + Defaults = [ + {<<"retries">>, 0}, + {<<"db_uuid">>, undefined} + ], + lists:foldl(fun({Key, Default}, Acc) -> + case maps:is_key(Key, Acc) of + true -> Acc; + false -> maps:put(Key, Default, Acc) + end + end, Data, Defaults). % Transaction limit exceeded don't retry @@ -433,6 +429,7 @@ report_progress(State, UpdateType) -> #{ <<"db_name">> := DbName, + <<"db_uuid">> := DbUUID, <<"ddoc_id">> := DDocId, <<"sig">> := Sig, <<"retries">> := Retries @@ -442,6 +439,7 @@ report_progress(State, UpdateType) -> % possible existing error state. NewData = #{ <<"db_name">> => DbName, + <<"db_uuid">> => DbUUID, <<"ddoc_id">> => DDocId, <<"sig">> => Sig, <<"view_seq">> => LastSeq, @@ -468,6 +466,12 @@ report_progress(State, UpdateType) -> end. +fail_job(Job, Data, Error, Reason) -> + NewData = add_error(Error, Reason, Data), + couch_jobs:finish(undefined, Job, NewData), + exit(normal). + + num_changes() -> config:get_integer("couch_views", "change_limit", 100). diff --git a/src/couch_views/src/couch_views_jobs.erl b/src/couch_views/src/couch_views_jobs.erl index 937146ce8..1604841f1 100644 --- a/src/couch_views/src/couch_views_jobs.erl +++ b/src/couch_views/src/couch_views_jobs.erl @@ -43,7 +43,19 @@ build_view(TxDb, Mrst, UpdateSeq) -> build_view_async(TxDb, Mrst) -> JobId = job_id(TxDb, Mrst), JobData = job_data(TxDb, Mrst), - ok = couch_jobs:add(undefined, ?INDEX_JOB_TYPE, JobId, JobData), + DbUUID = fabric2_db:get_uuid(TxDb), + couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(), fun(JTx) -> + case couch_jobs:get_job_data(JTx, ?INDEX_JOB_TYPE, JobId) of + {error, not_found} -> + ok; + {ok, #{} = OldJobData} -> + case maps:get(<<"db_uuid">>, OldJobData, undefined) of + DbUUID -> ok; + _ -> couch_jobs:remove(JTx, ?INDEX_JOB_TYPE, JobId) + end + end, + ok = couch_jobs:add(JTx, ?INDEX_JOB_TYPE, JobId, JobData) + end), {ok, JobId}. @@ -95,6 +107,7 @@ job_data(Db, Mrst) -> #{ db_name => fabric2_db:name(Db), + db_uuid => fabric2_db:get_uuid(Db), ddoc_id => DDocId, sig => fabric2_util:to_hex(Sig), retries => 0 diff --git a/src/couch_views/test/couch_views_indexer_test.erl b/src/couch_views/test/couch_views_indexer_test.erl index 5475cf68e..8f8f3c5cb 100644 --- a/src/couch_views/test/couch_views_indexer_test.erl +++ b/src/couch_views/test/couch_views_indexer_test.erl @@ -16,6 +16,7 @@ -include_lib("couch/include/couch_db.hrl"). -include_lib("couch/include/couch_eunit.hrl"). -include_lib("couch_mrview/include/couch_mrview.hrl"). +-include_lib("couch_views/include/couch_views.hrl"). -include_lib("fabric/test/fabric2_test.hrl"). @@ -47,7 +48,9 @@ indexer_test_() -> ?TDEF_FE(fewer_multipe_identical_keys_from_same_doc), ?TDEF_FE(handle_size_key_limits), ?TDEF_FE(handle_size_value_limits), - ?TDEF_FE(index_autoupdater_callback) + ?TDEF_FE(index_autoupdater_callback), + ?TDEF_FE(handle_db_recreated_when_running), + ?TDEF_FE(handle_db_recreated_after_finished) ] } } @@ -75,6 +78,7 @@ foreach_setup() -> foreach_teardown(Db) -> meck:unload(), + config:delete("couch_views", "change_limit"), ok = fabric2_db:delete(fabric2_db:name(Db), []). @@ -372,6 +376,87 @@ index_autoupdater_callback(Db) -> ?assertEqual(ok, couch_views_jobs:wait_for_job(JobId, DbSeq)). +handle_db_recreated_when_running(Db) -> + DbName = fabric2_db:name(Db), + + DDoc = create_ddoc(), + {ok, _} = fabric2_db:update_doc(Db, DDoc, []), + {ok, _} = fabric2_db:update_doc(Db, doc(0), []), + {ok, _} = fabric2_db:update_doc(Db, doc(1), []), + + % To intercept job building while it is running ensure updates happen one + % row at a time. + config:set("couch_views", "change_limit", "1", false), + + meck_intercept_job_update(self()), + + [{ok, JobId}] = couch_views:build_indices(Db, [DDoc]), + + {Indexer, _Job, _Data} = wait_indexer_update(10000), + + {ok, State} = couch_jobs:get_job_state(undefined, ?INDEX_JOB_TYPE, JobId), + ?assertEqual(running, State), + + {ok, SubId, running, _} = couch_jobs:subscribe(?INDEX_JOB_TYPE, JobId), + + ok = fabric2_db:delete(DbName, []), + {ok, Db1} = fabric2_db:create(DbName, [?ADMIN_CTX]), + + Indexer ! continue, + + ?assertMatch({ + ?INDEX_JOB_TYPE, + JobId, + finished, + #{<<"error">> := <<"db_deleted">>} + }, couch_jobs:wait(SubId, infinity)), + + {ok, _} = fabric2_db:update_doc(Db1, DDoc, []), + {ok, _} = fabric2_db:update_doc(Db1, doc(2), []), + {ok, _} = fabric2_db:update_doc(Db1, doc(3), []), + + reset_intercept_job_update(Indexer), + + {ok, Out2} = run_query(Db1, DDoc, ?MAP_FUN1), + ?assertEqual([ + row(<<"2">>, 2, 2), + row(<<"3">>, 3, 3) + ], Out2). + + +handle_db_recreated_after_finished(Db) -> + DbName = fabric2_db:name(Db), + + DDoc = create_ddoc(), + {ok, _} = fabric2_db:update_doc(Db, DDoc, []), + {ok, _} = fabric2_db:update_doc(Db, doc(0), []), + {ok, _} = fabric2_db:update_doc(Db, doc(1), []), + + {ok, Out1} = run_query(Db, DDoc, ?MAP_FUN1), + ?assertEqual([ + row(<<"0">>, 0, 0), + row(<<"1">>, 1, 1) + ], Out1), + + ok = fabric2_db:delete(DbName, []), + + ?assertError(database_does_not_exist, run_query(Db, DDoc, ?MAP_FUN1)), + + {ok, Db1} = fabric2_db:create(DbName, [?ADMIN_CTX]), + + {ok, _} = fabric2_db:update_doc(Db1, DDoc, []), + {ok, _} = fabric2_db:update_doc(Db1, doc(2), []), + {ok, _} = fabric2_db:update_doc(Db1, doc(3), []), + + ?assertError(database_does_not_exist, run_query(Db, DDoc, ?MAP_FUN1)), + + {ok, Out2} = run_query(Db1, DDoc, ?MAP_FUN1), + ?assertEqual([ + row(<<"2">>, 2, 2), + row(<<"3">>, 3, 3) + ], Out2). + + row(Id, Key, Value) -> {row, [ {id, Id}, @@ -480,3 +565,27 @@ doc(Id, Val) -> run_query(#{} = Db, DDoc, <<_/binary>> = View) -> couch_views:query(Db, DDoc, View, fun fold_fun/2, [], #mrargs{}). + + +meck_intercept_job_update(ParentPid) -> + meck:new(couch_jobs, [passthrough]), + meck:expect(couch_jobs, update, fun(Db, Job, Data) -> + ParentPid ! {self(), Job, Data}, + receive continue -> ok end, + meck:passthrough([Db, Job, Data]) + end). + + +reset_intercept_job_update(IndexerPid) -> + meck:expect(couch_jobs, update, fun(Db, Job, Data) -> + meck:passthrough([Db, Job, Data]) + end), + IndexerPid ! continue. + + +wait_indexer_update(Timeout) -> + receive + {Pid, Job, Data} when is_pid(Pid) -> {Pid, Job, Data} + after Timeout -> + error(timeout_in_wait_indexer_update) + end. -- cgit v1.2.1 From f05b3ad2f1676221d5a469d8c12d2431fb871877 Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Wed, 25 Mar 2020 14:31:52 -0400 Subject: Fix db prefix checks in fabric2_fdb After the recent upgrade to using HCA we forgot to check all the places where the db prefix was constructed so a few places still used the old pattern of {?DBS, DbName}. In the case of `check_metadata_version` we also have to account for the fact that during db creation, there might not be a db_prefix in the `Db` handle yet. --- src/fabric/src/fabric2_fdb.erl | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/src/fabric/src/fabric2_fdb.erl b/src/fabric/src/fabric2_fdb.erl index 14a649d50..2911dbdf5 100644 --- a/src/fabric/src/fabric2_fdb.erl +++ b/src/fabric/src/fabric2_fdb.erl @@ -1115,8 +1115,6 @@ bump_metadata_version(Tx) -> check_metadata_version(#{} = Db) -> #{ tx := Tx, - layer_prefix := LayerPrefix, - name := DbName, md_version := Version } = Db, @@ -1126,10 +1124,16 @@ check_metadata_version(#{} = Db) -> Version -> put(?PDICT_CHECKED_MD_IS_CURRENT, true), % We want to set a read conflict on the db version as we'd want - % to to conflict with any writes to this particular db - DbPrefix = erlfdb_tuple:pack({?DBS, DbName}, LayerPrefix), - DbVersionKey = erlfdb_tuple:pack({?DB_VERSION}, DbPrefix), - erlfdb:add_read_conflict_key(Tx, DbVersionKey), + % to conflict with any writes to this particular db. However + % during db creation db prefix might not exist yet so we don't + % add a read-conflict on it then. + case maps:get(db_prefix, Db, not_found) of + not_found -> + ok; + <<_/binary>> = DbPrefix -> + DbVerKey = erlfdb_tuple:pack({?DB_VERSION}, DbPrefix), + erlfdb:add_read_conflict_key(Tx, DbVerKey) + end, {current, Db}; NewVersion -> {stale, Db#{md_version := NewVersion}} @@ -1690,10 +1694,8 @@ check_db_instance(#{} = Db) -> #{ tx := Tx, uuid := UUID, - name := DbName, - layer_prefix := LayerPrefix + db_prefix := DbPrefix } = Db1, - DbPrefix = erlfdb_tuple:pack({?DBS, DbName}, LayerPrefix), UUIDKey = erlfdb_tuple:pack({?DB_CONFIG, <<"uuid">>}, DbPrefix), case erlfdb:wait(erlfdb:get(Tx, UUIDKey)) of UUID -> Db1; -- cgit v1.2.1 From d67f5073f935fd971924c8093e2e4b722dfc3439 Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Wed, 25 Mar 2020 14:40:53 -0400 Subject: Make sure to clear db metadata flag before each transaction Previously we didn't reset the metadata flag in case of a transaction retry so we could have used a stale `?PDICT_CHECKED_MD_IS_CURRENT = true` value. --- src/fabric/src/fabric2_fdb.erl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/fabric/src/fabric2_fdb.erl b/src/fabric/src/fabric2_fdb.erl index 2911dbdf5..22ccc993d 100644 --- a/src/fabric/src/fabric2_fdb.erl +++ b/src/fabric/src/fabric2_fdb.erl @@ -1715,6 +1715,7 @@ get_previous_transaction_result() -> execute_transaction(Tx, Fun, LayerPrefix) -> + put(?PDICT_CHECKED_MD_IS_CURRENT, false), put(?PDICT_CHECKED_DB_IS_CURRENT, false), Result = Fun(Tx), case erlfdb:is_read_only(Tx) of -- cgit v1.2.1 From 2247322f5eeabc5ef7f5bb7719f3d6bf1a1f6ee4 Mon Sep 17 00:00:00 2001 From: Russell Branca Date: Fri, 20 Mar 2020 14:32:17 -0700 Subject: Cleanup mem3 shards_db config lookups --- src/mem3/src/mem3_reshard_dbdoc.erl | 3 +-- src/mem3/src/mem3_shards.erl | 12 ++++-------- src/mem3/src/mem3_util.erl | 13 +++++-------- 3 files changed, 10 insertions(+), 18 deletions(-) diff --git a/src/mem3/src/mem3_reshard_dbdoc.erl b/src/mem3/src/mem3_reshard_dbdoc.erl index 7eb3e9f13..4a0a35c1f 100644 --- a/src/mem3/src/mem3_reshard_dbdoc.erl +++ b/src/mem3/src/mem3_reshard_dbdoc.erl @@ -146,9 +146,8 @@ replicate_to_all_nodes(TimeoutMSec) -> write_shard_doc(#doc{id = Id} = Doc, Body) -> - DbName = ?l2b(config:get("mem3", "shards_db", "_dbs")), UpdatedDoc = Doc#doc{body = Body}, - couch_util:with_db(DbName, fun(Db) -> + couch_util:with_db(mem3_sync:shards_db(), fun(Db) -> try {ok, _} = couch_db:update_doc(Db, UpdatedDoc, []) catch diff --git a/src/mem3/src/mem3_shards.erl b/src/mem3/src/mem3_shards.erl index 110e227dd..bfee30279 100644 --- a/src/mem3/src/mem3_shards.erl +++ b/src/mem3/src/mem3_shards.erl @@ -144,8 +144,7 @@ local(DbName) -> lists:filter(Pred, for_db(DbName)). fold(Fun, Acc) -> - DbName = config:get("mem3", "shards_db", "_dbs"), - {ok, Db} = mem3_util:ensure_exists(DbName), + {ok, Db} = mem3_util:ensure_exists(mem3_sync:shards_db()), FAcc = {Db, Fun, Acc}, try {ok, LastAcc} = couch_db:fold_docs(Db, fun fold_fun/2, FAcc), @@ -309,15 +308,13 @@ fold_fun(#doc_info{}=DI, {Db, UFun, UAcc}) -> end. get_update_seq() -> - DbName = config:get("mem3", "shards_db", "_dbs"), - {ok, Db} = mem3_util:ensure_exists(DbName), + {ok, Db} = mem3_util:ensure_exists(mem3_sync:shards_db()), Seq = couch_db:get_update_seq(Db), couch_db:close(Db), Seq. listen_for_changes(Since) -> - DbName = config:get("mem3", "shards_db", "_dbs"), - {ok, Db} = mem3_util:ensure_exists(DbName), + {ok, Db} = mem3_util:ensure_exists(mem3_sync:shards_db()), Args = #changes_args{ feed = "continuous", since = Since, @@ -362,8 +359,7 @@ changes_callback(timeout, _) -> load_shards_from_disk(DbName) when is_binary(DbName) -> couch_stats:increment_counter([mem3, shard_cache, miss]), - X = ?l2b(config:get("mem3", "shards_db", "_dbs")), - {ok, Db} = mem3_util:ensure_exists(X), + {ok, Db} = mem3_util:ensure_exists(mem3_sync:shards_db()), try load_shards_from_db(Db, DbName) after diff --git a/src/mem3/src/mem3_util.erl b/src/mem3/src/mem3_util.erl index 3fc9b4f8e..619f7810a 100644 --- a/src/mem3/src/mem3_util.erl +++ b/src/mem3/src/mem3_util.erl @@ -87,13 +87,11 @@ attach_nodes([S | Rest], Acc, [Node | Nodes], UsedNodes) -> attach_nodes(Rest, [S#shard{node=Node} | Acc], Nodes, [Node | UsedNodes]). open_db_doc(DocId) -> - DbName = ?l2b(config:get("mem3", "shards_db", "_dbs")), - {ok, Db} = couch_db:open(DbName, [?ADMIN_CTX]), + {ok, Db} = couch_db:open(mem3_sync:shards_db(), [?ADMIN_CTX]), try couch_db:open_doc(Db, DocId, [ejson_body]) after couch_db:close(Db) end. write_db_doc(Doc) -> - DbName = ?l2b(config:get("mem3", "shards_db", "_dbs")), - write_db_doc(DbName, Doc, true). + write_db_doc(mem3_sync:shards_db(), Doc, true). write_db_doc(DbName, #doc{id=Id, body=Body} = Doc, ShouldMutate) -> {ok, Db} = couch_db:open(DbName, [?ADMIN_CTX]), @@ -118,8 +116,7 @@ write_db_doc(DbName, #doc{id=Id, body=Body} = Doc, ShouldMutate) -> delete_db_doc(DocId) -> gen_server:cast(mem3_shards, {cache_remove, DocId}), - DbName = ?l2b(config:get("mem3", "shards_db", "_dbs")), - delete_db_doc(DbName, DocId, true). + delete_db_doc(mem3_sync:shards_db(), DocId, true). delete_db_doc(DbName, DocId, ShouldMutate) -> {ok, Db} = couch_db:open(DbName, [?ADMIN_CTX]), @@ -324,7 +321,7 @@ live_nodes() -> % which could be a while. % replicate_dbs_to_all_nodes(Timeout) -> - DbName = ?l2b(config:get("mem3", "shards_db", "_dbs")), + DbName = mem3_sync:shards_db(), Targets= mem3_util:live_nodes() -- [node()], Res = [start_replication(node(), T, DbName, Timeout) || T <- Targets], collect_replication_results(Res, Timeout). @@ -335,7 +332,7 @@ replicate_dbs_to_all_nodes(Timeout) -> % them until they are all done. % replicate_dbs_from_all_nodes(Timeout) -> - DbName = ?l2b(config:get("mem3", "shards_db", "_dbs")), + DbName = mem3_sync:shards_db(), Sources = mem3_util:live_nodes() -- [node()], Res = [start_replication(S, node(), DbName, Timeout) || S <- Sources], collect_replication_results(Res, Timeout). -- cgit v1.2.1 From 7c831f68d9049334a2e10a8a3f5d82ba214992e4 Mon Sep 17 00:00:00 2001 From: Russell Branca Date: Fri, 20 Mar 2020 14:34:39 -0700 Subject: Ensure shards are created with db options --- src/couch/include/couch_eunit.hrl | 5 + src/fabric/src/fabric_rpc.erl | 2 +- src/fabric/test/eunit/fabric_rpc_tests.erl | 181 +++++++++++++++++++++++++++++ src/mem3/src/mem3_rpc.erl | 2 +- src/mem3/src/mem3_shards.erl | 10 ++ src/mem3/src/mem3_util.erl | 31 ++++- 6 files changed, 228 insertions(+), 3 deletions(-) create mode 100644 src/fabric/test/eunit/fabric_rpc_tests.erl diff --git a/src/couch/include/couch_eunit.hrl b/src/couch/include/couch_eunit.hrl index d3611c88b..188524893 100644 --- a/src/couch/include/couch_eunit.hrl +++ b/src/couch/include/couch_eunit.hrl @@ -49,6 +49,11 @@ Suffix = couch_uuids:random(), iolist_to_binary(["eunit-test-db-", Suffix]) end). +-define(tempshard, + fun() -> + Suffix = couch_uuids:random(), + iolist_to_binary(["shards/80000000-ffffffff/eunit-test-db-", Suffix]) + end). -define(docid, fun() -> integer_to_list(couch_util:unique_monotonic_integer()) diff --git a/src/fabric/src/fabric_rpc.erl b/src/fabric/src/fabric_rpc.erl index a67dcd148..85da3ff12 100644 --- a/src/fabric/src/fabric_rpc.erl +++ b/src/fabric/src/fabric_rpc.erl @@ -439,7 +439,7 @@ get_node_seqs(Db, Nodes) -> get_or_create_db(DbName, Options) -> - couch_db:open_int(DbName, [{create_if_missing, true} | Options]). + mem3_util:get_or_create_db(DbName, Options). get_view_cb(#mrargs{extra = Options}) -> diff --git a/src/fabric/test/eunit/fabric_rpc_tests.erl b/src/fabric/test/eunit/fabric_rpc_tests.erl new file mode 100644 index 000000000..b94caf659 --- /dev/null +++ b/src/fabric/test/eunit/fabric_rpc_tests.erl @@ -0,0 +1,181 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(fabric_rpc_tests). + + +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("couch/include/couch_db.hrl"). + + +-define(TDEF(A), {A, fun A/1}). + + +main_test_() -> + { + setup, + spawn, + fun setup_all/0, + fun teardown_all/1, + [ + { + foreach, + fun setup_no_db_or_config/0, + fun teardown_db/1, + lists:map(fun wrap/1, [ + ?TDEF(t_no_config_non_shard_db_create_succeeds) + ]) + }, + { + foreach, + fun setup_shard/0, + fun teardown_noop/1, + lists:map(fun wrap/1, [ + ?TDEF(t_no_db), + ?TDEF(t_no_config_db_create_fails_for_shard), + ?TDEF(t_no_config_db_create_fails_for_shard_rpc) + ]) + }, + { + foreach, + fun setup_shard/0, + fun teardown_db/1, + lists:map(fun wrap/1, [ + ?TDEF(t_db_create_with_config) + ]) + } + + ] + }. + + +setup_all() -> + test_util:start_couch([rexi, mem3, fabric]). + + +teardown_all(Ctx) -> + test_util:stop_couch(Ctx). + + +setup_no_db_or_config() -> + ?tempdb(). + + +setup_shard() -> + ?tempshard(). + + +teardown_noop(_DbName) -> + ok. + +teardown_db(DbName) -> + ok = couch_server:delete(DbName, []). + + +wrap({Name, Fun}) -> + fun(Arg) -> + {timeout, 60, {atom_to_list(Name), fun() -> + process_flag(trap_exit, true), + Fun(Arg) + end}} + end. + + +t_no_db(DbName) -> + ?assertEqual({not_found, no_db_file}, couch_db:open_int(DbName, [?ADMIN_CTX])). + + +t_no_config_non_shard_db_create_succeeds(DbName) -> + ?assertEqual({not_found, no_db_file}, couch_db:open_int(DbName, [?ADMIN_CTX])), + ?assertEqual(DbName, mem3:dbname(DbName)), + ?assertMatch({ok, _}, mem3_util:get_or_create_db(DbName, [?ADMIN_CTX])). + + +t_no_config_db_create_fails_for_shard(DbName) -> + ?assertEqual({not_found, no_db_file}, couch_db:open_int(DbName, [?ADMIN_CTX])), + ?assertException(throw, {error, missing_target}, mem3_util:get_or_create_db(DbName, [?ADMIN_CTX])). + + +t_no_config_db_create_fails_for_shard_rpc(DbName) -> + ?assertEqual({not_found, no_db_file}, couch_db:open_int(DbName, [?ADMIN_CTX])), + ?assertException(throw, {error, missing_target}, mem3_util:get_or_create_db(DbName, [?ADMIN_CTX])), + MFA = {fabric_rpc, get_db_info, [DbName]}, + Ref = rexi:cast(node(), self(), MFA), + Resp = receive + Resp0 -> Resp0 + end, + ?assertMatch({Ref, {'rexi_EXIT', {{error, missing_target}, _}}}, Resp). + + +t_db_create_with_config(DbName) -> + MDbName = mem3:dbname(DbName), + DbDoc = #doc{id = MDbName, body = test_db_doc()}, + + ?assertEqual({not_found, no_db_file}, couch_db:open_int(DbName, [?ADMIN_CTX])), + + %% Write the dbs db config + couch_util:with_db(mem3_sync:shards_db(), fun(Db) -> + ?assertEqual({not_found, missing}, couch_db:open_doc(Db, MDbName, [ejson_body])), + ?assertMatch({ok, _}, couch_db:update_docs(Db, [DbDoc])) + end), + + %% Test get_or_create_db loads the properties as expected + couch_util:with_db(mem3_sync:shards_db(), fun(Db) -> + ?assertMatch({ok, _}, couch_db:open_doc(Db, MDbName, [ejson_body])), + ?assertEqual({not_found, no_db_file}, couch_db:open_int(DbName, [?ADMIN_CTX])), + Resp = mem3_util:get_or_create_db(DbName, [?ADMIN_CTX]), + ?assertMatch({ok, _}, Resp), + {ok, LDb} = Resp, + + {Body} = test_db_doc(), + DbProps = mem3_util:get_shard_opts(Body), + {Props} = case couch_db_engine:get_props(LDb) of + undefined -> {[]}; + Else -> {Else} + end, + %% We don't normally store the default engine name + EngineProps = case couch_db_engine:get_engine(LDb) of + couch_bt_engine -> + []; + EngineName -> + [{engine, EngineName}] + end, + ?assertEqual([{props, Props} | EngineProps], DbProps) + end). + + +test_db_doc() -> + {[ + {<<"shard_suffix">>, ".1584997648"}, + {<<"changelog">>, [ + [<<"add">>, <<"00000000-7fffffff">>, <<"node1@127.0.0.1">>], + [<<"add">>, <<"00000000-7fffffff">>, <<"node2@127.0.0.1">>], + [<<"add">>, <<"00000000-7fffffff">>, <<"node3@127.0.0.1">>], + [<<"add">>, <<"80000000-ffffffff">>, <<"node1@127.0.0.1">>], + [<<"add">>, <<"80000000-ffffffff">>, <<"node2@127.0.0.1">>], + [<<"add">>, <<"80000000-ffffffff">>, <<"node3@127.0.0.1">>] + ]}, + {<<"by_node">>, {[ + {<<"node1@127.0.0.1">>, [<<"00000000-7fffffff">>, <<"80000000-ffffffff">>]}, + {<<"node2@127.0.0.1">>, [<<"00000000-7fffffff">>, <<"80000000-ffffffff">>]}, + {<<"node3@127.0.0.1">>, [<<"00000000-7fffffff">>, <<"80000000-ffffffff">>]} + ]}}, + {<<"by_range">>, {[ + {<<"00000000-7fffffff">>, [<<"node1@127.0.0.1">>, <<"node2@127.0.0.1">>, <<"node3@127.0.0.1">>]}, + {<<"80000000-ffffffff">>, [<<"node1@127.0.0.1">>, <<"node2@127.0.0.1">>, <<"node3@127.0.0.1">>]} + ]}}, + {<<"props">>, {[ + {partitioned, true}, + {hash, [couch_partition, hash, []]} + ]}} + ]}. + diff --git a/src/mem3/src/mem3_rpc.erl b/src/mem3/src/mem3_rpc.erl index 0991aa745..5d1c62c06 100644 --- a/src/mem3/src/mem3_rpc.erl +++ b/src/mem3/src/mem3_rpc.erl @@ -401,7 +401,7 @@ rexi_call(Node, MFA, Timeout) -> get_or_create_db(DbName, Options) -> - couch_db:open_int(DbName, [{create_if_missing, true} | Options]). + mem3_util:get_or_create_db(DbName, Options). -ifdef(TEST). diff --git a/src/mem3/src/mem3_shards.erl b/src/mem3/src/mem3_shards.erl index bfee30279..4f3323740 100644 --- a/src/mem3/src/mem3_shards.erl +++ b/src/mem3/src/mem3_shards.erl @@ -20,6 +20,7 @@ -export([handle_config_change/5, handle_config_terminate/3]). -export([start_link/0]). +-export([opts_for_db/1]). -export([for_db/1, for_db/2, for_docid/2, for_docid/3, get/3, local/1, fold/2]). -export([for_shard_range/1]). -export([set_max_size/1]). @@ -45,6 +46,15 @@ start_link() -> gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). +opts_for_db(DbName) -> + {ok, Db} = mem3_util:ensure_exists(mem3_sync:shards_db()), + case couch_db:open_doc(Db, DbName, [ejson_body]) of + {ok, #doc{body = {Props}}} -> + mem3_util:get_shard_opts(Props); + {not_found, _} -> + erlang:error(database_does_not_exist, ?b2l(DbName)) + end. + for_db(DbName) -> for_db(DbName, []). diff --git a/src/mem3/src/mem3_util.erl b/src/mem3/src/mem3_util.erl index 619f7810a..a6ac3a865 100644 --- a/src/mem3/src/mem3_util.erl +++ b/src/mem3/src/mem3_util.erl @@ -14,8 +14,9 @@ -export([name_shard/2, create_partition_map/5, build_shards/2, n_val/2, q_val/1, to_atom/1, to_integer/1, write_db_doc/1, delete_db_doc/1, - shard_info/1, ensure_exists/1, open_db_doc/1]). + shard_info/1, ensure_exists/1, open_db_doc/1, get_or_create_db/2]). -export([is_deleted/1, rotate_list/2]). +-export([get_shard_opts/1, get_engine_opt/1, get_props_opt/1]). -export([ iso8601_timestamp/0, live_nodes/0, @@ -506,6 +507,34 @@ sort_ranges_fun({B1, _}, {B2, _}) -> B1 =< B2. +get_or_create_db(DbName, Options) -> + case couch_db:open_int(DbName, Options) of + {ok, _} = OkDb -> + OkDb; + {not_found, no_db_file} -> + try + DbOpts = case mem3:dbname(DbName) of + DbName -> []; + MDbName -> mem3_shards:opts_for_db(MDbName) + end, + Options1 = [{create_if_missing, true} | Options], + Options2 = merge_opts(DbOpts, Options1), + couch_db:open_int(DbName, Options2) + catch error:database_does_not_exist -> + throw({error, missing_target}) + end; + Else -> + Else + end. + + +%% merge two proplists, atom options only valid in Old +merge_opts(New, Old) -> + lists:foldl(fun({Key, Val}, Acc) -> + lists:keystore(Key, 1, Acc, {Key, Val}) + end, Old, New). + + -ifdef(TEST). -include_lib("eunit/include/eunit.hrl"). -- cgit v1.2.1 From 9912d49b63fa948a42ef505d8055ce290878077b Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Wed, 25 Mar 2020 17:08:35 +0000 Subject: Implement AES KW algorithm* For use by the native couchdb at-rest encryption feature. * From NIST Special Publication 800-38F. --- src/couch/src/couch_keywrap.erl | 103 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 103 insertions(+) create mode 100644 src/couch/src/couch_keywrap.erl diff --git a/src/couch/src/couch_keywrap.erl b/src/couch/src/couch_keywrap.erl new file mode 100644 index 000000000..0d1e3f59d --- /dev/null +++ b/src/couch/src/couch_keywrap.erl @@ -0,0 +1,103 @@ +-module(couch_keywrap). + +%% Implementation of NIST Special Publication 800-38F +%% For wrapping and unwrapping keys with AES. + +-export([key_wrap/2, key_unwrap/2]). + +-define(ICV1, 16#A6A6A6A6A6A6A6A6). + +%% Assume old crypto api +-define(aes_ecb_encrypt(Key, Data), + crypto:block_encrypt(aes_ecb, Key, Data)). +-define(aes_ecb_decrypt(Key, Data), + crypto:block_decrypt(aes_ecb, Key, Data)). + +%% Replace macros if new crypto api is available +-ifdef(OTP_RELEASE). +-if(?OTP_RELEASE >= 22). +-define(key_alg(Key), case bit_size(Key) of 128 -> aes_128_ecb; 192 -> aes_192_ecb; 256 -> aes_256_ecb end). +-undef(aes_ecb_encrypt). +-define(aes_ecb_encrypt(Key, Data), + crypto:crypto_one_time(?key_alg(Key), Key, Data, true)). +-undef(aes_ecb_decrypt). +-define(aes_ecb_decrypt(Key, Data), + crypto:crypto_one_time(?key_alg(Key), Key, Data, false)). +-endif. +-endif. + +-spec key_wrap(WrappingKey :: binary(), KeyToWrap :: binary()) -> binary(). +key_wrap(WrappingKey, KeyToWrap) + when is_binary(WrappingKey), bit_size(KeyToWrap) rem 64 == 0 -> + N = bit_size(KeyToWrap) div 64, + wrap(WrappingKey, <>, KeyToWrap, 1, 6 * N). + +wrap(_WrappingKey, A, R, T, End) when T > End -> + <>; +wrap(WrappingKey, A, R, T, End) -> + <> = R, + <> = ?aes_ecb_encrypt(WrappingKey, <>), + wrap(WrappingKey, <<(MSB_B bxor T):64>>, <>, T + 1, End). + + +-spec key_unwrap(WrappingKey :: binary(), KeyToUnwrap :: binary()) -> binary() | fail. +key_unwrap(WrappingKey, KeyToUnwrap) + when is_binary(WrappingKey), bit_size(KeyToUnwrap) rem 64 == 0 -> + N = (bit_size(KeyToUnwrap) div 64), + <> = KeyToUnwrap, + case unwrap(WrappingKey, <>, R, 6 * (N - 1)) of + <> -> + UnwrappedKey; + _ -> + fail + end. + +unwrap(_WrappingKey, A, R, 0) -> + <>; +unwrap(WrappingKey, <>, R, T) -> + RestSize = bit_size(R) - 64, + <> = R, + <> = ?aes_ecb_decrypt(WrappingKey, <<(A bxor T):64, R2:64>>), + unwrap(WrappingKey, <>, <>, T - 1). + + +-ifdef(TEST). +-include_lib("eunit/include/eunit.hrl"). + +wrap_test_() -> + [ + %% 128 KEK / 128 DATA + test_wrap_unwrap(<<16#000102030405060708090A0B0C0D0E0F:128>>, + <<16#00112233445566778899AABBCCDDEEFF:128>>, + <<16#1FA68B0A8112B447AEF34BD8FB5A7B829D3E862371D2CFE5:192>>), + %% 192 KEK / 128 DATA + test_wrap_unwrap(<<16#000102030405060708090A0B0C0D0E0F1011121314151617:192>>, + <<16#00112233445566778899AABBCCDDEEFF:128>>, + <<16#96778B25AE6CA435F92B5B97C050AED2468AB8A17AD84E5D:192>>), + %% 256 KEK / 128 DATA + test_wrap_unwrap(<<16#000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F:256>>, + <<16#00112233445566778899AABBCCDDEEFF:128>>, + <<16#64E8C3F9CE0F5BA263E9777905818A2A93C8191E7D6E8AE7:192>>), + %% 192 KEK / 192 DATA + test_wrap_unwrap(<<16#000102030405060708090A0B0C0D0E0F1011121314151617:192>>, + <<16#00112233445566778899AABBCCDDEEFF0001020304050607:192>>, + <<16#031D33264E15D33268F24EC260743EDCE1C6C7DDEE725A936BA814915C6762D2:256>>), + %% 256 KEK / 192 DATA + test_wrap_unwrap(<<16#000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F:256>>, + <<16#00112233445566778899AABBCCDDEEFF0001020304050607:192>>, + <<16#A8F9BC1612C68B3FF6E6F4FBE30E71E4769C8B80A32CB8958CD5D17D6B254DA1:256>>), + %% 256 KEK / 256 DATA + test_wrap_unwrap(<<16#000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F:256>>, + <<16#00112233445566778899AABBCCDDEEFF000102030405060708090A0B0C0D0E0F:256>>, + <<16#28C9F404C4B810F4CBCCB35CFB87F8263F5786E2D80ED326CBC7F0E71A99F43BFB988B9B7A02DD21:320>>)]. + +test_wrap_unwrap(WrappingKey, KeyToWrap, ExpectedWrappedKey) -> + [?_assertEqual(ExpectedWrappedKey, key_wrap(WrappingKey, KeyToWrap)), + ?_assertEqual(KeyToWrap, key_unwrap(WrappingKey, key_wrap(WrappingKey, KeyToWrap)))]. + +fail_test() -> + KEK = <<16#000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F:256>>, + CipherText = <<16#28C9F404C4B810F4CBCCB35CFB87F8263F5786E2D80ED326CBC7F0E71A99F43BFB988B9B7A02DD20:320>>, + ?assertEqual(fail, key_unwrap(KEK, CipherText)). + +-endif. -- cgit v1.2.1 From 9c956676dad078016e7eb030187ce2d87738183c Mon Sep 17 00:00:00 2001 From: Russell Branca Date: Thu, 26 Mar 2020 10:05:59 -0700 Subject: Add mem3_util:find_dirty_shards function --- src/mem3/src/mem3_util.erl | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/src/mem3/src/mem3_util.erl b/src/mem3/src/mem3_util.erl index a6ac3a865..28cb17778 100644 --- a/src/mem3/src/mem3_util.erl +++ b/src/mem3/src/mem3_util.erl @@ -17,6 +17,7 @@ shard_info/1, ensure_exists/1, open_db_doc/1, get_or_create_db/2]). -export([is_deleted/1, rotate_list/2]). -export([get_shard_opts/1, get_engine_opt/1, get_props_opt/1]). +-export([get_shard_props/1, find_dirty_shards/0]). -export([ iso8601_timestamp/0, live_nodes/0, @@ -535,6 +536,47 @@ merge_opts(New, Old) -> end, Old, New). +get_shard_props(ShardName) -> + case couch_db:open_int(ShardName, []) of + {ok, Db} -> + Props = case couch_db_engine:get_props(Db) of + undefined -> []; + Else -> Else + end, + %% We don't normally store the default engine name + EngineProps = case couch_db_engine:get_engine(Db) of + couch_bt_engine -> + []; + EngineName -> + [{engine, EngineName}] + end, + [{props, Props} | EngineProps]; + {not_found, _} -> + not_found; + Else -> + Else + end. + + +find_dirty_shards() -> + mem3_shards:fold(fun(#shard{node=Node, name=Name, opts=Opts}=Shard, Acc) -> + case Opts of + [] -> + Acc; + [{props, []}] -> + Acc; + _ -> + Props = rpc:call(Node, ?MODULE, get_shard_props, [Name]), + case Props =:= Opts of + true -> + Acc; + false -> + [{Shard, Props} | Acc] + end + end + end, []). + + -ifdef(TEST). -include_lib("eunit/include/eunit.hrl"). -- cgit v1.2.1 From bf887c58952f5a2a6a9ba95851befd4d169c3538 Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Fri, 27 Mar 2020 11:29:20 -0400 Subject: Return a 400 error code for an invalid update sequence Currently we return a 500 but a 400 return code makes more sense ``` $ http $DB1/db1/_changes?since=0-1345 HTTP/1.1 400 Bad Request { "error": "invalid_since_seq", "reason": "0-1345", "ref": 442671026 } ``` --- src/chttpd/src/chttpd.erl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/chttpd/src/chttpd.erl b/src/chttpd/src/chttpd.erl index 598436153..2641007f7 100644 --- a/src/chttpd/src/chttpd.erl +++ b/src/chttpd/src/chttpd.erl @@ -943,6 +943,8 @@ error_info({error, {database_name_too_long, DbName}}) -> <<"At least one path segment of `", DbName/binary, "` is too long.">>}; error_info({doc_validation, Reason}) -> {400, <<"doc_validation">>, Reason}; +error_info({invalid_since_seq, Reason}) -> + {400, <<"invalid_since_seq">>, Reason}; error_info({missing_stub, Reason}) -> {412, <<"missing_stub">>, Reason}; error_info(request_entity_too_large) -> -- cgit v1.2.1 From a799b67642216d02ef54dbd3895c80d0785a97b2 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Fri, 27 Mar 2020 20:13:00 +0000 Subject: Only trust the servers declaration of JWT key type --- rel/overlay/etc/default.ini | 9 ++-- src/jwtf/src/jwtf_keystore.erl | 95 +++++++++++++++++++++++++---------- src/jwtf/test/jwtf_keystore_tests.erl | 57 +++++++++++++++++++++ test/elixir/test/jwtauth_test.exs | 6 +-- 4 files changed, 134 insertions(+), 33 deletions(-) create mode 100644 src/jwtf/test/jwtf_keystore_tests.erl diff --git a/rel/overlay/etc/default.ini b/rel/overlay/etc/default.ini index 25daa4813..25f1027d2 100644 --- a/rel/overlay/etc/default.ini +++ b/rel/overlay/etc/default.ini @@ -151,14 +151,15 @@ max_db_number_for_dbs_info_req = 100 ; If your JWT tokens do not include a "kid" attribute, use "_default" ; as the config key, otherwise use the kid as the config key. ; Examples -; _default = aGVsbG8= -; foo = aGVsbG8= +; hmac:_default = aGVsbG8= +; hmac:foo = aGVsbG8= ; The config values can represent symmetric and asymmetrics keys. ; For symmetrics keys, the value is base64 encoded; -; _default = aGVsbG8= # base64-encoded form of "hello" +; hmac:_default = aGVsbG8= # base64-encoded form of "hello" ; For asymmetric keys, the value is the PEM encoding of the public ; key with newlines replaced with the escape sequence \n. -; foo = -----BEGIN PUBLIC KEY-----\nMHYwEAYHKoZIzj0CAQYFK4EEACIDYgAEDsr0lz/Dg3luarb+Kua0Wcj9WrfR23os\nwHzakglb8GhWRDn+oZT0Bt/26sX8uB4/ij9PEOLHPo+IHBtX4ELFFVr5GTzlqcJe\nyctaTDd1OOAPXYuc67EWtGZ3pDAzztRs\n-----END PUBLIC KEY-----\n\n +; rsa:foo = -----BEGIN PUBLIC KEY-----\nMIIBIjAN...IDAQAB\n-----END PUBLIC KEY-----\n +; ec:bar = -----BEGIN PUBLIC KEY-----\nMHYwEAYHK...AzztRs\n-----END PUBLIC KEY-----\n [couch_peruser] ; If enabled, couch_peruser ensures that a private per-user database diff --git a/src/jwtf/src/jwtf_keystore.erl b/src/jwtf/src/jwtf_keystore.erl index 2f2f24744..be261e67c 100644 --- a/src/jwtf/src/jwtf_keystore.erl +++ b/src/jwtf/src/jwtf_keystore.erl @@ -14,6 +14,8 @@ -behaviour(gen_server). -behaviour(config_listener). +-include_lib("public_key/include/public_key.hrl"). + % public api. -export([ get/2, @@ -29,19 +31,18 @@ % public functions -get(Alg, undefined) -> - get(Alg, "_default"); - -get(Alg, KID) when is_binary(KID) -> - get(Alg, binary_to_list(KID)); +get(Alg, undefined) when is_binary(Alg) -> + get(Alg, <<"_default">>); -get(Alg, KID) -> - case ets:lookup(?MODULE, KID) of +get(Alg, KID0) when is_binary(Alg), is_binary(KID0) -> + Kty = kty(Alg), + KID = binary_to_list(KID0), + case ets:lookup(?MODULE, {Kty, KID}) of [] -> - Key = get_from_config(Alg, KID), - ok = gen_server:call(?MODULE, {set, KID, Key}), + Key = get_from_config(Kty, KID), + ok = gen_server:call(?MODULE, {set, Kty, KID, Key}), Key; - [{KID, Key}] -> + [{{Kty, KID}, Key}] -> Key end. @@ -57,13 +58,13 @@ init(_) -> {ok, nil}. -handle_call({set, KID, Key}, _From, State) -> - true = ets:insert(?MODULE, {KID, Key}), +handle_call({set, Kty, KID, Key}, _From, State) -> + true = ets:insert(?MODULE, {{Kty, KID}, Key}), {reply, ok, State}. -handle_cast({delete, KID}, State) -> - true = ets:delete(?MODULE, KID), +handle_cast({delete, Kty, KID}, State) -> + true = ets:delete(?MODULE, {Kty, KID}), {noreply, State}; handle_cast(_Msg, State) -> @@ -88,8 +89,14 @@ code_change(_OldVsn, State, _Extra) -> % config listener callback -handle_config_change("jwt_keys", KID, _Value, _, _) -> - {ok, gen_server:cast(?MODULE, {delete, KID})}; +handle_config_change("jwt_keys", ConfigKey, _ConfigValue, _, _) -> + case string:split(ConfigKey, ":") of + [Kty, KID] -> + gen_server:cast(?MODULE, {delete, Kty, KID}); + _ -> + ignored + end, + {ok, nil}; handle_config_change(_, _, _, _, _) -> {ok, nil}. @@ -102,17 +109,53 @@ handle_config_terminate(_Server, _Reason, _State) -> % private functions -get_from_config(Alg, KID) -> - case config:get("jwt_keys", KID) of +get_from_config(Kty, KID) -> + case config:get("jwt_keys", string:join([Kty, KID], ":")) of undefined -> throw({bad_request, <<"Unknown kid">>}); - Key -> - case jwtf:verification_algorithm(Alg) of - {hmac, _} -> - base64:decode(Key); - {public_key, _} -> - BinKey = iolist_to_binary(string:replace(Key, "\\n", "\n", all)), - [PEMEntry] = public_key:pem_decode(BinKey), - public_key:pem_entry_decode(PEMEntry) + Encoded -> + case Kty of + "hmac" -> + try + base64:decode(Encoded) + catch + error:_ -> + throw({bad_request, <<"Not a valid key">>}) + end; + "rsa" -> + case pem_decode(Encoded) of + #'RSAPublicKey'{} = Key -> + Key; + _ -> + throw({bad_request, <<"not an RSA public key">>}) + end; + "ec" -> + case pem_decode(Encoded) of + {#'ECPoint'{}, _} = Key -> + Key; + _ -> + throw({bad_request, <<"not an EC public key">>}) + end end end. + +pem_decode(PEM) -> + BinPEM = iolist_to_binary(string:replace(PEM, "\\n", "\n", all)), + case public_key:pem_decode(BinPEM) of + [PEMEntry] -> + public_key:pem_entry_decode(PEMEntry); + [] -> + throw({bad_request, <<"Not a valid key">>}) + end. + +kty(<<"HS", _/binary>>) -> + "hmac"; + +kty(<<"RS", _/binary>>) -> + "rsa"; + +kty(<<"ES", _/binary>>) -> + "ec"; + +kty(_) -> + throw({bad_request, <<"Unknown kty">>}). diff --git a/src/jwtf/test/jwtf_keystore_tests.erl b/src/jwtf/test/jwtf_keystore_tests.erl new file mode 100644 index 000000000..9ec943653 --- /dev/null +++ b/src/jwtf/test/jwtf_keystore_tests.erl @@ -0,0 +1,57 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(jwtf_keystore_tests). + +-include_lib("eunit/include/eunit.hrl"). +-include_lib("public_key/include/public_key.hrl"). + +-define(HMAC_SECRET, "aGVsbG8="). +-define(RSA_SECRET, "-----BEGIN PUBLIC KEY-----\\nMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAztanwQtIx0sms+x7m1SF\\nh7EHJHkM2biTJ41jR89FsDE2gd3MChpaqxemS5GpNvfFKRvuHa4PUZ3JtRCBG1KM\\n/7EWIVTy1JQDr2mb8couGlQNqz4uXN2vkNQ0XszgjU4Wn6ZpvYxmqPFbmkRe8QSn\\nAy2Wf8jQgjsbez8eaaX0G9S1hgFZUN3KFu7SVmUDQNvWpQdaJPP+ms5Z0CqF7JLa\\nvJmSdsU49nlYw9VH/XmwlUBMye6HgR4ZGCLQS85frqF0xLWvi7CsMdchcIjHudXH\\nQK1AumD/VVZVdi8Q5Qew7F6VXeXqnhbw9n6Px25cCuNuh6u5+E6GUzXRrMpqo9vO\\nqQIDAQAB\\n-----END PUBLIC KEY-----\\n"). +-define(EC_SECRET, "-----BEGIN PUBLIC KEY-----\\nMHYwEAYHKoZIzj0CAQYFK4EEACIDYgAEDsr0lz/Dg3luarb+Kua0Wcj9WrfR23os\\nwHzakglb8GhWRDn+oZT0Bt/26sX8uB4/ij9PEOLHPo+IHBtX4ELFFVr5GTzlqcJe\\nyctaTDd1OOAPXYuc67EWtGZ3pDAzztRs\\n-----END PUBLIC KEY-----\\n"). + +setup() -> + test_util:start_applications([config, jwtf]), + config:set("jwt_keys", "hmac:hmac", ?HMAC_SECRET), + config:set("jwt_keys", "rsa:hmac", ?HMAC_SECRET), + config:set("jwt_keys", "ec:hmac", ?HMAC_SECRET), + + config:set("jwt_keys", "hmac:rsa", ?RSA_SECRET), + config:set("jwt_keys", "rsa:rsa", ?RSA_SECRET), + config:set("jwt_keys", "ec:rsa", ?RSA_SECRET), + + config:set("jwt_keys", "hmac:ec", ?EC_SECRET), + config:set("jwt_keys", "rsa:ec", ?EC_SECRET), + config:set("jwt_keys", "ec:ec", ?EC_SECRET). + +teardown(_) -> + test_util:stop_applications([config, jwtf]). + +jwtf_keystore_test_() -> + { + setup, + fun setup/0, + fun teardown/1, + [ + ?_assertEqual(<<"hello">>, jwtf_keystore:get(<<"HS256">>, <<"hmac">>)), + ?_assertThrow({bad_request, _}, jwtf_keystore:get(<<"RS256">>, <<"hmac">>)), + ?_assertThrow({bad_request, _}, jwtf_keystore:get(<<"ES256">>, <<"hmac">>)), + + ?_assertThrow({bad_request, _}, jwtf_keystore:get(<<"HS256">>, <<"rsa">>)), + ?_assertMatch(#'RSAPublicKey'{}, jwtf_keystore:get(<<"RS256">>, <<"rsa">>)), + ?_assertThrow({bad_request, _}, jwtf_keystore:get(<<"ES256">>, <<"rsa">>)), + + ?_assertThrow({bad_request, _}, jwtf_keystore:get(<<"HS256">>, <<"ec">>)), + ?_assertThrow({bad_request, _}, jwtf_keystore:get(<<"RS256">>, <<"ec">>)), + ?_assertMatch({#'ECPoint'{}, _}, jwtf_keystore:get(<<"ES256">>, <<"ec">>)) + ] + }. diff --git a/test/elixir/test/jwtauth_test.exs b/test/elixir/test/jwtauth_test.exs index de5b3e65d..c50225cbd 100644 --- a/test/elixir/test/jwtauth_test.exs +++ b/test/elixir/test/jwtauth_test.exs @@ -10,7 +10,7 @@ defmodule JwtAuthTest do server_config = [ %{ :section => "jwt_keys", - :key => "_default", + :key => "hmac:_default", :value => :base64.encode(secret) }, %{ @@ -49,7 +49,7 @@ defmodule JwtAuthTest do server_config = [ %{ :section => "jwt_keys", - :key => "_default", + :key => "rsa:_default", :value => public_pem }, %{ @@ -87,7 +87,7 @@ defmodule JwtAuthTest do server_config = [ %{ :section => "jwt_keys", - :key => "_default", + :key => "ec:_default", :value => public_pem }, %{ -- cgit v1.2.1 From 7ca2ca5e87d18cb3d9a3f0568e545e8219cd3d0c Mon Sep 17 00:00:00 2001 From: Jay Doane Date: Sat, 28 Mar 2020 15:52:29 -0700 Subject: Don't advertise unimplemented features Removes the following features from the welcome message: - reshard - partitioned - pluggable-storage-engines - scheduler Although `scheduler` at least will presumably be returned once that feature is complete. --- src/couch/src/couch_server.erl | 6 ------ src/couch_replicator/src/couch_replicator_scheduler.erl | 3 ++- src/mem3/src/mem3_reshard.erl | 5 ----- 3 files changed, 2 insertions(+), 12 deletions(-) diff --git a/src/couch/src/couch_server.erl b/src/couch/src/couch_server.erl index 909e23898..18fa3fe61 100644 --- a/src/couch/src/couch_server.erl +++ b/src/couch/src/couch_server.erl @@ -237,12 +237,6 @@ init([]) -> couch_util:set_mqd_off_heap(?MODULE), couch_util:set_process_priority(?MODULE, high), - % Mark pluggable storage engines as a supported feature - config:enable_feature('pluggable-storage-engines'), - - % Mark partitioned databases as a supported feature - config:enable_feature(partitioned), - % Mark being able to receive documents with an _access property as a supported feature config:enable_feature('access-ready'), diff --git a/src/couch_replicator/src/couch_replicator_scheduler.erl b/src/couch_replicator/src/couch_replicator_scheduler.erl index 53c040e8c..00a352bee 100644 --- a/src/couch_replicator/src/couch_replicator_scheduler.erl +++ b/src/couch_replicator/src/couch_replicator_scheduler.erl @@ -225,7 +225,8 @@ update_job_stats(JobId, Stats) -> %% gen_server functions init(_) -> - config:enable_feature('scheduler'), + % Temporarily disable on FDB, as it's not fully implemented yet + % config:enable_feature('scheduler'), EtsOpts = [named_table, {keypos, #job.id}, {read_concurrency, true}, {write_concurrency, true}], ?MODULE = ets:new(?MODULE, EtsOpts), diff --git a/src/mem3/src/mem3_reshard.erl b/src/mem3/src/mem3_reshard.erl index 620b1bc73..234670c34 100644 --- a/src/mem3/src/mem3_reshard.erl +++ b/src/mem3/src/mem3_reshard.erl @@ -213,11 +213,6 @@ reset_state() -> % Gen server functions init(_) -> - % Advertise resharding API feature only if it is not disabled - case is_disabled() of - true -> ok; - false -> config:enable_feature('reshard') - end, couch_log:notice("~p start init()", [?MODULE]), EtsOpts = [named_table, {keypos, #job.id}, {read_concurrency, true}], ?MODULE = ets:new(?MODULE, EtsOpts), -- cgit v1.2.1 From 9b325b75814418b85ffb3642a5115635416f56a8 Mon Sep 17 00:00:00 2001 From: Garren Smith Date: Mon, 30 Mar 2020 09:58:39 +0200 Subject: change _all_docs to raw collation --- src/fabric/src/fabric2_util.erl | 8 ++------ test/elixir/test/all_docs_test.exs | 3 ++- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/src/fabric/src/fabric2_util.erl b/src/fabric/src/fabric2_util.erl index 46f9abeef..d74ef2718 100644 --- a/src/fabric/src/fabric2_util.erl +++ b/src/fabric/src/fabric2_util.erl @@ -294,13 +294,9 @@ uuid() -> to_hex(crypto:strong_rand_bytes(16)). -encode_all_doc_key(null) -> <<>>; -encode_all_doc_key(true) -> <<>>; -encode_all_doc_key(false) -> <<>>; -encode_all_doc_key(N) when is_number(N) -> <<>>; encode_all_doc_key(B) when is_binary(B) -> B; -encode_all_doc_key(L) when is_list(L) -> <<255>>; -encode_all_doc_key({O}) when is_list(O) -> <<255>>. +encode_all_doc_key(Term) when Term < <<>> -> <<>>; +encode_all_doc_key(_) -> <<255>>. pmap(Fun, Args) -> diff --git a/test/elixir/test/all_docs_test.exs b/test/elixir/test/all_docs_test.exs index 9501b3bec..16641aa95 100644 --- a/test/elixir/test/all_docs_test.exs +++ b/test/elixir/test/all_docs_test.exs @@ -319,7 +319,8 @@ defmodule AllDocsTest do resp = Couch.get("/#{db_name}/_all_docs", query: %{:startkey => "[1,2]"}).body rows = resp["rows"] - assert length(rows) === 0 + assert length(rows) === 3 + assert get_ids(resp) == ["a", "m", "z"] resp = Couch.get("/#{db_name}/_all_docs", query: %{:end_key => 0}).body rows = resp["rows"] -- cgit v1.2.1 From d291847c97576c28ed4996ad06e09bb0c905d036 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Mon, 30 Mar 2020 11:07:24 +0100 Subject: Remove enhanced alg check This mechanism is replaced by the much stronger tying of verification algorithm to the key directly in the server config. --- rel/overlay/etc/default.ini | 2 -- src/couch/src/couch_httpd_auth.erl | 6 +----- src/jwtf/src/jwtf.erl | 7 +++---- src/jwtf/test/jwtf_tests.erl | 12 +----------- 4 files changed, 5 insertions(+), 22 deletions(-) diff --git a/rel/overlay/etc/default.ini b/rel/overlay/etc/default.ini index 25f1027d2..24f504726 100644 --- a/rel/overlay/etc/default.ini +++ b/rel/overlay/etc/default.ini @@ -143,8 +143,6 @@ max_db_number_for_dbs_info_req = 100 ;[jwt_auth] ; List of claims to validate ; required_claims = exp -; List of algorithms to accept during checks -; allowed_algorithms = HS256 ; ; [jwt_keys] ; Configure at least one key here if using the JWT auth handler. diff --git a/src/couch/src/couch_httpd_auth.erl b/src/couch/src/couch_httpd_auth.erl index 43fb4161c..4f19728e9 100644 --- a/src/couch/src/couch_httpd_auth.erl +++ b/src/couch/src/couch_httpd_auth.erl @@ -192,8 +192,7 @@ jwt_authentication_handler(Req) -> case header_value(Req, "Authorization") of "Bearer " ++ Jwt -> RequiredClaims = get_configured_claims(), - AllowedAlgorithms = get_configured_algorithms(), - case jwtf:decode(?l2b(Jwt), [{alg, AllowedAlgorithms} | RequiredClaims], fun jwtf_keystore:get/2) of + case jwtf:decode(?l2b(Jwt), [alg | RequiredClaims], fun jwtf_keystore:get/2) of {ok, {Claims}} -> case lists:keyfind(<<"sub">>, 1, Claims) of false -> throw({unauthorized, <<"Token missing sub claim.">>}); @@ -208,9 +207,6 @@ jwt_authentication_handler(Req) -> _ -> Req end. -get_configured_algorithms() -> - re:split(config:get("jwt_auth", "allowed_algorithms", "HS256"), "\s*,\s*", [{return, binary}]). - get_configured_claims() -> re:split(config:get("jwt_auth", "required_claims", ""), "\s*,\s*", [{return, binary}]). diff --git a/src/jwtf/src/jwtf.erl b/src/jwtf/src/jwtf.erl index b558bdc63..098a41d24 100644 --- a/src/jwtf/src/jwtf.erl +++ b/src/jwtf/src/jwtf.erl @@ -158,11 +158,10 @@ validate_alg(Props, Checks) -> case {Required, Alg} of {undefined, _} -> ok; - {Required, undefined} when Required /= undefined -> + {true, undefined} -> throw({bad_request, <<"Missing alg header parameter">>}); - {Required, Alg} when Required == true; is_list(Required) -> - AllowedAlg = if Required == true -> true; true -> lists:member(Alg, Required) end, - case AllowedAlg andalso lists:member(Alg, valid_algorithms()) of + {true, Alg} -> + case lists:member(Alg, valid_algorithms()) of true -> ok; false -> diff --git a/src/jwtf/test/jwtf_tests.erl b/src/jwtf/test/jwtf_tests.erl index e445e5fc9..df3866f23 100644 --- a/src/jwtf/test/jwtf_tests.erl +++ b/src/jwtf/test/jwtf_tests.erl @@ -82,16 +82,6 @@ invalid_alg_test() -> ?assertEqual({error, {bad_request,<<"Invalid alg header parameter">>}}, jwtf:decode(Encoded, [alg], nil)). -not_allowed_alg_test() -> - Encoded = encode({[{<<"alg">>, <<"HS256">>}]}, []), - ?assertEqual({error, {bad_request,<<"Invalid alg header parameter">>}}, - jwtf:decode(Encoded, [{alg, [<<"RS256">>]}], nil)). - -reject_unknown_alg_test() -> - Encoded = encode({[{<<"alg">>, <<"NOPE">>}]}, []), - ?assertEqual({error, {bad_request,<<"Invalid alg header parameter">>}}, - jwtf:decode(Encoded, [{alg, [<<"NOPE">>]}], nil)). - missing_iss_test() -> Encoded = encode(valid_header(), {[]}), @@ -190,7 +180,7 @@ hs256_test() -> "6MTAwMDAwMDAwMDAwMDAsImtpZCI6ImJhciJ9.iS8AH11QHHlczkBn" "Hl9X119BYLOZyZPllOVhSBZ4RZs">>, KS = fun(<<"HS256">>, <<"123456">>) -> <<"secret">> end, - Checks = [{iss, <<"https://foo.com">>}, iat, exp, typ, {alg, [<<"HS256">>]}, kid], + Checks = [{iss, <<"https://foo.com">>}, iat, exp, typ, alg, kid], ?assertMatch({ok, _}, catch jwtf:decode(EncodedToken, Checks, KS)). -- cgit v1.2.1 From 1ab4ff362b08a09b2b95c08805c2f5027ffa7b59 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Mon, 30 Mar 2020 12:04:38 +0100 Subject: Enhance valid claims checks to detect binaries, etc --- src/couch/src/couch_httpd_auth.erl | 8 +++++++- src/jwtf/src/jwtf.erl | 20 +++++++++++++++++++- src/jwtf/test/jwtf_tests.erl | 14 +++++++++++--- 3 files changed, 37 insertions(+), 5 deletions(-) diff --git a/src/couch/src/couch_httpd_auth.erl b/src/couch/src/couch_httpd_auth.erl index 4f19728e9..2383be798 100644 --- a/src/couch/src/couch_httpd_auth.erl +++ b/src/couch/src/couch_httpd_auth.erl @@ -208,7 +208,13 @@ jwt_authentication_handler(Req) -> end. get_configured_claims() -> - re:split(config:get("jwt_auth", "required_claims", ""), "\s*,\s*", [{return, binary}]). + Claims = config:get("jwt_auth", "required_claims", ""), + case re:split(Claims, "\s*,\s*", [{return, list}]) of + [[]] -> + []; %% if required_claims is the empty string. + List -> + [list_to_existing_atom(C) || C <- List] + end. cookie_authentication_handler(Req) -> cookie_authentication_handler(Req, couch_auth_cache). diff --git a/src/jwtf/src/jwtf.erl b/src/jwtf/src/jwtf.erl index 098a41d24..d7fb2e7d4 100644 --- a/src/jwtf/src/jwtf.erl +++ b/src/jwtf/src/jwtf.erl @@ -123,8 +123,15 @@ validate(Header0, Payload0, Signature, Checks, KS) -> Key = key(Header1, Checks, KS), verify(Alg, Header0, Payload0, Signature, Key). + validate_checks(Checks) when is_list(Checks) -> - UnknownChecks = proplists:get_keys(Checks) -- ?CHECKS, + case {lists:usort(Checks), lists:sort(Checks)} of + {L, L} -> + ok; + {L1, L2} -> + error({duplicate_checks, L2 -- L1}) + end, + {_, UnknownChecks} = lists:partition(fun valid_check/1, Checks), case UnknownChecks of [] -> ok; @@ -132,6 +139,17 @@ validate_checks(Checks) when is_list(Checks) -> error({unknown_checks, UnknownChecks}) end. + +valid_check(Check) when is_atom(Check) -> + lists:member(Check, ?CHECKS); + +valid_check({Check, _}) when is_atom(Check) -> + lists:member(Check, ?CHECKS); + +valid_check(_) -> + false. + + validate_header(Props, Checks) -> validate_typ(Props, Checks), validate_alg(Props, Checks). diff --git a/src/jwtf/test/jwtf_tests.erl b/src/jwtf/test/jwtf_tests.erl index df3866f23..9f232241e 100644 --- a/src/jwtf/test/jwtf_tests.erl +++ b/src/jwtf/test/jwtf_tests.erl @@ -168,9 +168,17 @@ malformed_token_test() -> ?assertEqual({error, {bad_request, <<"Malformed token">>}}, jwtf:decode(<<"a.b.c.d">>, [], nil)). -unknown_check_test() -> - ?assertError({unknown_checks, [bar, foo]}, - jwtf:decode(<<"a.b.c">>, [exp, foo, iss, bar, exp], nil)). +unknown_atom_check_test() -> + ?assertError({unknown_checks, [foo, bar]}, + jwtf:decode(<<"a.b.c">>, [exp, foo, iss, bar], nil)). + +unknown_binary_check_test() -> + ?assertError({unknown_checks, [<<"bar">>]}, + jwtf:decode(<<"a.b.c">>, [exp, iss, <<"bar">>], nil)). + +duplicate_check_test() -> + ?assertError({duplicate_checks, [exp]}, + jwtf:decode(<<"a.b.c">>, [exp, exp], nil)). %% jwt.io generated -- cgit v1.2.1 From 6b6ddf0f257eba27596b42bc8978551b7a53e59a Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Mon, 30 Mar 2020 12:42:21 +0100 Subject: Verify all presented claims All claims in the header and payload are verified if present. The required_claims config setting is now separate and only causes CouchDB to reject JWT tokens without those claims. --- rel/overlay/etc/default.ini | 2 +- src/jwtf/src/jwtf.erl | 24 ++++++++++++------------ src/jwtf/test/jwtf_tests.erl | 2 +- test/elixir/test/jwtauth_test.exs | 18 +++++++++++++++++- 4 files changed, 31 insertions(+), 15 deletions(-) diff --git a/rel/overlay/etc/default.ini b/rel/overlay/etc/default.ini index 24f504726..6fe2260b4 100644 --- a/rel/overlay/etc/default.ini +++ b/rel/overlay/etc/default.ini @@ -142,7 +142,7 @@ max_db_number_for_dbs_info_req = 100 ;[jwt_auth] ; List of claims to validate -; required_claims = exp +; required_claims = ; ; [jwt_keys] ; Configure at least one key here if using the JWT auth handler. diff --git a/src/jwtf/src/jwtf.erl b/src/jwtf/src/jwtf.erl index d7fb2e7d4..247f2b508 100644 --- a/src/jwtf/src/jwtf.erl +++ b/src/jwtf/src/jwtf.erl @@ -159,11 +159,11 @@ validate_typ(Props, Checks) -> Required = prop(typ, Checks), TYP = prop(<<"typ">>, Props), case {Required, TYP} of - {undefined, _} -> + {undefined, undefined} -> ok; {true, undefined} -> throw({bad_request, <<"Missing typ header parameter">>}); - {true, <<"JWT">>} -> + {_, <<"JWT">>} -> ok; {true, _} -> throw({bad_request, <<"Invalid typ header parameter">>}) @@ -174,11 +174,11 @@ validate_alg(Props, Checks) -> Required = prop(alg, Checks), Alg = prop(<<"alg">>, Props), case {Required, Alg} of - {undefined, _} -> + {undefined, undefined} -> ok; {true, undefined} -> throw({bad_request, <<"Missing alg header parameter">>}); - {true, Alg} -> + {_, Alg} -> case lists:member(Alg, valid_algorithms()) of true -> ok; @@ -202,9 +202,9 @@ validate_iss(Props, Checks) -> ActualISS = prop(<<"iss">>, Props), case {ExpectedISS, ActualISS} of - {undefined, _} -> + {undefined, undefined} -> ok; - {_ISS, undefined} -> + {ISS, undefined} when ISS /= undefined -> throw({bad_request, <<"Missing iss claim">>}); {ISS, ISS} -> ok; @@ -218,11 +218,11 @@ validate_iat(Props, Checks) -> IAT = prop(<<"iat">>, Props), case {Required, IAT} of - {undefined, _} -> + {undefined, undefined} -> ok; {true, undefined} -> throw({bad_request, <<"Missing iat claim">>}); - {true, IAT} when is_integer(IAT) -> + {_, IAT} when is_integer(IAT) -> ok; {true, _} -> throw({bad_request, <<"Invalid iat claim">>}) @@ -234,11 +234,11 @@ validate_nbf(Props, Checks) -> NBF = prop(<<"nbf">>, Props), case {Required, NBF} of - {undefined, _} -> + {undefined, undefined} -> ok; {true, undefined} -> throw({bad_request, <<"Missing nbf claim">>}); - {true, IAT} -> + {_, IAT} -> assert_past(<<"nbf">>, IAT) end. @@ -248,11 +248,11 @@ validate_exp(Props, Checks) -> EXP = prop(<<"exp">>, Props), case {Required, EXP} of - {undefined, _} -> + {undefined, undefined} -> ok; {true, undefined} -> throw({bad_request, <<"Missing exp claim">>}); - {true, EXP} -> + {_, EXP} -> assert_future(<<"exp">>, EXP) end. diff --git a/src/jwtf/test/jwtf_tests.erl b/src/jwtf/test/jwtf_tests.erl index 9f232241e..ba944f7c7 100644 --- a/src/jwtf/test/jwtf_tests.erl +++ b/src/jwtf/test/jwtf_tests.erl @@ -275,7 +275,7 @@ header(Alg) -> claims() -> - EpochSeconds = 1496205841, + EpochSeconds = os:system_time(second), {[ {<<"iat">>, EpochSeconds}, {<<"exp">>, EpochSeconds + 3600} diff --git a/test/elixir/test/jwtauth_test.exs b/test/elixir/test/jwtauth_test.exs index c50225cbd..2fb89c3af 100644 --- a/test/elixir/test/jwtauth_test.exs +++ b/test/elixir/test/jwtauth_test.exs @@ -103,7 +103,23 @@ defmodule JwtAuthTest do end def test_fun(alg, key) do - {:ok, token} = :jwtf.encode({[{"alg", alg}, {"typ", "JWT"}]}, {[{"sub", "couch@apache.org"}, {"_couchdb.roles", ["testing"]}]}, key) + now = DateTime.to_unix(DateTime.utc_now()) + {:ok, token} = :jwtf.encode( + { + [ + {"alg", alg}, + {"typ", "JWT"} + ] + }, + { + [ + {"nbf", now - 60}, + {"exp", now + 60}, + {"sub", "couch@apache.org"}, + {"_couchdb.roles", ["testing"] + } + ] + }, key) resp = Couch.get("/_session", headers: [authorization: "Bearer #{token}"] -- cgit v1.2.1 From 4dca84e181a8469dbf3e17edc1073da7eb6ab6b2 Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Tue, 31 Mar 2020 16:25:38 -0500 Subject: Do not copy the #server.lru field to async openers This copy slowed down the `erlang:spawn_link/3` call considerably. Measurements in the wild showed the cost of that `spawn_link/3` going from roughly 8 uS to 800 uS. --- src/couch/src/couch_server.erl | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/couch/src/couch_server.erl b/src/couch/src/couch_server.erl index 909e23898..b2f8fdead 100644 --- a/src/couch/src/couch_server.erl +++ b/src/couch/src/couch_server.erl @@ -381,10 +381,13 @@ maybe_close_lru_db(#server{lru=Lru}=Server) -> end. open_async(Server, From, DbName, Options) -> + NoLRUServer = Server#server{ + lru = redacted + }, Parent = self(), T0 = os:timestamp(), Opener = spawn_link(fun() -> - Res = open_async_int(Server, DbName, Options), + Res = open_async_int(NoLRUServer, DbName, Options), IsSuccess = case Res of {ok, _} -> true; _ -> false -- cgit v1.2.1 From 24524a442ac8c7c56f8af595526c16023bdfb032 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Wed, 1 Apr 2020 15:54:13 +0100 Subject: Switch erlfdb to the couchdb repo at tag v1.0.0 --- rebar.config.script | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/rebar.config.script b/rebar.config.script index d315c75ad..269511d8e 100644 --- a/rebar.config.script +++ b/rebar.config.script @@ -150,6 +150,7 @@ DepDescs = [ %% Independent Apps {config, "config", {tag, "2.1.7"}}, {b64url, "b64url", {tag, "1.0.2"}}, +{erlfdb, "erlfdb", {tag, "v1.0.0"}}, {ets_lru, "ets-lru", {tag, "1.1.0"}}, {khash, "khash", {tag, "1.1.0"}}, {snappy, "snappy", {tag, "CouchDB-1.0.4"}}, @@ -170,10 +171,7 @@ DepDescs = [ {meck, "meck", {tag, "0.8.8"}}, {recon, "recon", {tag, "2.5.0"}}, {passage, "passage", {tag, "0.2.6"}}, -{thrift_protocol, "thrift-protocol", {tag, "0.1.5"}}, - -%% TMP - Until this is moved to a proper Apache repo -{erlfdb, "erlfdb", {branch, "master"}} +{thrift_protocol, "thrift-protocol", {tag, "0.1.5"}} ], WithProper = lists:keyfind(with_proper, 1, CouchConfig) == {with_proper, true}, @@ -188,8 +186,6 @@ end, BaseUrl = "https://github.com/apache/", MakeDep = fun - ({erlfdb, _, Version}) -> - {erlfdb, ".*", {git, "https://github.com/cloudant-labs/couchdb-erlfdb", {branch, "master"}}}; ({AppName, {url, Url}, Version}) -> {AppName, ".*", {git, Url, Version}}; ({AppName, {url, Url}, Version, Options}) -> -- cgit v1.2.1 From 42d20da6da3078b069ceee0a836fcb06322ff69f Mon Sep 17 00:00:00 2001 From: Jan Lehnardt Date: Fri, 13 Mar 2020 13:58:49 +0100 Subject: fix: require_valid_user exception logic Co-authored-by: Robert Newson --- src/chttpd/src/chttpd_auth.erl | 19 ++-- src/chttpd/test/eunit/chttpd_auth_tests.erl | 129 ++++++++++++++++++++++++++++ 2 files changed, 141 insertions(+), 7 deletions(-) create mode 100644 src/chttpd/test/eunit/chttpd_auth_tests.erl diff --git a/src/chttpd/src/chttpd_auth.erl b/src/chttpd/src/chttpd_auth.erl index 1b6d16eb3..ffae78171 100644 --- a/src/chttpd/src/chttpd_auth.erl +++ b/src/chttpd/src/chttpd_auth.erl @@ -58,19 +58,24 @@ jwt_authentication_handler(Req) -> party_mode_handler(#httpd{method='POST', path_parts=[<<"_session">>]} = Req) -> % See #1947 - users should always be able to attempt a login Req#httpd{user_ctx=#user_ctx{}}; +party_mode_handler(#httpd{path_parts=[<<"_up">>]} = Req) -> + RequireValidUser = config:get_boolean("chttpd", "require_valid_user", false), + RequireValidUserExceptUp = config:get_boolean("chttpd", "require_valid_user_except_for_up", false), + require_valid_user(Req, RequireValidUser andalso not RequireValidUserExceptUp); + party_mode_handler(Req) -> RequireValidUser = config:get_boolean("chttpd", "require_valid_user", false), - ExceptUp = config:get_boolean("chttpd", "require_valid_user_except_for_up", true), - case RequireValidUser andalso not ExceptUp of - true -> - throw({unauthorized, <<"Authentication required.">>}); - false -> - case config:get("admins") of + RequireValidUserExceptUp = config:get_boolean("chttpd", "require_valid_user_except_for_up", false), + require_valid_user(Req, RequireValidUser orelse RequireValidUserExceptUp). + +require_valid_user(_Req, true) -> + throw({unauthorized, <<"Authentication required.">>}); +require_valid_user(Req, false) -> + case config:get("admins") of [] -> Req#httpd{user_ctx = ?ADMIN_USER}; _ -> Req#httpd{user_ctx=#user_ctx{}} - end end. handle_session_req(Req) -> diff --git a/src/chttpd/test/eunit/chttpd_auth_tests.erl b/src/chttpd/test/eunit/chttpd_auth_tests.erl new file mode 100644 index 000000000..b4a8eabfb --- /dev/null +++ b/src/chttpd/test/eunit/chttpd_auth_tests.erl @@ -0,0 +1,129 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(chttpd_auth_tests). + +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("couch/include/couch_db.hrl"). + + +setup() -> + Addr = config:get("chttpd", "bind_address", "127.0.0.1"), + Port = mochiweb_socket_server:get(chttpd, port), + BaseUrl = lists:concat(["http://", Addr, ":", Port]), + BaseUrl. + +teardown(_Url) -> + ok. + + +require_valid_user_exception_test_() -> + { + "_up", + { + setup, + fun chttpd_test_util:start_couch/0, + fun chttpd_test_util:stop_couch/1, + { + foreach, + fun setup/0, fun teardown/1, + [ + fun should_handle_require_valid_user_except_up_on_up_route/1, + fun should_handle_require_valid_user_except_up_on_non_up_routes/1 + ] + } + } + }. + +set_require_user_false() -> + ok = config:set("chttpd", "require_valid_user", "false", _Persist=false). + +set_require_user_true() -> + ok = config:set("chttpd", "require_valid_user", "true", _Persist=false). + +set_require_user_except_for_up_false() -> + ok = config:set("chttpd", "require_valid_user_except_for_up", "false", _Persist=false). + +set_require_user_except_for_up_true() -> + ok = config:set("chttpd", "require_valid_user_except_for_up", "true", _Persist=false). + +should_handle_require_valid_user_except_up_on_up_route(_Url) -> + ?_test(begin + % require_valid_user | require_valid_user_except_up | up needs auth + % 1 F | F | F + % 2 F | T | F + % 3 T | F | T + % 4 T | T | F + + UpRequest = #httpd{path_parts=[<<"_up">>]}, + % we use ?ADMIN_USER here because these tests run under admin party + % so this is equivalent to an unauthenticated request + ExpectAuth = {unauthorized, <<"Authentication required.">>}, + ExpectNoAuth = #httpd{user_ctx=?ADMIN_USER,path_parts=[<<"_up">>]}, + + % 1 + set_require_user_false(), + set_require_user_except_for_up_false(), + Result1 = chttpd_auth:party_mode_handler(UpRequest), + ?assertEqual(ExpectNoAuth, Result1), + + % 2 + set_require_user_false(), + set_require_user_except_for_up_true(), + Result2 = chttpd_auth:party_mode_handler(UpRequest), + ?assertEqual(ExpectNoAuth, Result2), + + % 3 + set_require_user_true(), + set_require_user_except_for_up_false(), + ?assertThrow(ExpectAuth, chttpd_auth:party_mode_handler(UpRequest)), + + % 4 + set_require_user_true(), + set_require_user_except_for_up_true(), + Result4 = chttpd_auth:party_mode_handler(UpRequest), + ?assertEqual(ExpectNoAuth, Result4) + + end). + +should_handle_require_valid_user_except_up_on_non_up_routes(_Url) -> + ?_test(begin + % require_valid_user | require_valid_user_except_up | everything not _up requires auth + % 5 F | F | F + % 6 F | T | T + % 7 T | F | T + % 8 T | T | T + + NonUpRequest = #httpd{path_parts=[<<"/">>]}, + ExpectAuth = {unauthorized, <<"Authentication required.">>}, + ExpectNoAuth = #httpd{user_ctx=?ADMIN_USER,path_parts=[<<"/">>]}, + % 5 + set_require_user_false(), + set_require_user_except_for_up_false(), + Result5 = chttpd_auth:party_mode_handler(NonUpRequest), + ?assertEqual(ExpectNoAuth, Result5), + + % 6 + set_require_user_false(), + set_require_user_except_for_up_true(), + ?assertThrow(ExpectAuth, chttpd_auth:party_mode_handler(NonUpRequest)), + + % 7 + set_require_user_true(), + set_require_user_except_for_up_false(), + ?assertThrow(ExpectAuth, chttpd_auth:party_mode_handler(NonUpRequest)), + + % 8 + set_require_user_true(), + set_require_user_except_for_up_true(), + ?assertThrow(ExpectAuth, chttpd_auth:party_mode_handler(NonUpRequest)) + end). -- cgit v1.2.1 From f3a3312424c0ca780f7c7a49d1adc871996735db Mon Sep 17 00:00:00 2001 From: Juanjo Rodriguez Date: Mon, 23 Mar 2020 00:39:56 +0100 Subject: Improve test initialization --- test/elixir/test/cookie_auth_test.exs | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/test/elixir/test/cookie_auth_test.exs b/test/elixir/test/cookie_auth_test.exs index b10ee84f1..abc0fd767 100644 --- a/test/elixir/test/cookie_auth_test.exs +++ b/test/elixir/test/cookie_auth_test.exs @@ -34,13 +34,14 @@ defmodule CookieAuthTest do # Create db if not exists Couch.put("/#{@users_db}") - resp = - Couch.get( - "/#{@users_db}/_changes", - query: [feed: "longpoll", timeout: 5000, filter: "_design"] - ) - - assert resp.body + retry_until(fn -> + resp = + Couch.get( + "/#{@users_db}/_changes", + query: [feed: "longpoll", timeout: 5000, filter: "_design"] + ) + length(resp.body["results"]) > 0 + end) on_exit(&tear_down/0) -- cgit v1.2.1 From fb9d40442854aa8bd5c4ed9c7448eaaad456bd87 Mon Sep 17 00:00:00 2001 From: Juanjo Rodriguez Date: Thu, 19 Mar 2020 23:37:04 +0100 Subject: Update Makefile.win to Include locad configs and clean configs in devclean --- Makefile | 2 +- Makefile.win | 7 ++++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 7d56dd1ab..60b6e3d07 100644 --- a/Makefile +++ b/Makefile @@ -480,7 +480,7 @@ endif # target: devclean - Remove dev cluster artifacts devclean: @rm -rf dev/lib/*/data - + @rm -rf dev/lib/*/etc ################################################################################ # Misc diff --git a/Makefile.win b/Makefile.win index 30ebe0ee3..92c60bbbb 100644 --- a/Makefile.win +++ b/Makefile.win @@ -200,7 +200,9 @@ python-black-update: .venv/bin/black elixir: export MIX_ENV=integration elixir: export COUCHDB_TEST_ADMIN_PARTY_OVERRIDE=1 elixir: elixir-init elixir-check-formatted elixir-credo devclean - @dev\run $(TEST_OPTS) -a adm:pass -n 1 --enable-erlang-views --no-eval 'mix test --trace --exclude without_quorum_test --exclude with_quorum_test $(EXUNIT_OPTS)' + @dev\run $(TEST_OPTS) -a adm:pass -n 1 --enable-erlang-views \ + --locald-config test/elixir/test/config/test-config.ini \ + --no-eval 'mix test --trace --exclude without_quorum_test --exclude with_quorum_test $(EXUNIT_OPTS)' .PHONY: elixir-init elixir-init: MIX_ENV=test @@ -405,6 +407,9 @@ devclean: -@rmdir /s/q dev\lib\node1\data -@rmdir /s/q dev\lib\node2\data -@rmdir /s/q dev\lib\node3\data + -@rmdir /s/q dev\lib\node1\etc + -@rmdir /s/q dev\lib\node2\etc + -@rmdir /s/q dev\lib\node3\etc ################################################################################ -- cgit v1.2.1 From 54a05e43c3098f6d37b12ea8831a8cc11e062391 Mon Sep 17 00:00:00 2001 From: Juanjo Rodriguez Date: Tue, 24 Mar 2020 08:51:33 +0100 Subject: allow to run 'javascript' target with other test targets in the same 'make' process --- Makefile | 4 +++- Makefile.win | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 60b6e3d07..97fc97c85 100644 --- a/Makefile +++ b/Makefile @@ -260,7 +260,9 @@ elixir-credo: elixir-init .PHONY: javascript # target: javascript - Run JavaScript test suites or specific ones defined by suites option javascript: export COUCHDB_TEST_ADMIN_PARTY_OVERRIDE=1 -javascript: devclean +javascript: + + @$(MAKE) devclean @mkdir -p share/www/script/test ifeq ($(IN_RELEASE), true) @cp test/javascript/tests/lorem*.txt share/www/script/test/ diff --git a/Makefile.win b/Makefile.win index 92c60bbbb..bdecc7315 100644 --- a/Makefile.win +++ b/Makefile.win @@ -237,7 +237,8 @@ elixir-credo: elixir-init .PHONY: javascript # target: javascript - Run JavaScript test suites or specific ones defined by suites option javascript: export COUCHDB_TEST_ADMIN_PARTY_OVERRIDE=1 -javascript: devclean +javascript: + @$(MAKE) devclean -@mkdir share\www\script\test ifeq ($(IN_RELEASE), true) @copy test\javascript\tests\lorem*.txt share\www\script\test -- cgit v1.2.1 From 85f81d88018fe526f1f216d673a9bbc847cbd81c Mon Sep 17 00:00:00 2001 From: ILYA Khlopotov Date: Thu, 13 Feb 2020 12:23:42 -0800 Subject: Use `couch_rate` application for `couch_view` --- .credo.exs | 1 + .gitignore | 1 + Makefile | 1 + mix.exs | 8 +- mix.lock | 1 + rebar.config.script | 1 + rel/files/eunit.ini | 3 + rel/overlay/etc/default.ini | 4 + src/couch_rate/README.md | 155 +++++++++ src/couch_rate/src/couch_rate.app.src | 24 ++ src/couch_rate/src/couch_rate.erl | 318 +++++++++++++++++ src/couch_rate/src/couch_rate.hrl | 19 + src/couch_rate/src/couch_rate_app.erl | 28 ++ src/couch_rate/src/couch_rate_config.erl | 66 ++++ src/couch_rate/src/couch_rate_ets.erl | 119 +++++++ src/couch_rate/src/couch_rate_limiter.erl | 387 +++++++++++++++++++++ src/couch_rate/src/couch_rate_pd.erl | 90 +++++ src/couch_rate/src/couch_rate_sup.erl | 36 ++ .../test/exunit/couch_rate_config_test.exs | 88 +++++ .../test/exunit/couch_rate_limiter_test.exs | 350 +++++++++++++++++++ src/couch_rate/test/exunit/test_helper.exs | 14 + src/couch_views/README.md | 33 ++ src/couch_views/src/couch_views.app.src | 3 +- src/couch_views/src/couch_views_indexer.erl | 60 ++-- src/couch_views/test/couch_views_indexer_test.erl | 55 ++- .../test/couch_views_trace_index_test.erl | 2 +- 26 files changed, 1839 insertions(+), 28 deletions(-) create mode 100644 src/couch_rate/README.md create mode 100644 src/couch_rate/src/couch_rate.app.src create mode 100644 src/couch_rate/src/couch_rate.erl create mode 100644 src/couch_rate/src/couch_rate.hrl create mode 100644 src/couch_rate/src/couch_rate_app.erl create mode 100644 src/couch_rate/src/couch_rate_config.erl create mode 100644 src/couch_rate/src/couch_rate_ets.erl create mode 100644 src/couch_rate/src/couch_rate_limiter.erl create mode 100644 src/couch_rate/src/couch_rate_pd.erl create mode 100644 src/couch_rate/src/couch_rate_sup.erl create mode 100644 src/couch_rate/test/exunit/couch_rate_config_test.exs create mode 100644 src/couch_rate/test/exunit/couch_rate_limiter_test.exs create mode 100644 src/couch_rate/test/exunit/test_helper.exs diff --git a/.credo.exs b/.credo.exs index bd26f407c..112561b95 100644 --- a/.credo.exs +++ b/.credo.exs @@ -37,6 +37,7 @@ ~r"/src/metrics", ~r"/src/minerl", ~r"/src/parse_trans", + ~r"/src/stream_data", ~r"/src/ssl_verify_fun", ~r"/test/elixir/deps/" ] diff --git a/.gitignore b/.gitignore index bf45d1a4f..955403a98 100644 --- a/.gitignore +++ b/.gitignore @@ -76,6 +76,7 @@ src/rebar/ src/recon/ src/smoosh/ src/snappy/ +src/stream_data/ src/ssl_verify_fun/ src/thrift_protocol/ src/triq/ diff --git a/Makefile b/Makefile index 2f5df90b9..b3eb64c99 100644 --- a/Makefile +++ b/Makefile @@ -162,6 +162,7 @@ endif check-fdb: make eunit apps=couch_eval,couch_expiring_cache,ctrace,couch_jobs,couch_views,fabric make elixir tests=test/elixir/test/basics_test.exs,test/elixir/test/replication_test.exs,test/elixir/test/map_test.exs,test/elixir/test/all_docs_test.exs,test/elixir/test/bulk_docs_test.exs + make exunit tests=src/couch_rate/test/exunit/ .PHONY: eunit # target: eunit - Run EUnit tests, use EUNIT_OPTS to provide custom options diff --git a/mix.exs b/mix.exs index 29c81fa49..480d426b1 100644 --- a/mix.exs +++ b/mix.exs @@ -49,11 +49,14 @@ defmodule CouchDBTest.Mixfile do # Run "mix help compile.app" to learn about applications. def application do [ - extra_applications: [:logger], + extra_applications: extra_applications(Mix.env()), applications: [:httpotion] ] end + defp extra_applications(:test), do: [:logger, :stream_data] + defp extra_applications(_), do: [:logger] + # Specifies which paths to compile per environment. defp elixirc_paths(:test), do: ["test/elixir/lib", "test/elixir/test/support"] defp elixirc_paths(:integration), do: ["test/elixir/lib", "test/elixir/test/support"] @@ -68,7 +71,8 @@ defmodule CouchDBTest.Mixfile do {:jiffy, path: Path.expand("src/jiffy", __DIR__)}, {:ibrowse, path: Path.expand("src/ibrowse", __DIR__), override: true, compile: false}, - {:credo, "~> 1.2.0", only: [:dev, :test, :integration], runtime: false} + {:credo, "~> 1.2.0", only: [:dev, :test, :integration], runtime: false}, + {:stream_data, "~> 0.4.3", only: [:dev, :test, :integration], runtime: false} ] end diff --git a/mix.lock b/mix.lock index c03e11f64..7a155c6bb 100644 --- a/mix.lock +++ b/mix.lock @@ -14,5 +14,6 @@ "mimerl": {:hex, :mimerl, "1.2.0", "67e2d3f571088d5cfd3e550c383094b47159f3eee8ffa08e64106cdf5e981be3", [:rebar3], [], "hexpm", "f278585650aa581986264638ebf698f8bb19df297f66ad91b18910dfc6e19323"}, "parse_trans": {:hex, :parse_trans, "3.3.0", "09765507a3c7590a784615cfd421d101aec25098d50b89d7aa1d66646bc571c1", [:rebar3], [], "hexpm", "17ef63abde837ad30680ea7f857dd9e7ced9476cdd7b0394432af4bfc241b960"}, "ssl_verify_fun": {:hex, :ssl_verify_fun, "1.1.5", "6eaf7ad16cb568bb01753dbbd7a95ff8b91c7979482b95f38443fe2c8852a79b", [:make, :mix, :rebar3], [], "hexpm", "13104d7897e38ed7f044c4de953a6c28597d1c952075eb2e328bc6d6f2bfc496"}, + "stream_data": {:hex, :stream_data, "0.4.3", "62aafd870caff0849a5057a7ec270fad0eb86889f4d433b937d996de99e3db25", [:mix], [], "hexpm", "7dafd5a801f0bc897f74fcd414651632b77ca367a7ae4568778191fc3bf3a19a"}, "unicode_util_compat": {:hex, :unicode_util_compat, "0.4.1", "d869e4c68901dd9531385bb0c8c40444ebf624e60b6962d95952775cac5e90cd", [:rebar3], [], "hexpm", "1d1848c40487cdb0b30e8ed975e34e025860c02e419cb615d255849f3427439d"}, } diff --git a/rebar.config.script b/rebar.config.script index 269511d8e..6f9f65c73 100644 --- a/rebar.config.script +++ b/rebar.config.script @@ -123,6 +123,7 @@ SubDirs = [ "src/couch_index", "src/couch_mrview", "src/couch_js", + "src/couch_rate", "src/couch_replicator", "src/couch_plugins", "src/couch_pse_tests", diff --git a/rel/files/eunit.ini b/rel/files/eunit.ini index 2b73ab307..20277f288 100644 --- a/rel/files/eunit.ini +++ b/rel/files/eunit.ini @@ -40,3 +40,6 @@ startup_jitter = 0 [fabric] ; disable index auto-updater to avoid interfering with some of the tests index_updater_enabled = false + +[couch_rate.views] +opts = #{budget => 100, target => 500, window => 6000, sensitivity => 200, congested_delay => 1} \ No newline at end of file diff --git a/rel/overlay/etc/default.ini b/rel/overlay/etc/default.ini index 749cdd27f..fd0aa7763 100644 --- a/rel/overlay/etc/default.ini +++ b/rel/overlay/etc/default.ini @@ -699,3 +699,7 @@ compaction = false ; log every generated trace by including the following: ; ; all = (#{}) -> true + +[couch_rate.views] +limiter = couch_rate_limiter +opts = #{budget => 100, target => 2500, window => 60000, sensitivity => 1000} \ No newline at end of file diff --git a/src/couch_rate/README.md b/src/couch_rate/README.md new file mode 100644 index 000000000..530da1a99 --- /dev/null +++ b/src/couch_rate/README.md @@ -0,0 +1,155 @@ +# Description + +The `couch_rate` application implements a generic rate limiter which can +be used to control batch size and delay between batches. It was initially +designed for background index build to find an optimal batch size to utilize +the FDB transaction up to configured `target` parameter. The application +provides an API to plug custom rate limiting logic when need to. + +# Default rate limit logic + +The `couch_rate_limiter` is the rate limit module used by default. +The module tracks average number of reads and writes over specified +time period. It uses average read/write numbers to calculate an +approximate value for read/write ratio. Then the read/write ratio is +used to convert estimated amount of writes into batch size. + +# Configuration + +## API based usage + +In the simplest use case the only mandatory keys `new/3` expects are: +* `budget` - the initial value for estimated batch size +* `target` - the amount in msec which we try to maintain for batch processing time +* `window` - time interval for contention detector +* `sensitivity` - minimal interval within the `window` + +We choose sane default values for the rest of the parameters. + +* `window_size = window div sensitivity + 1` +* `underload_threshold = round(target * 0.95)` +* `overload_threshold = round(target * 1.05)` +* `delay_threshold = round(target * 1.07)` + +Due to the use of `round` in defaults calculation the `target` cannot be less +than `36` msec. Otherwise some of the thresholds become equal which breaks the +algorithm. + +In the case when you need to specify custom parameters, the following keys +are supported: + +* `window_size` - how many batches to consider in contention detector +* `timer` - this is used for testing to fast forward time `fun() -> current_time_in_ms() end` +* `target` - the amount in msec which we try to maintain for batch processing time +* `underload_threshold` - a threshold bellow which we would try to increase the budget +* `overload_threshold` - a threshold above which we would start decreasing the budget +* `delay_threshold` - a threshold above which we would start introducing delays between batches +* `multiplicative_factor` - determines how fast we are going to decrease budget (must be in (0..1) range) +* `regular_delay` - delay between batches when there is no overload +* `congested_delay` - delay between batches when there is an overload +* `initial_budget` - initial value for budget to start with + +## default.ini based usage + +The users of the `couch_rate` application pass the `ConfigId` parameter. +When calling `couch_rate:new` and `couch_rate:create_if_missing`. +The `couch_rate` application uses this information to construct name of the +configuration section to use to get configuration parameters. The configration +section is constructed using `"couch_rate." ++ ConfigId`. +The parameters are encoded using erlang map syntax. +Limitation of the map parser: + +* Keys must be atoms +* Values are either integers or floats +* We only support positive values in the map +* Configuration object cannot use erlang reserved words in keys: + `after`, `and`, `andalso`, `band`, `begin`, `bnot`, `bor`, + `bsl`, `bsr`, `bxor`, `case`, `catch`, `cond`, `div`, `end` + `fun`, `if`, `let`, `not`, `of`, `or`, `orelse`, `receive` + `rem`, `try`, `when`, `xor` + +The auxilary `couch_rate_config` module implements the following API: + +* `couch_rate_config:from_str/1` - parses a string representation of parameters +* `couch_rate_config:to_str/1` - converts parameters to string (used in testing) + +Here is the example of configuration used in `couch_view` application: + +``` +[couch_rate.views] +limiter = couch_rate_limiter +opts = #{budget => 100, target => 2500, window => 60000, sensitivity => 1000} +``` + +In the `couch_view` application it is used as follows: + +``` +Limiter = couch_rate:create_if_missing({DbName, DDocId}, "views"), +``` + +# API + +The application implements two APIs. Both APIs are supported by `couch_rate` +module. The API variants are: + +* explicit state passing +* state store based approach + +The API is chosen baed on the `StoreModule` argument passed to `new/4`. +Currently we support following values for `StoreModule`: + +* `nil` - this value indicates that explicit state passing would be used +* `couch_rate_ets` - ets based global state store (ets tables are owned by app supervisor) +* `couch_rate_pd` - process dicionary based local state store + +The "explicit state passing" style returns a tuple `{Result :: term(), state()}`. +The result is the same as for state store based API. + + +## State store based APIs of `couch_rate` module. + +All functions can return `{error, Reason :: term()}` in case of errors. +This detail is ommited bellow. + +* `create_if_missing(Id :: id(), Module :: module(), Store :: module(), Options :: map()) -> limiter()` - create new rate limiter instance +* `new(Id :: id(), Module :: module(), Store :: module(), Options :: map()) -> limiter()` - create new rate limiter instance +* `budget(limiter()) -> Budget :: integer().` - get batch size +* `delay(limiter()) -> Delay :: timeout().` - return delay in msec between batches +* `wait(limiter()) -> ok` - block the caller for amount of time returned by `delay/1` +* `in(limiter(), Reads :: integer()) -> limiter()` - notify rate limiter on the amount of reads were actually done (could be less than `budget`) +* `success(limiter(), Writes :: integer()) -> limiter()` - how many writes happen +* `failure(limiter()) -> limiter()` - called instead of `success/2` when failure happen +* `is_congestion(limiter()) -> boolean()` - returns `false` when congestion is detected +* `format(limiter()) -> [{Key :: atom(), Value :: term()}]` - return key value list representing important aspects of the limiter state +* `id(limitter()) -> id()` - returns `id()` of the rate limiter +* `module(limiter()) -> module()` - returns callback module implementing rate limiting logic. +* `state(limiter()) -> state()` - returns internal state of rate limiter. +* `store(limiter()) -> module() | nil` - returns store state backend. + +# Testing + +The test suite is written in Elixir. + +## Running all tests + +``` +make couch && ERL_LIBS=`pwd`/src mix test --trace src/couch_rate/test/exunit/ +``` + +## Running specific test suite + +``` +make couch && ERL_LIBS=`pwd`/src mix test --trace src/couch_rate/test/exunit/couch_rate_limiter_test.exs +``` + +## Running specific test using line number + +``` +make couch && ERL_LIBS=`pwd`/src mix test --trace src/couch_rate/test/exunit/couch_rate_limiter_test.exs:10 +``` + +## Running traces with stats output + +``` +make couch && ERL_LIBS=`pwd`/src EXUNIT_DEBUG=true mix test --trace src/couch_rate/test/exunit/couch_rate_limiter_test.exs +``` \ No newline at end of file diff --git a/src/couch_rate/src/couch_rate.app.src b/src/couch_rate/src/couch_rate.app.src new file mode 100644 index 000000000..ed6de81d6 --- /dev/null +++ b/src/couch_rate/src/couch_rate.app.src @@ -0,0 +1,24 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + + {application, couch_rate, [ + {description, "Simple rate limiter"}, + {vsn, git}, + {registered, [ + ]}, + {applications, [ + kernel, + stdlib, + syntax_tools + ]}, + {mod, {couch_rate_app, []}} +]}. diff --git a/src/couch_rate/src/couch_rate.erl b/src/couch_rate/src/couch_rate.erl new file mode 100644 index 000000000..24bbcc2a5 --- /dev/null +++ b/src/couch_rate/src/couch_rate.erl @@ -0,0 +1,318 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_rate). + +-include("couch_rate.hrl"). + +-export([ + create_if_missing/2, + create_if_missing/3, + create_if_missing/4, + new/2, + new/3, + new/4, + from_map/4, + budget/1, + delay/1, + wait/1, + in/2, + success/2, + failure/1, + is_congestion/1, + min_latency/1, + format/1, + to_map/1, + id/1, + module/1, + state/1, + store/1 +]). + +-define(LIMITER, ?MODULE). + +-type id() :: term(). +-type state() :: term(). +-type store() :: module(). + +-opaque limiter() :: #?LIMITER{}. + +-export_type([ + id/0, + state/0, + limiter/0 +]). + +-spec create_if_missing(id(), string()) -> + couch_rate:limiter() | {error, Reason :: term()}. + +create_if_missing(Id, ConfigId) -> + ?MODULE:create_if_missing(Id, ConfigId, couch_rate_ets). + +-spec create_if_missing(id(), string(), nil | module()) -> + couch_rate:limiter() | {error, Reason :: term()}. + +create_if_missing(Id, ConfigId, StateStore) -> + {Module, Options} = get_config(ConfigId), + ?MODULE:create_if_missing(Id, Module, StateStore, Options). + +-spec create_if_missing(id(), module(), nil | module(), map()) -> + couch_rate:limiter() | {error, Reason :: term()}. + +create_if_missing(Id, Module, nil, Options) -> + #?LIMITER{ + id = Id, + module = Module, + store = nil, + state = Module:new(Id, Options) + }; + +create_if_missing(Id, Module, Store, Options) -> + case Store:create_if_missing(Id, Module:new(Id, Options)) of + {error, _} = Error -> + Error; + State -> + #?LIMITER{ + id = Id, + module = Module, + store = Store, + state = State + } + end. + + +-spec new(id(), string()) -> + couch_rate:limiter() | {error, Reason :: term()}. + +new(Id, ConfigId) -> + ?MODULE:new(Id, ConfigId, couch_rate_ets). + +-spec new(id(), string(), module()) -> + couch_rate:limiter() | {error, Reason :: term()}. + +new(Id, ConfigId, StateStore) -> + {Module, Options} = get_config(ConfigId), + ?MODULE:new(Id, Module, StateStore, Options). + + +-spec new(id(), module(), nil | module(), map()) -> + couch_rate:limiter() | {error, Reason :: term()}. + +new(Id, Module, nil, Options) -> + #?LIMITER{ + id = Id, + module = Module, + store = nil, + state = Module:new(Id, Options) + }; + +new(Id, Module, Store, Options) -> + case Store:new(Id, Module:new(Id, Options)) of + {error, _} = Error -> + Error; + State -> + #?LIMITER{ + id = Id, + module = Module, + store = Store, + state = State + } + end. + + +-spec from_map(id(), module(), store(), map()) -> + couch_rate:limiter() + | {error, Reason :: term()}. + +from_map(Id, Module, nil, Map) -> + #?LIMITER{ + id = Id, + module = Module, + store = nil, + state = Module:from_map(Map) + }; + +from_map(Id, Module, Store, Map) -> + case Store:new(Id, Module:from_map(Map)) of + {error, _} = Error -> + Error; + State -> + #?LIMITER{ + id = Id, + module = Module, + store = Store, + state = State + } + end. + + +-spec update(limiter(), ( + fun( + (id(), state()) -> + {Result :: term(), state()} + | {error, Reason :: term()} + ) + )) -> + Result :: term() + | {Result :: term(), state()} + | {error, Reason :: term()}. + +update(#?LIMITER{store = nil, id = Id, state = State0} = Limiter, Fun) -> + case Fun(Id, State0) of + {error, _Reason} = Error -> + Error; + {Result, State1} -> + {Result, Limiter#?LIMITER{state = State1}} + end; + +update(#?LIMITER{id = Id, store = Store, state = State}, Fun) -> + Store:update(Id, State, Fun). + + +-spec budget(limiter()) -> + Budget :: integer() + | {Budget :: integer(), limiter()} + | {error, term()}. + +budget(#?LIMITER{module = Module} = Limiter) -> + update(Limiter, fun(Id, StateIn) -> + Module:budget(Id, StateIn) + end). + + +-spec delay(limiter()) -> + DelayTime :: integer() + | {DelayTime :: integer(), limiter()} + | {error, term()}. + +delay(#?LIMITER{module = Module} = Limiter) -> + update(Limiter, fun(Id, State) -> + Module:delay(Id, State) + end). + + +-spec wait(limiter()) -> + ok + | {ok, limiter()} + | {error, term()}. + +wait(#?LIMITER{module = Module} = Limiter) -> + update(Limiter, fun(Id, State) -> + Module:wait(Id, State) + end). + + +-spec in(limiter(), integer()) -> + ok + | {ok, limiter()} + | {error, term()}. + +in(#?LIMITER{module = Module} = Limiter, Reads) -> + update(Limiter, fun(Id, State) -> + Module:in(Id, State, Reads) + end). + + +-spec success(limiter(), integer()) -> + ok + | limiter() + | {error, term()}. + +success(#?LIMITER{module = Module} = Limiter, Writes) -> + update(Limiter, fun(Id, State) -> + Module:success(Id, State, Writes) + end). + + +-spec failure(limiter()) -> + ok + | limiter() + | {error, term()}. + +failure(#?LIMITER{module = Module} = Limiter) -> + update(Limiter, fun(Id, State) -> + Module:failure(Id, State) + end). + + +-spec is_congestion(limiter()) -> boolean(). + +is_congestion(#?LIMITER{store = nil, module = Module, id = Id, state = State}) -> + Module:is_congestion(Id, State); + +is_congestion(#?LIMITER{store = Store, module = Module, id = Id, state = State}) -> + Module:is_congestion(Id, Store:lookup(Id, State)). + + +-spec format(limiter()) -> [{Key :: atom(), Value :: term()}]. + +format(#?LIMITER{store = nil, module = Module, id = Id, state = State}) -> + Module:format(Id, State); + +format(#?LIMITER{store = Store, module = Module, id = Id, state = State}) -> + Module:format(Id, Store:lookup(Id, State)). + + +-spec to_map(limiter()) -> map(). + +to_map(#?LIMITER{store = nil, module = Module, id = Id, state = State}) -> + Module:to_map(Id, State); + +to_map(#?LIMITER{store = Store, module = Module, id = Id, state = State}) -> + Module:to_map(Id, Store:lookup(Id, State)). + +-spec min_latency(limiter()) -> pos_integer(). + +min_latency(#?LIMITER{store = nil, module = Module, id = Id, state = State}) -> + Module:min_latency(Id, State); + +min_latency(#?LIMITER{store = Store, module = Module, id = Id, state = State}) -> + Module:to_map(Id, Store:lookup(Id, State)). + + +-spec id(limiter()) -> module(). + +id(Limiter) -> + Limiter#?LIMITER.id. + + +-spec module(limiter()) -> module(). + +module(Limiter) -> + Limiter#?LIMITER.module. + + +-spec state(limiter()) -> state(). + +state(Limiter) -> + Limiter#?LIMITER.state. + +-spec store(limiter()) -> module() | nil. + +store(Limiter) -> + Limiter#?LIMITER.store. + + +get_config(ConfigId) -> + ConfigSection = "couch_rate." ++ ConfigId, + ModuleStr = config:get(ConfigSection, "limiter", "couch_rate_limiter"), + Module = list_to_existing_atom(ModuleStr), + case config:get(ConfigSection, "opts", undefined) of + undefined -> + {error, #{missing_key => "opts", in => ConfigSection}}; + OptionsStr -> + Options = couch_rate_config:from_str(OptionsStr), + lists:map(fun(Key) -> + maps:is_key(Key, Options) orelse error(#{missing_key => Key, in => Options}) + end, [budget, target, window, sensitivity]), + {Module, Options} + end. diff --git a/src/couch_rate/src/couch_rate.hrl b/src/couch_rate/src/couch_rate.hrl new file mode 100644 index 000000000..d19f7d8e4 --- /dev/null +++ b/src/couch_rate/src/couch_rate.hrl @@ -0,0 +1,19 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-record(couch_rate, + { + id :: couch_rate:id(), + module = couch_rate_limiter :: module(), + store = couch_rate_ets :: module() | nil, + state :: couch_rate:state() + }). diff --git a/src/couch_rate/src/couch_rate_app.erl b/src/couch_rate/src/couch_rate_app.erl new file mode 100644 index 000000000..2bb1621c3 --- /dev/null +++ b/src/couch_rate/src/couch_rate_app.erl @@ -0,0 +1,28 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_rate_app). + +-behaviour(application). + +-export([ + start/2, + stop/1 +]). + + +start(_StartType, _StartArgs) -> + couch_rate_sup:start_link(). + + +stop(_State) -> + ok. diff --git a/src/couch_rate/src/couch_rate_config.erl b/src/couch_rate/src/couch_rate_config.erl new file mode 100644 index 000000000..709fbc3d3 --- /dev/null +++ b/src/couch_rate/src/couch_rate_config.erl @@ -0,0 +1,66 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_rate_config). + +% This parser supports only maps where key is atom and value +% is positive float or positive integer. + +-include_lib("syntax_tools/include/merl.hrl"). + +-export([ + from_str/1, + to_str/1 +]). + +from_str(String) -> + parse_map(merl:quote(String)). + + +to_str(Map) when is_map(Map) -> + StringArgs = maps:fold(fun(Key, Val, Acc) -> + Acc ++ [atom_to_list(Key) ++ " => " ++ number_to_list(Val)] + end, [], Map), + "#{" ++ string:join(StringArgs, ", ") ++ "}". + + +number_to_list(Int) when is_integer(Int) -> + integer_to_list(Int); + +number_to_list(Float) when is_float(Float) -> + float_to_list(Float). + + +parse_map(MapAST) -> + erl_syntax:type(MapAST) == map_expr + orelse fail("Only #{field => pos_integer() | float()} syntax is supported"), + %% Parsing map manually, since merl does not support maps + lists:foldl(fun(AST, Bindings) -> + NameAST = erl_syntax:map_field_assoc_name(AST), + erl_syntax:type(NameAST) == atom + orelse fail("Only atoms are supported as field names"), + Name = erl_syntax:atom_value(NameAST), + ValueAST = erl_syntax:map_field_assoc_value(AST), + Value = case erl_syntax:type(ValueAST) of + integer -> + erl_syntax:integer_value(ValueAST); + float -> + erl_syntax:float_value(ValueAST); + _ -> + fail("Only pos_integer() or float() alowed as values") + end, + Bindings#{Name => Value} + end, #{}, erl_syntax:map_expr_fields(MapAST)). + + +fail(Msg) -> + throw({error, Msg}). \ No newline at end of file diff --git a/src/couch_rate/src/couch_rate_ets.erl b/src/couch_rate/src/couch_rate_ets.erl new file mode 100644 index 000000000..edd9d965c --- /dev/null +++ b/src/couch_rate/src/couch_rate_ets.erl @@ -0,0 +1,119 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_rate_ets). + +-include("couch_rate.hrl"). + +-export([ + create_tables/0, + delete_tables/0, + create_if_missing/2, + new/2, + lookup/2, + update/3 +]). + + +-define(SHARDS_N, 16). + +-type id() :: term(). +-type state() :: term(). +-type result() :: term(). +-type store_state() :: term(). + + +-spec create_if_missing(couch_rate:id(), state()) -> + store_state(). + +create_if_missing(Id, State) -> + Tid = term_to_table(Id), + case ets:lookup(Tid, Id) of + [_ | _] -> ok; + _ -> ets:insert(Tid, {Id, State}) + end, + ok. + + +-spec new(couch_rate:id(), state()) -> + store_state() + | {error, term()}. + +new(Id, State) -> + Tid = term_to_table(Id), + case ets:insert_new(Tid, {Id, State}) of + true -> ok; + false -> {error, #{reason => already_exists, id => Id}} + end. + + +-spec update(id(), store_state(), fun( + (id(), state()) -> {state(), result()} + )) -> + result() + | {error, term()}. + +update(Id, _StoreState, Fun) -> + Tid = term_to_table(Id), + case ets:lookup(Tid, Id) of + [{Id, State0}] -> + case Fun(Id, State0) of + {Result, State1} -> + ets:insert(Tid, {Id, State1}), + Result; + Error -> + Error + end; + _ -> + {error, #{reason => cannot_find, id => Id}} + end. + + +-spec lookup(id(), store_state()) -> + state() + | {error, term()}. + +lookup(Id, _StoreState) -> + Tid = term_to_table(Id), + case ets:lookup(Tid, Id) of + [{Id, State}] -> + State; + _ -> + {error, #{reason => cannot_find, id => Id}} + end. + + +create_tables() -> + Opts = [named_table, public, {read_concurrency, true}], + [ets:new(TableName, Opts) || TableName <- table_names()], + ok. + +delete_tables() -> + [ets:delete(TableName) || TableName <- table_names()], + ok. + + +-spec term_to_table(any()) -> atom(). +term_to_table(Term) -> + PHash = erlang:phash2(Term), + table_name(PHash rem ?SHARDS_N). + + +-dialyzer({no_return, table_names/0}). + +-spec table_names() -> [atom()]. +table_names() -> + [table_name(N) || N <- lists:seq(0, ?SHARDS_N - 1)]. + +-spec table_name(non_neg_integer()) -> atom(). +table_name(Id) when is_integer(Id), Id >= 0 andalso Id < ?SHARDS_N -> + list_to_atom(atom_to_list(?MODULE) ++ "_" ++ integer_to_list(Id)). \ No newline at end of file diff --git a/src/couch_rate/src/couch_rate_limiter.erl b/src/couch_rate/src/couch_rate_limiter.erl new file mode 100644 index 000000000..349da8d5a --- /dev/null +++ b/src/couch_rate/src/couch_rate_limiter.erl @@ -0,0 +1,387 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_rate_limiter). + +%% This module implements an algorithm to control the consumption rate +%% parameters such as: +%% - batch size +%% - delay between batches +%% The components of the algorithm use +%% - [ascending minima algorithm](http://web.archive.org/web/20120805114719/http://home.tiac.net/~cri/2001/slidingmin.html) +%% - "Welford's method" of calculating average + +-export([ + new/2, + from_map/2, + budget/2, + delay/2, + wait/2, + in/3, + success/3, + failure/2, + is_congestion/2, + min_latency/2, + format/2, + to_map/2 +]). + +-type msec() :: non_neg_integer(). + +-define(STATE, ?MODULE). + +%% This is the number below which the math would not work due to round errors +%% In particular the default values for thresholds would be equal +-define(MIN_TARGET, 36). + +-define(record_to_keyval(Name, Record), + lists:zip(record_info(fields, Name), + tl(tuple_to_list(Record)))). + +-define(map_to_record(RecordName, Map), + element(1, lists:foldl(fun(Field, {Record, Idx}) -> + {setelement(Idx, Record, maps:get(Field, Map, element(Idx, Record))), Idx + 1} + end, {#RecordName{}, 2}, record_info(fields, RecordName)))). + + +-define(record_to_map(RecordName, Record), + element(1, lists:foldl(fun(Field, {Map, Idx}) -> + { + maps:put(Field, element(Idx, Record), Map), + Idx + 1 + } + end, {#{}, 2}, record_info(fields, RecordName)))). + +-record(?STATE, { + window_size = 0 :: 0 | pos_integer(), + timer = fun now_msec/0, + size = 1 :: pos_integer(), + epoch = 1 :: pos_integer(), + minimums :: queue:queue() | undefined, + start_ts = undefined, + mean_reads = 0.0, + mean_writes = 0.0, + reads = 0, + writes = 0, + target = 4500, + underload_threshold = 4275, %% target * 0.95 + overload_threshold = 4725, %% target * 1.05 + delay_threshold = 4950, %% target * 1.10 + multiplicative_factor = 0.7, + regular_delay = 100 :: timeout(), + congested_delay = 5000 :: timeout(), + initial_budget = 100, + latency = 0 +}). + +-type state() :: #?STATE{}. + + +-spec new(couch_rate:id(), Opts :: map()) -> state(). + +new(_Id, #{sensitivity := S}) when S =< 0 -> + error("expected SensitivityTimeWindow > 0"); + +new(_Id, #{target := T}) when T < ?MIN_TARGET -> + error("the target is too small"); + +new(_Id, #{budget := B, target := T, window := W, sensitivity := S} = Opts) -> + WinSize = W div S + 1, + validate_arguments(?map_to_record(?STATE, maps:merge(#{ + minimums => queue:new(), + window_size => WinSize, + initial_budget => B, + underload_threshold => round(T * 0.95), + overload_threshold => round(T * 1.05), + delay_threshold => round(T * 1.07) + }, maps:without([budget, window, sensitivity], Opts)))). + + +-spec from_map(couch_rate:id(), map()) -> state(). + +from_map(_Id, Map) -> + ?map_to_record(?STATE, Map). + + +-spec budget(couch_rate:id(), state()) -> + {pos_integer(), state()}. + +budget(Id, #?STATE{} = State) -> + #?STATE{ + reads = R, + writes = W, + mean_writes = MW, + mean_reads = MR, + multiplicative_factor = MultiplicativeFactor, + target = Target, + initial_budget = InitialBudget, + latency = Latency + } = State, + case pattern(Id, State) of + optimal -> + {max(1, round(MR)), State}; + failed -> + %% decrease budget + {max(1, round(R * MultiplicativeFactor)), State}; + overloaded -> + %% decrease budget + {max(1, round(R * MultiplicativeFactor)), State}; + underloaded -> + ReadWriteRatio = min(1, MR / max(1, MW)), + SingleWrite = Latency / W, + EstimatedWrites = floor(Target / SingleWrite), + {max(1, round(ReadWriteRatio * EstimatedWrites)), State}; + init -> + {InitialBudget, State} + end. + +-spec delay(couch_rate:id(), state()) -> + {pos_integer(), state()}. + +delay(Id, #?STATE{} = State) -> + #?STATE{ + regular_delay = RD, + congested_delay = CD + } = State, + case pattern(Id, State) of + failed -> + {CD, State}; + _ -> + {RD, State} + end. + + +-spec wait(couch_rate:id(), state()) -> + ok. + +wait(Id, State) -> + {Delay, _} = delay(Id, State), + timer:sleep(Delay). + + +-spec in(couch_rate:id(), state(), Reads :: pos_integer()) -> + {ok, state()}. + +in(_Id, #?STATE{timer = TimerFun} = State, Reads) -> + {ok, State#?STATE{ + reads = Reads, + start_ts = TimerFun() + }}. + + +-spec success(couch_rate:id(), state(), Writes :: pos_integer()) -> + {ok, state()}. + +success(_Id, #?STATE{start_ts = undefined} = State, _Writes) -> + {ok, State}; + +success(_Id, #?STATE{} = State, Writes) -> + #?STATE{ + start_ts = TS, + timer = TimerFun, + reads = Reads, + mean_reads = MeanReads, + mean_writes = MeanWrites, + window_size = WinSize + } = State, + {ok, update_min(State#?STATE{ + writes = Writes, + mean_writes = average(MeanWrites, WinSize, Writes), + mean_reads = average(MeanReads, WinSize, Reads), + latency = TimerFun() - TS + })}. + + +-spec failure(couch_rate:id(), state()) -> {ok, state()}. + +failure(_Id, #?STATE{start_ts = undefined} = State) -> + {ok, State}; + +failure(_Id, #?STATE{} = State) -> + #?STATE{ + timer = TimerFun, + start_ts = TS + } = State, + {ok, update_min(State#?STATE{ + writes = 0, + latency = TimerFun() - TS + })}. + + +-spec is_congestion(couch_rate:id(), state()) -> boolean(). + +is_congestion(Id, #?STATE{} = State) -> + case pattern(Id, State) of + overloaded -> true; + failed -> true; + _ -> false + end. + + +-spec format(couch_rate:id(), state()) -> [{Key :: atom(), Value :: term()}]. + +format(_Id, #?STATE{minimums = M} = State) -> + Map = ?record_to_map(?STATE, State), + Minimums = lists:map(fun({D, V}) -> + [{value, V}, {death, D}] + end, queue:to_list(M)), + maps:to_list(maps:merge(Map, #{ + minimums => Minimums + })). + + +-spec to_map(couch_rate:id(), state()) -> map(). + +to_map(_Id, #?STATE{} = State) -> + ?record_to_map(?STATE, State). + + +-spec update_min(state()) -> state(). + +update_min(#?STATE{latency = ProcessingDelay} = Q0) -> + Q1 = remove_greater_than(Q0, ProcessingDelay), + Q2 = append(Q1, ProcessingDelay), + maybe_remove_first(Q2). + + +-spec pattern(couch_rate:id(), state()) -> + init + | underloaded + | overloaded + | optimal + | failed. + +pattern(Id, #?STATE{} = State) -> + #?STATE{ + underload_threshold = UnderloadThreshold, + overload_threshold = OverloadThreshold, + writes = W, + mean_writes = MW + } = State, + case min_latency(Id, State) of + MinRollingLatency when MinRollingLatency > OverloadThreshold -> + overloaded; + MinRollingLatency when MinRollingLatency > UnderloadThreshold -> + optimal; + MinRollingLatency when MinRollingLatency > 0 andalso W == 0 -> + failed; + MinRollingLatency when MinRollingLatency == 0 andalso MW == 0.0 -> + init; + _ -> + underloaded + end. + + +-spec min_latency(couch_rate:id(), state()) -> pos_integer() | 0. + +min_latency(_Id, #?STATE{size = 1}) -> + 0; + +min_latency(_Id, #?STATE{minimums = Minimums}) -> + {value, {_, Min}} = head(Minimums), + Min. + + +validate_arguments(#?STATE{timer = TimerFun}) + when not is_function(TimerFun, 0) -> + error("expected `timer` to be an arity 0 function"); + +validate_arguments(#?STATE{window_size = WinSize}) + when WinSize < 1 -> + error("expected `window_size` to be greater than 1"); + +validate_arguments(#?STATE{initial_budget = Budget}) + when Budget < 1 -> + error("expected `initial_budget` to be greater than 1"); + +validate_arguments(#?STATE{overload_threshold = OT, target = T}) + when OT =< T -> + error("expected `overload_threshold` to be greater than `target`"); + +validate_arguments(#?STATE{underload_threshold = UT, target = T}) + when UT >= T -> + error("expected `underload_threshold` to be less than `target`"); + +validate_arguments(#?STATE{delay_threshold = DT, overload_threshold = OT}) + when DT =< OT -> + error("expected `delay_threshold` to be greater than `overload_threshold`"); + +validate_arguments(#?STATE{multiplicative_factor = MF}) + when MF < 0 orelse MF > 1 -> + error("expected `multiplicative_factor` to be in the (0, 1) range"); + +validate_arguments(#?STATE{} = State) -> + State. + + +-spec remove_greater_than(state(), pos_integer()) -> state(). + +remove_greater_than(#?STATE{minimums = Minimums, size = S} = State, Value) -> + case tail(Minimums) of + {value, {_, T}} when Value =< T -> + NewState = State#?STATE{minimums = tail_drop(Minimums), size = S - 1}, + remove_greater_than(NewState, Value); + {value, _} -> + State; + empty -> + State#?STATE{epoch = 1} + end. + + +-spec append(state(), pos_integer()) -> state(). + +append(#?STATE{minimums = Minimums, epoch = E, window_size = S} = State, Value) -> + Death = E + S, + State#?STATE{ + minimums = tail_put(Minimums, {Death, Value}), + epoch = E + 1, + size = S + 1 + }. + + +-spec maybe_remove_first(state()) -> state(). + +maybe_remove_first(#?STATE{minimums = Minimums, epoch = E, size = S} = State) -> + case head(Minimums) of + {value, {E, _V}} -> + State#?STATE{minimums = head_drop(Minimums), size = S - 1}; + _ -> + State + end. + + +% Donald Knuth’s Art of Computer Programming, Vol 2, page 232, 3rd +% Welford method +average(Avg, WindowSize, Value) -> + Delta = Value - Avg, + Avg + Delta / WindowSize. + +%% The helper functions are added because queue module +%% naming conventions are weird +head(Q) -> queue:peek_r(Q). + + +head_drop(Q) -> queue:drop_r(Q). + +tail(Q) -> queue:peek(Q). + + +tail_put(Q, V) -> queue:in_r(V, Q). + + +tail_drop(Q) -> queue:drop(Q). + + +-spec now_msec() -> msec(). +now_msec() -> + {Mega, Sec, Micro} = os:timestamp(), + ((Mega * 1000000) + Sec) * 1000 + Micro div 1000. \ No newline at end of file diff --git a/src/couch_rate/src/couch_rate_pd.erl b/src/couch_rate/src/couch_rate_pd.erl new file mode 100644 index 000000000..5d79f7890 --- /dev/null +++ b/src/couch_rate/src/couch_rate_pd.erl @@ -0,0 +1,90 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_rate_pd). + +-include("couch_rate.hrl"). + + +-export([ + new/2, + create_if_missing/2, + lookup/2, + update/3 +]). + +-type id() :: term(). +-type state() :: term(). +-type result() :: term(). +-type store_state() :: term(). + +-define(STATE_KEY, couch_rate_state). + + +-spec create_if_missing(couch_rate:id(), state()) -> store_state(). + +create_if_missing(Id, State) -> + case get({?STATE_KEY, Id}) of + undefined -> + put({?STATE_KEY, Id}, State), + ok; + _ -> + ok + end. + + +-spec new(couch_rate:id(), state()) -> + store_state() + | {error, term()}. + +new(Id, State) -> + case get({?STATE_KEY, Id}) of + undefined -> + put({?STATE_KEY, Id}, State), + ok; + _ -> + {error, #{reason => already_exists, id => Id}} + end. + + +-spec lookup(id(), store_state()) -> + state() + | {error, term()}. + +lookup(Id, _StoreState) -> + case get({?STATE_KEY, Id}) of + undefined -> + {error, #{reason => cannot_find, id => Id}}; + State -> + State + end. + + +-spec update(id(), store_state(), fun( + (id(), state()) -> {state(), result()} + )) -> + result() + | {error, term()}. + +update(Id, _StoreState, Fun) -> + case get({?STATE_KEY, Id}) of + undefined -> + {error, #{reason => cannot_find, id => Id}}; + State -> + case Fun(Id, State) of + {Result, State} -> + put({?STATE_KEY, Id}, State), + Result; + Error -> + Error + end + end. diff --git a/src/couch_rate/src/couch_rate_sup.erl b/src/couch_rate/src/couch_rate_sup.erl new file mode 100644 index 000000000..1ce01b644 --- /dev/null +++ b/src/couch_rate/src/couch_rate_sup.erl @@ -0,0 +1,36 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_rate_sup). +-behaviour(supervisor). +-vsn(1). + +-export([ + start_link/0, + init/1 +]). + + +start_link() -> + supervisor:start_link({local, ?MODULE}, ?MODULE, []). + + +init([]) -> + couch_rate_ets:create_tables(), + Flags = #{ + strategy => one_for_one, + intensity => 5, + period => 10 + }, + Children = [ + ], + {ok, {Flags, Children}}. \ No newline at end of file diff --git a/src/couch_rate/test/exunit/couch_rate_config_test.exs b/src/couch_rate/test/exunit/couch_rate_config_test.exs new file mode 100644 index 000000000..7db30d272 --- /dev/null +++ b/src/couch_rate/test/exunit/couch_rate_config_test.exs @@ -0,0 +1,88 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); you may not +# use this file except in compliance with the License. You may obtain a copy of +# the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations under +# the License. + +defmodule Couch.Rate.Config.Test do + use ExUnit.Case, async: true + use ExUnitProperties + import StreamData + + @erlang_reserved_words MapSet.new([ + "after", + "and", + "andalso", + "band", + "begin", + "bnot", + "bor", + "bsl", + "bsr", + "bxor", + "case", + "catch", + "cond", + "div", + "end", + "fun", + "if", + "let", + "not", + "of", + "or", + "orelse", + "receive", + "rem", + "try", + "when", + "xor" + ]) + + alias :couch_rate_config, as: RLC + + test "parse valid configuration" do + parsed = RLC.from_str(~S(#{foo => 1, bar => 2.0})) + assert %{foo: 1, bar: 2} == parsed + end + + property "roundtrip" do + check all(options <- valid_config()) do + parsed = RLC.from_str(RLC.to_str(options)) + assert options == parsed + end + end + + defp valid_config() do + map_of( + erlang_atom(), + one_of([ + positive_integer(), + # we only support positive float + float(min: 0.0) + ]) + ) + end + + defp erlang_atom() do + bind(string(:alphanumeric), fn str -> + bind(integer(?a..?z), fn char -> + erlang_atom(str, char) + end) + end) + end + + defp erlang_atom(str, char) do + if MapSet.member?(@erlang_reserved_words, <>) do + String.to_atom(<>) + else + String.to_atom(<>) + end + end +end diff --git a/src/couch_rate/test/exunit/couch_rate_limiter_test.exs b/src/couch_rate/test/exunit/couch_rate_limiter_test.exs new file mode 100644 index 000000000..ff70f793a --- /dev/null +++ b/src/couch_rate/test/exunit/couch_rate_limiter_test.exs @@ -0,0 +1,350 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); you may not +# use this file except in compliance with the License. You may obtain a copy of +# the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations under +# the License. + +defmodule Couch.Rate.Limiter.Test do + use ExUnit.Case, async: true + + @transaction_timeout 5_000 + + alias :couch_rate, as: RL + + describe "Stats :" do + @scenario %{rw_ratio: 1 / 1, target: 400, write_time: 100} + test "#{__ENV__.line} : #{inspect(@scenario)} (underloaded)" do + {rate_limiter, measurments} = simulate(@scenario, 1000) + stats = statistics(measurments) + maybe_debug(rate_limiter, measurments, stats) + + assert stats.wait_time.p90 == 100, + "expected no artificial delays for more than 90% of batches" + + budget = stats.budget + + assert floor(budget.p95) in 1..7, + "expected budget to converge into the 1..7 range (got #{budget.p95})" + + reads = stats.mean_reads + + assert floor(reads.p95) in 1..7, + "expected mean_read to converge into the 1..7 range (got #{reads.p95})" + + writes = stats.mean_writes + assert round(writes.p99) in 2..6 + "expected mean_writes to converge into the 2..6 range (got #{writes.p95})" + + assert stats.latency.p95 < @transaction_timeout, + "expected latency for 95% batches under @transaction_timout" + + found_after = initial_search_speed(measurments) + + assert found_after < 5, + "expected to find acceptable budget in less than 5 iterations (got: #{ + found_after + })" + + measurments + |> initial_search() + |> Enum.reduce(101, fn row, prev_budget -> + assert row.budget < prev_budget, + "expected to reduce budget while we fail" + + row.budget + end) + end + + @scenario %{rw_ratio: 1 / 8, target: 3900, write_time: 100} + test "#{__ENV__.line} : #{inspect(@scenario)} (optimal)" do + {rate_limiter, measurments} = simulate(@scenario, 1000) + stats = statistics(measurments) + maybe_debug(rate_limiter, measurments, stats) + + assert stats.wait_time.p90 == 100, + "expected no artificial delays for more than 90% of batches" + + budget = stats.budget + + assert floor(budget.p95) in 4..7, + "expected budget to converge into the 4..7 range (got #{budget.p95})" + + reads = stats.mean_reads + + assert floor(reads.p95) in 4..7, + "expected mean_read to converge into the 4..7 range (got #{reads.p95})" + + writes = stats.mean_writes + assert round(writes.p99) in 39..41 + "expected mean_writes to converge into the 39..41 range (got #{writes.p95})" + + assert stats.latency.p95 < @transaction_timeout, + "expected latency for 95% of batches under @transaction_timout" + + found_after = initial_search_speed(measurments) + + assert found_after < 10, + "expected to find acceptable budget in less than 10 iterations (got: #{ + found_after + })" + + measurments + |> initial_search() + |> Enum.reduce(101, fn row, prev_budget -> + assert row.budget < prev_budget, + "expected to reduce budget while we fail" + + row.budget + end) + end + + @scenario %{rw_ratio: 1 / 20, target: 3900, write_time: 100} + test "#{__ENV__.line} : #{inspect(@scenario)} (overloaded)" do + # This is a worst case scenario due to big variability of wait_time and + # big value read/write ratio + {rate_limiter, measurments} = simulate(@scenario, 1000) + stats = statistics(measurments) + maybe_debug(rate_limiter, measurments, stats) + + assert stats.wait_time.p90 == 100, + "expected no artificial delays for more than 90% of batches" + + budget = stats.budget + assert floor(budget.p95) in 1..4 + "expected budget to converge into the 1..4 range (got #{budget.p95})" + reads = stats.mean_reads + assert floor(reads.p95) in 1..4 + "expected mean_read to converge into the 1..4 range (got #{reads.p95})" + writes = stats.mean_writes + assert round(writes.p99) in 39..41 + "expected mean_writes to converge into the 39..41 range (got #{writes.p95})" + + assert stats.latency.p90 < @transaction_timeout, + "expected latency for 90% of batches under @transaction_timout" + + found_after = initial_search_speed(measurments) + + assert found_after < 16, + "expected to find acceptable budget in less than 16 iterations (got: #{ + found_after + })" + + measurments + |> initial_search() + |> Enum.reduce(101, fn row, prev_budget -> + assert row.budget < prev_budget, + "expected to reduce budget while we fail" + + row.budget + end) + end + end + + defp simulate(scenario, iterations) do + :couch_rate_ets.create_tables() + + limiter = + RL.new(:limiter_id, :couch_rate_limiter, nil, %{ + budget: 100, + target: scenario.target, + # average over 20 last measurments + window: scenario.write_time * 20, + sensitivity: scenario.write_time, + timer: &timer/0 + }) + + result = + Enum.reduce(0..iterations, {limiter, []}, fn _idx, {limiter, stats} -> + {budget, limiter} = step(limiter, scenario.rw_ratio, scenario.write_time) + {limiter, update_measurments(limiter, stats, budget)} + end) + + :couch_rate_ets.delete_tables() + result + end + + defp step(limiter, read_write_ratio, write_time) do + {reads, limiter} = RL.budget(limiter) + writes = round(reads / read_write_ratio) + {delay, limiter} = RL.delay(limiter) + sleep(delay) + data_before = RL.to_map(limiter) + {:ok, limiter} = RL.in(limiter, reads) + data_after = RL.to_map(limiter) + + assert data_after.size <= data_after.window_size + 1, + "The number of elements in minimums container shouldn't grow (got: #{ + data_after.size + })" + + if data_before.writes == 0 and + data_after.writes == 0 and + data_before.reads != 0 do + assert data_before.reads > data_after.reads, + "expected to reduce number of reads while transaction fails" + end + + total_write_time = + 0..writes + |> Enum.reduce_while(0, fn _, acc -> + write_time = :rand.normal(write_time, write_time * 0.25) + + if acc < @transaction_timeout do + {:cont, acc + write_time} + else + {:halt, acc} + end + end) + + sleep(total_write_time) + + if total_write_time < @transaction_timeout do + {:ok, limiter} = RL.success(limiter, writes) + {reads, limiter} + else + {:ok, limiter} = RL.failure(limiter) + {reads, limiter} + end + end + + defp update_measurments(limiter, stats, budget) do + data = RL.to_map(limiter) + {wait_time, _} = RL.delay(limiter) + + stats ++ + [ + %{ + budget: budget, + slack: data.target - data.latency, + rw_ratio: data.mean_reads / max(1, data.mean_writes), + latency: data.latency, + new_budget: budget, + minimum_latency: RL.min_latency(limiter), + wait_time: wait_time, + elements_in_min_queue: data.size, + mean_reads: data.mean_reads, + mean_writes: data.mean_writes, + total_reads: data.reads, + total_writes: data.writes + } + ] + end + + defp timer() do + now = Process.get(:time, 1) + Process.put(:time, now + 1) + now + end + + defp sleep(sleep_time_in_ms) do + now = timer() + Process.put(:time, now + sleep_time_in_ms - 1) + end + + defp format_table([first | _] = rows) do + spec = + first + |> Map.keys() + |> Enum.map(fn h -> {h, String.length(to_str(h))} end) + + header = first |> Map.keys() |> Enum.map(&to_str/1) |> Enum.join(" , ") + + lines = + Enum.map(rows, fn row -> + fields = + Enum.map(spec, fn {field, size} -> + String.pad_trailing("#{to_str(Map.get(row, field))}", size) + end) + + Enum.join(fields, " , ") + end) + + Enum.join([header | lines], "\n") + end + + defp initial_search_speed(measurments) do + length(initial_search(measurments)) + end + + defp initial_search(measurments) do + Enum.reduce_while(measurments, [], fn row, acc -> + if row.total_writes == 0 do + {:cont, acc ++ [row]} + else + {:halt, acc} + end + end) + end + + defp statistics(measurments) do + data = + Enum.reduce(measurments, %{}, fn row, acc -> + Enum.reduce(row, acc, fn {key, value}, acc -> + Map.update(acc, key, [], fn metric -> + metric ++ [value] + end) + end) + end) + + Enum.reduce(data, %{}, fn {key, values}, acc -> + stats = Enum.into(:bear.get_statistics(values), %{}) + {percentile, stats} = Map.pop(stats, :percentile) + + stats = + Enum.reduce(percentile, stats, fn {key, value}, acc -> + Map.put(acc, String.to_atom("p#{to_str(key)}"), value) + end) + + Map.put(acc, key, stats) + end) + end + + defp format_stats(stats) do + rows = + Enum.map(stats, fn {key, values} -> + values + |> Enum.into(%{}) + |> Map.put(:metric, key) + |> Map.delete(:histogram) + end) + + format_table(rows) + end + + defp to_str(int) when is_integer(int) do + "#{int}" + end + + defp to_str(float) when is_float(float) do + "#{Float.to_string(Float.round(float, 2))}" + end + + defp to_str(atom) when is_atom(atom) do + Atom.to_string(atom) + end + + defp to_str(string) when is_binary(string) do + string + end + + defp to_map(rate_limiter) do + RL.to_map(rate_limiter) + end + + defp maybe_debug(rate_limiter, measurments, stats) do + if System.fetch_env("EXUNIT_DEBUG") != :error do + IO.puts("") + IO.puts("rate_limiter: #{inspect(to_map(rate_limiter))}") + IO.puts("measurments: #{inspect(measurments)}") + IO.puts("stats: #{inspect(stats)}") + + IO.puts("\n" <> format_table(measurments) <> "\n" <> format_stats(stats)) + end + end +end diff --git a/src/couch_rate/test/exunit/test_helper.exs b/src/couch_rate/test/exunit/test_helper.exs new file mode 100644 index 000000000..9b9d6ef94 --- /dev/null +++ b/src/couch_rate/test/exunit/test_helper.exs @@ -0,0 +1,14 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); you may not +# use this file except in compliance with the License. You may obtain a copy of +# the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations under +# the License. + +ExUnit.configure(formatters: [JUnitFormatter, ExUnit.CLIFormatter]) +ExUnit.start() diff --git a/src/couch_views/README.md b/src/couch_views/README.md index 49cd82b98..5647913f0 100644 --- a/src/couch_views/README.md +++ b/src/couch_views/README.md @@ -13,3 +13,36 @@ Code layout: * `couch_views_fdb` - Maps view operations to FoundationDB logic. * `couch_views_encoding` - Encodes view keys that are byte comparable following CouchDB view sort order. * `couch_views_server` - Spawns `couch_views_indexer` workers to handle index update jobs. + +# Configuration + +## Configuring rate limiter + +Here is the example of configuration used in `couch_view` application: + +``` +[couch_rate.views] +limiter = couch_rate_limiter +opts = #{budget => 100, target => 2500, window => 60000, sensitivity => 1000} +``` + +Supported fields in `opts`: + +* `budget` - the initial value for estimated batch size +* `target` - the amount in msec which we try to maintain for batch processing time +* `window` - time interval for contention detector +* `sensitivity` - minimal interval within the `window` + +Unsupported fields in `opts` (if you really know what you are doing): + +* `window_size` - how many batches to consider in contention detector +* `timer` - this is used for testing to fast forward time `fun() -> current_time_in_ms() end` +* `target` - the amount in msec which we try to maintain for batch processing time +* `underload_threshold` - a threshold below which we would try to increase the budget +* `overload_threshold` - a threshold above which we would start decreasing the budget +* `delay_threshold` - a threshold above which we would start introducing delays between batches +* `multiplicative_factor` - determines how fast we are going to decrease budget (must be in (0..1) range) +* `regular_delay` - delay between batches when there is no overload +* `congested_delay` - delay between batches when there is an overload +* `initial_budget` - initial value for budget to start with + diff --git a/src/couch_views/src/couch_views.app.src b/src/couch_views/src/couch_views.app.src index 0d666affd..b704c9745 100644 --- a/src/couch_views/src/couch_views.app.src +++ b/src/couch_views/src/couch_views.app.src @@ -27,6 +27,7 @@ couch_stats, fabric, couch_jobs, - couch_eval + couch_eval, + couch_rate ]} ]}. diff --git a/src/couch_views/src/couch_views_indexer.erl b/src/couch_views/src/couch_views_indexer.erl index b41d0679b..0127bacec 100644 --- a/src/couch_views/src/couch_views_indexer.erl +++ b/src/couch_views/src/couch_views_indexer.erl @@ -72,6 +72,8 @@ init() -> fail_job(Job, Data, sig_changed, "Design document was modified") end, + Limiter = couch_rate:create_if_missing({DbName, DDocId}, "views"), + State = #{ tx_db => undefined, db_uuid => DbUUID, @@ -81,7 +83,7 @@ init() -> job => Job, job_data => Data, count => 0, - limit => num_changes(), + limiter => Limiter, doc_acc => [], design_opts => Mrst#mrst.design_opts }, @@ -94,6 +96,7 @@ init() -> error:database_does_not_exist -> fail_job(Job, Data, db_deleted, "Database was deleted"); Error:Reason -> + couch_rate:failure(Limiter), NewRetry = Retries + 1, RetryLimit = retry_limit(), @@ -152,7 +155,25 @@ add_error(Error, Reason, Data) -> update(#{} = Db, Mrst0, State0) -> - {Mrst2, State4} = fabric2_fdb:transactional(Db, fun(TxDb) -> + Limiter = maps:get(limiter, State0), + case couch_rate:budget(Limiter) of + 0 -> + couch_rate:wait(Limiter), + update(Db, Mrst0, State0); + Limit -> + {Mrst1, State1} = do_update(Db, Mrst0, State0#{limit => Limit, limiter => Limiter}), + case State1 of + finished -> + couch_eval:release_map_context(Mrst1#mrst.qserver); + _ -> + couch_rate:wait(Limiter), + update(Db, Mrst1, State1) + end + end. + + +do_update(Db, Mrst0, State0) -> + fabric2_fdb:transactional(Db, fun(TxDb) -> % In the first iteration of update we need % to populate our db and view sequences State1 = case State0 of @@ -174,14 +195,18 @@ update(#{} = Db, Mrst0, State0) -> #{ count := Count, - limit := Limit, doc_acc := DocAcc, - last_seq := LastSeq + last_seq := LastSeq, + limit := Limit, + limiter := Limiter } = State2, - DocAcc1 = fetch_docs(TxDb, DocAcc), + couch_rate:in(Limiter, Count), + {Mrst1, MappedDocs} = map_docs(Mrst0, DocAcc1), - write_docs(TxDb, Mrst1, MappedDocs, State2), + WrittenDocs = write_docs(TxDb, Mrst1, MappedDocs, State2), + + couch_rate:success(Limiter, WrittenDocs), case Count < Limit of true -> @@ -196,14 +221,7 @@ update(#{} = Db, Mrst0, State0) -> view_seq := LastSeq }} end - end), - - case State4 of - finished -> - couch_eval:release_map_context(Mrst2#mrst.qserver); - _ -> - update(Db, Mrst2, State4) - end. + end). fold_changes(State) -> @@ -304,12 +322,14 @@ write_docs(TxDb, Mrst, Docs, State) -> KeyLimit = key_size_limit(), ValLimit = value_size_limit(), - lists:foreach(fun(Doc0) -> + DocsNumber = lists:foldl(fun(Doc0, N) -> Doc1 = calculate_kv_sizes(Mrst, Doc0, KeyLimit, ValLimit), - couch_views_fdb:write_doc(TxDb, Sig, ViewIds, Doc1) - end, Docs), + couch_views_fdb:write_doc(TxDb, Sig, ViewIds, Doc1), + N + 1 + end, 0, Docs), - couch_views_fdb:set_update_seq(TxDb, Sig, LastSeq). + couch_views_fdb:set_update_seq(TxDb, Sig, LastSeq), + DocsNumber. fetch_docs(Db, Changes) -> @@ -472,10 +492,6 @@ fail_job(Job, Data, Error, Reason) -> exit(normal). -num_changes() -> - config:get_integer("couch_views", "change_limit", 100). - - retry_limit() -> config:get_integer("couch_views", "retry_limit", 3). diff --git a/src/couch_views/test/couch_views_indexer_test.erl b/src/couch_views/test/couch_views_indexer_test.erl index 8f8f3c5cb..43b58284d 100644 --- a/src/couch_views/test/couch_views_indexer_test.erl +++ b/src/couch_views/test/couch_views_indexer_test.erl @@ -50,7 +50,8 @@ indexer_test_() -> ?TDEF_FE(handle_size_value_limits), ?TDEF_FE(index_autoupdater_callback), ?TDEF_FE(handle_db_recreated_when_running), - ?TDEF_FE(handle_db_recreated_after_finished) + ?TDEF_FE(handle_db_recreated_after_finished), + ?TDEF_FE(index_budget_is_changing) ] } } @@ -375,6 +376,55 @@ index_autoupdater_callback(Db) -> ?assertEqual(ok, couch_views_jobs:wait_for_job(JobId, DbSeq)). +index_budget_is_changing(Db) -> + ok = meck:new(couch_rate, [passthrough]), + ok = meck:expect(couch_rate, budget, fun(State) -> + meck:passthrough([State]) + end), + + LimiterOpts = #{ + budget => 100, + sensitivity => 500, + target => 500, + timer => fun timer/0, + window => 2000 + }, + + ok = meck:expect(couch_rate, create_if_missing, fun(Id, Module, Store, _Options) -> + meck:passthrough([Id, Module, Store, LimiterOpts]) + end), + + ok = meck:expect(couch_rate, wait, fun(State) -> + Delay = couch_rate:delay(State), + put(time, timer() + Delay - 1) + end), + + DDoc = create_ddoc(), + Docs = lists:map(fun doc/1, lists:seq(1, 200)), + + {ok, _} = fabric2_db:update_docs(Db, [DDoc | Docs], []), + + {ok, _Out} = couch_views:query( + Db, + DDoc, + <<"map_fun2">>, + fun fold_fun/2, + [], + #mrargs{} + ), + ?assert(length(lists:usort(budget_history())) > 1). + + +timer() -> + get(time) == undefined andalso put(time, 1), + Now = get(time), + put(time, Now + 1), + Now. + + +budget_history() -> + [Result || {_Pid, {couch_rate, budget, _}, Result} <- meck:history(couch_rate)]. + handle_db_recreated_when_running(Db) -> DbName = fabric2_db:name(Db), @@ -386,7 +436,8 @@ handle_db_recreated_when_running(Db) -> % To intercept job building while it is running ensure updates happen one % row at a time. - config:set("couch_views", "change_limit", "1", false), + ok = meck:new(couch_rate, [passthrough]), + ok = meck:expect(couch_rate, budget, ['_'], meck:val(1)), meck_intercept_job_update(self()), diff --git a/src/couch_views/test/couch_views_trace_index_test.erl b/src/couch_views/test/couch_views_trace_index_test.erl index c4f76d897..f8a5ce535 100644 --- a/src/couch_views/test/couch_views_trace_index_test.erl +++ b/src/couch_views/test/couch_views_trace_index_test.erl @@ -51,7 +51,7 @@ indexer_test_() -> setup() -> - test_util:start_couch([fabric, couch_js]). + test_util:start_couch([fabric, couch_js, couch_rate]). cleanup(Ctx) -> -- cgit v1.2.1 From 2ed662e4e06dd078f26116a7cb5a2d4eb28781fe Mon Sep 17 00:00:00 2001 From: Alessio Biancalana Date: Thu, 26 Mar 2020 23:27:35 +0100 Subject: Port view_offset.js to elixir test suite --- test/elixir/README.md | 2 +- test/elixir/lib/couch/db_test.ex | 26 ++++++++- test/elixir/test/view_offsets_test.exs | 100 +++++++++++++++++++++++++++++++++ test/javascript/tests/view_offsets.js | 2 + 4 files changed, 128 insertions(+), 2 deletions(-) create mode 100644 test/elixir/test/view_offsets_test.exs diff --git a/test/elixir/README.md b/test/elixir/README.md index 453614700..0bd69660b 100644 --- a/test/elixir/README.md +++ b/test/elixir/README.md @@ -107,7 +107,7 @@ X means done, - means partially - [ ] Port view_multi_key_all_docs.js - [ ] Port view_multi_key_design.js - [ ] Port view_multi_key_temp.js - - [ ] Port view_offsets.js + - [X] Port view_offsets.js - [X] Port view_pagination.js - [ ] Port view_sandboxing.js - [ ] Port view_update_seq.js diff --git a/test/elixir/lib/couch/db_test.ex b/test/elixir/lib/couch/db_test.ex index 47a067652..e3f32f839 100644 --- a/test/elixir/lib/couch/db_test.ex +++ b/test/elixir/lib/couch/db_test.ex @@ -192,6 +192,13 @@ defmodule Couch.DBTest do resp.body end + def save(db_name, document) do + resp = Couch.put("/#{db_name}/#{document["_id"]}", body: document) + assert resp.status_code in [201, 202] + assert resp.body["ok"] + Map.put(document, "_rev", resp.body["rev"]) + end + def bulk_save(db_name, docs) do resp = Couch.post( @@ -271,6 +278,24 @@ defmodule Couch.DBTest do resp.body end + def view(db_name, view_name, options \\ nil, keys \\ nil) do + [view_root, view_name] = String.split(view_name, "/") + + resp = + case keys do + nil -> + Couch.get("/#{db_name}/_design/#{view_root}/_view/#{view_name}", query: options) + + _ -> + Couch.post("/#{db_name}/_design/#{view_root}/_view/#{view_name}", + body: %{"keys" => keys} + ) + end + + assert resp.status_code in [200, 201] + resp + end + def sample_doc_foo do %{ _id: "foo", @@ -300,7 +325,6 @@ defmodule Couch.DBTest do end end - def request_stats(path_steps, is_test) do path = List.foldl( diff --git a/test/elixir/test/view_offsets_test.exs b/test/elixir/test/view_offsets_test.exs new file mode 100644 index 000000000..20aa1ca9d --- /dev/null +++ b/test/elixir/test/view_offsets_test.exs @@ -0,0 +1,100 @@ +defmodule ViewOffsetTest do + use CouchTestCase + + @moduletag :view_offsets + + @moduledoc """ + Tests about view offsets. + This is a port of the view_offsets.js javascript test suite. + """ + + @docs [ + %{"_id" => "a1", "letter" => "a", "number" => 1, "foo" => "bar"}, + %{"_id" => "a2", "letter" => "a", "number" => 2, "foo" => "bar"}, + %{"_id" => "a3", "letter" => "a", "number" => 3, "foo" => "bar"}, + %{"_id" => "b1", "letter" => "b", "number" => 1, "foo" => "bar"}, + %{"_id" => "b2", "letter" => "b", "number" => 2, "foo" => "bar"}, + %{"_id" => "b3", "letter" => "b", "number" => 3, "foo" => "bar"}, + %{"_id" => "b4", "letter" => "b", "number" => 4, "foo" => "bar"}, + %{"_id" => "b5", "letter" => "b", "number" => 5, "foo" => "bar"}, + %{"_id" => "c1", "letter" => "c", "number" => 1, "foo" => "bar"}, + %{"_id" => "c2", "letter" => "c", "number" => 2, "foo" => "bar"} + ] + + @design_doc %{ + "_id" => "_design/test", + "views" => %{ + "offset" => %{ + "map" => "function(doc) { emit([doc.letter, doc.number], doc); }" + } + } + } + + @tag :with_db + test "basic view offsets", context do + db_name = context[:db_name] + save(db_name, @design_doc) + bulk_save(db_name, @docs) + + [ + [["c", 2], 0], + [["c", 1], 1], + [["b", 5], 2], + [["b", 4], 3], + [["b", 3], 4], + [["b", 2], 5], + [["b", 1], 6], + [["a", 3], 7], + [["a", 2], 8], + [["a", 1], 9] + ] + |> Enum.each(fn [start_key, offset] -> + result = + view(db_name, "test/offset", %{ + "startkey" => :jiffy.encode(start_key), + "descending" => true + }) + + assert result.body["offset"] === offset + end) + end + + test "repeated view offsets" do + 0..14 |> Enum.each(fn _ -> repeated_view_offset_test_fun end) + end + + def repeated_view_offset_test_fun do + db_name = random_db_name() + create_db(db_name) + + save(db_name, @design_doc) + bulk_save(db_name, @docs) + + first_response = + view(db_name, "test/offset", %{ + "startkey" => :jiffy.encode(["b", 4]), + "startkey_docid" => "b4", + "endkey" => :jiffy.encode(["b"]), + "descending" => true, + "limit" => 2, + "skip" => 1 + }) + + second_response = + view(db_name, "test/offset", %{ + "startkey" => :jiffy.encode(["c", 3]) + }) + + third_response = + view(db_name, "test/offset", %{ + "startkey" => :jiffy.encode(["b", 6]), + "endkey" => :jiffy.encode(["b", 7]) + }) + + assert first_response.body["offset"] === 4 + assert second_response.body["offset"] === length(@docs) + assert third_response.body["offset"] === 8 + + delete_db(db_name) + end +end diff --git a/test/javascript/tests/view_offsets.js b/test/javascript/tests/view_offsets.js index 8b39cc247..179c96360 100644 --- a/test/javascript/tests/view_offsets.js +++ b/test/javascript/tests/view_offsets.js @@ -10,6 +10,8 @@ // License for the specific language governing permissions and limitations under // the License. +couchTests.elixir = true; + couchTests.view_offsets = function(debug) { if (debug) debugger; -- cgit v1.2.1 From c22d0075df034d7b0bf501ae28a85713233c8f1e Mon Sep 17 00:00:00 2001 From: Garren Smith Date: Wed, 18 Mar 2020 09:39:50 +0200 Subject: fix all_docs call to return row --- src/chttpd/src/chttpd_db.erl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/chttpd/src/chttpd_db.erl b/src/chttpd/src/chttpd_db.erl index dea992c23..174d46487 100644 --- a/src/chttpd/src/chttpd_db.erl +++ b/src/chttpd/src/chttpd_db.erl @@ -985,7 +985,8 @@ apply_args_to_keylist(Args, Keys0) -> view_cb({row, Row}, {iter, Db, Args, VAcc}) -> NewRow = case lists:keymember(doc, 1, Row) of true -> - chttpd_stats:incr_reads(); + chttpd_stats:incr_reads(), + Row; false when Args#mrargs.include_docs -> {id, DocId} = lists:keyfind(id, 1, Row), chttpd_stats:incr_reads(), -- cgit v1.2.1 From cc2d2320c3acac05e76b3ce67dc42482e29aeaad Mon Sep 17 00:00:00 2001 From: Garren Smith Date: Wed, 18 Mar 2020 09:40:24 +0200 Subject: move all_doc view options to fabric2_util --- src/chttpd/src/chttpd_db.erl | 30 +----------------------------- src/fabric/src/fabric2_util.erl | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 29 deletions(-) diff --git a/src/chttpd/src/chttpd_db.erl b/src/chttpd/src/chttpd_db.erl index 174d46487..e9b33f001 100644 --- a/src/chttpd/src/chttpd_db.erl +++ b/src/chttpd/src/chttpd_db.erl @@ -867,7 +867,7 @@ all_docs_view(Req, Db, Keys, OP) -> send_all_docs(Db, #mrargs{keys = undefined} = Args, VAcc0) -> - Opts0 = all_docs_view_opts(Args), + Opts0 = fabric2_util:all_docs_view_opts(Args), Opts = Opts0 ++ [{restart_tx, true}], NS = couch_util:get_value(namespace, Opts), FoldFun = case NS of @@ -939,34 +939,6 @@ send_all_docs_keys(Db, #mrargs{} = Args, VAcc0) -> end, VAcc1, Keys). -all_docs_view_opts(Args) -> - NS = couch_util:get_value(namespace, Args#mrargs.extra), - StartKey = case Args#mrargs.start_key of - undefined -> Args#mrargs.start_key_docid; - SKey -> SKey - end, - EndKey = case Args#mrargs.end_key of - undefined -> Args#mrargs.end_key_docid; - EKey -> EKey - end, - StartKeyOpts = case StartKey of - undefined -> []; - _ -> [{start_key, fabric2_util:encode_all_doc_key(StartKey)}] - end, - EndKeyOpts = case {EndKey, Args#mrargs.inclusive_end} of - {undefined, _} -> []; - {_, false} -> [{end_key_gt, fabric2_util:encode_all_doc_key(EndKey)}]; - {_, true} -> [{end_key, fabric2_util:encode_all_doc_key(EndKey)}] - end, - [ - {dir, Args#mrargs.direction}, - {limit, Args#mrargs.limit}, - {skip, Args#mrargs.skip}, - {update_seq, Args#mrargs.update_seq}, - {namespace, NS} - ] ++ StartKeyOpts ++ EndKeyOpts. - - apply_args_to_keylist(Args, Keys0) -> Keys1 = case Args#mrargs.direction of fwd -> Keys0; diff --git a/src/fabric/src/fabric2_util.erl b/src/fabric/src/fabric2_util.erl index d74ef2718..97bfedc2c 100644 --- a/src/fabric/src/fabric2_util.erl +++ b/src/fabric/src/fabric2_util.erl @@ -38,6 +38,7 @@ uuid/0, encode_all_doc_key/1, + all_docs_view_opts/1, pmap/2, pmap/3 @@ -45,6 +46,7 @@ -include_lib("couch/include/couch_db.hrl"). +-include_lib("couch_mrview/include/couch_mrview.hrl"). revinfo_to_revs(RevInfo) -> @@ -299,6 +301,42 @@ encode_all_doc_key(Term) when Term < <<>> -> <<>>; encode_all_doc_key(_) -> <<255>>. +all_docs_view_opts(#mrargs{} = Args) -> + NS = couch_util:get_value(namespace, Args#mrargs.extra), + StartKey = case Args#mrargs.start_key of + undefined -> Args#mrargs.start_key_docid; + SKey -> SKey + end, + EndKey = case Args#mrargs.end_key of + undefined -> Args#mrargs.end_key_docid; + EKey -> EKey + end, + StartKeyOpts = case StartKey of + undefined -> []; + _ -> [{start_key, encode_all_doc_key(StartKey)}] + end, + EndKeyOpts = case {EndKey, Args#mrargs.inclusive_end} of + {undefined, _} -> []; + {_, false} -> [{end_key_gt, encode_all_doc_key(EndKey)}]; + {_, true} -> [{end_key, encode_all_doc_key(EndKey)}] + end, + + DocOpts = case Args#mrargs.conflicts of + true -> [conflicts | Args#mrargs.doc_options]; + _ -> Args#mrargs.doc_options + end, + + [ + {dir, Args#mrargs.direction}, + {limit, Args#mrargs.limit}, + {skip, Args#mrargs.skip}, + {update_seq, Args#mrargs.update_seq}, + {namespace, NS}, + {include_docs, Args#mrargs.include_docs}, + {doc_opts, DocOpts} + ] ++ StartKeyOpts ++ EndKeyOpts. + + pmap(Fun, Args) -> pmap(Fun, Args, []). -- cgit v1.2.1 From a8b930d7d94316b86243034b8faa537cf8486661 Mon Sep 17 00:00:00 2001 From: Garren Smith Date: Tue, 10 Mar 2020 18:30:44 +0200 Subject: add include_docs option to fold_docs --- src/fabric/src/fabric2_db.erl | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/src/fabric/src/fabric2_db.erl b/src/fabric/src/fabric2_db.erl index ca9f037ec..fb6ae5176 100644 --- a/src/fabric/src/fabric2_db.erl +++ b/src/fabric/src/fabric2_db.erl @@ -832,11 +832,23 @@ fold_docs(Db, UserFun, UserAcc0, Options) -> UserAcc2 = fabric2_fdb:fold_range(TxDb, Prefix, fun({K, V}, Acc) -> {DocId} = erlfdb_tuple:unpack(K, Prefix), RevId = erlfdb_tuple:unpack(V), - maybe_stop(UserFun({row, [ + Row0 = [ {id, DocId}, {key, DocId}, {value, {[{rev, couch_doc:rev_to_str(RevId)}]}} - ]}, Acc)) + ], + + DocOpts = couch_util:get_value(doc_opts, Options, []), + OpenOpts = [deleted | DocOpts], + + Row1 = case lists:keyfind(include_docs, 1, Options) of + {include_docs, true} -> + Row0 ++ open_json_doc(Db, DocId, OpenOpts, DocOpts); + _ -> + Row0 + end, + + maybe_stop(UserFun({row, Row1}, Acc)) end, UserAcc1, Options), {ok, maybe_stop(UserFun(complete, UserAcc2))} @@ -1878,3 +1890,14 @@ stem_revisions(#{} = Db, #doc{} = Doc) -> true -> Doc#doc{revs = {RevPos, lists:sublist(Revs, RevsLimit)}}; false -> Doc end. + + +open_json_doc(Db, DocId, OpenOpts, DocOpts) -> + case fabric2_db:open_doc(Db, DocId, OpenOpts) of + {not_found, missing} -> + []; + {ok, #doc{deleted = true}} -> + [{doc, null}]; + {ok, #doc{} = Doc} -> + [{doc, couch_doc:to_json_obj(Doc, DocOpts)}] + end. -- cgit v1.2.1 From 551bd91d1f0c195d44c5a5f660c1bdcfcc117b1a Mon Sep 17 00:00:00 2001 From: Garren Smith Date: Thu, 19 Mar 2020 17:05:54 +0200 Subject: add fabric2 after_doc_write plugin --- src/fabric/src/fabric2_db_plugin.erl | 5 +++++ src/fabric/src/fabric2_fdb.erl | 3 +++ 2 files changed, 8 insertions(+) diff --git a/src/fabric/src/fabric2_db_plugin.erl b/src/fabric/src/fabric2_db_plugin.erl index fb83ed407..1d923dd96 100644 --- a/src/fabric/src/fabric2_db_plugin.erl +++ b/src/fabric/src/fabric2_db_plugin.erl @@ -15,6 +15,7 @@ -export([ validate_dbname/3, before_doc_update/3, + after_doc_write/6, after_doc_read/2, validate_docid/1, check_is_admin/1, @@ -49,6 +50,10 @@ before_doc_update(Db, Doc0, UpdateType) -> end. +after_doc_write(Db, Doc, NewWinner, OldWinner, NewRevId, Seq)-> + with_pipe(after_doc_write, [Db, Doc, NewWinner, OldWinner, NewRevId, Seq]). + + after_doc_read(Db, Doc0) -> Fun = fabric2_db:get_after_doc_read_fun(Db), case with_pipe(after_doc_read, [Doc0, Db]) of diff --git a/src/fabric/src/fabric2_fdb.erl b/src/fabric/src/fabric2_fdb.erl index 22ccc993d..912d4dfa8 100644 --- a/src/fabric/src/fabric2_fdb.erl +++ b/src/fabric/src/fabric2_fdb.erl @@ -832,6 +832,9 @@ write_doc(#{} = Db0, Doc, NewWinner0, OldWinner, ToUpdate, ToRemove) -> ok end, + fabric2_db_plugin:after_doc_write(Db, Doc, NewWinner, OldWinner, + NewRevId, WinnerVS), + % Update database size AddSize = sum_add_rev_sizes([NewWinner | ToUpdate]), RemSize = sum_rem_rev_sizes(ToRemove), -- cgit v1.2.1 From 9d27c6e817692ee98acdd1369b771ccb1b8bd79d Mon Sep 17 00:00:00 2001 From: Garren Smith Date: Mon, 23 Mar 2020 14:28:37 +0200 Subject: Add couch_views_indexer build to creation versionstamp This creates a versionstamp for when an indexed was created and build status for indexes. if the index has a creation_vs, then couch_views_indexer will built the index to this creation versionstamp. --- src/couch_views/include/couch_views.hrl | 6 ++ src/couch_views/src/couch_views_fdb.erl | 76 +++++++++++++++++++++ src/couch_views/src/couch_views_indexer.erl | 77 ++++++++++++++++------ src/couch_views/src/couch_views_jobs.erl | 21 ++++-- src/couch_views/test/couch_views_indexer_test.erl | 42 +++++++++++- .../test/couch_views_trace_index_test.erl | 5 +- src/fabric/src/fabric2_fdb.erl | 12 ++-- 7 files changed, 206 insertions(+), 33 deletions(-) diff --git a/src/couch_views/include/couch_views.hrl b/src/couch_views/include/couch_views.hrl index c40bb0212..3d0110f65 100644 --- a/src/couch_views/include/couch_views.hrl +++ b/src/couch_views/include/couch_views.hrl @@ -18,6 +18,8 @@ -define(VIEW_UPDATE_SEQ, 0). -define(VIEW_ROW_COUNT, 1). -define(VIEW_KV_SIZE, 2). +-define(VIEW_BUILD_STATUS, 3). +-define(VIEW_CREATION_VS, 4). % Data keys -define(VIEW_ID_RANGE, 0). @@ -25,3 +27,7 @@ % jobs api -define(INDEX_JOB_TYPE, <<"views">>). + +% indexing progress +-define(INDEX_BUILDING, <<"building">>). +-define(INDEX_READY, <<"ready">>). diff --git a/src/couch_views/src/couch_views_fdb.erl b/src/couch_views/src/couch_views_fdb.erl index a0224b2b8..3b008d44b 100644 --- a/src/couch_views/src/couch_views_fdb.erl +++ b/src/couch_views/src/couch_views_fdb.erl @@ -13,6 +13,12 @@ -module(couch_views_fdb). -export([ + new_interactive_index/3, + new_creation_vs/3, + get_creation_vs/2, + get_build_status/2, + set_build_status/3, + get_update_seq/2, set_update_seq/3, @@ -39,6 +45,60 @@ -include_lib("fabric/include/fabric2.hrl"). +new_interactive_index(Db, Mrst, VS) -> + couch_views_fdb:new_creation_vs(Db, Mrst, VS), + couch_views_fdb:set_build_status(Db, Mrst, ?INDEX_BUILDING). + + +%Interactive View Creation Versionstamp +%(, ?DB_VIEWS, ?VIEW_INFO, ?VIEW_CREATION_VS, Sig) = VS + +new_creation_vs(TxDb, #mrst{} = Mrst, VS) -> + #{ + tx := Tx + } = TxDb, + Key = creation_vs_key(TxDb, Mrst#mrst.sig), + Value = erlfdb_tuple:pack_vs({VS}), + ok = erlfdb:set_versionstamped_value(Tx, Key, Value). + + +get_creation_vs(TxDb, #mrst{} = Mrst) -> + get_creation_vs(TxDb, Mrst#mrst.sig); + +get_creation_vs(TxDb, Sig) -> + #{ + tx := Tx + } = TxDb, + Key = creation_vs_key(TxDb, Sig), + case erlfdb:wait(erlfdb:get(Tx, Key)) of + not_found -> + not_found; + EK -> + {VS} = erlfdb_tuple:unpack(EK), + VS + end. + + +%Interactive View Build Status +%(, ?DB_VIEWS, ?VIEW_INFO, ?VIEW_BUILD_STATUS, Sig) = INDEX_BUILDING | INDEX_READY + +get_build_status(TxDb, #mrst{sig = Sig}) -> + #{ + tx := Tx + } = TxDb, + Key = build_status_key(TxDb, Sig), + erlfdb:wait(erlfdb:get(Tx, Key)). + + +set_build_status(TxDb, #mrst{sig = Sig}, State) -> + #{ + tx := Tx + } = TxDb, + + Key = build_status_key(TxDb, Sig), + ok = erlfdb:set(Tx, Key, State). + + % View Build Sequence Access % (, ?DB_VIEWS, Sig, ?VIEW_UPDATE_SEQ) = Sequence @@ -340,6 +400,22 @@ map_idx_range(DbPrefix, Sig, ViewId, MapKey, DocId) -> erlfdb_tuple:range(Key, DbPrefix). +creation_vs_key(Db, Sig) -> + #{ + db_prefix := DbPrefix + } = Db, + Key = {?DB_VIEWS, ?VIEW_INFO, ?VIEW_CREATION_VS, Sig}, + erlfdb_tuple:pack(Key, DbPrefix). + + +build_status_key(Db, Sig) -> + #{ + db_prefix := DbPrefix + } = Db, + Key = {?DB_VIEWS, ?VIEW_INFO, ?VIEW_BUILD_STATUS, Sig}, + erlfdb_tuple:pack(Key, DbPrefix). + + process_rows(Rows) -> Encoded = lists:map(fun({K, V}) -> EK1 = couch_views_encoding:encode(K, key), diff --git a/src/couch_views/src/couch_views_indexer.erl b/src/couch_views/src/couch_views_indexer.erl index 0127bacec..ab5aaade2 100644 --- a/src/couch_views/src/couch_views_indexer.erl +++ b/src/couch_views/src/couch_views_indexer.erl @@ -18,7 +18,9 @@ -export([ - init/0 + init/0, + map_docs/2, + write_docs/4 ]). -ifdef(TEST). @@ -80,6 +82,7 @@ init() -> db_seq => undefined, view_seq => undefined, last_seq => undefined, + view_vs => undefined, job => Job, job_data => Data, count => 0, @@ -174,22 +177,7 @@ update(#{} = Db, Mrst0, State0) -> do_update(Db, Mrst0, State0) -> fabric2_fdb:transactional(Db, fun(TxDb) -> - % In the first iteration of update we need - % to populate our db and view sequences - State1 = case State0 of - #{db_seq := undefined} -> - ViewSeq = couch_views_fdb:get_update_seq(TxDb, Mrst0), - State0#{ - tx_db := TxDb, - db_seq := fabric2_db:get_update_seq(TxDb), - view_seq := ViewSeq, - last_seq := ViewSeq - }; - _ -> - State0#{ - tx_db := TxDb - } - end, + State1 = get_update_start_state(TxDb, Mrst0, State0), {ok, State2} = fold_changes(State1), @@ -198,7 +186,8 @@ do_update(Db, Mrst0, State0) -> doc_acc := DocAcc, last_seq := LastSeq, limit := Limit, - limiter := Limiter + limiter := Limiter, + view_vs := ViewVS } = State2, DocAcc1 = fetch_docs(TxDb, DocAcc), couch_rate:in(Limiter, Count), @@ -210,6 +199,8 @@ do_update(Db, Mrst0, State0) -> case Count < Limit of true -> + maybe_set_build_status(TxDb, Mrst1, ViewVS, + ?INDEX_READY), report_progress(State2, finished), {Mrst1, finished}; false -> @@ -224,6 +215,33 @@ do_update(Db, Mrst0, State0) -> end). +maybe_set_build_status(_TxDb, _Mrst1, not_found, _State) -> + ok; + +maybe_set_build_status(TxDb, Mrst1, _ViewVS, State) -> + couch_views_fdb:set_build_status(TxDb, Mrst1, State). + + +% In the first iteration of update we need +% to populate our db and view sequences +get_update_start_state(TxDb, Mrst, #{db_seq := undefined} = State) -> + ViewVS = couch_views_fdb:get_creation_vs(TxDb, Mrst), + ViewSeq = couch_views_fdb:get_update_seq(TxDb, Mrst), + + State#{ + tx_db := TxDb, + db_seq := fabric2_db:get_update_seq(TxDb), + view_vs := ViewVS, + view_seq := ViewSeq, + last_seq := ViewSeq + }; + +get_update_start_state(TxDb, _Idx, State) -> + State#{ + tx_db := TxDb + }. + + fold_changes(State) -> #{ view_seq := SinceSeq, @@ -240,7 +258,8 @@ process_changes(Change, Acc) -> #{ doc_acc := DocAcc, count := Count, - design_opts := DesignOpts + design_opts := DesignOpts, + view_vs := ViewVS } = Acc, #{ @@ -263,8 +282,22 @@ process_changes(Change, Acc) -> last_seq := LastSeq } end, - {ok, Acc1}. + DocVS = fabric2_fdb:seq_to_vs(LastSeq), + + Go = maybe_stop_at_vs(ViewVS, DocVS), + {Go, Acc1}. + + +maybe_stop_at_vs({versionstamp, _} = ViewVS, DocVS) when DocVS >= ViewVS -> + stop; + +maybe_stop_at_vs(_, _) -> + ok. + + +map_docs(Mrst, []) -> + {Mrst, []}; map_docs(Mrst, Docs) -> % Run all the non deleted docs through the view engine and @@ -328,7 +361,9 @@ write_docs(TxDb, Mrst, Docs, State) -> N + 1 end, 0, Docs), - couch_views_fdb:set_update_seq(TxDb, Sig, LastSeq), + if LastSeq == false -> ok; true -> + couch_views_fdb:set_update_seq(TxDb, Sig, LastSeq) + end, DocsNumber. diff --git a/src/couch_views/src/couch_views_jobs.erl b/src/couch_views/src/couch_views_jobs.erl index 1604841f1..b97e7ce0f 100644 --- a/src/couch_views/src/couch_views_jobs.erl +++ b/src/couch_views/src/couch_views_jobs.erl @@ -40,11 +40,12 @@ build_view(TxDb, Mrst, UpdateSeq) -> end. -build_view_async(TxDb, Mrst) -> - JobId = job_id(TxDb, Mrst), - JobData = job_data(TxDb, Mrst), - DbUUID = fabric2_db:get_uuid(TxDb), - couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(), fun(JTx) -> +build_view_async(TxDb0, Mrst) -> + JobId = job_id(TxDb0, Mrst), + JobData = job_data(TxDb0, Mrst), + DbUUID = fabric2_db:get_uuid(TxDb0), + TxDb1 = ensure_correct_tx(TxDb0), + couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(TxDb1), fun(JTx) -> case couch_jobs:get_job_data(JTx, ?INDEX_JOB_TYPE, JobId) of {error, not_found} -> ok; @@ -59,6 +60,16 @@ build_view_async(TxDb, Mrst) -> {ok, JobId}. +ensure_correct_tx(#{tx := undefined} = TxDb) -> + TxDb; + +ensure_correct_tx(#{tx := Tx} = TxDb) -> + case erlfdb:is_read_only(Tx) of + true -> TxDb#{tx := undefined}; + false -> TxDb + end. + + wait_for_job(JobId, UpdateSeq) -> case couch_jobs:subscribe(?INDEX_JOB_TYPE, JobId) of {ok, Subscription, _State, _Data} -> diff --git a/src/couch_views/test/couch_views_indexer_test.erl b/src/couch_views/test/couch_views_indexer_test.erl index 43b58284d..8ddb64b9c 100644 --- a/src/couch_views/test/couch_views_indexer_test.erl +++ b/src/couch_views/test/couch_views_indexer_test.erl @@ -51,7 +51,8 @@ indexer_test_() -> ?TDEF_FE(index_autoupdater_callback), ?TDEF_FE(handle_db_recreated_when_running), ?TDEF_FE(handle_db_recreated_after_finished), - ?TDEF_FE(index_budget_is_changing) + ?TDEF_FE(index_budget_is_changing), + ?TDEF_FE(index_can_recover_from_crash, 60) ] } } @@ -508,6 +509,41 @@ handle_db_recreated_after_finished(Db) -> ], Out2). +index_can_recover_from_crash(Db) -> + ok = meck:new(config, [passthrough]), + ok = meck:expect(config, get_integer, fun(Section, Key, Default) -> + case Section == "couch_views" andalso Key == "change_limit" of + true -> 1; + _ -> Default + end + end), + meck:new(couch_eval, [passthrough]), + meck:expect(couch_eval, map_docs, fun(State, Docs) -> + Doc = hd(Docs), + case Doc#doc.id == <<"2">> of + true -> + % remove the mock so that next time the doc is processed + % it will work + meck:unload(couch_eval), + throw({fake_crash, test_jobs_restart}); + false -> + meck:passthrough([State, Docs]) + end + end), + + DDoc = create_ddoc(), + Docs = make_docs(3), + {ok, _} = fabric2_db:update_doc(Db, DDoc, []), + {ok, _} = fabric2_db:update_docs(Db, Docs, []), + + {ok, Out} = run_query(Db, DDoc, ?MAP_FUN1), + ?assertEqual([ + row(<<"1">>, 1, 1), + row(<<"2">>, 2, 2), + row(<<"3">>, 3, 3) + ], Out). + + row(Id, Key, Value) -> {row, [ {id, Id}, @@ -603,6 +639,10 @@ create_ddoc(multi_emit_key_limit) -> ]}). +make_docs(Count) -> + [doc(I) || I <- lists:seq(1, Count)]. + + doc(Id) -> doc(Id, Id). diff --git a/src/couch_views/test/couch_views_trace_index_test.erl b/src/couch_views/test/couch_views_trace_index_test.erl index f8a5ce535..5b15a4ce2 100644 --- a/src/couch_views/test/couch_views_trace_index_test.erl +++ b/src/couch_views/test/couch_views_trace_index_test.erl @@ -77,10 +77,13 @@ trace_single_doc(Db) -> {ok, _} = fabric2_db:update_doc(Db, Doc, []), {ok, Mrst} = couch_views_util:ddoc_to_mrst(DbName, DDoc), + HexSig = fabric2_util:to_hex(Mrst#mrst.sig), JobData = #{ <<"db_name">> => DbName, + <<"db_uuid">> => fabric2_db:get_uuid(Db), <<"ddoc_id">> => <<"_design/bar">>, - <<"sig">> => fabric2_util:to_hex(Mrst#mrst.sig) + <<"sig">> => HexSig, + <<"retries">> => 0 }, meck:expect(couch_jobs, accept, 2, {ok, job, JobData}), meck:expect(couch_jobs, update, 3, {ok, job}), diff --git a/src/fabric/src/fabric2_fdb.erl b/src/fabric/src/fabric2_fdb.erl index 912d4dfa8..2295a5648 100644 --- a/src/fabric/src/fabric2_fdb.erl +++ b/src/fabric/src/fabric2_fdb.erl @@ -64,6 +64,8 @@ seq_to_vs/1, next_vs/1, + new_versionstamp/1, + debug_cluster/0, debug_cluster/2 ]). @@ -1021,6 +1023,11 @@ next_vs({versionstamp, VS, Batch, TxId}) -> {versionstamp, V, B, T}. +new_versionstamp(Tx) -> + TxId = erlfdb:get_next_tx_id(Tx), + {versionstamp, 16#FFFFFFFFFFFFFFFF, 16#FFFF, TxId}. + + debug_cluster() -> debug_cluster(<<>>, <<16#FE, 16#FF, 16#FF>>). @@ -1763,11 +1770,6 @@ get_transaction_id(Tx, LayerPrefix) -> end. -new_versionstamp(Tx) -> - TxId = erlfdb:get_next_tx_id(Tx), - {versionstamp, 16#FFFFFFFFFFFFFFFF, 16#FFFF, TxId}. - - on_commit(Tx, Fun) when is_function(Fun, 0) -> % Here we rely on Tx objects matching. However they contain a nif resource % object. Before Erlang 20.0 those would have been represented as empty -- cgit v1.2.1 From 34ca5e40cfac4bb6fbdc8d9084602781c17d87de Mon Sep 17 00:00:00 2001 From: Garren Smith Date: Wed, 25 Mar 2020 15:10:00 +0200 Subject: All couch_view queries to run across transactions --- src/couch_views/src/couch_views_reader.erl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/couch_views/src/couch_views_reader.erl b/src/couch_views/src/couch_views_reader.erl index 76dbed11f..ce7f16354 100644 --- a/src/couch_views/src/couch_views_reader.erl +++ b/src/couch_views/src/couch_views_reader.erl @@ -184,7 +184,8 @@ mrargs_to_fdb_options(Args) -> [ {dir, Direction}, {limit, Limit + Skip}, - {streaming_mode, want_all} + {streaming_mode, want_all}, + {restart_tx, true} ] ++ StartKeyOpts ++ EndKeyOpts. -- cgit v1.2.1 From b856501628359fba0a08087b4ce75a0606cae7a9 Mon Sep 17 00:00:00 2001 From: Garren Smith Date: Wed, 25 Mar 2020 15:38:53 +0200 Subject: Add couch_views_updater interactive indexer This adds the ability for couch_views to index an index in the docs update transaction. This only happens if a design doc has the field <<"interactive">> = true. --- rel/apps/couch_epi.config | 1 + src/couch_views/src/couch_views.app.src | 1 + src/couch_views/src/couch_views.erl | 16 +- src/couch_views/src/couch_views_ddoc.erl | 42 ++++ src/couch_views/src/couch_views_epi.erl | 58 ++++++ src/couch_views/src/couch_views_fabric2_plugin.erl | 24 +++ src/couch_views/src/couch_views_sup.erl | 2 +- src/couch_views/src/couch_views_updater.erl | 101 +++++++++ src/couch_views/test/couch_views_updater_test.erl | 230 +++++++++++++++++++++ 9 files changed, 470 insertions(+), 5 deletions(-) create mode 100644 src/couch_views/src/couch_views_ddoc.erl create mode 100644 src/couch_views/src/couch_views_epi.erl create mode 100644 src/couch_views/src/couch_views_fabric2_plugin.erl create mode 100644 src/couch_views/src/couch_views_updater.erl create mode 100644 src/couch_views/test/couch_views_updater_test.erl diff --git a/rel/apps/couch_epi.config b/rel/apps/couch_epi.config index 0f3d2da55..d3711636f 100644 --- a/rel/apps/couch_epi.config +++ b/rel/apps/couch_epi.config @@ -15,6 +15,7 @@ fabric2_epi, chttpd_epi, couch_index_epi, + couch_views_epi, dreyfus_epi, global_changes_epi, mango_epi, diff --git a/src/couch_views/src/couch_views.app.src b/src/couch_views/src/couch_views.app.src index b704c9745..cb8285ac2 100644 --- a/src/couch_views/src/couch_views.app.src +++ b/src/couch_views/src/couch_views.app.src @@ -22,6 +22,7 @@ kernel, stdlib, erlfdb, + couch_epi, couch_log, config, couch_stats, diff --git a/src/couch_views/src/couch_views.erl b/src/couch_views/src/couch_views.erl index 2268052f8..2acba00a6 100644 --- a/src/couch_views/src/couch_views.erl +++ b/src/couch_views/src/couch_views.erl @@ -37,6 +37,7 @@ query(Db, DDoc, ViewName, Callback, Acc0, Args0) -> end, DbName = fabric2_db:name(Db), + IsInteractive = couch_views_ddoc:is_interactive(DDoc), {ok, Mrst} = couch_views_util:ddoc_to_mrst(DbName, DDoc), #mrst{ @@ -54,7 +55,7 @@ query(Db, DDoc, ViewName, Callback, Acc0, Args0) -> try fabric2_fdb:transactional(Db, fun(TxDb) -> - ok = maybe_update_view(TxDb, Mrst, Args3), + ok = maybe_update_view(TxDb, Mrst, IsInteractive, Args3), read_view(TxDb, Mrst, ViewName, Callback, Acc0, Args3) end) catch throw:{build_view, WaitSeq} -> @@ -127,13 +128,20 @@ read_view(Db, Mrst, ViewName, Callback, Acc0, Args) -> end). -maybe_update_view(_Db, _Mrst, #mrargs{update = false}) -> +maybe_update_view(_Db, _Mrst, _, #mrargs{update = false}) -> ok; -maybe_update_view(_Db, _Mrst, #mrargs{update = lazy}) -> +maybe_update_view(_Db, _Mrst, _, #mrargs{update = lazy}) -> ok; -maybe_update_view(TxDb, Mrst, _Args) -> +maybe_update_view(TxDb, Mrst, true, _Args) -> + BuildState = couch_views_fdb:get_build_status(TxDb, Mrst), + if BuildState == ?INDEX_READY -> ok; true -> + VS = couch_views_fdb:get_creation_vs(TxDb, Mrst), + throw({build_view, fabric2_fdb:vs_to_seq(VS)}) + end; + +maybe_update_view(TxDb, Mrst, false, _Args) -> DbSeq = fabric2_db:get_update_seq(TxDb), ViewSeq = couch_views_fdb:get_update_seq(TxDb, Mrst), case DbSeq == ViewSeq of diff --git a/src/couch_views/src/couch_views_ddoc.erl b/src/couch_views/src/couch_views_ddoc.erl new file mode 100644 index 000000000..fae4a3433 --- /dev/null +++ b/src/couch_views/src/couch_views_ddoc.erl @@ -0,0 +1,42 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. +-module(couch_views_ddoc). + + +-export([ + get_interactive_list/1, + get_mango_list/1, + is_interactive/1 +]). + + +-include_lib("couch/include/couch_db.hrl"). + + +% TODO: build a ddoc cache that checks the md_version +get_interactive_list(Db) -> + DDocs = fabric2_db:get_design_docs(Db), + lists:filter(fun is_interactive/1, DDocs). + + +get_mango_list(Db) -> + DDocs = fabric2_db:get_design_docs(Db), + lists:filter(fun (DDoc) -> + {Props} = couch_doc:to_json_obj(DDoc, []), + fabric2_util:get_value(<<"language">>, Props) == <<"query">> + end, DDocs). + + +is_interactive(#doc{} = DDoc) -> + {Props} = couch_doc:to_json_obj(DDoc, []), + {Opts} = fabric2_util:get_value(<<"options">>, Props, {[]}), + fabric2_util:get_value(<<"interactive">>, Opts, false). diff --git a/src/couch_views/src/couch_views_epi.erl b/src/couch_views/src/couch_views_epi.erl new file mode 100644 index 000000000..6d39d9a5e --- /dev/null +++ b/src/couch_views/src/couch_views_epi.erl @@ -0,0 +1,58 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + + +-module(couch_views_epi). + + +-behaviour(couch_epi_plugin). + + +-export([ + app/0, + providers/0, + services/0, + data_subscriptions/0, + data_providers/0, + processes/0, + notify/3 +]). + + +app() -> + couch_views. + + +providers() -> + [ + {fabric2_db, couch_views_fabric2_plugin} + ]. + + +services() -> + []. + + +data_subscriptions() -> + []. + + +data_providers() -> + []. + + +processes() -> + []. + + +notify(_Key, _Old, _New) -> + ok. diff --git a/src/couch_views/src/couch_views_fabric2_plugin.erl b/src/couch_views/src/couch_views_fabric2_plugin.erl new file mode 100644 index 000000000..cae0e1f75 --- /dev/null +++ b/src/couch_views/src/couch_views_fabric2_plugin.erl @@ -0,0 +1,24 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + + +-module(couch_views_fabric2_plugin). + + +-export([ + after_doc_write/6 +]). + + +after_doc_write(Db, Doc, NewWinner, OldWinner, NewRevId, Seq)-> + couch_views_updater:index(Db, Doc, NewWinner, OldWinner, NewRevId, Seq), + [Db, Doc, NewWinner, OldWinner, NewRevId, Seq]. diff --git a/src/couch_views/src/couch_views_sup.erl b/src/couch_views/src/couch_views_sup.erl index 2a40f0a79..94531893d 100644 --- a/src/couch_views/src/couch_views_sup.erl +++ b/src/couch_views/src/couch_views_sup.erl @@ -42,7 +42,7 @@ init(normal) -> id => couch_views_server, start => {couch_views_server, start_link, []} } - ], + ] ++ couch_epi:register_service(couch_views_epi, []), {ok, {flags(), Children}}; init(builds_disabled) -> diff --git a/src/couch_views/src/couch_views_updater.erl b/src/couch_views/src/couch_views_updater.erl new file mode 100644 index 000000000..f405123fa --- /dev/null +++ b/src/couch_views/src/couch_views_updater.erl @@ -0,0 +1,101 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. +-module(couch_views_updater). + +-export([ + index/6 +]). + + +-include_lib("couch/include/couch_db.hrl"). +-include_lib("couch_mrview/include/couch_mrview.hrl"). + +% If the doc revision doesn't not match the NewRevId passed here we can ignore +% the document since it is then a conflict document and it doesn't need +% to be indexed. +index(Db, #doc{id = Id, revs = Revs} = Doc, _NewWinner, _OldWinner, NewRevId, + Seq) -> + try + {Depth, [FirstRev | _]} = Revs, + DocRev = {Depth, FirstRev}, + if DocRev /= NewRevId -> ok; true -> + index_int(Db, Doc, Seq) + end + catch + Error:Reason -> + DbName = fabric2_db:name(Db), + couch_log:error("Mango index error for Db ~s Doc ~p ~p ~p", + [DbName, Id, Error, Reason]) + end. + + +% Check if design doc is an interactive index and kick off background worker +% to build the new index up to the creation_vs +index_int(Db, #doc{id = <>, + deleted = false} = DDoc, Seq) -> + DbName = fabric2_db:name(Db), + + case couch_views_ddoc:is_interactive(DDoc) of + true -> + {ok, Mrst} = couch_mrview_util:ddoc_to_mrst(DbName, DDoc), + case couch_views_fdb:get_creation_vs(Db, Mrst) of + not_found -> + couch_views_fdb:new_interactive_index(Db, Mrst, Seq), + {ok, _} = couch_views_jobs:build_view_async(Db, Mrst); + _ -> + ok + end; + false -> + ok + end, + write_doc(Db, DDoc); + + +index_int(Db, #doc{} = Doc, _Seq) -> + write_doc(Db, Doc). + + +write_doc(Db, #doc{deleted = Deleted} = Doc) -> + DbName = fabric2_db:name(Db), + DDocs = couch_views_ddoc:get_interactive_list(Db), + + Result0 = [#{ + id => Doc#doc.id, + results => [], + deleted => Deleted, + doc => Doc + }], + + %% Interactive updates do not update the views update_seq + State = #{ + last_seq => false + }, + + lists:foreach(fun(DDoc) -> + {ok, Mrst} = couch_mrview_util:ddoc_to_mrst(DbName, DDoc), + + case should_index_doc(Doc, Mrst) of + true -> + {Mrst1, Result1} = couch_views_indexer:map_docs(Mrst, Result0), + couch_views_indexer:write_docs(Db, Mrst1, Result1, State), + couch_eval:release_map_context(Mrst1#mrst.qserver); + false -> + ok + end + end, DDocs). + + +should_index_doc(<>, Mrst) -> + lists:keymember(<<"include_design">>, 1, Mrst#mrst.design_opts); + +should_index_doc(_, _) -> + true. diff --git a/src/couch_views/test/couch_views_updater_test.erl b/src/couch_views/test/couch_views_updater_test.erl new file mode 100644 index 000000000..e45622512 --- /dev/null +++ b/src/couch_views/test/couch_views_updater_test.erl @@ -0,0 +1,230 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_views_updater_test). + +-include_lib("couch/include/couch_db.hrl"). +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("couch_mrview/include/couch_mrview.hrl"). +-include_lib("fabric/test/fabric2_test.hrl"). +-include_lib("mango/src/mango_idx.hrl"). +-include_lib("couch_views/include/couch_views.hrl"). + + +indexer_test_() -> + { + "Test indexing", + { + setup, + fun setup/0, + fun cleanup/1, + { + foreach, + fun foreach_setup/0, + fun foreach_teardown/1, + [ + ?TDEF_FE(index_docs), + ?TDEF_FE(update_doc), + ?TDEF_FE(delete_doc), + ?TDEF_FE(includes_design_docs) + ] + } + } + }. + + +setup() -> + Ctx = test_util:start_couch([ + fabric, + couch_jobs, + couch_js, + couch_views, + mango + ]), + Ctx. + + +cleanup(Ctx) -> + test_util:stop_couch(Ctx). + + +foreach_setup() -> + {ok, Db} = fabric2_db:create(?tempdb(), [{user_ctx, ?ADMIN_USER}]), + + DDoc = create_idx_ddoc(), + fabric2_db:update_docs(Db, [DDoc]), + % make sure the index is built for the first time so the background + % indexer doesn't build the index + wait_while_ddoc_builds(Db), + + Docs = make_docs(3), + fabric2_db:update_docs(Db, Docs), + {Db, DDoc}. + + +foreach_teardown({Db, _}) -> + ok = fabric2_db:delete(fabric2_db:name(Db), []). + + +index_docs({Db, DDoc}) -> + Docs = run_query(Db, DDoc), + ?assertEqual([ + [{id, <<"1">>}, {value, 1}], + [{id, <<"2">>}, {value, 2}], + [{id, <<"3">>}, {value, 3}] + ], Docs). + + +update_doc({Db, DDoc}) -> + {ok, Doc} = fabric2_db:open_doc(Db, <<"2">>), + JsonDoc = couch_doc:to_json_obj(Doc, []), + JsonDoc2 = couch_util:json_apply_field({<<"value">>, 4}, JsonDoc), + Doc2 = couch_doc:from_json_obj(JsonDoc2), + fabric2_db:update_doc(Db, Doc2), + + Docs = run_query(Db, DDoc), + ?assertEqual([ + [{id, <<"1">>}, {value, 1}], + [{id, <<"3">>}, {value, 3}], + [{id, <<"2">>}, {value, 4}] + ], Docs). + + +delete_doc({Db, DDoc}) -> + {ok, Doc} = fabric2_db:open_doc(Db, <<"2">>), + JsonDoc = couch_doc:to_json_obj(Doc, []), + JsonDoc2 = couch_util:json_apply_field({<<"_deleted">>, true}, JsonDoc), + Doc2 = couch_doc:from_json_obj(JsonDoc2), + fabric2_db:update_doc(Db, Doc2), + + Docs = run_query(Db, DDoc), + ?assertEqual([ + [{id, <<"1">>}, {value, 1}], + [{id, <<"3">>}, {value, 3}] + ], Docs). + + +includes_design_docs({Db, _}) -> + DDoc = create_idx_include_ddocs(), + fabric2_db:update_docs(Db, [DDoc]), + + IndexDDoc0 = create_idx_ddoc(), + IndexDDoc = IndexDDoc0#doc{ + id = <<"_design/to_be_indexed">> + }, + + fabric2_db:update_docs(Db, [IndexDDoc]), + + Docs = run_query(Db, DDoc), + ?assertEqual([ + [{id, <<"_design/ddoc_that_indexes_ddocs">>}, {value, 1}], + [{id, <<"_design/to_be_indexed">>}, {value, 1}] + ], Docs). + + +run_query(Db, DDoc) -> + Args = #mrargs{ + view_type = map, + reduce = false, + include_docs = true, + update = false + }, + CB = fun query_cb/2, + {ok, Acc} = couch_views:query(Db, DDoc, <<"idx_01">>, CB, [], Args), + lists:map(fun ({Props}) -> + [ + {id, couch_util:get_value(<<"_id">>, Props)}, + {value, couch_util:get_value(<<"value">>, Props, 1)} + ] + + end, Acc). + + +create_idx_ddoc() -> + couch_doc:from_json_obj({[ + {<<"_id">>, <<"_design/ddoc1">>}, + {<<"language">>, <<"query">>}, + {<<"views">>, {[ + {<<"idx_01">>, {[ + {<<"map">>, {[ + {<<"fields">>, {[{<<"value">>, <<"asc">>}]}} + ]}}, + {<<"reduce">>, <<"_count">>}, + {<<"options">>, {[ + {<<"def">>, + {[{<<"fields">>, + {[{<<"value">>, <<"asc">>}]}}]}} + ]}} + ]}} + ]} + }, + {<<"autoupdate">>, false}, + {<<"options">>, {[{<<"interactive">>, true}]}} + ]}). + + +create_idx_include_ddocs() -> + couch_doc:from_json_obj({[ + {<<"_id">>, <<"_design/ddoc_that_indexes_ddocs">>}, + {<<"language">>, <<"javascript">>}, + {<<"views">>, {[ + {<<"idx_01">>, {[ + {<<"map">>, << + "function(doc) {" + "if (doc.language) {" + "emit(doc.language, 1);" + "}" + "}">>} + ]}} + ]}}, + {<<"autoupdate">>, false}, + {<<"options">>, {[ + {<<"include_design">>, true}, + {<<"interactive">>, true} + ]}} + ]}). + + +wait_while_ddoc_builds(Db) -> + Fun = fun () -> + fabric2_fdb:transactional(Db, fun(TxDb) -> + Ready = lists:filter(fun (Idx) -> + Idx#idx.build_status == ?INDEX_READY + end, mango_idx:list(TxDb)), + + if length(Ready) > 1 -> ok; true -> + wait + end + end) + end, + test_util:wait(Fun). + + + +make_docs(Count) -> + [doc(I) || I <- lists:seq(1, Count)]. + + +doc(Id) -> + couch_doc:from_json_obj({[ + {<<"_id">>, list_to_binary(integer_to_list(Id))}, + {<<"value">>, Id} + ]}). + + +query_cb({row, Props}, Acc) -> + Doc = couch_util:get_value(doc, Props), + {ok, Acc ++ [Doc]}; + +query_cb(_, Acc) -> + {ok, Acc}. + -- cgit v1.2.1 From 0c12654ec79ba9ad4a8b1dd43cd89e8deb5797c3 Mon Sep 17 00:00:00 2001 From: Garren Smith Date: Tue, 24 Mar 2020 15:48:14 +0200 Subject: Add couch_views_encoding max value Adds a max value to use for encoding. This is useful when getting the max range when encoding startkey/endkeys. --- src/couch_views/src/couch_views_encoding.erl | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/couch_views/src/couch_views_encoding.erl b/src/couch_views/src/couch_views_encoding.erl index ef5fed9a2..2f69db306 100644 --- a/src/couch_views/src/couch_views_encoding.erl +++ b/src/couch_views/src/couch_views_encoding.erl @@ -14,6 +14,7 @@ -export([ + max/0, encode/1, encode/2, decode/1 @@ -27,6 +28,11 @@ -define(STRING, 4). -define(LIST, 5). -define(OBJECT, 6). +-define(MAX, 255). + + +max() -> + max_encoding_value. encode(X) -> @@ -51,6 +57,9 @@ encode_int(false, _Type) -> encode_int(true, _Type) -> {?TRUE}; +encode_int(max_encoding_value, _Type) -> + {?MAX}; + encode_int(Num, key) when is_number(Num) -> {?NUMBER, float(Num)}; @@ -87,6 +96,9 @@ decode_int({?FALSE}) -> decode_int({?TRUE}) -> true; +decode_int({?MAX}) -> + max_encoding_value; + decode_int({?STRING, Bin}) -> Bin; -- cgit v1.2.1 From aa940dbd1cf514b2f898b5b5816454b7919fc092 Mon Sep 17 00:00:00 2001 From: Garren Smith Date: Tue, 10 Mar 2020 18:20:27 +0200 Subject: remove mango native proc --- src/couch/src/couch_proc_manager.erl | 1 - src/couch_js/src/couch_js_proc_manager.erl | 1 - src/mango/src/mango_native_proc.erl | 373 ----------------------------- 3 files changed, 375 deletions(-) delete mode 100644 src/mango/src/mango_native_proc.erl diff --git a/src/couch/src/couch_proc_manager.erl b/src/couch/src/couch_proc_manager.erl index 0daef3ee9..376e12e74 100644 --- a/src/couch/src/couch_proc_manager.erl +++ b/src/couch/src/couch_proc_manager.erl @@ -109,7 +109,6 @@ init([]) -> ets:new(?SERVERS, [public, named_table, set]), ets:insert(?SERVERS, get_servers_from_env("COUCHDB_QUERY_SERVER_")), ets:insert(?SERVERS, get_servers_from_env("COUCHDB_NATIVE_QUERY_SERVER_")), - ets:insert(?SERVERS, [{"QUERY", {mango_native_proc, start_link, []}}]), maybe_configure_erlang_native_servers(), {ok, #state{ diff --git a/src/couch_js/src/couch_js_proc_manager.erl b/src/couch_js/src/couch_js_proc_manager.erl index 096469612..45f173668 100644 --- a/src/couch_js/src/couch_js_proc_manager.erl +++ b/src/couch_js/src/couch_js_proc_manager.erl @@ -108,7 +108,6 @@ init([]) -> ets:new(?SERVERS, [public, named_table, set]), ets:insert(?SERVERS, get_servers_from_env("COUCHDB_QUERY_SERVER_")), ets:insert(?SERVERS, get_servers_from_env("COUCHDB_NATIVE_QUERY_SERVER_")), - ets:insert(?SERVERS, [{"QUERY", {mango_native_proc, start_link, []}}]), maybe_configure_erlang_native_servers(), {ok, #state{ diff --git a/src/mango/src/mango_native_proc.erl b/src/mango/src/mango_native_proc.erl deleted file mode 100644 index cbf362291..000000000 --- a/src/mango/src/mango_native_proc.erl +++ /dev/null @@ -1,373 +0,0 @@ -% Licensed under the Apache License, Version 2.0 (the "License"); you may not -% use this file except in compliance with the License. You may obtain a copy of -% the License at -% -% http://www.apache.org/licenses/LICENSE-2.0 -% -% Unless required by applicable law or agreed to in writing, software -% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -% License for the specific language governing permissions and limitations under -% the License. - --module(mango_native_proc). --behavior(gen_server). - - --include("mango_idx.hrl"). - - --export([ - start_link/0, - set_timeout/2, - prompt/2 -]). - --export([ - init/1, - terminate/2, - handle_call/3, - handle_cast/2, - handle_info/2, - code_change/3 -]). - - --record(st, { - indexes = [], - timeout = 5000 -}). - - --record(tacc, { - index_array_lengths = true, - fields = all_fields, - path = [] -}). - - -start_link() -> - gen_server:start_link(?MODULE, [], []). - - -set_timeout(Pid, TimeOut) when is_integer(TimeOut), TimeOut > 0 -> - gen_server:call(Pid, {set_timeout, TimeOut}). - - -prompt(Pid, Data) -> - gen_server:call(Pid, {prompt, Data}). - - -init(_) -> - {ok, #st{}}. - - -terminate(_Reason, _St) -> - ok. - - -handle_call({set_timeout, TimeOut}, _From, St) -> - {reply, ok, St#st{timeout=TimeOut}}; - -handle_call({prompt, [<<"reset">>]}, _From, St) -> - {reply, true, St#st{indexes=[]}}; - -handle_call({prompt, [<<"reset">>, _QueryConfig]}, _From, St) -> - {reply, true, St#st{indexes=[]}}; - -handle_call({prompt, [<<"add_fun">>, IndexInfo]}, _From, St) -> - Indexes = case validate_index_info(IndexInfo) of - true -> - St#st.indexes ++ [IndexInfo]; - false -> - couch_log:error("No Valid Indexes For: ~p", [IndexInfo]), - St#st.indexes - end, - NewSt = St#st{indexes = Indexes}, - {reply, true, NewSt}; - -handle_call({prompt, [<<"map_doc">>, Doc]}, _From, St) -> - {reply, map_doc(St, mango_json:to_binary(Doc)), St}; - -handle_call({prompt, [<<"reduce">>, RedSrcs, _]}, _From, St) -> - {reply, [true, [null || _ <- RedSrcs]], St}; - -handle_call({prompt, [<<"rereduce">>, RedSrcs, _]}, _From, St) -> - {reply, [true, [null || _ <- RedSrcs]], St}; - -handle_call({prompt, [<<"index_doc">>, Doc]}, _From, St) -> - Vals = case index_doc(St, mango_json:to_binary(Doc)) of - [] -> - [[]]; - Else -> - Else - end, - {reply, Vals, St}; - - -handle_call(Msg, _From, St) -> - {stop, {invalid_call, Msg}, {invalid_call, Msg}, St}. - - -handle_cast(garbage_collect, St) -> - erlang:garbage_collect(), - {noreply, St}; - -handle_cast(stop, St) -> - {stop, normal, St}; - -handle_cast(Msg, St) -> - {stop, {invalid_cast, Msg}, St}. - - -handle_info(Msg, St) -> - {stop, {invalid_info, Msg}, St}. - - -code_change(_OldVsn, St, _Extra) -> - {ok, St}. - - -map_doc(#st{indexes=Indexes}, Doc) -> - lists:map(fun(Idx) -> get_index_entries(Idx, Doc) end, Indexes). - - -index_doc(#st{indexes=Indexes}, Doc) -> - lists:map(fun(Idx) -> get_text_entries(Idx, Doc) end, Indexes). - - -get_index_entries({IdxProps}, Doc) -> - {Fields} = couch_util:get_value(<<"fields">>, IdxProps), - Selector = get_index_partial_filter_selector(IdxProps), - case should_index(Selector, Doc) of - false -> - []; - true -> - Values = get_index_values(Fields, Doc), - case lists:member(not_found, Values) of - true -> []; - false -> [[Values, null]] - end - end. - - -get_index_values(Fields, Doc) -> - lists:map(fun({Field, _Dir}) -> - case mango_doc:get_field(Doc, Field) of - not_found -> not_found; - bad_path -> not_found; - Value -> Value - end - end, Fields). - - -get_text_entries({IdxProps}, Doc) -> - Selector = get_index_partial_filter_selector(IdxProps), - case should_index(Selector, Doc) of - true -> - get_text_entries0(IdxProps, Doc); - false -> - [] - end. - - -get_index_partial_filter_selector(IdxProps) -> - case couch_util:get_value(<<"partial_filter_selector">>, IdxProps, {[]}) of - {[]} -> - % this is to support legacy text indexes that had the partial_filter_selector - % set as selector - couch_util:get_value(<<"selector">>, IdxProps, {[]}); - Else -> - Else - end. - - -get_text_entries0(IdxProps, Doc) -> - DefaultEnabled = get_default_enabled(IdxProps), - IndexArrayLengths = get_index_array_lengths(IdxProps), - FieldsList = get_text_field_list(IdxProps), - TAcc = #tacc{ - index_array_lengths = IndexArrayLengths, - fields = FieldsList - }, - Fields0 = get_text_field_values(Doc, TAcc), - Fields = if not DefaultEnabled -> Fields0; true -> - add_default_text_field(Fields0) - end, - FieldNames = get_field_names(Fields), - Converted = convert_text_fields(Fields), - FieldNames ++ Converted. - - -get_text_field_values({Props}, TAcc) when is_list(Props) -> - get_text_field_values_obj(Props, TAcc, []); - -get_text_field_values(Values, TAcc) when is_list(Values) -> - IndexArrayLengths = TAcc#tacc.index_array_lengths, - NewPath = ["[]" | TAcc#tacc.path], - NewTAcc = TAcc#tacc{path = NewPath}, - case IndexArrayLengths of - true -> - % We bypass make_text_field and directly call make_text_field_name - % because the length field name is not part of the path. - LengthFieldName = make_text_field_name(NewTAcc#tacc.path, <<"length">>), - LengthField = [{LengthFieldName, <<"length">>, length(Values)}], - get_text_field_values_arr(Values, NewTAcc, LengthField); - _ -> - get_text_field_values_arr(Values, NewTAcc, []) - end; - -get_text_field_values(Bin, TAcc) when is_binary(Bin) -> - make_text_field(TAcc, <<"string">>, Bin); - -get_text_field_values(Num, TAcc) when is_number(Num) -> - make_text_field(TAcc, <<"number">>, Num); - -get_text_field_values(Bool, TAcc) when is_boolean(Bool) -> - make_text_field(TAcc, <<"boolean">>, Bool); - -get_text_field_values(null, TAcc) -> - make_text_field(TAcc, <<"null">>, true). - - -get_text_field_values_obj([], _, FAcc) -> - FAcc; -get_text_field_values_obj([{Key, Val} | Rest], TAcc, FAcc) -> - NewPath = [Key | TAcc#tacc.path], - NewTAcc = TAcc#tacc{path = NewPath}, - Fields = get_text_field_values(Val, NewTAcc), - get_text_field_values_obj(Rest, TAcc, Fields ++ FAcc). - - -get_text_field_values_arr([], _, FAcc) -> - FAcc; -get_text_field_values_arr([Value | Rest], TAcc, FAcc) -> - Fields = get_text_field_values(Value, TAcc), - get_text_field_values_arr(Rest, TAcc, Fields ++ FAcc). - - -get_default_enabled(Props) -> - case couch_util:get_value(<<"default_field">>, Props, {[]}) of - Bool when is_boolean(Bool) -> - Bool; - {[]} -> - true; - {Opts}-> - couch_util:get_value(<<"enabled">>, Opts, true) - end. - - -get_index_array_lengths(Props) -> - couch_util:get_value(<<"index_array_lengths">>, Props, true). - - -add_default_text_field(Fields) -> - DefaultFields = add_default_text_field(Fields, []), - DefaultFields ++ Fields. - - -add_default_text_field([], Acc) -> - Acc; -add_default_text_field([{_Name, <<"string">>, Value} | Rest], Acc) -> - NewAcc = [{<<"$default">>, <<"string">>, Value} | Acc], - add_default_text_field(Rest, NewAcc); -add_default_text_field([_ | Rest], Acc) -> - add_default_text_field(Rest, Acc). - - -%% index of all field names -get_field_names(Fields) -> - FieldNameSet = lists:foldl(fun({Name, _, _}, Set) -> - gb_sets:add([<<"$fieldnames">>, Name, []], Set) - end, gb_sets:new(), Fields), - gb_sets:to_list(FieldNameSet). - - -convert_text_fields([]) -> - []; -convert_text_fields([{Name, _Type, Value} | Rest]) -> - [[Name, Value, []] | convert_text_fields(Rest)]. - - -should_index(Selector, Doc) -> - % We should do this - NormSelector = mango_selector:normalize(Selector), - Matches = mango_selector:match(NormSelector, Doc), - IsDesign = case mango_doc:get_field(Doc, <<"_id">>) of - <<"_design/", _/binary>> -> true; - _ -> false - end, - Matches and not IsDesign. - - -get_text_field_list(IdxProps) -> - case couch_util:get_value(<<"fields">>, IdxProps) of - Fields when is_list(Fields) -> - RawList = lists:flatmap(fun get_text_field_info/1, Fields), - [mango_util:lucene_escape_user(Field) || Field <- RawList]; - _ -> - all_fields - end. - - -get_text_field_info({Props}) -> - Name = couch_util:get_value(<<"name">>, Props), - Type0 = couch_util:get_value(<<"type">>, Props), - if not is_binary(Name) -> []; true -> - Type = get_text_field_type(Type0), - [iolist_to_binary([Name, ":", Type])] - end. - - -get_text_field_type(<<"number">>) -> - <<"number">>; -get_text_field_type(<<"boolean">>) -> - <<"boolean">>; -get_text_field_type(_) -> - <<"string">>. - - -make_text_field(TAcc, Type, Value) -> - FieldName = make_text_field_name(TAcc#tacc.path, Type), - Fields = TAcc#tacc.fields, - case Fields == all_fields orelse lists:member(FieldName, Fields) of - true -> - [{FieldName, Type, Value}]; - false -> - [] - end. - - -make_text_field_name([P | Rest], Type) -> - Parts = lists:reverse(Rest, [iolist_to_binary([P, ":", Type])]), - Escaped = [mango_util:lucene_escape_field(N) || N <- Parts], - iolist_to_binary(mango_util:join(".", Escaped)). - - -validate_index_info(IndexInfo) -> - IdxTypes = [mango_idx_view, mango_idx_text], - Results = lists:foldl(fun(IdxType, Results0) -> - try - IdxType:validate_index_def(IndexInfo), - [valid_index | Results0] - catch _:_ -> - [invalid_index | Results0] - end - end, [], IdxTypes), - lists:member(valid_index, Results). - - --ifdef(TEST). - --include_lib("eunit/include/eunit.hrl"). - -handle_garbage_collect_cast_test() -> - ?assertEqual({noreply, []}, handle_cast(garbage_collect, [])). - -handle_stop_cast_test() -> - ?assertEqual({stop, normal, []}, handle_cast(stop, [])). - -handle_invalid_cast_test() -> - ?assertEqual({stop, {invalid_cast, random}, []}, handle_cast(random, [])). - --endif. -- cgit v1.2.1 From e28eb6b791d3421753f6806d9c0a42e9d0f7b4b1 Mon Sep 17 00:00:00 2001 From: Garren Smith Date: Tue, 24 Mar 2020 14:23:09 +0200 Subject: remove partition opts from mango --- src/mango/src/mango_cursor.erl | 1 - src/mango/src/mango_cursor_text.erl | 9 --- src/mango/src/mango_cursor_view.erl | 6 -- src/mango/src/mango_error.erl | 14 ---- src/mango/src/mango_httpd.erl | 21 +----- src/mango/src/mango_idx.erl | 126 +++--------------------------------- src/mango/src/mango_idx.hrl | 1 - src/mango/src/mango_idx_text.erl | 1 - src/mango/src/mango_idx_view.erl | 1 - src/mango/src/mango_opts.erl | 30 --------- 10 files changed, 10 insertions(+), 200 deletions(-) diff --git a/src/mango/src/mango_cursor.erl b/src/mango/src/mango_cursor.erl index b1cb4148e..f16765b96 100644 --- a/src/mango/src/mango_cursor.erl +++ b/src/mango/src/mango_cursor.erl @@ -72,7 +72,6 @@ explain(#cursor{}=Cursor) -> {[ {dbname, mango_idx:dbname(Idx)}, {index, mango_idx:to_json(Idx)}, - {partitioned, mango_idx:partitioned(Idx)}, {selector, Selector}, {opts, {Opts}}, {limit, Limit}, diff --git a/src/mango/src/mango_cursor_text.erl b/src/mango/src/mango_cursor_text.erl index 43ef84e4c..ccf58ad6e 100644 --- a/src/mango/src/mango_cursor_text.erl +++ b/src/mango/src/mango_cursor_text.erl @@ -77,7 +77,6 @@ explain(Cursor) -> } = Cursor, [ {'query', mango_selector_text:convert(Selector)}, - {partition, get_partition(Opts, null)}, {sort, sort_query(Opts, Selector)} ]. @@ -95,7 +94,6 @@ execute(Cursor, UserFun, UserAcc) -> Query = mango_selector_text:convert(Selector), QueryArgs = #index_query_args{ q = Query, - partition = get_partition(Opts, nil), sort = sort_query(Opts, Selector), raw_bookmark = true }, @@ -250,13 +248,6 @@ sort_query(Opts, Selector) -> end. -get_partition(Opts, Default) -> - case couch_util:get_value(partition, Opts) of - <<>> -> Default; - Else -> Else - end. - - get_bookmark(Opts) -> case lists:keyfind(bookmark, 1, Opts) of {_, BM} when is_list(BM), BM /= [] -> diff --git a/src/mango/src/mango_cursor_view.erl b/src/mango/src/mango_cursor_view.erl index 240ef501d..bced842ae 100644 --- a/src/mango/src/mango_cursor_view.erl +++ b/src/mango/src/mango_cursor_view.erl @@ -73,7 +73,6 @@ explain(Cursor) -> {include_docs, Args#mrargs.include_docs}, {view_type, Args#mrargs.view_type}, {reduce, Args#mrargs.reduce}, - {partition, couch_mrview_util:get_extra(Args, partition, null)}, {start_key, maybe_replace_max_json(Args#mrargs.start_key)}, {end_key, maybe_replace_max_json(Args#mrargs.end_key)}, {direction, Args#mrargs.direction}, @@ -410,11 +409,6 @@ apply_opts([{update, false} | Rest], Args) -> update = false }, apply_opts(Rest, NewArgs); -apply_opts([{partition, <<>>} | Rest], Args) -> - apply_opts(Rest, Args); -apply_opts([{partition, Partition} | Rest], Args) when is_binary(Partition) -> - NewArgs = couch_mrview_util:set_extra(Args, partition, Partition), - apply_opts(Rest, NewArgs); apply_opts([{_, _} | Rest], Args) -> % Ignore unknown options apply_opts(Rest, Args). diff --git a/src/mango/src/mango_error.erl b/src/mango/src/mango_error.erl index bb545ad67..9ac8f6368 100644 --- a/src/mango/src/mango_error.erl +++ b/src/mango/src/mango_error.erl @@ -28,13 +28,6 @@ info(mango_idx, {no_usable_index, missing_sort_index}) -> <<"No index exists for this sort, " "try indexing by the sort fields.">> }; -info(mango_idx, {no_usable_index, missing_sort_index_partitioned}) -> - { - 400, - <<"no_usable_index">>, - <<"No partitioned index exists for this sort, " - "try indexing by the sort fields.">> - }; info(mango_idx, {no_usable_index, missing_sort_index_global}) -> { 400, @@ -118,13 +111,6 @@ info(mango_idx, {invalid_index_type, BadType}) -> <<"invalid_index">>, fmt("Invalid type for index: ~s", [BadType]) }; -info(mango_idx, {partitioned_option_mismatch, BadDDoc}) -> - { - 400, - <<"invalid_partitioned_option">>, - fmt("Requested partitioned option does not match existing value on" - " design document ~s", [BadDDoc]) - }; info(mango_idx, invalid_query_ddoc_language) -> { 400, diff --git a/src/mango/src/mango_httpd.erl b/src/mango/src/mango_httpd.erl index 379d2e127..946d7e41c 100644 --- a/src/mango/src/mango_httpd.erl +++ b/src/mango/src/mango_httpd.erl @@ -170,7 +170,7 @@ handle_index_req(#httpd{path_parts=[_, _, _DDocId0, _Type, _Name]}=Req, _Db) -> handle_explain_req(#httpd{method='POST'}=Req, Db) -> chttpd:validate_ctype(Req, "application/json"), - Body = maybe_set_partition(Req), + Body = chttpd:json_body_obj(Req), {ok, Opts0} = mango_opts:validate_find(Body), {value, {selector, Sel}, Opts} = lists:keytake(selector, 1, Opts0), Resp = mango_crud:explain(Db, Sel, Opts), @@ -182,7 +182,7 @@ handle_explain_req(Req, _Db) -> handle_find_req(#httpd{method='POST'}=Req, Db) -> chttpd:validate_ctype(Req, "application/json"), - Body = maybe_set_partition(Req), + Body = chttpd:json_body_obj(Req), {ok, Opts0} = mango_opts:validate_find(Body), {value, {selector, Sel}, Opts} = lists:keytake(selector, 1, Opts0), {ok, Resp0} = start_find_resp(Req), @@ -231,23 +231,6 @@ get_idx_del_opts(Req) -> end. -maybe_set_partition(Req) -> - {Props} = chttpd:json_body_obj(Req), - case chttpd:qs_value(Req, "partition", undefined) of - undefined -> - {Props}; - Partition -> - case couch_util:get_value(<<"partition">>, Props) of - undefined -> - {[{<<"partition">>, ?l2b(Partition)} | Props]}; - Partition -> - {Props}; - OtherPartition -> - ?MANGO_ERROR({bad_partition, OtherPartition}) - end - end. - - convert_to_design_id(DDocId) -> case DDocId of <<"_design/", _/binary>> -> DDocId; diff --git a/src/mango/src/mango_idx.erl b/src/mango/src/mango_idx.erl index 5d06a8fe3..0f79bdb8a 100644 --- a/src/mango/src/mango_idx.erl +++ b/src/mango/src/mango_idx.erl @@ -33,7 +33,6 @@ name/1, type/1, def/1, - partitioned/1, opts/1, columns/1, is_usable/3, @@ -64,13 +63,12 @@ get_usable_indexes(Db, Selector, Opts) -> ExistingIndexes ), UserSpecifiedIndex = mango_cursor:maybe_filter_indexes_by_ddoc(ExistingIndexes, Opts), - UsableIndexes0 = lists:usort(GlobalIndexes ++ UserSpecifiedIndex), - UsableIndexes1 = filter_partition_indexes(UsableIndexes0, Opts), + UsableIndexes = lists:usort(GlobalIndexes ++ UserSpecifiedIndex), SortFields = get_sort_fields(Opts), UsableFilter = fun(I) -> is_usable(I, Selector, SortFields) end, - case lists:filter(UsableFilter, UsableIndexes1) of + case lists:filter(UsableFilter, UsableIndexes) of [] -> mango_sort_error(Db, Opts); UsableIndexes -> @@ -78,15 +76,8 @@ get_usable_indexes(Db, Selector, Opts) -> end. -mango_sort_error(Db, Opts) -> - case {fabric_util:is_partitioned(Db), is_opts_partitioned(Opts)} of - {false, _} -> - ?MANGO_ERROR({no_usable_index, missing_sort_index}); - {true, true} -> - ?MANGO_ERROR({no_usable_index, missing_sort_index_partitioned}); - {true, false} -> - ?MANGO_ERROR({no_usable_index, missing_sort_index_global}) - end. +mango_sort_error(_Db, _Opts) -> + ?MANGO_ERROR({no_usable_index, missing_sort_index}). recover(Db) -> @@ -124,7 +115,6 @@ new(Db, Opts) -> name = IdxName, type = Type, def = Def, - partitioned = get_idx_partitioned(Opts), opts = filter_opts(Opts) }}. @@ -136,11 +126,10 @@ validate_new(Idx, Db) -> add(DDoc, Idx) -> Mod = idx_mod(Idx), - {ok, NewDDoc1} = Mod:add(DDoc, Idx), - NewDDoc2 = set_ddoc_partitioned(NewDDoc1, Idx), + {ok, NewDDoc} = Mod:add(DDoc, Idx), % Round trip through JSON for normalization - Body = ?JSON_DECODE(?JSON_ENCODE(NewDDoc2#doc.body)), - {ok, NewDDoc2#doc{body = Body}}. + Body = ?JSON_DECODE(?JSON_ENCODE(NewDDoc#doc.body)), + {ok, NewDDoc#doc{body = Body}}. remove(DDoc, Idx) -> @@ -192,8 +181,7 @@ from_ddoc(Db, {Props}) -> lists:map(fun(Idx) -> Idx#idx{ dbname = DbName, - ddoc = DDoc, - partitioned = get_idx_partitioned(Db, Props) + ddoc = DDoc } end, Idxs). @@ -230,10 +218,6 @@ def(#idx{def=Def}) -> Def. -partitioned(#idx{partitioned=Partitioned}) -> - Partitioned. - - opts(#idx{opts=Opts}) -> Opts. @@ -350,97 +334,6 @@ gen_name(Idx, Opts0) -> mango_util:enc_hex(Sha). -get_idx_partitioned(Opts) -> - case proplists:get_value(partitioned, Opts) of - B when is_boolean(B) -> - B; - db_default -> - % Default to the partitioned setting on - % the database. - undefined - end. - - -set_ddoc_partitioned(DDoc, Idx) -> - % We have to verify that the new index being added - % to this design document either matches the current - % ddoc's design options *or* this is a new design doc - #doc{ - id = DDocId, - revs = Revs, - body = {BodyProps} - } = DDoc, - OldDOpts = couch_util:get_value(<<"options">>, BodyProps), - OldOpt = case OldDOpts of - {OldDOptProps} when is_list(OldDOptProps) -> - couch_util:get_value(<<"partitioned">>, OldDOptProps); - _ -> - undefined - end, - % If new matches old we're done - if Idx#idx.partitioned == OldOpt -> DDoc; true -> - % If we're creating a ddoc then we can set the options - case Revs == {0, []} of - true when Idx#idx.partitioned /= undefined -> - set_ddoc_partitioned_option(DDoc, Idx#idx.partitioned); - true when Idx#idx.partitioned == undefined -> - DDoc; - false -> - ?MANGO_ERROR({partitioned_option_mismatch, DDocId}) - end - end. - - -set_ddoc_partitioned_option(DDoc, Partitioned) -> - #doc{ - body = {BodyProps} - } = DDoc, - NewProps = case couch_util:get_value(<<"options">>, BodyProps) of - {Existing} when is_list(Existing) -> - Opt = {<<"partitioned">>, Partitioned}, - New = lists:keystore(<<"partitioned">>, 1, Existing, Opt), - lists:keystore(<<"options">>, 1, BodyProps, {<<"options">>, New}); - undefined -> - New = {<<"options">>, {[{<<"partitioned">>, Partitioned}]}}, - lists:keystore(<<"options">>, 1, BodyProps, New) - end, - DDoc#doc{body = {NewProps}}. - - -get_idx_partitioned(Db, DDocProps) -> - Default = fabric_util:is_partitioned(Db), - case couch_util:get_value(<<"options">>, DDocProps) of - {DesignOpts} -> - case couch_util:get_value(<<"partitioned">>, DesignOpts) of - P when is_boolean(P) -> - P; - undefined -> - Default - end; - undefined -> - Default - end. - -is_opts_partitioned(Opts) -> - case couch_util:get_value(partition, Opts, <<>>) of - <<>> -> - false; - Partition when is_binary(Partition) -> - true - end. - - -filter_partition_indexes(Indexes, Opts) -> - PFilt = case is_opts_partitioned(Opts) of - false -> - fun(#idx{partitioned = P}) -> not P end; - true -> - fun(#idx{partitioned = P}) -> P end - end, - Filt = fun(Idx) -> type(Idx) == <<"special">> orelse PFilt(Idx) end, - lists:filter(Filt, Indexes). - - filter_opts([]) -> []; filter_opts([{user_ctx, _} | Rest]) -> @@ -453,8 +346,6 @@ filter_opts([{type, _} | Rest]) -> filter_opts(Rest); filter_opts([{w, _} | Rest]) -> filter_opts(Rest); -filter_opts([{partitioned, _} | Rest]) -> - filter_opts(Rest); filter_opts([Opt | Rest]) -> [Opt | filter_opts(Rest)]. @@ -488,7 +379,6 @@ index(SelectorName, Selector) -> <<"Selected">>,<<"json">>, {[{<<"fields">>,{[{<<"location">>,<<"asc">>}]}}, {SelectorName,{Selector}}]}, - false, [{<<"def">>,{[{<<"fields">>,[<<"location">>]}]}}] }. diff --git a/src/mango/src/mango_idx.hrl b/src/mango/src/mango_idx.hrl index 97259500b..712031b75 100644 --- a/src/mango/src/mango_idx.hrl +++ b/src/mango/src/mango_idx.hrl @@ -16,6 +16,5 @@ name, type, def, - partitioned, opts }). diff --git a/src/mango/src/mango_idx_text.erl b/src/mango/src/mango_idx_text.erl index 1d4becfb3..71eaf110a 100644 --- a/src/mango/src/mango_idx_text.erl +++ b/src/mango/src/mango_idx_text.erl @@ -100,7 +100,6 @@ to_json(Idx) -> {ddoc, Idx#idx.ddoc}, {name, Idx#idx.name}, {type, Idx#idx.type}, - {partitioned, Idx#idx.partitioned}, {def, {def_to_json(Idx#idx.def)}} ]}. diff --git a/src/mango/src/mango_idx_view.erl b/src/mango/src/mango_idx_view.erl index 37911498c..2d784b638 100644 --- a/src/mango/src/mango_idx_view.erl +++ b/src/mango/src/mango_idx_view.erl @@ -104,7 +104,6 @@ to_json(Idx) -> {ddoc, Idx#idx.ddoc}, {name, Idx#idx.name}, {type, Idx#idx.type}, - {partitioned, Idx#idx.partitioned}, {def, {def_to_json(Idx#idx.def)}} ]}. diff --git a/src/mango/src/mango_opts.erl b/src/mango/src/mango_opts.erl index 92c07f743..7bae9c90d 100644 --- a/src/mango/src/mango_opts.erl +++ b/src/mango/src/mango_opts.erl @@ -34,7 +34,6 @@ validate_sort/1, validate_fields/1, validate_bulk_delete/1, - validate_partitioned/1, default_limit/0 ]). @@ -71,12 +70,6 @@ validate_idx_create({Props}) -> {optional, true}, {default, 2}, {validator, fun is_pos_integer/1} - ]}, - {<<"partitioned">>, [ - {tag, partitioned}, - {optional, true}, - {default, db_default}, - {validator, fun validate_partitioned/1} ]} ], validate(Props, Opts). @@ -124,12 +117,6 @@ validate_find({Props}) -> {default, []}, {validator, fun validate_fields/1} ]}, - {<<"partition">>, [ - {tag, partition}, - {optional, true}, - {default, <<>>}, - {validator, fun validate_partition/1} - ]}, {<<"r">>, [ {tag, r}, {optional, true}, @@ -309,23 +296,6 @@ validate_fields(Value) -> mango_fields:new(Value). -validate_partitioned(true) -> - {ok, true}; -validate_partitioned(false) -> - {ok, false}; -validate_partitioned(db_default) -> - {ok, db_default}; -validate_partitioned(Else) -> - ?MANGO_ERROR({invalid_partitioned_value, Else}). - - -validate_partition(<<>>) -> - {ok, <<>>}; -validate_partition(Partition) -> - couch_partition:validate_partition(Partition), - {ok, Partition}. - - validate_opts([], Props, Acc) -> {Props, lists:reverse(Acc)}; validate_opts([{Name, Desc} | Rest], Props, Acc) -> -- cgit v1.2.1 From bd1667d3bf1b5328ac7b0c4a4b50d1b8b91a9ec2 Mon Sep 17 00:00:00 2001 From: Garren Smith Date: Mon, 23 Mar 2020 15:35:22 +0200 Subject: remove unneeded r/w parameter --- src/mango/src/mango_crud.erl | 17 +++-------------- src/mango/src/mango_cursor_view.erl | 12 ------------ src/mango/src/mango_httpd.erl | 28 +++------------------------- src/mango/src/mango_idx.erl | 2 -- src/mango/src/mango_opts.erl | 12 ------------ src/mango/test/mango.py | 2 -- 6 files changed, 6 insertions(+), 67 deletions(-) diff --git a/src/mango/src/mango_crud.erl b/src/mango/src/mango_crud.erl index 41a4d143d..42717ffc8 100644 --- a/src/mango/src/mango_crud.erl +++ b/src/mango/src/mango_crud.erl @@ -35,8 +35,7 @@ insert(Db, {_}=Doc, Opts) -> insert(Db, [Doc], Opts); insert(Db, Docs, Opts0) when is_list(Docs) -> Opts1 = maybe_add_user_ctx(Db, Opts0), - Opts2 = maybe_int_to_str(w, Opts1), - case fabric:update_docs(Db, Docs, Opts2) of + case fabric:update_docs(Db, Docs, Opts1) of {ok, Results0} -> {ok, lists:zipwith(fun result_to_json/2, Docs, Results0)}; {accepted, Results0} -> @@ -48,8 +47,7 @@ insert(Db, Docs, Opts0) when is_list(Docs) -> find(Db, Selector, Callback, UserAcc, Opts0) -> Opts1 = maybe_add_user_ctx(Db, Opts0), - Opts2 = maybe_int_to_str(r, Opts1), - {ok, Cursor} = mango_cursor:create(Db, Selector, Opts2), + {ok, Cursor} = mango_cursor:create(Db, Selector, Opts1), mango_cursor:execute(Cursor, Callback, UserAcc). @@ -101,8 +99,7 @@ delete(Db, Selector, Options) -> explain(Db, Selector, Opts0) -> Opts1 = maybe_add_user_ctx(Db, Opts0), - Opts2 = maybe_int_to_str(r, Opts1), - {ok, Cursor} = mango_cursor:create(Db, Selector, Opts2), + {ok, Cursor} = mango_cursor:create(Db, Selector, Opts1), mango_cursor:explain(Cursor). @@ -115,14 +112,6 @@ maybe_add_user_ctx(Db, Opts) -> end. -maybe_int_to_str(_Key, []) -> - []; -maybe_int_to_str(Key, [{Key, Val} | Rest]) when is_integer(Val) -> - [{Key, integer_to_list(Val)} | maybe_int_to_str(Key, Rest)]; -maybe_int_to_str(Key, [KV | Rest]) -> - [KV | maybe_int_to_str(Key, Rest)]. - - result_to_json(#doc{id=Id}, Result) -> result_to_json(Id, Result); result_to_json({Props}, Result) -> diff --git a/src/mango/src/mango_cursor_view.erl b/src/mango/src/mango_cursor_view.erl index bced842ae..5187c10c6 100644 --- a/src/mango/src/mango_cursor_view.erl +++ b/src/mango/src/mango_cursor_view.erl @@ -353,18 +353,6 @@ ddocid(Idx) -> apply_opts([], Args) -> Args; -apply_opts([{r, RStr} | Rest], Args) -> - IncludeDocs = case list_to_integer(RStr) of - 1 -> - true; - R when R > 1 -> - % We don't load the doc in the view query because - % we have to do a quorum read in the coordinator - % so there's no point. - false - end, - NewArgs = Args#mrargs{include_docs = IncludeDocs}, - apply_opts(Rest, NewArgs); apply_opts([{conflicts, true} | Rest], Args) -> NewArgs = Args#mrargs{conflicts = true}, apply_opts(Rest, NewArgs); diff --git a/src/mango/src/mango_httpd.erl b/src/mango/src/mango_httpd.erl index 946d7e41c..1054c74bb 100644 --- a/src/mango/src/mango_httpd.erl +++ b/src/mango/src/mango_httpd.erl @@ -94,8 +94,7 @@ handle_index_req(#httpd{method='POST', path_parts=[_, _]}=Req, Db) -> {ok, DDoc} -> <<"exists">>; {ok, NewDDoc} -> - CreateOpts = get_idx_w_opts(Opts), - case mango_crud:insert(Db, NewDDoc, CreateOpts) of + case mango_crud:insert(Db, NewDDoc, Opts) of {ok, [{RespProps}]} -> case lists:keyfind(error, 1, RespProps) of {error, Reason} -> @@ -121,12 +120,11 @@ handle_index_req(#httpd{method='POST', path_parts=[_, <<"_index">>, {ok, Opts} = mango_opts:validate_bulk_delete(chttpd:json_body_obj(Req)), Idxs = mango_idx:list(Db), DDocs = get_bulk_delete_ddocs(Opts), - DelOpts = get_idx_w_opts(Opts), {Success, Fail} = lists:foldl(fun(DDocId0, {Success0, Fail0}) -> DDocId = convert_to_design_id(DDocId0), Filt = fun(Idx) -> mango_idx:ddoc(Idx) == DDocId end, Id = {<<"id">>, DDocId}, - case mango_idx:delete(Filt, Db, Idxs, DelOpts) of + case mango_idx:delete(Filt, Db, Idxs, Opts) of {ok, true} -> {[{[Id, {<<"ok">>, true}]} | Success0], Fail0}; {error, Error} -> @@ -148,14 +146,13 @@ handle_index_req(#httpd{method='DELETE', path_parts=[_, _, DDocId0, Type, Name]}=Req, Db) -> Idxs = mango_idx:list(Db), DDocId = convert_to_design_id(DDocId0), - DelOpts = get_idx_del_opts(Req), Filt = fun(Idx) -> IsDDoc = mango_idx:ddoc(Idx) == DDocId, IsType = mango_idx:type(Idx) == Type, IsName = mango_idx:name(Idx) == Name, IsDDoc andalso IsType andalso IsName end, - case mango_idx:delete(Filt, Db, Idxs, DelOpts) of + case mango_idx:delete(Filt, Db, Idxs, []) of {ok, true} -> chttpd:send_json(Req, {[{ok, true}]}); {error, not_found} -> @@ -203,15 +200,6 @@ set_user_ctx(#httpd{user_ctx=Ctx}, Db) -> NewDb. -get_idx_w_opts(Opts) -> - case lists:keyfind(w, 1, Opts) of - {w, N} when is_integer(N), N > 0 -> - [{w, integer_to_list(N)}]; - _ -> - [{w, "2"}] - end. - - get_bulk_delete_ddocs(Opts) -> case lists:keyfind(docids, 1, Opts) of {docids, DDocs} when is_list(DDocs) -> @@ -221,16 +209,6 @@ get_bulk_delete_ddocs(Opts) -> end. -get_idx_del_opts(Req) -> - try - WStr = chttpd:qs_value(Req, "w", "2"), - _ = list_to_integer(WStr), - [{w, WStr}] - catch _:_ -> - [{w, "2"}] - end. - - convert_to_design_id(DDocId) -> case DDocId of <<"_design/", _/binary>> -> DDocId; diff --git a/src/mango/src/mango_idx.erl b/src/mango/src/mango_idx.erl index 0f79bdb8a..a26a6851a 100644 --- a/src/mango/src/mango_idx.erl +++ b/src/mango/src/mango_idx.erl @@ -344,8 +344,6 @@ filter_opts([{name, _} | Rest]) -> filter_opts(Rest); filter_opts([{type, _} | Rest]) -> filter_opts(Rest); -filter_opts([{w, _} | Rest]) -> - filter_opts(Rest); filter_opts([Opt | Rest]) -> [Opt | filter_opts(Rest)]. diff --git a/src/mango/src/mango_opts.erl b/src/mango/src/mango_opts.erl index 7bae9c90d..e35767600 100644 --- a/src/mango/src/mango_opts.erl +++ b/src/mango/src/mango_opts.erl @@ -64,12 +64,6 @@ validate_idx_create({Props}) -> {optional, true}, {default, auto_name}, {validator, fun validate_idx_name/1} - ]}, - {<<"w">>, [ - {tag, w}, - {optional, true}, - {default, 2}, - {validator, fun is_pos_integer/1} ]} ], validate(Props, Opts). @@ -117,12 +111,6 @@ validate_find({Props}) -> {default, []}, {validator, fun validate_fields/1} ]}, - {<<"r">>, [ - {tag, r}, - {optional, true}, - {default, 1}, - {validator, fun mango_opts:is_pos_integer/1} - ]}, {<<"conflicts">>, [ {tag, conflicts}, {optional, true}, diff --git a/src/mango/test/mango.py b/src/mango/test/mango.py index 03cb85f48..638de4787 100644 --- a/src/mango/test/mango.py +++ b/src/mango/test/mango.py @@ -244,7 +244,6 @@ class Database(object): skip=0, sort=None, fields=None, - r=1, conflicts=False, use_index=None, explain=False, @@ -258,7 +257,6 @@ class Database(object): "use_index": use_index, "limit": limit, "skip": skip, - "r": r, "conflicts": conflicts, } if sort is not None: -- cgit v1.2.1 From 4f8e33ea7fa2d4131bfe7fb5cca139f00ab17c8f Mon Sep 17 00:00:00 2001 From: Garren Smith Date: Mon, 23 Mar 2020 16:11:03 +0200 Subject: Remove view_cb predicate push down Removes the view callback that was performed on the nodes before sending the results back to the co-ordinator. --- src/mango/src/mango_cursor_view.erl | 95 +------------------------------------ 1 file changed, 1 insertion(+), 94 deletions(-) diff --git a/src/mango/src/mango_cursor_view.erl b/src/mango/src/mango_cursor_view.erl index 5187c10c6..b88f6eaee 100644 --- a/src/mango/src/mango_cursor_view.erl +++ b/src/mango/src/mango_cursor_view.erl @@ -19,7 +19,6 @@ ]). -export([ - view_cb/2, handle_message/2, handle_all_docs_message/2, composite_indexes/2, @@ -114,8 +113,7 @@ base_args(#cursor{index = Idx, selector = Selector} = Cursor) -> reduce = false, start_key = StartKey, end_key = EndKey, - include_docs = true, - extra = [{callback, {?MODULE, view_cb}}, {selector, Selector}] + include_docs = true }. @@ -226,66 +224,6 @@ choose_best_index(_DbName, IndexRanges) -> {SelectedIndex, SelectedIndexRanges}. -view_cb({meta, Meta}, Acc) -> - % Map function starting - put(mango_docs_examined, 0), - set_mango_msg_timestamp(), - ok = rexi:stream2({meta, Meta}), - {ok, Acc}; -view_cb({row, Row}, #mrargs{extra = Options} = Acc) -> - ViewRow = #view_row{ - id = couch_util:get_value(id, Row), - key = couch_util:get_value(key, Row), - doc = couch_util:get_value(doc, Row) - }, - case ViewRow#view_row.doc of - null -> - maybe_send_mango_ping(); - undefined -> - % include_docs=false. Use quorum fetch at coordinator - ok = rexi:stream2(ViewRow), - set_mango_msg_timestamp(); - Doc -> - put(mango_docs_examined, get(mango_docs_examined) + 1), - Selector = couch_util:get_value(selector, Options), - couch_stats:increment_counter([mango, docs_examined]), - case mango_selector:match(Selector, Doc) of - true -> - ok = rexi:stream2(ViewRow), - set_mango_msg_timestamp(); - false -> - maybe_send_mango_ping() - end - end, - {ok, Acc}; -view_cb(complete, Acc) -> - % Send shard-level execution stats - ok = rexi:stream2({execution_stats, {docs_examined, get(mango_docs_examined)}}), - % Finish view output - ok = rexi:stream_last(complete), - {ok, Acc}; -view_cb(ok, ddoc_updated) -> - rexi:reply({ok, ddoc_updated}). - - -maybe_send_mango_ping() -> - Current = os:timestamp(), - LastPing = get(mango_last_msg_timestamp), - % Fabric will timeout if it has not heard a response from a worker node - % after 5 seconds. Send a ping every 4 seconds so the timeout doesn't happen. - case timer:now_diff(Current, LastPing) > ?HEARTBEAT_INTERVAL_IN_USEC of - false -> - ok; - true -> - rexi:ping(), - set_mango_msg_timestamp() - end. - - -set_mango_msg_timestamp() -> - put(mango_last_msg_timestamp, os:timestamp()). - - handle_message({meta, _}, Cursor) -> {ok, Cursor}; handle_message({row, Props}, Cursor) -> @@ -456,34 +394,3 @@ update_bookmark_keys(#cursor{limit = Limit} = Cursor, Props) when Limit > 0 -> }; update_bookmark_keys(Cursor, _Props) -> Cursor. - - -%%%%%%%% module tests below %%%%%%%% - --ifdef(TEST). --include_lib("eunit/include/eunit.hrl"). - - -does_not_refetch_doc_with_value_test() -> - Cursor = #cursor { - db = <<"db">>, - opts = [], - execution_stats = #execution_stats{}, - selector = mango_selector:normalize({[{<<"user_id">>, <<"1234">>}]}) - }, - RowProps = [ - {id,<<"b06aadcf-cd0f-4ca6-9f7e-2c993e48d4c4">>}, - {key,<<"b06aadcf-cd0f-4ca6-9f7e-2c993e48d4c4">>}, - {doc,{ - [ - {<<"_id">>,<<"b06aadcf-cd0f-4ca6-9f7e-2c993e48d4c4">>}, - {<<"_rev">>,<<"1-a954fe2308f14307756067b0e18c2968">>}, - {<<"user_id">>,11} - ] - }} - ], - {Match, _, _} = doc_member(Cursor, RowProps), - ?assertEqual(Match, ok). - - --endif. -- cgit v1.2.1 From dd4444209aaf43c8b7cc6aa422d1b7eeb96dad26 Mon Sep 17 00:00:00 2001 From: Garren Smith Date: Mon, 23 Mar 2020 16:04:41 +0200 Subject: Remove quorum stats Removing quorum stats since they are not relevant with FDB. --- src/mango/src/mango_cursor.erl | 5 +---- src/mango/src/mango_execution_stats.erl | 8 -------- src/mango/src/mango_execution_stats.hrl | 1 - 3 files changed, 1 insertion(+), 13 deletions(-) diff --git a/src/mango/src/mango_cursor.erl b/src/mango/src/mango_cursor.erl index f16765b96..db4e98184 100644 --- a/src/mango/src/mango_cursor.erl +++ b/src/mango/src/mango_cursor.erl @@ -205,12 +205,9 @@ invalid_index_warning_int(_, _) -> % returned, implying a lot of in-memory filtering index_scan_warning(#execution_stats { totalDocsExamined = Docs, - totalQuorumDocsExamined = DocsQuorum, resultsReturned = ResultCount }) -> - % Docs and DocsQuorum are mutually exclusive so it's safe to sum them - DocsScanned = Docs + DocsQuorum, - Ratio = calculate_index_scan_ratio(DocsScanned, ResultCount), + Ratio = calculate_index_scan_ratio(Docs, ResultCount), Threshold = config:get_integer("mango", "index_scan_warning_threshold", 10), case Threshold > 0 andalso Ratio > Threshold of true -> diff --git a/src/mango/src/mango_execution_stats.erl b/src/mango/src/mango_execution_stats.erl index 5878a3190..fe9d27b90 100644 --- a/src/mango/src/mango_execution_stats.erl +++ b/src/mango/src/mango_execution_stats.erl @@ -18,7 +18,6 @@ incr_keys_examined/1, incr_docs_examined/1, incr_docs_examined/2, - incr_quorum_docs_examined/1, incr_results_returned/1, log_start/1, log_end/1, @@ -33,7 +32,6 @@ to_json(Stats) -> {[ {total_keys_examined, Stats#execution_stats.totalKeysExamined}, {total_docs_examined, Stats#execution_stats.totalDocsExamined}, - {total_quorum_docs_examined, Stats#execution_stats.totalQuorumDocsExamined}, {results_returned, Stats#execution_stats.resultsReturned}, {execution_time_ms, Stats#execution_stats.executionTimeMs} ]}. @@ -55,12 +53,6 @@ incr_docs_examined(Stats, N) -> }. -incr_quorum_docs_examined(Stats) -> - Stats#execution_stats { - totalQuorumDocsExamined = Stats#execution_stats.totalQuorumDocsExamined + 1 - }. - - incr_results_returned(Stats) -> couch_stats:increment_counter([mango, results_returned]), Stats#execution_stats { diff --git a/src/mango/src/mango_execution_stats.hrl b/src/mango/src/mango_execution_stats.hrl index ea5ed5ee8..783c1e7f9 100644 --- a/src/mango/src/mango_execution_stats.hrl +++ b/src/mango/src/mango_execution_stats.hrl @@ -13,7 +13,6 @@ -record(execution_stats, { totalKeysExamined = 0, totalDocsExamined = 0, - totalQuorumDocsExamined = 0, resultsReturned = 0, executionStartTime, executionTimeMs -- cgit v1.2.1 From c483652fee917ae3c6d4065cc6deb28296e40065 Mon Sep 17 00:00:00 2001 From: Garren Smith Date: Mon, 23 Mar 2020 15:19:44 +0200 Subject: Add mango indexing This uses couch_views_updater to create mango indexes in the doc update along with the couch_views_indexer to update the indexes in the background up to the creation versionstamp. --- rel/overlay/etc/default.ini | 1 + src/couch_eval/src/couch_eval.erl | 3 + src/mango/src/mango_eval.erl | 115 ++++++++++++++++++++++++++++++++++++ src/mango/src/mango_idx.erl | 57 ++++++++++++++---- src/mango/src/mango_idx.hrl | 4 +- src/mango/src/mango_idx_special.erl | 4 +- src/mango/src/mango_idx_view.erl | 22 +++++-- 7 files changed, 187 insertions(+), 19 deletions(-) create mode 100644 src/mango/src/mango_eval.erl diff --git a/rel/overlay/etc/default.ini b/rel/overlay/etc/default.ini index fd0aa7763..d2a2c7257 100644 --- a/rel/overlay/etc/default.ini +++ b/rel/overlay/etc/default.ini @@ -355,6 +355,7 @@ os_process_limit = 100 ; beahvior for executing provided code in design ; documents. javascript = couch_js +query = mango_eval [mango] ; Set to true to disable the "index all fields" text index, which can lead diff --git a/src/couch_eval/src/couch_eval.erl b/src/couch_eval/src/couch_eval.erl index 23ca263ab..3541a5b94 100644 --- a/src/couch_eval/src/couch_eval.erl +++ b/src/couch_eval/src/couch_eval.erl @@ -75,6 +75,9 @@ acquire_map_context(DbName, DDocId, Language, Sig, Lib, MapFuns) -> -spec release_map_context(context()) -> ok | {error, any()}. +release_map_context(nil) -> + ok; + release_map_context({ApiMod, Ctx}) -> ApiMod:release_map_context(Ctx). diff --git a/src/mango/src/mango_eval.erl b/src/mango/src/mango_eval.erl new file mode 100644 index 000000000..59d784b49 --- /dev/null +++ b/src/mango/src/mango_eval.erl @@ -0,0 +1,115 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + + +-module(mango_eval). +-behavior(couch_eval). + + +-export([ + acquire_map_context/1, + release_map_context/1, + map_docs/2 +]). + + +-export([ + index_doc/2 +]). + + +-include_lib("couch/include/couch_db.hrl"). +-include("mango_idx.hrl"). + + +acquire_map_context(Opts) -> + #{ + db_name := DbName, + ddoc_id := DDocId, + map_funs := MapFuns + } = Opts, + Indexes = lists:map(fun (Def) -> + #idx{ + type = <<"json">>, + dbname = DbName, + ddoc = DDocId, + def = Def + } + end, MapFuns), + {ok, Indexes}. + + +release_map_context(_) -> + ok. + + +map_docs(Indexes, Docs) -> + {ok, lists:map(fun(Doc) -> + Json = couch_doc:to_json_obj(Doc, []), + Results = index_doc(Indexes, Json), + {Doc#doc.id, Results} + end, Docs)}. + + +index_doc(Indexes, Doc) -> + lists:map(fun(Idx) -> + {IdxDef} = mango_idx:def(Idx), + Results = get_index_entries(IdxDef, Doc), + case lists:member(not_found, Results) of + true -> + []; + false -> + [{Results, null}] + end + end, Indexes). + + +get_index_entries(IdxDef, Doc) -> + {Fields} = couch_util:get_value(<<"fields">>, IdxDef), + Selector = get_index_partial_filter_selector(IdxDef), + case should_index(Selector, Doc) of + false -> + [not_found]; + true -> + get_index_values(Fields, Doc) + end. + + +get_index_values(Fields, Doc) -> + lists:map(fun({Field, _Dir}) -> + case mango_doc:get_field(Doc, Field) of + not_found -> not_found; + bad_path -> not_found; + Value -> Value + end + end, Fields). + + +get_index_partial_filter_selector(IdxDef) -> + case couch_util:get_value(<<"partial_filter_selector">>, IdxDef, {[]}) of + {[]} -> + % this is to support legacy text indexes that had the + % partial_filter_selector set as selector + couch_util:get_value(<<"selector">>, IdxDef, {[]}); + Else -> + Else + end. + + +should_index(Selector, Doc) -> + NormSelector = mango_selector:normalize(Selector), + Matches = mango_selector:match(NormSelector, Doc), + IsDesign = case mango_doc:get_field(Doc, <<"_id">>) of + <<"_design/", _/binary>> -> true; + _ -> false + end, + Matches and not IsDesign. diff --git a/src/mango/src/mango_idx.erl b/src/mango/src/mango_idx.erl index a26a6851a..ba9f68fd0 100644 --- a/src/mango/src/mango_idx.erl +++ b/src/mango/src/mango_idx.erl @@ -50,11 +50,35 @@ -include_lib("couch/include/couch_db.hrl"). -include("mango.hrl"). -include("mango_idx.hrl"). +-include_lib("couch_views/include/couch_views.hrl"). list(Db) -> - {ok, Indexes} = ddoc_cache:open(db_to_name(Db), ?MODULE), - Indexes. + DDocs = couch_views_ddoc:get_mango_list(Db), + DbName = fabric2_db:name(Db), + Indexes = lists:foldl(fun(DDoc, Acc) -> + {Props} = couch_doc:to_json_obj(DDoc, []), + + case proplists:get_value(<<"language">>, Props) == <<"query">> of + true -> + {ok, Mrst} = couch_mrview_util:ddoc_to_mrst(DbName, DDoc), + + IsInteractive = couch_views_ddoc:is_interactive(DDoc), + BuildState = couch_views_fdb:get_build_status(Db, Mrst), + + Idxs = lists:map(fun(Idx) -> + Idx#idx{ + build_status = BuildState, + interactive = IsInteractive + } + end, from_ddoc(Db, DDoc)), + Acc ++ Idxs; + false -> + Acc + end + + end, [], DDocs), + Indexes ++ special(Db). get_usable_indexes(Db, Selector, Opts) -> @@ -62,13 +86,14 @@ get_usable_indexes(Db, Selector, Opts) -> GlobalIndexes = mango_cursor:remove_indexes_with_partial_filter_selector( ExistingIndexes ), + BuiltIndexes = mango_cursor:remove_unbuilt_indexes(GlobalIndexes), UserSpecifiedIndex = mango_cursor:maybe_filter_indexes_by_ddoc(ExistingIndexes, Opts), - UsableIndexes = lists:usort(GlobalIndexes ++ UserSpecifiedIndex), + UsableIndexes0 = lists:usort(BuiltIndexes ++ UserSpecifiedIndex), SortFields = get_sort_fields(Opts), UsableFilter = fun(I) -> is_usable(I, Selector, SortFields) end, - case lists:filter(UsableFilter, UsableIndexes) of + case lists:filter(UsableFilter, UsableIndexes0) of [] -> mango_sort_error(Db, Opts); UsableIndexes -> @@ -162,16 +187,17 @@ delete(Filt, Db, Indexes, DelOpts) -> end. -from_ddoc(Db, {Props}) -> +from_ddoc(Db, #doc{id = DDocId} = DDoc) -> + {Props} = couch_doc:to_json_obj(DDoc, []), DbName = db_to_name(Db), - DDoc = proplists:get_value(<<"_id">>, Props), + DDocId = proplists:get_value(<<"_id">>, Props), case proplists:get_value(<<"language">>, Props) of <<"query">> -> ok; _ -> ?MANGO_ERROR(invalid_query_ddoc_language) end, - IdxMods = case clouseau_rpc:connected() of + IdxMods = case is_text_service_available() of true -> [mango_idx_view, mango_idx_text]; false -> @@ -181,7 +207,7 @@ from_ddoc(Db, {Props}) -> lists:map(fun(Idx) -> Idx#idx{ dbname = DbName, - ddoc = DDoc + ddoc = DDocId } end, Idxs). @@ -192,7 +218,8 @@ special(Db) -> name = <<"_all_docs">>, type = <<"special">>, def = all_docs, - opts = [] + opts = [], + build_status = ?INDEX_READY }, % Add one for _update_seq [AllDocs]. @@ -278,7 +305,7 @@ db_to_name(Name) when is_binary(Name) -> db_to_name(Name) when is_list(Name) -> iolist_to_binary(Name); db_to_name(Db) -> - couch_db:name(Db). + fabric2_db:name(Db). get_idx_def(Opts) -> @@ -293,7 +320,7 @@ get_idx_def(Opts) -> get_idx_type(Opts) -> case proplists:get_value(type, Opts) of <<"json">> -> <<"json">>; - <<"text">> -> case clouseau_rpc:connected() of + <<"text">> -> case is_text_service_available() of true -> <<"text">>; false -> @@ -306,6 +333,11 @@ get_idx_type(Opts) -> end. +is_text_service_available() -> + erlang:function_exported(clouseau_rpc, connected, 0) andalso + clouseau_rpc:connected(). + + get_idx_ddoc(Idx, Opts) -> case proplists:get_value(ddoc, Opts) of <<"_design/", _Rest/binary>> = Name -> @@ -377,7 +409,8 @@ index(SelectorName, Selector) -> <<"Selected">>,<<"json">>, {[{<<"fields">>,{[{<<"location">>,<<"asc">>}]}}, {SelectorName,{Selector}}]}, - [{<<"def">>,{[{<<"fields">>,[<<"location">>]}]}}] + [{<<"def">>,{[{<<"fields">>,[<<"location">>]}]}}], + <<"ready">> }. get_partial_filter_all_docs_test() -> diff --git a/src/mango/src/mango_idx.hrl b/src/mango/src/mango_idx.hrl index 712031b75..68e5aaaf0 100644 --- a/src/mango/src/mango_idx.hrl +++ b/src/mango/src/mango_idx.hrl @@ -16,5 +16,7 @@ name, type, def, - opts + opts, + build_status, + interactive }). diff --git a/src/mango/src/mango_idx_special.erl b/src/mango/src/mango_idx_special.erl index ac6efc707..3548372b6 100644 --- a/src/mango/src/mango_idx_special.erl +++ b/src/mango/src/mango_idx_special.erl @@ -28,6 +28,7 @@ -include_lib("couch/include/couch_db.hrl"). -include("mango_idx.hrl"). +-include_lib("couch_views/include/couch_views.hrl"). validate(_) -> @@ -55,7 +56,8 @@ to_json(#idx{def=all_docs}) -> {<<"fields">>, [{[ {<<"_id">>, <<"asc">>} ]}]} - ]}} + ]}}, + {build_status, ?INDEX_READY} ]}. diff --git a/src/mango/src/mango_idx_view.erl b/src/mango/src/mango_idx_view.erl index 2d784b638..84be4180b 100644 --- a/src/mango/src/mango_idx_view.erl +++ b/src/mango/src/mango_idx_view.erl @@ -54,7 +54,16 @@ add(#doc{body={Props0}}=DDoc, Idx) -> NewView = make_view(Idx), Views2 = lists:keystore(element(1, NewView), 1, Views1, NewView), Props1 = lists:keystore(<<"views">>, 1, Props0, {<<"views">>, {Views2}}), - {ok, DDoc#doc{body={Props1}}}. + + {Opts0} = proplists:get_value(<<"options">>, Props1, {[]}), + Opts1 = case lists:keymember(<<"interactive">>, 1, Opts0) of + true -> Opts0; + false -> Opts0 ++ [{<<"interactive">>, true}] + end, + Props2 = lists:keystore(<<"options">>, 1, Props1, {<<"options">>, {Opts1}}), + + Props3 = [{<<"autoupdate">>, false}], + {ok, DDoc#doc{body={Props2 ++ Props3}}}. remove(#doc{body={Props0}}=DDoc, Idx) -> @@ -68,13 +77,15 @@ remove(#doc{body={Props0}}=DDoc, Idx) -> if Views2 /= Views1 -> ok; true -> ?MANGO_ERROR({index_not_found, Idx#idx.name}) end, - Props1 = case Views2 of + Props3 = case Views2 of [] -> - lists:keydelete(<<"views">>, 1, Props0); + Props1 = lists:keydelete(<<"views">>, 1, Props0), + Props2 = lists:keydelete(<<"options">>, 1, Props1), + lists:keydelete(<<"autoupdate">>, 1, Props2); _ -> lists:keystore(<<"views">>, 1, Props0, {<<"views">>, {Views2}}) end, - {ok, DDoc#doc{body={Props1}}}. + {ok, DDoc#doc{body={Props3}}}. from_ddoc({Props}) -> @@ -104,7 +115,8 @@ to_json(Idx) -> {ddoc, Idx#idx.ddoc}, {name, Idx#idx.name}, {type, Idx#idx.type}, - {def, {def_to_json(Idx#idx.def)}} + {def, {def_to_json(Idx#idx.def)}}, + {build_status, Idx#idx.build_status} ]}. -- cgit v1.2.1 From 377b0c26c845a5c5152ef493f833889b45625f4a Mon Sep 17 00:00:00 2001 From: Garren Smith Date: Mon, 23 Mar 2020 16:21:22 +0200 Subject: Update Mango query to work with couch_views --- src/couch_views/test/couch_views_map_test.erl | 41 +++++++++- src/mango/src/mango_crud.erl | 24 ++---- src/mango/src/mango_cursor.erl | 4 +- src/mango/src/mango_cursor_view.erl | 70 +++++------------ src/mango/src/mango_httpd.erl | 105 ++++++++++++++------------ src/mango/src/mango_idx.erl | 13 +++- src/mango/src/mango_idx_view.erl | 5 +- src/mango/src/mango_idx_view.hrl | 13 ---- src/mango/src/mango_util.erl | 11 +-- 9 files changed, 139 insertions(+), 147 deletions(-) delete mode 100644 src/mango/src/mango_idx_view.hrl diff --git a/src/couch_views/test/couch_views_map_test.erl b/src/couch_views/test/couch_views_map_test.erl index f8ba18319..7d1e94b2c 100644 --- a/src/couch_views/test/couch_views_map_test.erl +++ b/src/couch_views/test/couch_views_map_test.erl @@ -14,6 +14,7 @@ -include_lib("couch/include/couch_eunit.hrl"). -include_lib("couch/include/couch_db.hrl"). +-include("couch_views.hrl"). -define(TDEF(A), {atom_to_list(A), fun A/0}). @@ -56,7 +57,8 @@ map_views_test_() -> ?TDEF(should_map_duplicate_keys), ?TDEF(should_map_with_doc_emit), ?TDEF(should_map_update_is_false), - ?TDEF(should_map_update_is_lazy) + ?TDEF(should_map_update_is_lazy), + ?TDEF(should_map_wait_for_interactive) % fun should_give_ext_size_seq_indexed_test/1 ] } @@ -419,6 +421,25 @@ should_map_update_is_lazy() -> ?assertEqual(Expect, Result2). +should_map_wait_for_interactive() -> + DbName = ?tempdb(), + {ok, Db} = fabric2_db:create(DbName, [{user_ctx, ?ADMIN_USER}]), + + DDoc = create_interactive_ddoc(), + Docs = make_docs(101), + + fabric2_db:update_docs(Db, Docs), + fabric2_db:update_docs(Db, [DDoc]), + + Result = couch_views:query(Db, DDoc, <<"idx_01">>, fun default_cb/2, [], + #{limit => 3}), + ?assertEqual({ok, [ + {row, [{id, <<"1">>}, {key, 1}, {value, 1}]}, + {row, [{id, <<"2">>}, {key, 2}, {value, 2}]}, + {row, [{id, <<"3">>}, {key, 3}, {value, 3}]} + ]}, Result). + + % should_give_ext_size_seq_indexed_test(Db) -> % DDoc = couch_doc:from_json_obj({[ % {<<"_id">>, <<"_design/seqdoc">>}, @@ -510,6 +531,24 @@ create_ddoc() -> ]}} ]}). +create_interactive_ddoc() -> + couch_doc:from_json_obj({[ + {<<"_id">>, <<"_design/ddoc_interactive">>}, + {<<"language">>, <<"javascript">>}, + {<<"views">>, {[ + {<<"idx_01">>, {[ + {<<"map">>, << + "function(doc) {" + "if (doc.val) {" + "emit(doc.val, doc.val);" + "}" + "}">>} + ]}} + ]}}, + {<<"autoupdate">>, false}, + {<<"interactive">>, true} + ]}). + make_docs(Count) -> [doc(I) || I <- lists:seq(1, Count)]. diff --git a/src/mango/src/mango_crud.erl b/src/mango/src/mango_crud.erl index 42717ffc8..66cef65b3 100644 --- a/src/mango/src/mango_crud.erl +++ b/src/mango/src/mango_crud.erl @@ -33,9 +33,8 @@ insert(Db, #doc{}=Doc, Opts) -> insert(Db, [Doc], Opts); insert(Db, {_}=Doc, Opts) -> insert(Db, [Doc], Opts); -insert(Db, Docs, Opts0) when is_list(Docs) -> - Opts1 = maybe_add_user_ctx(Db, Opts0), - case fabric:update_docs(Db, Docs, Opts1) of +insert(Db, Docs, Opts) when is_list(Docs) -> + case fabric2_db:update_docs(Db, Docs, Opts) of {ok, Results0} -> {ok, lists:zipwith(fun result_to_json/2, Docs, Results0)}; {accepted, Results0} -> @@ -45,9 +44,8 @@ insert(Db, Docs, Opts0) when is_list(Docs) -> end. -find(Db, Selector, Callback, UserAcc, Opts0) -> - Opts1 = maybe_add_user_ctx(Db, Opts0), - {ok, Cursor} = mango_cursor:create(Db, Selector, Opts1), +find(Db, Selector, Callback, UserAcc, Opts) -> + {ok, Cursor} = mango_cursor:create(Db, Selector, Opts), mango_cursor:execute(Cursor, Callback, UserAcc). @@ -97,21 +95,11 @@ delete(Db, Selector, Options) -> end. -explain(Db, Selector, Opts0) -> - Opts1 = maybe_add_user_ctx(Db, Opts0), - {ok, Cursor} = mango_cursor:create(Db, Selector, Opts1), +explain(Db, Selector, Opts) -> + {ok, Cursor} = mango_cursor:create(Db, Selector, Opts), mango_cursor:explain(Cursor). -maybe_add_user_ctx(Db, Opts) -> - case lists:keyfind(user_ctx, 1, Opts) of - {user_ctx, _} -> - Opts; - false -> - [{user_ctx, couch_db:get_user_ctx(Db)} | Opts] - end. - - result_to_json(#doc{id=Id}, Result) -> result_to_json(Id, Result); result_to_json({Props}, Result) -> diff --git a/src/mango/src/mango_cursor.erl b/src/mango/src/mango_cursor.erl index db4e98184..63b449cdc 100644 --- a/src/mango/src/mango_cursor.erl +++ b/src/mango/src/mango_cursor.erl @@ -48,7 +48,9 @@ create(Db, Selector0, Opts) -> Selector = mango_selector:normalize(Selector0), - UsableIndexes = mango_idx:get_usable_indexes(Db, Selector, Opts), + UsableIndexes = fabric2_fdb:transactional(Db, fun (TxDb) -> + mango_idx:get_usable_indexes(TxDb, Selector, Opts) + end), case mango_cursor:maybe_filter_indexes_by_ddoc(UsableIndexes, Opts) of [] -> % use_index doesn't match a valid index - fall back to a valid one diff --git a/src/mango/src/mango_cursor_view.erl b/src/mango/src/mango_cursor_view.erl index b88f6eaee..4960fa126 100644 --- a/src/mango/src/mango_cursor_view.erl +++ b/src/mango/src/mango_cursor_view.erl @@ -31,9 +31,7 @@ -include_lib("fabric/include/fabric.hrl"). -include("mango_cursor.hrl"). --include("mango_idx_view.hrl"). --define(HEARTBEAT_INTERVAL_IN_USEC, 4000000). create(Db, Indexes, Selector, Opts) -> FieldRanges = mango_idx_view:field_ranges(Selector), @@ -91,7 +89,8 @@ maybe_replace_max_json(?MAX_STR) -> <<"">>; maybe_replace_max_json([H | T] = EndKey) when is_list(EndKey) -> - H1 = if H == ?MAX_JSON_OBJ -> <<"">>; + MAX_VAL = couch_views_encoding:max(), + H1 = if H == MAX_VAL -> <<"">>; true -> H end, [H1 | maybe_replace_max_json(T)]; @@ -100,7 +99,7 @@ maybe_replace_max_json(EndKey) -> EndKey. -base_args(#cursor{index = Idx, selector = Selector} = Cursor) -> +base_args(#cursor{index = Idx} = Cursor) -> {StartKey, EndKey} = case Cursor#cursor.ranges of [empty] -> {null, null}; @@ -132,18 +131,19 @@ execute(#cursor{db = Db, index = Idx, execution_stats = Stats} = Cursor0, UserFu #cursor{opts = Opts, bookmark = Bookmark} = Cursor, Args0 = apply_opts(Opts, BaseArgs), Args = mango_json_bookmark:update_args(Bookmark, Args0), - UserCtx = couch_util:get_value(user_ctx, Opts, #user_ctx{}), - DbOpts = [{user_ctx, UserCtx}], Result = case mango_idx:def(Idx) of all_docs -> CB = fun ?MODULE:handle_all_docs_message/2, - fabric:all_docs(Db, DbOpts, CB, Cursor, Args); + AllDocOpts = fabric2_util:all_docs_view_opts(Args) + ++ [{restart_tx, true}], + fabric2_db:fold_docs(Db, CB, Cursor, AllDocOpts); _ -> CB = fun ?MODULE:handle_message/2, % Normal view - DDoc = ddocid(Idx), + DDocId = mango_idx:ddoc(Idx), + {ok, DDoc} = fabric2_db:open_doc(Db, DDocId), Name = mango_idx:name(Idx), - fabric:query_view(Db, DbOpts, DDoc, Name, CB, Cursor, Args) + couch_views:query(Db, DDoc, Name, CB, Cursor, Args) end, case Result of {ok, LastCursor} -> @@ -227,7 +227,7 @@ choose_best_index(_DbName, IndexRanges) -> handle_message({meta, _}, Cursor) -> {ok, Cursor}; handle_message({row, Props}, Cursor) -> - case doc_member(Cursor, Props) of + case match_doc(Cursor, Props) of {ok, Doc, {execution_stats, Stats}} -> Cursor1 = Cursor#cursor { execution_stats = Stats @@ -280,15 +280,6 @@ handle_doc(C, _Doc) -> {stop, C}. -ddocid(Idx) -> - case mango_idx:ddoc(Idx) of - <<"_design/", Rest/binary>> -> - Rest; - Else -> - Else - end. - - apply_opts([], Args) -> Args; apply_opts([{conflicts, true} | Rest], Args) -> @@ -340,41 +331,18 @@ apply_opts([{_, _} | Rest], Args) -> apply_opts(Rest, Args). -doc_member(Cursor, RowProps) -> - Db = Cursor#cursor.db, - Opts = Cursor#cursor.opts, - ExecutionStats = Cursor#cursor.execution_stats, - Selector = Cursor#cursor.selector, - case couch_util:get_value(doc, RowProps) of - {DocProps} -> - % only matching documents are returned; the selector - % is evaluated at the shard level in view_cb({row, Row}, - {ok, {DocProps}, {execution_stats, ExecutionStats}}; - undefined -> - % an undefined doc was returned, indicating we should - % perform a quorum fetch - ExecutionStats1 = mango_execution_stats:incr_quorum_docs_examined(ExecutionStats), - couch_stats:increment_counter([mango, quorum_docs_examined]), - Id = couch_util:get_value(id, RowProps), - case mango_util:defer(fabric, open_doc, [Db, Id, Opts]) of - {ok, #doc{}=DocProps} -> - Doc = couch_doc:to_json_obj(DocProps, []), - match_doc(Selector, Doc, ExecutionStats1); - Else -> - Else - end; - _ -> - % no doc, no match - {no_match, null, {execution_stats, ExecutionStats}} - end. - - -match_doc(Selector, Doc, ExecutionStats) -> +match_doc(Cursor, RowProps) -> + #cursor{ + execution_stats = Stats0, + selector = Selector + } = Cursor, + Stats1 = mango_execution_stats:incr_docs_examined(Stats0, 1), + Doc = couch_util:get_value(doc, RowProps), case mango_selector:match(Selector, Doc) of true -> - {ok, Doc, {execution_stats, ExecutionStats}}; + {ok, Doc, {execution_stats, Stats1}}; false -> - {no_match, Doc, {execution_stats, ExecutionStats}} + {no_match, Doc, {execution_stats, Stats1}} end. diff --git a/src/mango/src/mango_httpd.erl b/src/mango/src/mango_httpd.erl index 1054c74bb..94aa866d2 100644 --- a/src/mango/src/mango_httpd.erl +++ b/src/mango/src/mango_httpd.erl @@ -32,9 +32,8 @@ threshold = 1490 }). -handle_req(#httpd{} = Req, Db0) -> +handle_req(#httpd{} = Req, Db) -> try - Db = set_user_ctx(Req, Db0), handle_req_int(Req, Db) catch throw:{mango_error, Module, Reason} -> @@ -61,7 +60,9 @@ handle_req_int(_, _) -> handle_index_req(#httpd{method='GET', path_parts=[_, _]}=Req, Db) -> Params = lists:flatmap(fun({K, V}) -> parse_index_param(K, V) end, chttpd:qs(Req)), - Idxs = lists:sort(mango_idx:list(Db)), + Idxs = fabric2_fdb:transactional(Db, fun(TxDb) -> + lists:sort(mango_idx:list(TxDb)) + end), JsonIdxs0 = lists:map(fun mango_idx:to_json/1, Idxs), TotalRows = length(JsonIdxs0), Limit = case couch_util:get_value(limit, Params, TotalRows) of @@ -87,25 +88,27 @@ handle_index_req(#httpd{method='POST', path_parts=[_, _]}=Req, Db) -> {ok, Idx0} = mango_idx:new(Db, Opts), {ok, Idx} = mango_idx:validate_new(Idx0, Db), DbOpts = [{user_ctx, Req#httpd.user_ctx}, deleted, ejson_body], - {ok, DDoc} = mango_util:load_ddoc(Db, mango_idx:ddoc(Idx), DbOpts), - Id = Idx#idx.ddoc, - Name = Idx#idx.name, - Status = case mango_idx:add(DDoc, Idx) of - {ok, DDoc} -> - <<"exists">>; - {ok, NewDDoc} -> - case mango_crud:insert(Db, NewDDoc, Opts) of - {ok, [{RespProps}]} -> - case lists:keyfind(error, 1, RespProps) of - {error, Reason} -> - ?MANGO_ERROR({error_saving_ddoc, Reason}); - _ -> - <<"created">> - end; - _ -> - ?MANGO_ERROR(error_saving_ddoc) - end - end, + Id = mango_idx:ddoc(Idx), + Name = mango_idx:name(Idx), + Status = fabric2_fdb:transactional(Db, fun(TxDb) -> + {ok, DDoc} = mango_util:load_ddoc(TxDb, Id, DbOpts), + case mango_idx:add(DDoc, Idx) of + {ok, DDoc} -> + <<"exists">>; + {ok, NewDDoc} -> + case mango_crud:insert(TxDb, NewDDoc, Opts) of + {ok, [{RespProps}]} -> + case lists:keyfind(error, 1, RespProps) of + {error, Reason} -> + ?MANGO_ERROR({error_saving_ddoc, Reason}); + _ -> + <<"created">> + end; + _ -> + ?MANGO_ERROR(error_saving_ddoc) + end + end + end), chttpd:send_json(Req, {[{result, Status}, {id, Id}, {name, Name}]}); handle_index_req(#httpd{path_parts=[_, _]}=Req, _Db) -> @@ -118,19 +121,21 @@ handle_index_req(#httpd{method='POST', path_parts=[_, <<"_index">>, <<"_bulk_delete">>]}=Req, Db) -> chttpd:validate_ctype(Req, "application/json"), {ok, Opts} = mango_opts:validate_bulk_delete(chttpd:json_body_obj(Req)), - Idxs = mango_idx:list(Db), - DDocs = get_bulk_delete_ddocs(Opts), - {Success, Fail} = lists:foldl(fun(DDocId0, {Success0, Fail0}) -> - DDocId = convert_to_design_id(DDocId0), - Filt = fun(Idx) -> mango_idx:ddoc(Idx) == DDocId end, - Id = {<<"id">>, DDocId}, - case mango_idx:delete(Filt, Db, Idxs, Opts) of - {ok, true} -> - {[{[Id, {<<"ok">>, true}]} | Success0], Fail0}; - {error, Error} -> - {Success0, [{[Id, {<<"error">>, Error}]} | Fail0]} - end - end, {[], []}, DDocs), + {Success, Fail} = fabric2_fdb:transactional(Db, fun (TxDb) -> + Idxs = mango_idx:list(TxDb), + DDocs = get_bulk_delete_ddocs(Opts), + lists:foldl(fun(DDocId0, {Success0, Fail0}) -> + DDocId = convert_to_design_id(DDocId0), + Filt = fun(Idx) -> mango_idx:ddoc(Idx) == DDocId end, + Id = {<<"id">>, DDocId}, + case mango_idx:delete(Filt, TxDb, Idxs, Opts) of + {ok, true} -> + {[{[Id, {<<"ok">>, true}]} | Success0], Fail0}; + {error, Error} -> + {Success0, [{[Id, {<<"error">>, Error}]} | Fail0]} + end + end, {[], []}, DDocs) + end), chttpd:send_json(Req, {[{<<"success">>, Success}, {<<"fail">>, Fail}]}); handle_index_req(#httpd{path_parts=[_, <<"_index">>, @@ -144,15 +149,18 @@ handle_index_req(#httpd{method='DELETE', handle_index_req(#httpd{method='DELETE', path_parts=[_, _, DDocId0, Type, Name]}=Req, Db) -> - Idxs = mango_idx:list(Db), - DDocId = convert_to_design_id(DDocId0), - Filt = fun(Idx) -> - IsDDoc = mango_idx:ddoc(Idx) == DDocId, - IsType = mango_idx:type(Idx) == Type, - IsName = mango_idx:name(Idx) == Name, - IsDDoc andalso IsType andalso IsName - end, - case mango_idx:delete(Filt, Db, Idxs, []) of + Result = fabric2_fdb:transactional(Db, fun(TxDb) -> + Idxs = mango_idx:list(TxDb), + DDocId = convert_to_design_id(DDocId0), + Filt = fun(Idx) -> + IsDDoc = mango_idx:ddoc(Idx) == DDocId, + IsType = mango_idx:type(Idx) == Type, + IsName = mango_idx:name(Idx) == Name, + IsDDoc andalso IsType andalso IsName + end, + mango_idx:delete(Filt, TxDb, Idxs, []) + end), + case Result of {ok, true} -> chttpd:send_json(Req, {[{ok, true}]}); {error, not_found} -> @@ -170,7 +178,9 @@ handle_explain_req(#httpd{method='POST'}=Req, Db) -> Body = chttpd:json_body_obj(Req), {ok, Opts0} = mango_opts:validate_find(Body), {value, {selector, Sel}, Opts} = lists:keytake(selector, 1, Opts0), - Resp = mango_crud:explain(Db, Sel, Opts), + Resp = fabric2_fdb:transactional(Db, fun(TxDb) -> + mango_crud:explain(TxDb, Sel, Opts) + end), chttpd:send_json(Req, Resp); handle_explain_req(Req, _Db) -> @@ -195,11 +205,6 @@ handle_find_req(Req, _Db) -> chttpd:send_method_not_allowed(Req, "POST"). -set_user_ctx(#httpd{user_ctx=Ctx}, Db) -> - {ok, NewDb} = couch_db:set_user_ctx(Db, Ctx), - NewDb. - - get_bulk_delete_ddocs(Opts) -> case lists:keyfind(docids, 1, Opts) of {docids, DDocs} when is_list(DDocs) -> diff --git a/src/mango/src/mango_idx.erl b/src/mango/src/mango_idx.erl index ba9f68fd0..99a35886f 100644 --- a/src/mango/src/mango_idx.erl +++ b/src/mango/src/mango_idx.erl @@ -86,7 +86,7 @@ get_usable_indexes(Db, Selector, Opts) -> GlobalIndexes = mango_cursor:remove_indexes_with_partial_filter_selector( ExistingIndexes ), - BuiltIndexes = mango_cursor:remove_unbuilt_indexes(GlobalIndexes), + BuiltIndexes = remove_unbuilt_indexes(GlobalIndexes), UserSpecifiedIndex = mango_cursor:maybe_filter_indexes_by_ddoc(ExistingIndexes, Opts), UsableIndexes0 = lists:usort(BuiltIndexes ++ UserSpecifiedIndex), @@ -399,6 +399,17 @@ get_legacy_selector(Def) -> Selector -> Selector end. +% remove any interactive indexes that are not built. If an index is not +% interactive than we do not remove it as it will be built when queried +remove_unbuilt_indexes(Indexes) -> + lists:filter(fun(Idx) -> + case Idx#idx.interactive of + true -> Idx#idx.build_status == ?INDEX_READY; + _ -> true + end + end, Indexes). + + -ifdef(TEST). -include_lib("eunit/include/eunit.hrl"). diff --git a/src/mango/src/mango_idx_view.erl b/src/mango/src/mango_idx_view.erl index 84be4180b..f80cc217b 100644 --- a/src/mango/src/mango_idx_view.erl +++ b/src/mango/src/mango_idx_view.erl @@ -34,7 +34,6 @@ -include_lib("couch/include/couch_db.hrl"). -include("mango.hrl"). -include("mango_idx.hrl"). --include("mango_idx_view.hrl"). validate_new(#idx{}=Idx, _Db) -> @@ -183,11 +182,11 @@ start_key([{'$eq', Key, '$eq', Key} | Rest]) -> end_key([]) -> - [?MAX_JSON_OBJ]; + [couch_views_encoding:max()]; end_key([{_, _, '$lt', Key} | Rest]) -> case mango_json:special(Key) of true -> - [?MAX_JSON_OBJ]; + [couch_views_encoding:max()]; false -> [Key | end_key(Rest)] end; diff --git a/src/mango/src/mango_idx_view.hrl b/src/mango/src/mango_idx_view.hrl deleted file mode 100644 index 0d213e56e..000000000 --- a/src/mango/src/mango_idx_view.hrl +++ /dev/null @@ -1,13 +0,0 @@ -% Licensed under the Apache License, Version 2.0 (the "License"); you may not -% use this file except in compliance with the License. You may obtain a copy of -% the License at -% -% http://www.apache.org/licenses/LICENSE-2.0 -% -% Unless required by applicable law or agreed to in writing, software -% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -% License for the specific language governing permissions and limitations under -% the License. - --define(MAX_JSON_OBJ, {<<255, 255, 255, 255>>}). \ No newline at end of file diff --git a/src/mango/src/mango_util.erl b/src/mango/src/mango_util.erl index 0d31f15f9..18a643985 100644 --- a/src/mango/src/mango_util.erl +++ b/src/mango/src/mango_util.erl @@ -85,14 +85,7 @@ open_doc(Db, DocId) -> open_doc(Db, DocId, Options) -> - case mango_util:defer(fabric, open_doc, [Db, DocId, Options]) of - {ok, Doc} -> - {ok, Doc}; - {not_found, _} -> - not_found; - _ -> - ?MANGO_ERROR({error_loading_doc, DocId}) - end. + fabric2_db:open_doc(Db, DocId, Options). open_ddocs(Db) -> @@ -111,7 +104,7 @@ load_ddoc(Db, DDocId, DbOpts) -> case open_doc(Db, DDocId, DbOpts) of {ok, Doc} -> {ok, check_lang(Doc)}; - not_found -> + {not_found, missing} -> Body = {[ {<<"language">>, <<"query">>} ]}, -- cgit v1.2.1 From 1bc7c54ac2aee5302ce53a7a0c41867495f34704 Mon Sep 17 00:00:00 2001 From: Garren Smith Date: Tue, 31 Mar 2020 11:39:32 +0200 Subject: Update mango test creds to same as elixir tests --- Makefile | 2 +- src/mango/test/README.md | 4 ++-- src/mango/test/mango.py | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Makefile b/Makefile index b3eb64c99..9622c0ea0 100644 --- a/Makefile +++ b/Makefile @@ -349,7 +349,7 @@ mango-test: devclean all @cd src/mango && \ python3 -m venv .venv && \ .venv/bin/python3 -m pip install -r requirements.txt - @cd src/mango && ../../dev/run "$(TEST_OPTS)" -n 1 --admin=testuser:testpass '.venv/bin/python3 -m nose --with-xunit' + @cd src/mango && ../../dev/run "$(TEST_OPTS)" -n 1 --admin=adm:pass '.venv/bin/python3 -m nose --with-xunit' ################################################################################ # Developing diff --git a/src/mango/test/README.md b/src/mango/test/README.md index 509e32e47..08693a2e1 100644 --- a/src/mango/test/README.md +++ b/src/mango/test/README.md @@ -23,7 +23,7 @@ Test configuration The following environment variables can be used to configure the test fixtures: * `COUCH_HOST` - root url (including port) of the CouchDB instance to run the tests against. Default is `"http://127.0.0.1:15984"`. - * `COUCH_USER` - CouchDB username (with admin premissions). Default is `"testuser"`. - * `COUCH_PASSWORD` - CouchDB password. Default is `"testpass"`. + * `COUCH_USER` - CouchDB username (with admin premissions). Default is `"adm"`. + * `COUCH_PASSWORD` - CouchDB password. Default is `"pass"`. * `COUCH_AUTH_HEADER` - Optional Authorization header value. If specified, this is used instead of basic authentication with the username/password variables above. * `MANGO_TEXT_INDEXES` - Set to `"1"` to run the tests only applicable to text indexes. diff --git a/src/mango/test/mango.py b/src/mango/test/mango.py index 638de4787..db0fab057 100644 --- a/src/mango/test/mango.py +++ b/src/mango/test/mango.py @@ -48,8 +48,8 @@ class Database(object): dbname, host="127.0.0.1", port="15984", - user="testuser", - password="testpass", + user="adm", + password="pass", ): root_url = get_from_environment("COUCH_HOST", "http://{}:{}".format(host, port)) auth_header = get_from_environment("COUCH_AUTH_HEADER", None) -- cgit v1.2.1 From 29ef7fb37007821edd8a0673226090cd47d48bab Mon Sep 17 00:00:00 2001 From: Garren Smith Date: Tue, 31 Mar 2020 11:39:58 +0200 Subject: update mango tests to work with Mango on FDB --- Makefile | 5 ++- src/mango/test/01-index-crud-test.py | 15 ++++++++ src/mango/test/02-basic-find-test.py | 15 -------- src/mango/test/05-index-selection-test.py | 7 +++- src/mango/test/12-use-correct-index-test.py | 38 +++++++++++-------- src/mango/test/13-stable-update-test.py | 51 -------------------------- src/mango/test/13-users-db-find-test.py | 7 +++- src/mango/test/15-execution-stats-test.py | 7 +--- src/mango/test/17-multi-type-value-test.py | 4 +- src/mango/test/19-find-conflicts.py | 7 ++-- src/mango/test/20-no-timeout-test.py | 32 ---------------- src/mango/test/22-build-wait-selected-index.py | 50 +++++++++++++++++++++++++ src/mango/test/mango.py | 20 +++++++--- src/mango/test/user_docs.py | 7 ++-- 14 files changed, 129 insertions(+), 136 deletions(-) delete mode 100644 src/mango/test/13-stable-update-test.py delete mode 100644 src/mango/test/20-no-timeout-test.py create mode 100644 src/mango/test/22-build-wait-selected-index.py diff --git a/Makefile b/Makefile index 9622c0ea0..cc33c61d8 100644 --- a/Makefile +++ b/Makefile @@ -160,9 +160,10 @@ endif .PHONY: check-fdb check-fdb: - make eunit apps=couch_eval,couch_expiring_cache,ctrace,couch_jobs,couch_views,fabric + make eunit apps=couch_eval,couch_expiring_cache,ctrace,couch_jobs,couch_views,fabric,mango make elixir tests=test/elixir/test/basics_test.exs,test/elixir/test/replication_test.exs,test/elixir/test/map_test.exs,test/elixir/test/all_docs_test.exs,test/elixir/test/bulk_docs_test.exs make exunit tests=src/couch_rate/test/exunit/ + make mango-test .PHONY: eunit # target: eunit - Run EUnit tests, use EUNIT_OPTS to provide custom options @@ -349,7 +350,7 @@ mango-test: devclean all @cd src/mango && \ python3 -m venv .venv && \ .venv/bin/python3 -m pip install -r requirements.txt - @cd src/mango && ../../dev/run "$(TEST_OPTS)" -n 1 --admin=adm:pass '.venv/bin/python3 -m nose --with-xunit' + @cd src/mango && ../../dev/run "$(TEST_OPTS)" -n 1 --admin=adm:pass --erlang-config=rel/files/eunit.config '.venv/bin/python3 -m nose -v --with-xunit' ################################################################################ # Developing diff --git a/src/mango/test/01-index-crud-test.py b/src/mango/test/01-index-crud-test.py index b60239992..13ae300dd 100644 --- a/src/mango/test/01-index-crud-test.py +++ b/src/mango/test/01-index-crud-test.py @@ -113,6 +113,21 @@ class IndexCrudTests(mango.DbPerClass): return raise AssertionError("index not created") + def test_ignore_design_docs(self): + fields = ["baz", "foo"] + ret = self.db.create_index(fields, name="idx_02") + assert ret is True + self.db.save_doc({ + "_id": "_design/ignore", + "views": { + "view1": { + "map": "function (doc) { emit(doc._id, 1)}" + } + } + }) + indexes = self.db.list_indexes() + self.assertEqual(len(indexes), 2) + def test_read_idx_doc(self): self.db.create_index(["foo", "bar"], name="idx_01") self.db.create_index(["hello", "bar"]) diff --git a/src/mango/test/02-basic-find-test.py b/src/mango/test/02-basic-find-test.py index afdba03a2..2a03a3a55 100644 --- a/src/mango/test/02-basic-find-test.py +++ b/src/mango/test/02-basic-find-test.py @@ -100,16 +100,6 @@ class BasicFindTests(mango.UserDocsTests): else: raise AssertionError("bad find") - def test_bad_r(self): - bad_rs = ([None, True, False, 1.2, "no limit!", {"foo": "bar"}, [2]],) - for br in bad_rs: - try: - self.db.find({"int": {"$gt": 2}}, r=br) - except Exception as e: - assert e.response.status_code == 400 - else: - raise AssertionError("bad find") - def test_bad_conflicts(self): bad_conflicts = ([None, 1.2, "no limit!", {"foo": "bar"}, [2]],) for bc in bad_conflicts: @@ -262,11 +252,6 @@ class BasicFindTests(mango.UserDocsTests): assert sorted(d.keys()) == ["location", "user_id"] assert sorted(d["location"].keys()) == ["address"] - def test_r(self): - for r in [1, 2, 3]: - docs = self.db.find({"age": {"$gt": 0}}, r=r) - assert len(docs) == 15 - def test_empty(self): docs = self.db.find({}) # 15 users diff --git a/src/mango/test/05-index-selection-test.py b/src/mango/test/05-index-selection-test.py index cb4d32986..bae3d58f1 100644 --- a/src/mango/test/05-index-selection-test.py +++ b/src/mango/test/05-index-selection-test.py @@ -14,6 +14,8 @@ import mango import user_docs import unittest +import requests + class IndexSelectionTests: def test_basic(self): @@ -201,8 +203,11 @@ class IndexSelectionTests: } }, } - with self.assertRaises(KeyError): + try: self.db.save_doc(design_doc) + assert False, "Should not get here." + except requests.exceptions.HTTPError as e: + self.assertEqual(e.response.json()['error'], 'invalid_design_doc') def test_explain_sort_reverse(self): selector = {"manager": {"$gt": None}} diff --git a/src/mango/test/12-use-correct-index-test.py b/src/mango/test/12-use-correct-index-test.py index c21ad6c5e..a7f07b5e8 100644 --- a/src/mango/test/12-use-correct-index-test.py +++ b/src/mango/test/12-use-correct-index-test.py @@ -54,36 +54,41 @@ class ChooseCorrectIndexForDocs(mango.DbPerClass): self.db.save_docs(copy.deepcopy(DOCS)) def test_choose_index_with_one_field_in_index(self): - self.db.create_index(["name", "age", "user_id"], ddoc="aaa") - self.db.create_index(["name"], ddoc="zzz") + self.db.create_index(["name", "age", "user_id"], ddoc="aaa", wait_for_built_index=False) + self.db.create_index(["name"], ddoc="zzz", wait_for_built_index=False) + self.db.wait_for_built_indexes() explain = self.db.find({"name": "Eddie"}, explain=True) self.assertEqual(explain["index"]["ddoc"], "_design/zzz") def test_choose_index_with_two(self): - self.db.create_index(["name", "age", "user_id"], ddoc="aaa") - self.db.create_index(["name", "age"], ddoc="bbb") - self.db.create_index(["name"], ddoc="zzz") + self.db.create_index(["name", "age", "user_id"], ddoc="aaa", wait_for_built_index=False) + self.db.create_index(["name", "age"], ddoc="bbb", wait_for_built_index=False) + self.db.create_index(["name"], ddoc="zzz", wait_for_built_index=False) + self.db.wait_for_built_indexes() explain = self.db.find({"name": "Eddie", "age": {"$gte": 12}}, explain=True) self.assertEqual(explain["index"]["ddoc"], "_design/bbb") def test_choose_index_alphabetically(self): - self.db.create_index(["name"], ddoc="aaa") - self.db.create_index(["name"], ddoc="bbb") - self.db.create_index(["name"], ddoc="zzz") + self.db.create_index(["name"], ddoc="aaa", wait_for_built_index=False) + self.db.create_index(["name"], ddoc="bbb", wait_for_built_index=False) + self.db.create_index(["name"], ddoc="zzz", wait_for_built_index=False) + self.db.wait_for_built_indexes() explain = self.db.find({"name": "Eddie", "age": {"$gte": 12}}, explain=True) self.assertEqual(explain["index"]["ddoc"], "_design/aaa") def test_choose_index_most_accurate(self): - self.db.create_index(["name", "age", "user_id"], ddoc="aaa") - self.db.create_index(["name", "age"], ddoc="bbb") - self.db.create_index(["name"], ddoc="zzz") + self.db.create_index(["name", "age", "user_id"], ddoc="aaa", wait_for_built_index=False) + self.db.create_index(["name", "age"], ddoc="bbb", wait_for_built_index=False) + self.db.create_index(["name"], ddoc="zzz", wait_for_built_index=False) + self.db.wait_for_built_indexes() explain = self.db.find({"name": "Eddie", "age": {"$gte": 12}}, explain=True) self.assertEqual(explain["index"]["ddoc"], "_design/bbb") def test_choose_index_most_accurate_in_memory_selector(self): - self.db.create_index(["name", "location", "user_id"], ddoc="aaa") - self.db.create_index(["name", "age", "user_id"], ddoc="bbb") - self.db.create_index(["name"], ddoc="zzz") + self.db.create_index(["name", "location", "user_id"], ddoc="aaa", wait_for_built_index=False) + self.db.create_index(["name", "age", "user_id"], ddoc="bbb", wait_for_built_index=False) + self.db.create_index(["name"], ddoc="zzz", wait_for_built_index=False) + self.db.wait_for_built_indexes() explain = self.db.find({"name": "Eddie", "number": {"$lte": 12}}, explain=True) self.assertEqual(explain["index"]["ddoc"], "_design/zzz") @@ -100,8 +105,9 @@ class ChooseCorrectIndexForDocs(mango.DbPerClass): def test_chooses_idxA(self): DOCS2 = [{"a": 1, "b": 1, "c": 1}, {"a": 1000, "d": 1000, "e": 1000}] self.db.save_docs(copy.deepcopy(DOCS2)) - self.db.create_index(["a", "b", "c"]) - self.db.create_index(["a", "d", "e"]) + self.db.create_index(["a", "b", "c"], wait_for_built_index=False) + self.db.create_index(["a", "d", "e"], wait_for_built_index=False) + self.db.wait_for_built_indexes() explain = self.db.find( {"a": {"$gt": 0}, "b": {"$gt": 0}, "c": {"$gt": 0}}, explain=True ) diff --git a/src/mango/test/13-stable-update-test.py b/src/mango/test/13-stable-update-test.py deleted file mode 100644 index 303f3fab1..000000000 --- a/src/mango/test/13-stable-update-test.py +++ /dev/null @@ -1,51 +0,0 @@ -# Licensed under the Apache License, Version 2.0 (the "License"); you may not -# use this file except in compliance with the License. You may obtain a copy of -# the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations under -# the License. - -import copy -import mango - -DOCS1 = [ - { - "_id": "54af50626de419f5109c962f", - "user_id": 0, - "age": 10, - "name": "Jimi", - "location": "UK", - "number": 4, - }, - { - "_id": "54af50622071121b25402dc3", - "user_id": 1, - "age": 12, - "name": "Eddie", - "location": "ZAR", - "number": 2, - }, -] - - -class SupportStableAndUpdate(mango.DbPerClass): - def setUp(self): - self.db.recreate() - # Hack to prevent auto-indexer from foiling update=False test - # https://github.com/apache/couchdb/issues/2313 - self.db.save_doc( - {"_id": "_design/foo", "language": "query", "autoupdate": False} - ) - self.db.create_index(["name"], ddoc="foo") - self.db.save_docs(copy.deepcopy(DOCS1)) - - def test_update_updates_view_when_specified(self): - docs = self.db.find({"name": "Eddie"}, update=False) - assert len(docs) == 0 - docs = self.db.find({"name": "Eddie"}, update=True) - assert len(docs) == 1 diff --git a/src/mango/test/13-users-db-find-test.py b/src/mango/test/13-users-db-find-test.py index 73d15ea1a..9f9b53a81 100644 --- a/src/mango/test/13-users-db-find-test.py +++ b/src/mango/test/13-users-db-find-test.py @@ -12,10 +12,15 @@ # the License. -import mango, requests +import mango, requests, unittest +# Re-enable once the _users db is implemented class UsersDbFindTests(mango.UsersDbTests): + @classmethod + def setUpClass(klass): + raise unittest.SkipTest("Re-enable once the _users db is implemented") + def test_simple_find(self): docs = self.db.find({"name": {"$eq": "demo02"}}) assert len(docs) == 1 diff --git a/src/mango/test/15-execution-stats-test.py b/src/mango/test/15-execution-stats-test.py index 537a19add..6ccc04b44 100644 --- a/src/mango/test/15-execution-stats-test.py +++ b/src/mango/test/15-execution-stats-test.py @@ -22,7 +22,6 @@ class ExecutionStatsTests(mango.UserDocsTests): self.assertEqual(len(resp["docs"]), 3) self.assertEqual(resp["execution_stats"]["total_keys_examined"], 0) self.assertEqual(resp["execution_stats"]["total_docs_examined"], 3) - self.assertEqual(resp["execution_stats"]["total_quorum_docs_examined"], 0) self.assertEqual(resp["execution_stats"]["results_returned"], 3) # See https://github.com/apache/couchdb/issues/1732 # Erlang os:timestamp() only has ms accuracy on Windows! @@ -35,12 +34,11 @@ class ExecutionStatsTests(mango.UserDocsTests): def test_quorum_json_index(self): resp = self.db.find( - {"age": {"$lt": 35}}, return_raw=True, r=3, executionStats=True + {"age": {"$lt": 35}}, return_raw=True, executionStats=True ) self.assertEqual(len(resp["docs"]), 3) self.assertEqual(resp["execution_stats"]["total_keys_examined"], 0) - self.assertEqual(resp["execution_stats"]["total_docs_examined"], 0) - self.assertEqual(resp["execution_stats"]["total_quorum_docs_examined"], 3) + self.assertEqual(resp["execution_stats"]["total_docs_examined"], 3) self.assertEqual(resp["execution_stats"]["results_returned"], 3) # See https://github.com/apache/couchdb/issues/1732 # Erlang os:timestamp() only has ms accuracy on Windows! @@ -70,7 +68,6 @@ class ExecutionStatsTests_Text(mango.UserDocsTextTests): self.assertEqual(len(resp["docs"]), 1) self.assertEqual(resp["execution_stats"]["total_keys_examined"], 0) self.assertEqual(resp["execution_stats"]["total_docs_examined"], 1) - self.assertEqual(resp["execution_stats"]["total_quorum_docs_examined"], 0) self.assertEqual(resp["execution_stats"]["results_returned"], 1) self.assertGreater(resp["execution_stats"]["execution_time_ms"], 0) diff --git a/src/mango/test/17-multi-type-value-test.py b/src/mango/test/17-multi-type-value-test.py index 21e7afda4..5a8fcedef 100644 --- a/src/mango/test/17-multi-type-value-test.py +++ b/src/mango/test/17-multi-type-value-test.py @@ -53,9 +53,9 @@ class MultiValueFieldTests: class MultiValueFieldJSONTests(mango.DbPerClass, MultiValueFieldTests): def setUp(self): self.db.recreate() + self.db.create_index(["name"], wait_for_built_index=False) + self.db.create_index(["age", "name"], wait_for_built_index=True) self.db.save_docs(copy.deepcopy(DOCS)) - self.db.create_index(["name"]) - self.db.create_index(["age", "name"]) # @unittest.skipUnless(mango.has_text_service(), "requires text service") diff --git a/src/mango/test/19-find-conflicts.py b/src/mango/test/19-find-conflicts.py index bf865d6ea..3bf3c0693 100644 --- a/src/mango/test/19-find-conflicts.py +++ b/src/mango/test/19-find-conflicts.py @@ -12,11 +12,12 @@ import mango import copy +import unittest -DOC = [{"_id": "doc", "a": 2}] +DOC = [{"_id": "doc", "a": 2}, {"_id": "doc1", "b": 2}] CONFLICT = [{"_id": "doc", "_rev": "1-23202479633c2b380f79507a776743d5", "a": 1}] - +CONFLICT2 = [{"_id": "doc1", "_rev": "1-23202479633c2b380f79507a776743d5", "b": 1}] class ChooseCorrectIndexForDocs(mango.DbPerClass): def setUp(self): @@ -25,7 +26,7 @@ class ChooseCorrectIndexForDocs(mango.DbPerClass): self.db.save_docs_with_conflicts(copy.deepcopy(CONFLICT)) def test_retrieve_conflicts(self): - self.db.create_index(["_conflicts"]) + self.db.create_index(["_conflicts"], wait_for_built_index=False) result = self.db.find({"_conflicts": {"$exists": True}}, conflicts=True) self.assertEqual( result[0]["_conflicts"][0], "1-23202479633c2b380f79507a776743d5" diff --git a/src/mango/test/20-no-timeout-test.py b/src/mango/test/20-no-timeout-test.py deleted file mode 100644 index cffdfc335..000000000 --- a/src/mango/test/20-no-timeout-test.py +++ /dev/null @@ -1,32 +0,0 @@ -# Licensed under the Apache License, Version 2.0 (the "License"); you may not -# use this file except in compliance with the License. You may obtain a copy of -# the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations under -# the License. - -import mango -import copy -import unittest - - -class LongRunningMangoTest(mango.DbPerClass): - def setUp(self): - self.db.recreate() - docs = [] - for i in range(100000): - docs.append({"_id": str(i), "another": "field"}) - if i % 20000 == 0: - self.db.save_docs(docs) - docs = [] - - # This test should run to completion and not timeout - def test_query_does_not_time_out(self): - selector = {"_id": {"$gt": 0}, "another": "wrong"} - docs = self.db.find(selector) - self.assertEqual(len(docs), 0) diff --git a/src/mango/test/22-build-wait-selected-index.py b/src/mango/test/22-build-wait-selected-index.py new file mode 100644 index 000000000..fd856f4d6 --- /dev/null +++ b/src/mango/test/22-build-wait-selected-index.py @@ -0,0 +1,50 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); you may not +# use this file except in compliance with the License. You may obtain a copy of +# the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations under +# the License. + +import mango +import copy +import unittest + + +class BuildAndWaitOnSelectedIndex(mango.DbPerClass): + def setUp(self): + self.db.recreate() + docs = [] + for i in range(1000): + docs.append({"_id": str(i), "val": i}) + if len(docs) == 250: + self.db.save_docs(docs) + docs = [] + + def test_wait_for_query(self): + self.db.create_index(["val"], ddoc="my-ddoc", wait_for_built_index=False) + + explain = self.db.find({'val': {"$gt": 990}}, use_index="my-ddoc", explain=True) + self.assertEqual(explain["index"]["ddoc"], "_design/my-ddoc") + + docs = self.db.find({'val': {"$gte": 990}}, limit=10) + + self.assertEqual(len(docs), 10) + + def test_dont_wait(self): + self.db.create_index(["val"], ddoc="my-ddoc", wait_for_built_index=False) + + explain = self.db.find({'val': {"$gt": 990}}, explain=True) + self.assertEqual(explain["index"]["name"], "_all_docs") + + docs = self.db.find({'val': {"$gte": 990}}) + self.assertEqual(len(docs), 10) + + def test_update_false(self): + self.db.create_index(["val"], ddoc="my-ddoc", wait_for_built_index=False) + docs = self.db.find({'val': {"$gte": 990}}, update=False, use_index="my-ddoc") + self.assertEqual(docs, []) diff --git a/src/mango/test/mango.py b/src/mango/test/mango.py index db0fab057..e013e0e1e 100644 --- a/src/mango/test/mango.py +++ b/src/mango/test/mango.py @@ -139,8 +139,9 @@ class Database(object): ddoc=None, partial_filter_selector=None, selector=None, + wait_for_built_index=True, ): - body = {"index": {"fields": fields}, "type": idx_type, "w": 3} + body = {"index": {"fields": fields}, "type": idx_type} if name is not None: body["name"] = name if ddoc is not None: @@ -156,13 +157,22 @@ class Database(object): assert r.json()["name"] is not None created = r.json()["result"] == "created" - if created: - # wait until the database reports the index as available - while len(self.get_index(r.json()["id"], r.json()["name"])) < 1: - delay(t=0.1) + if created and wait_for_built_index: + # wait until the database reports the index as available and build + while True: + idx = self.get_index(r.json()["id"], r.json()["name"])[0] + if idx["build_status"] == "ready": + break + delay(t=0.2) return created + def wait_for_built_indexes(self): + while True: + if all(idx["build_status"] == "ready" for idx in self.list_indexes()): + break + delay(t=0.2) + def create_text_index( self, analyzer=None, diff --git a/src/mango/test/user_docs.py b/src/mango/test/user_docs.py index 8f0ed2e04..d69e6d652 100644 --- a/src/mango/test/user_docs.py +++ b/src/mango/test/user_docs.py @@ -61,12 +61,11 @@ def setup_users(db, **kwargs): def setup(db, index_type="view", **kwargs): db.recreate() - db.save_docs(copy.deepcopy(DOCS)) if index_type == "view": add_view_indexes(db, kwargs) elif index_type == "text": add_text_indexes(db, kwargs) - + db.save_docs(copy.deepcopy(DOCS)) def add_view_indexes(db, kwargs): indexes = [ @@ -90,7 +89,9 @@ def add_view_indexes(db, kwargs): (["ordered"], "ordered"), ] for (idx, name) in indexes: - assert db.create_index(idx, name=name, ddoc=name) is True + assert db.create_index(idx, name=name, ddoc=name, + wait_for_built_index=False) is True + db.wait_for_built_indexes() def add_text_indexes(db, kwargs): -- cgit v1.2.1 From 5652e72e43406b7e4b743ee3fe7e2570aec77e95 Mon Sep 17 00:00:00 2001 From: Garren Smith Date: Tue, 31 Mar 2020 15:26:03 +0200 Subject: remove defer and load_ddocs from mango_utils Clean up unused mango_utils functions. --- src/mango/src/mango_idx.erl | 16 --------------- src/mango/src/mango_util.erl | 47 -------------------------------------------- 2 files changed, 63 deletions(-) diff --git a/src/mango/src/mango_idx.erl b/src/mango/src/mango_idx.erl index 99a35886f..9907cc781 100644 --- a/src/mango/src/mango_idx.erl +++ b/src/mango/src/mango_idx.erl @@ -19,7 +19,6 @@ -export([ list/1, - recover/1, new/2, validate_new/2, @@ -105,21 +104,6 @@ mango_sort_error(_Db, _Opts) -> ?MANGO_ERROR({no_usable_index, missing_sort_index}). -recover(Db) -> - {ok, DDocs0} = mango_util:open_ddocs(Db), - Pred = fun({Props}) -> - case proplists:get_value(<<"language">>, Props) of - <<"query">> -> true; - _ -> false - end - end, - DDocs = lists:filter(Pred, DDocs0), - Special = special(Db), - {ok, Special ++ lists:flatmap(fun(Doc) -> - from_ddoc(Db, Doc) - end, DDocs)}. - - get_sort_fields(Opts) -> case lists:keyfind(sort, 1, Opts) of {sort, Sort} -> diff --git a/src/mango/src/mango_util.erl b/src/mango/src/mango_util.erl index 18a643985..d649f95f1 100644 --- a/src/mango/src/mango_util.erl +++ b/src/mango/src/mango_util.erl @@ -15,13 +15,9 @@ -export([ open_doc/2, - open_ddocs/1, load_ddoc/2, load_ddoc/3, - defer/3, - do_defer/3, - assert_ejson/1, to_lower/1, @@ -88,15 +84,6 @@ open_doc(Db, DocId, Options) -> fabric2_db:open_doc(Db, DocId, Options). -open_ddocs(Db) -> - case mango_util:defer(fabric, design_docs, [Db]) of - {ok, Docs} -> - {ok, Docs}; - _ -> - ?MANGO_ERROR(error_loading_ddocs) - end. - - load_ddoc(Db, DDocId) -> load_ddoc(Db, DDocId, [deleted, ejson_body]). @@ -112,40 +99,6 @@ load_ddoc(Db, DDocId, DbOpts) -> end. -defer(Mod, Fun, Args) -> - {Pid, Ref} = erlang:spawn_monitor(?MODULE, do_defer, [Mod, Fun, Args]), - receive - {'DOWN', Ref, process, Pid, {mango_defer_ok, Value}} -> - Value; - {'DOWN', Ref, process, Pid, {mango_defer_throw, Value}} -> - erlang:throw(Value); - {'DOWN', Ref, process, Pid, {mango_defer_error, Value}} -> - erlang:error(Value); - {'DOWN', Ref, process, Pid, {mango_defer_exit, Value}} -> - erlang:exit(Value) - end. - - -do_defer(Mod, Fun, Args) -> - try erlang:apply(Mod, Fun, Args) of - Resp -> - erlang:exit({mango_defer_ok, Resp}) - catch - throw:Error -> - Stack = erlang:get_stacktrace(), - couch_log:error("Defered error: ~w~n ~p", [{throw, Error}, Stack]), - erlang:exit({mango_defer_throw, Error}); - error:Error -> - Stack = erlang:get_stacktrace(), - couch_log:error("Defered error: ~w~n ~p", [{error, Error}, Stack]), - erlang:exit({mango_defer_error, Error}); - exit:Error -> - Stack = erlang:get_stacktrace(), - couch_log:error("Defered error: ~w~n ~p", [{exit, Error}, Stack]), - erlang:exit({mango_defer_exit, Error}) - end. - - assert_ejson({Props}) -> assert_ejson_obj(Props); assert_ejson(Vals) when is_list(Vals) -> -- cgit v1.2.1 From 2ba98a89cda88ccc9be2b4e4fb481086d1364e42 Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Mon, 6 Apr 2020 17:39:22 -0400 Subject: Return better responses for endpoints which are not implemented Endpoints which are removed return a 410 response: - _show - _list - _rewrite Endpoints which will be implemented in CouchDB 4.x eventually now return a 510 response: - _purge - _purge_infos_limit Endpoints which return a 2xx but are a no-op effectively: - _compact - _view_cleanup --- src/chttpd/src/chttpd_db.erl | 21 +++++++-------------- src/chttpd/src/chttpd_httpd_handlers.erl | 32 +++++++++++++++++++++++++++----- src/chttpd/src/chttpd_view.erl | 5 +---- 3 files changed, 35 insertions(+), 23 deletions(-) diff --git a/src/chttpd/src/chttpd_db.erl b/src/chttpd/src/chttpd_db.erl index e9b33f001..deaca4855 100644 --- a/src/chttpd/src/chttpd_db.erl +++ b/src/chttpd/src/chttpd_db.erl @@ -252,21 +252,14 @@ maybe_flush_changes_feed(Acc0, Data, Len) -> }, {ok, Acc}. -handle_compact_req(#httpd{method='POST'}=Req, Db) -> + +% Return the same response as if a compaction succeeded even though _compaction +% isn't a valid operation in CouchDB >= 4.x anymore. This is mostly to not +% break existing user script which maybe periodically call this endpoint. In +% the future this endpoint will return a 410 response then it will be removed. +handle_compact_req(#httpd{method='POST'}=Req, _Db) -> chttpd:validate_ctype(Req, "application/json"), - case Req#httpd.path_parts of - [_DbName, <<"_compact">>] -> - ok = fabric:compact(Db), - send_json(Req, 202, {[{ok, true}]}); - [DbName, <<"_compact">>, DesignName | _] -> - case ddoc_cache:open(DbName, <<"_design/", DesignName/binary>>) of - {ok, _DDoc} -> - ok = fabric:compact(Db, DesignName), - send_json(Req, 202, {[{ok, true}]}); - Error -> - throw(Error) - end - end; + send_json(Req, 202, {[{ok, true}]}); handle_compact_req(Req, _Db) -> send_method_not_allowed(Req, "POST"). diff --git a/src/chttpd/src/chttpd_httpd_handlers.erl b/src/chttpd/src/chttpd_httpd_handlers.erl index 831c014b3..be6c0a13e 100644 --- a/src/chttpd/src/chttpd_httpd_handlers.erl +++ b/src/chttpd/src/chttpd_httpd_handlers.erl @@ -14,6 +14,13 @@ -export([url_handler/1, db_handler/1, design_handler/1, handler_info/3]). +-export([ + not_supported/2, + not_supported/3, + not_implemented/2 +]). + + -include_lib("couch/include/couch_db.hrl"). @@ -32,20 +39,22 @@ url_handler(<<"_session">>) -> fun chttpd_auth:handle_session_req/1; url_handler(<<"_up">>) -> fun chttpd_misc:handle_up_req/1; url_handler(_) -> no_match. -db_handler(<<"_view_cleanup">>) -> fun chttpd_db:handle_view_cleanup_req/2; +db_handler(<<"_view_cleanup">>) -> fun ?MODULE:not_implemented/2; db_handler(<<"_compact">>) -> fun chttpd_db:handle_compact_req/2; db_handler(<<"_design">>) -> fun chttpd_db:handle_design_req/2; db_handler(<<"_partition">>) -> fun chttpd_db:handle_partition_req/2; -db_handler(<<"_temp_view">>) -> fun chttpd_view:handle_temp_view_req/2; +db_handler(<<"_temp_view">>) -> fun ?MODULE:not_supported/2; db_handler(<<"_changes">>) -> fun chttpd_db:handle_changes_req/2; +db_handler(<<"_purge">>) -> fun ?MODULE:not_implemented/2; +db_handler(<<"_purged_infos_limit">>) -> fun ?MODULE:not_implemented/2; db_handler(_) -> no_match. design_handler(<<"_view">>) -> fun chttpd_view:handle_view_req/3; -design_handler(<<"_show">>) -> fun chttpd_show:handle_doc_show_req/3; -design_handler(<<"_list">>) -> fun chttpd_show:handle_view_list_req/3; +design_handler(<<"_show">>) -> fun ?MODULE:not_supported/3; +design_handler(<<"_list">>) -> fun ?MODULE:not_supported/3; design_handler(<<"_update">>) -> fun chttpd_show:handle_doc_update_req/3; design_handler(<<"_info">>) -> fun chttpd_db:handle_design_info_req/3; -design_handler(<<"_rewrite">>) -> fun chttpd_rewrite:handle_rewrite_req/3; +design_handler(<<"_rewrite">>) -> fun ?MODULE:not_supported/3; design_handler(_) -> no_match. @@ -484,3 +493,16 @@ get_copy_destination(Req) -> unknown end. + +not_supported(#httpd{} = Req, Db, _DDoc) -> + not_supported(Req, Db). + + +not_supported(#httpd{} = Req, _Db) -> + Msg = <<"resource is not supported in CouchDB >= 4.x">>, + chttpd:send_error(Req, 410, gone, Msg). + + +not_implemented(#httpd{} = Req, _Db) -> + Msg = <<"resouce is not implemented">>, + chttpd:send_error(Req, 501, not_implemented, Msg). diff --git a/src/chttpd/src/chttpd_view.erl b/src/chttpd/src/chttpd_view.erl index 49ca1a793..84997e5a5 100644 --- a/src/chttpd/src/chttpd_view.erl +++ b/src/chttpd/src/chttpd_view.erl @@ -14,7 +14,7 @@ -include_lib("couch/include/couch_db.hrl"). -include_lib("couch_mrview/include/couch_mrview.hrl"). --export([handle_view_req/3, handle_temp_view_req/2]). +-export([handle_view_req/3]). multi_query_view(Req, Db, DDoc, ViewName, Queries) -> Args0 = couch_mrview_http:parse_params(Req, undefined), @@ -101,9 +101,6 @@ handle_view_req(#httpd{method='POST', handle_view_req(Req, _Db, _DDoc) -> chttpd:send_method_not_allowed(Req, "GET,POST,HEAD"). -handle_temp_view_req(Req, _Db) -> - Msg = <<"Temporary views are not supported in CouchDB">>, - chttpd:send_error(Req, 410, gone, Msg). % See https://github.com/apache/couchdb/issues/2168 assert_no_queries_param(undefined) -> -- cgit v1.2.1 From 1be2363b4dae31ab67d6bafd3f5a2620f5d784eb Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Mon, 6 Apr 2020 17:46:45 -0400 Subject: Fix POST _all_docs/queries endpoint Call couch_views module instead of the old fabric:query_view also needed to call `view_cb(complete, ...)` when using keys similar to how `all_docs_view/4` does it. --- src/chttpd/src/chttpd_db.erl | 4 +++- src/chttpd/src/chttpd_view.erl | 5 ++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/chttpd/src/chttpd_db.erl b/src/chttpd/src/chttpd_db.erl index deaca4855..16cfbd0df 100644 --- a/src/chttpd/src/chttpd_db.erl +++ b/src/chttpd/src/chttpd_db.erl @@ -831,7 +831,9 @@ multi_all_docs_view(Req, Db, OP, Queries) -> (#mrargs{keys = undefined} = Args, Acc0) -> send_all_docs(Db, Args, Acc0); (#mrargs{keys = Keys} = Args, Acc0) when is_list(Keys) -> - send_all_docs_keys(Db, Args, Acc0) + Acc1 = send_all_docs_keys(Db, Args, Acc0), + {ok, Acc2} = view_cb(complete, Acc1), + Acc2 end, VAcc0, ArgQueries), {ok, Resp1} = chttpd:send_delayed_chunk(VAcc1#vacc.resp, "\r\n]}"), chttpd:end_delayed_json_response(Resp1). diff --git a/src/chttpd/src/chttpd_view.erl b/src/chttpd/src/chttpd_view.erl index 84997e5a5..3ef9c2408 100644 --- a/src/chttpd/src/chttpd_view.erl +++ b/src/chttpd/src/chttpd_view.erl @@ -26,14 +26,13 @@ multi_query_view(Req, Db, DDoc, ViewName, Queries) -> QueryArg1 = couch_mrview_util:set_view_type(QueryArg, ViewName, Views), fabric_util:validate_args(Db, DDoc, QueryArg1) end, Queries), - Options = [{user_ctx, Req#httpd.user_ctx}], VAcc0 = #vacc{db=Db, req=Req, prepend="\r\n"}, FirstChunk = "{\"results\":[", {ok, Resp0} = chttpd:start_delayed_json_response(VAcc0#vacc.req, 200, [], FirstChunk), VAcc1 = VAcc0#vacc{resp=Resp0}, VAcc2 = lists:foldl(fun(Args, Acc0) -> - {ok, Acc1} = fabric:query_view(Db, Options, DDoc, ViewName, - fun view_cb/2, Acc0, Args), + Fun = fun view_cb/2, + {ok, Acc1} = couch_views:query(Db, DDoc, ViewName, Fun, Acc0, Args), Acc1 end, VAcc1, ArgQueries), {ok, Resp1} = chttpd:send_delayed_chunk(VAcc2#vacc.resp, "\r\n]}"), -- cgit v1.2.1 From e71a77d5c8bf9394a4130d98e59e8de2795ba4b3 Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Mon, 6 Apr 2020 17:48:59 -0400 Subject: Do not allow editing _security in _user database It should only be allowed if explicitly configured. Previously we did not propertly match on the database name and effectively always allowed it. --- src/chttpd/src/chttpd_db.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/chttpd/src/chttpd_db.erl b/src/chttpd/src/chttpd_db.erl index 16cfbd0df..730cf3ef5 100644 --- a/src/chttpd/src/chttpd_db.erl +++ b/src/chttpd/src/chttpd_db.erl @@ -1963,7 +1963,7 @@ extract_header_rev(Req, ExplicitRev) -> end. validate_security_can_be_edited(DbName) -> - UserDbName = config:get("chttpd_auth", "authentication_db", "_users"), + UserDbName = ?l2b(config:get("chttpd_auth", "authentication_db", "_users")), CanEditUserSecurityObject = config:get("couchdb","users_db_security_editable","false"), case {DbName,CanEditUserSecurityObject} of {UserDbName,"false"} -> -- cgit v1.2.1 From 1d6799f5239af5e36d089ae605f943a13bb4ed99 Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Mon, 6 Apr 2020 17:53:40 -0400 Subject: Start running chttpd eunit tests Enable running all chttpd tests. Some fixes needed for this to happen are: * Some tests were not valid (checking shard maps, etc) and were deleted * Some tests were disabled either because functionality is not implemented yet or simply to minimize the diff between 3.x and this branch for when we have to rebase * Some applications used for index querying had to be started explicitly * Mock updated to use new version of modules instead of old ones --- Makefile | 2 +- src/chttpd/src/chttpd_test_util.erl | 2 +- src/chttpd/src/chttpd_view.erl | 10 +- .../eunit/chttpd_db_bulk_get_multipart_test.erl | 31 +++--- src/chttpd/test/eunit/chttpd_db_bulk_get_test.erl | 30 +++--- src/chttpd/test/eunit/chttpd_db_test.erl | 38 ++----- src/chttpd/test/eunit/chttpd_dbs_info_test.erl | 13 ++- .../test/eunit/chttpd_open_revs_error_test.erl | 112 --------------------- src/chttpd/test/eunit/chttpd_purge_tests.erl | 6 +- src/chttpd/test/eunit/chttpd_security_tests.erl | 57 ++++++----- src/chttpd/test/eunit/chttpd_view_test.erl | 4 +- src/couch/src/couch_db.erl | 2 + 12 files changed, 92 insertions(+), 215 deletions(-) delete mode 100644 src/chttpd/test/eunit/chttpd_open_revs_error_test.erl diff --git a/Makefile b/Makefile index cc33c61d8..592093ea3 100644 --- a/Makefile +++ b/Makefile @@ -160,7 +160,7 @@ endif .PHONY: check-fdb check-fdb: - make eunit apps=couch_eval,couch_expiring_cache,ctrace,couch_jobs,couch_views,fabric,mango + make eunit apps=couch_eval,couch_expiring_cache,ctrace,couch_jobs,couch_views,fabric,mango,chttpd make elixir tests=test/elixir/test/basics_test.exs,test/elixir/test/replication_test.exs,test/elixir/test/map_test.exs,test/elixir/test/all_docs_test.exs,test/elixir/test/bulk_docs_test.exs make exunit tests=src/couch_rate/test/exunit/ make mango-test diff --git a/src/chttpd/src/chttpd_test_util.erl b/src/chttpd/src/chttpd_test_util.erl index a1a08eff4..8930a5a5e 100644 --- a/src/chttpd/src/chttpd_test_util.erl +++ b/src/chttpd/src/chttpd_test_util.erl @@ -21,7 +21,7 @@ start_couch() -> start_couch(?CONFIG_CHAIN). start_couch(IniFiles) -> - test_util:start_couch(IniFiles, [chttpd]). + test_util:start_couch(IniFiles, [couch_js, couch_views, chttpd]). stop_couch(Ctx) -> test_util:stop_couch(Ctx). diff --git a/src/chttpd/src/chttpd_view.erl b/src/chttpd/src/chttpd_view.erl index 3ef9c2408..c9340fbe2 100644 --- a/src/chttpd/src/chttpd_view.erl +++ b/src/chttpd/src/chttpd_view.erl @@ -136,7 +136,7 @@ check_multi_query_reduce_view_overrides_test_() -> t_check_include_docs_throw_validation_error() -> ?_test(begin Req = #httpd{qs = []}, - Db = test_util:fake_db([{name, <<"foo">>}]), + Db = #{name => <<"foo">>}, Query = {[{<<"include_docs">>, true}]}, Throw = {query_parse_error, <<"`include_docs` is invalid for reduce">>}, ?assertThrow(Throw, multi_query_view(Req, Db, ddoc, <<"v">>, [Query])) @@ -146,7 +146,7 @@ t_check_include_docs_throw_validation_error() -> t_check_user_can_override_individual_query_type() -> ?_test(begin Req = #httpd{qs = []}, - Db = test_util:fake_db([{name, <<"foo">>}]), + Db = #{name => <<"foo">>}, Query = {[{<<"include_docs">>, true}, {<<"reduce">>, false}]}, multi_query_view(Req, Db, ddoc, <<"v">>, [Query]), ?assertEqual(1, meck:num_calls(chttpd, start_delayed_json_response, '_')) @@ -157,7 +157,7 @@ setup_all() -> Views = [#mrview{reduce_funs = [{<<"v">>, <<"_count">>}]}], meck:expect(couch_mrview_util, ddoc_to_mrst, 2, {ok, #mrst{views = Views}}), meck:expect(chttpd, start_delayed_json_response, 4, {ok, resp}), - meck:expect(fabric, query_view, 7, {ok, #vacc{}}), + meck:expect(couch_views, query, 6, {ok, #vacc{}}), meck:expect(chttpd, send_delayed_chunk, 2, {ok, resp}), meck:expect(chttpd, end_delayed_json_response, 1, ok). @@ -169,8 +169,8 @@ teardown_all(_) -> setup() -> meck:reset([ chttpd, - couch_mrview_util, - fabric + couch_views, + couch_mrview_util ]). diff --git a/src/chttpd/test/eunit/chttpd_db_bulk_get_multipart_test.erl b/src/chttpd/test/eunit/chttpd_db_bulk_get_multipart_test.erl index 86a8eab1a..c0095d59d 100644 --- a/src/chttpd/test/eunit/chttpd_db_bulk_get_multipart_test.erl +++ b/src/chttpd/test/eunit/chttpd_db_bulk_get_multipart_test.erl @@ -39,7 +39,7 @@ setup() -> couch_epi, couch_httpd, couch_stats, - fabric, + fabric2_db, mochireq ]), spawn_accumulator(). @@ -78,13 +78,13 @@ bulk_get_test_() -> should_require_docs_field(_) -> Req = fake_request({[{}]}), - Db = test_util:fake_db([{name, <<"foo">>}]), + Db = #{name => <<"foo">>}, ?_assertThrow({bad_request, _}, chttpd_db:db_req(Req, Db)). should_not_accept_specific_query_params(_) -> Req = fake_request({[{<<"docs">>, []}]}), - Db = test_util:fake_db([{name, <<"foo">>}]), + Db = #{name => <<"foo">>}, lists:map(fun (Param) -> {Param, ?_assertThrow({bad_request, _}, begin BadReq = Req#httpd{qs = [{Param, ""}]}, @@ -95,7 +95,7 @@ should_not_accept_specific_query_params(_) -> should_return_empty_results_on_no_docs(Pid) -> Req = fake_request({[{<<"docs">>, []}]}), - Db = test_util:fake_db([{name, <<"foo">>}]), + Db = #{name => <<"foo">>}, chttpd_db:db_req(Req, Db), Results = get_results_from_response(Pid), ?_assertEqual([], Results). @@ -104,7 +104,7 @@ should_return_empty_results_on_no_docs(Pid) -> should_get_doc_with_all_revs(Pid) -> DocId = <<"docudoc">>, Req = fake_request(DocId), - Db = test_util:fake_db([{name, <<"foo">>}]), + Db = #{name => <<"foo">>}, DocRevA = #doc{id = DocId, body = {[{<<"_rev">>, <<"1-ABC">>}]}}, DocRevB = #doc{id = DocId, body = {[{<<"_rev">>, <<"1-CDE">>}]}}, @@ -120,7 +120,7 @@ should_validate_doc_with_bad_id(Pid) -> DocId = <<"_docudoc">>, Req = fake_request(DocId), - Db = test_util:fake_db([{name, <<"foo">>}]), + Db = #{name => <<"foo">>}, chttpd_db:db_req(Req, Db), Result = get_results_from_response(Pid), @@ -137,7 +137,7 @@ should_validate_doc_with_bad_rev(Pid) -> Rev = <<"revorev">>, Req = fake_request(DocId, Rev), - Db = test_util:fake_db([{name, <<"foo">>}]), + Db = #{name => <<"foo">>}, chttpd_db:db_req(Req, Db), Result = get_results_from_response(Pid), @@ -154,7 +154,7 @@ should_validate_missing_doc(Pid) -> Rev = <<"1-revorev">>, Req = fake_request(DocId, Rev), - Db = test_util:fake_db([{name, <<"foo">>}]), + Db = #{name => <<"foo">>}, mock_open_revs([{1,<<"revorev">>}], {ok, []}), chttpd_db:db_req(Req, Db), @@ -172,7 +172,7 @@ should_validate_bad_atts_since(Pid) -> Rev = <<"1-revorev">>, Req = fake_request(DocId, Rev, <<"badattsince">>), - Db = test_util:fake_db([{name, <<"foo">>}]), + Db = #{name => <<"foo">>}, mock_open_revs([{1,<<"revorev">>}], {ok, []}), chttpd_db:db_req(Req, Db), @@ -190,14 +190,13 @@ should_include_attachments_when_atts_since_specified(_) -> Rev = <<"1-revorev">>, Req = fake_request(DocId, Rev, [<<"1-abc">>]), - Db = test_util:fake_db([{name, <<"foo">>}]), + Db = #{name => <<"foo">>}, mock_open_revs([{1,<<"revorev">>}], {ok, []}), chttpd_db:db_req(Req, Db), - ?_assert(meck:called(fabric, open_revs, - ['_', DocId, [{1, <<"revorev">>}], - [{atts_since, [{1, <<"abc">>}]}, attachments, - {user_ctx, undefined}]])). + Options = [{atts_since, [{1, <<"abc">>}]}, attachments], + ?_assert(meck:called(fabric2_db, open_doc_revs, ['_', DocId, + [{1, <<"revorev">>}], Options])). %% helpers @@ -217,7 +216,7 @@ fake_request(DocId, Rev, AttsSince) -> mock_open_revs(RevsReq0, RevsResp) -> - ok = meck:expect(fabric, open_revs, + ok = meck:expect(fabric2_db, open_doc_revs, fun(_, _, RevsReq1, _) -> ?assertEqual(RevsReq0, RevsReq1), RevsResp @@ -259,7 +258,7 @@ mock(couch_stats) -> ok = meck:expect(couch_stats, update_gauge, fun(_, _) -> ok end), ok; mock(fabric) -> - ok = meck:new(fabric, [passthrough]), + ok = meck:new(fabric2_db, [passthrough]), ok; mock(config) -> ok = meck:new(config, [passthrough]), diff --git a/src/chttpd/test/eunit/chttpd_db_bulk_get_test.erl b/src/chttpd/test/eunit/chttpd_db_bulk_get_test.erl index 1a3411254..0e4778371 100644 --- a/src/chttpd/test/eunit/chttpd_db_bulk_get_test.erl +++ b/src/chttpd/test/eunit/chttpd_db_bulk_get_test.erl @@ -99,7 +99,8 @@ should_get_doc_with_all_revs(Pid) -> DocRevB = #doc{id = DocId, body = {[{<<"_rev">>, <<"1-CDE">>}]}}, mock_open_revs(all, {ok, [{ok, DocRevA}, {ok, DocRevB}]}), - chttpd_db:db_req(Req, test_util:fake_db([{name, <<"foo">>}])), + Db = #{name => <<"foo">>}, + chttpd_db:db_req(Req, Db), [{Result}] = get_results_from_response(Pid), ?assertEqual(DocId, couch_util:get_value(<<"id">>, Result)), @@ -119,7 +120,8 @@ should_validate_doc_with_bad_id(Pid) -> DocId = <<"_docudoc">>, Req = fake_request(DocId), - chttpd_db:db_req(Req, test_util:fake_db([{name, <<"foo">>}])), + Db = #{name => <<"foo">>}, + chttpd_db:db_req(Req, Db), [{Result}] = get_results_from_response(Pid), ?assertEqual(DocId, couch_util:get_value(<<"id">>, Result)), @@ -142,7 +144,8 @@ should_validate_doc_with_bad_rev(Pid) -> Rev = <<"revorev">>, Req = fake_request(DocId, Rev), - chttpd_db:db_req(Req, test_util:fake_db([{name, <<"foo">>}])), + Db = #{name => <<"foo">>}, + chttpd_db:db_req(Req, Db), [{Result}] = get_results_from_response(Pid), ?assertEqual(DocId, couch_util:get_value(<<"id">>, Result)), @@ -166,7 +169,8 @@ should_validate_missing_doc(Pid) -> Req = fake_request(DocId, Rev), mock_open_revs([{1,<<"revorev">>}], {ok, []}), - chttpd_db:db_req(Req, test_util:fake_db([{name, <<"foo">>}])), + Db = #{name => <<"foo">>}, + chttpd_db:db_req(Req, Db), [{Result}] = get_results_from_response(Pid), ?assertEqual(DocId, couch_util:get_value(<<"id">>, Result)), @@ -190,7 +194,8 @@ should_validate_bad_atts_since(Pid) -> Req = fake_request(DocId, Rev, <<"badattsince">>), mock_open_revs([{1,<<"revorev">>}], {ok, []}), - chttpd_db:db_req(Req, test_util:fake_db([{name, <<"foo">>}])), + Db = #{name => <<"foo">>}, + chttpd_db:db_req(Req, Db), [{Result}] = get_results_from_response(Pid), ?assertEqual(DocId, couch_util:get_value(<<"id">>, Result)), @@ -214,12 +219,13 @@ should_include_attachments_when_atts_since_specified(_) -> Req = fake_request(DocId, Rev, [<<"1-abc">>]), mock_open_revs([{1,<<"revorev">>}], {ok, []}), - chttpd_db:db_req(Req, test_util:fake_db([{name, <<"foo">>}])), + Db = #{name => <<"foo">>}, + chttpd_db:db_req(Req, Db), + + Options = [{atts_since, [{1, <<"abc">>}]}, attachments], + ?_assert(meck:called(fabric2_db, open_doc_revs, ['_', DocId, + [{1, <<"revorev">>}], Options])). - ?_assert(meck:called(fabric, open_revs, - ['_', DocId, [{1, <<"revorev">>}], - [{atts_since, [{1, <<"abc">>}]}, attachments, - {user_ctx, undefined}]])). %% helpers @@ -239,7 +245,7 @@ fake_request(DocId, Rev, AttsSince) -> mock_open_revs(RevsReq0, RevsResp) -> - ok = meck:expect(fabric, open_revs, + ok = meck:expect(fabric2_db, open_doc_revs, fun(_, _, RevsReq1, _) -> ?assertEqual(RevsReq0, RevsReq1), RevsResp @@ -276,7 +282,7 @@ mock(couch_stats) -> ok = meck:expect(couch_stats, update_gauge, fun(_, _) -> ok end), ok; mock(fabric) -> - ok = meck:new(fabric, [passthrough]), + ok = meck:new(fabric2_db, [passthrough]), ok; mock(config) -> ok = meck:new(config, [passthrough]), diff --git a/src/chttpd/test/eunit/chttpd_db_test.erl b/src/chttpd/test/eunit/chttpd_db_test.erl index 204332d7f..b1d514a4f 100644 --- a/src/chttpd/test/eunit/chttpd_db_test.erl +++ b/src/chttpd/test/eunit/chttpd_db_test.erl @@ -73,8 +73,7 @@ all_test_() -> fun should_return_update_seq_when_set_on_all_docs/1, fun should_not_return_update_seq_when_unset_on_all_docs/1, fun should_return_correct_id_on_doc_copy/1, - fun should_return_400_for_bad_engine/1, - fun should_not_change_db_proper_after_rewriting_shardmap/1, + fun should_ignore_engine_parameter/1, fun should_succeed_on_all_docs_with_queries_keys/1, fun should_succeed_on_all_docs_with_queries_limit_skip/1, fun should_succeed_on_all_docs_with_multiple_queries/1, @@ -282,7 +281,7 @@ attachment_doc() -> ]}. -should_return_400_for_bad_engine(_) -> +should_ignore_engine_parameter(_) -> {timeout, ?TIMEOUT, ?_test(begin TmpDb = ?tempdb(), Addr = config:get("chttpd", "bind_address", "127.0.0.1"), @@ -290,30 +289,7 @@ should_return_400_for_bad_engine(_) -> BaseUrl = lists:concat(["http://", Addr, ":", Port, "/", ?b2l(TmpDb)]), Url = BaseUrl ++ "?engine=cowabunga", {ok, Status, _, _} = test_request:put(Url, [?CONTENT_JSON, ?AUTH], "{}"), - ?assertEqual(400, Status) - end)}. - - -should_not_change_db_proper_after_rewriting_shardmap(_) -> - {timeout, ?TIMEOUT, ?_test(begin - TmpDb = ?tempdb(), - Addr = config:get("chttpd", "bind_address", "127.0.0.1"), - Port = mochiweb_socket_server:get(chttpd, port), - AdmPort = mochiweb_socket_server:get(couch_httpd, port), - - BaseUrl = lists:concat(["http://", Addr, ":", Port, "/", ?b2l(TmpDb)]), - Url = BaseUrl ++ "?partitioned=true&q=1", - {ok, 201, _, _} = test_request:put(Url, [?CONTENT_JSON, ?AUTH], "{}"), - - ShardDbName = ?l2b(config:get("mem3", "shards_db", "_dbs")), - {ok, ShardDb} = mem3_util:ensure_exists(ShardDbName), - {ok, #doc{body = {Props}}} = couch_db:open_doc( - ShardDb, TmpDb, [ejson_body]), - Shards = mem3_util:build_shards(TmpDb, Props), - - {Prop2} = ?JSON_DECODE(?JSON_ENCODE({Props})), - Shards2 = mem3_util:build_shards(TmpDb, Prop2), - ?assertEqual(Shards2, Shards) + ?assertEqual(201, Status) end)}. @@ -341,7 +317,7 @@ should_succeed_on_all_docs_with_queries_limit_skip(Url) -> {ResultJson} = ?JSON_DECODE(RespBody), ResultJsonBody = couch_util:get_value(<<"results">>, ResultJson), {InnerJson} = lists:nth(1, ResultJsonBody), - ?assertEqual(2, couch_util:get_value(<<"offset">>, InnerJson)), + ?assertEqual(null, couch_util:get_value(<<"offset">>, InnerJson)), ?assertEqual(5, length(couch_util:get_value(<<"rows">>, InnerJson))) end)}. @@ -359,7 +335,7 @@ should_succeed_on_all_docs_with_multiple_queries(Url) -> {InnerJson1} = lists:nth(1, ResultJsonBody), ?assertEqual(2, length(couch_util:get_value(<<"rows">>, InnerJson1))), {InnerJson2} = lists:nth(2, ResultJsonBody), - ?assertEqual(2, couch_util:get_value(<<"offset">>, InnerJson2)), + ?assertEqual(null, couch_util:get_value(<<"offset">>, InnerJson2)), ?assertEqual(5, length(couch_util:get_value(<<"rows">>, InnerJson2))) end)}. @@ -389,7 +365,7 @@ should_succeed_on_design_docs_with_queries_limit_skip(Url) -> {ResultJson} = ?JSON_DECODE(RespBody), ResultJsonBody = couch_util:get_value(<<"results">>, ResultJson), {InnerJson} = lists:nth(1, ResultJsonBody), - ?assertEqual(2, couch_util:get_value(<<"offset">>, InnerJson)), + ?assertEqual(null, couch_util:get_value(<<"offset">>, InnerJson)), ?assertEqual(5, length(couch_util:get_value(<<"rows">>, InnerJson))) end)}. @@ -407,7 +383,7 @@ should_succeed_on_design_docs_with_multiple_queries(Url) -> {InnerJson1} = lists:nth(1, ResultJsonBody), ?assertEqual(2, length(couch_util:get_value(<<"rows">>, InnerJson1))), {InnerJson2} = lists:nth(2, ResultJsonBody), - ?assertEqual(2, couch_util:get_value(<<"offset">>, InnerJson2)), + ?assertEqual(null, couch_util:get_value(<<"offset">>, InnerJson2)), ?assertEqual(5, length(couch_util:get_value(<<"rows">>, InnerJson2))) end)}. diff --git a/src/chttpd/test/eunit/chttpd_dbs_info_test.erl b/src/chttpd/test/eunit/chttpd_dbs_info_test.erl index 5b61d8831..6e11f3245 100644 --- a/src/chttpd/test/eunit/chttpd_dbs_info_test.erl +++ b/src/chttpd/test/eunit/chttpd_dbs_info_test.erl @@ -57,7 +57,7 @@ dbs_info_test_() -> foreach, fun setup/0, fun teardown/1, [ - fun should_return_error_for_get_db_info/1, + fun should_return_for_get_db_info/1, fun should_return_dbs_info_for_single_db/1, fun should_return_dbs_info_for_multiple_dbs/1, fun should_return_error_for_exceeded_keys/1, @@ -69,15 +69,14 @@ dbs_info_test_() -> }. -should_return_error_for_get_db_info(Url) -> +should_return_for_get_db_info(Url) -> ?_test(begin {ok, Code, _, ResultBody} = test_request:get(Url ++ "/_dbs_info?" - ++ "keys=[\"db1\"]", [?CONTENT_JSON, ?AUTH]), - {Body} = jiffy:decode(ResultBody), + ++ "start_key=\"db1\"&end_key=\"db1\"", [?CONTENT_JSON, ?AUTH]), + Body = jiffy:decode(ResultBody, [return_maps]), [ - ?assertEqual(<<"method_not_allowed">>, - couch_util:get_value(<<"error">>, Body)), - ?assertEqual(405, Code) + ?assertEqual(200, Code), + ?assertMatch([#{<<"db_name">> := <<"db1">>}], Body) ] end). diff --git a/src/chttpd/test/eunit/chttpd_open_revs_error_test.erl b/src/chttpd/test/eunit/chttpd_open_revs_error_test.erl deleted file mode 100644 index d53d370f8..000000000 --- a/src/chttpd/test/eunit/chttpd_open_revs_error_test.erl +++ /dev/null @@ -1,112 +0,0 @@ -% Licensed under the Apache License, Version 2.0 (the "License"); you may not -% use this file except in compliance with the License. You may obtain a copy of -% the License at -% -% http://www.apache.org/licenses/LICENSE-2.0 -% -% Unless required by applicable law or agreed to in writing, software -% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -% License for the specific language governing permissions and limitations under -% the License. - --module(chttpd_open_revs_error_test). - --include_lib("couch/include/couch_eunit.hrl"). --include_lib("couch/include/couch_db.hrl"). - --define(USER, "chttpd_db_test_admin"). --define(PASS, "pass"). --define(AUTH, {basic_auth, {?USER, ?PASS}}). --define(CONTENT_JSON, {"Content-Type", "application/json"}). --define(CONTENT_MULTI_FORM, {"Content-Type", - "multipart/form-data;boundary=\"bound\""}). - -setup() -> - Hashed = couch_passwords:hash_admin_password(?PASS), - ok = config:set("admins", ?USER, ?b2l(Hashed), _Persist=false), - TmpDb = ?tempdb(), - Addr = config:get("chttpd", "bind_address", "127.0.0.1"), - Port = mochiweb_socket_server:get(chttpd, port), - Url = lists:concat(["http://", Addr, ":", Port, "/", ?b2l(TmpDb)]), - mock(fabric), - create_db(Url), - Url. - -teardown(Url) -> - delete_db(Url), - (catch meck:unload(fabric)), - ok = config:delete("admins", ?USER, _Persist=false). - -create_db(Url) -> - {ok, Status, _, _} = test_request:put(Url, [?CONTENT_JSON, ?AUTH], "{}"), - ?assert(Status =:= 201 orelse Status =:= 202). - - -create_doc(Url, Id) -> - test_request:put(Url ++ "/" ++ Id, - [?CONTENT_JSON, ?AUTH], "{\"mr\": \"rockoartischocko\"}"). - -delete_db(Url) -> - {ok, 200, _, _} = test_request:delete(Url, [?AUTH]). - -open_revs_error_test_() -> - { - "open revs error tests", - { - setup, - fun chttpd_test_util:start_couch/0, - fun chttpd_test_util:stop_couch/1, - { - foreach, - fun setup/0, fun teardown/1, - [ - fun should_return_503_error_for_open_revs_get/1, - fun should_return_503_error_for_open_revs_post_form/1 - ] - } - } - }. - -should_return_503_error_for_open_revs_get(Url) -> - {ok, _, _, Body} = create_doc(Url, "testdoc"), - {Json} = ?JSON_DECODE(Body), - Ref = couch_util:get_value(<<"rev">>, Json, undefined), - mock_open_revs({error, all_workers_died}), - {ok, Code, _, _} = test_request:get(Url ++ - "/testdoc?rev=" ++ ?b2l(Ref), [?AUTH]), - ?_assertEqual(503, Code). - -should_return_503_error_for_open_revs_post_form(Url) -> - Port = mochiweb_socket_server:get(chttpd, port), - Host = lists:concat([ "http://127.0.0.1:", Port]), - Referer = {"Referer", Host}, - Body1 = "{\"body\":\"This is a body.\"}", - DocBeg = "--bound\r\nContent-Disposition: form-data; name=\"_doc\"\r\n\r\n", - DocRev = "--bound\r\nContent-Disposition: form-data; name=\"_rev\"\r\n\r\n", - DocRest = "\r\n--bound\r\nContent-Disposition:" - "form-data; name=\"_attachments\"; filename=\"file.txt\"\r\n" - "Content-Type: text/plain\r\n\r\ncontents of file.txt\r\n\r\n" - "--bound--", - Doc1 = lists:concat([DocBeg, Body1, DocRest]), - {ok, _, _, ResultBody} = test_request:post(Url ++ "/" ++ "RevDoc", - [?CONTENT_MULTI_FORM, ?AUTH, Referer], Doc1), - {Json} = ?JSON_DECODE(ResultBody), - Ref = couch_util:get_value(<<"rev">>, Json, undefined), - Doc2 = lists:concat([DocRev, ?b2l(Ref) , DocRest]), - - mock_open_revs({error, all_workers_died}), - {ok, Code, _, ResultBody1} = test_request:post(Url ++ "/" ++ "RevDoc", - [?CONTENT_MULTI_FORM, ?AUTH, Referer], Doc2), - {Json1} = ?JSON_DECODE(ResultBody1), - ErrorMessage = couch_util:get_value(<<"error">>, Json1), - [ - ?_assertEqual(503, Code), - ?_assertEqual(<<"service unvailable">>, ErrorMessage) - ]. - -mock_open_revs(RevsResp) -> - ok = meck:expect(fabric, open_revs, fun(_, _, _, _) -> RevsResp end). - -mock(fabric) -> - ok = meck:new(fabric, [passthrough]). diff --git a/src/chttpd/test/eunit/chttpd_purge_tests.erl b/src/chttpd/test/eunit/chttpd_purge_tests.erl index ab435682a..bc1fce0cd 100644 --- a/src/chttpd/test/eunit/chttpd_purge_tests.erl +++ b/src/chttpd/test/eunit/chttpd_purge_tests.erl @@ -13,6 +13,10 @@ -module(chttpd_purge_tests). +% Remove when purge is implemented +-compile(nowarn_unused_function). + + -include_lib("couch/include/couch_eunit.hrl"). -include_lib("couch/include/couch_db.hrl"). @@ -62,7 +66,7 @@ delete_db(Url) -> {ok, 200, _, _} = test_request:delete(Url, [?AUTH]). -purge_test_() -> +purge_test_disabled() -> { "chttpd db tests", { diff --git a/src/chttpd/test/eunit/chttpd_security_tests.erl b/src/chttpd/test/eunit/chttpd_security_tests.erl index 0bea9dbcd..8085f82a0 100644 --- a/src/chttpd/test/eunit/chttpd_security_tests.erl +++ b/src/chttpd/test/eunit/chttpd_security_tests.erl @@ -12,6 +12,9 @@ -module(chttpd_security_tests). +% Remove when purge is implemented +-compile(nowarn_unused_function). + -include_lib("couch/include/couch_eunit.hrl"). -include_lib("couch/include/couch_db.hrl"). @@ -38,14 +41,13 @@ setup() -> ok = config:set("admins", ?USER, ?b2l(Hashed), Persist), UserDb = ?tempdb(), TmpDb = ?tempdb(), - ok = config:set("chttpd_auth", "authentication_db", ?b2l(UserDb), Persist), - Addr = config:get("chttpd", "bind_address", "127.0.0.1"), Port = mochiweb_socket_server:get(chttpd, port), BaseUrl = lists:concat(["http://", Addr, ":", Port, "/"]), - Url = lists:concat([BaseUrl, ?b2l(TmpDb)]), UsersUrl = lists:concat([BaseUrl, ?b2l(UserDb)]), create_db(UsersUrl), + ok = config:set("chttpd_auth", "authentication_db", ?b2l(UserDb), Persist), + Url = lists:concat([BaseUrl, ?b2l(TmpDb)]), create_db(Url), create_design_doc(Url), create_user(UsersUrl,?TEST_MEMBER,?TEST_MEMBER_PASS,[<>]), @@ -56,6 +58,7 @@ setup() -> teardown([Url,UsersUrl]) -> delete_db(Url), delete_db(UsersUrl), + ok = config:delete("chttpd_auth", "authentication_db", _Persist=false), ok = config:delete("admins", ?USER, _Persist=false). create_db(Url) -> @@ -108,15 +111,21 @@ all_test_() -> fun should_disallow_db_member_db_compaction/1, fun should_allow_db_admin_db_compaction/1, fun should_allow_admin_view_compaction/1, - fun should_disallow_anonymous_view_compaction/1, - fun should_allow_admin_db_view_cleanup/1, - fun should_disallow_anonymous_db_view_cleanup/1, - fun should_allow_admin_purge/1, - fun should_disallow_anonymous_purge/1, - fun should_disallow_db_member_purge/1, - fun should_allow_admin_purged_infos_limit/1, - fun should_disallow_anonymous_purged_infos_limit/1, - fun should_disallow_db_member_purged_infos_limit/1 + fun should_disallow_anonymous_view_compaction/1 + + % Re-enable when _view_cleanup is implemented + % + %fun should_allow_admin_db_view_cleanup/1, + %fun should_disallow_anonymous_db_view_cleanup/1, + + % Re-enable when purge is implemented + % + %fun should_allow_admin_purge/1, + %fun should_disallow_anonymous_purge/1, + %fun should_disallow_db_member_purge/1, + %fun should_allow_admin_purged_infos_limit/1, + %fun should_disallow_anonymous_purged_infos_limit/1, + %fun should_disallow_db_member_purged_infos_limit/1 ] } } @@ -337,13 +346,11 @@ should_return_error_for_sec_obj_with_incorrect_roles_and_names( Body = jiffy:encode({SecurityProperties}), {ok, Status, _, RespBody} = test_request:put(SecurityUrl, [?CONTENT_JSON, ?AUTH], Body), - ResultJson = ?JSON_DECODE(RespBody), + ResultJson = couch_util:json_decode(RespBody, [return_maps]), + ExpectReason = <<"names must be a JSON list of strings">>, [ ?_assertEqual(500, Status), - ?_assertEqual({[ - {<<"error">>,<<"error">>}, - {<<"reason">>,<<"no_majority">>} - ]}, ResultJson) + ?_assertMatch(#{<<"reason">> := ExpectReason}, ResultJson) ]. should_return_error_for_sec_obj_with_incorrect_roles([Url,_UsersUrl]) -> @@ -356,13 +363,11 @@ should_return_error_for_sec_obj_with_incorrect_roles([Url,_UsersUrl]) -> Body = jiffy:encode({SecurityProperties}), {ok, Status, _, RespBody} = test_request:put(SecurityUrl, [?CONTENT_JSON, ?AUTH], Body), - ResultJson = ?JSON_DECODE(RespBody), + ResultJson = couch_util:json_decode(RespBody, [return_maps]), + ExpectReason = <<"roles must be a JSON list of strings">>, [ ?_assertEqual(500, Status), - ?_assertEqual({[ - {<<"error">>,<<"error">>}, - {<<"reason">>,<<"no_majority">>} - ]}, ResultJson) + ?_assertMatch(#{<<"reason">> := ExpectReason}, ResultJson) ]. should_return_error_for_sec_obj_with_incorrect_names([Url,_UsersUrl]) -> @@ -375,13 +380,11 @@ should_return_error_for_sec_obj_with_incorrect_names([Url,_UsersUrl]) -> Body = jiffy:encode({SecurityProperties}), {ok, Status, _, RespBody} = test_request:put(SecurityUrl, [?CONTENT_JSON, ?AUTH], Body), - ResultJson = ?JSON_DECODE(RespBody), + ResultJson = couch_util:json_decode(RespBody, [return_maps]), + ExpectReason = <<"names must be a JSON list of strings">>, [ ?_assertEqual(500, Status), - ?_assertEqual({[ - {<<"error">>,<<"error">>}, - {<<"reason">>,<<"no_majority">>} - ]}, ResultJson) + ?_assertMatch(#{<<"reason">> := ExpectReason}, ResultJson) ]. should_return_error_for_sec_obj_in_user_db([_,_UsersUrl]) -> diff --git a/src/chttpd/test/eunit/chttpd_view_test.erl b/src/chttpd/test/eunit/chttpd_view_test.erl index 4c224bb4e..1744f97a1 100644 --- a/src/chttpd/test/eunit/chttpd_view_test.erl +++ b/src/chttpd/test/eunit/chttpd_view_test.erl @@ -99,7 +99,7 @@ should_succeed_on_view_with_queries_limit_skip(Url) -> {ResultJson} = ?JSON_DECODE(RespBody), ResultJsonBody = couch_util:get_value(<<"results">>, ResultJson), {InnerJson} = lists:nth(1, ResultJsonBody), - ?assertEqual(2, couch_util:get_value(<<"offset">>, InnerJson)), + ?assertEqual(null, couch_util:get_value(<<"offset">>, InnerJson)), ?assertEqual(5, length(couch_util:get_value(<<"rows">>, InnerJson))) end)}. @@ -119,6 +119,6 @@ should_succeed_on_view_with_multiple_queries(Url) -> {InnerJson1} = lists:nth(1, ResultJsonBody), ?assertEqual(2, length(couch_util:get_value(<<"rows">>, InnerJson1))), {InnerJson2} = lists:nth(2, ResultJsonBody), - ?assertEqual(2, couch_util:get_value(<<"offset">>, InnerJson2)), + ?assertEqual(null, couch_util:get_value(<<"offset">>, InnerJson2)), ?assertEqual(5, length(couch_util:get_value(<<"rows">>, InnerJson2))) end)}. diff --git a/src/couch/src/couch_db.erl b/src/couch/src/couch_db.erl index e1d726dc9..8a7b2181e 100644 --- a/src/couch/src/couch_db.erl +++ b/src/couch/src/couch_db.erl @@ -212,6 +212,8 @@ is_db(_) -> is_system_db(#db{options = Options}) -> lists:member(sys_db, Options). +is_clustered(#{}) -> + true; is_clustered(#db{main_pid = nil}) -> true; is_clustered(#db{}) -> -- cgit v1.2.1 From 0d1cf6115bcd9d3bd7f63032988c2a569997a3ae Mon Sep 17 00:00:00 2001 From: jiangph Date: Tue, 7 Apr 2020 08:35:05 +0800 Subject: Support soft-deletion in fabric level MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of automatically and immediately removing data and index in database after a delete operation, soft-deletion allows to restore the deleted data back to original state due to a “fat finger”or undesired delete operation, up to defined periods, such as 48 hours. Co-Authored-By: Paul J. Davis --- src/fabric/include/fabric2.hrl | 1 + src/fabric/src/fabric2_db.erl | 147 ++++++++++++++++++- src/fabric/src/fabric2_fdb.erl | 132 ++++++++++++++++- src/fabric/src/fabric2_util.erl | 16 ++ src/fabric/test/fabric2_db_crud_tests.erl | 233 ++++++++++++++++++++++++++++++ 5 files changed, 515 insertions(+), 14 deletions(-) diff --git a/src/fabric/include/fabric2.hrl b/src/fabric/include/fabric2.hrl index 0c0757567..e12762260 100644 --- a/src/fabric/include/fabric2.hrl +++ b/src/fabric/include/fabric2.hrl @@ -22,6 +22,7 @@ -define(CLUSTER_CONFIG, 0). -define(ALL_DBS, 1). -define(DB_HCA, 2). +-define(DELETED_DBS, 3). -define(DBS, 15). -define(TX_IDS, 255). diff --git a/src/fabric/src/fabric2_db.erl b/src/fabric/src/fabric2_db.erl index fb6ae5176..3d6d9245e 100644 --- a/src/fabric/src/fabric2_db.erl +++ b/src/fabric/src/fabric2_db.erl @@ -17,6 +17,7 @@ create/2, open/2, delete/2, + undelete/4, list_dbs/0, list_dbs/1, @@ -26,6 +27,10 @@ list_dbs_info/1, list_dbs_info/3, + list_deleted_dbs_info/0, + list_deleted_dbs_info/1, + list_deleted_dbs_info/3, + check_is_admin/1, check_is_member/1, @@ -202,12 +207,30 @@ delete(DbName, Options) -> % Delete doesn't check user_ctx, that's done at the HTTP API level % here we just care to get the `database_does_not_exist` error thrown Options1 = lists:keystore(user_ctx, 1, Options, ?ADMIN_CTX), - {ok, Db} = open(DbName, Options1), - Resp = fabric2_fdb:transactional(Db, fun(TxDb) -> - fabric2_fdb:delete(TxDb) - end), - if Resp /= ok -> Resp; true -> - fabric2_server:remove(DbName) + case lists:keyfind(deleted_at, 1, Options1) of + {deleted_at, TimeStamp} -> + fabric2_fdb:transactional(DbName, Options1, fun(TxDb) -> + fabric2_fdb:remove_deleted_db(TxDb, TimeStamp) + end); + false -> + {ok, Db} = open(DbName, Options1), + Resp = fabric2_fdb:transactional(Db, fun(TxDb) -> + fabric2_fdb:delete(TxDb) + end), + if Resp /= ok -> Resp; true -> + fabric2_server:remove(DbName) + end + end. + + +undelete(DbName, TgtDbName, TimeStamp, Options) -> + case validate_dbname(TgtDbName) of + ok -> + fabric2_fdb:transactional(DbName, Options, fun(TxDb) -> + fabric2_fdb:undelete(TxDb, TgtDbName, TimeStamp) + end); + Error -> + Error end. @@ -283,6 +306,87 @@ list_dbs_info(UserFun, UserAcc0, Options) -> end). +list_deleted_dbs_info() -> + list_deleted_dbs_info([]). + + +list_deleted_dbs_info(Options) -> + Callback = fun(Value, Acc) -> + NewAcc = case Value of + {meta, _} -> Acc; + {row, DbInfo} -> [DbInfo | Acc]; + complete -> Acc + end, + {ok, NewAcc} + end, + {ok, DbInfos} = list_deleted_dbs_info(Callback, [], Options), + {ok, lists:reverse(DbInfos)}. + + +list_deleted_dbs_info(UserFun, UserAcc0, Options0) -> + Dir = fabric2_util:get_value(dir, Options0, fwd), + StartKey0 = fabric2_util:get_value(start_key, Options0), + EndKey0 = fabric2_util:get_value(end_key, Options0), + + {FirstBinary, LastBinary} = case Dir of + fwd -> {<<>>, <<255>>}; + rev -> {<<255>>, <<>>} + end, + + StartKey1 = case StartKey0 of + undefined -> + {FirstBinary}; + DbName0 when is_binary(DbName0) -> + {DbName0, FirstBinary}; + [DbName0, TimeStamp0] when is_binary(DbName0), is_binary(TimeStamp0) -> + {DbName0, TimeStamp0}; + BadStartKey -> + erlang:error({invalid_start_key, BadStartKey}) + end, + EndKey1 = case EndKey0 of + undefined -> + {LastBinary}; + DbName1 when is_binary(DbName1) -> + {DbName1, LastBinary}; + [DbName1, TimeStamp1] when is_binary(DbName1), is_binary(TimeStamp1) -> + {DbName1, TimeStamp1}; + BadEndKey -> + erlang:error({invalid_end_key, BadEndKey}) + end, + + Options1 = Options0 -- [{start_key, StartKey0}, {end_key, EndKey0}], + Options2 = [ + {start_key, StartKey1}, + {end_key, EndKey1}, + {wrap_keys, false} + ] ++ Options1, + + FoldFun = fun(DbName, TimeStamp, InfoFuture, {FutureQ, Count, Acc}) -> + NewFutureQ = queue:in({DbName, TimeStamp, InfoFuture}, FutureQ), + drain_deleted_info_futures(NewFutureQ, Count + 1, UserFun, Acc) + end, + fabric2_fdb:transactional(fun(Tx) -> + try + UserAcc1 = maybe_stop(UserFun({meta, []}, UserAcc0)), + InitAcc = {queue:new(), 0, UserAcc1}, + {FinalFutureQ, _, UserAcc2} = fabric2_fdb:list_deleted_dbs_info( + Tx, + FoldFun, + InitAcc, + Options2 + ), + UserAcc3 = drain_all_deleted_info_futures( + FinalFutureQ, + UserFun, + UserAcc2 + ), + {ok, maybe_stop(UserFun(complete, UserAcc3))} + catch throw:{stop, FinalUserAcc} -> + {ok, FinalUserAcc} + end + end). + + is_admin(Db, {SecProps}) when is_list(SecProps) -> case fabric2_db_plugin:check_is_admin(Db) of true -> @@ -1064,6 +1168,37 @@ drain_all_info_futures(FutureQ, UserFun, Acc) -> end. +drain_deleted_info_futures(FutureQ, Count, _UserFun, Acc) when Count < 100 -> + {FutureQ, Count, Acc}; + +drain_deleted_info_futures(FutureQ, Count, UserFun, Acc) when Count >= 100 -> + {{value, {DbName, TimeStamp, Future}}, RestQ} = queue:out(FutureQ), + BaseProps = fabric2_fdb:get_info_wait(Future), + DeletedProps = BaseProps ++ [ + {deleted, true}, + {timestamp, TimeStamp} + ], + DbInfo = make_db_info(DbName, DeletedProps), + NewAcc = maybe_stop(UserFun({row, DbInfo}, Acc)), + {RestQ, Count - 1, NewAcc}. + + +drain_all_deleted_info_futures(FutureQ, UserFun, Acc) -> + case queue:out(FutureQ) of + {{value, {DbName, TimeStamp, Future}}, RestQ} -> + BaseProps = fabric2_fdb:get_info_wait(Future), + DeletedProps = BaseProps ++ [ + {deleted, true}, + {timestamp, TimeStamp} + ], + DbInfo = make_db_info(DbName, DeletedProps), + NewAcc = maybe_stop(UserFun({row, DbInfo}, Acc)), + drain_all_deleted_info_futures(RestQ, UserFun, NewAcc); + {empty, _} -> + Acc + end. + + new_revid(Db, Doc) -> #doc{ id = DocId, diff --git a/src/fabric/src/fabric2_fdb.erl b/src/fabric/src/fabric2_fdb.erl index 2295a5648..430693329 100644 --- a/src/fabric/src/fabric2_fdb.erl +++ b/src/fabric/src/fabric2_fdb.erl @@ -22,12 +22,15 @@ open/2, ensure_current/1, delete/1, + undelete/3, + remove_deleted_db/2, exists/1, get_dir/1, list_dbs/4, list_dbs_info/4, + list_deleted_dbs_info/4, get_info/1, get_info_future/2, @@ -340,18 +343,70 @@ reopen(#{} = OldDb) -> delete(#{} = Db) -> + DoRecovery = fabric2_util:do_recovery(), + case DoRecovery of + true -> soft_delete_db(Db); + false -> hard_delete_db(Db) + end. + + +undelete(#{} = Db0, TgtDbName, TimeStamp) -> #{ name := DbName, tx := Tx, - layer_prefix := LayerPrefix, - db_prefix := DbPrefix - } = ensure_current(Db), + layer_prefix := LayerPrefix + } = ensure_current(Db0, false), + DbKey = erlfdb_tuple:pack({?ALL_DBS, TgtDbName}, LayerPrefix), + case erlfdb:wait(erlfdb:get(Tx, DbKey)) of + Bin when is_binary(Bin) -> + file_exists; + not_found -> + DeletedDbTupleKey = { + ?DELETED_DBS, + DbName, + TimeStamp + }, + DeleteDbKey = erlfdb_tuple:pack(DeletedDbTupleKey, LayerPrefix), + case erlfdb:wait(erlfdb:get(Tx, DeleteDbKey)) of + not_found -> + not_found; + DbPrefix -> + erlfdb:set(Tx, DbKey, DbPrefix), + erlfdb:clear(Tx, DeleteDbKey), + bump_db_version(#{ + tx => Tx, + db_prefix => DbPrefix + }), + ok + end + end. - DbKey = erlfdb_tuple:pack({?ALL_DBS, DbName}, LayerPrefix), - erlfdb:clear(Tx, DbKey), - erlfdb:clear_range_startswith(Tx, DbPrefix), - bump_metadata_version(Tx), - ok. + +remove_deleted_db(#{} = Db0, TimeStamp) -> + #{ + name := DbName, + tx := Tx, + layer_prefix := LayerPrefix + } = ensure_current(Db0, false), + + DeletedDbTupleKey = { + ?DELETED_DBS, + DbName, + TimeStamp + }, + DeletedDbKey = erlfdb_tuple:pack(DeletedDbTupleKey, LayerPrefix), + case erlfdb:wait(erlfdb:get(Tx, DeletedDbKey)) of + not_found -> + not_found; + DbPrefix -> + erlfdb:clear(Tx, DeletedDbKey), + erlfdb:clear_range_startswith(Tx, DbPrefix), + bump_db_version(#{ + tx => Tx, + db_prefix => DbPrefix + }), + ok + end. exists(#{name := DbName} = Db) when is_binary(DbName) -> @@ -401,6 +456,20 @@ list_dbs_info(Tx, Callback, AccIn, Options0) -> end, AccIn, Options). +list_deleted_dbs_info(Tx, Callback, AccIn, Options0) -> + Options = case fabric2_util:get_value(restart_tx, Options0) of + undefined -> [{restart_tx, true} | Options0]; + _AlreadySet -> Options0 + end, + LayerPrefix = get_dir(Tx), + Prefix = erlfdb_tuple:pack({?DELETED_DBS}, LayerPrefix), + fold_range({tx, Tx}, Prefix, fun({DbKey, DbPrefix}, Acc) -> + {DbName, TimeStamp} = erlfdb_tuple:unpack(DbKey, Prefix), + InfoFuture = get_info_future(Tx, DbPrefix), + Callback(DbName, TimeStamp, InfoFuture, Acc) + end, AccIn, Options). + + get_info(#{} = Db) -> #{ tx := Tx, @@ -1186,6 +1255,45 @@ check_db_version(#{} = Db, CheckDbVersion) -> end. +soft_delete_db(Db) -> + #{ + name := DbName, + tx := Tx, + layer_prefix := LayerPrefix, + db_prefix := DbPrefix + } = ensure_current(Db), + + DbKey = erlfdb_tuple:pack({?ALL_DBS, DbName}, LayerPrefix), + Timestamp = list_to_binary(fabric2_util:iso8601_timestamp()), + DeletedDbKeyTuple = {?DELETED_DBS, DbName, Timestamp}, + DeletedDbKey = erlfdb_tuple:pack(DeletedDbKeyTuple, LayerPrefix), + case erlfdb:wait(erlfdb:get(Tx, DeletedDbKey)) of + not_found -> + erlfdb:set(Tx, DeletedDbKey, DbPrefix), + erlfdb:clear(Tx, DbKey), + bump_db_version(Db), + ok; + _Val -> + {deletion_frequency_exceeded, DbName} + end. + + +hard_delete_db(Db) -> + #{ + name := DbName, + tx := Tx, + layer_prefix := LayerPrefix, + db_prefix := DbPrefix + } = ensure_current(Db), + + DbKey = erlfdb_tuple:pack({?ALL_DBS, DbName}, LayerPrefix), + + erlfdb:clear(Tx, DbKey), + erlfdb:clear_range_startswith(Tx, DbPrefix), + bump_metadata_version(Tx), + ok. + + write_doc_body(#{} = Db0, #doc{} = Doc) -> #{ tx := Tx @@ -1514,6 +1622,7 @@ get_fold_acc(Db, RangePrefix, UserCallback, UserAcc, Options) EndKeyGt = fabric2_util:get_value(end_key_gt, Options), EndKey0 = fabric2_util:get_value(end_key, Options, EndKeyGt), InclusiveEnd = EndKeyGt == undefined, + WrapKeys = fabric2_util:get_value(wrap_keys, Options) /= false, % CouchDB swaps the key meanings based on the direction % of the fold. FoundationDB does not so we have to @@ -1527,6 +1636,8 @@ get_fold_acc(Db, RangePrefix, UserCallback, UserAcc, Options) StartKey2 = case StartKey1 of undefined -> <>; + SK2 when not WrapKeys -> + erlfdb_tuple:pack(SK2, RangePrefix); SK2 -> erlfdb_tuple:pack({SK2}, RangePrefix) end, @@ -1534,9 +1645,14 @@ get_fold_acc(Db, RangePrefix, UserCallback, UserAcc, Options) EndKey2 = case EndKey1 of undefined -> <>; + EK2 when Reverse andalso not WrapKeys -> + PackedEK = erlfdb_tuple:pack(EK2, RangePrefix), + <>; EK2 when Reverse -> PackedEK = erlfdb_tuple:pack({EK2}, RangePrefix), <>; + EK2 when not WrapKeys -> + erlfdb_tuple:pack(EK2, RangePrefix); EK2 -> erlfdb_tuple:pack({EK2}, RangePrefix) end, diff --git a/src/fabric/src/fabric2_util.erl b/src/fabric/src/fabric2_util.erl index 97bfedc2c..9b6d18c58 100644 --- a/src/fabric/src/fabric2_util.erl +++ b/src/fabric/src/fabric2_util.erl @@ -40,6 +40,9 @@ encode_all_doc_key/1, all_docs_view_opts/1, + iso8601_timestamp/0, + do_recovery/0, + pmap/2, pmap/3 ]). @@ -337,6 +340,19 @@ all_docs_view_opts(#mrargs{} = Args) -> ] ++ StartKeyOpts ++ EndKeyOpts. +iso8601_timestamp() -> + Now = os:timestamp(), + {{Year, Month, Date}, {Hour, Minute, Second}} = + calendar:now_to_datetime(Now), + Format = "~4.10.0B-~2.10.0B-~2.10.0BT~2.10.0B:~2.10.0B:~2.10.0BZ", + io_lib:format(Format, [Year, Month, Date, Hour, Minute, Second]). + + +do_recovery() -> + config:get_boolean("couchdb", + "enable_database_recovery", false). + + pmap(Fun, Args) -> pmap(Fun, Args, []). diff --git a/src/fabric/test/fabric2_db_crud_tests.erl b/src/fabric/test/fabric2_db_crud_tests.erl index f409389d6..d5025b987 100644 --- a/src/fabric/test/fabric2_db_crud_tests.erl +++ b/src/fabric/test/fabric2_db_crud_tests.erl @@ -37,6 +37,9 @@ crud_test_() -> ?TDEF_FE(open_db), ?TDEF_FE(delete_db), ?TDEF_FE(recreate_db), + ?TDEF_FE(undelete_db), + ?TDEF_FE(remove_deleted_db), + ?TDEF_FE(old_db_handle), ?TDEF_FE(list_dbs), ?TDEF_FE(list_dbs_user_fun), ?TDEF_FE(list_dbs_user_fun_partial), @@ -44,6 +47,10 @@ crud_test_() -> ?TDEF_FE(list_dbs_info_partial), ?TDEF_FE(list_dbs_tx_too_old), ?TDEF_FE(list_dbs_info_tx_too_old), + ?TDEF_FE(list_deleted_dbs_info), + ?TDEF_FE(list_deleted_dbs_info_user_fun), + ?TDEF_FE(list_deleted_dbs_info_user_fun_partial), + ?TDEF_FE(list_deleted_dbs_info_with_timestamps), ?TDEF_FE(get_info_wait_retry_on_tx_too_old), ?TDEF_FE(get_info_wait_retry_on_tx_abort) ] @@ -68,6 +75,7 @@ setup() -> cleanup(_) -> + ok = config:set("couchdb", "enable_database_recovery", "false", false), fabric2_test_util:tx_too_old_reset_errors(), reset_fail_erfdb_wait(), meck:reset([erlfdb]). @@ -139,6 +147,123 @@ recreate_db(_) -> ?assertError(database_does_not_exist, fabric2_db:open(DbName, BadOpts)). +undelete_db(_) -> + DbName = ?tempdb(), + ?assertError(database_does_not_exist, fabric2_db:delete(DbName, [])), + + ?assertMatch({ok, _}, fabric2_db:create(DbName, [])), + ?assertEqual(true, ets:member(fabric2_server, DbName)), + + ok = config:set("couchdb", "enable_database_recovery", "true", false), + ?assertEqual(ok, fabric2_db:delete(DbName, [])), + ?assertEqual(false, ets:member(fabric2_server, DbName)), + + + {ok, Infos} = fabric2_db:list_deleted_dbs_info(), + [DeletedDbInfo] = [Info || Info <- Infos, + DbName == proplists:get_value(db_name, Info) + ], + Timestamp = proplists:get_value(timestamp, DeletedDbInfo), + + OldTS = <<"2020-01-01T12:00:00Z">>, + ?assertEqual(not_found, fabric2_db:undelete(DbName, DbName, OldTS, [])), + BadDbName = <<"bad_dbname">>, + ?assertEqual(not_found, + fabric2_db:undelete(BadDbName, BadDbName, Timestamp, [])), + + ok = fabric2_db:undelete(DbName, DbName, Timestamp, []), + {ok, AllDbInfos} = fabric2_db:list_dbs_info(), + ?assert(is_db_info_member(DbName, AllDbInfos)). + + +remove_deleted_db(_) -> + DbName = ?tempdb(), + ?assertError(database_does_not_exist, fabric2_db:delete(DbName, [])), + + ?assertMatch({ok, _}, fabric2_db:create(DbName, [])), + ?assertEqual(true, ets:member(fabric2_server, DbName)), + + ok = config:set("couchdb", "enable_database_recovery", "true", false), + ?assertEqual(ok, fabric2_db:delete(DbName, [])), + ?assertEqual(false, ets:member(fabric2_server, DbName)), + + {ok, Infos} = fabric2_db:list_deleted_dbs_info(), + [DeletedDbInfo] = [Info || Info <- Infos, + DbName == proplists:get_value(db_name, Info) + ], + Timestamp = proplists:get_value(timestamp, DeletedDbInfo), + OldTS = <<"2020-01-01T12:00:00Z">>, + ?assertEqual(not_found, + fabric2_db:delete(DbName, [{deleted_at, OldTS}])), + BadDbName = <<"bad_dbname">>, + ?assertEqual(not_found, + fabric2_db:delete(BadDbName, [{deleted_at, Timestamp}])), + + ok = fabric2_db:delete(DbName, [{deleted_at, Timestamp}]), + {ok, Infos2} = fabric2_db:list_deleted_dbs_info(), + DeletedDbs = [proplists:get_value(db_name, Info) || Info <- Infos2], + ?assert(not lists:member(DbName, DeletedDbs)). + + +old_db_handle(_) -> + % db hard deleted + DbName1 = ?tempdb(), + ?assertError(database_does_not_exist, fabric2_db:delete(DbName1, [])), + ?assertMatch({ok, _}, fabric2_db:create(DbName1, [])), + {ok, Db1} = fabric2_db:open(DbName1, []), + ?assertMatch({ok, _}, fabric2_db:get_db_info(Db1)), + ?assertEqual(ok, fabric2_db:delete(DbName1, [])), + ?assertError(database_does_not_exist, fabric2_db:get_db_info(Db1)), + + % db soft deleted + DbName2 = ?tempdb(), + ?assertError(database_does_not_exist, fabric2_db:delete(DbName2, [])), + ?assertMatch({ok, _}, fabric2_db:create(DbName2, [])), + {ok, Db2} = fabric2_db:open(DbName2, []), + ?assertMatch({ok, _}, fabric2_db:get_db_info(Db2)), + ok = config:set("couchdb", "enable_database_recovery", "true", false), + ?assertEqual(ok, fabric2_db:delete(DbName2, [])), + ?assertError(database_does_not_exist, fabric2_db:get_db_info(Db2)), + + % db soft deleted and re-created + DbName3 = ?tempdb(), + ?assertError(database_does_not_exist, fabric2_db:delete(DbName3, [])), + ?assertMatch({ok, _}, fabric2_db:create(DbName3, [])), + {ok, Db3} = fabric2_db:open(DbName3, []), + ?assertMatch({ok, _}, fabric2_db:get_db_info(Db3)), + ok = config:set("couchdb", "enable_database_recovery", "true", false), + ?assertEqual(ok, fabric2_db:delete(DbName3, [])), + ?assertMatch({ok, _}, fabric2_db:create(DbName3, [])), + ?assertError(database_does_not_exist, fabric2_db:get_db_info(Db3)), + + % db soft deleted and undeleted + DbName4 = ?tempdb(), + ?assertError(database_does_not_exist, fabric2_db:delete(DbName4, [])), + ?assertMatch({ok, _}, fabric2_db:create(DbName4, [])), + {ok, Db4} = fabric2_db:open(DbName4, []), + ?assertMatch({ok, _}, fabric2_db:get_db_info(Db4)), + ok = config:set("couchdb", "enable_database_recovery", "true", false), + ?assertEqual(ok, fabric2_db:delete(DbName4, [])), + {ok, Infos} = fabric2_db:list_deleted_dbs_info(), + [DeletedDbInfo] = [Info || Info <- Infos, + DbName4 == proplists:get_value(db_name, Info) + ], + Timestamp = proplists:get_value(timestamp, DeletedDbInfo), + ok = fabric2_db:undelete(DbName4, DbName4, Timestamp, []), + ?assertMatch({ok, _}, fabric2_db:get_db_info(Db4)), + + % db hard deleted and re-created + DbName5 = ?tempdb(), + ?assertError(database_does_not_exist, fabric2_db:delete(DbName5, [])), + ?assertMatch({ok, _}, fabric2_db:create(DbName5, [])), + {ok, Db5} = fabric2_db:open(DbName5, []), + ?assertMatch({ok, _}, fabric2_db:get_db_info(Db5)), + ok = config:set("couchdb", "enable_database_recovery", "false", false), + ?assertEqual(ok, fabric2_db:delete(DbName5, [])), + ?assertMatch({ok, _}, fabric2_db:create(DbName5, [])), + ?assertError(database_does_not_exist, fabric2_db:get_db_info(Db5)). + + list_dbs(_) -> DbName = ?tempdb(), AllDbs1 = fabric2_db:list_dbs(), @@ -295,6 +420,108 @@ list_dbs_info_tx_too_old(_) -> end, DbNames). +list_deleted_dbs_info(_) -> + DbName = ?tempdb(), + AllDbs1 = fabric2_db:list_dbs(), + + ?assert(is_list(AllDbs1)), + ?assert(not lists:member(DbName, AllDbs1)), + + ?assertMatch({ok, _}, fabric2_db:create(DbName, [])), + AllDbs2 = fabric2_db:list_dbs(), + ?assert(lists:member(DbName, AllDbs2)), + + ok = config:set("couchdb", "enable_database_recovery", "true", false), + ?assertEqual(ok, fabric2_db:delete(DbName, [])), + + AllDbs3 = fabric2_db:list_dbs(), + ?assert(not lists:member(DbName, AllDbs3)), + {ok, DeletedDbsInfo} = fabric2_db:list_deleted_dbs_info(), + DeletedDbs4 = get_deleted_dbs(DeletedDbsInfo), + ?assert(lists:member(DbName, DeletedDbs4)). + + +list_deleted_dbs_info_user_fun(_) -> + DbName = ?tempdb(), + ?assertMatch({ok, _}, fabric2_db:create(DbName, [])), + ?assertEqual(ok, fabric2_db:delete(DbName, [])), + + UserFun = fun(Row, Acc) -> {ok, [Row | Acc]} end, + {ok, UserAcc} = fabric2_db:list_deleted_dbs_info(UserFun, [], []), + {ok, DeletedDbsInfo} = fabric2_db:list_deleted_dbs_info(), + + Base = lists:foldl(fun(DbInfo, Acc) -> + [{row, DbInfo} | Acc] + end, [{meta, []}], DeletedDbsInfo), + Expect = lists:reverse(Base, [complete]), + + ?assertEqual(Expect, lists:reverse(UserAcc)). + + +list_deleted_dbs_info_user_fun_partial(_) -> + UserFun = fun(Row, Acc) -> {stop, [Row | Acc]} end, + {ok, UserAcc} = fabric2_db:list_deleted_dbs_info(UserFun, [], []), + ?assertEqual([{meta, []}], UserAcc). + + +list_deleted_dbs_info_with_timestamps(_) -> + ok = config:set("couchdb", "enable_database_recovery", "true", false), + + % Cycle our database three times to get multiple entries + DbName = ?tempdb(), + ?assertMatch({ok, _}, fabric2_db:create(DbName, [])), + ?assertEqual(ok, fabric2_db:delete(DbName, [])), + timer:sleep(1100), + ?assertMatch({ok, _}, fabric2_db:create(DbName, [])), + ?assertEqual(ok, fabric2_db:delete(DbName, [])), + timer:sleep(1100), + ?assertMatch({ok, _}, fabric2_db:create(DbName, [])), + ?assertEqual(ok, fabric2_db:delete(DbName, [])), + + UserFun = fun(Row, Acc) -> + case Row of + {row, Info} -> {ok, [Info | Acc]}; + _ -> {ok, Acc} + end + end, + + Options1 = [{start_key, DbName}, {end_key, <>}], + {ok, Infos1} = fabric2_db:list_deleted_dbs_info(UserFun, [], Options1), + TimeStamps1 = [fabric2_util:get_value(timestamp, Info) || Info <- Infos1], + ?assertEqual(3, length(TimeStamps1)), + + [FirstTS, MiddleTS, LastTS] = lists:sort(TimeStamps1), + + % Check we can skip over the FirstTS + Options2 = [{start_key, [DbName, MiddleTS]}, {end_key, [DbName, LastTS]}], + {ok, Infos2} = fabric2_db:list_deleted_dbs_info(UserFun, [], Options2), + TimeStamps2 = [fabric2_util:get_value(timestamp, Info) || Info <- Infos2], + ?assertEqual(2, length(TimeStamps2)), + ?assertEqual([LastTS, MiddleTS], TimeStamps2), % because foldl reverses + + % Check we an end before LastTS + Options3 = [{start_key, DbName}, {end_key, [DbName, MiddleTS]}], + {ok, Infos3} = fabric2_db:list_deleted_dbs_info(UserFun, [], Options3), + TimeStamps3 = [fabric2_util:get_value(timestamp, Info) || Info <- Infos3], + ?assertEqual([MiddleTS, FirstTS], TimeStamps3), + + % Check that {dir, rev} works without timestamps + Options4 = [{start_key, DbName}, {end_key, DbName}, {dir, rev}], + {ok, Infos4} = fabric2_db:list_deleted_dbs_info(UserFun, [], Options4), + TimeStamps4 = [fabric2_util:get_value(timestamp, Info) || Info <- Infos4], + ?assertEqual([FirstTS, MiddleTS, LastTS], TimeStamps4), + + % Check that reverse with keys returns correctly + Options5 = [ + {start_key, [DbName, MiddleTS]}, + {end_key, [DbName, FirstTS]}, + {dir, rev} + ], + {ok, Infos5} = fabric2_db:list_deleted_dbs_info(UserFun, [], Options5), + TimeStamps5 = [fabric2_util:get_value(timestamp, Info) || Info <- Infos5], + ?assertEqual([FirstTS, MiddleTS], TimeStamps5). + + get_info_wait_retry_on_tx_too_old(_) -> DbName = ?tempdb(), ?assertMatch({ok, _}, fabric2_db:create(DbName, [])), @@ -382,3 +609,9 @@ is_db_info_member(DbName, [DbInfo | RestInfos]) -> _E -> is_db_info_member(DbName, RestInfos) end. + +get_deleted_dbs(DeletedDbInfos) -> + lists:foldl(fun(DbInfo, Acc) -> + DbName = fabric2_util:get_value(db_name, DbInfo), + [DbName | Acc] + end, [], DeletedDbInfos). -- cgit v1.2.1 From ec12e1f54b5e0dab477c8489b72af051cc490070 Mon Sep 17 00:00:00 2001 From: jiangph Date: Tue, 7 Apr 2020 08:41:26 +0800 Subject: Support soft-deletion in chttpd level Co-Authored-By: Paul J. Davis --- src/chttpd/src/chttpd_httpd_handlers.erl | 10 + src/chttpd/src/chttpd_misc.erl | 117 ++++++++--- src/chttpd/test/eunit/chttpd_deleted_dbs_test.erl | 234 ++++++++++++++++++++++ 3 files changed, 332 insertions(+), 29 deletions(-) create mode 100644 src/chttpd/test/eunit/chttpd_deleted_dbs_test.erl diff --git a/src/chttpd/src/chttpd_httpd_handlers.erl b/src/chttpd/src/chttpd_httpd_handlers.erl index be6c0a13e..3fd56c354 100644 --- a/src/chttpd/src/chttpd_httpd_handlers.erl +++ b/src/chttpd/src/chttpd_httpd_handlers.erl @@ -28,6 +28,7 @@ url_handler(<<>>) -> fun chttpd_misc:handle_welcome_req/1; url_handler(<<"favicon.ico">>) -> fun chttpd_misc:handle_favicon_req/1; url_handler(<<"_utils">>) -> fun chttpd_misc:handle_utils_dir_req/1; url_handler(<<"_all_dbs">>) -> fun chttpd_misc:handle_all_dbs_req/1; +url_handler(<<"_deleted_dbs">>) -> fun chttpd_misc:handle_deleted_dbs_req/1; url_handler(<<"_dbs_info">>) -> fun chttpd_misc:handle_dbs_info_req/1; url_handler(<<"_active_tasks">>) -> fun chttpd_misc:handle_task_status_req/1; url_handler(<<"_scheduler">>) -> fun couch_replicator_httpd:handle_scheduler_req/1; @@ -67,6 +68,15 @@ handler_info('GET', [<<"_active_tasks">>], _) -> handler_info('GET', [<<"_all_dbs">>], _) -> {'all_dbs.read', #{}}; +handler_info('GET', [<<"_deleted_dbs">>], _) -> + {'account-deleted-dbs.read', #{}}; + +handler_info('POST', [<<"_deleted_dbs">>], _) -> + {'account-deleted-dbs.undelete', #{}}; + +handler_info('DELETE', [<<"_deleted_dbs">>, Db], _) -> + {'account-deleted-dbs.delete', #{'db.name' => Db}}; + handler_info('POST', [<<"_dbs_info">>], _) -> {'dbs_info.read', #{}}; diff --git a/src/chttpd/src/chttpd_misc.erl b/src/chttpd/src/chttpd_misc.erl index ca1e58ad2..843c3fe7e 100644 --- a/src/chttpd/src/chttpd_misc.erl +++ b/src/chttpd/src/chttpd_misc.erl @@ -15,6 +15,7 @@ -export([ handle_all_dbs_req/1, handle_dbs_info_req/1, + handle_deleted_dbs_req/1, handle_favicon_req/1, handle_favicon_req/2, handle_replicate_req/1, @@ -164,35 +165,7 @@ all_dbs_callback({error, Reason}, #vacc{resp=Resp0}=Acc) -> handle_dbs_info_req(#httpd{method = 'GET'} = Req) -> ok = chttpd:verify_is_server_admin(Req), - - #mrargs{ - start_key = StartKey, - end_key = EndKey, - direction = Dir, - limit = Limit, - skip = Skip - } = couch_mrview_http:parse_params(Req, undefined), - - Options = [ - {start_key, StartKey}, - {end_key, EndKey}, - {dir, Dir}, - {limit, Limit}, - {skip, Skip} - ], - - % TODO: Figure out if we can't calculate a valid - % ETag for this request. \xFFmetadataVersion won't - % work as we don't bump versions on size changes - - {ok, Resp} = chttpd:start_delayed_json_response(Req, 200, []), - Callback = fun dbs_info_callback/2, - Acc = #vacc{req = Req, resp = Resp}, - {ok, Resp} = fabric2_db:list_dbs_info(Callback, Acc, Options), - case is_record(Resp, vacc) of - true -> {ok, Resp#vacc.resp}; - _ -> {ok, Resp} - end; + send_db_infos(Req, list_dbs_info); handle_dbs_info_req(#httpd{method='POST', user_ctx=UserCtx}=Req) -> chttpd:validate_ctype(Req, "application/json"), Props = chttpd:json_body_obj(Req), @@ -226,6 +199,92 @@ handle_dbs_info_req(#httpd{method='POST', user_ctx=UserCtx}=Req) -> handle_dbs_info_req(Req) -> send_method_not_allowed(Req, "GET,HEAD,POST"). +handle_deleted_dbs_req(#httpd{method='GET', path_parts=[_]}=Req) -> + ok = chttpd:verify_is_server_admin(Req), + send_db_infos(Req, list_deleted_dbs_info); +handle_deleted_dbs_req(#httpd{method='POST', user_ctx=Ctx, path_parts=[_]}=Req) -> + couch_httpd:verify_is_server_admin(Req), + chttpd:validate_ctype(Req, "application/json"), + GetJSON = fun(Key, Props, Default) -> + case couch_util:get_value(Key, Props) of + undefined when Default == error -> + Fmt = "POST body must include `~s` parameter.", + Msg = io_lib:format(Fmt, [Key]), + throw({bad_request, iolist_to_binary(Msg)}); + undefined -> + Default; + Value -> + Value + end + end, + {BodyProps} = chttpd:json_body_obj(Req), + {UndeleteProps} = GetJSON(<<"undelete">>, BodyProps, error), + DbName = GetJSON(<<"source">>, UndeleteProps, error), + TimeStamp = GetJSON(<<"timestamp">>, UndeleteProps, error), + TgtDbName = GetJSON(<<"target">>, UndeleteProps, DbName), + case fabric2_db:undelete(DbName, TgtDbName, TimeStamp, [{user_ctx, Ctx}]) of + ok -> + send_json(Req, 200, {[{ok, true}]}); + {error, file_exists} -> + chttpd:send_error(Req, file_exists); + {error, not_found} -> + chttpd:send_error(Req, not_found); + Error -> + throw(Error) + end; +handle_deleted_dbs_req(#httpd{path_parts = PP}=Req) when length(PP) == 1 -> + send_method_not_allowed(Req, "GET,HEAD,POST"); +handle_deleted_dbs_req(#httpd{method='DELETE', user_ctx=Ctx, path_parts=[_, DbName]}=Req) -> + couch_httpd:verify_is_server_admin(Req), + TS = case ?JSON_DECODE(couch_httpd:qs_value(Req, "timestamp", "null")) of + null -> + throw({bad_request, "`timestamp` parameter is not provided."}); + TS0 -> + TS0 + end, + case fabric2_db:delete(DbName, [{user_ctx, Ctx}, {deleted_at, TS}]) of + ok -> + send_json(Req, 200, {[{ok, true}]}); + {error, not_found} -> + chttpd:send_error(Req, not_found); + Error -> + throw(Error) + end; +handle_deleted_dbs_req(#httpd{path_parts = PP}=Req) when length(PP) == 2 -> + send_method_not_allowed(Req, "HEAD,DELETE"); +handle_deleted_dbs_req(Req) -> + chttpd:send_error(Req, not_found). + +send_db_infos(Req, ListFunctionName) -> + #mrargs{ + start_key = StartKey, + end_key = EndKey, + direction = Dir, + limit = Limit, + skip = Skip + } = couch_mrview_http:parse_params(Req, undefined), + + Options = [ + {start_key, StartKey}, + {end_key, EndKey}, + {dir, Dir}, + {limit, Limit}, + {skip, Skip} + ], + + % TODO: Figure out if we can't calculate a valid + % ETag for this request. \xFFmetadataVersion won't + % work as we don't bump versions on size changes + + {ok, Resp1} = chttpd:start_delayed_json_response(Req, 200, []), + Callback = fun dbs_info_callback/2, + Acc = #vacc{req = Req, resp = Resp1}, + {ok, Resp2} = fabric2_db:ListFunctionName(Callback, Acc, Options), + case is_record(Resp2, vacc) of + true -> {ok, Resp2#vacc.resp}; + _ -> {ok, Resp2} + end. + dbs_info_callback({meta, _Meta}, #vacc{resp = Resp0} = Acc) -> {ok, Resp1} = chttpd:send_delayed_chunk(Resp0, "["), {ok, Acc#vacc{resp = Resp1}}; diff --git a/src/chttpd/test/eunit/chttpd_deleted_dbs_test.erl b/src/chttpd/test/eunit/chttpd_deleted_dbs_test.erl new file mode 100644 index 000000000..d6375c048 --- /dev/null +++ b/src/chttpd/test/eunit/chttpd_deleted_dbs_test.erl @@ -0,0 +1,234 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(chttpd_deleted_dbs_test). + +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("couch/include/couch_db.hrl"). + +-define(USER, "chttpd_db_test_admin"). +-define(PASS, "pass"). +-define(AUTH, {basic_auth, {?USER, ?PASS}}). +-define(CONTENT_JSON, {"Content-Type", "application/json"}). + + +setup() -> + Hashed = couch_passwords:hash_admin_password(?PASS), + ok = config:set("admins", ?USER, ?b2l(Hashed), _Persist=false), + Addr = config:get("chttpd", "bind_address", "127.0.0.1"), + Port = mochiweb_socket_server:get(chttpd, port), + lists:concat(["http://", Addr, ":", Port, "/"]). + + +teardown(_Url) -> + ok = config:delete("couchdb", "enable_database_recovery", false), + ok = config:delete("admins", ?USER, _Persist=false). + + +create_db(Url) -> + {ok, Status, _, _} = http(put, Url, ""), + ?assert(Status =:= 201 orelse Status =:= 202). + + +delete_db(Url) -> + {ok, 200, _, _} = http(delete, Url). + + +deleted_dbs_test_() -> + { + "chttpd deleted dbs tests", + { + setup, + fun chttpd_test_util:start_couch/0, + fun chttpd_test_util:stop_couch/1, + { + foreach, + fun setup/0, + fun teardown/1, + [ + fun should_return_error_for_unsupported_method/1, + fun should_list_deleted_dbs/1, + fun should_list_deleted_dbs_info/1, + fun should_undelete_db/1, + fun should_remove_deleted_db/1, + fun should_undelete_db_to_target_db/1, + fun should_not_undelete_db_to_existing_db/1 + ] + } + } + }. + + +should_return_error_for_unsupported_method(Url) -> + ?_test(begin + {ok, Code, _, Body} = http(delete, mk_url(Url)), + + ?assertEqual(405, Code), + ?assertEqual(<<"method_not_allowed">>, get_json(<<"error">>, Body)) + end). + + +should_list_deleted_dbs(Url) -> + ?_test(begin + DbName1 = create_and_delete_db(Url), + DbName2 = create_and_delete_db(Url), + {ok, Code, _, Body} = http(get, mk_url(Url)), + DeletedDbs = get_db_names(Body), + + ?assertEqual(200, Code), + ?assertEqual(true, lists:member(DbName1, DeletedDbs)), + ?assertEqual(true, lists:member(DbName2, DeletedDbs)) + end). + + +should_list_deleted_dbs_info(Url) -> + ?_test(begin + DbName = create_and_delete_db(Url), + {ok, _, _, Body} = http(get, mk_url(Url, DbName)), + [{Props}] = jiffy:decode(Body), + + ?assertEqual(DbName, couch_util:get_value(<<"db_name">>, Props)) + end). + + +should_undelete_db(Url) -> + ?_test(begin + DbName = create_and_delete_db(Url), + {ok, _, _, ResultBody} = http(get, mk_url(Url, DbName)), + [{Props}] = jiffy:decode(ResultBody), + TimeStamp = couch_util:get_value(<<"timestamp">>, Props), + + ErlJSON = {[ + {undelete, {[ + {source, DbName}, + {timestamp, TimeStamp} + ]}} + ]}, + + {ok, Code1, _, _} = http(get, Url ++ DbName), + ?assertEqual(404, Code1), + + {ok, Code2, _, _} = http(post, mk_url(Url), ErlJSON), + ?assertEqual(200, Code2), + + {ok, Code3, _, _} = http(get, Url ++ DbName), + ?assertEqual(200, Code3) + end). + + +should_remove_deleted_db(Url) -> + ?_test(begin + DbName = create_and_delete_db(Url), + {ok, _, _, Body1} = http(get, mk_url(Url, DbName)), + [{Props}] = jiffy:decode(Body1), + TimeStamp = couch_util:get_value(<<"timestamp">>, Props), + + {ok, Code, _, _} = http(delete, mk_url(Url, DbName, TimeStamp)), + ?assertEqual(200, Code), + + {ok, _, _, Body2} = http(get, mk_url(Url, DbName)), + ?assertEqual([], jiffy:decode(Body2)) + end). + + +should_undelete_db_to_target_db(Url) -> + ?_test(begin + DbName = create_and_delete_db(Url), + {ok, _, _, Body} = http(get, mk_url(Url, DbName)), + [{Props}] = jiffy:decode(Body), + TimeStamp = couch_util:get_value(<<"timestamp">>, Props), + + NewDbName = ?tempdb(), + ErlJSON = {[ + {undelete, {[ + {source, DbName}, + {timestamp, TimeStamp}, + {target, NewDbName} + ]}} + ]}, + + {ok, Code1, _, _} = http(get, Url ++ NewDbName), + ?assertEqual(404, Code1), + + {ok, Code2, _, _} = http(post, mk_url(Url), ErlJSON), + ?assertEqual(200, Code2), + + {ok, Code3, _, _} = http(get, Url ++ NewDbName), + ?assertEqual(200, Code3) + end). + + +should_not_undelete_db_to_existing_db(Url) -> + ?_test(begin + DbName = create_and_delete_db(Url), + {ok, _, _, ResultBody} = http(get, mk_url(Url, DbName)), + [{Props}] = jiffy:decode(ResultBody), + TimeStamp = couch_util:get_value(<<"timestamp">>, Props), + + NewDbName = ?tempdb(), + create_db(Url ++ NewDbName), + ErlJSON = {[ + {undelete, {[ + {source, DbName}, + {timestamp, TimeStamp}, + {target, NewDbName} + ]}} + ]}, + {ok, Code2, _, ResultBody2} = http(post, mk_url(Url), ErlJSON), + ?assertEqual(412, Code2), + ?assertEqual(<<"file_exists">>, get_json(<<"error">>, ResultBody2)) + end). + + +create_and_delete_db(BaseUrl) -> + DbName = ?tempdb(), + DbUrl = BaseUrl ++ DbName, + create_db(DbUrl), + ok = config:set("couchdb", "enable_database_recovery", "true", false), + delete_db(DbUrl), + DbName. + + +http(Verb, Url) -> + Headers = [?CONTENT_JSON, ?AUTH], + test_request:Verb(Url, Headers). + + +http(Verb, Url, Body) -> + Headers = [?CONTENT_JSON, ?AUTH], + test_request:Verb(Url, Headers, jiffy:encode(Body)). + + +mk_url(Url) -> + Url ++ "/_deleted_dbs". + + +mk_url(Url, DbName) -> + Url ++ "/_deleted_dbs?key=\"" ++ ?b2l(DbName) ++ "\"". + + +mk_url(Url, DbName, TimeStamp) -> + Url ++ "/_deleted_dbs/" ++ ?b2l(DbName) ++ "?timestamp=\"" ++ + ?b2l(TimeStamp) ++ "\"". + + +get_json(Key, Body) -> + {Props} = jiffy:decode(Body), + couch_util:get_value(Key, Props). + + +get_db_names(Body) -> + RevDbNames = lists:foldl(fun({DbInfo}, Acc) -> + DbName = couch_util:get_value(<<"db_name">>, DbInfo), + [DbName | Acc] + end, [], jiffy:decode(Body)), + lists:reverse(RevDbNames). -- cgit v1.2.1 From d6ec9935453c4f0fe26174a472cdf3e4cb9c5e60 Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Sat, 4 Apr 2020 16:45:52 -0400 Subject: Compress doc bodies and attachments In CouchDB < 4.x we compressed document bodies by default, so enable it for 4.x as well. Use the basic term_to_binary compression mechanism for: - Document bodies - Local document bodies - Attachments, but only if they have not already been compressed. --- src/fabric/include/fabric2.hrl | 4 +++ src/fabric/src/fabric2_db.erl | 3 +- src/fabric/src/fabric2_fdb.erl | 42 ++++++++++++++++++++----- src/fabric/test/fabric2_doc_att_tests.erl | 52 +++++++++++++++++++++++++++++-- 4 files changed, 90 insertions(+), 11 deletions(-) diff --git a/src/fabric/include/fabric2.hrl b/src/fabric/include/fabric2.hrl index e12762260..587b4f888 100644 --- a/src/fabric/include/fabric2.hrl +++ b/src/fabric/include/fabric2.hrl @@ -55,6 +55,10 @@ -define(CURR_LDOC_FORMAT, 0). +% 0 - Attachment storage version + +-define(CURR_ATT_STORAGE_VER, 0). + % Misc constants -define(PDICT_DB_KEY, '$fabric_db_handle'). diff --git a/src/fabric/src/fabric2_db.erl b/src/fabric/src/fabric2_db.erl index 3d6d9245e..9b9efdac2 100644 --- a/src/fabric/src/fabric2_db.erl +++ b/src/fabric/src/fabric2_db.erl @@ -913,7 +913,8 @@ read_attachment(Db, DocId, AttId) -> write_attachment(Db, DocId, Att) -> Data = couch_att:fetch(data, Att), - {ok, AttId} = fabric2_fdb:write_attachment(Db, DocId, Data), + Encoding = couch_att:fetch(encoding, Att), + {ok, AttId} = fabric2_fdb:write_attachment(Db, DocId, Data, Encoding), couch_att:store(data, {loc, Db, DocId, AttId}, Att). diff --git a/src/fabric/src/fabric2_fdb.erl b/src/fabric/src/fabric2_fdb.erl index 430693329..d96c3ae60 100644 --- a/src/fabric/src/fabric2_fdb.erl +++ b/src/fabric/src/fabric2_fdb.erl @@ -57,7 +57,7 @@ write_local_doc/2, read_attachment/3, - write_attachment/3, + write_attachment/4, get_last_change/1, @@ -971,26 +971,53 @@ read_attachment(#{} = Db, DocId, AttId) -> } = ensure_current(Db), AttKey = erlfdb_tuple:pack({?DB_ATTS, DocId, AttId}, DbPrefix), - case erlfdb:wait(erlfdb:get_range_startswith(Tx, AttKey)) of + Data = case erlfdb:wait(erlfdb:get_range_startswith(Tx, AttKey)) of not_found -> throw({not_found, missing}); KVs -> Vs = [V || {_K, V} <- KVs], iolist_to_binary(Vs) + end, + + IdKey = erlfdb_tuple:pack({?DB_ATT_NAMES, DocId, AttId}, DbPrefix), + case erlfdb:wait(erlfdb:get(Tx, IdKey)) of + <<>> -> + Data; % Old format, before CURR_ATT_STORAGE_VER = 0 + <<_/binary>> = InfoBin -> + {?CURR_ATT_STORAGE_VER, Compressed} = erlfdb_tuple:unpack(InfoBin), + case Compressed of + true -> binary_to_term(Data, [safe]); + false -> Data + end end. -write_attachment(#{} = Db, DocId, Data) when is_binary(Data) -> +write_attachment(#{} = Db, DocId, Data, Encoding) + when is_binary(Data), is_atom(Encoding) -> #{ tx := Tx, db_prefix := DbPrefix } = ensure_current(Db), AttId = fabric2_util:uuid(), - Chunks = chunkify_binary(Data), + + {Data1, Compressed} = case Encoding of + gzip -> + {Data, false}; + _ -> + Opts = [{minor_version, 1}, {compressed, 6}], + CompressedData = term_to_binary(Data, Opts), + case size(CompressedData) < Data of + true -> {CompressedData, true}; + false -> {Data, false} + end + end, IdKey = erlfdb_tuple:pack({?DB_ATT_NAMES, DocId, AttId}, DbPrefix), - ok = erlfdb:set(Tx, IdKey, <<>>), + InfoVal = erlfdb_tuple:pack({?CURR_ATT_STORAGE_VER, Compressed}), + ok = erlfdb:set(Tx, IdKey, InfoVal), + + Chunks = chunkify_binary(Data1), lists:foldl(fun(Chunk, ChunkId) -> AttKey = erlfdb_tuple:pack({?DB_ATTS, DocId, AttId, ChunkId}, DbPrefix), @@ -1474,7 +1501,8 @@ doc_to_fdb(Db, #doc{} = Doc) -> DiskAtts = lists:map(fun couch_att:to_disk_term/1, Atts), - Value = term_to_binary({Body, DiskAtts, Deleted}, [{minor_version, 1}]), + Opts = [{minor_version, 1}, {compressed, 6}], + Value = term_to_binary({Body, DiskAtts, Deleted}, Opts), Chunks = chunkify_binary(Value), {Rows, _} = lists:mapfoldl(fun(Chunk, ChunkId) -> @@ -1526,7 +1554,7 @@ local_doc_to_fdb(Db, #doc{} = Doc) -> _ when is_binary(Rev) -> Rev end, - BVal = term_to_binary(Body, [{minor_version, 1}]), + BVal = term_to_binary(Body, [{minor_version, 1}, {compressed, 6}]), {Rows, _} = lists:mapfoldl(fun(Chunk, ChunkId) -> K = erlfdb_tuple:pack({?DB_LOCAL_DOC_BODIES, Id, ChunkId}, DbPrefix), {{K, Chunk}, ChunkId + 1} diff --git a/src/fabric/test/fabric2_doc_att_tests.erl b/src/fabric/test/fabric2_doc_att_tests.erl index ac531e913..5d28b6da0 100644 --- a/src/fabric/test/fabric2_doc_att_tests.erl +++ b/src/fabric/test/fabric2_doc_att_tests.erl @@ -29,6 +29,7 @@ doc_crud_test_() -> fun cleanup/1, with([ ?TDEF(create_att), + ?TDEF(create_att_already_compressed), ?TDEF(delete_att), ?TDEF(multiple_atts), ?TDEF(delete_one_att), @@ -84,7 +85,48 @@ create_att({Db, _}) -> IdVal = erlfdb:wait(erlfdb:get(Tx, IdKey)), AttVals = erlfdb:wait(erlfdb:get_range_startswith(Tx, AttKey)), - ?assertEqual(<<>>, IdVal), + ?assertEqual(erlfdb_tuple:pack({0, true}), IdVal), + Opts = [{minor_version, 1}, {compressed, 6}], + Expect = term_to_binary(<<"foobar">>, Opts), + ?assertMatch([{_, Expect}], AttVals) + end). + + +create_att_already_compressed({Db, _}) -> + DocId = fabric2_util:uuid(), + Att1 = couch_att:new([ + {name, <<"foo.txt">>}, + {type, <<"application/octet-stream">>}, + {att_len, 6}, + {data, <<"foobar">>}, + {encoding, gzip}, + {md5, <<>>} + ]), + Doc1 = #doc{ + id = DocId, + atts = [Att1] + }, + {ok, _} = fabric2_db:update_doc(Db, Doc1), + {ok, Doc2} = fabric2_db:open_doc(Db, DocId), + #doc{ + atts = [Att2] + } = Doc2, + {loc, _Db, DocId, AttId} = couch_att:fetch(data, Att2), + AttData = fabric2_db:read_attachment(Db, DocId, AttId), + ?assertEqual(<<"foobar">>, AttData), + + % Check that the raw keys exist + #{ + db_prefix := DbPrefix + } = Db, + IdKey = erlfdb_tuple:pack({?DB_ATT_NAMES, DocId, AttId}, DbPrefix), + AttKey = erlfdb_tuple:pack({?DB_ATTS, DocId, AttId}, DbPrefix), + + fabric2_fdb:transactional(fun(Tx) -> + IdVal = erlfdb:wait(erlfdb:get(Tx, IdKey)), + AttVals = erlfdb:wait(erlfdb:get_range_startswith(Tx, AttKey)), + + ?assertEqual(erlfdb_tuple:pack({0, false}), IdVal), ?assertMatch([{_, <<"foobar">>}], AttVals) end). @@ -175,7 +217,7 @@ large_att({Db, _}) -> AttData = iolist_to_binary([ <<"foobar">> || _ <- lists:seq(1, 60000) ]), - Att1 = mk_att(<<"long.txt">>, AttData), + Att1 = mk_att(<<"long.txt">>, AttData, gzip), {ok, _} = create_doc(Db, DocId, [Att1]), ?assertEqual(#{<<"long.txt">> => AttData}, read_atts(Db, DocId)), @@ -204,12 +246,16 @@ att_on_conflict_isolation({Db, _}) -> mk_att(Name, Data) -> + mk_att(Name, Data, identity). + + +mk_att(Name, Data, Encoding) -> couch_att:new([ {name, Name}, {type, <<"application/octet-stream">>}, {att_len, size(Data)}, {data, Data}, - {encoding, identity}, + {encoding, Encoding}, {md5, <<>>} ]). -- cgit v1.2.1 From a14f62d3f0bbb16f57d692d63028579f96affc5e Mon Sep 17 00:00:00 2001 From: ILYA Khlopotov Date: Tue, 7 Apr 2020 09:46:30 -0700 Subject: Add mango_plugin Implement the following extention points: - `before_find(Req) -> {ok, Req}` - `after_find(Req, HttpResp, KVs) -> {ok, KVs}` --- src/mango/src/mango_epi.erl | 4 +++- src/mango/src/mango_httpd.erl | 20 +++++++++++--------- src/mango/src/mango_plugin.erl | 43 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 57 insertions(+), 10 deletions(-) create mode 100644 src/mango/src/mango_plugin.erl diff --git a/src/mango/src/mango_epi.erl b/src/mango/src/mango_epi.erl index 1fcd05b7f..d593d6371 100644 --- a/src/mango/src/mango_epi.erl +++ b/src/mango/src/mango_epi.erl @@ -33,7 +33,9 @@ providers() -> ]. services() -> - []. + [ + {mango, mango_plugin} + ]. data_subscriptions() -> []. diff --git a/src/mango/src/mango_httpd.erl b/src/mango/src/mango_httpd.erl index 94aa866d2..8d5a2123d 100644 --- a/src/mango/src/mango_httpd.erl +++ b/src/mango/src/mango_httpd.erl @@ -187,17 +187,18 @@ handle_explain_req(Req, _Db) -> chttpd:send_method_not_allowed(Req, "POST"). -handle_find_req(#httpd{method='POST'}=Req, Db) -> - chttpd:validate_ctype(Req, "application/json"), - Body = chttpd:json_body_obj(Req), +handle_find_req(#httpd{method='POST'}=Req0, Db) -> + {ok, Req1} = mango_plugin:before_find(Req0), + chttpd:validate_ctype(Req1, "application/json"), + Body = chttpd:json_body_obj(Req1), {ok, Opts0} = mango_opts:validate_find(Body), {value, {selector, Sel}, Opts} = lists:keytake(selector, 1, Opts0), - {ok, Resp0} = start_find_resp(Req), + {ok, Resp0} = start_find_resp(Req1), case run_find(Resp0, Db, Sel, Opts) of {ok, AccOut} -> - end_find_resp(AccOut); + end_find_resp(Req1, AccOut); {error, Error} -> - chttpd:send_error(Req, Error) + chttpd:send_error(Req1, Error) end; @@ -225,14 +226,15 @@ start_find_resp(Req) -> chttpd:start_delayed_json_response(Req, 200, [], "{\"docs\":["). -end_find_resp(Acc0) -> - #vacc{resp=Resp00, buffer=Buf, kvs=KVs, threshold=Max} = Acc0, +end_find_resp(Req, Acc0) -> + #vacc{resp=Resp00, buffer=Buf, kvs=KVs0, threshold=Max} = Acc0, {ok, Resp0} = chttpd:close_delayed_json_object(Resp00, Buf, "\r\n]", Max), + {ok, KVs1} = mango_plugin:after_find(Req, Resp0, KVs0), FinalAcc = lists:foldl(fun({K, V}, Acc) -> JK = ?JSON_ENCODE(K), JV = ?JSON_ENCODE(V), [JV, ": ", JK, ",\r\n" | Acc] - end, [], KVs), + end, [], KVs1), Chunk = lists:reverse(FinalAcc, ["}\r\n"]), {ok, Resp1} = chttpd:send_delayed_chunk(Resp0, Chunk), chttpd:end_delayed_json_response(Resp1). diff --git a/src/mango/src/mango_plugin.erl b/src/mango/src/mango_plugin.erl new file mode 100644 index 000000000..296a35419 --- /dev/null +++ b/src/mango/src/mango_plugin.erl @@ -0,0 +1,43 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(mango_plugin). + +-export([ + before_find/1, + after_find/3 +]). + +-define(SERVICE_ID, mango). + +%% ------------------------------------------------------------------ +%% API Function Definitions +%% ------------------------------------------------------------------ + +before_find(HttpReq0) -> + with_pipe(before_find, [HttpReq0]). + + +after_find(HttpReq, HttpResp, Arg0) -> + with_pipe(after_find, [HttpReq, HttpResp, Arg0]). + +%% ------------------------------------------------------------------ +%% Internal Function Definitions +%% ------------------------------------------------------------------ + +with_pipe(Func, Args) -> + do_apply(Func, Args, [pipe]). + + +do_apply(Func, Args, Opts) -> + Handle = couch_epi:get_handle(?SERVICE_ID), + couch_epi:apply(Handle, ?SERVICE_ID, Func, Args, Opts). -- cgit v1.2.1 From 56137f341e0fd22c7027a52b5c7e5eb1aa75aee0 Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Thu, 9 Apr 2020 16:48:25 -0400 Subject: Fix job removal notifications Fix the case when a job is removed while there are subscribers waiting for it. Most of the logic was already there except: * Handle the case when when data decoded from subscription results could be `not_found`, in that case we just pass that atom back as is. * Need to notify the watch when jobs are removed or couch_jobs_notifiers would wake up and send notification messages. --- src/couch_jobs/src/couch_jobs_fdb.erl | 4 ++++ src/couch_jobs/test/couch_jobs_tests.erl | 28 ++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/src/couch_jobs/src/couch_jobs_fdb.erl b/src/couch_jobs/src/couch_jobs_fdb.erl index 4c8cd9f37..891aedc79 100644 --- a/src/couch_jobs/src/couch_jobs_fdb.erl +++ b/src/couch_jobs/src/couch_jobs_fdb.erl @@ -122,6 +122,7 @@ remove(#{jtx := true} = JTx0, #{job := true} = Job) -> #jv{stime = STime} -> couch_jobs_pending:remove(JTx, Type, JobId, STime), erlfdb:clear(Tx, Key), + update_watch(JTx, Type), ok; not_found -> {error, not_found} @@ -422,6 +423,9 @@ encode_data(#{} = JobData) -> end. +decode_data(not_found) -> + not_found; + decode_data(#{} = JobData) -> JobData; diff --git a/src/couch_jobs/test/couch_jobs_tests.erl b/src/couch_jobs/test/couch_jobs_tests.erl index af95eebe6..fbe4e93a0 100644 --- a/src/couch_jobs/test/couch_jobs_tests.erl +++ b/src/couch_jobs/test/couch_jobs_tests.erl @@ -56,6 +56,8 @@ couch_jobs_basic_test_() -> fun accept_max_schedtime/1, fun accept_no_schedule/1, fun subscribe/1, + fun remove_when_subscribed_and_pending/1, + fun remove_when_subscribed_and_running/1, fun subscribe_wait_multiple/1, fun enqueue_inactive/1, fun remove_running_job/1, @@ -571,6 +573,32 @@ subscribe(#{t1 := T, j1 := J}) -> end). +remove_when_subscribed_and_pending(#{t1 := T, j1 := J}) -> + ?_test(begin + ok = couch_jobs:add(?TX, T, J, #{<<"x">> => 1}), + {ok, SId, pending, _} = couch_jobs:subscribe(T, J), + + couch_jobs:remove(?TX, T, J), + + ?assertMatch({T, J, not_found, not_found}, couch_jobs:wait(SId, 5000)), + ?assertEqual(timeout, couch_jobs:wait(SId, 50)) + end). + + +remove_when_subscribed_and_running(#{t1 := T, j1 := J}) -> + ?_test(begin + ok = couch_jobs:add(?TX, T, J, #{<<"z">> => 2}), + {ok, SId, pending, _} = couch_jobs:subscribe(T, J), + {ok, #{}, _} = couch_jobs:accept(T), + ?assertMatch({_, _, running, _}, couch_jobs:wait(SId, 5000)), + + couch_jobs:remove(?TX, T, J), + + ?assertMatch({T, J, not_found, not_found}, couch_jobs:wait(SId, 5000)), + ?assertEqual(timeout, couch_jobs:wait(SId, 50)) + end). + + subscribe_wait_multiple(#{t1 := T, j1 := J1, j2 := J2}) -> ?_test(begin ok = couch_jobs:add(?TX, T, J1, #{}), -- cgit v1.2.1 From daf10824af695669dde61828414e8aff9e2de9d9 Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Thu, 9 Apr 2020 10:42:18 -0500 Subject: Fix division by zero --- src/couch_rate/src/couch_rate_limiter.erl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/couch_rate/src/couch_rate_limiter.erl b/src/couch_rate/src/couch_rate_limiter.erl index 349da8d5a..6f852b1d8 100644 --- a/src/couch_rate/src/couch_rate_limiter.erl +++ b/src/couch_rate/src/couch_rate_limiter.erl @@ -135,6 +135,8 @@ budget(Id, #?STATE{} = State) -> overloaded -> %% decrease budget {max(1, round(R * MultiplicativeFactor)), State}; + underloaded when W == 0 orelse Latency == 0 -> + {max(1, round(MR)), State}; underloaded -> ReadWriteRatio = min(1, MR / max(1, MW)), SingleWrite = Latency / W, -- cgit v1.2.1 From cbad08d5fa4308fa9d243bae8eabd4872eaa5837 Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Thu, 9 Apr 2020 17:25:42 -0400 Subject: Make 'make check' run all the passing FDB tests on this branch This will let CI gate PRs hopefully as we now have the necessary FDB server bits installed in Jenkins. Eventually when we can run all the tests revert to using `make check-all-tests` as the default. --- Makefile | 10 ++++++---- build-aux/Jenkinsfile.pr | 2 +- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/Makefile b/Makefile index 592093ea3..ebdab2200 100644 --- a/Makefile +++ b/Makefile @@ -144,9 +144,11 @@ fauxton: share/www ################################################################################ -.PHONY: check +# When we can run all the tests with FDB switch this back to be the default +# "make check" command +.PHONY: check-all-tests # target: check - Test everything -check: all python-black +check-all-tests: all python-black @$(MAKE) eunit @$(MAKE) javascript @$(MAKE) mango-test @@ -158,8 +160,8 @@ else subdirs=$(shell ls src) endif -.PHONY: check-fdb -check-fdb: +.PHONY: check +check: all make eunit apps=couch_eval,couch_expiring_cache,ctrace,couch_jobs,couch_views,fabric,mango,chttpd make elixir tests=test/elixir/test/basics_test.exs,test/elixir/test/replication_test.exs,test/elixir/test/map_test.exs,test/elixir/test/all_docs_test.exs,test/elixir/test/bulk_docs_test.exs make exunit tests=src/couch_rate/test/exunit/ diff --git a/build-aux/Jenkinsfile.pr b/build-aux/Jenkinsfile.pr index 8c9cbd930..ca548ff27 100644 --- a/build-aux/Jenkinsfile.pr +++ b/build-aux/Jenkinsfile.pr @@ -21,7 +21,7 @@ cd build tar -xf ${WORKSPACE}/apache-couchdb-*.tar.gz cd apache-couchdb-* . /usr/local/kerl/${KERL_VER}/activate -./configure --with-curl --spidermonkey-version 60 +./configure --with-curl make check || (make build-report && false) ''' -- cgit v1.2.1 From 2bb0ccda2935c028b4fe7fc8edd28f0d1a7febf8 Mon Sep 17 00:00:00 2001 From: ILYA Khlopotov Date: Fri, 10 Apr 2020 07:06:17 -0700 Subject: Fix incorrect usage of couch_epi in mango plugin Previously we used the value returned from couch_epi apply as is. However it returns a list of arguments passed in the same order. --- src/mango/src/mango_plugin.erl | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/mango/src/mango_plugin.erl b/src/mango/src/mango_plugin.erl index 296a35419..de23f8e7c 100644 --- a/src/mango/src/mango_plugin.erl +++ b/src/mango/src/mango_plugin.erl @@ -24,11 +24,14 @@ %% ------------------------------------------------------------------ before_find(HttpReq0) -> - with_pipe(before_find, [HttpReq0]). + [HttpReq1] = with_pipe(before_find, [HttpReq0]), + {ok, HttpReq1}. after_find(HttpReq, HttpResp, Arg0) -> - with_pipe(after_find, [HttpReq, HttpResp, Arg0]). + [_HttpReq, _HttpResp, Arg1] = with_pipe(after_find, [HttpReq, HttpResp, Arg0]), + {ok, Arg1}. + %% ------------------------------------------------------------------ %% Internal Function Definitions -- cgit v1.2.1 From d4bc3a50f7d30f318180950de62b91c0da4a3846 Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Fri, 10 Apr 2020 11:38:02 -0400 Subject: Fix flaky fabric2_index test Previously in the the test we first set up the callback, then disabled the indexing. By that time, there was a chance that building could have started especially in a slower execution environment. --- src/fabric/test/fabric2_index_tests.erl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/fabric/test/fabric2_index_tests.erl b/src/fabric/test/fabric2_index_tests.erl index 3fc8a5b18..e0c3e8ba9 100644 --- a/src/fabric/test/fabric2_index_tests.erl +++ b/src/fabric/test/fabric2_index_tests.erl @@ -213,14 +213,14 @@ updater_processes_stop(#{}) -> indexing_can_be_disabled(#{db1 := Db}) -> - Mod = fabric2_test_callback7, - setup_callback(Mod), - meck:expect(config, get_boolean, fun ("fabric", "index_updater_enabled", _) -> false; (_, _, Default) -> Default end), + Mod = fabric2_test_callback7, + setup_callback(Mod), + create_doc(Db), timer:sleep(500), ?assertEqual(0, meck:num_calls(Mod, build_indices, 2)), -- cgit v1.2.1 From 742c64e6a7d9a60844005a04ff9c0c22e7a6721a Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Wed, 25 Mar 2020 15:04:07 -0500 Subject: Fix index updater configuration keys The defaults commented out in `default.ini` did not match the names used in `fabric2_index.erl`. --- rel/overlay/etc/default.ini | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/rel/overlay/etc/default.ini b/rel/overlay/etc/default.ini index d2a2c7257..376089a98 100644 --- a/rel/overlay/etc/default.ini +++ b/rel/overlay/etc/default.ini @@ -228,14 +228,14 @@ port = 6984 ;fdb_directory = couchdb ; ; Enable or disable index auto-updater -;index_autoupdater_enabled = true +;index_updater_enabled = true ; ; How long to wait from the first db update event until index building is ; triggered. -;index_autoupdater_delay_msec = 60000 +;index_updater_delay_msec = 60000 ; ; How often to check if databases may need their indices updated. -;index_autoupdater_resolution_msec = 10000 +;index_updater_resolution_msec = 10000 ; [rexi] ; buffer_count = 2000 -- cgit v1.2.1 From 247b80997fc2b1777606746c6b4ec4eaa48ad352 Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Wed, 25 Mar 2020 16:04:06 -0500 Subject: Rename variables to indicate transaction state Usually we indicate the transaction status of a Db handle by naming it `TxDb`. This updates fabric2_index:build_indices/2 to match that pattern. Co-Authored-By: Nick Vatamaniuc --- src/fabric/src/fabric2_index.erl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/fabric/src/fabric2_index.erl b/src/fabric/src/fabric2_index.erl index 938210514..098d6edd4 100644 --- a/src/fabric/src/fabric2_index.erl +++ b/src/fabric/src/fabric2_index.erl @@ -155,12 +155,12 @@ process_updates_iter([Db | Rest], Cont) -> process_updates_iter(Rest, Cont). -build_indices(_Db, []) -> +build_indices(_TxDb, []) -> []; -build_indices(Db, DDocs) -> +build_indices(TxDb, DDocs) -> lists:flatmap(fun(Mod) -> - Mod:build_indices(Db, DDocs) + Mod:build_indices(TxDb, DDocs) end, registrations()). -- cgit v1.2.1 From 3c0a017b63b31b1e8d62707c0b909d8f24def1ba Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Wed, 25 Mar 2020 16:07:43 -0500 Subject: Move process_db/1 to match the logical progression Functions are easier to read and process if they're defined in the order that they are referenced. Co-Authored-By: Nick Vatamaniuc --- src/fabric/src/fabric2_index.erl | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/fabric/src/fabric2_index.erl b/src/fabric/src/fabric2_index.erl index 098d6edd4..8d0affebb 100644 --- a/src/fabric/src/fabric2_index.erl +++ b/src/fabric/src/fabric2_index.erl @@ -155,6 +155,16 @@ process_updates_iter([Db | Rest], Cont) -> process_updates_iter(Rest, Cont). +process_db(DbName) when is_binary(DbName) -> + {ok, Db} = fabric2_db:open(DbName, [?ADMIN_CTX]), + fabric2_fdb:transactional(Db, fun(TxDb) -> + DDocs1 = get_design_docs(TxDb), + DDocs2 = lists:filter(fun should_update/1, DDocs1), + DDocs3 = shuffle(DDocs2), + build_indices(TxDb, DDocs3) + end). + + build_indices(_TxDb, []) -> []; @@ -168,16 +178,6 @@ registrations() -> application:get_env(fabric, indices, []). -process_db(DbName) when is_binary(DbName) -> - {ok, Db} = fabric2_db:open(DbName, [?ADMIN_CTX]), - fabric2_fdb:transactional(Db, fun(TxDb) -> - DDocs1 = get_design_docs(TxDb), - DDocs2 = lists:filter(fun should_update/1, DDocs1), - DDocs3 = shuffle(DDocs2), - build_indices(TxDb, DDocs3) - end). - - get_design_docs(Db) -> Callback = fun ({meta, _}, Acc) -> {ok, Acc}; -- cgit v1.2.1 From 3e1c822794990cab06002c4d8a7a6ac22069d9a4 Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Wed, 25 Mar 2020 14:33:15 -0500 Subject: Update to use `fabric2_db:get_design_docs/1` --- src/fabric/src/fabric2_index.erl | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/src/fabric/src/fabric2_index.erl b/src/fabric/src/fabric2_index.erl index 8d0affebb..9a6607e2e 100644 --- a/src/fabric/src/fabric2_index.erl +++ b/src/fabric/src/fabric2_index.erl @@ -158,7 +158,7 @@ process_updates_iter([Db | Rest], Cont) -> process_db(DbName) when is_binary(DbName) -> {ok, Db} = fabric2_db:open(DbName, [?ADMIN_CTX]), fabric2_fdb:transactional(Db, fun(TxDb) -> - DDocs1 = get_design_docs(TxDb), + DDocs1 = fabric2_db:get_design_docs(TxDb), DDocs2 = lists:filter(fun should_update/1, DDocs1), DDocs3 = shuffle(DDocs2), build_indices(TxDb, DDocs3) @@ -178,22 +178,6 @@ registrations() -> application:get_env(fabric, indices, []). -get_design_docs(Db) -> - Callback = fun - ({meta, _}, Acc) -> {ok, Acc}; - (complete, Acc) -> {ok, Acc}; - ({row, Row}, Acc) -> {ok, [get_doc(Db, Row) | Acc]} - end, - {ok, DDocs} = fabric2_db:fold_design_docs(Db, Callback, [], []), - DDocs. - - -get_doc(Db, Row) -> - {_, DocId} = lists:keyfind(id, 1, Row), - {ok, #doc{deleted = false} = Doc} = fabric2_db:open_doc(Db, DocId, []), - Doc. - - should_update(#doc{body = {Props}}) -> couch_util:get_value(<<"autoupdate">>, Props, true). -- cgit v1.2.1 From 7bc9148b75b1396b91b6cdccca2c8e87b791e0f0 Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Wed, 25 Mar 2020 14:50:45 -0500 Subject: Extend fabric2_index callbacks for index cleanup Each registered index type can now get a signal on when to clean up their indexes. --- src/fabric/src/fabric2_index.erl | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/src/fabric/src/fabric2_index.erl b/src/fabric/src/fabric2_index.erl index 9a6607e2e..7f9d51974 100644 --- a/src/fabric/src/fabric2_index.erl +++ b/src/fabric/src/fabric2_index.erl @@ -19,6 +19,7 @@ -export([ register_index/1, db_updated/1, + cleanup/1, start_link/0 ]). @@ -38,6 +39,9 @@ -callback build_indices(Db :: map(), DDocs :: list(#doc{})) -> [{ok, JobId::binary()} | {error, any()}]. +-callback cleanup_indices(Db :: map(), DDocs :: list(#doc{})) -> + [ok | {error, any()}]. + -define(SHARDS, 32). -define(DEFAULT_DELAY_MSEC, 60000). @@ -54,6 +58,25 @@ db_updated(DbName) when is_binary(DbName) -> ets:insert_new(Table, {DbName, now_msec()}). +cleanup(Db) -> + try + fabric2_fdb:transactional(Db, fun(TxDb) -> + DDocs = fabric2_db:get_design_docs(TxDb), + lists:foreach(fun(Mod) -> + Mod:cleanup_indices(TxDb, DDocs) + end, registrations()) + end) + catch + error:database_does_not_exist -> + ok; + Tag:Reason -> + Stack = erlang:get_stacktrace(), + DbName = fabric2_db:name(Db), + LogMsg = "~p failed to cleanup indices for `~s` ~p:~p ~p", + couch_log:error(LogMsg, [?MODULE, DbName, Tag, Reason, Stack]) + end. + + start_link() -> gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). -- cgit v1.2.1 From e0d0391ff8a639101814fae8e74f73b2403561fd Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Wed, 25 Mar 2020 14:51:47 -0500 Subject: Implement couch_views:cleanup_indices/2 --- src/couch_views/src/couch_views.erl | 18 +++++++++++++++ src/couch_views/src/couch_views_fdb.erl | 40 ++++++++++++++++++++++++++++++++- 2 files changed, 57 insertions(+), 1 deletion(-) diff --git a/src/couch_views/src/couch_views.erl b/src/couch_views/src/couch_views.erl index 2acba00a6..cc183643b 100644 --- a/src/couch_views/src/couch_views.erl +++ b/src/couch_views/src/couch_views.erl @@ -21,6 +21,7 @@ % fabric2_index behavior build_indices/2, + cleanup_indices/2, get_info/2 ]). @@ -76,6 +77,23 @@ build_indices(#{} = Db, DDocs) when is_list(DDocs) -> end, DDocs). +cleanup_indices(#{} = Db, DDocs) when is_list(DDocs) -> + DbName = fabric2_db:name(Db), + ActiveSigs = lists:filtermap(fun(DDoc) -> + try couch_views_util:ddoc_to_mrst(DbName, DDoc) of + {ok, #mrst{sig = Sig}} -> + {true, Sig} + catch _:_ -> + false + end + end, DDocs), + ExistingSigs = couch_views_fdb:list_signatures(Db), + StaleSigs = ExistingSigs -- ActiveSigs, + lists:foreach(fun(Sig) -> + couch_views_fdb:clear_index(Db, Sig) + end, StaleSigs). + + get_info(Db, DDoc) -> DbName = fabric2_db:name(Db), {ok, Mrst} = couch_views_util:ddoc_to_mrst(DbName, DDoc), diff --git a/src/couch_views/src/couch_views_fdb.erl b/src/couch_views/src/couch_views_fdb.erl index 3b008d44b..2181e5373 100644 --- a/src/couch_views/src/couch_views_fdb.erl +++ b/src/couch_views/src/couch_views_fdb.erl @@ -27,7 +27,10 @@ fold_map_idx/6, - write_doc/4 + write_doc/4, + + list_signatures/1, + clear_index/2 ]). -ifdef(TEST). @@ -211,6 +214,41 @@ write_doc(TxDb, Sig, ViewIds, Doc) -> end, lists:zip3(ViewIds, Results, KVSizes)). +list_signatures(Db) -> + #{ + db_prefix := DbPrefix + } = Db, + ViewSeqRange = {?DB_VIEWS, ?VIEW_INFO, ?VIEW_UPDATE_SEQ}, + RangePrefix = erlfdb_tuple:pack(ViewSeqRange, DbPrefix), + fabric2_fdb:fold_range(Db, RangePrefix, fun({Key, _Val}, Acc) -> + {Sig} = erlfdb_tuple:unpack(Key, RangePrefix), + [Sig | Acc] + end, [], []). + + +clear_index(Db, Signature) -> + #{ + tx := Tx, + db_prefix := DbPrefix + } = Db, + + % Clear index info keys + Keys = [ + {?DB_VIEWS, ?VIEW_INFO, ?VIEW_UPDATE_SEQ, Signature}, + {?DB_VIEWS, ?VIEW_INFO, ?VIEW_ROW_COUNT, Signature}, + {?DB_VIEWS, ?VIEW_INFO, ?VIEW_KV_SIZE, Signature} + ], + lists:foreach(fun(Key) -> + FDBKey = erlfdb_tuple:pack(Key, DbPrefix), + erlfdb:clear(Tx, FDBKey) + end, Keys), + + % Clear index data + RangeTuple = {?DB_VIEWS, ?VIEW_DATA, Signature}, + RangePrefix = erlfdb_tuple:pack(RangeTuple, DbPrefix), + erlfdb:clear_range_startswith(Tx, RangePrefix). + + % For each row in a map view we store the the key/value % in FoundationDB: % -- cgit v1.2.1 From 4275a496dbfcec36ff0777b1cf350fffcc7756b9 Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Wed, 25 Mar 2020 14:52:26 -0500 Subject: Implement _view_cleanup for FoundationDB --- src/chttpd/src/chttpd_db.erl | 2 +- src/chttpd/src/chttpd_httpd_handlers.erl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/chttpd/src/chttpd_db.erl b/src/chttpd/src/chttpd_db.erl index 730cf3ef5..8dd0c931b 100644 --- a/src/chttpd/src/chttpd_db.erl +++ b/src/chttpd/src/chttpd_db.erl @@ -265,7 +265,7 @@ handle_compact_req(Req, _Db) -> send_method_not_allowed(Req, "POST"). handle_view_cleanup_req(Req, Db) -> - ok = fabric:cleanup_index_files_all_nodes(Db), + ok = fabric2_index:cleanup(Db), send_json(Req, 202, {[{ok, true}]}). diff --git a/src/chttpd/src/chttpd_httpd_handlers.erl b/src/chttpd/src/chttpd_httpd_handlers.erl index 3fd56c354..79ec3db8e 100644 --- a/src/chttpd/src/chttpd_httpd_handlers.erl +++ b/src/chttpd/src/chttpd_httpd_handlers.erl @@ -40,7 +40,7 @@ url_handler(<<"_session">>) -> fun chttpd_auth:handle_session_req/1; url_handler(<<"_up">>) -> fun chttpd_misc:handle_up_req/1; url_handler(_) -> no_match. -db_handler(<<"_view_cleanup">>) -> fun ?MODULE:not_implemented/2; +db_handler(<<"_view_cleanup">>) -> fun chttpd_db:handle_view_cleanup_req/2; db_handler(<<"_compact">>) -> fun chttpd_db:handle_compact_req/2; db_handler(<<"_design">>) -> fun chttpd_db:handle_design_req/2; db_handler(<<"_partition">>) -> fun chttpd_db:handle_partition_req/2; -- cgit v1.2.1 From 7aeb54bf6012c234c5806db6f438427c9cb53c4a Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Wed, 25 Mar 2020 15:00:45 -0500 Subject: Optionally cleanup stale indices automatically --- rel/overlay/etc/default.ini | 3 +++ src/fabric/src/fabric2_index.erl | 22 +++++++++++++++++----- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/rel/overlay/etc/default.ini b/rel/overlay/etc/default.ini index 376089a98..e10a5a0c7 100644 --- a/rel/overlay/etc/default.ini +++ b/rel/overlay/etc/default.ini @@ -236,6 +236,9 @@ port = 6984 ; ; How often to check if databases may need their indices updated. ;index_updater_resolution_msec = 10000 +; +; Enable or disable automatic stale index removal in the auto-updater +;index_updater_remove_old_indices = false ; [rexi] ; buffer_count = 2000 diff --git a/src/fabric/src/fabric2_index.erl b/src/fabric/src/fabric2_index.erl index 7f9d51974..25c31a8c8 100644 --- a/src/fabric/src/fabric2_index.erl +++ b/src/fabric/src/fabric2_index.erl @@ -61,10 +61,8 @@ db_updated(DbName) when is_binary(DbName) -> cleanup(Db) -> try fabric2_fdb:transactional(Db, fun(TxDb) -> - DDocs = fabric2_db:get_design_docs(TxDb), - lists:foreach(fun(Mod) -> - Mod:cleanup_indices(TxDb, DDocs) - end, registrations()) + DDocs = fabric2_db:get_design_docs(Db), + cleanup_indices(TxDb, DDocs) end) catch error:database_does_not_exist -> @@ -184,7 +182,11 @@ process_db(DbName) when is_binary(DbName) -> DDocs1 = fabric2_db:get_design_docs(TxDb), DDocs2 = lists:filter(fun should_update/1, DDocs1), DDocs3 = shuffle(DDocs2), - build_indices(TxDb, DDocs3) + build_indices(TxDb, DDocs3), + case auto_cleanup() of + true -> cleanup_indices(TxDb, DDocs1); + false -> ok + end end). @@ -197,6 +199,12 @@ build_indices(TxDb, DDocs) -> end, registrations()). +cleanup_indices(TxDb, DDocs) -> + lists:foreach(fun(Mod) -> + Mod:cleanup_indices(TxDb, DDocs) + end, registrations()). + + registrations() -> application:get_env(fabric, indices, []). @@ -227,3 +235,7 @@ delay_msec() -> resolution_msec() -> config:get_integer("fabric", "index_updater_resolution_msec", ?DEFAULT_RESOLUTION_MSEC). + + +auto_cleanup() -> + config:get_boolean("fabric", "index_updater_remove_old_indices", false). -- cgit v1.2.1 From 2e5a5566be88036ae6bb8d1f420bd6028f7c6253 Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Thu, 9 Apr 2020 12:57:51 -0500 Subject: Remove jobs on index cleanup --- src/couch_views/src/couch_views.erl | 1 + src/couch_views/src/couch_views_jobs.erl | 9 ++++++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/couch_views/src/couch_views.erl b/src/couch_views/src/couch_views.erl index cc183643b..9d518ebce 100644 --- a/src/couch_views/src/couch_views.erl +++ b/src/couch_views/src/couch_views.erl @@ -90,6 +90,7 @@ cleanup_indices(#{} = Db, DDocs) when is_list(DDocs) -> ExistingSigs = couch_views_fdb:list_signatures(Db), StaleSigs = ExistingSigs -- ActiveSigs, lists:foreach(fun(Sig) -> + couch_views_jobs:remove(Db, Sig), couch_views_fdb:clear_index(Db, Sig) end, StaleSigs). diff --git a/src/couch_views/src/couch_views_jobs.erl b/src/couch_views/src/couch_views_jobs.erl index b97e7ce0f..76cc56337 100644 --- a/src/couch_views/src/couch_views_jobs.erl +++ b/src/couch_views/src/couch_views_jobs.erl @@ -15,7 +15,8 @@ -export([ set_timeout/0, build_view/3, - build_view_async/2 + build_view_async/2, + remove/2 ]). -ifdef(TEST). @@ -60,6 +61,12 @@ build_view_async(TxDb0, Mrst) -> {ok, JobId}. +remove(TxDb, Sig) -> + DbName = fabric2_db:name(TxDb), + JobId = job_id(DbName, Sig), + couch_jobs:remove(TxDb, ?INDEX_JOB_TYPE, JobId). + + ensure_correct_tx(#{tx := undefined} = TxDb) -> TxDb; -- cgit v1.2.1 From 30fdef77571c67c945d70fb54c07157c4643f828 Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Fri, 10 Apr 2020 15:07:22 -0500 Subject: Remove failed view jobs If a client notices that a job has failed we restart it. If a job failed for a different design document id then we resubmit the build request. --- src/couch_views/src/couch_views_jobs.erl | 26 +++++++++++++++++------ src/couch_views/test/couch_views_indexer_test.erl | 2 +- src/couch_views/test/couch_views_map_test.erl | 2 +- 3 files changed, 22 insertions(+), 8 deletions(-) diff --git a/src/couch_views/src/couch_views_jobs.erl b/src/couch_views/src/couch_views_jobs.erl index 76cc56337..d0de44ea8 100644 --- a/src/couch_views/src/couch_views_jobs.erl +++ b/src/couch_views/src/couch_views_jobs.erl @@ -35,7 +35,7 @@ set_timeout() -> build_view(TxDb, Mrst, UpdateSeq) -> {ok, JobId} = build_view_async(TxDb, Mrst), - case wait_for_job(JobId, UpdateSeq) of + case wait_for_job(JobId, Mrst#mrst.idx_name, UpdateSeq) of ok -> ok; retry -> build_view(TxDb, Mrst, UpdateSeq) end. @@ -77,10 +77,10 @@ ensure_correct_tx(#{tx := Tx} = TxDb) -> end. -wait_for_job(JobId, UpdateSeq) -> +wait_for_job(JobId, DDocId, UpdateSeq) -> case couch_jobs:subscribe(?INDEX_JOB_TYPE, JobId) of {ok, Subscription, _State, _Data} -> - wait_for_job(JobId, Subscription, UpdateSeq); + wait_for_job(JobId, Subscription, DDocId, UpdateSeq); {ok, finished, Data} -> case Data of #{<<"view_seq">> := ViewSeq} when ViewSeq >= UpdateSeq -> @@ -91,21 +91,35 @@ wait_for_job(JobId, UpdateSeq) -> end. -wait_for_job(JobId, Subscription, UpdateSeq) -> +wait_for_job(JobId, Subscription, DDocId, UpdateSeq) -> case wait(Subscription) of + {not_found, not_found} -> + erlang:error(index_not_found); {error, Error} -> erlang:error(Error); + {finished, #{<<"error">> := <<"ddoc_deleted">>} = Data} -> + case maps:get(<<"ddoc_id">>, Data) of + DDocId -> + couch_jobs:remove(undefined, ?INDEX_JOB_TYPE, JobId), + erlang:error({ddoc_deleted, maps:get(<<"reason">>, Data)}); + _OtherDocId -> + % A different design doc wiht the same signature + % was deleted. Resubmit this job which will overwrite + % the ddoc_id in the job. + retry + end; {finished, #{<<"error">> := Error, <<"reason">> := Reason}} -> + couch_jobs:remove(undefined, ?INDEX_JOB_TYPE, JobId), erlang:error({binary_to_existing_atom(Error, latin1), Reason}); {finished, #{<<"view_seq">> := ViewSeq}} when ViewSeq >= UpdateSeq -> ok; {finished, _} -> - wait_for_job(JobId, UpdateSeq); + wait_for_job(JobId, DDocId, UpdateSeq); {_State, #{<<"view_seq">> := ViewSeq}} when ViewSeq >= UpdateSeq -> couch_jobs:unsubscribe(Subscription), ok; {_, _} -> - wait_for_job(JobId, Subscription, UpdateSeq) + wait_for_job(JobId, Subscription, DDocId, UpdateSeq) end. diff --git a/src/couch_views/test/couch_views_indexer_test.erl b/src/couch_views/test/couch_views_indexer_test.erl index 8ddb64b9c..54f787da3 100644 --- a/src/couch_views/test/couch_views_indexer_test.erl +++ b/src/couch_views/test/couch_views_indexer_test.erl @@ -375,7 +375,7 @@ index_autoupdater_callback(Db) -> ?assertMatch([{ok, <<_/binary>>}], Result), [{ok, JobId}] = Result, - ?assertEqual(ok, couch_views_jobs:wait_for_job(JobId, DbSeq)). + ?assertEqual(ok, couch_views_jobs:wait_for_job(JobId, DDoc#doc.id, DbSeq)). index_budget_is_changing(Db) -> ok = meck:new(couch_rate, [passthrough]), diff --git a/src/couch_views/test/couch_views_map_test.erl b/src/couch_views/test/couch_views_map_test.erl index 7d1e94b2c..2b679f07c 100644 --- a/src/couch_views/test/couch_views_map_test.erl +++ b/src/couch_views/test/couch_views_map_test.erl @@ -409,7 +409,7 @@ should_map_update_is_lazy() -> {ok, Mrst} = couch_views_util:ddoc_to_mrst(DbName, DDoc), JobId = couch_views_jobs:job_id(Db, Mrst), UpdateSeq = fabric2_db:get_update_seq(Db), - ok = couch_views_jobs:wait_for_job(JobId, UpdateSeq), + ok = couch_views_jobs:wait_for_job(JobId, DDoc#doc.id, UpdateSeq), Args2 = #{ start_key => 8, -- cgit v1.2.1 From 757515231ff3d2e0228a078a1771837eff0b66d0 Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Wed, 25 Mar 2020 15:16:22 -0500 Subject: Implement couch_views_cleanup_test.erl Add tests for view cleanup. --- src/couch_views/test/couch_views_cleanup_test.erl | 411 ++++++++++++++++++++++ 1 file changed, 411 insertions(+) create mode 100644 src/couch_views/test/couch_views_cleanup_test.erl diff --git a/src/couch_views/test/couch_views_cleanup_test.erl b/src/couch_views/test/couch_views_cleanup_test.erl new file mode 100644 index 000000000..b5e081a98 --- /dev/null +++ b/src/couch_views/test/couch_views_cleanup_test.erl @@ -0,0 +1,411 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_views_cleanup_test). + + +-include_lib("couch/include/couch_db.hrl"). +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("eunit/include/eunit.hrl"). +-include_lib("couch_views/include/couch_views.hrl"). +-include_lib("couch_mrview/include/couch_mrview.hrl"). +-include_lib("fabric/include/fabric2.hrl"). +-include_lib("fabric/test/fabric2_test.hrl"). + + +clean_old_indices_test_() -> + { + "Test cleanup of stale indices", + { + setup, + fun setup_all/0, + fun cleanup_all/1, + { + foreach, + fun setup/0, + fun cleanup/1, + [ + ?TDEF_FE(empty_db), + ?TDEF_FE(db_with_no_ddocs), + ?TDEF_FE(db_with_ddoc), + ?TDEF_FE(db_with_many_ddocs), + ?TDEF_FE(after_ddoc_deletion), + ?TDEF_FE(all_ddocs_deleted), + ?TDEF_FE(after_ddoc_recreated), + ?TDEF_FE(refcounted_sigs), + ?TDEF_FE(removes_old_jobs), + ?TDEF_FE(after_job_accepted_initial_build), + ?TDEF_FE(after_job_accepted_rebuild), + ?TDEF_FE(during_index_initial_build), + ?TDEF_FE(during_index_rebuild) + ] + } + } + }. + + +setup_all() -> + test_util:start_couch([ + fabric, + couch_jobs, + couch_js, + couch_views + ]). + + +cleanup_all(Ctx) -> + test_util:stop_couch(Ctx). + + +setup() -> + Opts = [{user_ctx, ?ADMIN_USER}], + {ok, Db} = fabric2_db:create(?tempdb(), Opts), + Db. + + +cleanup(Db) -> + meck:unload(), + ok = fabric2_db:delete(fabric2_db:name(Db), []). + + +empty_db(Db) -> + ?assertEqual(ok, fabric2_index:cleanup(Db)). + + +db_with_no_ddocs(Db) -> + create_docs(Db, 10), + ?assertEqual(ok, fabric2_index:cleanup(Db)). + + +db_with_ddoc(Db) -> + create_docs(Db, 10), + DDoc = create_ddoc(Db, <<"foo">>), + ?assertEqual(10, length(run_query(Db, DDoc))), + ?assertEqual(ok, fabric2_index:cleanup(Db)), + ?assertEqual(10, length(run_query(Db, DDoc))). + + +db_with_many_ddocs(Db) -> + create_docs(Db, 10), + DDocs = create_ddocs(Db, 5), + lists:foreach(fun(DDoc) -> + ?assertEqual(10, length(run_query(Db, DDoc))) + end, DDocs), + ?assertEqual(ok, fabric2_index:cleanup(Db)). + + +after_ddoc_deletion(Db) -> + create_docs(Db, 10), + DDocs = create_ddocs(Db, 2), + lists:foreach(fun(DDoc) -> + ?assertEqual(10, length(run_query(Db, DDoc))) + end, DDocs), + [ToDel | RestDDocs] = DDocs, + delete_doc(Db, ToDel), + % Not yet cleaned up + ?assertEqual(true, view_has_data(Db, ToDel)), + ?assertEqual(ok, fabric2_index:cleanup(Db)), + ?assertError({ddoc_deleted, _}, run_query(Db, ToDel)), + lists:foreach(fun(DDoc) -> + ?assertEqual(10, length(run_query(Db, DDoc))) + end, RestDDocs). + + +all_ddocs_deleted(Db) -> + create_docs(Db, 10), + DDocs = create_ddocs(Db, 5), + lists:foreach(fun(DDoc) -> + ?assertEqual(10, length(run_query(Db, DDoc))) + end, DDocs), + lists:foreach(fun(DDoc) -> + delete_doc(Db, DDoc) + end, DDocs), + % Not yet cleaned up + lists:foreach(fun(DDoc) -> + ?assertEqual(true, view_has_data(Db, DDoc)) + end, DDocs), + ?assertEqual(ok, fabric2_index:cleanup(Db)), + lists:foreach(fun(DDoc) -> + ?assertError({ddoc_deleted, _}, run_query(Db, DDoc)) + end, DDocs). + + +after_ddoc_recreated(Db) -> + create_docs(Db, 10), + DDocs = create_ddocs(Db, 3), + lists:foreach(fun(DDoc) -> + ?assertEqual(10, length(run_query(Db, DDoc))) + end, DDocs), + [ToDel | RestDDocs] = DDocs, + Deleted = delete_doc(Db, ToDel), + % Not yet cleaned up + ?assertEqual(true, view_has_data(Db, ToDel)), + ?assertEqual(ok, fabric2_index:cleanup(Db)), + ?assertError({ddoc_deleted, _}, run_query(Db, ToDel)), + lists:foreach(fun(DDoc) -> + ?assertEqual(10, length(run_query(Db, DDoc))) + end, RestDDocs), + recreate_doc(Db, Deleted), + lists:foreach(fun(DDoc) -> + ?assertEqual(10, length(run_query(Db, DDoc))) + end, DDocs), + ?assertEqual(ok, fabric2_index:cleanup(Db)), + lists:foreach(fun(DDoc) -> + ?assertEqual(10, length(run_query(Db, DDoc))) + end, DDocs). + + +refcounted_sigs(Db) -> + create_docs(Db, 10), + DDoc1 = create_ddoc(Db, <<"1">>), + DDoc2 = create_doc(Db, <<"_design/2">>, DDoc1#doc.body), + ?assertEqual(10, length(run_query(Db, DDoc1))), + ?assertEqual(10, length(run_query(Db, DDoc2))), + + ?assertEqual(true, view_has_data(Db, DDoc1)), + ?assertEqual(true, view_has_data(Db, DDoc2)), + + delete_doc(Db, DDoc1), + ok = fabric2_index:cleanup(Db), + + ?assertEqual(true, view_has_data(Db, DDoc1)), + ?assertEqual(true, view_has_data(Db, DDoc2)), + + delete_doc(Db, DDoc2), + ok = fabric2_index:cleanup(Db), + + ?assertEqual(false, view_has_data(Db, DDoc1)), + ?assertEqual(false, view_has_data(Db, DDoc2)). + + +removes_old_jobs(Db) -> + create_docs(Db, 10), + DDoc = create_ddoc(Db, <<"foo">>), + + ?assertEqual(10, length(run_query(Db, DDoc))), + ?assertEqual(true, view_has_data(Db, DDoc)), + ?assertEqual(true, job_exists(Db, DDoc)), + + delete_doc(Db, DDoc), + ?assertEqual(ok, fabric2_index:cleanup(Db)), + + ?assertEqual(false, view_has_data(Db, DDoc)), + ?assertEqual(false, job_exists(Db, DDoc)). + + +after_job_accepted_initial_build(Db) -> + cleanup_during_initial_build(Db, fun meck_intercept_job_accept/2). + + +after_job_accepted_rebuild(Db) -> + cleanup_during_rebuild(Db, fun meck_intercept_job_accept/2). + + +during_index_initial_build(Db) -> + cleanup_during_initial_build(Db, fun meck_intercept_job_update/2). + + +during_index_rebuild(Db) -> + cleanup_during_rebuild(Db, fun meck_intercept_job_update/2). + + +cleanup_during_initial_build(Db, InterruptFun) -> + InterruptFun(fabric2_db:name(Db), self()), + + create_docs(Db, 10), + DDoc = create_ddoc(Db, <<"foo">>), + + {_, Ref1} = spawn_monitor(fun() -> run_query(Db, DDoc) end), + + receive {JobPid, triggered} -> ok end, + delete_doc(Db, DDoc), + ok = fabric2_index:cleanup(Db), + JobPid ! continue, + + receive {'DOWN', Ref1, _, _, _} -> ok end, + + ok = fabric2_index:cleanup(Db), + ?assertError({ddoc_deleted, _}, run_query(Db, DDoc)), + + ?assertEqual(false, view_has_data(Db, DDoc)), + ?assertEqual(false, job_exists(Db, DDoc)). + + +cleanup_during_rebuild(Db, InterruptFun) -> + create_docs(Db, 10), + DDoc = create_ddoc(Db, <<"foo">>), + ?assertEqual(10, length(run_query(Db, DDoc))), + + InterruptFun(fabric2_db:name(Db), self()), + + create_docs(Db, 10, 10), + + {_, Ref1} = spawn_monitor(fun() -> run_query(Db, DDoc) end), + + receive {JobPid, triggered} -> ok end, + delete_doc(Db, DDoc), + ok = fabric2_index:cleanup(Db), + JobPid ! continue, + + receive {'DOWN', Ref1, _, _, _} -> ok end, + + ok = fabric2_index:cleanup(Db), + ?assertError({ddoc_deleted, _}, run_query(Db, DDoc)), + + ?assertEqual(false, view_has_data(Db, DDoc)), + ?assertEqual(false, job_exists(Db, DDoc)). + + + +run_query(Db, DDocId) when is_binary(DDocId) -> + {ok, DDoc} = fabric2_db:open_doc(Db, <<"_design/", DDocId/binary>>), + run_query(Db, DDoc); + +run_query(Db, DDoc) -> + Fun = fun default_cb/2, + {ok, Result} = couch_views:query(Db, DDoc, <<"bar">>, Fun, [], #{}), + Result. + + +default_cb(complete, Acc) -> + {ok, lists:reverse(Acc)}; +default_cb({final, Info}, []) -> + {ok, [Info]}; +default_cb({final, _}, Acc) -> + {ok, Acc}; +default_cb({meta, _}, Acc) -> + {ok, Acc}; +default_cb(ok, ddoc_updated) -> + {ok, ddoc_updated}; +default_cb(Row, Acc) -> + {ok, [Row | Acc]}. + + +view_has_data(Db, DDoc) -> + DbName = fabric2_db:name(Db), + {ok, #mrst{sig = Sig}} = couch_views_util:ddoc_to_mrst(DbName, DDoc), + fabric2_fdb:transactional(Db, fun(TxDb) -> + #{ + tx := Tx, + db_prefix := DbPrefix + } = TxDb, + SigKeyTuple = {?DB_VIEWS, ?VIEW_INFO, ?VIEW_UPDATE_SEQ, Sig}, + SigKey = erlfdb_tuple:pack(SigKeyTuple, DbPrefix), + SigVal = erlfdb:wait(erlfdb:get(Tx, SigKey)), + + RangeKeyTuple = {?DB_VIEWS, ?VIEW_DATA, Sig}, + RangeKey = erlfdb_tuple:pack(RangeKeyTuple, DbPrefix), + Range = erlfdb:wait(erlfdb:get_range_startswith(Tx, RangeKey)), + + SigVal /= not_found andalso Range /= [] + end). + + +meck_intercept_job_accept(TgtDbName, ParentPid) -> + meck:new(fabric2_db, [passthrough]), + meck:expect(fabric2_db, open, fun + (DbName, Opts) when DbName == TgtDbName -> + Result = meck:passthrough([DbName, Opts]), + ParentPid ! {self(), triggered}, + receive continue -> ok end, + meck:unload(), + Result; + (DbName, Opts) -> + meck:passthrough([DbName, Opts]) + end). + + +meck_intercept_job_update(_DbName, ParentPid) -> + meck:new(couch_jobs, [passthrough]), + meck:expect(couch_jobs, finish, fun(Tx, Job, Data) -> + ParentPid ! {self(), triggered}, + receive continue -> ok end, + Result = meck:passthrough([Tx, Job, Data]), + meck:unload(), + Result + end). + + +create_ddoc(Db, Id) -> + MapFunFmt = "function(doc) {var f = \"~s\"; emit(doc.val, f)}", + MapFun = io_lib:format(MapFunFmt, [Id]), + Body = {[ + {<<"views">>, {[ + {<<"bar">>, {[{<<"map">>, iolist_to_binary(MapFun)}]}} + ]}} + ]}, + create_doc(Db, <<"_design/", Id/binary>>, Body). + + +recreate_doc(Db, #doc{deleted = true} = Doc) -> + #doc{ + id = DDocId, + body = Body + } = Doc, + create_doc(Db, DDocId, Body). + + +create_ddocs(Db, Count) when is_integer(Count), Count > 1 -> + lists:map(fun(Seq) -> + Id = io_lib:format("~6..0b", [Seq]), + create_ddoc(Db, iolist_to_binary(Id)) + end, lists:seq(1, Count)). + + +create_doc(Db, Id) -> + create_doc(Db, Id, {[{<<"value">>, Id}]}). + + +create_doc(Db, Id, Body) -> + Doc = #doc{ + id = Id, + body = Body + }, + {ok, {Pos, Rev}} = fabric2_db:update_doc(Db, Doc), + Doc#doc{revs = {Pos, [Rev]}}. + + +create_docs(Db, Count) -> + create_docs(Db, Count, 0). + + +create_docs(Db, Count, Offset) -> + lists:map(fun(Seq) -> + Id = io_lib:format("~6..0b", [Seq]), + create_doc(Db, iolist_to_binary(Id)) + end, lists:seq(Offset + 1, Offset + Count)). + + +delete_doc(Db, DDoc) -> + #doc{ + revs = {_, [_ | _] = Revs} + } = DDoc, + {ok, {NewPos, Rev}} = fabric2_db:update_doc(Db, DDoc#doc{deleted = true}), + DDoc#doc{ + revs = {NewPos, [Rev | Revs]}, + deleted = true + }. + + +job_exists(Db, DDoc) -> + JobId = job_id(Db, DDoc), + case couch_jobs:get_job_data(Db, ?INDEX_JOB_TYPE, JobId) of + {ok, _} -> true; + {error, not_found} -> false + end. + + +job_id(Db, DDoc) -> + DbName = fabric2_db:name(Db), + {ok, #mrst{sig = Sig}} = couch_views_util:ddoc_to_mrst(DbName, DDoc), + HexSig = fabric2_util:to_hex(Sig), + <>. -- cgit v1.2.1 From 2e78bebf2e40486681ed9dafa0e5f552de06a910 Mon Sep 17 00:00:00 2001 From: Juanjo Rodriguez Date: Wed, 1 Apr 2020 17:10:56 +0200 Subject: Port recreate docs test --- test/elixir/README.md | 2 +- test/elixir/test/recreate_doc_test.exs | 165 +++++++++++++++++++++++++++++++++ test/javascript/tests/recreate_doc.js | 1 + 3 files changed, 167 insertions(+), 1 deletion(-) create mode 100644 test/elixir/test/recreate_doc_test.exs diff --git a/test/elixir/README.md b/test/elixir/README.md index 0bd69660b..32add2aba 100644 --- a/test/elixir/README.md +++ b/test/elixir/README.md @@ -63,7 +63,7 @@ X means done, - means partially - [X] Port proxyauth.js - [X] Port purge.js - [ ] Port reader_acl.js - - [ ] Port recreate_doc.js + - [X] Port recreate_doc.js - [X] Port reduce_builtin.js - [X] Port reduce_false.js - [ ] Port reduce_false_temp.js diff --git a/test/elixir/test/recreate_doc_test.exs b/test/elixir/test/recreate_doc_test.exs new file mode 100644 index 000000000..08f92293e --- /dev/null +++ b/test/elixir/test/recreate_doc_test.exs @@ -0,0 +1,165 @@ +defmodule RecreateDocTest do + use CouchTestCase + + @moduletag :recreate_doc + + @moduledoc """ + Test CouchDB document recreation + This is a port of the recreate_doc.js suite + """ + + @tag :with_db + test "recreate document", context do + db_name = context[:db_name] + + # First create a new document with the ID "foo", and delete it again + doc = %{_id: "foo", a: "bar", b: 42} + {:ok, resp} = create_doc(db_name, doc) + first_rev = resp.body["rev"] + + resp = Couch.delete("/#{db_name}/foo?rev=#{first_rev}") + assert resp.status_code == 200 + + # Now create a new document with the same ID, save it, and then modify it + doc = %{_id: "foo"} + + for _i <- 0..9 do + {:ok, _} = create_doc(db_name, doc) + resp = Couch.get("/#{db_name}/foo") + + updated_doc = + resp.body + |> Map.put("a", "baz") + + resp = Couch.put("/#{db_name}/foo", body: updated_doc) + assert resp.status_code == 201 + rev = resp.body["rev"] + resp = Couch.delete("/#{db_name}/foo?rev=#{rev}") + assert resp.status_code == 200 + end + end + + @tag :with_db + test "COUCHDB-292 - recreate a deleted document", context do + db_name = context[:db_name] + # First create a new document with the ID "foo", and delete it again + doc = %{_id: "foo", a: "bar", b: 42} + {:ok, resp} = create_doc(db_name, doc) + first_rev = resp.body["rev"] + + resp = Couch.delete("/#{db_name}/foo?rev=#{first_rev}") + assert resp.status_code == 200 + + # COUCHDB-292 now attempt to save the document with a prev that's since + # been deleted and this should generate a conflict exception + updated_doc = + doc + |> Map.put(:_rev, first_rev) + + resp = Couch.put("/#{db_name}/foo", body: updated_doc) + assert resp.status_code == 409 + + # same as before, but with binary + bin_att_doc = %{ + _id: "foo", + _rev: first_rev, + _attachments: %{ + "foo.txt": %{ + content_type: "text/plain", + data: "VGhpcyBpcyBhIGJhc2U2NCBlbmNvZGVkIHRleHQ=" + } + } + } + + resp = Couch.put("/#{db_name}/foo", body: bin_att_doc) + assert resp.status_code == 409 + end + + @tag :with_db + test "Recreate a deleted document with non-exsistant rev", context do + db_name = context[:db_name] + + doc = %{_id: "foo", a: "bar", b: 42} + {:ok, resp} = create_doc(db_name, doc) + first_rev = resp.body["rev"] + + resp = Couch.delete("/#{db_name}/foo?rev=#{first_rev}") + assert resp.status_code == 200 + + # random non-existant prev rev + updated_doc = + doc + |> Map.put(:_rev, "1-asfafasdf") + + resp = Couch.put("/#{db_name}/foo", body: updated_doc) + assert resp.status_code == 409 + + # random non-existant prev rev with bin + bin_att_doc = %{ + _id: "foo", + _rev: "1-aasasfasdf", + _attachments: %{ + "foo.txt": %{ + content_type: "text/plain", + data: "VGhpcyBpcyBhIGJhc2U2NCBlbmNvZGVkIHRleHQ=" + } + } + } + + resp = Couch.put("/#{db_name}/foo", body: bin_att_doc) + assert resp.status_code == 409 + end + + @tag :with_db + test "COUCHDB-1265 - changes feed after we try and break the update_seq tree", + context do + db_name = context[:db_name] + + # Test COUCHDB-1265 - Reinserting an old revision into the revision tree causes + # duplicates in the update_seq tree. + revs = create_rev_doc(db_name, "a", 3) + + resp = + Couch.put("/#{db_name}/a", + body: Enum.at(revs, 0), + query: [new_edits: false] + ) + + assert resp.status_code == 201 + + resp = + Couch.put("/#{db_name}/a", + body: Enum.at(revs, -1) + ) + + assert resp.status_code == 201 + + resp = Couch.get("/#{db_name}/_changes") + assert resp.status_code == 200 + + assert length(resp.body["results"]) == 1 + end + + # function to create a doc with multiple revisions + defp create_rev_doc(db_name, id, num_revs) do + doc = %{_id: id, count: 0} + {:ok, resp} = create_doc(db_name, doc) + create_rev_doc(db_name, id, num_revs, [Map.put(doc, :_rev, resp.body["rev"])]) + end + + defp create_rev_doc(db_name, id, num_revs, revs) do + if length(revs) < num_revs do + doc = %{_id: id, _rev: Enum.at(revs, -1)[:_rev], count: length(revs)} + {:ok, resp} = create_doc(db_name, doc) + + create_rev_doc( + db_name, + id, + num_revs, + revs ++ [Map.put(doc, :_rev, resp.body["rev"])] + ) + else + revs + end + end +end diff --git a/test/javascript/tests/recreate_doc.js b/test/javascript/tests/recreate_doc.js index 154a6e45b..1aa44ede8 100644 --- a/test/javascript/tests/recreate_doc.js +++ b/test/javascript/tests/recreate_doc.js @@ -10,6 +10,7 @@ // License for the specific language governing permissions and limitations under // the License. +couchTests.elixir = true; couchTests.recreate_doc = function(debug) { var db_name = get_random_db_name(); var db = new CouchDB(db_name, {"X-Couch-Full-Commit":"false"}, {"w": 3}); -- cgit v1.2.1 From 4e2f18c03e478855e927d9d0ae7bd757427c2edd Mon Sep 17 00:00:00 2001 From: ILYA Khlopotov Date: Tue, 14 Apr 2020 03:06:25 -0700 Subject: Merge keys from rebar.config This change allows creation of local `src/couch/rebar.config` and `rebar.config` files to set additional configuration options. This is useful for: - disabling deprecation warnings `{nowarn_deprecated_function, MFAs}` - control debugging in eunit tests - `DEBUG` - `{eunit_compile_opts, [{d, DEBUG, true}]}` - `NODEBUG` - `{eunit_compile_opts, [{d, NODEBUG, true}]}` --- .gitignore | 1 + rebar.config.script | 9 +++++++-- src/couch/.gitignore | 2 ++ src/couch/rebar.config.script | 7 ++++++- 4 files changed, 16 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index 955403a98..cd4608809 100644 --- a/.gitignore +++ b/.gitignore @@ -28,6 +28,7 @@ ebin/ erl_crash.dump erln8.config install.mk +rebar.config rel/*.config rel/couchdb rel/dev* diff --git a/rebar.config.script b/rebar.config.script index 6f9f65c73..b3ea2c933 100644 --- a/rebar.config.script +++ b/rebar.config.script @@ -223,6 +223,11 @@ AddConfig = [ {post_hooks, [{compile, "escript support/build_js.escript"}]} ], -C = lists:foldl(fun({K, V}, CfgAcc) -> - lists:keystore(K, 1, CfgAcc, {K, V}) +lists:foldl(fun({K, V}, CfgAcc) -> + case lists:keyfind(K, 1, CfgAcc) of + {K, Existent} when is_list(Existent) andalso is_list(V) -> + lists:keystore(K, 1, CfgAcc, {K, Existent ++ V}); + false -> + lists:keystore(K, 1, CfgAcc, {K, V}) + end end, CONFIG, AddConfig). diff --git a/src/couch/.gitignore b/src/couch/.gitignore index e1fa65333..861974adb 100644 --- a/src/couch/.gitignore +++ b/src/couch/.gitignore @@ -19,3 +19,5 @@ test/engines/log/ .rebar/ .eunit + +rebar.config diff --git a/src/couch/rebar.config.script b/src/couch/rebar.config.script index 91e24d99e..80e6bd12e 100644 --- a/src/couch/rebar.config.script +++ b/src/couch/rebar.config.script @@ -229,5 +229,10 @@ AddConfig = [ ]. lists:foldl(fun({K, V}, CfgAcc) -> - lists:keystore(K, 1, CfgAcc, {K, V}) + case lists:keyfind(K, 1, CfgAcc) of + {K, Existent} when is_list(Existent) andalso is_list(V) -> + lists:keystore(K, 1, CfgAcc, {K, Existent ++ V}); + false -> + lists:keystore(K, 1, CfgAcc, {K, V}) + end end, CONFIG, AddConfig). -- cgit v1.2.1 From 522627eb88d8a280b62a125cf008991438848865 Mon Sep 17 00:00:00 2001 From: ILYA Khlopotov Date: Tue, 26 Feb 2019 18:16:50 +0000 Subject: Integrate emilio - erang linter --- .gitignore | 1 + Makefile | 6 ++- Makefile.win | 6 ++- bin/warnings_in_scope | 125 ++++++++++++++++++++++++++++++++++++++++++++++++++ configure | 13 ++++++ configure.ps1 | 14 ++++++ emilio.config | 20 ++++++++ 7 files changed, 183 insertions(+), 2 deletions(-) create mode 100755 bin/warnings_in_scope create mode 100644 emilio.config diff --git a/.gitignore b/.gitignore index 60e6d145a..3cfa3721e 100644 --- a/.gitignore +++ b/.gitignore @@ -45,6 +45,7 @@ src/couch/priv/couch_js/**/*.d src/couch/priv/icu_driver/couch_icu_driver.d src/mango/src/mango_cursor_text.nocompile src/docs/ +src/emilio/ src/ets_lru/ src/excoveralls/ src/fauxton/ diff --git a/Makefile b/Makefile index 97fc97c85..fff1df528 100644 --- a/Makefile +++ b/Makefile @@ -147,6 +147,7 @@ fauxton: share/www .PHONY: check # target: check - Test everything check: all python-black + @$(MAKE) emilio @$(MAKE) eunit @$(MAKE) javascript @$(MAKE) mango-test @@ -198,6 +199,9 @@ soak-eunit: couch @$(REBAR) setup_eunit 2> /dev/null while [ $$? -eq 0 ] ; do $(REBAR) -r eunit $(EUNIT_OPTS) ; done +emilio: + @bin/emilio -c emilio.config src/ | bin/warnings_in_scope -s 3 + .venv/bin/black: @python3 -m venv .venv @.venv/bin/pip3 install black || touch .venv/bin/black @@ -260,7 +264,7 @@ elixir-credo: elixir-init .PHONY: javascript # target: javascript - Run JavaScript test suites or specific ones defined by suites option javascript: export COUCHDB_TEST_ADMIN_PARTY_OVERRIDE=1 -javascript: +javascript: @$(MAKE) devclean @mkdir -p share/www/script/test diff --git a/Makefile.win b/Makefile.win index bdecc7315..0fc4d91c7 100644 --- a/Makefile.win +++ b/Makefile.win @@ -134,6 +134,7 @@ fauxton: share\www .PHONY: check # target: check - Test everything check: all python-black + @$(MAKE) emilio @$(MAKE) eunit @$(MAKE) javascript @$(MAKE) mango-test @@ -175,6 +176,9 @@ just-eunit: export ERL_AFLAGS = "-config $(shell echo %cd%)/rel/files/eunit.conf just-eunit: @$(REBAR) -r eunit $(EUNIT_OPTS) +emilio: + @bin\emilio -c emilio.config src\ | python.exe bin\warnings_in_scope -s 3 + .venv/bin/black: @python.exe -m venv .venv @.venv\Scripts\pip3.exe install black || copy /b .venv\Scripts\black.exe +,, @@ -359,7 +363,7 @@ install: release @echo . @echo To install CouchDB into your system, copy the rel\couchdb @echo to your desired installation location. For example: - @echo xcopy /E rel\couchdb C:\CouchDB\ + @echo xcopy /E rel\couchdb C:\CouchDB\ @echo . ################################################################################ diff --git a/bin/warnings_in_scope b/bin/warnings_in_scope new file mode 100755 index 000000000..2a854211a --- /dev/null +++ b/bin/warnings_in_scope @@ -0,0 +1,125 @@ +#!/usr/bin/env python3 +import os +import subprocess +from pathlib import Path +import optparse +import sys +import re + +def run(command, cwd=None): + try: + return subprocess.Popen( + command, shell=True, cwd=cwd, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + except OSError as err: + raise OSError("Error in command '{0}': {1}".format(command, err)) + +def parse_location(line): + # take substring between @@ + # take second part of it + location = line.split(b'@@')[1].strip().split(b' ')[1] + tokens = location.split(b',') + if len(tokens) == 1: + return (int(tokens[0][1:]), 1) + elif len(tokens) == 2: + return (int(tokens[0][1:]), int(tokens[1])) + +def changed_files(directory, scope): + result = {} + proc = run('git diff --no-prefix --unified={0}'.format(scope), cwd=str(directory)) + file_path = None + for line in iter(proc.stdout.readline, b''): + if line.startswith(b'diff --git '): + # this would be problematic if directory has space in the name + file_name = line.split(b' ')[3].strip() + file_path = str(directory.joinpath(str(file_name, 'utf-8'))) + result[file_path] = set() + continue + if line.startswith(b'@@'): + start_pos, number_of_lines = parse_location(line) + for line_number in range(start_pos, start_pos + number_of_lines): + result[file_path].add(line_number) + return result + +def print_changed(file_name, line_number): + print('{0}:{1}'.format(str(file_name), str(line_number))) + +def changes(dirs, scope): + result = {} + for directory in dirs: + result.update(changed_files(directory, scope)) + return result + +def repositories(root): + for directory in Path(root).rglob('.git'): + if not directory.is_dir(): + continue + yield directory.parent + +def setup_argparse(): + parser = optparse.OptionParser(description="Filter output to remove unrelated warning") + parser.add_option( + "-r", + "--regexp", + dest="regexp", + default='(?P[^:]+):(?P\d+).*', + help="Regexp used to extract file_name and line number", + ) + parser.add_option( + "-s", + "--scope", + dest="scope", + default=0, + help="Number of lines surrounding the change we consider relevant", + ) + parser.add_option( + "-p", + "--print-only", + action="store_true", + dest="print_only", + default=False, + help="Print changed lines only", + ) + return parser.parse_args() + +def filter_stdin(regexp, changes): + any_matches = False + for line in iter(sys.stdin.readline, ''): + matches = re.match(regexp, line) + if matches: + file_name = matches.group('file_name') + line_number = int(matches.group('line')) + if file_name in changes and line_number in changes[file_name]: + print(line, end='') + any_matches = True + return any_matches + +def validate_regexp(regexp): + index = regexp.groupindex + if 'file_name' in index and 'line' in index: + return True + else: + raise TypeError("Regexp must define following groups:\n - file_name\n - line") + +def main(): + opts, args = setup_argparse() + if opts.print_only: + for file_name, changed_lines in changes(repositories('.'), opts.scope).items(): + for line_number in changed_lines: + print_changed(file_name, line_number) + return 0 + else: + regexp = re.compile(opts.regexp) + validate_regexp(regexp) + if filter_stdin(regexp, changes(repositories('.'), opts.scope)): + return 1 + else: + return 0 + +if __name__ == "__main__": + try: + sys.exit(main()) + except KeyboardInterrupt: + pass + diff --git a/configure b/configure index 38e62e317..854366c8a 100755 --- a/configure +++ b/configure @@ -255,12 +255,25 @@ install_local_rebar() { fi } +install_local_emilio() { + if [ ! -x "${rootdir}/bin/emilio" ]; then + if [ ! -d "${rootdir}/src/emilio" ]; then + git clone --depth 1 https://github.com/cloudant-labs/emilio ${rootdir}/src/emilio + fi + cd ${rootdir}/src/emilio && ${REBAR} compile escriptize; cd ${rootdir} + mv ${rootdir}/src/emilio/emilio ${rootdir}/bin/emilio + chmod +x ${rootdir}/bin/emilio + cd ${rootdir}/src/emilio && ${REBAR} clean; cd ${rootdir} + fi +} if [ -z "${REBAR}" ]; then install_local_rebar REBAR=${rootdir}/bin/rebar fi +install_local_emilio + # only update dependencies, when we are not in a release tarball if [ -d .git -a $SKIP_DEPS -ne 1 ]; then echo "==> updating dependencies" diff --git a/configure.ps1 b/configure.ps1 index c74fbcf41..65f8517d6 100644 --- a/configure.ps1 +++ b/configure.ps1 @@ -205,6 +205,20 @@ if ((Get-Command "rebar.cmd" -ErrorAction SilentlyContinue) -eq $null) $env:Path += ";$rootdir\bin" } +# check for emilio; if not found, get it and build it +if ((Get-Command "emilio.cmd" -ErrorAction SilentlyContinue) -eq $null) +{ + Write-Verbose "==> emilio.cmd not found; bootstrapping..." + if (-Not (Test-Path "src\emilio")) + { + git clone --depth 1 https://github.com/wohali/emilio $rootdir\src\emilio + } + cmd /c "cd $rootdir\src\emilio && rebar compile escriptize; cd $rootdir" + cp $rootdir\src\emilio\emilio $rootdir\bin\emilio + cp $rootdir\src\emilio\bin\emilio.cmd $rootdir\bin\emilio.cmd + cmd /c "cd $rootdir\src\emilio && rebar clean; cd $rootdir" +} + # only update dependencies, when we are not in a release tarball if ( (Test-Path .git -PathType Container) -and (-not $SkipDeps) ) { Write-Verbose "==> updating dependencies" diff --git a/emilio.config b/emilio.config new file mode 100644 index 000000000..0dad93898 --- /dev/null +++ b/emilio.config @@ -0,0 +1,20 @@ +{ignore, [ + "src[\/]bear[\/]*", + "src[\/]b64url[\/]*", + "src[\/]docs[\/]*", + "src[\/]*[\/].eunit[\/]*", + "src[\/]fauxton[\/]*", + "src[\/]rebar[\/]*", + "src[\/]emilio[\/]*", + "src[\/]folsom[\/]*", + "src[\/]mochiweb[\/]*", + "src[\/]snappy[\/]*", + "src[\/]ssl_verify_fun[\/]*", + "src[\/]ibrowse[\/]*", + "src[\/]jiffy[\/]*", + "src[\/]meck[\/]*", + "src[\/]proper[\/]*", + "src[\/]recon[\/]*", + "src[\/]hyper[\/]*", + "src[\/]triq[\/]*" +]}. -- cgit v1.2.1 From 9da549ee9a063287436c44711ce4fd99c3ebc03c Mon Sep 17 00:00:00 2001 From: ILYA Khlopotov Date: Tue, 26 Feb 2019 18:16:50 +0000 Subject: Integrate emilio - erang linter --- .gitignore | 1 + Makefile | 4 ++ Makefile.win | 6 ++- bin/warnings_in_scope | 125 ++++++++++++++++++++++++++++++++++++++++++++++++++ configure | 13 ++++++ configure.ps1 | 14 ++++++ emilio.config | 20 ++++++++ 7 files changed, 182 insertions(+), 1 deletion(-) create mode 100755 bin/warnings_in_scope create mode 100644 emilio.config diff --git a/.gitignore b/.gitignore index 955403a98..e2d3eff6f 100644 --- a/.gitignore +++ b/.gitignore @@ -47,6 +47,7 @@ src/couch/priv/couch_js/**/*.d src/couch/priv/icu_driver/couch_icu_driver.d src/mango/src/mango_cursor_text.nocompile src/docs/ +src/emilio/ src/erlfdb/ src/ets_lru/ src/excoveralls/ diff --git a/Makefile b/Makefile index ebdab2200..2e3b33889 100644 --- a/Makefile +++ b/Makefile @@ -162,6 +162,7 @@ endif .PHONY: check check: all + @$(MAKE) emilio make eunit apps=couch_eval,couch_expiring_cache,ctrace,couch_jobs,couch_views,fabric,mango,chttpd make elixir tests=test/elixir/test/basics_test.exs,test/elixir/test/replication_test.exs,test/elixir/test/map_test.exs,test/elixir/test/all_docs_test.exs,test/elixir/test/bulk_docs_test.exs make exunit tests=src/couch_rate/test/exunit/ @@ -207,6 +208,9 @@ soak-eunit: couch @$(REBAR) setup_eunit 2> /dev/null while [ $$? -eq 0 ] ; do $(REBAR) -r eunit $(EUNIT_OPTS) ; done +emilio: + @bin/emilio -c emilio.config src/ | bin/warnings_in_scope -s 3 + .venv/bin/black: @python3 -m venv .venv @.venv/bin/pip3 install black || touch .venv/bin/black diff --git a/Makefile.win b/Makefile.win index 30ebe0ee3..885b7741c 100644 --- a/Makefile.win +++ b/Makefile.win @@ -134,6 +134,7 @@ fauxton: share\www .PHONY: check # target: check - Test everything check: all python-black + @$(MAKE) emilio @$(MAKE) eunit @$(MAKE) javascript @$(MAKE) mango-test @@ -175,6 +176,9 @@ just-eunit: export ERL_AFLAGS = "-config $(shell echo %cd%)/rel/files/eunit.conf just-eunit: @$(REBAR) -r eunit $(EUNIT_OPTS) +emilio: + @bin\emilio -c emilio.config src\ | python.exe bin\warnings_in_scope -s 3 + .venv/bin/black: @python.exe -m venv .venv @.venv\Scripts\pip3.exe install black || copy /b .venv\Scripts\black.exe +,, @@ -356,7 +360,7 @@ install: release @echo . @echo To install CouchDB into your system, copy the rel\couchdb @echo to your desired installation location. For example: - @echo xcopy /E rel\couchdb C:\CouchDB\ + @echo xcopy /E rel\couchdb C:\CouchDB\ @echo . ################################################################################ diff --git a/bin/warnings_in_scope b/bin/warnings_in_scope new file mode 100755 index 000000000..2a854211a --- /dev/null +++ b/bin/warnings_in_scope @@ -0,0 +1,125 @@ +#!/usr/bin/env python3 +import os +import subprocess +from pathlib import Path +import optparse +import sys +import re + +def run(command, cwd=None): + try: + return subprocess.Popen( + command, shell=True, cwd=cwd, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + except OSError as err: + raise OSError("Error in command '{0}': {1}".format(command, err)) + +def parse_location(line): + # take substring between @@ + # take second part of it + location = line.split(b'@@')[1].strip().split(b' ')[1] + tokens = location.split(b',') + if len(tokens) == 1: + return (int(tokens[0][1:]), 1) + elif len(tokens) == 2: + return (int(tokens[0][1:]), int(tokens[1])) + +def changed_files(directory, scope): + result = {} + proc = run('git diff --no-prefix --unified={0}'.format(scope), cwd=str(directory)) + file_path = None + for line in iter(proc.stdout.readline, b''): + if line.startswith(b'diff --git '): + # this would be problematic if directory has space in the name + file_name = line.split(b' ')[3].strip() + file_path = str(directory.joinpath(str(file_name, 'utf-8'))) + result[file_path] = set() + continue + if line.startswith(b'@@'): + start_pos, number_of_lines = parse_location(line) + for line_number in range(start_pos, start_pos + number_of_lines): + result[file_path].add(line_number) + return result + +def print_changed(file_name, line_number): + print('{0}:{1}'.format(str(file_name), str(line_number))) + +def changes(dirs, scope): + result = {} + for directory in dirs: + result.update(changed_files(directory, scope)) + return result + +def repositories(root): + for directory in Path(root).rglob('.git'): + if not directory.is_dir(): + continue + yield directory.parent + +def setup_argparse(): + parser = optparse.OptionParser(description="Filter output to remove unrelated warning") + parser.add_option( + "-r", + "--regexp", + dest="regexp", + default='(?P[^:]+):(?P\d+).*', + help="Regexp used to extract file_name and line number", + ) + parser.add_option( + "-s", + "--scope", + dest="scope", + default=0, + help="Number of lines surrounding the change we consider relevant", + ) + parser.add_option( + "-p", + "--print-only", + action="store_true", + dest="print_only", + default=False, + help="Print changed lines only", + ) + return parser.parse_args() + +def filter_stdin(regexp, changes): + any_matches = False + for line in iter(sys.stdin.readline, ''): + matches = re.match(regexp, line) + if matches: + file_name = matches.group('file_name') + line_number = int(matches.group('line')) + if file_name in changes and line_number in changes[file_name]: + print(line, end='') + any_matches = True + return any_matches + +def validate_regexp(regexp): + index = regexp.groupindex + if 'file_name' in index and 'line' in index: + return True + else: + raise TypeError("Regexp must define following groups:\n - file_name\n - line") + +def main(): + opts, args = setup_argparse() + if opts.print_only: + for file_name, changed_lines in changes(repositories('.'), opts.scope).items(): + for line_number in changed_lines: + print_changed(file_name, line_number) + return 0 + else: + regexp = re.compile(opts.regexp) + validate_regexp(regexp) + if filter_stdin(regexp, changes(repositories('.'), opts.scope)): + return 1 + else: + return 0 + +if __name__ == "__main__": + try: + sys.exit(main()) + except KeyboardInterrupt: + pass + diff --git a/configure b/configure index 38e62e317..854366c8a 100755 --- a/configure +++ b/configure @@ -255,12 +255,25 @@ install_local_rebar() { fi } +install_local_emilio() { + if [ ! -x "${rootdir}/bin/emilio" ]; then + if [ ! -d "${rootdir}/src/emilio" ]; then + git clone --depth 1 https://github.com/cloudant-labs/emilio ${rootdir}/src/emilio + fi + cd ${rootdir}/src/emilio && ${REBAR} compile escriptize; cd ${rootdir} + mv ${rootdir}/src/emilio/emilio ${rootdir}/bin/emilio + chmod +x ${rootdir}/bin/emilio + cd ${rootdir}/src/emilio && ${REBAR} clean; cd ${rootdir} + fi +} if [ -z "${REBAR}" ]; then install_local_rebar REBAR=${rootdir}/bin/rebar fi +install_local_emilio + # only update dependencies, when we are not in a release tarball if [ -d .git -a $SKIP_DEPS -ne 1 ]; then echo "==> updating dependencies" diff --git a/configure.ps1 b/configure.ps1 index c74fbcf41..65f8517d6 100644 --- a/configure.ps1 +++ b/configure.ps1 @@ -205,6 +205,20 @@ if ((Get-Command "rebar.cmd" -ErrorAction SilentlyContinue) -eq $null) $env:Path += ";$rootdir\bin" } +# check for emilio; if not found, get it and build it +if ((Get-Command "emilio.cmd" -ErrorAction SilentlyContinue) -eq $null) +{ + Write-Verbose "==> emilio.cmd not found; bootstrapping..." + if (-Not (Test-Path "src\emilio")) + { + git clone --depth 1 https://github.com/wohali/emilio $rootdir\src\emilio + } + cmd /c "cd $rootdir\src\emilio && rebar compile escriptize; cd $rootdir" + cp $rootdir\src\emilio\emilio $rootdir\bin\emilio + cp $rootdir\src\emilio\bin\emilio.cmd $rootdir\bin\emilio.cmd + cmd /c "cd $rootdir\src\emilio && rebar clean; cd $rootdir" +} + # only update dependencies, when we are not in a release tarball if ( (Test-Path .git -PathType Container) -and (-not $SkipDeps) ) { Write-Verbose "==> updating dependencies" diff --git a/emilio.config b/emilio.config new file mode 100644 index 000000000..0dad93898 --- /dev/null +++ b/emilio.config @@ -0,0 +1,20 @@ +{ignore, [ + "src[\/]bear[\/]*", + "src[\/]b64url[\/]*", + "src[\/]docs[\/]*", + "src[\/]*[\/].eunit[\/]*", + "src[\/]fauxton[\/]*", + "src[\/]rebar[\/]*", + "src[\/]emilio[\/]*", + "src[\/]folsom[\/]*", + "src[\/]mochiweb[\/]*", + "src[\/]snappy[\/]*", + "src[\/]ssl_verify_fun[\/]*", + "src[\/]ibrowse[\/]*", + "src[\/]jiffy[\/]*", + "src[\/]meck[\/]*", + "src[\/]proper[\/]*", + "src[\/]recon[\/]*", + "src[\/]hyper[\/]*", + "src[\/]triq[\/]*" +]}. -- cgit v1.2.1 From 36364516d32a368b5c58ee197f9c3fbb82394f81 Mon Sep 17 00:00:00 2001 From: Jay Doane Date: Wed, 15 Apr 2020 10:19:58 -0700 Subject: Enable configurable binary chunk size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, the size of binary chunks used for values is fixed at the FDB imposed limit of 100kB, although they recommend using 10KB [1], (also note they subtly change units). This makes that value configurable, allowing e.g. benchmarks to compare performance of runs with varying chunk size. The cost is a ~10µs config lookup penalty each time data needs to be chunked. [1] https://www.foundationdb.org/files/record-layer-paper.pdf --- rel/overlay/etc/default.ini | 3 +++ src/fabric/include/fabric2.hrl | 2 +- src/fabric/src/fabric2_fdb.erl | 15 ++++++++++++--- 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/rel/overlay/etc/default.ini b/rel/overlay/etc/default.ini index e10a5a0c7..dfc67f7fb 100644 --- a/rel/overlay/etc/default.ini +++ b/rel/overlay/etc/default.ini @@ -239,6 +239,9 @@ port = 6984 ; ; Enable or disable automatic stale index removal in the auto-updater ;index_updater_remove_old_indices = false +; +; Byte size of binary chunks written to FDB values. Defaults to FDB max limit. +;binary_chunk_size = 100000 ; [rexi] ; buffer_count = 2000 diff --git a/src/fabric/include/fabric2.hrl b/src/fabric/include/fabric2.hrl index 587b4f888..2e588f8a3 100644 --- a/src/fabric/include/fabric2.hrl +++ b/src/fabric/include/fabric2.hrl @@ -77,4 +77,4 @@ -define(TRANSACTION_CANCELLED, 1025). --define(BINARY_CHUNK_SIZE, 100000). +-define(DEFAULT_BINARY_CHUNK_SIZE, 100000). diff --git a/src/fabric/src/fabric2_fdb.erl b/src/fabric/src/fabric2_fdb.erl index d96c3ae60..53102d6e9 100644 --- a/src/fabric/src/fabric2_fdb.erl +++ b/src/fabric/src/fabric2_fdb.erl @@ -1628,12 +1628,16 @@ sum_rem_rev_sizes(RevInfos) -> chunkify_binary(Data) -> + chunkify_data(Data, binary_chunk_size()). + + +chunkify_data(Data, Size) -> case Data of <<>> -> []; - <> -> - [Head | chunkify_binary(Rest)]; - <<_/binary>> when size(Data) < ?BINARY_CHUNK_SIZE -> + <> -> + [Head | chunkify_data(Rest, Size)]; + <<_/binary>> when size(Data) < Size -> [Data] end. @@ -1988,6 +1992,11 @@ get_info_wait_int(#info_future{} = InfoFuture) -> [CProp | MProps]. +binary_chunk_size() -> + config:get_integer( + "fabric", "binary_chunk_size", ?DEFAULT_BINARY_CHUNK_SIZE). + + -ifdef(TEST). -include_lib("eunit/include/eunit.hrl"). -- cgit v1.2.1 From 27cbad74404d6d1c6ecf5717cf70c438f6c03eea Mon Sep 17 00:00:00 2001 From: Tony Sun Date: Wed, 15 Apr 2020 15:27:26 -0700 Subject: report changes stats intermittently (#2777) * report changes stats intermittently with boolean market Stats are reported at the end of a request. With changes feeds, sometimes the request can be long or forever. This commit allows stats to be reported intermittently via a configurable time in seconds. The report function can return a boolean whether stats was reported so that a reset may not necessarily be needed. --- src/chttpd/src/chttpd.erl | 7 +- src/chttpd/src/chttpd_db.erl | 25 +++++--- src/chttpd/src/chttpd_stats.erl | 96 +++++++++++++++++++++------- src/chttpd/test/eunit/chttpd_stats_tests.erl | 77 ++++++++++++++++++++++ 4 files changed, 171 insertions(+), 34 deletions(-) create mode 100644 src/chttpd/test/eunit/chttpd_stats_tests.erl diff --git a/src/chttpd/src/chttpd.erl b/src/chttpd/src/chttpd.erl index 2641007f7..4640258a8 100644 --- a/src/chttpd/src/chttpd.erl +++ b/src/chttpd/src/chttpd.erl @@ -269,8 +269,9 @@ handle_request_int(MochiReq) -> before_request(HttpReq) -> ctrace:is_enabled() andalso start_span(HttpReq), try - chttpd_stats:init(), - chttpd_plugin:before_request(HttpReq) + {ok, HttpReq1} = chttpd_plugin:before_request(HttpReq), + chttpd_stats:init(HttpReq1), + {ok, HttpReq1} catch Tag:Error -> {error, catch_error(HttpReq, Tag, Error)} end. @@ -285,7 +286,7 @@ after_request(HttpReq, HttpResp0) -> {ok, HttpResp0#httpd_resp{status = aborted}} end, HttpResp2 = update_stats(HttpReq, HttpResp1), - chttpd_stats:report(HttpReq, HttpResp2), + chttpd_stats:report(HttpResp2), maybe_log(HttpReq, HttpResp2), HttpResp2. diff --git a/src/chttpd/src/chttpd_db.erl b/src/chttpd/src/chttpd_db.erl index 8dd0c931b..8cfcfecaa 100644 --- a/src/chttpd/src/chttpd_db.erl +++ b/src/chttpd/src/chttpd_db.erl @@ -50,7 +50,8 @@ chunks_sent = 0, buffer = [], bufsize = 0, - threshold + threshold, + include_docs }). -define(IS_ALL_DOCS(T), ( @@ -117,7 +118,8 @@ handle_changes_req_tx(#httpd{}=Req, Db) -> Acc0 = #cacc{ feed = list_to_atom(Feed), mochi = Req, - threshold = Max + threshold = Max, + include_docs = ChangesArgs#changes_args.include_docs }, try ChangesFun({fun changes_callback/2, Acc0}) @@ -133,8 +135,9 @@ handle_changes_req_tx(#httpd{}=Req, Db) -> changes_callback(start, #cacc{feed = continuous} = Acc) -> {ok, Resp} = chttpd:start_delayed_json_response(Acc#cacc.mochi, 200), {ok, Acc#cacc{mochi = Resp, responding = true}}; -changes_callback({change, Change}, #cacc{feed = continuous} = Acc) -> - chttpd_stats:incr_rows(), +changes_callback({change, Change}, #cacc{feed = continuous, + include_docs = IncludeDocs} = Acc) -> + incr_stats_changes_feed(IncludeDocs), Data = [?JSON_ENCODE(Change) | "\n"], Len = iolist_size(Data), maybe_flush_changes_feed(Acc, Data, Len); @@ -157,8 +160,9 @@ changes_callback(start, #cacc{feed = eventsource} = Acc) -> ], {ok, Resp} = chttpd:start_delayed_json_response(Req, 200, Headers), {ok, Acc#cacc{mochi = Resp, responding = true}}; -changes_callback({change, {ChangeProp}=Change}, #cacc{feed = eventsource} = Acc) -> - chttpd_stats:incr_rows(), +changes_callback({change, {ChangeProp}=Change}, + #cacc{feed = eventsource, include_docs = IncludeDocs} = Acc) -> + incr_stats_changes_feed(IncludeDocs), Seq = proplists:get_value(seq, ChangeProp), Chunk = [ "data: ", ?JSON_ENCODE(Change), @@ -189,8 +193,8 @@ changes_callback(start, Acc) -> FirstChunk = "{\"results\":[\n", {ok, Resp} = chttpd:start_delayed_json_response(Req, 200, [], FirstChunk), {ok, Acc#cacc{mochi = Resp, responding = true}}; -changes_callback({change, Change}, Acc) -> - chttpd_stats:incr_rows(), +changes_callback({change, Change}, #cacc{include_docs = IncludeDocs} = Acc) -> + incr_stats_changes_feed(IncludeDocs), Data = [Acc#cacc.prepend, ?JSON_ENCODE(Change)], Len = iolist_size(Data), maybe_flush_changes_feed(Acc, Data, Len); @@ -252,6 +256,11 @@ maybe_flush_changes_feed(Acc0, Data, Len) -> }, {ok, Acc}. +incr_stats_changes_feed(IncludeDocs) -> + chttpd_stats:incr_rows(), + if not IncludeDocs -> ok; true -> + chttpd_stats:incr_reads() + end. % Return the same response as if a compaction succeeded even though _compaction % isn't a valid operation in CouchDB >= 4.x anymore. This is mostly to not diff --git a/src/chttpd/src/chttpd_stats.erl b/src/chttpd/src/chttpd_stats.erl index 59ec9268d..27e9c3180 100644 --- a/src/chttpd/src/chttpd_stats.erl +++ b/src/chttpd/src/chttpd_stats.erl @@ -14,8 +14,8 @@ -export([ - init/0, - report/2, + init/1, + report/1, incr_reads/0, incr_reads/1, @@ -24,29 +24,40 @@ incr_writes/1, incr_rows/0, - incr_rows/1 + incr_rows/1, + + update_interval/1 ]). -record(st, { reads = 0, writes = 0, - rows = 0 + rows = 0, + reporter, + last_report_ts = 0, + interval, + request }). -define(KEY, chttpd_stats). +-define(INTERVAL_IN_SEC, 60). - -init() -> - put(?KEY, #st{}). +init(Request) -> + Reporter = config:get("chttpd", "stats_reporter"), + Time = erlang:monotonic_time(second), + Interval = config:get_integer("chttpd", "stats_reporting_interval", + ?INTERVAL_IN_SEC), + put(?KEY, #st{reporter = Reporter, last_report_ts = Time, + interval = Interval, request = Request}). -report(HttpReq, HttpResp) -> +report(HttpResp) -> try case get(?KEY) of #st{} = St -> - report(HttpReq, HttpResp, St); + report(HttpResp, St); _ -> ok end @@ -57,19 +68,18 @@ report(HttpReq, HttpResp) -> end. -report(HttpReq, HttpResp, St) -> - case config:get("chttpd", "stats_reporter") of - undefined -> - ok; - ModStr -> - Mod = list_to_existing_atom(ModStr), - #st{ - reads = Reads, - writes = Writes, - rows = Rows - } = St, - Mod:report(HttpReq, HttpResp, Reads, Writes, Rows) - end. +report(HttpResp, #st{reporter = undefined}) -> + ok; + +report(HttpResp, #st{reporter = Reporter} = St) -> + Mod = list_to_existing_atom(Reporter), + #st{ + reads = Reads, + writes = Writes, + rows = Rows, + request = HttpReq + } = St, + Mod:report(HttpReq, HttpResp, Reads, Writes, Rows). incr_reads() -> @@ -101,7 +111,47 @@ incr(Idx, Count) -> #st{} = St -> Total = element(Idx, St) + Count, NewSt = setelement(Idx, St, Total), - put(?KEY, NewSt); + put(?KEY, NewSt), + maybe_report_intermittent(St); + _ -> + ok + end. + + +maybe_report_intermittent(State) -> + #st{last_report_ts = LastTime, interval = Interval} = State, + CurrentTime = erlang:monotonic_time(second), + case CurrentTime - LastTime of + Change when Change >= Interval -> + % Since response is not available during the request, we set + % this undefined. Modules that call: + % Mod:report(HttpReq, HttpResp, Reads, Writes, Rows) should + % be aware of this. Mod:report should also return a boolean + % to indicate if reset should occur + case ?MODULE:report(undefined) of + true -> + reset_stats(State, CurrentTime); + _ -> + ok + end; _ -> ok end. + + +update_interval(Interval) -> + case get(?KEY) of + #st{} = St -> + put(?KEY, St#st{interval = Interval}); + _ -> + ok + end. + + +reset_stats(State, NewTime) -> + put(?KEY, State#st{ + reads = 0, + writes = 0, + rows = 0, + last_report_ts = NewTime + }). diff --git a/src/chttpd/test/eunit/chttpd_stats_tests.erl b/src/chttpd/test/eunit/chttpd_stats_tests.erl new file mode 100644 index 000000000..1742285a1 --- /dev/null +++ b/src/chttpd/test/eunit/chttpd_stats_tests.erl @@ -0,0 +1,77 @@ +-module(chttpd_stats_tests). + +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("couch/include/couch_db.hrl"). + + +start() -> + ok = application:start(config), + ok = application:start(couch_log). + + +stop(_) -> + ok = application:stop(config), + ok = application:stop(couch_log). + + +setup() -> + ok = meck:new(chttpd_stats, [passthrough]). + + +teardown(_) -> + meck:unload(), + ok. + + + +chttpd_stats_test_() -> + { + "chttpd_stats tests", + { + setup, + fun start/0, + fun stop/1, + { + foreach, + fun setup/0, fun teardown/1, + [ + fun test_reset/1, + fun test_no_reset/1 + ] + } + } + }. + + +test_reset(_) -> + ?_test(begin + chttpd_stats:init(undefined), + chttpd_stats:incr_rows(3), + chttpd_stats:incr_rows(), + chttpd_stats:incr_writes(5), + chttpd_stats:incr_writes(), + chttpd_stats:incr_reads(), + chttpd_stats:incr_reads(2), + State1 = get(chttpd_stats), + ?assertMatch({st, 3, 6, 4, _, _, _, _}, State1), + + ok = meck:expect(chttpd_stats, report, fun(_) -> true end), + % force a reset with 0 interval + chttpd_stats:update_interval(0), + % after this is called, the report should happen and rows should + % reset to 0 + chttpd_stats:incr_rows(), + ResetState = get(chttpd_stats), + ?assertMatch({st, 0, 0, 0, _, _, _, _}, ResetState) + end). + + +test_no_reset(_) -> + ?_test(begin + ok = meck:expect(chttpd_stats, report, fun(_) -> false end), + chttpd_stats:init(undefined), + chttpd_stats:update_interval(0), + chttpd_stats:incr_rows(), + State = get(chttpd_stats), + ?assertMatch({st, 0, 0, 1, _, _, _, _}, State) + end). -- cgit v1.2.1 From 0eb1a73af424aa6403c420f8f6ebb3fe4d760f62 Mon Sep 17 00:00:00 2001 From: Jay Doane Date: Wed, 15 Apr 2020 22:01:13 -0700 Subject: Refactor expiring cache FDB interface - Rename `clear_expired_range` to `clear_range_to` - Move `EXPIRING_CACHE` layer prefix into fabric2.hrl - Move primary key setting to just after key & value calculations - Factor out `get_val/2` to lookup a key from FDB and unpack the value - Factor out `prefixes/2` - Factor out `fold_range/5` --- .../src/couch_expiring_cache_fdb.erl | 60 +++++++++++++--------- .../src/couch_expiring_cache_server.erl | 2 +- src/fabric/include/fabric2.hrl | 1 + 3 files changed, 38 insertions(+), 25 deletions(-) diff --git a/src/couch_expiring_cache/src/couch_expiring_cache_fdb.erl b/src/couch_expiring_cache/src/couch_expiring_cache_fdb.erl index fa8508e14..dc337665c 100644 --- a/src/couch_expiring_cache/src/couch_expiring_cache_fdb.erl +++ b/src/couch_expiring_cache/src/couch_expiring_cache_fdb.erl @@ -16,15 +16,15 @@ insert/6, lookup/3, clear_all/1, - clear_expired_range/3 + clear_range_to/3 ]). --define(EXPIRING_CACHE, 53). % coordinate with fabric2.hrl -define(PK, 1). -define(EXP, 2). +-include_lib("fabric/include/fabric2.hrl"). -include_lib("couch_expiring_cache/include/couch_expiring_cache.hrl"). @@ -41,9 +41,9 @@ insert(#{jtx := true} = JTx, Name, Key, Val, StaleTS, ExpiresTS) -> #{tx := Tx, layer_prefix := LayerPrefix} = couch_jobs_fdb:get_jtx(JTx), PK = primary_key(Name, Key, LayerPrefix), PV = erlfdb_tuple:pack({Val, StaleTS, ExpiresTS}), + ok = erlfdb:set(Tx, PK, PV), XK = expiry_key(ExpiresTS, Name, Key, LayerPrefix), XV = erlfdb_tuple:pack({}), - ok = erlfdb:set(Tx, PK, PV), ok = erlfdb:set(Tx, XK, XV). @@ -52,11 +52,10 @@ insert(#{jtx := true} = JTx, Name, Key, Val, StaleTS, ExpiresTS) -> lookup(#{jtx := true} = JTx, Name, Key) -> #{tx := Tx, layer_prefix := LayerPrefix} = couch_jobs_fdb:get_jtx(JTx), PK = primary_key(Name, Key, LayerPrefix), - case erlfdb:wait(erlfdb:get(Tx, PK)) of + case get_val(Tx, PK) of not_found -> not_found; - Bin when is_binary(Bin) -> - {Val, StaleTS, ExpiresTS} = erlfdb_tuple:unpack(Bin), + {Val, StaleTS, ExpiresTS} -> Now = erlang:system_time(?TIME_UNIT), if Now < StaleTS -> {fresh, Val}; @@ -76,32 +75,30 @@ clear_all(Name) -> end). --spec clear_expired_range(Name :: binary(), EndTS :: ?TIME_UNIT, +-spec clear_range_to(Name :: binary(), EndTS :: ?TIME_UNIT, Limit :: non_neg_integer()) -> OldestTS :: ?TIME_UNIT. -clear_expired_range(Name, EndTS, Limit) when Limit > 0 -> - fabric2_fdb:transactional(fun(Tx) -> - LayerPrefix = fabric2_fdb:get_dir(Tx), - ExpiresPrefix = erlfdb_tuple:pack( - {?EXPIRING_CACHE, Name, ?EXP}, LayerPrefix), - fabric2_fdb:fold_range({tx, Tx}, ExpiresPrefix, fun({K, _V}, Acc) -> - Unpacked = erlfdb_tuple:unpack(K, ExpiresPrefix), - couch_log:debug("~p clearing ~p", [?MODULE, Unpacked]), - {ExpiresTS, Key} = Unpacked, - clear_expired(Tx, ExpiresTS, Name, Key, LayerPrefix), +clear_range_to(Name, EndTS, Limit) when Limit > 0 -> + fold_range(Name, EndTS, Limit, + fun(Tx, PK, XK, _Key, ExpiresTS, Acc) -> + ok = erlfdb:clear(Tx, PK), + ok = erlfdb:clear(Tx, XK), oldest_ts(ExpiresTS, Acc) - end, 0, [{end_key, EndTS}, {limit, Limit}]) - end). + end, 0). %% Private -clear_expired(Tx, ExpiresTS, Name, Key, Prefix) -> - PK = primary_key(Name, Key, Prefix), - XK = expiry_key(ExpiresTS, Name, Key, Prefix), - ok = erlfdb:clear(Tx, PK), - ok = erlfdb:clear(Tx, XK). +fold_range(Name, EndTS, Limit, Fun, Acc0) when Limit > 0 -> + fabric2_fdb:transactional(fun(Tx) -> + {LayerPrefix, ExpiresPrefix} = prefixes(Tx, Name), + fabric2_fdb:fold_range({tx, Tx}, ExpiresPrefix, fun({XK, _XV}, Acc) -> + {ExpiresTS, Key} = erlfdb_tuple:unpack(XK, ExpiresPrefix), + PK = primary_key(Name, Key, LayerPrefix), + Fun(Tx, PK, XK, Key, ExpiresTS, Acc) + end, Acc0, [{end_key, EndTS}, {limit, Limit}]) + end). oldest_ts(TS, 0) -> TS; % handle initial Acc = 0 case @@ -114,3 +111,18 @@ primary_key(Name, Key, Prefix) -> expiry_key(ExpiresTS, Name, Key, Prefix) -> erlfdb_tuple:pack({?EXPIRING_CACHE, Name, ?EXP, ExpiresTS, Key}, Prefix). + + +prefixes(Tx, Name) -> + Layer = fabric2_fdb:get_dir(Tx), + Expires = erlfdb_tuple:pack({?EXPIRING_CACHE, Name, ?EXP}, Layer), + {Layer, Expires}. + + +get_val(Tx, PK) -> + case erlfdb:wait(erlfdb:get(Tx, PK)) of + not_found -> + not_found; + Bin when is_binary(Bin) -> + erlfdb_tuple:unpack(Bin) + end. diff --git a/src/couch_expiring_cache/src/couch_expiring_cache_server.erl b/src/couch_expiring_cache/src/couch_expiring_cache_server.erl index 99d386485..eb74e6eb2 100644 --- a/src/couch_expiring_cache/src/couch_expiring_cache_server.erl +++ b/src/couch_expiring_cache/src/couch_expiring_cache_server.erl @@ -82,7 +82,7 @@ handle_info(remove_expired, St) -> NowTS = erlang:system_time(?TIME_UNIT), OldestTS = max(OldestTS0, - couch_expiring_cache_fdb:clear_expired_range(Name, NowTS, BatchSize)), + couch_expiring_cache_fdb:clear_range_to(Name, NowTS, BatchSize)), Elapsed = erlang:system_time(?TIME_UNIT) - NowTS, {noreply, St#{ diff --git a/src/fabric/include/fabric2.hrl b/src/fabric/include/fabric2.hrl index 2e588f8a3..bf3e2aa03 100644 --- a/src/fabric/include/fabric2.hrl +++ b/src/fabric/include/fabric2.hrl @@ -24,6 +24,7 @@ -define(DB_HCA, 2). -define(DELETED_DBS, 3). -define(DBS, 15). +-define(EXPIRING_CACHE, 53). -define(TX_IDS, 255). % Database Level: (LayerPrefix, ?DBS, DbPrefix, X, ...) -- cgit v1.2.1 From 4098c12abeffdf89988663ec9d17544f6509256f Mon Sep 17 00:00:00 2001 From: Jay Doane Date: Wed, 15 Apr 2020 22:01:33 -0700 Subject: Clean up old expiry key on update insert When an existing key is inserted with different timestamps, the primary key is the same but the primary value is different from the existing one. Currently, this results in a new expiry key being inserted, but the old one is not deleted and lingers until it is removed by the inexorable advance of time via the `remove_expired` server messages. This checks whether there's already primary key for the inserted key, and if so, cleans up the existing expiry key before proceeding with the insert. --- .../src/couch_expiring_cache_fdb.erl | 27 ++++++++++++++++++++++ .../test/couch_expiring_cache_tests.erl | 19 +++++++++++++-- 2 files changed, 44 insertions(+), 2 deletions(-) diff --git a/src/couch_expiring_cache/src/couch_expiring_cache_fdb.erl b/src/couch_expiring_cache/src/couch_expiring_cache_fdb.erl index dc337665c..7c4ad8f6f 100644 --- a/src/couch_expiring_cache/src/couch_expiring_cache_fdb.erl +++ b/src/couch_expiring_cache/src/couch_expiring_cache_fdb.erl @@ -13,6 +13,7 @@ -module(couch_expiring_cache_fdb). -export([ + get_range_to/3, insert/6, lookup/3, clear_all/1, @@ -40,6 +41,16 @@ insert(#{jtx := true} = JTx, Name, Key, Val, StaleTS, ExpiresTS) -> #{tx := Tx, layer_prefix := LayerPrefix} = couch_jobs_fdb:get_jtx(JTx), PK = primary_key(Name, Key, LayerPrefix), + case get_val(Tx, PK) of + not_found -> + ok; + {_OldVal, _OldStaleTS, OldExpiresTS} -> + % Clean up current expiry key for this primary key. No + % need to clean up the existing primary key since it will + % be overwritten below. + OldXK = expiry_key(OldExpiresTS, Name, Key, LayerPrefix), + ok = erlfdb:clear(Tx, OldXK) + end, PV = erlfdb_tuple:pack({Val, StaleTS, ExpiresTS}), ok = erlfdb:set(Tx, PK, PV), XK = expiry_key(ExpiresTS, Name, Key, LayerPrefix), @@ -87,6 +98,22 @@ clear_range_to(Name, EndTS, Limit) when Limit > 0 -> end, 0). +-spec get_range_to(Name :: binary(), EndTS :: ?TIME_UNIT, + Limit :: non_neg_integer()) -> + [{Key :: binary(), Val :: binary()}]. +get_range_to(Name, EndTS, Limit) when Limit > 0 -> + fold_range(Name, EndTS, Limit, + fun(Tx, PK, _XK, Key, _ExpiresTS, Acc) -> + case get_val(Tx, PK) of + not_found -> + couch_log:error("~p:entry missing Key: ~p", [?MODULE, Key]), + Acc; + Val -> + [{Key, Val} | Acc] + end + end, []). + + %% Private diff --git a/src/couch_expiring_cache/test/couch_expiring_cache_tests.erl b/src/couch_expiring_cache/test/couch_expiring_cache_tests.erl index aeb1df6f0..2e06fcc5a 100644 --- a/src/couch_expiring_cache/test/couch_expiring_cache_tests.erl +++ b/src/couch_expiring_cache/test/couch_expiring_cache_tests.erl @@ -66,7 +66,7 @@ teardown(#{pid := Pid}) -> simple_lifecycle(_) -> - ?_test(begin + {timeout, 10, ?_test(begin Now = erlang:system_time(?TIME_UNIT), StaleTS = Now + 100, ExpiresTS = Now + 200, @@ -76,14 +76,29 @@ simple_lifecycle(_) -> ?assertEqual(ok, couch_expiring_cache_fdb:clear_all(Name)), ?assertEqual(not_found, couch_expiring_cache:lookup(Name, Key)), + ?assertEqual([], entries(Name)), ?assertEqual(ok, couch_expiring_cache:insert(Name, Key, Val, StaleTS, ExpiresTS)), ?assertEqual({fresh, Val}, couch_expiring_cache:lookup(Name, Key)), ok = wait_lookup(Name, Key, {stale, Val}), + + % Refresh the existing key with updated timestamps + ?assertEqual(ok, + couch_expiring_cache:insert(Name, Key, Val, + StaleTS + 100, ExpiresTS + 100)), + ?assertEqual({fresh, Val}, couch_expiring_cache:lookup(Name, Key)), + ?assertEqual(1, length(entries(Name))), + ok = wait_lookup(Name, Key, {stale, Val}), ok = wait_lookup(Name, Key, expired), ok = wait_lookup(Name, Key, not_found), + ?assertEqual([], entries(Name)), ?assertEqual(not_found, couch_expiring_cache:lookup(Name, Key)) - end). + end)}. + + +entries(Name) -> + FarFuture = erlang:system_time(?TIME_UNIT) * 2, + couch_expiring_cache_fdb:get_range_to(Name, FarFuture, _Limit=100). wait_lookup(Name, Key, Expect) -> -- cgit v1.2.1 From f71c4c0a2a9b8979508487cef8d7ec9bca222c78 Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Tue, 14 Apr 2020 17:43:04 -0400 Subject: Allow using cached security and revs_limit properties By default, transactions are used to check metadata, and possibly reopen the db, to get a current db handle. However, if a `max_age` option is provided and db handle was checked less than `max_age` milliseconds ago, use properties from that cached handle instead. The main use of this feature be in pluggable authorization handlers where it might be necessary to inspect the security doc multiple times for the same request before a final decision is made. `revs_limit/1` was updated as well, mainly for consistency since it is almost identical to `get_security/1`. --- src/fabric/src/fabric2_db.erl | 36 ++++++++++++++++++++++++------- src/fabric/src/fabric2_fdb.erl | 8 +++++-- src/fabric/test/fabric2_db_misc_tests.erl | 34 ++++++++++++++++++++++++++++- 3 files changed, 67 insertions(+), 11 deletions(-) diff --git a/src/fabric/src/fabric2_db.erl b/src/fabric/src/fabric2_db.erl index 9b9efdac2..b94226190 100644 --- a/src/fabric/src/fabric2_db.erl +++ b/src/fabric/src/fabric2_db.erl @@ -50,7 +50,9 @@ get_instance_start_time/1, get_pid/1, get_revs_limit/1, + get_revs_limit/2, get_security/1, + get_security/2, get_update_seq/1, get_user_ctx/1, get_uuid/1, @@ -500,17 +502,21 @@ get_pid(#{}) -> get_revs_limit(#{} = Db) -> - #{revs_limit := RevsLimit} = fabric2_fdb:transactional(Db, fun(TxDb) -> - fabric2_fdb:ensure_current(TxDb) - end), - RevsLimit. + get_revs_limit(Db, []). + + +get_revs_limit(#{} = Db, Opts) -> + CurrentDb = get_cached_db(Db, Opts), + maps:get(revs_limit, CurrentDb). get_security(#{} = Db) -> - #{security_doc := SecDoc} = fabric2_fdb:transactional(Db, fun(TxDb) -> - fabric2_fdb:ensure_current(TxDb) - end), - SecDoc. + get_security(Db, []). + + +get_security(#{} = Db, Opts) -> + CurrentDb = get_cached_db(Db, Opts), + maps:get(security_doc, CurrentDb). get_update_seq(#{} = Db) -> @@ -2037,3 +2043,17 @@ open_json_doc(Db, DocId, OpenOpts, DocOpts) -> {ok, #doc{} = Doc} -> [{doc, couch_doc:to_json_obj(Doc, DocOpts)}] end. + + +get_cached_db(#{} = Db, Opts) when is_list(Opts) -> + MaxAge = fabric2_util:get_value(max_age, Opts, 0), + Now = erlang:monotonic_time(millisecond), + Age = Now - maps:get(check_current_ts, Db), + case Age < MaxAge of + true -> + Db; + false -> + fabric2_fdb:transactional(Db, fun(TxDb) -> + fabric2_fdb:ensure_current(TxDb) + end) + end. diff --git a/src/fabric/src/fabric2_fdb.erl b/src/fabric/src/fabric2_fdb.erl index 53102d6e9..03f3bad82 100644 --- a/src/fabric/src/fabric2_fdb.erl +++ b/src/fabric/src/fabric2_fdb.erl @@ -231,6 +231,7 @@ create(#{} = Db0, Options) -> revs_limit => 1000, security_doc => {[]}, user_ctx => UserCtx, + check_current_ts => erlang:monotonic_time(millisecond), validate_doc_update_funs => [], before_doc_update => undefined, @@ -272,6 +273,7 @@ open(#{} = Db0, Options) -> security_doc => {[]}, user_ctx => UserCtx, + check_current_ts => erlang:monotonic_time(millisecond), % Place holders until we implement these % bits. @@ -1273,8 +1275,10 @@ check_db_version(#{} = Db, CheckDbVersion) -> case erlfdb:wait(erlfdb:get(Tx, DbVersionKey)) of DbVersion -> put(?PDICT_CHECKED_DB_IS_CURRENT, true), - on_commit(Tx, fun() -> fabric2_server:store(Db) end), - Db; + Now = erlang:monotonic_time(millisecond), + Db1 = Db#{check_current_ts := Now}, + on_commit(Tx, fun() -> fabric2_server:store(Db1) end), + Db1; _NewDBVersion -> fabric2_server:remove(maps:get(name, Db)), throw({?MODULE, reopen}) diff --git a/src/fabric/test/fabric2_db_misc_tests.erl b/src/fabric/test/fabric2_db_misc_tests.erl index 19599823e..9c95ca565 100644 --- a/src/fabric/test/fabric2_db_misc_tests.erl +++ b/src/fabric/test/fabric2_db_misc_tests.erl @@ -38,6 +38,7 @@ misc_test_() -> ?TDEF(accessors), ?TDEF(set_revs_limit), ?TDEF(set_security), + ?TDEF(get_security_cached), ?TDEF(is_system_db), ?TDEF(validate_dbname), ?TDEF(validate_doc_ids), @@ -113,6 +114,30 @@ set_security({DbName, Db, _}) -> ?assertEqual(SecObj, fabric2_db:get_security(Db2)). +get_security_cached({DbName, Db, _}) -> + OldSecObj = fabric2_db:get_security(Db), + SecObj = {[ + {<<"admins">>, {[ + {<<"names">>, [<<"foo1">>]}, + {<<"roles">>, []} + ]}} + ]}, + + % Set directly so we don't auto-update the local cache + {ok, Db1} = fabric2_db:open(DbName, [?ADMIN_CTX]), + ?assertMatch({ok, #{}}, fabric2_fdb:transactional(Db1, fun(TxDb) -> + fabric2_fdb:set_config(TxDb, security_doc, SecObj) + end)), + + {ok, Db2} = fabric2_db:open(DbName, [?ADMIN_CTX]), + ?assertEqual(OldSecObj, fabric2_db:get_security(Db2, [{max_age, 1000}])), + + timer:sleep(100), + ?assertEqual(SecObj, fabric2_db:get_security(Db2, [{max_age, 50}])), + + ?assertEqual(ok, fabric2_db:set_security(Db2, OldSecObj)). + + is_system_db({DbName, Db, _}) -> ?assertEqual(false, fabric2_db:is_system_db(Db)), ?assertEqual(false, fabric2_db:is_system_db_name("foo")), @@ -305,12 +330,19 @@ metadata_bump({DbName, _, _}) -> erlfdb:wait(erlfdb:get(Tx, ?METADATA_VERSION_KEY)) end), + % Save timetamp before ensure_current/1 is called + TsBeforeEnsureCurrent = erlang:monotonic_time(millisecond), + % Perform a random operation which calls ensure_current {ok, _} = fabric2_db:get_db_info(Db), % Check that db handle in the cache got the new metadata version + % and that check_current_ts was updated CachedDb = fabric2_server:fetch(DbName, undefined), - ?assertMatch(#{md_version := NewMDVersion}, CachedDb). + ?assertMatch(#{ + md_version := NewMDVersion, + check_current_ts := Ts + } when Ts >= TsBeforeEnsureCurrent, CachedDb). db_version_bump({DbName, _, _}) -> -- cgit v1.2.1 From 0b8dfa6cc4ec45b4101c49f0eacbe3e2aff8fad4 Mon Sep 17 00:00:00 2001 From: Garren Smith Date: Fri, 17 Apr 2020 10:00:50 +0200 Subject: Fetch doc in same transaction as _all_doc row --- src/fabric/src/fabric2_db.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fabric/src/fabric2_db.erl b/src/fabric/src/fabric2_db.erl index b94226190..6e629f7b4 100644 --- a/src/fabric/src/fabric2_db.erl +++ b/src/fabric/src/fabric2_db.erl @@ -954,7 +954,7 @@ fold_docs(Db, UserFun, UserAcc0, Options) -> Row1 = case lists:keyfind(include_docs, 1, Options) of {include_docs, true} -> - Row0 ++ open_json_doc(Db, DocId, OpenOpts, DocOpts); + Row0 ++ open_json_doc(TxDb, DocId, OpenOpts, DocOpts); _ -> Row0 end, -- cgit v1.2.1 From f9dc8354ac1401d5e5973b52ed084d0b77028546 Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Fri, 17 Apr 2020 20:12:25 -0500 Subject: Fix couchjs utf8 conversions (#2786) * Remove unused string conversion functions * Set UTF-8 encoding when compiling scripts * Encode JavaScript strings as UTF-8 for printing * Check that only strings are passed to print * Use builtin UTF-8 conversions in http.cpp * Add tests for couchjs UTF-8 support * Remove custom UTF-8 conversion functions We're now using 100% built-in functionality of SpiderMonkey to handle all UTF-8 conversions. * Report error messages at global scope Previously we weren't reporting any uncaught exceptions or compilation errors. This changes that to print any compilation errors or any uncaught exceptions with stack traces. The previous implementation of `couch_error` was attempting to call `String.replace` on the `stack` member string of the thrown exception. This likely never worked and attempting to fix I was unable to properly invoke the `String.replace` function. This changes the implementation to use the builtin stack formatting method instead. * Modernize sources to minimize changes for 68 These are a handful of changes that modernize various aspects of the couchjs 60 source files. Behaviorally they're all benign but will shorten the diff required for adding support for SpiderMonkey 68. Co-authored-by: Joan Touzet --- src/couch/priv/couch_js/60/http.cpp | 214 +++++++++-------------- src/couch/priv/couch_js/60/main.cpp | 69 ++++++-- src/couch/priv/couch_js/60/utf8.cpp | 301 -------------------------------- src/couch/priv/couch_js/60/utf8.h | 19 -- src/couch/priv/couch_js/60/util.cpp | 196 ++++++++++++--------- src/couch/priv/couch_js/60/util.h | 4 +- src/couch/test/eunit/couch_js_tests.erl | 140 +++++++++++++-- 7 files changed, 374 insertions(+), 569 deletions(-) delete mode 100644 src/couch/priv/couch_js/60/utf8.cpp delete mode 100644 src/couch/priv/couch_js/60/utf8.h diff --git a/src/couch/priv/couch_js/60/http.cpp b/src/couch/priv/couch_js/60/http.cpp index 9ab47b2f0..e1e44d622 100644 --- a/src/couch/priv/couch_js/60/http.cpp +++ b/src/couch/priv/couch_js/60/http.cpp @@ -18,7 +18,6 @@ #include #include #include "config.h" -#include "utf8.h" #include "util.h" // Soft dependency on cURL bindings because they're @@ -100,7 +99,6 @@ http_check_enabled() #ifdef XP_WIN #define strcasecmp _strcmpi #define strncasecmp _strnicmp -#define snprintf _snprintf #endif @@ -109,7 +107,7 @@ typedef struct curl_slist CurlHeaders; typedef struct { int method; - char* url; + std::string url; CurlHeaders* req_headers; int16_t last_status; } HTTPData; @@ -127,21 +125,15 @@ const char* METHODS[] = {"GET", "HEAD", "POST", "PUT", "DELETE", "COPY", "OPTION #define OPTIONS 6 -static bool -go(JSContext* cx, JSObject* obj, HTTPData* http, char* body, size_t blen); - - -static JSString* -str_from_binary(JSContext* cx, char* data, size_t length); +static bool go(JSContext* cx, JSObject* obj, HTTPData* http, std::string& body); bool http_ctor(JSContext* cx, JSObject* req) { - HTTPData* http = NULL; + HTTPData* http = new HTTPData(); bool ret = false; - http = (HTTPData*) malloc(sizeof(HTTPData)); if(!http) { JS_ReportErrorUTF8(cx, "Failed to create CouchHTTP instance."); @@ -149,7 +141,6 @@ http_ctor(JSContext* cx, JSObject* req) } http->method = -1; - http->url = NULL; http->req_headers = NULL; http->last_status = -1; @@ -159,7 +150,7 @@ http_ctor(JSContext* cx, JSObject* req) goto success; error: - if(http) free(http); + if(http) delete http; success: return ret; @@ -171,9 +162,8 @@ http_dtor(JSFreeOp* fop, JSObject* obj) { HTTPData* http = (HTTPData*) JS_GetPrivate(obj); if(http) { - if(http->url) free(http->url); if(http->req_headers) curl_slist_free_all(http->req_headers); - free(http); + delete http; } } @@ -182,56 +172,50 @@ bool http_open(JSContext* cx, JSObject* req, JS::Value mth, JS::Value url, JS::Value snc) { HTTPData* http = (HTTPData*) JS_GetPrivate(req); - char* method = NULL; int methid; - bool ret = false; if(!http) { JS_ReportErrorUTF8(cx, "Invalid CouchHTTP instance."); - goto done; + return false; } - if(mth.isUndefined()) { - JS_ReportErrorUTF8(cx, "You must specify a method."); - goto done; + if(!mth.isString()) { + JS_ReportErrorUTF8(cx, "Method must be a string."); + return false; } - method = enc_string(cx, mth, NULL); - if(!method) { + std::string method; + if(!js_to_string(cx, JS::RootedValue(cx, mth), method)) { JS_ReportErrorUTF8(cx, "Failed to encode method."); - goto done; + return false; } for(methid = 0; METHODS[methid] != NULL; methid++) { - if(strcasecmp(METHODS[methid], method) == 0) break; + if(strcasecmp(METHODS[methid], method.c_str()) == 0) break; } if(methid > OPTIONS) { JS_ReportErrorUTF8(cx, "Invalid method specified."); - goto done; + return false; } http->method = methid; - if(url.isUndefined()) { - JS_ReportErrorUTF8(cx, "You must specify a URL."); - goto done; - } - - if(http->url != NULL) { - free(http->url); - http->url = NULL; + if(!url.isString()) { + JS_ReportErrorUTF8(cx, "URL must be a string"); + return false; } - http->url = enc_string(cx, url, NULL); - if(http->url == NULL) { + std::string urlstr; + if(!js_to_string(cx, JS::RootedValue(cx, url), urlstr)) { JS_ReportErrorUTF8(cx, "Failed to encode URL."); - goto done; + return false; } + http->url = urlstr; if(snc.isBoolean() && snc.isTrue()) { JS_ReportErrorUTF8(cx, "Synchronous flag must be false."); - goto done; + return false; } if(http->req_headers) { @@ -242,11 +226,7 @@ http_open(JSContext* cx, JSObject* req, JS::Value mth, JS::Value url, JS::Value // Disable Expect: 100-continue http->req_headers = curl_slist_append(http->req_headers, "Expect:"); - ret = true; - -done: - if(method) free(method); - return ret; + return true; } @@ -254,88 +234,60 @@ bool http_set_hdr(JSContext* cx, JSObject* req, JS::Value name, JS::Value val) { HTTPData* http = (HTTPData*) JS_GetPrivate(req); - char* keystr = NULL; - char* valstr = NULL; - char* hdrbuf = NULL; - size_t hdrlen = -1; - bool ret = false; if(!http) { JS_ReportErrorUTF8(cx, "Invalid CouchHTTP instance."); - goto done; + return false; } - if(name.isUndefined()) + if(!name.isString()) { - JS_ReportErrorUTF8(cx, "You must speciy a header name."); - goto done; + JS_ReportErrorUTF8(cx, "Header names must be strings."); + return false; } - keystr = enc_string(cx, name, NULL); - if(!keystr) + std::string keystr; + if(!js_to_string(cx, JS::RootedValue(cx, name), keystr)) { JS_ReportErrorUTF8(cx, "Failed to encode header name."); - goto done; + return false; } - if(val.isUndefined()) + if(!val.isString()) { - JS_ReportErrorUTF8(cx, "You must specify a header value."); - goto done; + JS_ReportErrorUTF8(cx, "Header values must be strings."); + return false; } - valstr = enc_string(cx, val, NULL); - if(!valstr) - { + std::string valstr; + if(!js_to_string(cx, JS::RootedValue(cx, val), valstr)) { JS_ReportErrorUTF8(cx, "Failed to encode header value."); - goto done; - } - - hdrlen = strlen(keystr) + strlen(valstr) + 3; - hdrbuf = (char*) malloc(hdrlen * sizeof(char)); - if(!hdrbuf) { - JS_ReportErrorUTF8(cx, "Failed to allocate header buffer."); - goto done; + return false; } - snprintf(hdrbuf, hdrlen, "%s: %s", keystr, valstr); - http->req_headers = curl_slist_append(http->req_headers, hdrbuf); - - ret = true; + std::string header = keystr + ": " + valstr; + http->req_headers = curl_slist_append(http->req_headers, header.c_str()); -done: - if(keystr) free(keystr); - if(valstr) free(valstr); - if(hdrbuf) free(hdrbuf); - return ret; + return true; } bool http_send(JSContext* cx, JSObject* req, JS::Value body) { HTTPData* http = (HTTPData*) JS_GetPrivate(req); - char* bodystr = NULL; - size_t bodylen = 0; - bool ret = false; if(!http) { JS_ReportErrorUTF8(cx, "Invalid CouchHTTP instance."); - goto done; + return false; } - if(!body.isUndefined()) { - bodystr = enc_string(cx, body, &bodylen); - if(!bodystr) { - JS_ReportErrorUTF8(cx, "Failed to encode body."); - goto done; - } + std::string bodystr; + if(!js_to_string(cx, JS::RootedValue(cx, body), bodystr)) { + JS_ReportErrorUTF8(cx, "Failed to encode body."); + return false; } - ret = go(cx, req, http, bodystr, bodylen); - -done: - if(bodystr) free(bodystr); - return ret; + return go(cx, req, http, bodystr); } int @@ -395,7 +347,7 @@ typedef struct { HTTPData* http; JSContext* cx; JSObject* resp_headers; - char* sendbuf; + const char* sendbuf; size_t sendlen; size_t sent; int sent_once; @@ -417,10 +369,9 @@ static size_t recv_body(void *ptr, size_t size, size_t nmem, void *data); static size_t recv_header(void *ptr, size_t size, size_t nmem, void *data); static bool -go(JSContext* cx, JSObject* obj, HTTPData* http, char* body, size_t bodylen) +go(JSContext* cx, JSObject* obj, HTTPData* http, std::string& body) { CurlState state; - char* referer; JSString* jsbody; bool ret = false; JS::Value tmp; @@ -431,8 +382,8 @@ go(JSContext* cx, JSObject* obj, HTTPData* http, char* body, size_t bodylen) state.cx = cx; state.http = http; - state.sendbuf = body; - state.sendlen = bodylen; + state.sendbuf = body.c_str(); + state.sendlen = body.size(); state.sent = 0; state.sent_once = 0; @@ -463,13 +414,13 @@ go(JSContext* cx, JSObject* obj, HTTPData* http, char* body, size_t bodylen) tmp = JS_GetReservedSlot(obj, 0); - if(!(referer = enc_string(cx, tmp, NULL))) { + std::string referer; + if(!js_to_string(cx, JS::RootedValue(cx, tmp), referer)) { JS_ReportErrorUTF8(cx, "Failed to encode referer."); if(state.recvbuf) JS_free(cx, state.recvbuf); - return ret; + return ret; } - curl_easy_setopt(HTTP_HANDLE, CURLOPT_REFERER, referer); - free(referer); + curl_easy_setopt(HTTP_HANDLE, CURLOPT_REFERER, referer.c_str()); if(http->method < 0 || http->method > OPTIONS) { JS_ReportErrorUTF8(cx, "INTERNAL: Unknown method."); @@ -490,15 +441,15 @@ go(JSContext* cx, JSObject* obj, HTTPData* http, char* body, size_t bodylen) curl_easy_setopt(HTTP_HANDLE, CURLOPT_FOLLOWLOCATION, 0); } - if(body && bodylen) { - curl_easy_setopt(HTTP_HANDLE, CURLOPT_INFILESIZE, bodylen); + if(body.size() > 0) { + curl_easy_setopt(HTTP_HANDLE, CURLOPT_INFILESIZE, body.size()); } else { curl_easy_setopt(HTTP_HANDLE, CURLOPT_INFILESIZE, 0); } // curl_easy_setopt(HTTP_HANDLE, CURLOPT_VERBOSE, 1); - curl_easy_setopt(HTTP_HANDLE, CURLOPT_URL, http->url); + curl_easy_setopt(HTTP_HANDLE, CURLOPT_URL, http->url.c_str()); curl_easy_setopt(HTTP_HANDLE, CURLOPT_HTTPHEADER, http->req_headers); curl_easy_setopt(HTTP_HANDLE, CURLOPT_READDATA, &state); curl_easy_setopt(HTTP_HANDLE, CURLOPT_SEEKDATA, &state); @@ -532,12 +483,13 @@ go(JSContext* cx, JSObject* obj, HTTPData* http, char* body, size_t bodylen) if(state.recvbuf) { state.recvbuf[state.read] = '\0'; - jsbody = dec_string(cx, state.recvbuf, state.read+1); + std::string bodystr(state.recvbuf, state.read); + jsbody = string_to_js(cx, bodystr); if(!jsbody) { // If we can't decode the body as UTF-8 we forcefully // convert it to a string by just forcing each byte // to a char16_t. - jsbody = str_from_binary(cx, state.recvbuf, state.read); + jsbody = JS_NewStringCopyN(cx, state.recvbuf, state.read); if(!jsbody) { if(!JS_IsExceptionPending(cx)) { JS_ReportErrorUTF8(cx, "INTERNAL: Failed to decode body."); @@ -572,7 +524,7 @@ go(JSContext* cx, JSObject* obj, HTTPData* http, char* body, size_t bodylen) static size_t send_body(void *ptr, size_t size, size_t nmem, void *data) { - CurlState* state = (CurlState*) data; + CurlState* state = static_cast(data); size_t length = size * nmem; size_t towrite = state->sendlen - state->sent; @@ -598,19 +550,19 @@ send_body(void *ptr, size_t size, size_t nmem, void *data) static int seek_body(void* ptr, curl_off_t offset, int origin) { - CurlState* state = (CurlState*) ptr; + CurlState* state = static_cast(ptr); if(origin != SEEK_SET) return -1; - state->sent = (size_t) offset; - return (int) state->sent; + state->sent = static_cast(offset); + return static_cast(state->sent); } static size_t recv_header(void *ptr, size_t size, size_t nmem, void *data) { - CurlState* state = (CurlState*) data; + CurlState* state = static_cast(data); char code[4]; - char* header = (char*) ptr; + char* header = static_cast(ptr); size_t length = size * nmem; JSString* hdr = NULL; uint32_t hdrlen; @@ -638,7 +590,8 @@ recv_header(void *ptr, size_t size, size_t nmem, void *data) } // Append the new header to our array. - hdr = dec_string(state->cx, header, length); + std::string hdrstr(header, length); + hdr = string_to_js(state->cx, hdrstr); if(!hdr) { return CURLE_WRITE_ERROR; } @@ -659,14 +612,17 @@ recv_header(void *ptr, size_t size, size_t nmem, void *data) static size_t recv_body(void *ptr, size_t size, size_t nmem, void *data) { - CurlState* state = (CurlState*) data; + CurlState* state = static_cast(data); size_t length = size * nmem; char* tmp = NULL; if(!state->recvbuf) { state->recvlen = 4096; state->read = 0; - state->recvbuf = (char *)JS_malloc(state->cx, state->recvlen); + state->recvbuf = static_cast(JS_malloc( + state->cx, + state->recvlen + )); } if(!state->recvbuf) { @@ -676,7 +632,12 @@ recv_body(void *ptr, size_t size, size_t nmem, void *data) // +1 so we can add '\0' back up in the go function. size_t oldlen = state->recvlen; while(length+1 > state->recvlen - state->read) state->recvlen *= 2; - tmp = (char *) JS_realloc(state->cx, state->recvbuf, oldlen, state->recvlen); + tmp = static_cast(JS_realloc( + state->cx, + state->recvbuf, + oldlen, + state->recvlen + )); if(!tmp) return CURLE_WRITE_ERROR; state->recvbuf = tmp; @@ -685,23 +646,4 @@ recv_body(void *ptr, size_t size, size_t nmem, void *data) return length; } -JSString* -str_from_binary(JSContext* cx, char* data, size_t length) -{ - char16_t* conv = (char16_t*) JS_malloc(cx, length * sizeof(char16_t)); - JSString* ret = NULL; - size_t i; - - if(!conv) return NULL; - - for(i = 0; i < length; i++) { - conv[i] = (char16_t) data[i]; - } - - ret = JS_NewUCString(cx, conv, length); - if(!ret) JS_free(cx, conv); - - return ret; -} - #endif /* HAVE_CURL */ diff --git a/src/couch/priv/couch_js/60/main.cpp b/src/couch/priv/couch_js/60/main.cpp index b6157ed85..828b9dab5 100644 --- a/src/couch/priv/couch_js/60/main.cpp +++ b/src/couch/priv/couch_js/60/main.cpp @@ -28,7 +28,6 @@ #include "config.h" #include "http.h" -#include "utf8.h" #include "util.h" static bool enableSharedMemory = true; @@ -99,8 +98,9 @@ req_ctor(JSContext* cx, unsigned int argc, JS::Value* vp) static bool req_open(JSContext* cx, unsigned int argc, JS::Value* vp) { - JSObject* obj = JS_THIS_OBJECT(cx, vp); JS::CallArgs args = JS::CallArgsFromVp(argc, vp); + JS::Value vobj = args.computeThis(cx); + JSObject* obj = vobj.toObjectOrNull(); bool ret = false; if(argc == 2) { @@ -119,8 +119,9 @@ req_open(JSContext* cx, unsigned int argc, JS::Value* vp) static bool req_set_hdr(JSContext* cx, unsigned int argc, JS::Value* vp) { - JSObject* obj = JS_THIS_OBJECT(cx, vp); JS::CallArgs args = JS::CallArgsFromVp(argc, vp); + JS::Value vobj = args.computeThis(cx); + JSObject* obj = vobj.toObjectOrNull(); bool ret = false; if(argc == 2) { @@ -137,8 +138,9 @@ req_set_hdr(JSContext* cx, unsigned int argc, JS::Value* vp) static bool req_send(JSContext* cx, unsigned int argc, JS::Value* vp) { - JSObject* obj = JS_THIS_OBJECT(cx, vp); JS::CallArgs args = JS::CallArgsFromVp(argc, vp); + JS::Value vobj = args.computeThis(cx); + JSObject* obj = vobj.toObjectOrNull(); bool ret = false; if(argc == 1) { @@ -155,7 +157,9 @@ static bool req_status(JSContext* cx, unsigned int argc, JS::Value* vp) { JS::CallArgs args = JS::CallArgsFromVp(argc, vp); - JSObject* obj = JS_THIS_OBJECT(cx, vp); + JS::Value vobj = args.computeThis(cx); + JSObject* obj = vobj.toObjectOrNull(); + int status = http_status(cx, obj); if(status < 0) @@ -169,8 +173,10 @@ static bool base_url(JSContext *cx, unsigned int argc, JS::Value* vp) { JS::CallArgs args = JS::CallArgsFromVp(argc, vp); - JSObject* obj = JS_THIS_OBJECT(cx, vp); - couch_args *cargs = (couch_args*)JS_GetContextPrivate(cx); + JS::Value vobj = args.computeThis(cx); + JSObject* obj = vobj.toObjectOrNull(); + + couch_args *cargs = static_cast(JS_GetContextPrivate(cx)); JS::Value uri_val; bool rc = http_uri(cx, obj, cargs, &uri_val); args.rval().set(uri_val); @@ -226,9 +232,15 @@ evalcx(JSContext *cx, unsigned int argc, JS::Value* vp) if (!sandbox) return false; } - JS_BeginRequest(cx); + JSAutoRequest ar(cx); + if (!sandbox) { + sandbox = NewSandbox(cx, false); + if (!sandbox) + return false; + } + js::AutoStableStringChars strChars(cx); if (!strChars.initTwoByte(cx, str)) return false; @@ -237,12 +249,6 @@ evalcx(JSContext *cx, unsigned int argc, JS::Value* vp) size_t srclen = chars.length(); const char16_t* src = chars.begin().get(); - if (!sandbox) { - sandbox = NewSandbox(cx, false); - if (!sandbox) - return false; - } - if(srclen == 0) { args.rval().setObject(*sandbox); } else { @@ -283,7 +289,19 @@ static bool print(JSContext* cx, unsigned int argc, JS::Value* vp) { JS::CallArgs args = JS::CallArgsFromVp(argc, vp); - couch_print(cx, argc, args); + + bool use_stderr = false; + if(argc > 1 && args[1].isTrue()) { + use_stderr = true; + } + + if(!args[0].isString()) { + JS_ReportErrorUTF8(cx, "Unable to print non-string value."); + return false; + } + + couch_print(cx, args[0], use_stderr); + args.rval().setUndefined(); return true; } @@ -386,7 +404,7 @@ static JSFunctionSpec global_functions[] = { static bool csp_allows(JSContext* cx) { - couch_args *args = (couch_args*)JS_GetContextPrivate(cx); + couch_args* args = static_cast(JS_GetContextPrivate(cx)); if(args->eval) { return true; } else { @@ -473,10 +491,18 @@ main(int argc, const char* argv[]) // Compile and run JS::CompileOptions options(cx); options.setFileAndLine(args->scripts[i], 1); + options.setUTF8(true); JS::RootedScript script(cx); if(!JS_CompileScript(cx, scriptsrc, slen, options, &script)) { - fprintf(stderr, "Failed to compile script.\n"); + JS::RootedValue exc(cx); + if(!JS_GetPendingException(cx, &exc)) { + fprintf(stderr, "Failed to compile script.\n"); + } else { + JS::RootedObject exc_obj(cx, &exc.toObject()); + JSErrorReport* report = JS_ErrorFromException(cx, exc_obj); + couch_error(cx, report); + } return 1; } @@ -484,7 +510,14 @@ main(int argc, const char* argv[]) JS::RootedValue result(cx); if(JS_ExecuteScript(cx, script, &result) != true) { - fprintf(stderr, "Failed to execute script.\n"); + JS::RootedValue exc(cx); + if(!JS_GetPendingException(cx, &exc)) { + fprintf(stderr, "Failed to execute script.\n"); + } else { + JS::RootedObject exc_obj(cx, &exc.toObject()); + JSErrorReport* report = JS_ErrorFromException(cx, exc_obj); + couch_error(cx, report); + } return 1; } diff --git a/src/couch/priv/couch_js/60/utf8.cpp b/src/couch/priv/couch_js/60/utf8.cpp deleted file mode 100644 index 38dfa6224..000000000 --- a/src/couch/priv/couch_js/60/utf8.cpp +++ /dev/null @@ -1,301 +0,0 @@ -// Licensed under the Apache License, Version 2.0 (the "License"); you may not -// use this file except in compliance with the License. You may obtain a copy of -// the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -// License for the specific language governing permissions and limitations under -// the License. - -#include -#include -#include -#include -#include "config.h" -#include "util.h" - -static int -enc_char(uint8_t *utf8Buffer, uint32_t ucs4Char) -{ - int utf8Length = 1; - - if (ucs4Char < 0x80) - { - *utf8Buffer = (uint8_t)ucs4Char; - } - else - { - int i; - uint32_t a = ucs4Char >> 11; - utf8Length = 2; - while(a) - { - a >>= 5; - utf8Length++; - } - i = utf8Length; - while(--i) - { - utf8Buffer[i] = (uint8_t)((ucs4Char & 0x3F) | 0x80); - ucs4Char >>= 6; - } - *utf8Buffer = (uint8_t)(0x100 - (1 << (8-utf8Length)) + ucs4Char); - } - - return utf8Length; -} - -static bool -enc_charbuf(const char16_t* src, size_t srclen, char* dst, size_t* dstlenp) -{ - size_t i; - size_t utf8Len; - size_t dstlen = *dstlenp; - size_t origDstlen = dstlen; - char16_t c; - char16_t c2; - uint32_t v; - uint8_t utf8buf[6]; - - if(!dst) - { - dstlen = origDstlen = (size_t) -1; - } - - while(srclen) - { - c = *src++; - srclen--; - - if(c <= 0xD7FF || c >= 0xE000) - { - v = (uint32_t) c; - } - else if(c >= 0xD800 && c <= 0xDBFF) - { - if(srclen < 1) goto buffer_too_small; - c2 = *src++; - srclen--; - if(c2 >= 0xDC00 && c2 <= 0xDFFF) - { - v = (uint32_t) (((c - 0xD800) << 10) + (c2 - 0xDC00) + 0x10000); - } - else - { - // Invalid second half of surrogate pair - v = (uint32_t) 0xFFFD; - // Undo our character advancement - src--; - srclen++; - } - } - else - { - // Invalid first half surrogate pair - v = (uint32_t) 0xFFFD; - } - - if(v < 0x0080) - { - /* no encoding necessary - performance hack */ - if(!dstlen) goto buffer_too_small; - if(dst) *dst++ = (char) v; - utf8Len = 1; - } - else - { - utf8Len = enc_char(utf8buf, v); - if(utf8Len > dstlen) goto buffer_too_small; - if(dst) - { - for (i = 0; i < utf8Len; i++) - { - *dst++ = (char) utf8buf[i]; - } - } - } - dstlen -= utf8Len; - } - - *dstlenp = (origDstlen - dstlen); - return true; - -buffer_too_small: - *dstlenp = (origDstlen - dstlen); - return false; -} - -char* -enc_string(JSContext* cx, JS::Value arg, size_t* buflen) -{ - JSString* str = NULL; - const char16_t* src = NULL; - char* bytes = NULL; - size_t srclen = 0; - size_t byteslen = 0; - js::AutoStableStringChars rawChars(cx); - - str = arg.toString(); - if(!str) goto error; - - if (!rawChars.initTwoByte(cx, str)) - return NULL; - - src = rawChars.twoByteRange().begin().get(); - srclen = JS_GetStringLength(str); - - if(!enc_charbuf(src, srclen, NULL, &byteslen)) goto error; - - bytes = (char *)JS_malloc(cx, (byteslen) + 1); - bytes[byteslen] = 0; - - if(!enc_charbuf(src, srclen, bytes, &byteslen)) goto error; - - if(buflen) *buflen = byteslen; - goto success; - -error: - if(bytes != NULL) JS_free(cx, bytes); - bytes = NULL; - -success: - return bytes; -} - -static uint32_t -dec_char(const uint8_t *utf8Buffer, int utf8Length) -{ - uint32_t ucs4Char; - uint32_t minucs4Char; - - /* from Unicode 3.1, non-shortest form is illegal */ - static const uint32_t minucs4Table[] = { - 0x00000080, 0x00000800, 0x0001000, 0x0020000, 0x0400000 - }; - - if (utf8Length == 1) - { - ucs4Char = *utf8Buffer; - } - else - { - ucs4Char = *utf8Buffer++ & ((1<<(7-utf8Length))-1); - minucs4Char = minucs4Table[utf8Length-2]; - while(--utf8Length) - { - ucs4Char = ucs4Char<<6 | (*utf8Buffer++ & 0x3F); - } - if(ucs4Char < minucs4Char || ucs4Char == 0xFFFE || ucs4Char == 0xFFFF) - { - ucs4Char = 0xFFFD; - } - } - - return ucs4Char; -} - -static bool -dec_charbuf(const char *src, size_t srclen, char16_t *dst, size_t *dstlenp) -{ - uint32_t v; - size_t offset = 0; - size_t j; - size_t n; - size_t dstlen = *dstlenp; - size_t origDstlen = dstlen; - - if(!dst) dstlen = origDstlen = (size_t) -1; - - while(srclen) - { - v = (uint8_t) *src; - n = 1; - - if(v & 0x80) - { - while(v & (0x80 >> n)) - { - n++; - } - - if(n > srclen) goto buffer_too_small; - if(n == 1 || n > 6) goto bad_character; - - for(j = 1; j < n; j++) - { - if((src[j] & 0xC0) != 0x80) goto bad_character; - } - - v = dec_char((const uint8_t *) src, n); - if(v >= 0x10000) - { - v -= 0x10000; - - if(v > 0xFFFFF || dstlen < 2) - { - *dstlenp = (origDstlen - dstlen); - return false; - } - - if(dstlen < 2) goto buffer_too_small; - - if(dst) - { - *dst++ = (char16_t)((v >> 10) + 0xD800); - v = (char16_t)((v & 0x3FF) + 0xDC00); - } - dstlen--; - } - } - - if(!dstlen) goto buffer_too_small; - if(dst) *dst++ = (char16_t) v; - - dstlen--; - offset += n; - src += n; - srclen -= n; - } - - *dstlenp = (origDstlen - dstlen); - return true; - -bad_character: - *dstlenp = (origDstlen - dstlen); - return false; - -buffer_too_small: - *dstlenp = (origDstlen - dstlen); - return false; -} - -JSString* -dec_string(JSContext* cx, const char* bytes, size_t byteslen) -{ - JSString* str = NULL; - char16_t* chars = NULL; - size_t charslen; - - if(!dec_charbuf(bytes, byteslen, NULL, &charslen)) goto error; - - chars = (char16_t *)JS_malloc(cx, (charslen + 1) * sizeof(char16_t)); - if(!chars) return NULL; - chars[charslen] = 0; - - if(!dec_charbuf(bytes, byteslen, chars, &charslen)) goto error; - - str = JS_NewUCString(cx, chars, charslen - 1); - if(!str) goto error; - - goto success; - -error: - if(chars != NULL) JS_free(cx, chars); - str = NULL; - -success: - return str; -} diff --git a/src/couch/priv/couch_js/60/utf8.h b/src/couch/priv/couch_js/60/utf8.h deleted file mode 100644 index c8b1f4d82..000000000 --- a/src/couch/priv/couch_js/60/utf8.h +++ /dev/null @@ -1,19 +0,0 @@ -// Licensed under the Apache License, Version 2.0 (the "License"); you may not -// use this file except in compliance with the License. You may obtain a copy of -// the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -// License for the specific language governing permissions and limitations under -// the License. - -#ifndef COUCH_JS_UTF_8_H -#define COUCH_JS_UTF_8_H - -char* enc_string(JSContext* cx, JS::Value arg, size_t* buflen); -JSString* dec_string(JSContext* cx, const char* buf, size_t buflen); - -#endif diff --git a/src/couch/priv/couch_js/60/util.cpp b/src/couch/priv/couch_js/60/util.cpp index 92c6cbf4a..c37c41f2f 100644 --- a/src/couch/priv/couch_js/60/util.cpp +++ b/src/couch/priv/couch_js/60/util.cpp @@ -13,53 +13,76 @@ #include #include +#include + #include #include +#include #include +#include #include "help.h" #include "util.h" -#include "utf8.h" std::string js_to_string(JSContext* cx, JS::HandleValue val) { + JS::AutoSaveExceptionState exc_state(cx); JS::RootedString sval(cx); sval = val.toString(); JS::UniqueChars chars(JS_EncodeStringToUTF8(cx, sval)); if(!chars) { JS_ClearPendingException(cx); - fprintf(stderr, "Error converting value to string.\n"); - exit(3); + return std::string(); } return chars.get(); } -std::string -js_to_string(JSContext* cx, JSString *str) +bool +js_to_string(JSContext* cx, JS::HandleValue val, std::string& str) { - JS::UniqueChars chars(JS_EncodeString(cx, str)); - if(!chars) { - JS_ClearPendingException(cx); - fprintf(stderr, "Error converting to string.\n"); - exit(3); + if(!val.isString()) { + return false; } - return chars.get(); + if(JS_GetStringLength(val.toString()) == 0) { + str = ""; + return true; + } + + std::string conv = js_to_string(cx, val); + if(!conv.size()) { + return false; + } + + str = conv; + return true; } JSString* -string_to_js(JSContext* cx, const std::string& s) +string_to_js(JSContext* cx, const std::string& raw) { - JSString* ret = JS_NewStringCopyN(cx, s.c_str(), s.size()); - if(ret != nullptr) { - return ret; + JS::UTF8Chars utf8(raw.c_str(), raw.size()); + JS::UniqueTwoByteChars utf16; + size_t len; + + utf16.reset(JS::UTF8CharsToNewTwoByteCharsZ(cx, utf8, &len).get()); + if(!utf16) { + return nullptr; + } + + JSString* ret = JS_NewUCString(cx, utf16.get(), len); + + if(ret) { + // JS_NewUCString took ownership on success. We shift + // the resulting pointer into Unused to silence the + // compiler warning. + mozilla::Unused << utf16.release(); } - fprintf(stderr, "Unable to allocate string object.\n"); - exit(3); + return ret; } size_t @@ -84,21 +107,21 @@ couch_readfile(const char* file, char** outbuf_p) while((nread = fread(fbuf, 1, 16384, fp)) > 0) { if(buf == NULL) { - buf = (char*) malloc(nread + 1); + buf = new char[nread + 1]; if(buf == NULL) { fprintf(stderr, "Out of memory.\n"); exit(3); } memcpy(buf, fbuf, nread); } else { - tmp = (char*) malloc(buflen + nread + 1); + tmp = new char[buflen + nread + 1]; if(tmp == NULL) { fprintf(stderr, "Out of memory.\n"); exit(3); } memcpy(tmp, buf, buflen); memcpy(tmp+buflen, fbuf, nread); - free(buf); + delete buf; buf = tmp; } buflen += nread; @@ -114,12 +137,17 @@ couch_parse_args(int argc, const char* argv[]) couch_args* args; int i = 1; - args = (couch_args*) malloc(sizeof(couch_args)); + args = new couch_args(); if(args == NULL) return NULL; - memset(args, '\0', sizeof(couch_args)); + args->eval = 0; + args->use_http = 0; + args->use_test_funs = 0; args->stack_size = 64L * 1024L * 1024L; + args->scripts = nullptr; + args->uri_file = nullptr; + args->uri = nullptr; while(i < argc) { if(strcmp("-h", argv[i]) == 0) { @@ -193,7 +221,7 @@ couch_readline(JSContext* cx, FILE* fp) size_t oldbyteslen = 256; size_t readlen = 0; - bytes = (char *)JS_malloc(cx, byteslen); + bytes = static_cast(JS_malloc(cx, byteslen)); if(bytes == NULL) return NULL; while((readlen = couch_fgets(bytes+used, byteslen-used, fp)) > 0) { @@ -207,7 +235,7 @@ couch_readline(JSContext* cx, FILE* fp) // Double our buffer and read more. oldbyteslen = byteslen; byteslen *= 2; - tmp = (char *)JS_realloc(cx, bytes, oldbyteslen, byteslen); + tmp = static_cast(JS_realloc(cx, bytes, oldbyteslen, byteslen)); if(!tmp) { JS_free(cx, bytes); return NULL; @@ -222,8 +250,8 @@ couch_readline(JSContext* cx, FILE* fp) return JS_NewStringCopyZ(cx, nullptr); } - // Shring the buffer to the actual data size - tmp = (char *)JS_realloc(cx, bytes, byteslen, used); + // Shrink the buffer to the actual data size + tmp = static_cast(JS_realloc(cx, bytes, byteslen, used)); if(!tmp) { JS_free(cx, bytes); return NULL; @@ -238,22 +266,16 @@ couch_readline(JSContext* cx, FILE* fp) void -couch_print(JSContext* cx, unsigned int argc, JS::CallArgs argv) +couch_print(JSContext* cx, JS::HandleValue obj, bool use_stderr) { - uint8_t* bytes = nullptr; - FILE *stream = stdout; + FILE* stream = stdout; - if (argc) { - if (argc > 1 && argv[1].isTrue()) { - stream = stderr; - } - JSString* str = JS::ToString(cx, argv.get(0)); - bytes = reinterpret_cast(JS_EncodeString(cx, str)); - fprintf(stream, "%s", bytes); - JS_free(cx, bytes); + if(use_stderr) { + stream = stderr; } - fputc('\n', stream); + std::string val = js_to_string(cx, obj); + fprintf(stream, "%s\n", val.c_str()); fflush(stream); } @@ -261,51 +283,63 @@ couch_print(JSContext* cx, unsigned int argc, JS::CallArgs argv) void couch_error(JSContext* cx, JSErrorReport* report) { - JS::RootedValue v(cx), stack(cx), replace(cx); - char* bytes; - JSObject* regexp; - - if(!report || !JSREPORT_IS_WARNING(report->flags)) - { - fprintf(stderr, "%s\n", report->message().c_str()); - - // Print a stack trace, if available. - if (JSREPORT_IS_EXCEPTION(report->flags) && - JS_GetPendingException(cx, &v)) - { - // Clear the exception before an JS method calls or the result is - // infinite, recursive error report generation. - JS_ClearPendingException(cx); - - // Use JS regexp to indent the stack trace. - // If the regexp can't be created, don't JS_ReportErrorUTF8 since it is - // probably not productive to wind up here again. - JS::RootedObject vobj(cx, v.toObjectOrNull()); - - if(JS_GetProperty(cx, vobj, "stack", &stack) && - (regexp = JS_NewRegExpObject( - cx, "^(?=.)", 6, JSREG_GLOB | JSREG_MULTILINE))) - { - // Set up the arguments to ``String.replace()`` - JS::AutoValueVector re_args(cx); - JS::RootedValue arg0(cx, JS::ObjectValue(*regexp)); - auto arg1 = JS::StringValue(string_to_js(cx, "\t")); - - if (re_args.append(arg0) && re_args.append(arg1)) { - // Perform the replacement - JS::RootedObject sobj(cx, stack.toObjectOrNull()); - if(JS_GetProperty(cx, sobj, "replace", &replace) && - JS_CallFunctionValue(cx, sobj, replace, re_args, &v)) - { - // Print the result - bytes = enc_string(cx, v, NULL); - fprintf(stderr, "Stacktrace:\n%s", bytes); - JS_free(cx, bytes); - } - } - } + if(!report) { + return; + } + + if(JSREPORT_IS_WARNING(report->flags)) { + return; + } + + std::ostringstream msg; + msg << "error: " << report->message().c_str(); + + mozilla::Maybe ac; + JS::RootedValue exc(cx); + JS::RootedObject exc_obj(cx); + JS::RootedObject stack_obj(cx); + JS::RootedString stack_str(cx); + JS::RootedValue stack_val(cx); + + if(!JS_GetPendingException(cx, &exc)) { + goto done; + } + + // Clear the exception before an JS method calls or the result is + // infinite, recursive error report generation. + JS_ClearPendingException(cx); + + exc_obj.set(exc.toObjectOrNull()); + stack_obj.set(JS::ExceptionStackOrNull(exc_obj)); + + if(!stack_obj) { + // Compilation errors don't have a stack + + msg << " at "; + + if(report->filename) { + msg << report->filename; + } else { + msg << ""; + } + + if(report->lineno) { + msg << ':' << report->lineno << ':' << report->column; } + + goto done; + } + + if(!JS::BuildStackString(cx, stack_obj, &stack_str, 2)) { + goto done; } + + stack_val.set(JS::StringValue(stack_str)); + msg << std::endl << std::endl << js_to_string(cx, stack_val).c_str(); + +done: + msg << std::endl; + fprintf(stderr, "%s", msg.str().c_str()); } diff --git a/src/couch/priv/couch_js/60/util.h b/src/couch/priv/couch_js/60/util.h index 407e3e602..4c27f0f66 100644 --- a/src/couch/priv/couch_js/60/util.h +++ b/src/couch/priv/couch_js/60/util.h @@ -26,14 +26,14 @@ typedef struct { } couch_args; std::string js_to_string(JSContext* cx, JS::HandleValue val); -std::string js_to_string(JSContext* cx, JSString *str); +bool js_to_string(JSContext* cx, JS::HandleValue val, std::string& str); JSString* string_to_js(JSContext* cx, const std::string& s); couch_args* couch_parse_args(int argc, const char* argv[]); int couch_fgets(char* buf, int size, FILE* fp); JSString* couch_readline(JSContext* cx, FILE* fp); size_t couch_readfile(const char* file, char** outbuf_p); -void couch_print(JSContext* cx, unsigned int argc, JS::CallArgs argv); +void couch_print(JSContext* cx, JS::HandleValue str, bool use_stderr); void couch_error(JSContext* cx, JSErrorReport* report); void couch_oom(JSContext* cx, void* data); bool couch_load_funcs(JSContext* cx, JS::HandleObject obj, JSFunctionSpec* funcs); diff --git a/src/couch/test/eunit/couch_js_tests.erl b/src/couch/test/eunit/couch_js_tests.erl index cd6452cf9..c2c62463b 100644 --- a/src/couch/test/eunit/couch_js_tests.erl +++ b/src/couch/test/eunit/couch_js_tests.erl @@ -14,17 +14,6 @@ -include_lib("eunit/include/eunit.hrl"). --define(FUNC, << - "var state = [];\n" - "function(doc) {\n" - " var val = \"0123456789ABCDEF\";\n" - " for(var i = 0; i < 165535; i++) {\n" - " state.push([val, val]);\n" - " }\n" - "}\n" ->>). - - couch_js_test_() -> { "Test couchjs", @@ -33,15 +22,142 @@ couch_js_test_() -> fun test_util:start_couch/0, fun test_util:stop_couch/1, [ + fun should_create_sandbox/0, + fun should_roundtrip_utf8/0, + fun should_roundtrip_modified_utf8/0, + fun should_replace_broken_utf16/0, + fun should_allow_js_string_mutations/0, {timeout, 60000, fun should_exit_on_oom/0} ] } }. +should_create_sandbox() -> + % Try and detect whether we can see out of the + % sandbox or not. + Src = << + "function(doc) {\n" + " try {\n" + " emit(false, typeof(Couch.compile_function));\n" + " } catch (e) {\n" + " emit(true, e.message);\n" + " }\n" + "}\n" + >>, + Proc = couch_query_servers:get_os_process(<<"javascript">>), + true = couch_query_servers:proc_prompt(Proc, [<<"add_fun">>, Src]), + Result = couch_query_servers:proc_prompt(Proc, [<<"map_doc">>, <<"{}">>]), + ?assertEqual([[[true, <<"Couch is not defined">>]]], Result). + + +should_roundtrip_utf8() -> + % Try round tripping UTF-8 both directions through + % couchjs. These tests use hex encoded values of + % Ä (C384) and Ü (C39C) so as to avoid odd editor/Erlang encoding + % strangeness. + Src = << + "function(doc) {\n" + " emit(doc.value, \"", 16#C3, 16#9C, "\");\n" + "}\n" + >>, + Proc = couch_query_servers:get_os_process(<<"javascript">>), + true = couch_query_servers:proc_prompt(Proc, [<<"add_fun">>, Src]), + Doc = {[ + {<<"value">>, <<16#C3, 16#84>>} + ]}, + Result = couch_query_servers:proc_prompt(Proc, [<<"map_doc">>, Doc]), + ?assertEqual([[[<<16#C3, 16#84>>, <<16#C3, 16#9C>>]]], Result). + + +should_roundtrip_modified_utf8() -> + % Mimicing the test case from the mailing list + Src = << + "function(doc) {\n" + " emit(doc.value.toLowerCase(), \"", 16#C3, 16#9C, "\");\n" + "}\n" + >>, + Proc = couch_query_servers:get_os_process(<<"javascript">>), + true = couch_query_servers:proc_prompt(Proc, [<<"add_fun">>, Src]), + Doc = {[ + {<<"value">>, <<16#C3, 16#84>>} + ]}, + Result = couch_query_servers:proc_prompt(Proc, [<<"map_doc">>, Doc]), + ?assertEqual([[[<<16#C3, 16#A4>>, <<16#C3, 16#9C>>]]], Result). + + +should_replace_broken_utf16() -> + % This test reverse the surrogate pair of + % the Boom emoji U+1F4A5 + Src = << + "function(doc) {\n" + " emit(doc.value.split(\"\").reverse().join(\"\"), 1);\n" + "}\n" + >>, + Proc = couch_query_servers:get_os_process(<<"javascript">>), + true = couch_query_servers:proc_prompt(Proc, [<<"add_fun">>, Src]), + Doc = {[ + {<<"value">>, list_to_binary(xmerl_ucs:to_utf8([16#1F4A5]))} + ]}, + Result = couch_query_servers:proc_prompt(Proc, [<<"map_doc">>, Doc]), + % Invalid UTF-8 gets replaced with the 16#FFFD replacement + % marker + Markers = list_to_binary(xmerl_ucs:to_utf8([16#FFFD, 16#FFFD])), + ?assertEqual([[[Markers, 1]]], Result). + + +should_allow_js_string_mutations() -> + % This binary corresponds to this string: мама мыла раму + % Which I'm told translates to: "mom was washing the frame" + MomWashedTheFrame = << + 16#D0, 16#BC, 16#D0, 16#B0, 16#D0, 16#BC, 16#D0, 16#B0, 16#20, + 16#D0, 16#BC, 16#D1, 16#8B, 16#D0, 16#BB, 16#D0, 16#B0, 16#20, + 16#D1, 16#80, 16#D0, 16#B0, 16#D0, 16#BC, 16#D1, 16#83 + >>, + Mom = <<16#D0, 16#BC, 16#D0, 16#B0, 16#D0, 16#BC, 16#D0, 16#B0>>, + Washed = <<16#D0, 16#BC, 16#D1, 16#8B, 16#D0, 16#BB, 16#D0, 16#B0>>, + Src1 = << + "function(doc) {\n" + " emit(\"length\", doc.value.length);\n" + "}\n" + >>, + Src2 = << + "function(doc) {\n" + " emit(\"substring\", doc.value.substring(5, 9));\n" + "}\n" + >>, + Src3 = << + "function(doc) {\n" + " emit(\"slice\", doc.value.slice(0, 4));\n" + "}\n" + >>, + Proc = couch_query_servers:get_os_process(<<"javascript">>), + true = couch_query_servers:proc_prompt(Proc, [<<"add_fun">>, Src1]), + true = couch_query_servers:proc_prompt(Proc, [<<"add_fun">>, Src2]), + true = couch_query_servers:proc_prompt(Proc, [<<"add_fun">>, Src3]), + Doc = {[{<<"value">>, MomWashedTheFrame}]}, + Result = couch_query_servers:proc_prompt(Proc, [<<"map_doc">>, Doc]), + io:format(standard_error, "~w~n~w~n", [MomWashedTheFrame, Result]), + Expect = [ + [[<<"length">>, 14]], + [[<<"substring">>, Washed]], + [[<<"slice">>, Mom]] + ], + ?assertEqual(Expect, Result). + + should_exit_on_oom() -> + Src = << + "var state = [];\n" + "function(doc) {\n" + " var val = \"0123456789ABCDEF\";\n" + " for(var i = 0; i < 165535; i++) {\n" + " state.push([val, val]);\n" + " }\n" + "}\n" + >>, Proc = couch_query_servers:get_os_process(<<"javascript">>), - true = couch_query_servers:proc_prompt(Proc, [<<"add_fun">>, ?FUNC]), + true = couch_query_servers:proc_prompt(Proc, [<<"add_fun">>, Src]), trigger_oom(Proc). trigger_oom(Proc) -> -- cgit v1.2.1 From 45e0c30368cf071dc3de0df32efba95dde5abcd8 Mon Sep 17 00:00:00 2001 From: ILYA Khlopotov Date: Mon, 20 Apr 2020 13:47:39 -0700 Subject: Fix typo in error message --- src/chttpd/src/chttpd_httpd_handlers.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/chttpd/src/chttpd_httpd_handlers.erl b/src/chttpd/src/chttpd_httpd_handlers.erl index 79ec3db8e..d50115917 100644 --- a/src/chttpd/src/chttpd_httpd_handlers.erl +++ b/src/chttpd/src/chttpd_httpd_handlers.erl @@ -514,5 +514,5 @@ not_supported(#httpd{} = Req, _Db) -> not_implemented(#httpd{} = Req, _Db) -> - Msg = <<"resouce is not implemented">>, + Msg = <<"resource is not implemented">>, chttpd:send_error(Req, 501, not_implemented, Msg). -- cgit v1.2.1 From 975110db2aa68581fa00e8f55eb5ff63c32ed17e Mon Sep 17 00:00:00 2001 From: Ronny Date: Tue, 21 Apr 2020 19:31:29 +0200 Subject: Update README.rst (#2537) Update the description of the behavior of the script ./dev/run. Co-authored-by: Joan Touzet --- README.rst | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/README.rst b/README.rst index aaf4e17d3..c527913b5 100644 --- a/README.rst +++ b/README.rst @@ -74,9 +74,11 @@ layer in front of this cluster by running ``./dev/run --with-haproxy listening on port 5984. For Fauxton developers fixing the admin-party does not work via the button in -Fauxton. To fix the admin party you have to run ``./dev/run`` with the ``admin`` -flag, e.g. ``./dev/run --admin=username:password``. If you want to have an -admin-party, just omit the flag. +Fauxton. If you run ``./dev/run``, an admin user ``root`` with a random password +is generated (see the output of the script). If you want to set an admin user, +start with the admin flag, e.g. ``./dev/run --admin=username:password``. If you +want to have an admin-party, run ``./dev/run --with-admin-party-please``. To see +all available options, please check ``./dev/run --help``. Contributing to CouchDB ----------------------- -- cgit v1.2.1 From 5748ef39ff18325369ce2c81121933c115fb0c0e Mon Sep 17 00:00:00 2001 From: Will Holley Date: Tue, 21 Apr 2020 19:09:18 +0100 Subject: Bump fauxton to v1.2.3 (#2515) Co-authored-by: Joan Touzet --- rebar.config.script | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rebar.config.script b/rebar.config.script index 408ad3d48..bfca5c84e 100644 --- a/rebar.config.script +++ b/rebar.config.script @@ -153,7 +153,7 @@ DepDescs = [ {docs, {url, "https://github.com/apache/couchdb-documentation"}, {tag, "3.0.0"}, [raw]}, {fauxton, {url, "https://github.com/apache/couchdb-fauxton"}, - {tag, "v1.2.2"}, [raw]}, + {tag, "v1.2.3"}, [raw]}, %% Third party deps {folsom, "folsom", {tag, "CouchDB-0.8.3"}}, {hyper, "hyper", {tag, "CouchDB-2.2.0-6"}}, -- cgit v1.2.1 From a8413bce72fb03f9a56d251fcb2c9198953d4bbf Mon Sep 17 00:00:00 2001 From: Tony Sun Date: Tue, 21 Apr 2020 14:44:23 -0700 Subject: fix operator issue with empty arrays (#2805) Previously, in https://github.com/apache/couchdb/pull/1783, the logic was wrong in relation to how certain operators interacted with empty arrays. We modify this logic to make it such that: {"foo":"bar", "bar":{"$in":[]}} and {"foo":"bar", "bar":{"$all":[]}} should return 0 results. --- src/mango/src/mango_selector.erl | 4 ++-- src/mango/test/21-empty-selector-tests.py | 24 +++++++++++++++++++++++- 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/src/mango/src/mango_selector.erl b/src/mango/src/mango_selector.erl index 3ea83c220..e884dc55c 100644 --- a/src/mango/src/mango_selector.erl +++ b/src/mango/src/mango_selector.erl @@ -421,7 +421,7 @@ match({[{<<"$not">>, Arg}]}, Value, Cmp) -> not match(Arg, Value, Cmp); match({[{<<"$all">>, []}]}, _, _) -> - true; + false; % All of the values in Args must exist in Values or % Values == hd(Args) if Args is a single element list % that contains a list. @@ -506,7 +506,7 @@ match({[{<<"$gt">>, Arg}]}, Value, Cmp) -> Cmp(Value, Arg) > 0; match({[{<<"$in">>, []}]}, _, _) -> - true; + false; match({[{<<"$in">>, Args}]}, Values, Cmp) when is_list(Values)-> Pred = fun(Arg) -> lists:foldl(fun(Value,Match) -> diff --git a/src/mango/test/21-empty-selector-tests.py b/src/mango/test/21-empty-selector-tests.py index beb222c85..31ad8e645 100644 --- a/src/mango/test/21-empty-selector-tests.py +++ b/src/mango/test/21-empty-selector-tests.py @@ -35,14 +35,36 @@ def make_empty_selector_suite(klass): docs = self.db.find({"age": 22, "$or": []}) assert len(docs) == 1 + def test_empty_array_in_with_age(self): + resp = self.db.find({"age": 22, "company": {"$in": []}}, explain=True) + self.assertEqual(resp["index"]["type"], klass.INDEX_TYPE) + docs = self.db.find({"age": 22, "company": {"$in": []}}) + assert len(docs) == 0 + def test_empty_array_and_with_age(self): resp = self.db.find( - {"age": 22, "$and": [{"b": {"$all": []}}]}, explain=True + {"age": 22, "$and": []}, explain=True ) self.assertEqual(resp["index"]["type"], klass.INDEX_TYPE) docs = self.db.find({"age": 22, "$and": []}) assert len(docs) == 1 + def test_empty_array_all_age(self): + resp = self.db.find( + {"age": 22, "company": {"$all": []}}, explain=True + ) + self.assertEqual(resp["index"]["type"], klass.INDEX_TYPE) + docs = self.db.find({"age": 22, "company": {"$all": []}}) + assert len(docs) == 0 + + def test_empty_array_nested_all_with_age(self): + resp = self.db.find( + {"age": 22, "$and": [{"company": {"$all": []}}]}, explain=True + ) + self.assertEqual(resp["index"]["type"], klass.INDEX_TYPE) + docs = self.db.find( {"age": 22, "$and": [{"company": {"$all": []}}]}) + assert len(docs) == 0 + def test_empty_arrays_complex(self): resp = self.db.find({"$or": [], "a": {"$in": []}}, explain=True) self.assertEqual(resp["index"]["type"], klass.INDEX_TYPE) -- cgit v1.2.1 From 440ab2641e2d409c2b4bf4fa8f1d8ee792d5143f Mon Sep 17 00:00:00 2001 From: Simon Klassen <6997477+sklassen@users.noreply.github.com> Date: Thu, 23 Apr 2020 04:14:25 +0800 Subject: Replace VM_ARGS with ARGS_FILE which is set as it is in couchdb script /etc/vm.args; also parses name from config. (#2738) Co-authored-by: sklassen Co-authored-by: Joan Touzet --- rel/overlay/bin/remsh | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/rel/overlay/bin/remsh b/rel/overlay/bin/remsh index c5e932a8d..d1fcdd95f 100755 --- a/rel/overlay/bin/remsh +++ b/rel/overlay/bin/remsh @@ -32,17 +32,28 @@ BINDIR=$ROOTDIR/erts-$ERTS_VSN/bin PROGNAME=${0##*/} VERBOSE="" -NODE="couchdb@127.0.0.1" +DEFAULT_NODE="couchdb@127.0.0.1" LHOST=127.0.0.1 -VM_ARGS=$COUCHDB_BIN_DIR/../etc/vm.args + +ARGS_FILE="${COUCHDB_ARGS_FILE:-$ROOTDIR/etc/vm.args}" + +# If present, extract cookie from ERL_FLAGS +# This is used by the CouchDB Dockerfile and Helm chart +NODE=$(echo "$ERL_FLAGS" | sed 's/^.*name \([^ ][^ ]*\).*$/\1/g') +if test -f "$ARGS_FILE"; then +# else attempt to extract from vm.args + ARGS_FILE_COOKIE=$(awk '$1=="-name"{print $2}' "$ARGS_FILE") + NODE="${NODE:-$ARGS_FILE_COOKIE}" +fi +NODE="${NODE:-$DEFAULT_NODE}" # If present, extract cookie from ERL_FLAGS # This is used by the CouchDB Dockerfile and Helm chart COOKIE=$(echo "$ERL_FLAGS" | sed 's/^.*setcookie \([^ ][^ ]*\).*$/\1/g') -if test -f "$VM_ARGS"; then +if test -f "$ARGS_FILE"; then # else attempt to extract from vm.args - VM_ARGS_COOKIE=$(awk '$1=="-setcookie"{print $2}' "$VM_ARGS") - COOKIE="${COOKIE:-$VM_ARGS_COOKIE}" + ARGS_FILE_COOKIE=$(awk '$1=="-setcookie"{print $2}' "$ARGS_FILE") + COOKIE="${COOKIE:-$ARGS_FILE_COOKIE}" fi COOKIE="${COOKIE:-monster}" -- cgit v1.2.1 From f3d596544c759568553d298bbb729a522df6d6bd Mon Sep 17 00:00:00 2001 From: Jan Lehnardt Date: Thu, 23 Apr 2020 00:14:48 +0200 Subject: fix: use correct logging module name, fixes #2797 (#2798) Co-authored-by: Joan Touzet --- src/setup/src/setup.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/setup/src/setup.erl b/src/setup/src/setup.erl index 3d23229b8..4867f6096 100644 --- a/src/setup/src/setup.erl +++ b/src/setup/src/setup.erl @@ -262,7 +262,7 @@ sync_config(Section, Key, Value) -> ok -> ok; error -> - log:error("~p sync_admin results ~p errors ~p", + couch_log:error("~p sync_admin results ~p errors ~p", [?MODULE, Results, Errors]), Reason = "Cluster setup unable to sync admin passwords", throw({setup_error, Reason}) -- cgit v1.2.1 From 38952237def3d6f6390ed5a0f201d580ca27db14 Mon Sep 17 00:00:00 2001 From: Garren Smith Date: Thu, 23 Apr 2020 12:10:07 +0200 Subject: Add after_interactive_write plugin to couch_views_updater --- src/couch_views/src/couch_views_epi.erl | 4 ++- src/couch_views/src/couch_views_plugin.erl | 40 +++++++++++++++++++++++++++++ src/couch_views/src/couch_views_updater.erl | 5 +++- 3 files changed, 47 insertions(+), 2 deletions(-) create mode 100644 src/couch_views/src/couch_views_plugin.erl diff --git a/src/couch_views/src/couch_views_epi.erl b/src/couch_views/src/couch_views_epi.erl index 6d39d9a5e..127b09f13 100644 --- a/src/couch_views/src/couch_views_epi.erl +++ b/src/couch_views/src/couch_views_epi.erl @@ -39,7 +39,9 @@ providers() -> services() -> - []. + [ + {couch_views, couch_views_plugin} + ]. data_subscriptions() -> diff --git a/src/couch_views/src/couch_views_plugin.erl b/src/couch_views/src/couch_views_plugin.erl new file mode 100644 index 000000000..f8169179a --- /dev/null +++ b/src/couch_views/src/couch_views_plugin.erl @@ -0,0 +1,40 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + + +-module(couch_views_plugin). + + +-export([ + after_interactive_write/4 +]). + + +-define(SERVICE_ID, couch_views). + + +after_interactive_write(Db, Mrst, Result, DocNumber) -> + with_pipe(after_interactive_write, [Db, Mrst, Result, DocNumber]), + ok. + + +%% ------------------------------------------------------------------ +%% Internal Function Definitions +%% ------------------------------------------------------------------ + +with_pipe(Func, Args) -> + do_apply(Func, Args, [pipe]). + + +do_apply(Func, Args, Opts) -> + Handle = couch_epi:get_handle(?SERVICE_ID), + couch_epi:apply(Handle, ?SERVICE_ID, Func, Args, Opts). \ No newline at end of file diff --git a/src/couch_views/src/couch_views_updater.erl b/src/couch_views/src/couch_views_updater.erl index f405123fa..30dfac326 100644 --- a/src/couch_views/src/couch_views_updater.erl +++ b/src/couch_views/src/couch_views_updater.erl @@ -86,7 +86,10 @@ write_doc(Db, #doc{deleted = Deleted} = Doc) -> case should_index_doc(Doc, Mrst) of true -> {Mrst1, Result1} = couch_views_indexer:map_docs(Mrst, Result0), - couch_views_indexer:write_docs(Db, Mrst1, Result1, State), + DocNumber = couch_views_indexer:write_docs(Db, Mrst1, + Result1, State), + couch_views_plugin:after_interactive_write(Db, Mrst1, + Result1, DocNumber), couch_eval:release_map_context(Mrst1#mrst.qserver); false -> ok -- cgit v1.2.1 From f522b880b68109feb5960d6d8244ac034990059e Mon Sep 17 00:00:00 2001 From: Garren Smith Date: Wed, 22 Apr 2020 15:17:20 +0200 Subject: Refactor fetching rev code in fabric2_fdb Use a common `get_revs_future` and `get_revs_wait` for fetching winning revs and all revs. --- src/couch_views/src/couch_views_indexer.erl | 2 +- src/fabric/src/fabric2_db.erl | 2 +- src/fabric/src/fabric2_fdb.erl | 36 ++++++++++++++--------------- 3 files changed, 20 insertions(+), 20 deletions(-) diff --git a/src/couch_views/src/couch_views_indexer.erl b/src/couch_views/src/couch_views_indexer.erl index ab5aaade2..bd1bd4de6 100644 --- a/src/couch_views/src/couch_views_indexer.erl +++ b/src/couch_views/src/couch_views_indexer.erl @@ -384,7 +384,7 @@ fetch_docs(Db, Changes) -> RevFutures = maps:keys(RevState), BodyState = lists:foldl(fun(RevFuture, Acc) -> {Id, Change} = maps:get(RevFuture, RevState), - Revs = fabric2_fdb:get_winning_revs_wait(Db, RevFuture), + Revs = fabric2_fdb:get_revs_wait(Db, RevFuture), % I'm assuming that in this changes transaction that the winning % doc body exists since it is listed in the changes feed as not deleted diff --git a/src/fabric/src/fabric2_db.erl b/src/fabric/src/fabric2_db.erl index 6e629f7b4..15694cdde 100644 --- a/src/fabric/src/fabric2_db.erl +++ b/src/fabric/src/fabric2_db.erl @@ -1534,7 +1534,7 @@ update_doc_interactive(Db, Doc0, Options) -> update_doc_interactive(Db, Doc0, Future, _Options) -> - RevInfos = fabric2_fdb:get_winning_revs_wait(Db, Future), + RevInfos = fabric2_fdb:get_revs_wait(Db, Future), {Winner, SecondPlace} = case RevInfos of [] -> {not_found, not_found}; [WRI] -> {WRI, not_found}; diff --git a/src/fabric/src/fabric2_fdb.erl b/src/fabric/src/fabric2_fdb.erl index 03f3bad82..b1ada52fc 100644 --- a/src/fabric/src/fabric2_fdb.erl +++ b/src/fabric/src/fabric2_fdb.erl @@ -42,9 +42,10 @@ incr_stat/4, get_all_revs/2, + get_all_revs_future/2, get_winning_revs/3, get_winning_revs_future/3, - get_winning_revs_wait/2, + get_revs_wait/2, get_non_deleted_rev/3, get_doc_body/3, @@ -602,46 +603,45 @@ incr_stat(#{} = Db, Section, Key, Increment) when is_integer(Increment) -> get_all_revs(#{} = Db, DocId) -> DbName = maps:get(name, Db, undefined), with_span('db.get_all_revs', #{'db.name' => DbName, 'doc.id' => DocId}, fun() -> - #{ - tx := Tx, - db_prefix := DbPrefix - } = ensure_current(Db), - - Prefix = erlfdb_tuple:pack({?DB_REVS, DocId}, DbPrefix), - Options = [{streaming_mode, want_all}], - Future = erlfdb:get_range_startswith(Tx, Prefix, Options), - lists:map(fun({K, V}) -> - Key = erlfdb_tuple:unpack(K, DbPrefix), - Val = erlfdb_tuple:unpack(V), - fdb_to_revinfo(Key, Val) - end, erlfdb:wait(Future)) + Future = get_all_revs_future(Db, DocId), + get_revs_wait(Db, Future) end). +get_all_revs_future(#{} = Db, DocId) -> + Options = [{streaming_mode, want_all}], + get_revs_future(Db, DocId, Options). + + get_winning_revs(Db, DocId, NumRevs) -> DbName = maps:get(name, Db, undefined), with_span('db.get_winning_revs', #{'db.name' => DbName, 'doc.id' => DocId}, fun() -> Future = get_winning_revs_future(Db, DocId, NumRevs), - get_winning_revs_wait(Db, Future) + get_revs_wait(Db, Future) end). get_winning_revs_future(#{} = Db, DocId, NumRevs) -> + Options = [{reverse, true}, {limit, NumRevs}], + get_revs_future(Db, DocId, Options). + + +get_revs_future(#{} = Db, DocId, Options) -> #{ tx := Tx, db_prefix := DbPrefix } = ensure_current(Db), {StartKey, EndKey} = erlfdb_tuple:range({?DB_REVS, DocId}, DbPrefix), - Options = [{reverse, true}, {limit, NumRevs}], erlfdb:fold_range_future(Tx, StartKey, EndKey, Options). -get_winning_revs_wait(#{} = Db, RangeFuture) -> +get_revs_wait(#{} = Db, RangeFuture) -> #{ tx := Tx, db_prefix := DbPrefix } = ensure_current(Db), + RevRows = erlfdb:fold_range_wait(Tx, RangeFuture, fun({K, V}, Acc) -> Key = erlfdb_tuple:unpack(K, DbPrefix), Val = erlfdb_tuple:unpack(V), @@ -1185,7 +1185,7 @@ load_validate_doc_funs(#{} = Db) -> id := DDocId, rev_info := RevInfoFuture } = Info, - [RevInfo] = get_winning_revs_wait(Db, RevInfoFuture), + [RevInfo] = get_revs_wait(Db, RevInfoFuture), #{deleted := Deleted} = RevInfo, if Deleted -> []; true -> [Info#{ -- cgit v1.2.1 From 5efcbfc3ca114696273f56b1c98876c95e63bda7 Mon Sep 17 00:00:00 2001 From: Garren Smith Date: Mon, 6 Apr 2020 12:04:56 +0200 Subject: Add fold_docs for DocId list Adds a fold_docs function that will do a parallel fetch for the supplied Doc Ids. This is used for _all_docs?keys=["id1", "id2"]. This uses a queue for fetching the revs and another queue for fetching the doc bodies. These queues will be drained if the future queue gets to large. --- src/chttpd/src/chttpd_db.erl | 14 ++-- src/fabric/src/fabric2_db.erl | 142 +++++++++++++++++++++++++++++++++++++ test/elixir/test/all_docs_test.exs | 30 +++++++- 3 files changed, 177 insertions(+), 9 deletions(-) diff --git a/src/chttpd/src/chttpd_db.erl b/src/chttpd/src/chttpd_db.erl index 8cfcfecaa..078009590 100644 --- a/src/chttpd/src/chttpd_db.erl +++ b/src/chttpd/src/chttpd_db.erl @@ -902,9 +902,10 @@ send_all_docs_keys(Db, #mrargs{} = Args, VAcc0) -> _ -> Args#mrargs.doc_options end, IncludeDocs = Args#mrargs.include_docs, - lists:foldl(fun(DocId, Acc) -> - OpenOpts = [deleted | DocOpts], - Row0 = case fabric2_db:open_doc(Db, DocId, OpenOpts) of + OpenOpts = [deleted | DocOpts], + + CB = fun(DocId, Doc, Acc) -> + Row0 = case Doc of {not_found, missing} -> #view_row{key = DocId}; {ok, #doc{deleted = true, revs = Revs}} -> @@ -938,9 +939,10 @@ send_all_docs_keys(Db, #mrargs{} = Args, VAcc0) -> } end, Row1 = fabric_view:transform_row(Row0), - {ok, NewAcc} = view_cb(Row1, Acc), - NewAcc - end, VAcc1, Keys). + view_cb(Row1, Acc) + end, + {ok, VAcc2} = fabric2_db:fold_docs(Db, Keys, CB, VAcc1, OpenOpts), + VAcc2. apply_args_to_keylist(Args, Keys0) -> diff --git a/src/fabric/src/fabric2_db.erl b/src/fabric/src/fabric2_db.erl index 15694cdde..740f9abf6 100644 --- a/src/fabric/src/fabric2_db.erl +++ b/src/fabric/src/fabric2_db.erl @@ -110,6 +110,7 @@ fold_docs/3, fold_docs/4, + fold_docs/5, fold_design_docs/4, fold_local_docs/4, fold_changes/4, @@ -969,6 +970,61 @@ fold_docs(Db, UserFun, UserAcc0, Options) -> end). +fold_docs(Db, DocIds, UserFun, UserAcc0, Options) -> + fabric2_fdb:transactional(Db, fun(TxDb) -> + try + NeedsTreeOpts = [revs_info, conflicts, deleted_conflicts], + NeedsTree = (Options -- NeedsTreeOpts /= Options), + + FetchRevs = case NeedsTree of + true -> + fun(DocId) -> + fabric2_fdb:get_all_revs_future(TxDb, DocId) + end; + false -> + fun(DocId) -> + fabric2_fdb:get_winning_revs_future(TxDb, DocId, 1) + end + end, + InitAcc = #{ + revs_q => queue:new(), + revs_count => 0, + body_q => queue:new(), + body_count => 0, + doc_opts => Options, + user_acc => UserAcc0, + user_fun => UserFun + }, + + FinalAcc1 = lists:foldl(fun(DocId, Acc) -> + #{ + revs_q := RevsQ, + revs_count := RevsCount + } = Acc, + Future = FetchRevs(DocId), + NewAcc = Acc#{ + revs_q := queue:in({DocId, Future}, RevsQ), + revs_count := RevsCount + 1 + }, + drain_fold_docs_revs_futures(TxDb, NewAcc) + end, InitAcc, DocIds), + + FinalAcc2 = drain_all_fold_docs_revs_futures(TxDb, FinalAcc1), + FinalAcc3 = drain_all_fold_docs_body_futures(TxDb, FinalAcc2), + + #{ + user_acc := FinalUserAcc + } = FinalAcc3, + {ok, FinalUserAcc} + + catch throw:{stop, StopUserAcc} -> + {ok, StopUserAcc} + end + end). + + + + fold_design_docs(Db, UserFun, UserAcc0, Options1) -> Options2 = set_design_doc_keys(Options1), fold_docs(Db, UserFun, UserAcc0, Options2). @@ -1206,6 +1262,92 @@ drain_all_deleted_info_futures(FutureQ, UserFun, Acc) -> end. +drain_fold_docs_revs_futures(_TxDb, #{revs_count := C} = Acc) when C < 100 -> + Acc; +drain_fold_docs_revs_futures(TxDb, Acc) -> + drain_one_fold_docs_revs_future(TxDb, Acc). + + +drain_all_fold_docs_revs_futures(_TxDb, #{revs_count := C} = Acc) when C =< 0 -> + Acc; +drain_all_fold_docs_revs_futures(TxDb, #{revs_count := C} = Acc) when C > 0 -> + NewAcc = drain_one_fold_docs_revs_future(TxDb, Acc), + drain_all_fold_docs_revs_futures(TxDb, NewAcc). + + +drain_one_fold_docs_revs_future(TxDb, Acc) -> + #{ + revs_q := RevsQ, + revs_count := RevsCount, + body_q := BodyQ, + body_count := BodyCount + } = Acc, + {{value, {DocId, RevsFuture}}, RestRevsQ} = queue:out(RevsQ), + + Revs = fabric2_fdb:get_revs_wait(TxDb, RevsFuture), + DocFuture = case Revs of + [] -> + {DocId, [], not_found}; + [_ | _] -> + Winner = get_rev_winner(Revs), + BodyFuture = fabric2_fdb:get_doc_body_future(TxDb, DocId, Winner), + {DocId, Revs, BodyFuture} + end, + NewAcc = Acc#{ + revs_q := RestRevsQ, + revs_count := RevsCount - 1, + body_q := queue:in(DocFuture, BodyQ), + body_count := BodyCount + 1 + }, + drain_fold_docs_body_futures(TxDb, NewAcc). + + +drain_fold_docs_body_futures(_TxDb, #{body_count := C} = Acc) when C < 100 -> + Acc; +drain_fold_docs_body_futures(TxDb, Acc) -> + drain_one_fold_docs_body_future(TxDb, Acc). + + +drain_all_fold_docs_body_futures(_TxDb, #{body_count := C} = Acc) when C =< 0 -> + Acc; +drain_all_fold_docs_body_futures(TxDb, #{body_count := C} = Acc) when C > 0 -> + NewAcc = drain_one_fold_docs_body_future(TxDb, Acc), + drain_all_fold_docs_body_futures(TxDb, NewAcc). + + +drain_one_fold_docs_body_future(TxDb, Acc) -> + #{ + body_q := BodyQ, + body_count := BodyCount, + doc_opts := DocOpts, + user_fun := UserFun, + user_acc := UserAcc + } = Acc, + {{value, {DocId, Revs, BodyFuture}}, RestBodyQ} = queue:out(BodyQ), + Doc = case BodyFuture of + not_found -> + {not_found, missing}; + _ -> + RevInfo = get_rev_winner(Revs), + Base = fabric2_fdb:get_doc_body_wait(TxDb, DocId, RevInfo, + BodyFuture), + apply_open_doc_opts(Base, Revs, DocOpts) + end, + NewUserAcc = maybe_stop(UserFun(DocId, Doc, UserAcc)), + Acc#{ + body_q := RestBodyQ, + body_count := BodyCount - 1, + user_acc := NewUserAcc + }. + + +get_rev_winner(Revs) -> + [Winner] = lists:filter(fun(Rev) -> + maps:get(winner, Rev) + end, Revs), + Winner. + + new_revid(Db, Doc) -> #doc{ id = DocId, diff --git a/test/elixir/test/all_docs_test.exs b/test/elixir/test/all_docs_test.exs index 16641aa95..d41d046b8 100644 --- a/test/elixir/test/all_docs_test.exs +++ b/test/elixir/test/all_docs_test.exs @@ -232,6 +232,26 @@ defmodule AllDocsTest do assert length(Map.get(resp, :body)["rows"]) == 3 end + @tag :with_db + test "POST with missing keys", context do + db_name = context[:db_name] + + resp = Couch.post("/#{db_name}/_bulk_docs", body: %{docs: create_docs(0..3)}) + assert resp.status_code in [201, 202] + + resp = Couch.post( + "/#{db_name}/_all_docs", + body: %{ + :keys => [1] + } + ) + + assert resp.status_code == 200 + rows = resp.body["rows"] + assert length(rows) == 1 + assert hd(rows) == %{"error" => "not_found", "key" => 1} + end + @tag :with_db test "POST with keys and limit", context do db_name = context[:db_name] @@ -242,13 +262,17 @@ defmodule AllDocsTest do resp = Couch.post( "/#{db_name}/_all_docs", body: %{ - :keys => [1, 2], - :limit => 1 + :keys => ["1", "2"], + :limit => 1, + :include_docs => true } ) assert resp.status_code == 200 - assert length(Map.get(resp, :body)["rows"]) == 1 + rows = resp.body["rows"] + assert length(rows) == 1 + doc = hd(rows)["doc"] + assert doc["string"] == "1" end @tag :with_db -- cgit v1.2.1 From f332f43fca31bd6be57d58a0ae1a24439f57a716 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Thu, 23 Apr 2020 20:45:07 +0100 Subject: safer binary_to_term in mango_json_bookmark --- src/mango/src/mango_json_bookmark.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mango/src/mango_json_bookmark.erl b/src/mango/src/mango_json_bookmark.erl index 97f81cfb8..83fd00f29 100644 --- a/src/mango/src/mango_json_bookmark.erl +++ b/src/mango/src/mango_json_bookmark.erl @@ -54,7 +54,7 @@ unpack(nil) -> nil; unpack(Packed) -> try - Bookmark = binary_to_term(couch_util:decodeBase64Url(Packed)), + Bookmark = binary_to_term(couch_util:decodeBase64Url(Packed), [safe]), verify(Bookmark) catch _:_ -> ?MANGO_ERROR({invalid_bookmark, Packed}) -- cgit v1.2.1 From 232e1d51fd95c111dd81a19112de56a59e3f2a74 Mon Sep 17 00:00:00 2001 From: Jay Doane Date: Sat, 25 Apr 2020 22:59:41 -0700 Subject: Report the chttpd_auth authentication db in session info Currently, GET `/_session` reports the `authentication_db` of the obsolete admin port 5986. This updates it to report the actual db used for authentication, provided it is configured. Otherwise, it omits `authentication_db` entirely from session info. --- src/chttpd/test/eunit/chttpd_session_tests.erl | 74 ++++++++++++++++++++++++++ src/chttpd/test/eunit/chttpd_test.hrl | 35 ++++++++++++ src/couch/src/couch_httpd_auth.erl | 3 +- 3 files changed, 111 insertions(+), 1 deletion(-) create mode 100644 src/chttpd/test/eunit/chttpd_session_tests.erl create mode 100644 src/chttpd/test/eunit/chttpd_test.hrl diff --git a/src/chttpd/test/eunit/chttpd_session_tests.erl b/src/chttpd/test/eunit/chttpd_session_tests.erl new file mode 100644 index 000000000..a802d9ec2 --- /dev/null +++ b/src/chttpd/test/eunit/chttpd_session_tests.erl @@ -0,0 +1,74 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(chttpd_session_tests). + +-include_lib("couch/include/couch_eunit.hrl"). +-include("chttpd_test.hrl"). + +-define(USER, "chttpd_test_admin"). +-define(PASS, "pass"). + + +setup() -> + ok = config:delete("chttpd_auth", "authentication_db", _Persist=false), + Hashed = couch_passwords:hash_admin_password(?PASS), + ok = config:set("admins", ?USER, binary_to_list(Hashed), _Persist=false), + root_url() ++ "/_session". + + +cleanup(_) -> + ok = config:delete("chttpd_auth", "authentication_db", _Persist=false), + ok = config:delete("admins", ?USER, _Persist=false). + + +session_test_() -> + { + "Session tests", + { + setup, + fun() -> test_util:start_couch([fabric, chttpd]) end, + fun test_util:stop_couch/1, + { + foreach, + fun setup/0, + fun cleanup/1, + [ + ?TDEF_FE(session_authentication_db_absent), + ?TDEF_FE(session_authentication_db_present) + ] + } + } + }. + + +session_authentication_db_absent(Url) -> + ok = config:delete("chttpd_auth", "authentication_db", _Persist=false), + ?assertThrow({not_found, _}, session_authentication_db(Url)). + + +session_authentication_db_present(Url) -> + Name = "_users", + ok = config:set("chttpd_auth", "authentication_db", Name, false), + ?assertEqual(list_to_binary(Name), session_authentication_db(Url)). + + +session_authentication_db(Url) -> + {ok, 200, _, Body} = test_request:get(Url, [{basic_auth, {?USER, ?PASS}}]), + couch_util:get_nested_json_value( + jiffy:decode(Body), [<<"info">>, <<"authentication_db">>]). + + +root_url() -> + Addr = config:get("chttpd", "bind_address", "127.0.0.1"), + Port = mochiweb_socket_server:get(chttpd, port), + lists:concat(["http://", Addr, ":", Port]). diff --git a/src/chttpd/test/eunit/chttpd_test.hrl b/src/chttpd/test/eunit/chttpd_test.hrl new file mode 100644 index 000000000..6db97ec2b --- /dev/null +++ b/src/chttpd/test/eunit/chttpd_test.hrl @@ -0,0 +1,35 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + + +% Borrowed from fabric2_test.hrl + +% Some test modules do not use with, so squash the unused fun compiler warning +-compile([{nowarn_unused_function, [{with, 1}]}]). + + +-define(TDEF(Name), {atom_to_list(Name), fun Name/1}). +-define(TDEF(Name, Timeout), {atom_to_list(Name), Timeout, fun Name/1}). + +-define(TDEF_FE(Name), fun(Arg) -> {atom_to_list(Name), ?_test(Name(Arg))} end). +-define(TDEF_FE(Name, Timeout), fun(Arg) -> {atom_to_list(Name), {timeout, Timeout, ?_test(Name(Arg))}} end). + + +with(Tests) -> + fun(ArgsTuple) -> + lists:map(fun + ({Name, Fun}) -> + {Name, ?_test(Fun(ArgsTuple))}; + ({Name, Timeout, Fun}) -> + {Name, {timeout, Timeout, ?_test(Fun(ArgsTuple))}} + end, Tests) + end. diff --git a/src/couch/src/couch_httpd_auth.erl b/src/couch/src/couch_httpd_auth.erl index 5e4450301..ed1481e2b 100644 --- a/src/couch/src/couch_httpd_auth.erl +++ b/src/couch/src/couch_httpd_auth.erl @@ -355,11 +355,12 @@ handle_session_req(#httpd{method='GET', user_ctx=UserCtx}=Req, _AuthModule) -> {roles, UserCtx#user_ctx.roles} ]}}, {info, {[ - {authentication_db, ?l2b(config:get("couch_httpd_auth", "authentication_db"))}, {authentication_handlers, [ N || {N, _Fun} <- Req#httpd.authentication_handlers]} ] ++ maybe_value(authenticated, UserCtx#user_ctx.handler, fun(Handler) -> Handler + end) ++ maybe_value(authentication_db, config:get("chttpd_auth", "authentication_db"), fun(Val) -> + ?l2b(Val) end)}} ]}) end; -- cgit v1.2.1 From a5fded8c5d47bcc14699429a46dfb0cec6bd9e76 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Wed, 8 Apr 2020 16:40:26 +0100 Subject: Add native encryption support A new application, aegis, is introduced to provide strong at-rest protection of CouchDB data (where possible). Currently we encrypt the following values (if enabled): 1. Document content 2. Attachment content 3. Index values Things not encrypted: 1. _all_docs 2. _changes 3. doc id 4. doc rev 5. Index keys 6. All other metadata Co-Authored-By: Eric Avdey Co-Authored-By: Robert Samuel Newson --- configure | 19 +++ rebar.config.script | 1 + rel/reltool.config | 2 + src/aegis/rebar.config.script | 35 ++++ src/aegis/src/aegis.app.src | 34 ++++ src/aegis/src/aegis.erl | 72 ++++++++ src/aegis/src/aegis.hrl | 57 +++++++ src/aegis/src/aegis_app.erl | 26 +++ src/aegis/src/aegis_key_manager.erl | 22 +++ src/aegis/src/aegis_keywrap.erl | 97 +++++++++++ src/aegis/src/aegis_noop_key_manager.erl | 31 ++++ src/aegis/src/aegis_server.erl | 275 +++++++++++++++++++++++++++++++ src/aegis/src/aegis_sup.erl | 46 ++++++ src/aegis/test/aegis_basic_test.erl | 17 ++ src/aegis/test/aegis_server_test.erl | 165 +++++++++++++++++++ src/chttpd/src/chttpd.erl | 8 + src/couch/src/couch_keywrap.erl | 103 ------------ src/couch_views/src/couch_views_fdb.erl | 8 +- src/fabric/include/fabric2.hrl | 1 + src/fabric/src/fabric.app.src | 3 +- src/fabric/src/fabric2_fdb.erl | 31 ++-- 21 files changed, 932 insertions(+), 121 deletions(-) create mode 100644 src/aegis/rebar.config.script create mode 100644 src/aegis/src/aegis.app.src create mode 100644 src/aegis/src/aegis.erl create mode 100644 src/aegis/src/aegis.hrl create mode 100644 src/aegis/src/aegis_app.erl create mode 100644 src/aegis/src/aegis_key_manager.erl create mode 100644 src/aegis/src/aegis_keywrap.erl create mode 100644 src/aegis/src/aegis_noop_key_manager.erl create mode 100644 src/aegis/src/aegis_server.erl create mode 100644 src/aegis/src/aegis_sup.erl create mode 100644 src/aegis/test/aegis_basic_test.erl create mode 100644 src/aegis/test/aegis_server_test.erl delete mode 100644 src/couch/src/couch_keywrap.erl diff --git a/configure b/configure index 854366c8a..b91b18da7 100755 --- a/configure +++ b/configure @@ -96,6 +96,24 @@ parse_opts() { continue ;; + --key-manager) + if [ -n "$2" ]; then + eval AEGIS_KEY_MANAGER=$2 + shift 2 + continue + else + printf 'ERROR: "--key-manager" requires a non-empty argument.\n' >&2 + exit 1 + fi + ;; + --key-manager=?*) + eval AEGIS_KEY_MANAGER=${1#*=} + ;; + --key-manager=) + printf 'ERROR: "--key-manager" requires a non-empty argument.\n' >&2 + exit 1 + ;; + --dev) WITH_DOCS=0 WITH_FAUXTON=0 @@ -241,6 +259,7 @@ cat > $rootdir/config.erl << EOF {with_curl, $WITH_CURL}. {with_proper, $WITH_PROPER}. {erlang_md5, $ERLANG_MD5}. +{aegis_key_manager, "$AEGIS_KEY_MANAGER"}. {spidermonkey_version, "$SM_VSN"}. EOF diff --git a/rebar.config.script b/rebar.config.script index b3ea2c933..2badaba2d 100644 --- a/rebar.config.script +++ b/rebar.config.script @@ -135,6 +135,7 @@ SubDirs = [ "src/ddoc_cache", "src/dreyfus", "src/fabric", + "src/aegis", "src/couch_jobs", "src/couch_expiring_cache", "src/global_changes", diff --git a/rel/reltool.config b/rel/reltool.config index 9fbf28544..b59c95f55 100644 --- a/rel/reltool.config +++ b/rel/reltool.config @@ -27,6 +27,7 @@ syntax_tools, xmerl, %% couchdb + aegis, b64url, bear, chttpd, @@ -90,6 +91,7 @@ {app, xmerl, [{incl_cond, include}]}, %% couchdb + {app, aegis, [{incl_cond, include}]}, {app, b64url, [{incl_cond, include}]}, {app, bear, [{incl_cond, include}]}, {app, chttpd, [{incl_cond, include}]}, diff --git a/src/aegis/rebar.config.script b/src/aegis/rebar.config.script new file mode 100644 index 000000000..ef148bfbe --- /dev/null +++ b/src/aegis/rebar.config.script @@ -0,0 +1,35 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + + +CouchConfig = case filelib:is_file(os:getenv("COUCHDB_CONFIG")) of + true -> + {ok, Result} = file:consult(os:getenv("COUCHDB_CONFIG")), + Result; + false -> + [] +end. + +AegisKeyManager = case lists:keyfind(aegis_key_manager, 1, CouchConfig) of + {aegis_key_manager, Module} when Module /= "" -> + list_to_atom(Module); + _ -> + aegis_noop_key_manager +end, + +CurrentOpts = case lists:keyfind(erl_opts, 1, CONFIG) of + {erl_opts, Opts} -> Opts; + false -> [] +end, + +AegisOpts = {d, 'AEGIS_KEY_MANAGER', AegisKeyManager}, +lists:keystore(erl_opts, 1, CONFIG, {erl_opts, [AegisOpts | CurrentOpts]}). diff --git a/src/aegis/src/aegis.app.src b/src/aegis/src/aegis.app.src new file mode 100644 index 000000000..deb152674 --- /dev/null +++ b/src/aegis/src/aegis.app.src @@ -0,0 +1,34 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +{application, aegis, + [ + {description, "If it's good enough for Zeus, it's good enough for CouchDB"}, + {vsn, git}, + {mod, {aegis_app, []}}, + {registered, [ + aegis_server + ]}, + {applications, + [kernel, + stdlib, + crypto, + couch_log, + erlfdb + ]}, + {env,[]}, + {modules, []}, + {maintainers, []}, + {licenses, []}, + {links, []} + ] +}. diff --git a/src/aegis/src/aegis.erl b/src/aegis/src/aegis.erl new file mode 100644 index 000000000..e8a0b4bfb --- /dev/null +++ b/src/aegis/src/aegis.erl @@ -0,0 +1,72 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(aegis). +-include_lib("fabric/include/fabric2.hrl"). + + +-define(WRAPPED_KEY, {?DB_AEGIS, 1}). + + +-export([ + init_db/2, + open_db/1, + + decrypt/2, + decrypt/3, + encrypt/3, + wrap_fold_fun/2 +]). + +init_db(#{} = Db, Options) -> + Db#{ + is_encrypted => aegis_server:init_db(Db, Options) + }. + + +open_db(#{} = Db) -> + Db#{ + is_encrypted => aegis_server:open_db(Db) + }. + + +encrypt(#{} = _Db, _Key, <<>>) -> + <<>>; + +encrypt(#{is_encrypted := false}, _Key, Value) when is_binary(Value) -> + Value; + +encrypt(#{is_encrypted := true} = Db, Key, Value) + when is_binary(Key), is_binary(Value) -> + aegis_server:encrypt(Db, Key, Value). + + +decrypt(#{} = Db, Rows) when is_list(Rows) -> + lists:map(fun({Key, Value}) -> + {Key, decrypt(Db, Key, Value)} + end, Rows). + +decrypt(#{} = _Db, _Key, <<>>) -> + <<>>; + +decrypt(#{is_encrypted := false}, _Key, Value) when is_binary(Value) -> + Value; + +decrypt(#{is_encrypted := true} = Db, Key, Value) + when is_binary(Key), is_binary(Value) -> + aegis_server:decrypt(Db, Key, Value). + + +wrap_fold_fun(Db, Fun) when is_function(Fun, 2) -> + fun({Key, Value}, Acc) -> + Fun({Key, decrypt(Db, Key, Value)}, Acc) + end. diff --git a/src/aegis/src/aegis.hrl b/src/aegis/src/aegis.hrl new file mode 100644 index 000000000..2a2a2dcde --- /dev/null +++ b/src/aegis/src/aegis.hrl @@ -0,0 +1,57 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +%% Assume old crypto api + +-define(sha256_hmac(Key, PlainText), crypto:hmac(sha256, Key, PlainText)). + +-define(aes_gcm_encrypt(Key, IV, AAD, Data), + crypto:block_encrypt(aes_gcm, Key, IV, {AAD, Data, 16})). + +-define(aes_gcm_decrypt(Key, IV, AAD, CipherText, CipherTag), + crypto:block_decrypt(aes_gcm, Key, IV, {AAD, CipherText, CipherTag})). + +-define(aes_ecb_encrypt(Key, Data), + crypto:block_encrypt(aes_ecb, Key, Data)). + +-define(aes_ecb_decrypt(Key, Data), + crypto:block_decrypt(aes_ecb, Key, Data)). + +%% Replace macros if new crypto api is available +-ifdef(OTP_RELEASE). +-if(?OTP_RELEASE >= 22). + +-undef(sha256_hmac). +-define(sha256_hmac(Key, PlainText), crypto:mac(hmac, sha256, Key, PlainText)). + +-undef(aes_gcm_encrypt). +-define(aes_gcm_encrypt(Key, IV, AAD, Data), + crypto:crypto_one_time_aead(aes_256_gcm, Key, IV, Data, AAD, 16, true)). + +-undef(aes_gcm_decrypt). +-define(aes_gcm_decrypt(Key, IV, AAD, CipherText, CipherTag), + crypto:crypto_one_time_aead(aes_256_gcm, Key, IV, CipherText, + AAD, CipherTag, false)). + +-define(key_alg(Key), case bit_size(Key) of + 128 -> aes_128_ecb; 192 -> aes_192_ecb; 256 -> aes_256_ecb end). + +-undef(aes_ecb_encrypt). +-define(aes_ecb_encrypt(Key, Data), + crypto:crypto_one_time(?key_alg(Key), Key, Data, true)). + +-undef(aes_ecb_decrypt). +-define(aes_ecb_decrypt(Key, Data), + crypto:crypto_one_time(?key_alg(Key), Key, Data, false)). + +-endif. +-endif. \ No newline at end of file diff --git a/src/aegis/src/aegis_app.erl b/src/aegis/src/aegis_app.erl new file mode 100644 index 000000000..4a5a11f0c --- /dev/null +++ b/src/aegis/src/aegis_app.erl @@ -0,0 +1,26 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(aegis_app). + +-behaviour(application). + + +-export([start/2, stop/1]). + + +start(_StartType, _StartArgs) -> + aegis_sup:start_link(). + + +stop(_State) -> + ok. diff --git a/src/aegis/src/aegis_key_manager.erl b/src/aegis/src/aegis_key_manager.erl new file mode 100644 index 000000000..aa9e3429a --- /dev/null +++ b/src/aegis/src/aegis_key_manager.erl @@ -0,0 +1,22 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(aegis_key_manager). + + + +-callback init_db( + Db :: #{}, + DbOptions :: list()) -> {ok, binary()} | false. + + +-callback open_db(Db :: #{}) -> {ok, binary()} | false. diff --git a/src/aegis/src/aegis_keywrap.erl b/src/aegis/src/aegis_keywrap.erl new file mode 100644 index 000000000..58c7668e8 --- /dev/null +++ b/src/aegis/src/aegis_keywrap.erl @@ -0,0 +1,97 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(aegis_keywrap). +-include("aegis.hrl"). + +%% Implementation of NIST Special Publication 800-38F +%% For wrapping and unwrapping keys with AES. + +-export([key_wrap/2, key_unwrap/2]). + +-define(ICV1, 16#A6A6A6A6A6A6A6A6). + +-spec key_wrap(WrappingKey :: binary(), KeyToWrap :: binary()) -> binary(). +key_wrap(WrappingKey, KeyToWrap) + when is_binary(WrappingKey), bit_size(KeyToWrap) rem 64 == 0 -> + N = bit_size(KeyToWrap) div 64, + wrap(WrappingKey, <>, KeyToWrap, 1, 6 * N). + +wrap(_WrappingKey, A, R, T, End) when T > End -> + <>; +wrap(WrappingKey, A, R, T, End) -> + <> = R, + <> = ?aes_ecb_encrypt(WrappingKey, <>), + wrap(WrappingKey, <<(MSB_B bxor T):64>>, <>, T + 1, End). + + +-spec key_unwrap(WrappingKey :: binary(), KeyToUnwrap :: binary()) -> binary() | fail. +key_unwrap(WrappingKey, KeyToUnwrap) + when is_binary(WrappingKey), bit_size(KeyToUnwrap) rem 64 == 0 -> + N = (bit_size(KeyToUnwrap) div 64), + <> = KeyToUnwrap, + case unwrap(WrappingKey, <>, R, 6 * (N - 1)) of + <> -> + UnwrappedKey; + _ -> + fail + end. + +unwrap(_WrappingKey, A, R, 0) -> + <>; +unwrap(WrappingKey, <>, R, T) -> + RestSize = bit_size(R) - 64, + <> = R, + <> = ?aes_ecb_decrypt(WrappingKey, <<(A bxor T):64, R2:64>>), + unwrap(WrappingKey, <>, <>, T - 1). + + +-ifdef(TEST). +-include_lib("eunit/include/eunit.hrl"). + +wrap_test_() -> + [ + %% 128 KEK / 128 DATA + test_wrap_unwrap(<<16#000102030405060708090A0B0C0D0E0F:128>>, + <<16#00112233445566778899AABBCCDDEEFF:128>>, + <<16#1FA68B0A8112B447AEF34BD8FB5A7B829D3E862371D2CFE5:192>>), + %% 192 KEK / 128 DATA + test_wrap_unwrap(<<16#000102030405060708090A0B0C0D0E0F1011121314151617:192>>, + <<16#00112233445566778899AABBCCDDEEFF:128>>, + <<16#96778B25AE6CA435F92B5B97C050AED2468AB8A17AD84E5D:192>>), + %% 256 KEK / 128 DATA + test_wrap_unwrap(<<16#000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F:256>>, + <<16#00112233445566778899AABBCCDDEEFF:128>>, + <<16#64E8C3F9CE0F5BA263E9777905818A2A93C8191E7D6E8AE7:192>>), + %% 192 KEK / 192 DATA + test_wrap_unwrap(<<16#000102030405060708090A0B0C0D0E0F1011121314151617:192>>, + <<16#00112233445566778899AABBCCDDEEFF0001020304050607:192>>, + <<16#031D33264E15D33268F24EC260743EDCE1C6C7DDEE725A936BA814915C6762D2:256>>), + %% 256 KEK / 192 DATA + test_wrap_unwrap(<<16#000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F:256>>, + <<16#00112233445566778899AABBCCDDEEFF0001020304050607:192>>, + <<16#A8F9BC1612C68B3FF6E6F4FBE30E71E4769C8B80A32CB8958CD5D17D6B254DA1:256>>), + %% 256 KEK / 256 DATA + test_wrap_unwrap(<<16#000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F:256>>, + <<16#00112233445566778899AABBCCDDEEFF000102030405060708090A0B0C0D0E0F:256>>, + <<16#28C9F404C4B810F4CBCCB35CFB87F8263F5786E2D80ED326CBC7F0E71A99F43BFB988B9B7A02DD21:320>>)]. + +test_wrap_unwrap(WrappingKey, KeyToWrap, ExpectedWrappedKey) -> + [?_assertEqual(ExpectedWrappedKey, key_wrap(WrappingKey, KeyToWrap)), + ?_assertEqual(KeyToWrap, key_unwrap(WrappingKey, key_wrap(WrappingKey, KeyToWrap)))]. + +fail_test() -> + KEK = <<16#000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F:256>>, + CipherText = <<16#28C9F404C4B810F4CBCCB35CFB87F8263F5786E2D80ED326CBC7F0E71A99F43BFB988B9B7A02DD20:320>>, + ?assertEqual(fail, key_unwrap(KEK, CipherText)). + +-endif. diff --git a/src/aegis/src/aegis_noop_key_manager.erl b/src/aegis/src/aegis_noop_key_manager.erl new file mode 100644 index 000000000..2b61f1d29 --- /dev/null +++ b/src/aegis/src/aegis_noop_key_manager.erl @@ -0,0 +1,31 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(aegis_noop_key_manager). + + +-behaviour(aegis_key_manager). + + +-export([ + init_db/2, + open_db/1 +]). + + + +init_db(#{} = _Db, _Options) -> + false. + + +open_db(#{} = _Db) -> + false. diff --git a/src/aegis/src/aegis_server.erl b/src/aegis/src/aegis_server.erl new file mode 100644 index 000000000..be8202ced --- /dev/null +++ b/src/aegis/src/aegis_server.erl @@ -0,0 +1,275 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(aegis_server). + +-behaviour(gen_server). + +-vsn(1). + + +-include("aegis.hrl"). + + +%% aegis_server API +-export([ + start_link/0, + init_db/2, + open_db/1, + encrypt/3, + decrypt/3 +]). + +%% gen_server callbacks +-export([ + init/1, + terminate/2, + handle_call/3, + handle_cast/2, + handle_info/2, + code_change/3 +]). + + + +-define(KEY_CHECK, aegis_key_check). +-define(INIT_TIMEOUT, 60000). +-define(TIMEOUT, 10000). + + +-record(entry, {uuid, encryption_key}). + + +start_link() -> + gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). + + +-spec init_db(Db :: #{}, Options :: list()) -> boolean(). +init_db(#{uuid := UUID} = Db, Options) -> + process_flag(sensitive, true), + + case ?AEGIS_KEY_MANAGER:init_db(Db, Options) of + {ok, DbKey} -> + gen_server:call(?MODULE, {insert_key, UUID, DbKey}), + true; + false -> + false + end. + + +-spec open_db(Db :: #{}) -> boolean(). +open_db(#{} = Db) -> + process_flag(sensitive, true), + + case do_open_db(Db) of + {ok, _DbKey} -> + true; + false -> + false + end. + + +-spec encrypt(Db :: #{}, Key :: binary(), Value :: binary()) -> binary(). +encrypt(#{} = Db, Key, Value) when is_binary(Key), is_binary(Value) -> + #{ + uuid := UUID + } = Db, + + case ets:member(?KEY_CHECK, UUID) of + true -> + case gen_server:call(?MODULE, {encrypt, Db, Key, Value}) of + CipherText when is_binary(CipherText) -> + CipherText; + {error, {_Tag, {_C_FileName,_LineNumber}, _Desc} = Reason} -> + couch_log:error("aegis encryption failure: ~p ", [Reason]), + erlang:error(decryption_failed); + {error, Reason} -> + erlang:error(Reason) + end; + false -> + process_flag(sensitive, true), + + {ok, DbKey} = do_open_db(Db), + do_encrypt(DbKey, Db, Key, Value) + end. + + +-spec decrypt(Db :: #{}, Key :: binary(), Value :: binary()) -> binary(). +decrypt(#{} = Db, Key, Value) when is_binary(Key), is_binary(Value) -> + #{ + uuid := UUID + } = Db, + + case ets:member(?KEY_CHECK, UUID) of + true -> + case gen_server:call(?MODULE, {decrypt, Db, Key, Value}) of + PlainText when is_binary(PlainText) -> + PlainText; + {error, {_Tag, {_C_FileName,_LineNumber}, _Desc} = Reason} -> + couch_log:error("aegis decryption failure: ~p ", [Reason]), + erlang:error(decryption_failed); + {error, Reason} -> + erlang:error(Reason) + end; + false -> + process_flag(sensitive, true), + + {ok, DbKey} = do_open_db(Db), + do_decrypt(DbKey, Db, Key, Value) + end. + + +%% gen_server functions + +init([]) -> + process_flag(sensitive, true), + Cache = ets:new(?MODULE, [set, private, {keypos, #entry.uuid}]), + ets:new(?KEY_CHECK, [named_table, protected, {read_concurrency, true}]), + + St = #{ + cache => Cache + }, + {ok, St, ?INIT_TIMEOUT}. + + +terminate(_Reason, _St) -> + ok. + + +handle_call({insert_key, UUID, DbKey}, _From, #{cache := Cache} = St) -> + ok = insert(Cache, UUID, DbKey), + {reply, ok, St, ?TIMEOUT}; + +handle_call({encrypt, #{uuid := UUID} = Db, Key, Value}, From, St) -> + #{ + cache := Cache + } = St, + + {ok, DbKey} = lookup(Cache, UUID), + + erlang:spawn(fun() -> + process_flag(sensitive, true), + try + do_encrypt(DbKey, Db, Key, Value) + of + Resp -> + gen_server:reply(From, Resp) + catch + _:Error -> + gen_server:reply(From, {error, Error}) + end + end), + + {noreply, St, ?TIMEOUT}; + +handle_call({decrypt, #{uuid := UUID} = Db, Key, Value}, From, St) -> + #{ + cache := Cache + } = St, + + {ok, DbKey} = lookup(Cache, UUID), + + erlang:spawn(fun() -> + process_flag(sensitive, true), + try + do_decrypt(DbKey, Db, Key, Value) + of + Resp -> + gen_server:reply(From, Resp) + catch + _:Error -> + gen_server:reply(From, {error, Error}) + end + end), + + {noreply, St, ?TIMEOUT}; + +handle_call(_Msg, _From, St) -> + {noreply, St}. + + +handle_cast(_Msg, St) -> + {noreply, St}. + + +handle_info(_Msg, St) -> + {noreply, St}. + + +code_change(_OldVsn, St, _Extra) -> + {ok, St}. + + +%% private functions + +do_open_db(#{uuid := UUID} = Db) -> + case ?AEGIS_KEY_MANAGER:open_db(Db) of + {ok, DbKey} -> + gen_server:call(?MODULE, {insert_key, UUID, DbKey}), + {ok, DbKey}; + false -> + false + end. + + +do_encrypt(DbKey, #{uuid := UUID}, Key, Value) -> + EncryptionKey = crypto:strong_rand_bytes(32), + <> = aegis_keywrap:key_wrap(DbKey, EncryptionKey), + + {CipherText, <>} = + ?aes_gcm_encrypt( + EncryptionKey, + <<0:96>>, + <>, + Value), + <<1:8, WrappedKey:320, CipherTag:128, CipherText/binary>>. + + +do_decrypt(DbKey, #{uuid := UUID}, Key, Value) -> + case Value of + <<1:8, WrappedKey:320, CipherTag:128, CipherText/binary>> -> + case aegis_keywrap:key_unwrap(DbKey, <>) of + fail -> + erlang:error(decryption_failed); + DecryptionKey -> + Decrypted = + ?aes_gcm_decrypt( + DecryptionKey, + <<0:96>>, + <>, + CipherText, + <>), + if Decrypted /= error -> Decrypted; true -> + erlang:error(decryption_failed) + end + end; + _ -> + erlang:error(not_ciphertext) + end. + + +%% cache functions + +insert(Cache, UUID, DbKey) -> + Entry = #entry{uuid = UUID, encryption_key = DbKey}, + true = ets:insert(Cache, Entry), + true = ets:insert(?KEY_CHECK, {UUID, true}), + ok. + + +lookup(Cache, UUID) -> + case ets:lookup(Cache, UUID) of + [#entry{uuid = UUID, encryption_key = DbKey}] -> + {ok, DbKey}; + [] -> + {error, not_found} + end. diff --git a/src/aegis/src/aegis_sup.erl b/src/aegis/src/aegis_sup.erl new file mode 100644 index 000000000..6d3ee83d8 --- /dev/null +++ b/src/aegis/src/aegis_sup.erl @@ -0,0 +1,46 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(aegis_sup). + +-behaviour(supervisor). + +-vsn(1). + + +-export([ + start_link/0 +]). + +-export([ + init/1 +]). + + +start_link() -> + supervisor:start_link({local, ?MODULE}, ?MODULE, []). + + +init([]) -> + Flags = #{ + strategy => one_for_one, + intensity => 5, + period => 10 + }, + Children = [ + #{ + id => aegis_server, + start => {aegis_server, start_link, []}, + shutdown => 5000 + } + ], + {ok, {Flags, Children}}. diff --git a/src/aegis/test/aegis_basic_test.erl b/src/aegis/test/aegis_basic_test.erl new file mode 100644 index 000000000..61d9737dd --- /dev/null +++ b/src/aegis/test/aegis_basic_test.erl @@ -0,0 +1,17 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(aegis_basic_test). + +-include_lib("eunit/include/eunit.hrl"). + +-define(DB, #{uuid => <<"foo">>}). diff --git a/src/aegis/test/aegis_server_test.erl b/src/aegis/test/aegis_server_test.erl new file mode 100644 index 000000000..0f23a3fd9 --- /dev/null +++ b/src/aegis/test/aegis_server_test.erl @@ -0,0 +1,165 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(aegis_server_test). + +-include_lib("eunit/include/eunit.hrl"). +-include_lib("couch/include/couch_eunit.hrl"). + +-define(DB, #{uuid => <<0:64>>}). +-define(VALUE, <<0:8>>). +-define(ENCRYPTED, <<1,155,242,89,190,54,112,151,18,145,25,251,217, + 49,147,125,14,162,146,201,189,100,232,38,239,111,163,84,25,60, + 147,167,237,107,24,204,171,232,227,16,72,203,101,118,150,252, + 204,80,245,66,98,213,223,63,111,105,101,154>>). +-define(TIMEOUT, 10000). + + + +basic_test_() -> + { + foreach, + fun setup/0, + fun teardown/1, + [ + {"init_db returns true when encryption enabled", + {timeout, ?TIMEOUT, fun test_init_db/0}}, + {"open_db returns true when encryption enabled", + {timeout, ?TIMEOUT, fun test_open_db/0}}, + {"init_db caches key", + {timeout, ?TIMEOUT, fun test_init_db_cache/0}}, + {"open_db caches key", + {timeout, ?TIMEOUT, fun test_open_db_cache/0}}, + {"encrypt fetches and caches key when it's missing", + {timeout, ?TIMEOUT, fun test_encrypt_cache/0}}, + {"decrypt fetches and caches key when it's missing", + {timeout, ?TIMEOUT, fun test_decrypt_cache/0}} + ] + }. + + +setup() -> + Ctx = test_util:start_couch([fabric]), + meck:new([?AEGIS_KEY_MANAGER], [passthrough]), + ok = meck:expect(?AEGIS_KEY_MANAGER, init_db, 2, {ok, <<0:256>>}), + ok = meck:expect(?AEGIS_KEY_MANAGER, open_db, 1, {ok, <<0:256>>}), + Ctx. + + +teardown(Ctx) -> + meck:unload(), + test_util:stop_couch(Ctx). + + +test_init_db() -> + ?assert(aegis_server:init_db(?DB, [])), + ?assertEqual(1, meck:num_calls(?AEGIS_KEY_MANAGER, init_db, 2)). + + +test_open_db() -> + ?assert(aegis_server:open_db(?DB)), + ?assertEqual(1, meck:num_calls(?AEGIS_KEY_MANAGER, open_db, 1)). + + +test_init_db_cache() -> + ?assertEqual(0, meck:num_calls(?AEGIS_KEY_MANAGER, init_db, 2)), + + ?assert(aegis_server:init_db(?DB, [])), + + lists:foreach(fun(I) -> + Encrypted = aegis_server:encrypt(?DB, <>, ?VALUE), + ?assertNotEqual(?VALUE, Encrypted), + ?assertMatch(<<1:8, _/binary>>, Encrypted) + end, lists:seq(1, 12)), + + ?assertEqual(1, meck:num_calls(?AEGIS_KEY_MANAGER, init_db, 2)). + + +test_open_db_cache() -> + ?assertEqual(0, meck:num_calls(?AEGIS_KEY_MANAGER, open_db, 1)), + + ?assert(aegis_server:open_db(?DB)), + + lists:foreach(fun(I) -> + Encrypted = aegis_server:encrypt(?DB, <>, ?VALUE), + ?assertNotEqual(?VALUE, Encrypted), + ?assertMatch(<<1:8, _/binary>>, Encrypted) + end, lists:seq(1, 12)), + + ?assertEqual(1, meck:num_calls(?AEGIS_KEY_MANAGER, open_db, 1)). + + +test_encrypt_cache() -> + ?assertEqual(0, meck:num_calls(?AEGIS_KEY_MANAGER, open_db, 1)), + + Encrypted = aegis_server:encrypt(?DB, <<1:64>>, ?VALUE), + ?assertNotEqual(?VALUE, Encrypted), + ?assertMatch(<<1:8, _/binary>>, Encrypted), + + ?assertEqual(1, meck:num_calls(?AEGIS_KEY_MANAGER, open_db, 1)). + + +test_decrypt_cache() -> + ?assertEqual(0, meck:num_calls(?AEGIS_KEY_MANAGER, open_db, 1)), + + Decrypted = aegis_server:decrypt(?DB, <<1:64>>, ?ENCRYPTED), + ?assertEqual(<<0>>, Decrypted), + + ?assertEqual(1, meck:num_calls(?AEGIS_KEY_MANAGER, open_db, 1)). + + + +disabled_test_() -> + { + foreach, + fun() -> + Ctx = setup(), + ok = meck:delete(?AEGIS_KEY_MANAGER, init_db, 2), + ok = meck:expect(?AEGIS_KEY_MANAGER, init_db, 2, false), + ok = meck:delete(?AEGIS_KEY_MANAGER, open_db, 1), + ok = meck:expect(?AEGIS_KEY_MANAGER, open_db, 1, false), + Ctx + end, + fun teardown/1, + [ + {"init_db returns false when encryptions disabled", + {timeout, ?TIMEOUT, fun test_disabled_init_db/0}}, + {"open_db returns false when encryptions disabled", + {timeout, ?TIMEOUT, fun test_disabled_open_db/0}}, + {"pass through on encrypt when encryption disabled", + {timeout, ?TIMEOUT, fun test_disabled_encrypt/0}}, + {"pass through on decrypt when encryption disabled", + {timeout, ?TIMEOUT, fun test_disabled_decrypt/0}} + ] + }. + + +test_disabled_init_db() -> + ?assertNot(aegis_server:init_db(?DB, [])), + ?assertEqual(1, meck:num_calls(?AEGIS_KEY_MANAGER, init_db, 2)). + + +test_disabled_open_db() -> + ?assertNot(aegis_server:open_db(?DB)), + ?assertEqual(1, meck:num_calls(?AEGIS_KEY_MANAGER, open_db, 1)). + + +test_disabled_encrypt() -> + Db = ?DB#{is_encrypted => aegis_server:open_db(?DB)}, + Encrypted = aegis:encrypt(Db, <<1:64>>, ?VALUE), + ?assertEqual(?VALUE, Encrypted). + + +test_disabled_decrypt() -> + Db = ?DB#{is_encrypted => aegis_server:open_db(?DB)}, + Decrypted = aegis:decrypt(Db, <<1:64>>, ?ENCRYPTED), + ?assertEqual(?ENCRYPTED, Decrypted). diff --git a/src/chttpd/src/chttpd.erl b/src/chttpd/src/chttpd.erl index 4640258a8..699601c0e 100644 --- a/src/chttpd/src/chttpd.erl +++ b/src/chttpd/src/chttpd.erl @@ -357,6 +357,10 @@ catch_error(HttpReq, throw, Error) -> send_error(HttpReq, Error); catch_error(HttpReq, error, database_does_not_exist) -> send_error(HttpReq, database_does_not_exist); +catch_error(HttpReq, error, decryption_failed) -> + send_error(HttpReq, decryption_failed); +catch_error(HttpReq, error, not_ciphertext) -> + send_error(HttpReq, not_ciphertext); catch_error(HttpReq, Tag, Error) -> Stack = erlang:get_stacktrace(), % TODO improve logging and metrics collection for client disconnects @@ -965,6 +969,10 @@ error_info(not_implemented) -> error_info(timeout) -> {500, <<"timeout">>, <<"The request could not be processed in a reasonable" " amount of time.">>}; +error_info(decryption_failed) -> + {500, <<"decryption_failed">>, <<"Decryption failed">>}; +error_info(not_ciphertext) -> + {500, <<"not_ciphertext">>, <<"Not Ciphertext">>}; error_info({service_unavailable, Reason}) -> {503, <<"service unavailable">>, Reason}; error_info({timeout, _Reason}) -> diff --git a/src/couch/src/couch_keywrap.erl b/src/couch/src/couch_keywrap.erl deleted file mode 100644 index 0d1e3f59d..000000000 --- a/src/couch/src/couch_keywrap.erl +++ /dev/null @@ -1,103 +0,0 @@ --module(couch_keywrap). - -%% Implementation of NIST Special Publication 800-38F -%% For wrapping and unwrapping keys with AES. - --export([key_wrap/2, key_unwrap/2]). - --define(ICV1, 16#A6A6A6A6A6A6A6A6). - -%% Assume old crypto api --define(aes_ecb_encrypt(Key, Data), - crypto:block_encrypt(aes_ecb, Key, Data)). --define(aes_ecb_decrypt(Key, Data), - crypto:block_decrypt(aes_ecb, Key, Data)). - -%% Replace macros if new crypto api is available --ifdef(OTP_RELEASE). --if(?OTP_RELEASE >= 22). --define(key_alg(Key), case bit_size(Key) of 128 -> aes_128_ecb; 192 -> aes_192_ecb; 256 -> aes_256_ecb end). --undef(aes_ecb_encrypt). --define(aes_ecb_encrypt(Key, Data), - crypto:crypto_one_time(?key_alg(Key), Key, Data, true)). --undef(aes_ecb_decrypt). --define(aes_ecb_decrypt(Key, Data), - crypto:crypto_one_time(?key_alg(Key), Key, Data, false)). --endif. --endif. - --spec key_wrap(WrappingKey :: binary(), KeyToWrap :: binary()) -> binary(). -key_wrap(WrappingKey, KeyToWrap) - when is_binary(WrappingKey), bit_size(KeyToWrap) rem 64 == 0 -> - N = bit_size(KeyToWrap) div 64, - wrap(WrappingKey, <>, KeyToWrap, 1, 6 * N). - -wrap(_WrappingKey, A, R, T, End) when T > End -> - <>; -wrap(WrappingKey, A, R, T, End) -> - <> = R, - <> = ?aes_ecb_encrypt(WrappingKey, <>), - wrap(WrappingKey, <<(MSB_B bxor T):64>>, <>, T + 1, End). - - --spec key_unwrap(WrappingKey :: binary(), KeyToUnwrap :: binary()) -> binary() | fail. -key_unwrap(WrappingKey, KeyToUnwrap) - when is_binary(WrappingKey), bit_size(KeyToUnwrap) rem 64 == 0 -> - N = (bit_size(KeyToUnwrap) div 64), - <> = KeyToUnwrap, - case unwrap(WrappingKey, <>, R, 6 * (N - 1)) of - <> -> - UnwrappedKey; - _ -> - fail - end. - -unwrap(_WrappingKey, A, R, 0) -> - <>; -unwrap(WrappingKey, <>, R, T) -> - RestSize = bit_size(R) - 64, - <> = R, - <> = ?aes_ecb_decrypt(WrappingKey, <<(A bxor T):64, R2:64>>), - unwrap(WrappingKey, <>, <>, T - 1). - - --ifdef(TEST). --include_lib("eunit/include/eunit.hrl"). - -wrap_test_() -> - [ - %% 128 KEK / 128 DATA - test_wrap_unwrap(<<16#000102030405060708090A0B0C0D0E0F:128>>, - <<16#00112233445566778899AABBCCDDEEFF:128>>, - <<16#1FA68B0A8112B447AEF34BD8FB5A7B829D3E862371D2CFE5:192>>), - %% 192 KEK / 128 DATA - test_wrap_unwrap(<<16#000102030405060708090A0B0C0D0E0F1011121314151617:192>>, - <<16#00112233445566778899AABBCCDDEEFF:128>>, - <<16#96778B25AE6CA435F92B5B97C050AED2468AB8A17AD84E5D:192>>), - %% 256 KEK / 128 DATA - test_wrap_unwrap(<<16#000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F:256>>, - <<16#00112233445566778899AABBCCDDEEFF:128>>, - <<16#64E8C3F9CE0F5BA263E9777905818A2A93C8191E7D6E8AE7:192>>), - %% 192 KEK / 192 DATA - test_wrap_unwrap(<<16#000102030405060708090A0B0C0D0E0F1011121314151617:192>>, - <<16#00112233445566778899AABBCCDDEEFF0001020304050607:192>>, - <<16#031D33264E15D33268F24EC260743EDCE1C6C7DDEE725A936BA814915C6762D2:256>>), - %% 256 KEK / 192 DATA - test_wrap_unwrap(<<16#000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F:256>>, - <<16#00112233445566778899AABBCCDDEEFF0001020304050607:192>>, - <<16#A8F9BC1612C68B3FF6E6F4FBE30E71E4769C8B80A32CB8958CD5D17D6B254DA1:256>>), - %% 256 KEK / 256 DATA - test_wrap_unwrap(<<16#000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F:256>>, - <<16#00112233445566778899AABBCCDDEEFF000102030405060708090A0B0C0D0E0F:256>>, - <<16#28C9F404C4B810F4CBCCB35CFB87F8263F5786E2D80ED326CBC7F0E71A99F43BFB988B9B7A02DD21:320>>)]. - -test_wrap_unwrap(WrappingKey, KeyToWrap, ExpectedWrappedKey) -> - [?_assertEqual(ExpectedWrappedKey, key_wrap(WrappingKey, KeyToWrap)), - ?_assertEqual(KeyToWrap, key_unwrap(WrappingKey, key_wrap(WrappingKey, KeyToWrap)))]. - -fail_test() -> - KEK = <<16#000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F:256>>, - CipherText = <<16#28C9F404C4B810F4CBCCB35CFB87F8263F5786E2D80ED326CBC7F0E71A99F43BFB988B9B7A02DD20:320>>, - ?assertEqual(fail, key_unwrap(KEK, CipherText)). - --endif. diff --git a/src/couch_views/src/couch_views_fdb.erl b/src/couch_views/src/couch_views_fdb.erl index 2181e5373..c95722230 100644 --- a/src/couch_views/src/couch_views_fdb.erl +++ b/src/couch_views/src/couch_views_fdb.erl @@ -161,7 +161,7 @@ fold_map_idx(TxDb, Sig, ViewId, Options, Callback, Acc0) -> callback => Callback, acc => Acc0 }, - Fun = fun fold_fwd/2, + Fun = aegis:wrap_fold_fun(TxDb, fun fold_fwd/2), #{ acc := Acc1 @@ -321,7 +321,7 @@ update_id_idx(TxDb, Sig, ViewId, DocId, NewRows, KVSize) -> Key = id_idx_key(DbPrefix, Sig, DocId, ViewId), Val = couch_views_encoding:encode([length(NewRows), KVSize, Unique]), - ok = erlfdb:set(Tx, Key, Val). + ok = erlfdb:set(Tx, Key, aegis:encrypt(TxDb, Key, Val)). update_map_idx(TxDb, Sig, ViewId, DocId, ExistingKeys, NewRows) -> @@ -341,7 +341,7 @@ update_map_idx(TxDb, Sig, ViewId, DocId, ExistingKeys, NewRows) -> lists:foreach(fun({DupeId, Key1, Key2, EV}) -> KK = map_idx_key(MapIdxPrefix, {Key1, DocId}, DupeId), Val = erlfdb_tuple:pack({Key2, EV}), - ok = erlfdb:set(Tx, KK, Val) + ok = erlfdb:set(Tx, KK, aegis:encrypt(TxDb, KK, Val)) end, KVsToAdd). @@ -356,7 +356,7 @@ get_view_keys(TxDb, Sig, DocId) -> erlfdb_tuple:unpack(K, DbPrefix), [TotalKeys, TotalSize, UniqueKeys] = couch_views_encoding:decode(V), {ViewId, TotalKeys, TotalSize, UniqueKeys} - end, erlfdb:get_range(Tx, Start, End, [])). + end, aegis:decrypt(TxDb, erlfdb:get_range(Tx, Start, End, []))). update_row_count(TxDb, Sig, ViewId, Increment) -> diff --git a/src/fabric/include/fabric2.hrl b/src/fabric/include/fabric2.hrl index bf3e2aa03..234c5291e 100644 --- a/src/fabric/include/fabric2.hrl +++ b/src/fabric/include/fabric2.hrl @@ -42,6 +42,7 @@ -define(DB_LOCAL_DOC_BODIES, 25). -define(DB_ATT_NAMES, 26). -define(DB_SEARCH, 27). +-define(DB_AEGIS, 28). % Versions diff --git a/src/fabric/src/fabric.app.src b/src/fabric/src/fabric.app.src index 0538b19b4..a7059fd10 100644 --- a/src/fabric/src/fabric.app.src +++ b/src/fabric/src/fabric.app.src @@ -28,6 +28,7 @@ mem3, couch_log, couch_stats, - erlfdb + erlfdb, + aegis ]} ]}. diff --git a/src/fabric/src/fabric2_fdb.erl b/src/fabric/src/fabric2_fdb.erl index b1ada52fc..ba57e646d 100644 --- a/src/fabric/src/fabric2_fdb.erl +++ b/src/fabric/src/fabric2_fdb.erl @@ -181,7 +181,7 @@ create(#{} = Db0, Options) -> name := DbName, tx := Tx, layer_prefix := LayerPrefix - } = Db = ensure_current(Db0, false), + } = Db1 = ensure_current(Db0, false), DbKey = erlfdb_tuple:pack({?ALL_DBS, DbName}, LayerPrefix), HCA = erlfdb_hca:create(erlfdb_tuple:pack({?DB_HCA}, LayerPrefix)), @@ -224,7 +224,7 @@ create(#{} = Db0, Options) -> UserCtx = fabric2_util:get_value(user_ctx, Options, #user_ctx{}), Options1 = lists:keydelete(user_ctx, 1, Options), - Db#{ + Db2 = Db1#{ uuid => UUID, db_prefix => DbPrefix, db_version => DbVersion, @@ -240,7 +240,8 @@ create(#{} = Db0, Options) -> % All other db things as we add features, db_options => Options1 - }. + }, + aegis:init_db(Db2, Options). open(#{} = Db0, Options) -> @@ -286,14 +287,15 @@ open(#{} = Db0, Options) -> }, Db3 = load_config(Db2), + Db4 = aegis:open_db(Db3), - case {UUID, Db3} of + case {UUID, Db4} of {undefined, _} -> ok; {<<_/binary>>, #{uuid := UUID}} -> ok; {<<_/binary>>, #{uuid := _}} -> erlang:error(database_does_not_exist) end, - load_validate_doc_funs(Db3). + load_validate_doc_funs(Db4). % Match on `name` in the function head since some non-fabric2 db @@ -701,9 +703,10 @@ get_doc_body_wait(#{} = Db0, DocId, RevInfo, Future) -> rev_path := RevPath } = RevInfo, - RevBodyRows = erlfdb:fold_range_wait(Tx, Future, fun({_K, V}, Acc) -> + FoldFun = aegis:wrap_fold_fun(Db, fun({_K, V}, Acc) -> [V | Acc] - end, []), + end), + RevBodyRows = erlfdb:fold_range_wait(Tx, Future, FoldFun, []), BodyRows = lists:reverse(RevBodyRows), fdb_to_doc(Db, DocId, RevPos, [Rev | RevPath], BodyRows). @@ -720,7 +723,7 @@ get_local_doc(#{} = Db0, <> = DocId) -> Prefix = erlfdb_tuple:pack({?DB_LOCAL_DOC_BODIES, DocId}, DbPrefix), Future = erlfdb:get_range_startswith(Tx, Prefix), - Chunks = lists:map(fun({_K, V}) -> V end, erlfdb:wait(Future)), + {_, Chunks} = lists:unzip(aegis:decrypt(Db, erlfdb:wait(Future))), fdb_to_local_doc(Db, DocId, Rev, Chunks). @@ -949,7 +952,9 @@ write_local_doc(#{} = Db0, Doc) -> % Make sure to clear the whole range, in case there was a larger % document body there before. erlfdb:clear_range_startswith(Tx, BPrefix), - lists:foreach(fun({K, V}) -> erlfdb:set(Tx, K, V) end, Rows) + lists:foreach(fun({K, V}) -> + erlfdb:set(Tx, K, aegis:encrypt(Db, K, V)) + end, Rows) end, case {WasDeleted, Doc#doc.deleted} of @@ -977,8 +982,8 @@ read_attachment(#{} = Db, DocId, AttId) -> not_found -> throw({not_found, missing}); KVs -> - Vs = [V || {_K, V} <- KVs], - iolist_to_binary(Vs) + {_, Chunks} = lists:unzip(aegis:decrypt(Db, KVs)), + iolist_to_binary(Chunks) end, IdKey = erlfdb_tuple:pack({?DB_ATT_NAMES, DocId, AttId}, DbPrefix), @@ -1023,7 +1028,7 @@ write_attachment(#{} = Db, DocId, Data, Encoding) lists:foldl(fun(Chunk, ChunkId) -> AttKey = erlfdb_tuple:pack({?DB_ATTS, DocId, AttId, ChunkId}, DbPrefix), - ok = erlfdb:set(Tx, AttKey, Chunk), + ok = erlfdb:set(Tx, AttKey, aegis:encrypt(Db, AttKey, Chunk)), ChunkId + 1 end, 0, Chunks), {ok, AttId}. @@ -1332,7 +1337,7 @@ write_doc_body(#{} = Db0, #doc{} = Doc) -> Rows = doc_to_fdb(Db, Doc), lists:foreach(fun({Key, Value}) -> - ok = erlfdb:set(Tx, Key, Value) + ok = erlfdb:set(Tx, Key, aegis:encrypt(Db, Key, Value)) end, Rows). -- cgit v1.2.1 From 21bb444bdc2d370f761c6e6d750a38f66f66d172 Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Thu, 23 Apr 2020 19:22:08 -0400 Subject: Add a couch_views test for multiple design documents with the same map --- src/couch_views/test/couch_views_indexer_test.erl | 88 ++++++++++++++++++++--- 1 file changed, 80 insertions(+), 8 deletions(-) diff --git a/src/couch_views/test/couch_views_indexer_test.erl b/src/couch_views/test/couch_views_indexer_test.erl index 54f787da3..cb8378f01 100644 --- a/src/couch_views/test/couch_views_indexer_test.erl +++ b/src/couch_views/test/couch_views_indexer_test.erl @@ -46,6 +46,7 @@ indexer_test_() -> ?TDEF_FE(multipe_keys_from_same_doc), ?TDEF_FE(multipe_identical_keys_from_same_doc), ?TDEF_FE(fewer_multipe_identical_keys_from_same_doc), + ?TDEF_FE(multiple_design_docs), ?TDEF_FE(handle_size_key_limits), ?TDEF_FE(handle_size_value_limits), ?TDEF_FE(index_autoupdater_callback), @@ -427,6 +428,55 @@ budget_history() -> [Result || {_Pid, {couch_rate, budget, _}, Result} <- meck:history(couch_rate)]. +multiple_design_docs(Db) -> + Cleanup = fun() -> + fabric2_fdb:transactional(Db, fun(TxDb) -> + DDocs = fabric2_db:get_design_docs(Db), + ok = couch_views:cleanup_indices(TxDb, DDocs) + end) + end, + + % This is how we check that no index updates took place + meck:new(couch_views_fdb, [passthrough]), + meck:expect(couch_views_fdb, write_doc, fun(TxDb, Sig, ViewIds, Doc) -> + meck:passthrough([TxDb, Sig, ViewIds, Doc]) + end), + + DDoc1 = create_ddoc(simple, <<"_design/bar1">>), + DDoc2 = create_ddoc(simple, <<"_design/bar2">>), + + {ok, _} = fabric2_db:update_doc(Db, doc(0), []), + {ok, {Pos1, Rev1}} = fabric2_db:update_doc(Db, DDoc1, []), + ?assertEqual({ok, [row(<<"0">>, 0, 0)]}, run_query(Db, DDoc1, ?MAP_FUN1)), + + % Because run_query/3 can return, and unsurbscribe from the job, + % before it actually finishes, ensure we wait for the job to + % finish so we get a deterministic setup every time. + JobId = get_job_id(Db, DDoc1), + ?assertEqual(ok, wait_job_finished(JobId, 5000)), + + % Add the second ddoc with same view as first one. + {ok, {Pos2, Rev2}} = fabric2_db:update_doc(Db, DDoc2, []), + + DDoc1Del = DDoc1#doc{revs = {Pos1, [Rev1]}, deleted = true}, + {ok, _} = fabric2_db:update_doc(Db, DDoc1Del, []), + + Cleanup(), + + meck:reset(couch_views_fdb), + ?assertEqual({ok, [row(<<"0">>, 0, 0)]}, run_query(Db, DDoc2, ?MAP_FUN1)), + ?assertEqual(ok, wait_job_finished(JobId, 5000)), + ?assertEqual(0, meck:num_calls(couch_views_fdb, write_doc, 4)), + + DDoc2Del = DDoc2#doc{revs = {Pos2, [Rev2]}, deleted = true}, + {ok, _} = fabric2_db:update_doc(Db, DDoc2Del, []), + + Cleanup(), + + % After the last ddoc is deleted we should get an error + ?assertError({ddoc_deleted, _}, run_query(Db, DDoc2, ?MAP_FUN1)). + + handle_db_recreated_when_running(Db) -> DbName = fabric2_db:name(Db), @@ -564,9 +614,13 @@ create_ddoc() -> create_ddoc(simple). -create_ddoc(simple) -> +create_ddoc(Type) -> + create_ddoc(Type, <<"_design/bar">>). + + +create_ddoc(simple, DocId) when is_binary(DocId) -> couch_doc:from_json_obj({[ - {<<"_id">>, <<"_design/bar">>}, + {<<"_id">>, DocId}, {<<"views">>, {[ {?MAP_FUN1, {[ {<<"map">>, <<"function(doc) {emit(doc.val, doc.val);}">>} @@ -577,9 +631,9 @@ create_ddoc(simple) -> ]}} ]}); -create_ddoc(multi_emit_different) -> +create_ddoc(multi_emit_different, DocId) when is_binary(DocId) -> couch_doc:from_json_obj({[ - {<<"_id">>, <<"_design/bar">>}, + {<<"_id">>, DocId}, {<<"views">>, {[ {?MAP_FUN1, {[ {<<"map">>, <<"function(doc) { " @@ -593,9 +647,9 @@ create_ddoc(multi_emit_different) -> ]}} ]}); -create_ddoc(multi_emit_same) -> +create_ddoc(multi_emit_same, DocId) when is_binary(DocId) -> couch_doc:from_json_obj({[ - {<<"_id">>, <<"_design/bar">>}, + {<<"_id">>, DocId}, {<<"views">>, {[ {?MAP_FUN1, {[ {<<"map">>, <<"function(doc) { " @@ -612,9 +666,9 @@ create_ddoc(multi_emit_same) -> ]}} ]}); -create_ddoc(multi_emit_key_limit) -> +create_ddoc(multi_emit_key_limit, DocId) when is_binary(DocId) -> couch_doc:from_json_obj({[ - {<<"_id">>, <<"_design/bar">>}, + {<<"_id">>, DocId}, {<<"views">>, {[ {?MAP_FUN1, {[ {<<"map">>, <<"function(doc) { " @@ -658,6 +712,24 @@ run_query(#{} = Db, DDoc, <<_/binary>> = View) -> couch_views:query(Db, DDoc, View, fun fold_fun/2, [], #mrargs{}). +get_job_id(#{} = Db, DDoc) -> + DbName = fabric2_db:name(Db), + {ok, Mrst} = couch_views_util:ddoc_to_mrst(DbName, DDoc), + couch_views_jobs:job_id(Db, Mrst). + + +wait_job_finished(JobId, Timeout) -> + case couch_jobs:subscribe(?INDEX_JOB_TYPE, JobId) of + {ok, Sub, _, _} -> + case couch_jobs:wait(Sub, finished, Timeout) of + {?INDEX_JOB_TYPE, _, _, _} -> ok; + timeout -> timeout + end; + {ok, finished, _} -> + ok + end. + + meck_intercept_job_update(ParentPid) -> meck:new(couch_jobs, [passthrough]), meck:expect(couch_jobs, update, fun(Db, Job, Data) -> -- cgit v1.2.1 From 27d1405ce8379db7ce34c0b8abf9cf1eb757e8aa Mon Sep 17 00:00:00 2001 From: Joan Touzet Date: Wed, 15 Apr 2020 00:28:27 +0000 Subject: First pass at SpiderMonkey 68 support --- .gitignore | 1 + rebar.config.script | 16 +- src/couch/priv/couch_js/68/help.h | 86 +++++ src/couch/priv/couch_js/68/http.cpp | 710 ++++++++++++++++++++++++++++++++++++ src/couch/priv/couch_js/68/http.h | 27 ++ src/couch/priv/couch_js/68/main.cpp | 494 +++++++++++++++++++++++++ src/couch/priv/couch_js/68/utf8.cpp | 309 ++++++++++++++++ src/couch/priv/couch_js/68/utf8.h | 19 + src/couch/priv/couch_js/68/util.cpp | 350 ++++++++++++++++++ src/couch/priv/couch_js/68/util.h | 60 +++ src/couch/rebar.config.script | 66 ++-- support/build_js.escript | 6 + 12 files changed, 2107 insertions(+), 37 deletions(-) create mode 100644 src/couch/priv/couch_js/68/help.h create mode 100644 src/couch/priv/couch_js/68/http.cpp create mode 100644 src/couch/priv/couch_js/68/http.h create mode 100644 src/couch/priv/couch_js/68/main.cpp create mode 100644 src/couch/priv/couch_js/68/utf8.cpp create mode 100644 src/couch/priv/couch_js/68/utf8.h create mode 100644 src/couch/priv/couch_js/68/util.cpp create mode 100644 src/couch/priv/couch_js/68/util.h diff --git a/.gitignore b/.gitignore index 3cfa3721e..8a4a6f08d 100644 --- a/.gitignore +++ b/.gitignore @@ -13,6 +13,7 @@ .rebar/ .eunit/ cover/ +core log apache-couchdb-*/ bin/ diff --git a/rebar.config.script b/rebar.config.script index bfca5c84e..0e9c9781c 100644 --- a/rebar.config.script +++ b/rebar.config.script @@ -139,7 +139,7 @@ SubDirs = [ "src/setup", "src/smoosh", "rel" -], +]. DepDescs = [ %% Independent Apps @@ -162,18 +162,18 @@ DepDescs = [ {mochiweb, "mochiweb", {tag, "v2.20.0"}}, {meck, "meck", {tag, "0.8.8"}}, {recon, "recon", {tag, "2.5.0"}} -], +]. -WithProper = lists:keyfind(with_proper, 1, CouchConfig) == {with_proper, true}, +WithProper = lists:keyfind(with_proper, 1, CouchConfig) == {with_proper, true}. OptionalDeps = case WithProper of true -> [{proper, {url, "https://github.com/proper-testing/proper"}, {tag, "v1.3"}}]; false -> [] -end, +end. -BaseUrl = "https://github.com/apache/", +BaseUrl = "https://github.com/apache/". MakeDep = fun ({AppName, {url, Url}, Version}) -> @@ -186,12 +186,12 @@ MakeDep = fun ({AppName, RepoName, Version, Options}) -> Url = BaseUrl ++ "couchdb-" ++ RepoName ++ ".git", {AppName, ".*", {git, Url, Version}, Options} -end, +end. ErlOpts = case os:getenv("ERL_OPTS") of false -> []; Opts -> [list_to_atom(O) || O <- string:tokens(Opts, ",")] -end, +end. AddConfig = [ {require_otp_vsn, "19|20|21|22"}, @@ -210,7 +210,7 @@ AddConfig = [ sasl, setup, ssl, stdlib, syntax_tools, xmerl]}, {warnings, [unmatched_returns, error_handling, race_conditions]}]}, {post_hooks, [{compile, "escript support/build_js.escript"}]} -], +]. C = lists:foldl(fun({K, V}, CfgAcc) -> lists:keystore(K, 1, CfgAcc, {K, V}) diff --git a/src/couch/priv/couch_js/68/help.h b/src/couch/priv/couch_js/68/help.h new file mode 100644 index 000000000..678651fd3 --- /dev/null +++ b/src/couch/priv/couch_js/68/help.h @@ -0,0 +1,86 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); you may not +// use this file except in compliance with the License. You may obtain a copy of +// the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations under +// the License. + +#ifndef COUCHJS_HELP_H +#define COUCHJS_HELP_H + +#include "config.h" + +static const char VERSION_TEMPLATE[] = + "%s - %s\n" + "\n" + "Licensed under the Apache License, Version 2.0 (the \"License\"); you may " + "not use\n" + "this file except in compliance with the License. You may obtain a copy of" + "the\n" + "License at\n" + "\n" + " http://www.apache.org/licenses/LICENSE-2.0\n" + "\n" + "Unless required by applicable law or agreed to in writing, software " + "distributed\n" + "under the License is distributed on an \"AS IS\" BASIS, WITHOUT " + "WARRANTIES OR\n" + "CONDITIONS OF ANY KIND, either express or implied. See the License " + "for the\n" + "specific language governing permissions and limitations under the " + "License.\n"; + +static const char USAGE_TEMPLATE[] = + "Usage: %s [FILE]\n" + "\n" + "The %s command runs the %s JavaScript interpreter.\n" + "\n" + "The exit status is 0 for success or 1 for failure.\n" + "\n" + "Options:\n" + "\n" + " -h display a short help message and exit\n" + " -V display version information and exit\n" + " -H enable %s cURL bindings (only avaiable\n" + " if package was built with cURL available)\n" + " -T enable test suite specific functions (these\n" + " should not be enabled for production systems)\n" + " -S SIZE specify that the runtime should allow at\n" + " most SIZE bytes of memory to be allocated\n" + " default is 64 MiB\n" + " -u FILE path to a .uri file containing the address\n" + " (or addresses) of one or more servers\n" + " --eval Enable runtime code evaluation (dangerous!)\n" + "\n" + "Report bugs at <%s>.\n"; + +#define BASENAME COUCHJS_NAME + +#define couch_version(basename) \ + fprintf( \ + stdout, \ + VERSION_TEMPLATE, \ + basename, \ + PACKAGE_STRING) + +#define DISPLAY_VERSION couch_version(BASENAME) + + +#define couch_usage(basename) \ + fprintf( \ + stdout, \ + USAGE_TEMPLATE, \ + basename, \ + basename, \ + PACKAGE_NAME, \ + basename, \ + PACKAGE_BUGREPORT) + +#define DISPLAY_USAGE couch_usage(BASENAME) + +#endif // Included help.h diff --git a/src/couch/priv/couch_js/68/http.cpp b/src/couch/priv/couch_js/68/http.cpp new file mode 100644 index 000000000..a0c73bdc6 --- /dev/null +++ b/src/couch/priv/couch_js/68/http.cpp @@ -0,0 +1,710 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); you may not +// use this file except in compliance with the License. You may obtain a copy of +// the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations under +// the License. + +#include +#include +#include +#include +#include +#include +#include +#include +#include "config.h" +#include "utf8.h" +#include "util.h" + +// Soft dependency on cURL bindings because they're +// only used when running the JS tests from the +// command line which is rare. +#ifndef HAVE_CURL + +void +http_check_enabled() +{ + fprintf(stderr, "HTTP API was disabled at compile time.\n"); + exit(3); +} + + +bool +http_ctor(JSContext* cx, JSObject* req) +{ + return false; +} + + +void +http_dtor(JSFreeOp* fop, JSObject* req) +{ + return; +} + + +bool +http_open(JSContext* cx, JSObject* req, JS::Value mth, JS::Value url, JS::Value snc) +{ + return false; +} + + +bool +http_set_hdr(JSContext* cx, JSObject* req, JS::Value name, JS::Value val) +{ + return false; +} + + +bool +http_send(JSContext* cx, JSObject* req, JS::Value body) +{ + return false; +} + + +int +http_status(JSContext* cx, JSObject* req) +{ + return -1; +} + +bool +http_uri(JSContext* cx, JSObject* req, couch_args* args, JS::Value* uri_val) +{ + return false; +} + + +#else +#include +#ifndef XP_WIN +#include +#endif + + +void +http_check_enabled() +{ + return; +} + + +// Map some of the string function names to things which exist on Windows +#ifdef XP_WIN +#define strcasecmp _strcmpi +#define strncasecmp _strnicmp +#define snprintf _snprintf +#endif + + +typedef struct curl_slist CurlHeaders; + + +typedef struct { + int method; + char* url; + CurlHeaders* req_headers; + int16_t last_status; +} HTTPData; + + +const char* METHODS[] = {"GET", "HEAD", "POST", "PUT", "DELETE", "COPY", "OPTIONS", NULL}; + + +#define GET 0 +#define HEAD 1 +#define POST 2 +#define PUT 3 +#define DELETE 4 +#define COPY 5 +#define OPTIONS 6 + + +static bool +go(JSContext* cx, JSObject* obj, HTTPData* http, char* body, size_t blen); + + +/*static JSString* +str_from_binary(JSContext* cx, char* data, size_t length); +*/ + + +bool +http_ctor(JSContext* cx, JSObject* req) +{ + HTTPData* http = NULL; + bool ret = false; + + http = (HTTPData*) malloc(sizeof(HTTPData)); + if(!http) + { + JS_ReportErrorUTF8(cx, "Failed to create CouchHTTP instance."); + goto error; + } + + http->method = -1; + http->url = NULL; + http->req_headers = NULL; + http->last_status = -1; + + JS_SetPrivate(req, http); + + ret = true; + goto success; + +error: + if(http) free(http); + +success: + return ret; +} + + +void +http_dtor(JSFreeOp* fop, JSObject* obj) +{ + HTTPData* http = (HTTPData*) JS_GetPrivate(obj); + if(http) { + if(http->url) free(http->url); + if(http->req_headers) curl_slist_free_all(http->req_headers); + free(http); + } +} + + +bool +http_open(JSContext* cx, JSObject* req, JS::Value mth, JS::Value url, JS::Value snc) +{ + HTTPData* http = (HTTPData*) JS_GetPrivate(req); + char* method = NULL; + int methid; + bool ret = false; + + if(!http) { + JS_ReportErrorUTF8(cx, "Invalid CouchHTTP instance."); + goto done; + } + + if(mth.isUndefined()) { + JS_ReportErrorUTF8(cx, "You must specify a method."); + goto done; + } + + method = enc_string(cx, mth, NULL); + if(!method) { + JS_ReportErrorUTF8(cx, "Failed to encode method."); + goto done; + } + + for(methid = 0; METHODS[methid] != NULL; methid++) { + if(strcasecmp(METHODS[methid], method) == 0) break; + } + + if(methid > OPTIONS) { + JS_ReportErrorUTF8(cx, "Invalid method specified."); + goto done; + } + + http->method = methid; + + if(url.isUndefined()) { + JS_ReportErrorUTF8(cx, "You must specify a URL."); + goto done; + } + + if(http->url != NULL) { + free(http->url); + http->url = NULL; + } + + http->url = enc_string(cx, url, NULL); + if(http->url == NULL) { + JS_ReportErrorUTF8(cx, "Failed to encode URL."); + goto done; + } + + if(snc.isBoolean() && snc.isTrue()) { + JS_ReportErrorUTF8(cx, "Synchronous flag must be false."); + goto done; + } + + if(http->req_headers) { + curl_slist_free_all(http->req_headers); + http->req_headers = NULL; + } + + // Disable Expect: 100-continue + http->req_headers = curl_slist_append(http->req_headers, "Expect:"); + + ret = true; + +done: + if(method) free(method); + return ret; +} + + +bool +http_set_hdr(JSContext* cx, JSObject* req, JS::Value name, JS::Value val) +{ + HTTPData* http = (HTTPData*) JS_GetPrivate(req); + char* keystr = NULL; + char* valstr = NULL; + char* hdrbuf = NULL; + size_t hdrlen = -1; + bool ret = false; + + if(!http) { + JS_ReportErrorUTF8(cx, "Invalid CouchHTTP instance."); + goto done; + } + + if(name.isUndefined()) + { + JS_ReportErrorUTF8(cx, "You must speciy a header name."); + goto done; + } + + keystr = enc_string(cx, name, NULL); + if(!keystr) + { + JS_ReportErrorUTF8(cx, "Failed to encode header name."); + goto done; + } + + if(val.isUndefined()) + { + JS_ReportErrorUTF8(cx, "You must specify a header value."); + goto done; + } + + valstr = enc_string(cx, val, NULL); + if(!valstr) + { + JS_ReportErrorUTF8(cx, "Failed to encode header value."); + goto done; + } + + hdrlen = strlen(keystr) + strlen(valstr) + 3; + hdrbuf = (char*) malloc(hdrlen * sizeof(char)); + if(!hdrbuf) { + JS_ReportErrorUTF8(cx, "Failed to allocate header buffer."); + goto done; + } + + snprintf(hdrbuf, hdrlen, "%s: %s", keystr, valstr); + http->req_headers = curl_slist_append(http->req_headers, hdrbuf); + + ret = true; + +done: + if(keystr) free(keystr); + if(valstr) free(valstr); + if(hdrbuf) free(hdrbuf); + return ret; +} + +bool +http_send(JSContext* cx, JSObject* req, JS::Value body) +{ + HTTPData* http = (HTTPData*) JS_GetPrivate(req); + char* bodystr = NULL; + size_t bodylen = 0; + bool ret = false; + + if(!http) { + JS_ReportErrorUTF8(cx, "Invalid CouchHTTP instance."); + goto done; + } + + if(!body.isUndefined()) { + bodystr = enc_string(cx, body, &bodylen); + if(!bodystr) { + JS_ReportErrorUTF8(cx, "Failed to encode body."); + goto done; + } + } + + ret = go(cx, req, http, bodystr, bodylen); + +done: + if(bodystr) free(bodystr); + return ret; +} + +int +http_status(JSContext* cx, JSObject* req) +{ + HTTPData* http = (HTTPData*) JS_GetPrivate(req); + + if(!http) { + JS_ReportErrorUTF8(cx, "Invalid CouchHTTP instance."); + return false; + } + + return http->last_status; +} + +bool +http_uri(JSContext* cx, JSObject* req, couch_args* args, JS::Value* uri_val) +{ + FILE* uri_fp = NULL; + JSString* uri_str; + + // Default is http://localhost:15986/ when no uri file is specified + if (!args->uri_file) { + uri_str = JS_NewStringCopyZ(cx, "http://localhost:15986/"); + *uri_val = JS::StringValue(uri_str); + JS_SetReservedSlot(req, 0, *uri_val); + return true; + } + + // Else check to see if the base url is cached in a reserved slot + *uri_val = JS_GetReservedSlot(req, 0); + if (!(*uri_val).isUndefined()) { + return true; + } + + // Read the first line of the couch.uri file. + if(!((uri_fp = fopen(args->uri_file, "r")) && + (uri_str = couch_readline(cx, uri_fp)))) { + JS_ReportErrorUTF8(cx, "Failed to read couch.uri file."); + goto error; + } + + fclose(uri_fp); + *uri_val = JS::StringValue(uri_str); + JS_SetReservedSlot(req, 0, *uri_val); + return true; + +error: + if(uri_fp) fclose(uri_fp); + return false; +} + + +// Curl Helpers + +typedef struct { + HTTPData* http; + JSContext* cx; + JSObject* resp_headers; + char* sendbuf; + size_t sendlen; + size_t sent; + int sent_once; + char* recvbuf; + size_t recvlen; + size_t read; +} CurlState; + +/* + * I really hate doing this but this doesn't have to be + * uber awesome, it just has to work. + */ +CURL* HTTP_HANDLE = NULL; +char ERRBUF[CURL_ERROR_SIZE]; + +static size_t send_body(void *ptr, size_t size, size_t nmem, void *data); +static int seek_body(void *ptr, curl_off_t offset, int origin); +static size_t recv_body(void *ptr, size_t size, size_t nmem, void *data); +static size_t recv_header(void *ptr, size_t size, size_t nmem, void *data); + +static bool +go(JSContext* cx, JSObject* obj, HTTPData* http, char* body, size_t bodylen) +{ + CurlState state; + char* referer; + JSString* jsbody; + bool ret = false; + JS::Value tmp; + JS::RootedObject robj(cx, obj); + JS::RootedValue vobj(cx); + + + state.cx = cx; + state.http = http; + + state.sendbuf = body; + state.sendlen = bodylen; + state.sent = 0; + state.sent_once = 0; + + state.recvbuf = NULL; + state.recvlen = 0; + state.read = 0; + + if(HTTP_HANDLE == NULL) { + HTTP_HANDLE = curl_easy_init(); + curl_easy_setopt(HTTP_HANDLE, CURLOPT_READFUNCTION, send_body); + curl_easy_setopt(HTTP_HANDLE, CURLOPT_SEEKFUNCTION, + (curl_seek_callback) seek_body); + curl_easy_setopt(HTTP_HANDLE, CURLOPT_HEADERFUNCTION, recv_header); + curl_easy_setopt(HTTP_HANDLE, CURLOPT_WRITEFUNCTION, recv_body); + curl_easy_setopt(HTTP_HANDLE, CURLOPT_NOPROGRESS, 1); + curl_easy_setopt(HTTP_HANDLE, CURLOPT_IPRESOLVE, CURL_IPRESOLVE_V4); + curl_easy_setopt(HTTP_HANDLE, CURLOPT_ERRORBUFFER, ERRBUF); + curl_easy_setopt(HTTP_HANDLE, CURLOPT_COOKIEFILE, ""); + curl_easy_setopt(HTTP_HANDLE, CURLOPT_USERAGENT, + "CouchHTTP Client - Relax"); + } + + if(!HTTP_HANDLE) { + JS_ReportErrorUTF8(cx, "Failed to initialize cURL handle."); + if(state.recvbuf) JS_free(cx, state.recvbuf); + return ret; + } + + tmp = JS_GetReservedSlot(obj, 0); + + if(!(referer = enc_string(cx, tmp, NULL))) { + JS_ReportErrorUTF8(cx, "Failed to encode referer."); + if(state.recvbuf) JS_free(cx, state.recvbuf); + return ret; + } + curl_easy_setopt(HTTP_HANDLE, CURLOPT_REFERER, referer); + free(referer); + + if(http->method < 0 || http->method > OPTIONS) { + JS_ReportErrorUTF8(cx, "INTERNAL: Unknown method."); + if(state.recvbuf) JS_free(cx, state.recvbuf); + return ret; + } + + curl_easy_setopt(HTTP_HANDLE, CURLOPT_CUSTOMREQUEST, METHODS[http->method]); + curl_easy_setopt(HTTP_HANDLE, CURLOPT_NOBODY, 0); + curl_easy_setopt(HTTP_HANDLE, CURLOPT_FOLLOWLOCATION, 1); + curl_easy_setopt(HTTP_HANDLE, CURLOPT_UPLOAD, 0); + + if(http->method == HEAD) { + curl_easy_setopt(HTTP_HANDLE, CURLOPT_NOBODY, 1); + curl_easy_setopt(HTTP_HANDLE, CURLOPT_FOLLOWLOCATION, 0); + } else if(http->method == POST || http->method == PUT) { + curl_easy_setopt(HTTP_HANDLE, CURLOPT_UPLOAD, 1); + curl_easy_setopt(HTTP_HANDLE, CURLOPT_FOLLOWLOCATION, 0); + } + + if(body && bodylen) { + curl_easy_setopt(HTTP_HANDLE, CURLOPT_INFILESIZE, bodylen); + } else { + curl_easy_setopt(HTTP_HANDLE, CURLOPT_INFILESIZE, 0); + } + + // curl_easy_setopt(HTTP_HANDLE, CURLOPT_VERBOSE, 1); + + curl_easy_setopt(HTTP_HANDLE, CURLOPT_URL, http->url); + curl_easy_setopt(HTTP_HANDLE, CURLOPT_HTTPHEADER, http->req_headers); + curl_easy_setopt(HTTP_HANDLE, CURLOPT_READDATA, &state); + curl_easy_setopt(HTTP_HANDLE, CURLOPT_SEEKDATA, &state); + curl_easy_setopt(HTTP_HANDLE, CURLOPT_WRITEHEADER, &state); + curl_easy_setopt(HTTP_HANDLE, CURLOPT_WRITEDATA, &state); + + if(curl_easy_perform(HTTP_HANDLE) != 0) { + JS_ReportErrorUTF8(cx, "Failed to execute HTTP request: %s", ERRBUF); + if(state.recvbuf) JS_free(cx, state.recvbuf); + return ret; + } + + if(!state.resp_headers) { + JS_ReportErrorUTF8(cx, "Failed to recieve HTTP headers."); + if(state.recvbuf) JS_free(cx, state.recvbuf); + return ret; + } + tmp = JS::ObjectValue(*state.resp_headers); + JS::RootedValue rtmp(cx, tmp); + + if(!JS_DefineProperty( + cx, robj, + "_headers", + rtmp, + JSPROP_READONLY + )) { + JS_ReportErrorUTF8(cx, "INTERNAL: Failed to set response headers."); + if(state.recvbuf) JS_free(cx, state.recvbuf); + return ret;; + } + + if(state.recvbuf) { + state.recvbuf[state.read] = '\0'; + jsbody = dec_string(cx, state.recvbuf, state.read+1); + if(!jsbody) { + // If we can't decode the body as UTF-8 we forcefully + // convert it to a string by just forcing each byte + // to a char16_t. + jsbody = JS_NewStringCopyN(cx, state.recvbuf, state.read); + if(!jsbody) { + if(!JS_IsExceptionPending(cx)) { + JS_ReportErrorUTF8(cx, "INTERNAL: Failed to decode body."); + } + if(state.recvbuf) JS_free(cx, state.recvbuf); + return ret; + } + } + tmp = JS::StringValue(jsbody); + } else { + tmp = JS_GetEmptyStringValue(cx); + } + + JS::RootedValue rtmp2(cx, tmp); + + if(!JS_DefineProperty( + cx, robj, + "responseText", + rtmp2, + JSPROP_READONLY + )) { + JS_ReportErrorUTF8(cx, "INTERNAL: Failed to set responseText."); + if(state.recvbuf) JS_free(cx, state.recvbuf); + return ret; + } + + ret = true; + if(state.recvbuf) JS_free(cx, state.recvbuf); + return ret; +} + +static size_t +send_body(void *ptr, size_t size, size_t nmem, void *data) +{ + CurlState* state = (CurlState*) data; + size_t length = size * nmem; + size_t towrite = state->sendlen - state->sent; + + // Assume this is cURL trying to resend a request that + // failed. + if(towrite == 0 && state->sent_once == 0) { + state->sent_once = 1; + return 0; + } else if(towrite == 0) { + state->sent = 0; + state->sent_once = 0; + towrite = state->sendlen; + } + + if(length < towrite) towrite = length; + + memcpy(ptr, state->sendbuf + state->sent, towrite); + state->sent += towrite; + + return towrite; +} + +static int +seek_body(void* ptr, curl_off_t offset, int origin) +{ + CurlState* state = (CurlState*) ptr; + if(origin != SEEK_SET) return -1; + + state->sent = (size_t) offset; + return (int) state->sent; +} + +static size_t +recv_header(void *ptr, size_t size, size_t nmem, void *data) +{ + CurlState* state = (CurlState*) data; + char code[4]; + char* header = (char*) ptr; + size_t length = size * nmem; + JSString* hdr = NULL; + uint32_t hdrlen; + + if(length > 7 && strncasecmp(header, "HTTP/1.", 7) == 0) { + if(length < 12) { + return CURLE_WRITE_ERROR; + } + + memcpy(code, header+9, 3*sizeof(char)); + code[3] = '\0'; + state->http->last_status = atoi(code); + + state->resp_headers = JS_NewArrayObject(state->cx, 0); + if(!state->resp_headers) { + return CURLE_WRITE_ERROR; + } + + return length; + } + + // We get a notice at the \r\n\r\n after headers. + if(length <= 2) { + return length; + } + + // Append the new header to our array. + hdr = dec_string(state->cx, header, length); + if(!hdr) { + return CURLE_WRITE_ERROR; + } + + JS::RootedObject obj(state->cx, state->resp_headers); + if(!JS_GetArrayLength(state->cx, obj, &hdrlen)) { + return CURLE_WRITE_ERROR; + } + + JS::RootedString hdrval(state->cx, hdr); + if(!JS_SetElement(state->cx, obj, hdrlen, hdrval)) { + return CURLE_WRITE_ERROR; + } + + return length; +} + +static size_t +recv_body(void *ptr, size_t size, size_t nmem, void *data) +{ + CurlState* state = (CurlState*) data; + size_t length = size * nmem; + char* tmp = NULL; + + if(!state->recvbuf) { + state->recvlen = 4096; + state->read = 0; + state->recvbuf = static_cast(JS_malloc(state->cx, state->recvlen)); + } + + if(!state->recvbuf) { + return CURLE_WRITE_ERROR; + } + + // +1 so we can add '\0' back up in the go function. + size_t oldlen = state->recvlen; + while(length+1 > state->recvlen - state->read) state->recvlen *= 2; + tmp = static_cast(JS_realloc(state->cx, state->recvbuf, oldlen, state->recvlen)); + if(!tmp) return CURLE_WRITE_ERROR; + state->recvbuf = tmp; + + memcpy(state->recvbuf + state->read, ptr, length); + state->read += length; + return length; +} + +/*JSString* +str_from_binary(JSContext* cx, char* data, size_t length) +{ + char16_t* conv = static_cast(JS_malloc(cx, length * sizeof(char16_t))); + JSString* ret = NULL; + size_t i; + + if(!conv) return NULL; + + for(i = 0; i < length; i++) { + conv[i] = (char16_t) data[i]; + } + + ret = JS_NewUCString(cx, conv, length); + if(!ret) JS_free(cx, conv); + + return ret; +} +*/ + +#endif /* HAVE_CURL */ diff --git a/src/couch/priv/couch_js/68/http.h b/src/couch/priv/couch_js/68/http.h new file mode 100644 index 000000000..797b3c060 --- /dev/null +++ b/src/couch/priv/couch_js/68/http.h @@ -0,0 +1,27 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); you may not +// use this file except in compliance with the License. You may obtain a copy of +// the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations under +// the License. + +#ifndef COUCH_JS_HTTP_H +#define COUCH_JS_HTTP_H + +#include "util.h" + +void http_check_enabled(); +bool http_ctor(JSContext* cx, JSObject* req); +void http_dtor(JSFreeOp* fop, JSObject* req); +bool http_open(JSContext* cx, JSObject* req, JS::Value mth, JS::Value url, JS::Value snc); +bool http_set_hdr(JSContext* cx, JSObject* req, JS::Value name, JS::Value val); +bool http_send(JSContext* cx, JSObject* req, JS::Value body); +int http_status(JSContext* cx, JSObject* req); +bool http_uri(JSContext* cx, JSObject *req, couch_args* args, JS::Value* uri); + +#endif diff --git a/src/couch/priv/couch_js/68/main.cpp b/src/couch/priv/couch_js/68/main.cpp new file mode 100644 index 000000000..3860a01a8 --- /dev/null +++ b/src/couch/priv/couch_js/68/main.cpp @@ -0,0 +1,494 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); you may not +// use this file except in compliance with the License. You may obtain a copy of +// the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations under +// the License. + +#include +#include +#include + +#ifdef XP_WIN +#define NOMINMAX +#include +#else +#include +#endif + +#include +#include +#include +#include +#include +#include +#include + +#include "config.h" +#include "http.h" +#include "utf8.h" +#include "util.h" + +static bool enableSharedMemory = true; + +static JSClassOps global_ops = { + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + JS_GlobalObjectTraceHook +}; + +/* The class of the global object. */ +static JSClass global_class = { + "global", + JSCLASS_GLOBAL_FLAGS, + &global_ops +}; + + +static void +req_dtor(JSFreeOp* fop, JSObject* obj) +{ + http_dtor(fop, obj); +} + +// With JSClass.construct. +static const JSClassOps clsOps = { + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + req_dtor, + nullptr, + nullptr, + nullptr +}; + +static const JSClass CouchHTTPClass = { + "CouchHTTP", /* name */ + JSCLASS_HAS_PRIVATE | JSCLASS_HAS_RESERVED_SLOTS(2), /* flags */ + &clsOps +}; + +static bool +req_ctor(JSContext* cx, unsigned int argc, JS::Value* vp) +{ + bool ret; + JS::CallArgs args = JS::CallArgsFromVp(argc, vp); + JSObject* obj = JS_NewObjectForConstructor(cx, &CouchHTTPClass, args); + if(!obj) { + JS_ReportErrorUTF8(cx, "Failed to create CouchHTTP instance"); + return false; + } + ret = http_ctor(cx, obj); + args.rval().setObject(*obj); + return ret; +} + +static bool +req_open(JSContext* cx, unsigned int argc, JS::Value* vp) +{ + GET_THIS(cx, argc, vp, args, obj) + bool ret = false; + + if(argc == 2) { + ret = http_open(cx, obj, args[0], args[1], JS::BooleanValue(false)); + } else if(argc == 3) { + ret = http_open(cx, obj, args[0], args[1], args[2]); + } else { + JS_ReportErrorUTF8(cx, "Invalid call to CouchHTTP.open"); + } + + args.rval().setUndefined(); + return ret; +} + + +static bool +req_set_hdr(JSContext* cx, unsigned int argc, JS::Value* vp) +{ + GET_THIS(cx, argc, vp, args, obj) + bool ret = false; + + if(argc == 2) { + ret = http_set_hdr(cx, obj, args[0], args[1]); + } else { + JS_ReportErrorUTF8(cx, "Invalid call to CouchHTTP.set_header"); + } + + args.rval().setUndefined(); + return ret; +} + + +static bool +req_send(JSContext* cx, unsigned int argc, JS::Value* vp) +{ + GET_THIS(cx, argc, vp, args, obj) + bool ret = false; + + if(argc == 1) { + ret = http_send(cx, obj, args[0]); + } else { + JS_ReportErrorUTF8(cx, "Invalid call to CouchHTTP.send"); + } + + args.rval().setUndefined(); + return ret; +} + +static bool +req_status(JSContext* cx, unsigned int argc, JS::Value* vp) +{ + GET_THIS(cx, argc, vp, args, obj) + int status = http_status(cx, obj); + + if(status < 0) + return false; + + args.rval().set(JS::Int32Value(status)); + return true; +} + +static bool +base_url(JSContext *cx, unsigned int argc, JS::Value* vp) +{ + GET_THIS(cx, argc, vp, args, obj) + couch_args *cargs = (couch_args*)JS_GetContextPrivate(cx); + JS::Value uri_val; + bool rc = http_uri(cx, obj, cargs, &uri_val); + args.rval().set(uri_val); + return rc; +} + +static JSObject* +NewSandbox(JSContext* cx, bool lazy) +{ + JS::RealmOptions options; + options.creationOptions().setSharedMemoryAndAtomicsEnabled(enableSharedMemory); + options.creationOptions().setNewCompartmentAndZone(); + JS::RootedObject obj(cx, JS_NewGlobalObject(cx, &global_class, nullptr, + JS::DontFireOnNewGlobalHook, options)); + if (!obj) + return nullptr; + + { + JSAutoRealm ac(cx, obj); + if (!lazy && !JS::InitRealmStandardClasses(cx)) + return nullptr; + + JS::RootedValue value(cx, JS::BooleanValue(lazy)); + if (!JS_DefineProperty(cx, obj, "lazy", value, JSPROP_PERMANENT | JSPROP_READONLY)) + return nullptr; + + JS_FireOnNewGlobalObject(cx, obj); + } + + if (!JS_WrapObject(cx, &obj)) + return nullptr; + return obj; +} + +static bool +evalcx(JSContext *cx, unsigned int argc, JS::Value* vp) +{ + JS::CallArgs args = JS::CallArgsFromVp(argc, vp); + bool ret = false; + + JS::RootedString str(cx, args[0].toString()); + if (!str) + return false; + + JS::RootedObject sandbox(cx); + if (args.hasDefined(1)) { + sandbox = JS::ToObject(cx, args[1]); + if (!sandbox) + return false; + } + + if (!sandbox) { + sandbox = NewSandbox(cx, false); + if (!sandbox) + return false; + } + + JS::AutoStableStringChars strChars(cx); + if (!strChars.initTwoByte(cx, str)) + return false; + + mozilla::Range chars = strChars.twoByteRange(); + JS::SourceText srcBuf; + if (!srcBuf.init(cx, chars.begin().get(), chars.length(), + JS::SourceOwnership::Borrowed)) { + return false; + } + + if(srcBuf.length() == 0) { + args.rval().setObject(*sandbox); + } else { + mozilla::Maybe ar; + unsigned flags; + JSObject* unwrapped = UncheckedUnwrap(sandbox, true, &flags); + if (flags & js::Wrapper::CROSS_COMPARTMENT) { + sandbox = unwrapped; + ar.emplace(cx, sandbox); + } + + JS::CompileOptions opts(cx); + JS::RootedValue rval(cx); + opts.setFileAndLine("", 1); + + if (!JS::Evaluate(cx, opts, srcBuf, args.rval())) { + return false; + } + } + ret = true; + if (!JS_WrapValue(cx, args.rval())) + return false; + + return ret; +} + + +static bool +gc(JSContext* cx, unsigned int argc, JS::Value* vp) +{ + JS::CallArgs args = JS::CallArgsFromVp(argc, vp); + JS_GC(cx); + args.rval().setUndefined(); + return true; +} + + +static bool +print(JSContext* cx, unsigned int argc, JS::Value* vp) +{ + JS::CallArgs args = JS::CallArgsFromVp(argc, vp); + couch_print(cx, argc, args); + args.rval().setUndefined(); + return true; +} + + +static bool +quit(JSContext* cx, unsigned int argc, JS::Value* vp) +{ + JS::CallArgs args = JS::CallArgsFromVp(argc, vp); + + int exit_code = args[0].toInt32();; + exit(exit_code); +} + + +static bool +readline(JSContext* cx, unsigned int argc, JS::Value* vp) +{ + JSString* line; + JS::CallArgs args = JS::CallArgsFromVp(argc, vp); + + /* GC Occasionally */ + JS_MaybeGC(cx); + + line = couch_readline(cx, stdin); + if(line == NULL) return false; + + // return with JSString* instead of JSValue in the past + args.rval().setString(line); + return true; +} + + +static bool +seal(JSContext* cx, unsigned int argc, JS::Value* vp) +{ + JS::CallArgs args = JS::CallArgsFromVp(argc, vp); + JS::RootedObject target(cx); + target = JS::ToObject(cx, args[0]); + if (!target) { + args.rval().setUndefined(); + return true; + } + bool deep = false; + deep = args[1].toBoolean(); + bool ret = deep ? JS_DeepFreezeObject(cx, target) : JS_FreezeObject(cx, target); + args.rval().setUndefined(); + return ret; +} + + +static bool +js_sleep(JSContext* cx, unsigned int argc, JS::Value* vp) +{ + JS::CallArgs args = JS::CallArgsFromVp(argc, vp); + + int duration = args[0].toInt32(); + +#ifdef XP_WIN + Sleep(duration); +#else + usleep(duration * 1000); +#endif + + return true; +} + +JSPropertySpec CouchHTTPProperties[] = { + JS_PSG("status", req_status, 0), + JS_PSG("base_url", base_url, 0), + JS_PS_END +}; + + +JSFunctionSpec CouchHTTPFunctions[] = { + JS_FN("_open", req_open, 3, 0), + JS_FN("_setRequestHeader", req_set_hdr, 2, 0), + JS_FN("_send", req_send, 1, 0), + JS_FS_END +}; + + +JSFunctionSpec TestSuiteFunctions[] = { + JS_FN("sleep", js_sleep, 1, 0), + JS_FS_END +}; + + +static JSFunctionSpec global_functions[] = { + JS_FN("evalcx", evalcx, 0, 0), + JS_FN("gc", gc, 0, 0), + JS_FN("print", print, 0, 0), + JS_FN("quit", quit, 0, 0), + JS_FN("readline", readline, 0, 0), + JS_FN("seal", seal, 0, 0), + JS_FS_END +}; + + +static bool +csp_allows(JSContext* cx, JS::HandleValue code) +{ + couch_args *args = (couch_args*)JS_GetContextPrivate(cx); + if(args->eval) { + return true; + } else { + return false; + } +} + + +static JSSecurityCallbacks security_callbacks = { + csp_allows, + nullptr +}; + + +int +main(int argc, const char* argv[]) +{ + JSContext* cx = NULL; + JSObject* klass = NULL; + int i; + + couch_args* args = couch_parse_args(argc, argv); + + JS_Init(); + cx = JS_NewContext(args->stack_size, 8L * 1024L); + if(cx == NULL) + return 1; + + JS_SetGlobalJitCompilerOption(cx, JSJITCOMPILER_BASELINE_ENABLE, 0); + JS_SetGlobalJitCompilerOption(cx, JSJITCOMPILER_ION_ENABLE, 0); + + if (!JS::InitSelfHostedCode(cx)) + return 1; + + JS::SetWarningReporter(cx, couch_error); + JS::SetOutOfMemoryCallback(cx, couch_oom, NULL); + JS_SetContextPrivate(cx, args); + JS_SetSecurityCallbacks(cx, &security_callbacks); + + JS::RealmOptions options; + JS::RootedObject global(cx, JS_NewGlobalObject(cx, &global_class, nullptr, + JS::FireOnNewGlobalHook, options)); + if (!global) + return 1; + + JSAutoRealm ar(cx, global); + + if(!JS::InitRealmStandardClasses(cx)) + return 1; + + if(couch_load_funcs(cx, global, global_functions) != true) + return 1; + + if(args->use_http) { + http_check_enabled(); + + klass = JS_InitClass( + cx, global, + NULL, + &CouchHTTPClass, req_ctor, + 0, + CouchHTTPProperties, CouchHTTPFunctions, + NULL, NULL + ); + + if(!klass) + { + fprintf(stderr, "Failed to initialize CouchHTTP class.\n"); + exit(2); + } + } + + if(args->use_test_funs) { + if(couch_load_funcs(cx, global, TestSuiteFunctions) != true) + return 1; + } + + for(i = 0 ; args->scripts[i] ; i++) { + const char* filename = args->scripts[i]; + + // Compile and run + JS::CompileOptions options(cx); + options.setFileAndLine(filename, 1); + JS::RootedScript script(cx); + FILE* fp; + + fp = fopen(args->scripts[i], "r"); + if(fp == NULL) { + fprintf(stderr, "Failed to read file: %s\n", filename); + return 3; + } + script = JS::CompileUtf8File(cx, options, fp); + fclose(fp); + if (!script) { + fprintf(stderr, "Failed to compile file: %s\n", filename); + return 1; + } + + JS::RootedValue result(cx); + if(JS_ExecuteScript(cx, script, &result) != true) { + fprintf(stderr, "Failed to execute script.\n"); + return 1; + } + + // Give the GC a chance to run. + JS_MaybeGC(cx); + } + + return 0; +} diff --git a/src/couch/priv/couch_js/68/utf8.cpp b/src/couch/priv/couch_js/68/utf8.cpp new file mode 100644 index 000000000..c28e026f7 --- /dev/null +++ b/src/couch/priv/couch_js/68/utf8.cpp @@ -0,0 +1,309 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); you may not +// use this file except in compliance with the License. You may obtain a copy of +// the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations under +// the License. + +#include +#include +#include +#include +#include +#include "config.h" +#include "util.h" + +static int +enc_char(uint8_t *utf8Buffer, uint32_t ucs4Char) +{ + int utf8Length = 1; + + if (ucs4Char < 0x80) + { + *utf8Buffer = (uint8_t)ucs4Char; + } + else + { + int i; + uint32_t a = ucs4Char >> 11; + utf8Length = 2; + while(a) + { + a >>= 5; + utf8Length++; + } + i = utf8Length; + while(--i) + { + utf8Buffer[i] = (uint8_t)((ucs4Char & 0x3F) | 0x80); + ucs4Char >>= 6; + } + *utf8Buffer = (uint8_t)(0x100 - (1 << (8-utf8Length)) + ucs4Char); + } + + return utf8Length; +} + +static bool +enc_charbuf(const char16_t* src, size_t srclen, char* dst, size_t* dstlenp) +{ + size_t i; + size_t utf8Len; + size_t dstlen = *dstlenp; + size_t origDstlen = dstlen; + char16_t c; + char16_t c2; + uint32_t v; + uint8_t utf8buf[6]; + + if(!dst) + { + dstlen = origDstlen = (size_t) -1; + } + + while(srclen) + { + c = *src++; + srclen--; + + if(c <= 0xD7FF || c >= 0xE000) + { + v = (uint32_t) c; + } + else if(c >= 0xD800 && c <= 0xDBFF) + { + if(srclen < 1) goto buffer_too_small; + c2 = *src++; + srclen--; + if(c2 >= 0xDC00 && c2 <= 0xDFFF) + { + v = (uint32_t) (((c - 0xD800) << 10) + (c2 - 0xDC00) + 0x10000); + } + else + { + // Invalid second half of surrogate pair + v = (uint32_t) 0xFFFD; + // Undo our character advancement + src--; + srclen++; + } + } + else + { + // Invalid first half surrogate pair + v = (uint32_t) 0xFFFD; + } + + if(v < 0x0080) + { + // no encoding necessary - performance hack + if(!dstlen) goto buffer_too_small; + if(dst) *dst++ = (char) v; + utf8Len = 1; + } + else + { + utf8Len = enc_char(utf8buf, v); + if(utf8Len > dstlen) goto buffer_too_small; + if(dst) + { + for (i = 0; i < utf8Len; i++) + { + *dst++ = (char) utf8buf[i]; + } + } + } + dstlen -= utf8Len; + } + + *dstlenp = (origDstlen - dstlen); + return true; + +buffer_too_small: + *dstlenp = (origDstlen - dstlen); + return false; +} + +char* +enc_string(JSContext* cx, JS::Value arg, size_t* buflen) +{ + JSString* str = NULL; + const char16_t* src = NULL; + char* bytes = NULL; + size_t srclen = 0; + size_t byteslen = 0; + JS::AutoStableStringChars rawChars(cx); + + str = arg.toString(); + if(!str) goto error; + + if (!rawChars.initTwoByte(cx, str)) + return NULL; + + src = rawChars.twoByteRange().begin().get(); + srclen = JS_GetStringLength(str); + + if(!enc_charbuf(src, srclen, NULL, &byteslen)) goto error; + + bytes = js_pod_malloc(byteslen + 1); + bytes[byteslen] = 0; + + if(!enc_charbuf(src, srclen, bytes, &byteslen)) goto error; + + if(buflen) *buflen = byteslen; + goto success; + +error: + if(bytes != NULL) JS_free(cx, bytes); + bytes = NULL; + +success: +/* + JS::RootedString str(cx, arg.toString()); + JS::UniqueChars chars = JS_EncodeStringToUTF8(cx, str); + + if(buflen) *buflen = strlen(chars.get()); + + return JS_NewUCStringCopyN(cs, chars.get(), buflen); +*/ + return bytes; +} + +static uint32_t +dec_char(const uint8_t *utf8Buffer, int utf8Length) +{ + uint32_t ucs4Char; + uint32_t minucs4Char; + + // from Unicode 3.1, non-shortest form is illegal + static const uint32_t minucs4Table[] = { + 0x00000080, 0x00000800, 0x0001000, 0x0020000, 0x0400000 + }; + + if (utf8Length == 1) + { + ucs4Char = *utf8Buffer; + } + else + { + ucs4Char = *utf8Buffer++ & ((1<<(7-utf8Length))-1); + minucs4Char = minucs4Table[utf8Length-2]; + while(--utf8Length) + { + ucs4Char = ucs4Char<<6 | (*utf8Buffer++ & 0x3F); + } + if(ucs4Char < minucs4Char || ucs4Char == 0xFFFE || ucs4Char == 0xFFFF) + { + ucs4Char = 0xFFFD; + } + } + + return ucs4Char; +} + +static bool +dec_charbuf(const char *src, size_t srclen, char16_t *dst, size_t *dstlenp) +{ + uint32_t v; + size_t offset = 0; + size_t j; + size_t n; + size_t dstlen = *dstlenp; + size_t origDstlen = dstlen; + + if(!dst) dstlen = origDstlen = (size_t) -1; + + while(srclen) + { + v = (uint8_t) *src; + n = 1; + + if(v & 0x80) + { + while(v & (0x80 >> n)) + { + n++; + } + + if(n > srclen) goto buffer_too_small; + if(n == 1 || n > 6) goto bad_character; + + for(j = 1; j < n; j++) + { + if((src[j] & 0xC0) != 0x80) goto bad_character; + } + + v = dec_char((const uint8_t *) src, n); + if(v >= 0x10000) + { + v -= 0x10000; + + if(v > 0xFFFFF || dstlen < 2) + { + *dstlenp = (origDstlen - dstlen); + return false; + } + + if(dstlen < 2) goto buffer_too_small; + + if(dst) + { + *dst++ = (char16_t)((v >> 10) + 0xD800); + v = (char16_t)((v & 0x3FF) + 0xDC00); + } + dstlen--; + } + } + + if(!dstlen) goto buffer_too_small; + if(dst) *dst++ = (char16_t) v; + + dstlen--; + offset += n; + src += n; + srclen -= n; + } + + *dstlenp = (origDstlen - dstlen); + return true; + +bad_character: + *dstlenp = (origDstlen - dstlen); + return false; + +buffer_too_small: + *dstlenp = (origDstlen - dstlen); + return false; +} + +JSString* +dec_string(JSContext* cx, const char* bytes, size_t byteslen) +{ + JSString* str = NULL; + size_t charslen; + + if(!dec_charbuf(bytes, byteslen, NULL, &charslen)) return NULL; + + JS::UniqueTwoByteChars chars(js_pod_malloc(charslen + 1)); + if(!chars) return NULL; + chars.get()[charslen] = 0; + + if(!dec_charbuf(bytes, byteslen, chars.get(), &charslen)) goto error; + + str = JS_NewUCString(cx, std::move(chars), charslen - 1); + if(!str) goto error; + + goto success; + +error: + if(chars != NULL) JS_free(cx, chars.get()); + str = NULL; + +success: + return str; +} diff --git a/src/couch/priv/couch_js/68/utf8.h b/src/couch/priv/couch_js/68/utf8.h new file mode 100644 index 000000000..c8b1f4d82 --- /dev/null +++ b/src/couch/priv/couch_js/68/utf8.h @@ -0,0 +1,19 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); you may not +// use this file except in compliance with the License. You may obtain a copy of +// the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations under +// the License. + +#ifndef COUCH_JS_UTF_8_H +#define COUCH_JS_UTF_8_H + +char* enc_string(JSContext* cx, JS::Value arg, size_t* buflen); +JSString* dec_string(JSContext* cx, const char* buf, size_t buflen); + +#endif diff --git a/src/couch/priv/couch_js/68/util.cpp b/src/couch/priv/couch_js/68/util.cpp new file mode 100644 index 000000000..f941e7dd2 --- /dev/null +++ b/src/couch/priv/couch_js/68/util.cpp @@ -0,0 +1,350 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); you may not +// use this file except in compliance with the License. You may obtain a copy of +// the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations under +// the License. + +#include +#include + +#include +#include +#include +#include +#include + +#include "help.h" +#include "util.h" +#include "utf8.h" + +/* +std::string +js_to_string(JSContext* cx, JS::HandleValue val) +{ + JS::RootedString sval(cx); + sval = val.toString(); + + JS::UniqueChars chars(JS_EncodeStringToUTF8(cx, sval)); + if(!chars) { + JS_ClearPendingException(cx); + fprintf(stderr, "Error converting value to string.\n"); + exit(3); + } + + return chars.get(); +} + +std::string +js_to_string(JSContext* cx, JSString *str) +{ + JS::UniqueChars chars(JS_EncodeString(cx, str)); + if(!chars) { + JS_ClearPendingException(cx); + fprintf(stderr, "Error converting to string.\n"); + exit(3); + } + + return chars.get(); +} +*/ + +JSString* +string_to_js(JSContext* cx, const std::string& s) +{ +/* + + JSString* ret = JS_NewStringCopyN(cx, s.c_str(), s.size()); + if(ret != nullptr) { + return ret; + } + + fprintf(stderr, "Unable to allocate string object.\n"); + exit(3); +*/ + return dec_string(cx, s.c_str(), s.size()); +} + +size_t +couch_readfile(const char* file, char** outbuf_p) +{ + FILE* fp; + char fbuf[16384]; + char *buf = NULL; + char* tmp; + size_t nread = 0; + size_t buflen = 0; + + if(strcmp(file, "-") == 0) { + fp = stdin; + } else { + fp = fopen(file, "r"); + if(fp == NULL) { + fprintf(stderr, "Failed to read file: %s\n", file); + exit(3); + } + } + + while((nread = fread(fbuf, 1, 16384, fp)) > 0) { + if(buf == NULL) { + buf = (char*) malloc(nread + 1); + if(buf == NULL) { + fprintf(stderr, "Out of memory.\n"); + exit(3); + } + memcpy(buf, fbuf, nread); + } else { + tmp = (char*) malloc(buflen + nread + 1); + if(tmp == NULL) { + fprintf(stderr, "Out of memory.\n"); + exit(3); + } + memcpy(tmp, buf, buflen); + memcpy(tmp+buflen, fbuf, nread); + free(buf); + buf = tmp; + } + buflen += nread; + buf[buflen] = '\0'; + } + *outbuf_p = buf; + return buflen ; +} + +couch_args* +couch_parse_args(int argc, const char* argv[]) +{ + couch_args* args; + int i = 1; + + args = (couch_args*) malloc(sizeof(couch_args)); + if(args == NULL) + return NULL; + + memset(args, '\0', sizeof(couch_args)); + args->stack_size = 64L * 1024L * 1024L; + + while(i < argc) { + if(strcmp("-h", argv[i]) == 0) { + DISPLAY_USAGE; + exit(0); + } else if(strcmp("-V", argv[i]) == 0) { + DISPLAY_VERSION; + exit(0); + } else if(strcmp("-H", argv[i]) == 0) { + args->use_http = 1; + } else if(strcmp("-T", argv[i]) == 0) { + args->use_test_funs = 1; + } else if(strcmp("-S", argv[i]) == 0) { + args->stack_size = atoi(argv[++i]); + if(args->stack_size <= 0) { + fprintf(stderr, "Invalid stack size.\n"); + exit(2); + } + } else if(strcmp("-u", argv[i]) == 0) { + args->uri_file = argv[++i]; + } else if(strcmp("--eval", argv[i]) == 0) { + args->eval = 1; + } else if(strcmp("--", argv[i]) == 0) { + i++; + break; + } else { + break; + } + i++; + } + + if(i >= argc) { + DISPLAY_USAGE; + exit(3); + } + args->scripts = argv + i; + + return args; +} + + +int +couch_fgets(char* buf, int size, FILE* fp) +{ + int n, i, c; + + if(size <= 0) return -1; + n = size - 1; + + for(i = 0; i < n && (c = getc(fp)) != EOF; i++) { + buf[i] = c; + if(c == '\n') { + i++; + break; + } + } + + buf[i] = '\0'; + return i; +} + + +JSString* +couch_readline(JSContext* cx, FILE* fp) +{ + JSString* str; + char* bytes = NULL; + char* tmp = NULL; + size_t used = 0; + size_t byteslen = 256; + size_t oldbyteslen = 256; + size_t readlen = 0; + bool sawNewline = false; + + bytes = static_cast(JS_malloc(cx, byteslen)); + if(bytes == NULL) return NULL; + + while((readlen = couch_fgets(bytes+used, byteslen-used, fp)) > 0) { + used += readlen; + + if(bytes[used-1] == '\n') { + bytes[used-1] = '\0'; + sawNewline = true; + break; + } + + // Double our buffer and read more. + oldbyteslen = byteslen; + byteslen *= 2; + tmp = static_cast(JS_realloc(cx, bytes, oldbyteslen, byteslen)); + if(!tmp) { + JS_free(cx, bytes); + return NULL; + } + + bytes = tmp; + } + + // Treat empty strings specially + if(used == 0) { + JS_free(cx, bytes); + return JS_NewStringCopyZ(cx, nullptr); + } + + // Shrink the buffer to the actual data size + tmp = static_cast(JS_realloc(cx, bytes, byteslen, used)); + if(!tmp) { + JS_free(cx, bytes); + return NULL; + } + bytes = tmp; + byteslen = used; + + str = string_to_js(cx, std::string(tmp, byteslen)); + JS_free(cx, bytes); + return str; +} + + +void +couch_print(JSContext* cx, unsigned int argc, JS::CallArgs argv) +{ + FILE *stream = stdout; + + if (argc) { + if (argc > 1 && argv[1].isTrue()) { + stream = stderr; + } + JS::AutoSaveExceptionState exc_state(cx); + JS::RootedString sval(cx, JS::ToString(cx, argv[0])); + if (!sval) { + fprintf(stream, "couch_print: \n"); + fflush(stream); + return; + } + JS::UniqueChars bytes(JS_EncodeStringToUTF8(cx, sval)); + if (!bytes) + return; + + fprintf(stream, "%s", bytes.get()); + exc_state.restore(); + } + + fputc('\n', stream); + fflush(stream); +} + + +void +couch_error(JSContext* cx, JSErrorReport* report) +{ + JS::RootedValue v(cx), stack(cx), replace(cx); + char* bytes; + JSObject* regexp; + + if(!report || !JSREPORT_IS_WARNING(report->flags)) + { + fprintf(stderr, "%s\n", report->message().c_str()); + + // Print a stack trace, if available. + if (JSREPORT_IS_EXCEPTION(report->flags) && + JS_GetPendingException(cx, &v)) + { + // Clear the exception before an JS method calls or the result is + // infinite, recursive error report generation. + JS_ClearPendingException(cx); + + // Use JS regexp to indent the stack trace. + // If the regexp can't be created, don't JS_ReportErrorUTF8 since it is + // probably not productive to wind up here again. + JS::RootedObject vobj(cx, v.toObjectOrNull()); + + if(JS_GetProperty(cx, vobj, "stack", &stack) && + (regexp = JS::NewRegExpObject( + cx, "^(?=.)", 6, JS::RegExpFlag::Global | JS::RegExpFlag::Multiline))) + + { + // Set up the arguments to ``String.replace()`` + JS::RootedValueVector re_args(cx); + JS::RootedValue arg0(cx, JS::ObjectValue(*regexp)); + auto arg1 = JS::StringValue(string_to_js(cx, "\t")); + + if (re_args.append(arg0) && re_args.append(arg1)) { + // Perform the replacement + JS::RootedObject sobj(cx, stack.toObjectOrNull()); + if(JS_GetProperty(cx, sobj, "replace", &replace) && + JS_CallFunctionValue(cx, sobj, replace, re_args, &v)) + { + // Print the result + bytes = enc_string(cx, v, NULL); + fprintf(stderr, "Stacktrace:\n%s", bytes); + JS_free(cx, bytes); + } + } + } + } + } +} + + +void +couch_oom(JSContext* cx, void* data) +{ + fprintf(stderr, "out of memory\n"); + exit(1); +} + + +bool +couch_load_funcs(JSContext* cx, JS::HandleObject obj, JSFunctionSpec* funcs) +{ + JSFunctionSpec* f; + for(f = funcs; f->name; f++) { + if(!JS_DefineFunction(cx, obj, f->name.string(), f->call.op, f->nargs, f->flags)) { + fprintf(stderr, "Failed to create function: %s\n", f->name.string()); + return false; + } + } + return true; +} diff --git a/src/couch/priv/couch_js/68/util.h b/src/couch/priv/couch_js/68/util.h new file mode 100644 index 000000000..dc8a3a7b4 --- /dev/null +++ b/src/couch/priv/couch_js/68/util.h @@ -0,0 +1,60 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); you may not +// use this file except in compliance with the License. You may obtain a copy of +// the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations under +// the License. + +#ifndef COUCHJS_UTIL_H +#define COUCHJS_UTIL_H + +#include + +typedef struct { + int eval; + int use_http; + int use_test_funs; + int stack_size; + const char** scripts; + const char* uri_file; + JSString* uri; +} couch_args; + +/* +std::string js_to_string(JSContext* cx, JS::HandleValue val); +std::string js_to_string(JSContext* cx, JSString *str); +JSString* string_to_js(JSContext* cx, const std::string& s); +*/ + +couch_args* couch_parse_args(int argc, const char* argv[]); +int couch_fgets(char* buf, int size, FILE* fp); +JSString* couch_readline(JSContext* cx, FILE* fp); +size_t couch_readfile(const char* file, char** outbuf_p); +void couch_print(JSContext* cx, unsigned int argc, JS::CallArgs argv); +void couch_error(JSContext* cx, JSErrorReport* report); +void couch_oom(JSContext* cx, void* data); +bool couch_load_funcs(JSContext* cx, JS::HandleObject obj, JSFunctionSpec* funcs); + +/* + * GET_THIS: + * @cx: JSContext pointer passed into JSNative function + * @argc: Number of arguments passed into JSNative function + * @vp: Argument value array passed into JSNative function + * @args: Name for JS::CallArgs variable defined by this code snippet + * @to: Name for JS::RootedObject variable referring to function's this + * + * A convenience macro for getting the 'this' object a function was called with. + * Use in any JSNative function. + */ +#define GET_THIS(cx, argc, vp, args, to) \ + JS::CallArgs args = JS::CallArgsFromVp(argc, vp); \ + JS::RootedObject to(cx); \ + if (!args.computeThis(cx, &to)) \ + return false; + +#endif // Included util.h diff --git a/src/couch/rebar.config.script b/src/couch/rebar.config.script index 91e24d99e..89c652a58 100644 --- a/src/couch/rebar.config.script +++ b/src/couch/rebar.config.script @@ -22,7 +22,7 @@ CopyIfDifferent = fun(Path, Contents) -> false -> file:write_file(Path, Contents) end -end, +end. CouchJSName = case os:type() of @@ -30,21 +30,21 @@ CouchJSName = case os:type() of "couchjs.exe"; _ -> "couchjs" -end, -CouchJSPath = filename:join(["priv", CouchJSName]), +end. +CouchJSPath = filename:join(["priv", CouchJSName]). Version = case os:getenv("COUCHDB_VERSION") of false -> string:strip(os:cmd("git describe --always"), right, $\n); Version0 -> string:strip(Version0, right) -end, +end. GitSha = case os:getenv("COUCHDB_GIT_SHA") of false -> ""; % release builds won’t get a fallback GitSha0 -> string:strip(GitSha0, right) -end, +end. CouchConfig = case filelib:is_file(os:getenv("COUCHDB_CONFIG")) of true -> @@ -59,6 +59,8 @@ SMVsn = case lists:keyfind(spidermonkey_version, 1, CouchConfig) of "1.8.5"; {_, "60"} -> "60"; + {_, "68"} -> + "68"; undefined -> "1.8.5"; {_, Unsupported} -> @@ -78,24 +80,24 @@ ConfigH = [ {"PACKAGE_NAME", "\"Apache CouchDB\""}, {"PACKAGE_STRING", "\"Apache CouchDB " ++ Version ++ "\""}, {"PACKAGE_VERSION", "\"" ++ Version ++ "\""} -], +]. -CouchJSConfig = "priv/couch_js/" ++ SMVsn ++ "/config.h", -ConfigSrc = [["#define ", K, " ", V, $\n] || {K, V} <- ConfigH], -ConfigBin = iolist_to_binary(ConfigSrc), -ok = CopyIfDifferent(CouchJSConfig, ConfigBin), +CouchJSConfig = "priv/couch_js/" ++ SMVsn ++ "/config.h". +ConfigSrc = [["#define ", K, " ", V, $\n] || {K, V} <- ConfigH]. +ConfigBin = iolist_to_binary(ConfigSrc). +ok = CopyIfDifferent(CouchJSConfig, ConfigBin). MD5Config = case lists:keyfind(erlang_md5, 1, CouchConfig) of {erlang_md5, true} -> [{d, 'ERLANG_MD5', true}]; _ -> [] -end, +end. ProperConfig = case code:lib_dir(proper) of {error, bad_name} -> []; _ -> [{d, 'WITH_PROPER'}] -end, +end. {JS_CFLAGS, JS_LDFLAGS} = case os:type() of {win32, _} when SMVsn == "1.8.5" -> @@ -122,6 +124,11 @@ end, { "-DXP_UNIX -I/usr/include/mozjs-60 -I/usr/local/include/mozjs-60 -std=c++14", "-L/usr/local/lib -std=c++14 -lmozjs-60 -lm" + }; + {unix, _} when SMVsn == "68" -> + { + "-DXP_UNIX -I/usr/include/mozjs-68 -I/usr/local/include/mozjs-68 -std=c++14 -Wno-invalid-offsetof", + "-L/usr/local/lib -std=c++14 -lmozjs-68 -lm" } end. @@ -146,11 +153,12 @@ end. end; _ -> {"", ""} -end, +end. CouchJSSrc = case SMVsn of "1.8.5" -> ["priv/couch_js/1.8.5/*.c"]; - "60" -> ["priv/couch_js/60/*.cpp"] + "60" -> ["priv/couch_js/60/*.cpp"]; + "68" -> ["priv/couch_js/68/*.cpp"] end. CouchJSEnv = case SMVsn of @@ -159,26 +167,26 @@ CouchJSEnv = case SMVsn of {"CFLAGS", JS_CFLAGS ++ " " ++ CURL_CFLAGS}, {"LDFLAGS", JS_LDFLAGS ++ " " ++ CURL_LDFLAGS} ]; - "60" -> + _ -> [ {"CXXFLAGS", JS_CFLAGS ++ " " ++ CURL_CFLAGS}, {"LDFLAGS", JS_LDFLAGS ++ " " ++ CURL_LDFLAGS} ] -end, +end. -IcuPath = "priv/couch_icu_driver.so", -IcuSrc = ["priv/icu_driver/*.c"], +IcuPath = "priv/couch_icu_driver.so". +IcuSrc = ["priv/icu_driver/*.c"]. IcuEnv = [{"DRV_CFLAGS", "$DRV_CFLAGS -DPIC -O2 -fno-common"}, - {"DRV_LDFLAGS", "$DRV_LDFLAGS -lm -licuuc -licudata -licui18n -lpthread"}], + {"DRV_LDFLAGS", "$DRV_LDFLAGS -lm -licuuc -licudata -licui18n -lpthread"}]. IcuDarwinEnv = [{"CFLAGS", "-DXP_UNIX -I/usr/local/opt/icu4c/include"}, - {"LDFLAGS", "-L/usr/local/opt/icu4c/lib"}], + {"LDFLAGS", "-L/usr/local/opt/icu4c/lib"}]. IcuBsdEnv = [{"CFLAGS", "-DXP_UNIX -I/usr/local/include"}, - {"LDFLAGS", "-L/usr/local/lib"}], + {"LDFLAGS", "-L/usr/local/lib"}]. IcuWinEnv = [{"CFLAGS", "$DRV_CFLAGS /DXP_WIN"}, - {"LDFLAGS", "icuin.lib icudt.lib icuuc.lib"}], + {"LDFLAGS", "icuin.lib icudt.lib icuuc.lib"}]. -ComparePath = "priv/couch_ejson_compare.so", -CompareSrc = ["priv/couch_ejson_compare/*.c"], +ComparePath = "priv/couch_ejson_compare.so". +CompareSrc = ["priv/couch_ejson_compare/*.c"]. BaseSpecs = [ %% couchjs @@ -193,17 +201,17 @@ BaseSpecs = [ {"linux", ComparePath, CompareSrc, [{env, IcuEnv}]}, {"bsd", ComparePath, CompareSrc, [{env, IcuEnv ++ IcuBsdEnv}]}, {"win32", ComparePath, CompareSrc, [{env, IcuWinEnv}]} -], +]. SpawnSpec = [ {"priv/couchspawnkillable", ["priv/spawnkillable/*.c"]} -], +]. %% hack required until switch to enc/rebar3 PortEnvOverrides = [ {"win32", "EXE_LINK_CXX_TEMPLATE", "$LINKER $PORT_IN_FILES $LDFLAGS $EXE_LDFLAGS /OUT:$PORT_OUT_FILE"} -], +]. PortSpecs = case os:type() of {win32, _} -> @@ -213,10 +221,10 @@ PortSpecs = case os:type() of ok = CopyIfDifferent("priv/couchspawnkillable", CSK), os:cmd("chmod +x priv/couchspawnkillable"), BaseSpecs -end, +end. PlatformDefines = [ {platform_define, "win32", 'WINDOWS'} -], +]. AddConfig = [ {port_specs, PortSpecs}, {erl_opts, PlatformDefines ++ [ diff --git a/support/build_js.escript b/support/build_js.escript index 90ad3168f..2d9de6112 100644 --- a/support/build_js.escript +++ b/support/build_js.escript @@ -66,6 +66,12 @@ main([]) -> "share/server/rewrite_fun.js" ]; "60" -> + [ + "share/server/60/esprima.js", + "share/server/60/escodegen.js", + "share/server/60/rewrite_fun.js" + ]; + "68" -> [ "share/server/60/esprima.js", "share/server/60/escodegen.js", -- cgit v1.2.1 From e5239b79449f3da9daad9f297b081498f5670004 Mon Sep 17 00:00:00 2001 From: Joan Touzet Date: Sat, 18 Apr 2020 03:22:28 +0000 Subject: Incorporate changes from #2786 --- .gitignore | 1 + src/couch/priv/couch_js/68/http.cpp | 214 ++++++++-------------- src/couch/priv/couch_js/68/main.cpp | 65 +++++-- src/couch/priv/couch_js/68/utf8.cpp | 309 -------------------------------- src/couch/priv/couch_js/68/utf8.h | 19 -- src/couch/priv/couch_js/68/util.cpp | 202 +++++++++++---------- src/couch/priv/couch_js/68/util.h | 23 +-- src/couch/rebar.config.script | 2 +- src/couch/test/eunit/couch_js_tests.erl | 1 - 9 files changed, 239 insertions(+), 597 deletions(-) delete mode 100644 src/couch/priv/couch_js/68/utf8.cpp delete mode 100644 src/couch/priv/couch_js/68/utf8.h diff --git a/.gitignore b/.gitignore index 8a4a6f08d..645817b76 100644 --- a/.gitignore +++ b/.gitignore @@ -117,6 +117,7 @@ src/mango/ebin/ src/mango/test/*.pyc src/mango/nosetests.xml src/mango/venv/ +src/jwtf/.rebar3/ test/javascript/junit.xml /_build/ diff --git a/src/couch/priv/couch_js/68/http.cpp b/src/couch/priv/couch_js/68/http.cpp index a0c73bdc6..20a609701 100644 --- a/src/couch/priv/couch_js/68/http.cpp +++ b/src/couch/priv/couch_js/68/http.cpp @@ -19,7 +19,6 @@ #include #include #include "config.h" -#include "utf8.h" #include "util.h" // Soft dependency on cURL bindings because they're @@ -101,7 +100,6 @@ http_check_enabled() #ifdef XP_WIN #define strcasecmp _strcmpi #define strncasecmp _strnicmp -#define snprintf _snprintf #endif @@ -110,7 +108,7 @@ typedef struct curl_slist CurlHeaders; typedef struct { int method; - char* url; + std::string url; CurlHeaders* req_headers; int16_t last_status; } HTTPData; @@ -128,22 +126,15 @@ const char* METHODS[] = {"GET", "HEAD", "POST", "PUT", "DELETE", "COPY", "OPTION #define OPTIONS 6 -static bool -go(JSContext* cx, JSObject* obj, HTTPData* http, char* body, size_t blen); - - -/*static JSString* -str_from_binary(JSContext* cx, char* data, size_t length); -*/ +static bool go(JSContext* cx, JSObject* obj, HTTPData* http, std::string& body); bool http_ctor(JSContext* cx, JSObject* req) { - HTTPData* http = NULL; + HTTPData* http = new HTTPData(); bool ret = false; - http = (HTTPData*) malloc(sizeof(HTTPData)); if(!http) { JS_ReportErrorUTF8(cx, "Failed to create CouchHTTP instance."); @@ -151,7 +142,6 @@ http_ctor(JSContext* cx, JSObject* req) } http->method = -1; - http->url = NULL; http->req_headers = NULL; http->last_status = -1; @@ -161,7 +151,7 @@ http_ctor(JSContext* cx, JSObject* req) goto success; error: - if(http) free(http); + if(http) delete http; success: return ret; @@ -173,9 +163,8 @@ http_dtor(JSFreeOp* fop, JSObject* obj) { HTTPData* http = (HTTPData*) JS_GetPrivate(obj); if(http) { - if(http->url) free(http->url); if(http->req_headers) curl_slist_free_all(http->req_headers); - free(http); + delete http; } } @@ -184,56 +173,50 @@ bool http_open(JSContext* cx, JSObject* req, JS::Value mth, JS::Value url, JS::Value snc) { HTTPData* http = (HTTPData*) JS_GetPrivate(req); - char* method = NULL; int methid; - bool ret = false; if(!http) { JS_ReportErrorUTF8(cx, "Invalid CouchHTTP instance."); - goto done; + return false; } - if(mth.isUndefined()) { - JS_ReportErrorUTF8(cx, "You must specify a method."); - goto done; + if(!mth.isString()) { + JS_ReportErrorUTF8(cx, "Method must be a string."); + return false; } - method = enc_string(cx, mth, NULL); - if(!method) { + std::string method; + if(!js_to_string(cx, JS::RootedValue(cx, mth), method)) { JS_ReportErrorUTF8(cx, "Failed to encode method."); - goto done; + return false; } for(methid = 0; METHODS[methid] != NULL; methid++) { - if(strcasecmp(METHODS[methid], method) == 0) break; + if(strcasecmp(METHODS[methid], method.c_str()) == 0) break; } if(methid > OPTIONS) { JS_ReportErrorUTF8(cx, "Invalid method specified."); - goto done; + return false; } http->method = methid; - if(url.isUndefined()) { - JS_ReportErrorUTF8(cx, "You must specify a URL."); - goto done; - } - - if(http->url != NULL) { - free(http->url); - http->url = NULL; + if(!url.isString()) { + JS_ReportErrorUTF8(cx, "URL must be a string"); + return false; } - http->url = enc_string(cx, url, NULL); - if(http->url == NULL) { + std::string urlstr; + if(!js_to_string(cx, JS::RootedValue(cx, url), urlstr)) { JS_ReportErrorUTF8(cx, "Failed to encode URL."); - goto done; + return false; } + http->url = urlstr; if(snc.isBoolean() && snc.isTrue()) { JS_ReportErrorUTF8(cx, "Synchronous flag must be false."); - goto done; + return false; } if(http->req_headers) { @@ -244,11 +227,7 @@ http_open(JSContext* cx, JSObject* req, JS::Value mth, JS::Value url, JS::Value // Disable Expect: 100-continue http->req_headers = curl_slist_append(http->req_headers, "Expect:"); - ret = true; - -done: - if(method) free(method); - return ret; + return true; } @@ -256,88 +235,60 @@ bool http_set_hdr(JSContext* cx, JSObject* req, JS::Value name, JS::Value val) { HTTPData* http = (HTTPData*) JS_GetPrivate(req); - char* keystr = NULL; - char* valstr = NULL; - char* hdrbuf = NULL; - size_t hdrlen = -1; - bool ret = false; if(!http) { JS_ReportErrorUTF8(cx, "Invalid CouchHTTP instance."); - goto done; + return false; } - if(name.isUndefined()) + if(!name.isString()) { - JS_ReportErrorUTF8(cx, "You must speciy a header name."); - goto done; + JS_ReportErrorUTF8(cx, "Header names must be strings."); + return false; } - keystr = enc_string(cx, name, NULL); - if(!keystr) + std::string keystr; + if(!js_to_string(cx, JS::RootedValue(cx, name), keystr)) { JS_ReportErrorUTF8(cx, "Failed to encode header name."); - goto done; + return false; } - if(val.isUndefined()) + if(!val.isString()) { - JS_ReportErrorUTF8(cx, "You must specify a header value."); - goto done; + JS_ReportErrorUTF8(cx, "Header values must be strings."); + return false; } - valstr = enc_string(cx, val, NULL); - if(!valstr) - { + std::string valstr; + if(!js_to_string(cx, JS::RootedValue(cx, val), valstr)) { JS_ReportErrorUTF8(cx, "Failed to encode header value."); - goto done; - } - - hdrlen = strlen(keystr) + strlen(valstr) + 3; - hdrbuf = (char*) malloc(hdrlen * sizeof(char)); - if(!hdrbuf) { - JS_ReportErrorUTF8(cx, "Failed to allocate header buffer."); - goto done; + return false; } - snprintf(hdrbuf, hdrlen, "%s: %s", keystr, valstr); - http->req_headers = curl_slist_append(http->req_headers, hdrbuf); - - ret = true; + std::string header = keystr + ": " + valstr; + http->req_headers = curl_slist_append(http->req_headers, header.c_str()); -done: - if(keystr) free(keystr); - if(valstr) free(valstr); - if(hdrbuf) free(hdrbuf); - return ret; + return true; } bool http_send(JSContext* cx, JSObject* req, JS::Value body) { HTTPData* http = (HTTPData*) JS_GetPrivate(req); - char* bodystr = NULL; - size_t bodylen = 0; - bool ret = false; if(!http) { JS_ReportErrorUTF8(cx, "Invalid CouchHTTP instance."); - goto done; + return false; } - if(!body.isUndefined()) { - bodystr = enc_string(cx, body, &bodylen); - if(!bodystr) { - JS_ReportErrorUTF8(cx, "Failed to encode body."); - goto done; - } + std::string bodystr; + if(!js_to_string(cx, JS::RootedValue(cx, body), bodystr)) { + JS_ReportErrorUTF8(cx, "Failed to encode body."); + return false; } - ret = go(cx, req, http, bodystr, bodylen); - -done: - if(bodystr) free(bodystr); - return ret; + return go(cx, req, http, bodystr); } int @@ -397,7 +348,7 @@ typedef struct { HTTPData* http; JSContext* cx; JSObject* resp_headers; - char* sendbuf; + const char* sendbuf; size_t sendlen; size_t sent; int sent_once; @@ -419,10 +370,9 @@ static size_t recv_body(void *ptr, size_t size, size_t nmem, void *data); static size_t recv_header(void *ptr, size_t size, size_t nmem, void *data); static bool -go(JSContext* cx, JSObject* obj, HTTPData* http, char* body, size_t bodylen) +go(JSContext* cx, JSObject* obj, HTTPData* http, std::string& body) { CurlState state; - char* referer; JSString* jsbody; bool ret = false; JS::Value tmp; @@ -433,8 +383,8 @@ go(JSContext* cx, JSObject* obj, HTTPData* http, char* body, size_t bodylen) state.cx = cx; state.http = http; - state.sendbuf = body; - state.sendlen = bodylen; + state.sendbuf = body.c_str();; + state.sendlen = body.size(); state.sent = 0; state.sent_once = 0; @@ -465,13 +415,13 @@ go(JSContext* cx, JSObject* obj, HTTPData* http, char* body, size_t bodylen) tmp = JS_GetReservedSlot(obj, 0); - if(!(referer = enc_string(cx, tmp, NULL))) { + std::string referer; + if(!js_to_string(cx, JS::RootedValue(cx, tmp), referer)) { JS_ReportErrorUTF8(cx, "Failed to encode referer."); if(state.recvbuf) JS_free(cx, state.recvbuf); - return ret; + return ret; } - curl_easy_setopt(HTTP_HANDLE, CURLOPT_REFERER, referer); - free(referer); + curl_easy_setopt(HTTP_HANDLE, CURLOPT_REFERER, referer.c_str()); if(http->method < 0 || http->method > OPTIONS) { JS_ReportErrorUTF8(cx, "INTERNAL: Unknown method."); @@ -492,15 +442,15 @@ go(JSContext* cx, JSObject* obj, HTTPData* http, char* body, size_t bodylen) curl_easy_setopt(HTTP_HANDLE, CURLOPT_FOLLOWLOCATION, 0); } - if(body && bodylen) { - curl_easy_setopt(HTTP_HANDLE, CURLOPT_INFILESIZE, bodylen); + if(body.size() > 0) { + curl_easy_setopt(HTTP_HANDLE, CURLOPT_INFILESIZE, body.size()); } else { curl_easy_setopt(HTTP_HANDLE, CURLOPT_INFILESIZE, 0); } // curl_easy_setopt(HTTP_HANDLE, CURLOPT_VERBOSE, 1); - curl_easy_setopt(HTTP_HANDLE, CURLOPT_URL, http->url); + curl_easy_setopt(HTTP_HANDLE, CURLOPT_URL, http->url.c_str()); curl_easy_setopt(HTTP_HANDLE, CURLOPT_HTTPHEADER, http->req_headers); curl_easy_setopt(HTTP_HANDLE, CURLOPT_READDATA, &state); curl_easy_setopt(HTTP_HANDLE, CURLOPT_SEEKDATA, &state); @@ -534,7 +484,8 @@ go(JSContext* cx, JSObject* obj, HTTPData* http, char* body, size_t bodylen) if(state.recvbuf) { state.recvbuf[state.read] = '\0'; - jsbody = dec_string(cx, state.recvbuf, state.read+1); + std::string bodystr(state.recvbuf, state.read); + jsbody = string_to_js(cx, bodystr); if(!jsbody) { // If we can't decode the body as UTF-8 we forcefully // convert it to a string by just forcing each byte @@ -574,7 +525,7 @@ go(JSContext* cx, JSObject* obj, HTTPData* http, char* body, size_t bodylen) static size_t send_body(void *ptr, size_t size, size_t nmem, void *data) { - CurlState* state = (CurlState*) data; + CurlState* state = static_cast(data); size_t length = size * nmem; size_t towrite = state->sendlen - state->sent; @@ -600,19 +551,19 @@ send_body(void *ptr, size_t size, size_t nmem, void *data) static int seek_body(void* ptr, curl_off_t offset, int origin) { - CurlState* state = (CurlState*) ptr; + CurlState* state = static_cast(ptr); if(origin != SEEK_SET) return -1; - state->sent = (size_t) offset; - return (int) state->sent; + state->sent = static_cast(offset); + return static_cast(state->sent); } static size_t recv_header(void *ptr, size_t size, size_t nmem, void *data) { - CurlState* state = (CurlState*) data; + CurlState* state = static_cast(data); char code[4]; - char* header = (char*) ptr; + char* header = static_cast(ptr); size_t length = size * nmem; JSString* hdr = NULL; uint32_t hdrlen; @@ -640,7 +591,8 @@ recv_header(void *ptr, size_t size, size_t nmem, void *data) } // Append the new header to our array. - hdr = dec_string(state->cx, header, length); + std::string hdrstr(header, length); + hdr = string_to_js(state->cx, hdrstr); if(!hdr) { return CURLE_WRITE_ERROR; } @@ -661,14 +613,17 @@ recv_header(void *ptr, size_t size, size_t nmem, void *data) static size_t recv_body(void *ptr, size_t size, size_t nmem, void *data) { - CurlState* state = (CurlState*) data; + CurlState* state = static_cast(data); size_t length = size * nmem; char* tmp = NULL; if(!state->recvbuf) { state->recvlen = 4096; state->read = 0; - state->recvbuf = static_cast(JS_malloc(state->cx, state->recvlen)); + state->recvbuf = static_cast(JS_malloc( + state->cx, + state->recvlen + )); } if(!state->recvbuf) { @@ -678,7 +633,12 @@ recv_body(void *ptr, size_t size, size_t nmem, void *data) // +1 so we can add '\0' back up in the go function. size_t oldlen = state->recvlen; while(length+1 > state->recvlen - state->read) state->recvlen *= 2; - tmp = static_cast(JS_realloc(state->cx, state->recvbuf, oldlen, state->recvlen)); + tmp = static_cast(JS_realloc( + state->cx, + state->recvbuf, + oldlen, + state->recvlen + )); if(!tmp) return CURLE_WRITE_ERROR; state->recvbuf = tmp; @@ -687,24 +647,4 @@ recv_body(void *ptr, size_t size, size_t nmem, void *data) return length; } -/*JSString* -str_from_binary(JSContext* cx, char* data, size_t length) -{ - char16_t* conv = static_cast(JS_malloc(cx, length * sizeof(char16_t))); - JSString* ret = NULL; - size_t i; - - if(!conv) return NULL; - - for(i = 0; i < length; i++) { - conv[i] = (char16_t) data[i]; - } - - ret = JS_NewUCString(cx, conv, length); - if(!ret) JS_free(cx, conv); - - return ret; -} -*/ - #endif /* HAVE_CURL */ diff --git a/src/couch/priv/couch_js/68/main.cpp b/src/couch/priv/couch_js/68/main.cpp index 3860a01a8..2c95f6129 100644 --- a/src/couch/priv/couch_js/68/main.cpp +++ b/src/couch/priv/couch_js/68/main.cpp @@ -31,7 +31,6 @@ #include "config.h" #include "http.h" -#include "utf8.h" #include "util.h" static bool enableSharedMemory = true; @@ -102,7 +101,10 @@ req_ctor(JSContext* cx, unsigned int argc, JS::Value* vp) static bool req_open(JSContext* cx, unsigned int argc, JS::Value* vp) { - GET_THIS(cx, argc, vp, args, obj) + JS::CallArgs args = JS::CallArgsFromVp(argc, vp); + JS::RootedObject obj(cx); + if (!args.computeThis(cx, &obj)) + return false; bool ret = false; if(argc == 2) { @@ -121,7 +123,10 @@ req_open(JSContext* cx, unsigned int argc, JS::Value* vp) static bool req_set_hdr(JSContext* cx, unsigned int argc, JS::Value* vp) { - GET_THIS(cx, argc, vp, args, obj) + JS::CallArgs args = JS::CallArgsFromVp(argc, vp); + JS::RootedObject obj(cx); + if (!args.computeThis(cx, &obj)) + return false; bool ret = false; if(argc == 2) { @@ -138,7 +143,10 @@ req_set_hdr(JSContext* cx, unsigned int argc, JS::Value* vp) static bool req_send(JSContext* cx, unsigned int argc, JS::Value* vp) { - GET_THIS(cx, argc, vp, args, obj) + JS::CallArgs args = JS::CallArgsFromVp(argc, vp); + JS::RootedObject obj(cx); + if (!args.computeThis(cx, &obj)) + return false; bool ret = false; if(argc == 1) { @@ -154,7 +162,11 @@ req_send(JSContext* cx, unsigned int argc, JS::Value* vp) static bool req_status(JSContext* cx, unsigned int argc, JS::Value* vp) { - GET_THIS(cx, argc, vp, args, obj) + JS::CallArgs args = JS::CallArgsFromVp(argc, vp); + JS::RootedObject obj(cx); + if (!args.computeThis(cx, &obj)) + return false; + int status = http_status(cx, obj); if(status < 0) @@ -167,8 +179,12 @@ req_status(JSContext* cx, unsigned int argc, JS::Value* vp) static bool base_url(JSContext *cx, unsigned int argc, JS::Value* vp) { - GET_THIS(cx, argc, vp, args, obj) - couch_args *cargs = (couch_args*)JS_GetContextPrivate(cx); + JS::CallArgs args = JS::CallArgsFromVp(argc, vp); + JS::RootedObject obj(cx); + if (!args.computeThis(cx, &obj)) + return false; + + couch_args *cargs = static_cast(JS_GetContextPrivate(cx)); JS::Value uri_val; bool rc = http_uri(cx, obj, cargs, &uri_val); args.rval().set(uri_val); @@ -278,7 +294,19 @@ static bool print(JSContext* cx, unsigned int argc, JS::Value* vp) { JS::CallArgs args = JS::CallArgsFromVp(argc, vp); - couch_print(cx, argc, args); + + bool use_stderr = false; + if(argc > 1 && args[1].isTrue()) { + use_stderr = true; + } + + if(!args[0].isString()) { + JS_ReportErrorUTF8(cx, "Unable to print non-string value."); + return false; + } + + couch_print(cx, args[0], use_stderr); + args.rval().setUndefined(); return true; } @@ -381,7 +409,7 @@ static JSFunctionSpec global_functions[] = { static bool csp_allows(JSContext* cx, JS::HandleValue code) { - couch_args *args = (couch_args*)JS_GetContextPrivate(cx); + couch_args* args = static_cast(JS_GetContextPrivate(cx)); if(args->eval) { return true; } else { @@ -476,14 +504,27 @@ main(int argc, const char* argv[]) script = JS::CompileUtf8File(cx, options, fp); fclose(fp); if (!script) { - fprintf(stderr, "Failed to compile file: %s\n", filename); + JS::RootedValue exc(cx); + if(!JS_GetPendingException(cx, &exc)) { + fprintf(stderr, "Failed to compile file: %s\n", filename); + } else { + JS::RootedObject exc_obj(cx, &exc.toObject()); + JSErrorReport* report = JS_ErrorFromException(cx, exc_obj); + couch_error(cx, report); + } return 1; } JS::RootedValue result(cx); if(JS_ExecuteScript(cx, script, &result) != true) { - fprintf(stderr, "Failed to execute script.\n"); - return 1; + JS::RootedValue exc(cx); + if(!JS_GetPendingException(cx, &exc)) { + fprintf(stderr, "Failed to execute script.\n"); + } else { + JS::RootedObject exc_obj(cx, &exc.toObject()); + JSErrorReport* report = JS_ErrorFromException(cx, exc_obj); + couch_error(cx, report); + } } // Give the GC a chance to run. diff --git a/src/couch/priv/couch_js/68/utf8.cpp b/src/couch/priv/couch_js/68/utf8.cpp deleted file mode 100644 index c28e026f7..000000000 --- a/src/couch/priv/couch_js/68/utf8.cpp +++ /dev/null @@ -1,309 +0,0 @@ -// Licensed under the Apache License, Version 2.0 (the "License"); you may not -// use this file except in compliance with the License. You may obtain a copy of -// the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -// License for the specific language governing permissions and limitations under -// the License. - -#include -#include -#include -#include -#include -#include "config.h" -#include "util.h" - -static int -enc_char(uint8_t *utf8Buffer, uint32_t ucs4Char) -{ - int utf8Length = 1; - - if (ucs4Char < 0x80) - { - *utf8Buffer = (uint8_t)ucs4Char; - } - else - { - int i; - uint32_t a = ucs4Char >> 11; - utf8Length = 2; - while(a) - { - a >>= 5; - utf8Length++; - } - i = utf8Length; - while(--i) - { - utf8Buffer[i] = (uint8_t)((ucs4Char & 0x3F) | 0x80); - ucs4Char >>= 6; - } - *utf8Buffer = (uint8_t)(0x100 - (1 << (8-utf8Length)) + ucs4Char); - } - - return utf8Length; -} - -static bool -enc_charbuf(const char16_t* src, size_t srclen, char* dst, size_t* dstlenp) -{ - size_t i; - size_t utf8Len; - size_t dstlen = *dstlenp; - size_t origDstlen = dstlen; - char16_t c; - char16_t c2; - uint32_t v; - uint8_t utf8buf[6]; - - if(!dst) - { - dstlen = origDstlen = (size_t) -1; - } - - while(srclen) - { - c = *src++; - srclen--; - - if(c <= 0xD7FF || c >= 0xE000) - { - v = (uint32_t) c; - } - else if(c >= 0xD800 && c <= 0xDBFF) - { - if(srclen < 1) goto buffer_too_small; - c2 = *src++; - srclen--; - if(c2 >= 0xDC00 && c2 <= 0xDFFF) - { - v = (uint32_t) (((c - 0xD800) << 10) + (c2 - 0xDC00) + 0x10000); - } - else - { - // Invalid second half of surrogate pair - v = (uint32_t) 0xFFFD; - // Undo our character advancement - src--; - srclen++; - } - } - else - { - // Invalid first half surrogate pair - v = (uint32_t) 0xFFFD; - } - - if(v < 0x0080) - { - // no encoding necessary - performance hack - if(!dstlen) goto buffer_too_small; - if(dst) *dst++ = (char) v; - utf8Len = 1; - } - else - { - utf8Len = enc_char(utf8buf, v); - if(utf8Len > dstlen) goto buffer_too_small; - if(dst) - { - for (i = 0; i < utf8Len; i++) - { - *dst++ = (char) utf8buf[i]; - } - } - } - dstlen -= utf8Len; - } - - *dstlenp = (origDstlen - dstlen); - return true; - -buffer_too_small: - *dstlenp = (origDstlen - dstlen); - return false; -} - -char* -enc_string(JSContext* cx, JS::Value arg, size_t* buflen) -{ - JSString* str = NULL; - const char16_t* src = NULL; - char* bytes = NULL; - size_t srclen = 0; - size_t byteslen = 0; - JS::AutoStableStringChars rawChars(cx); - - str = arg.toString(); - if(!str) goto error; - - if (!rawChars.initTwoByte(cx, str)) - return NULL; - - src = rawChars.twoByteRange().begin().get(); - srclen = JS_GetStringLength(str); - - if(!enc_charbuf(src, srclen, NULL, &byteslen)) goto error; - - bytes = js_pod_malloc(byteslen + 1); - bytes[byteslen] = 0; - - if(!enc_charbuf(src, srclen, bytes, &byteslen)) goto error; - - if(buflen) *buflen = byteslen; - goto success; - -error: - if(bytes != NULL) JS_free(cx, bytes); - bytes = NULL; - -success: -/* - JS::RootedString str(cx, arg.toString()); - JS::UniqueChars chars = JS_EncodeStringToUTF8(cx, str); - - if(buflen) *buflen = strlen(chars.get()); - - return JS_NewUCStringCopyN(cs, chars.get(), buflen); -*/ - return bytes; -} - -static uint32_t -dec_char(const uint8_t *utf8Buffer, int utf8Length) -{ - uint32_t ucs4Char; - uint32_t minucs4Char; - - // from Unicode 3.1, non-shortest form is illegal - static const uint32_t minucs4Table[] = { - 0x00000080, 0x00000800, 0x0001000, 0x0020000, 0x0400000 - }; - - if (utf8Length == 1) - { - ucs4Char = *utf8Buffer; - } - else - { - ucs4Char = *utf8Buffer++ & ((1<<(7-utf8Length))-1); - minucs4Char = minucs4Table[utf8Length-2]; - while(--utf8Length) - { - ucs4Char = ucs4Char<<6 | (*utf8Buffer++ & 0x3F); - } - if(ucs4Char < minucs4Char || ucs4Char == 0xFFFE || ucs4Char == 0xFFFF) - { - ucs4Char = 0xFFFD; - } - } - - return ucs4Char; -} - -static bool -dec_charbuf(const char *src, size_t srclen, char16_t *dst, size_t *dstlenp) -{ - uint32_t v; - size_t offset = 0; - size_t j; - size_t n; - size_t dstlen = *dstlenp; - size_t origDstlen = dstlen; - - if(!dst) dstlen = origDstlen = (size_t) -1; - - while(srclen) - { - v = (uint8_t) *src; - n = 1; - - if(v & 0x80) - { - while(v & (0x80 >> n)) - { - n++; - } - - if(n > srclen) goto buffer_too_small; - if(n == 1 || n > 6) goto bad_character; - - for(j = 1; j < n; j++) - { - if((src[j] & 0xC0) != 0x80) goto bad_character; - } - - v = dec_char((const uint8_t *) src, n); - if(v >= 0x10000) - { - v -= 0x10000; - - if(v > 0xFFFFF || dstlen < 2) - { - *dstlenp = (origDstlen - dstlen); - return false; - } - - if(dstlen < 2) goto buffer_too_small; - - if(dst) - { - *dst++ = (char16_t)((v >> 10) + 0xD800); - v = (char16_t)((v & 0x3FF) + 0xDC00); - } - dstlen--; - } - } - - if(!dstlen) goto buffer_too_small; - if(dst) *dst++ = (char16_t) v; - - dstlen--; - offset += n; - src += n; - srclen -= n; - } - - *dstlenp = (origDstlen - dstlen); - return true; - -bad_character: - *dstlenp = (origDstlen - dstlen); - return false; - -buffer_too_small: - *dstlenp = (origDstlen - dstlen); - return false; -} - -JSString* -dec_string(JSContext* cx, const char* bytes, size_t byteslen) -{ - JSString* str = NULL; - size_t charslen; - - if(!dec_charbuf(bytes, byteslen, NULL, &charslen)) return NULL; - - JS::UniqueTwoByteChars chars(js_pod_malloc(charslen + 1)); - if(!chars) return NULL; - chars.get()[charslen] = 0; - - if(!dec_charbuf(bytes, byteslen, chars.get(), &charslen)) goto error; - - str = JS_NewUCString(cx, std::move(chars), charslen - 1); - if(!str) goto error; - - goto success; - -error: - if(chars != NULL) JS_free(cx, chars.get()); - str = NULL; - -success: - return str; -} diff --git a/src/couch/priv/couch_js/68/utf8.h b/src/couch/priv/couch_js/68/utf8.h deleted file mode 100644 index c8b1f4d82..000000000 --- a/src/couch/priv/couch_js/68/utf8.h +++ /dev/null @@ -1,19 +0,0 @@ -// Licensed under the Apache License, Version 2.0 (the "License"); you may not -// use this file except in compliance with the License. You may obtain a copy of -// the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -// License for the specific language governing permissions and limitations under -// the License. - -#ifndef COUCH_JS_UTF_8_H -#define COUCH_JS_UTF_8_H - -char* enc_string(JSContext* cx, JS::Value arg, size_t* buflen); -JSString* dec_string(JSContext* cx, const char* buf, size_t buflen); - -#endif diff --git a/src/couch/priv/couch_js/68/util.cpp b/src/couch/priv/couch_js/68/util.cpp index f941e7dd2..7717f1185 100644 --- a/src/couch/priv/couch_js/68/util.cpp +++ b/src/couch/priv/couch_js/68/util.cpp @@ -13,7 +13,11 @@ #include #include +#include + #include +#include +#include #include #include #include @@ -21,53 +25,57 @@ #include "help.h" #include "util.h" -#include "utf8.h" -/* std::string js_to_string(JSContext* cx, JS::HandleValue val) { + JS::AutoSaveExceptionState exc_state(cx); JS::RootedString sval(cx); sval = val.toString(); JS::UniqueChars chars(JS_EncodeStringToUTF8(cx, sval)); if(!chars) { JS_ClearPendingException(cx); - fprintf(stderr, "Error converting value to string.\n"); - exit(3); + return std::string(); } return chars.get(); } -std::string -js_to_string(JSContext* cx, JSString *str) +bool +js_to_string(JSContext* cx, JS::HandleValue val, std::string& str) { - JS::UniqueChars chars(JS_EncodeString(cx, str)); - if(!chars) { - JS_ClearPendingException(cx); - fprintf(stderr, "Error converting to string.\n"); - exit(3); + if(!val.isString()) { + return false; } - return chars.get(); + if(JS_GetStringLength(val.toString()) == 0) { + str = ""; + return true; + } + + std::string conv = js_to_string(cx, val); + if(!conv.size()) { + return false; + } + + str = conv; + return true; } -*/ JSString* -string_to_js(JSContext* cx, const std::string& s) +string_to_js(JSContext* cx, const std::string& raw) { -/* + JS::UTF8Chars utf8(raw.c_str(), raw.size()); + JS::UniqueTwoByteChars utf16; + size_t len; - JSString* ret = JS_NewStringCopyN(cx, s.c_str(), s.size()); - if(ret != nullptr) { - return ret; + utf16.reset(JS::UTF8CharsToNewTwoByteCharsZ(cx, utf8, &len, js::MallocArena).get()); + if(!utf16) { + return nullptr; } - fprintf(stderr, "Unable to allocate string object.\n"); - exit(3); -*/ - return dec_string(cx, s.c_str(), s.size()); + return JS_NewUCString(cx, std::move(utf16), len); } size_t @@ -92,21 +100,21 @@ couch_readfile(const char* file, char** outbuf_p) while((nread = fread(fbuf, 1, 16384, fp)) > 0) { if(buf == NULL) { - buf = (char*) malloc(nread + 1); + buf = new char[nread + 1]; if(buf == NULL) { fprintf(stderr, "Out of memory.\n"); exit(3); } memcpy(buf, fbuf, nread); } else { - tmp = (char*) malloc(buflen + nread + 1); + tmp = new char[buflen + nread + 1]; if(tmp == NULL) { fprintf(stderr, "Out of memory.\n"); exit(3); } memcpy(tmp, buf, buflen); memcpy(tmp+buflen, fbuf, nread); - free(buf); + delete buf; buf = tmp; } buflen += nread; @@ -122,12 +130,17 @@ couch_parse_args(int argc, const char* argv[]) couch_args* args; int i = 1; - args = (couch_args*) malloc(sizeof(couch_args)); + args = new couch_args(); if(args == NULL) return NULL; - memset(args, '\0', sizeof(couch_args)); + args->eval = 0; + args->use_http = 0; + args->use_test_funs = 0; args->stack_size = 64L * 1024L * 1024L; + args->scripts = nullptr; + args->uri_file = nullptr; + args->uri = nullptr; while(i < argc) { if(strcmp("-h", argv[i]) == 0) { @@ -200,9 +213,8 @@ couch_readline(JSContext* cx, FILE* fp) size_t byteslen = 256; size_t oldbyteslen = 256; size_t readlen = 0; - bool sawNewline = false; - bytes = static_cast(JS_malloc(cx, byteslen)); + bytes = static_cast(JS_malloc(cx, byteslen)); if(bytes == NULL) return NULL; while((readlen = couch_fgets(bytes+used, byteslen-used, fp)) > 0) { @@ -210,14 +222,13 @@ couch_readline(JSContext* cx, FILE* fp) if(bytes[used-1] == '\n') { bytes[used-1] = '\0'; - sawNewline = true; break; } // Double our buffer and read more. oldbyteslen = byteslen; byteslen *= 2; - tmp = static_cast(JS_realloc(cx, bytes, oldbyteslen, byteslen)); + tmp = static_cast(JS_realloc(cx, bytes, oldbyteslen, byteslen)); if(!tmp) { JS_free(cx, bytes); return NULL; @@ -233,7 +244,7 @@ couch_readline(JSContext* cx, FILE* fp) } // Shrink the buffer to the actual data size - tmp = static_cast(JS_realloc(cx, bytes, byteslen, used)); + tmp = static_cast(JS_realloc(cx, bytes, byteslen, used)); if(!tmp) { JS_free(cx, bytes); return NULL; @@ -241,37 +252,22 @@ couch_readline(JSContext* cx, FILE* fp) bytes = tmp; byteslen = used; - str = string_to_js(cx, std::string(tmp, byteslen)); + str = string_to_js(cx, std::string(tmp)); JS_free(cx, bytes); return str; } void -couch_print(JSContext* cx, unsigned int argc, JS::CallArgs argv) +couch_print(JSContext* cx, JS::HandleValue obj, bool use_stderr) { FILE *stream = stdout; - if (argc) { - if (argc > 1 && argv[1].isTrue()) { - stream = stderr; - } - JS::AutoSaveExceptionState exc_state(cx); - JS::RootedString sval(cx, JS::ToString(cx, argv[0])); - if (!sval) { - fprintf(stream, "couch_print: \n"); - fflush(stream); - return; - } - JS::UniqueChars bytes(JS_EncodeStringToUTF8(cx, sval)); - if (!bytes) - return; - - fprintf(stream, "%s", bytes.get()); - exc_state.restore(); + if (use_stderr) { + stream = stderr; } - - fputc('\n', stream); + std::string val = js_to_string(cx, obj); + fprintf(stream, "%s\n", val.c_str()); fflush(stream); } @@ -279,52 +275,64 @@ couch_print(JSContext* cx, unsigned int argc, JS::CallArgs argv) void couch_error(JSContext* cx, JSErrorReport* report) { - JS::RootedValue v(cx), stack(cx), replace(cx); - char* bytes; - JSObject* regexp; - - if(!report || !JSREPORT_IS_WARNING(report->flags)) - { - fprintf(stderr, "%s\n", report->message().c_str()); - - // Print a stack trace, if available. - if (JSREPORT_IS_EXCEPTION(report->flags) && - JS_GetPendingException(cx, &v)) - { - // Clear the exception before an JS method calls or the result is - // infinite, recursive error report generation. - JS_ClearPendingException(cx); - - // Use JS regexp to indent the stack trace. - // If the regexp can't be created, don't JS_ReportErrorUTF8 since it is - // probably not productive to wind up here again. - JS::RootedObject vobj(cx, v.toObjectOrNull()); - - if(JS_GetProperty(cx, vobj, "stack", &stack) && - (regexp = JS::NewRegExpObject( - cx, "^(?=.)", 6, JS::RegExpFlag::Global | JS::RegExpFlag::Multiline))) - - { - // Set up the arguments to ``String.replace()`` - JS::RootedValueVector re_args(cx); - JS::RootedValue arg0(cx, JS::ObjectValue(*regexp)); - auto arg1 = JS::StringValue(string_to_js(cx, "\t")); - - if (re_args.append(arg0) && re_args.append(arg1)) { - // Perform the replacement - JS::RootedObject sobj(cx, stack.toObjectOrNull()); - if(JS_GetProperty(cx, sobj, "replace", &replace) && - JS_CallFunctionValue(cx, sobj, replace, re_args, &v)) - { - // Print the result - bytes = enc_string(cx, v, NULL); - fprintf(stderr, "Stacktrace:\n%s", bytes); - JS_free(cx, bytes); - } - } - } + if(!report) { + return; + } + + if(JSREPORT_IS_WARNING(report->flags)) { + return; + } + + std::ostringstream msg; + msg << "error: " << report->message().c_str(); + + mozilla::Maybe ar; + JS::RootedValue exc(cx); + JS::RootedObject exc_obj(cx); + JS::RootedObject stack_obj(cx); + JS::RootedString stack_str(cx); + JS::RootedValue stack_val(cx); + JSPrincipals* principals = GetRealmPrincipals(js::GetContextRealm(cx)); + + if(!JS_GetPendingException(cx, &exc)) { + goto done; + } + + // Clear the exception before an JS method calls or the result is + // infinite, recursive error report generation. + JS_ClearPendingException(cx); + + exc_obj.set(exc.toObjectOrNull()); + stack_obj.set(JS::ExceptionStackOrNull(exc_obj)); + + if(!stack_obj) { + // Compilation errors don't have a stack + + msg << " at "; + + if(report->filename) { + msg << report->filename; + } else { + msg << ""; } + + if(report->lineno) { + msg << ':' << report->lineno << ':' << report->column; + } + + goto done; } + + if(!JS::BuildStackString(cx, principals, stack_obj, &stack_str, 2)) { + goto done; + } + + stack_val.set(JS::StringValue(stack_str)); + msg << std::endl << std::endl << js_to_string(cx, stack_val).c_str(); + +done: + msg << std::endl; + fprintf(stderr, "%s", msg.str().c_str()); } diff --git a/src/couch/priv/couch_js/68/util.h b/src/couch/priv/couch_js/68/util.h index dc8a3a7b4..bd7843eb9 100644 --- a/src/couch/priv/couch_js/68/util.h +++ b/src/couch/priv/couch_js/68/util.h @@ -25,36 +25,17 @@ typedef struct { JSString* uri; } couch_args; -/* std::string js_to_string(JSContext* cx, JS::HandleValue val); -std::string js_to_string(JSContext* cx, JSString *str); +bool js_to_string(JSContext* cx, JS::HandleValue val, std::string& str); JSString* string_to_js(JSContext* cx, const std::string& s); -*/ couch_args* couch_parse_args(int argc, const char* argv[]); int couch_fgets(char* buf, int size, FILE* fp); JSString* couch_readline(JSContext* cx, FILE* fp); size_t couch_readfile(const char* file, char** outbuf_p); -void couch_print(JSContext* cx, unsigned int argc, JS::CallArgs argv); +void couch_print(JSContext* cx, JS::HandleValue str, bool use_stderr); void couch_error(JSContext* cx, JSErrorReport* report); void couch_oom(JSContext* cx, void* data); bool couch_load_funcs(JSContext* cx, JS::HandleObject obj, JSFunctionSpec* funcs); -/* - * GET_THIS: - * @cx: JSContext pointer passed into JSNative function - * @argc: Number of arguments passed into JSNative function - * @vp: Argument value array passed into JSNative function - * @args: Name for JS::CallArgs variable defined by this code snippet - * @to: Name for JS::RootedObject variable referring to function's this - * - * A convenience macro for getting the 'this' object a function was called with. - * Use in any JSNative function. - */ -#define GET_THIS(cx, argc, vp, args, to) \ - JS::CallArgs args = JS::CallArgsFromVp(argc, vp); \ - JS::RootedObject to(cx); \ - if (!args.computeThis(cx, &to)) \ - return false; - #endif // Included util.h diff --git a/src/couch/rebar.config.script b/src/couch/rebar.config.script index 89c652a58..ad897e8e3 100644 --- a/src/couch/rebar.config.script +++ b/src/couch/rebar.config.script @@ -41,7 +41,7 @@ end. GitSha = case os:getenv("COUCHDB_GIT_SHA") of false -> - ""; % release builds won’t get a fallback + ""; % release builds won't get a fallback GitSha0 -> string:strip(GitSha0, right) end. diff --git a/src/couch/test/eunit/couch_js_tests.erl b/src/couch/test/eunit/couch_js_tests.erl index c2c62463b..693cd9772 100644 --- a/src/couch/test/eunit/couch_js_tests.erl +++ b/src/couch/test/eunit/couch_js_tests.erl @@ -137,7 +137,6 @@ should_allow_js_string_mutations() -> true = couch_query_servers:proc_prompt(Proc, [<<"add_fun">>, Src3]), Doc = {[{<<"value">>, MomWashedTheFrame}]}, Result = couch_query_servers:proc_prompt(Proc, [<<"map_doc">>, Doc]), - io:format(standard_error, "~w~n~w~n", [MomWashedTheFrame, Result]), Expect = [ [[<<"length">>, 14]], [[<<"substring">>, Washed]], -- cgit v1.2.1 From bb43a697fedaac44c9ff4a56e9461d99341cd297 Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Tue, 21 Apr 2020 15:48:16 -0500 Subject: Replace broken u-escape sequences --- src/couch/src/couch_query_servers.erl | 106 +++++++++++++++++++++++++++++++++- 1 file changed, 104 insertions(+), 2 deletions(-) diff --git a/src/couch/src/couch_query_servers.erl b/src/couch/src/couch_query_servers.erl index c6d255f17..9842177d3 100644 --- a/src/couch/src/couch_query_servers.erl +++ b/src/couch/src/couch_query_servers.erl @@ -519,7 +519,7 @@ with_ddoc_proc(#doc{id=DDocId,revs={Start, [DiskRev|_]}}=DDoc, Fun) -> proc_prompt(Proc, Args) -> case proc_prompt_raw(Proc, Args) of {json, Json} -> - ?JSON_DECODE(Json); + raw_to_ejson({json, Json}); EJson -> EJson end. @@ -528,10 +528,76 @@ proc_prompt_raw(#proc{prompt_fun = {Mod, Func}} = Proc, Args) -> apply(Mod, Func, [Proc#proc.pid, Args]). raw_to_ejson({json, Json}) -> - ?JSON_DECODE(Json); + try + ?JSON_DECODE(Json) + catch throw:{invalid_json, {_, invalid_string}} -> + Forced = try + force_utf8(Json) + catch _:_ -> + Json + end, + ?JSON_DECODE(Forced) + end; raw_to_ejson(EJson) -> EJson. +force_utf8(Bin) -> + case binary:match(Bin, <<"\\u">>) of + {Start, 2} -> + <> = Bin, + {Insert, Rest3} = case check_uescape(Rest1) of + {ok, Skip} -> + <> = Rest1, + {Skipped, Rest2}; + {error, Skip} -> + <<_:Skip/binary, Rest2/binary>> = Rest1, + {<<16#EF, 16#BF, 16#BD>>, Rest2} + end, + RestForced = force_utf8(Rest3), + <>; + nomatch -> + Bin + end. + +check_uescape(Data) -> + case extract_uescape(Data) of + {Hi, Rest} when Hi >= 16#D800, Hi < 16#DC00 -> + case extract_uescape(Rest) of + {Lo, _} when Lo >= 16#DC00, Lo =< 16#DFFF -> + % A low surrogate pair + UTF16 = << + Hi:16/big-unsigned-integer, + Lo:16/big-unsigned-integer + >>, + try + [_] = xmerl_ucs:from_utf16be(UTF16), + {ok, 12} + catch _:_ -> + {error, 6} + end; + {_, _} -> + % Found a uescape that's not a low half + {error, 6}; + false -> + % No hex escape found + {error, 6} + end; + {Hi, _} when Hi >= 16#DC00, Hi =< 16#DFFF -> + % Found a low surrogate half without a high half + {error, 6}; + {_, _} -> + % Found a uescape we don't care about + {ok, 6}; + false -> + % Incomplete uescape which we don't care about + {ok, 2} + end. + +extract_uescape(<<"\\u", Code:4/binary, Rest/binary>>) -> + {binary_to_integer(Code, 16), Rest}; +extract_uescape(_) -> + false. + proc_stop(Proc) -> {Mod, Func} = Proc#proc.stop_fun, apply(Mod, Func, [Proc#proc.pid]). @@ -680,4 +746,40 @@ test_reduce(Reducer, KVs) -> {ok, Finalized} = finalize(Reducer, Reduced), Finalized. +force_utf8_test() -> + % "\uDCA5\uD83D" + Ok = [ + <<"foo">>, + <<"\\u00A0">>, + <<"\\u0032">>, + <<"\\uD83D\\uDCA5">>, + <<"foo\\uD83D\\uDCA5bar">>, + % Truncated but we doesn't break replacements + <<"\\u0FA">> + ], + lists:foreach(fun(Case) -> + ?assertEqual(Case, force_utf8(Case)) + end, Ok), + + NotOk = [ + <<"\\uDCA5">>, + <<"\\uD83D">>, + <<"fo\\uDCA5bar">>, + <<"foo\\uD83Dbar">>, + <<"\\uDCA5\\uD83D">>, + <<"\\uD83Df\\uDCA5">>, + <<"\\uDCA5\\u00A0">>, + <<"\\uD83D\\u00A0">> + ], + ToJSON = fun(Bin) -> <<34, Bin/binary, 34>> end, + lists:foreach(fun(Case) -> + try + ?assertNotEqual(Case, force_utf8(Case)), + ?assertThrow(_, ?JSON_DECODE(ToJSON(Case))), + ?assertMatch(<<_/binary>>, ?JSON_DECODE(ToJSON(force_utf8(Case)))) + catch T:R:S -> + io:format(standard_error, "~p~n~p~n~p~n", [T, R, S]) + end + end, NotOk). + -endif. -- cgit v1.2.1 From e6e6e6befa3f697721238889491b226dda940346 Mon Sep 17 00:00:00 2001 From: Joan Touzet Date: Mon, 27 Apr 2020 15:15:42 +0000 Subject: Fix new JS test case --- src/couch/src/couch_query_servers.erl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/couch/src/couch_query_servers.erl b/src/couch/src/couch_query_servers.erl index 9842177d3..447daea61 100644 --- a/src/couch/src/couch_query_servers.erl +++ b/src/couch/src/couch_query_servers.erl @@ -777,8 +777,9 @@ force_utf8_test() -> ?assertNotEqual(Case, force_utf8(Case)), ?assertThrow(_, ?JSON_DECODE(ToJSON(Case))), ?assertMatch(<<_/binary>>, ?JSON_DECODE(ToJSON(force_utf8(Case)))) - catch T:R:S -> - io:format(standard_error, "~p~n~p~n~p~n", [T, R, S]) + catch + T:R -> + io:format(standard_error, "~p~n~p~n", [T, R]) end end, NotOk). -- cgit v1.2.1 From 55deba0509038b9d892e72ae9ed029aa8905afe5 Mon Sep 17 00:00:00 2001 From: Joan Touzet Date: Mon, 27 Apr 2020 12:15:51 -0400 Subject: python black cleanup --- src/mango/test/21-empty-selector-tests.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/src/mango/test/21-empty-selector-tests.py b/src/mango/test/21-empty-selector-tests.py index 31ad8e645..8fd76fcd5 100644 --- a/src/mango/test/21-empty-selector-tests.py +++ b/src/mango/test/21-empty-selector-tests.py @@ -42,17 +42,13 @@ def make_empty_selector_suite(klass): assert len(docs) == 0 def test_empty_array_and_with_age(self): - resp = self.db.find( - {"age": 22, "$and": []}, explain=True - ) + resp = self.db.find({"age": 22, "$and": []}, explain=True) self.assertEqual(resp["index"]["type"], klass.INDEX_TYPE) docs = self.db.find({"age": 22, "$and": []}) assert len(docs) == 1 def test_empty_array_all_age(self): - resp = self.db.find( - {"age": 22, "company": {"$all": []}}, explain=True - ) + resp = self.db.find({"age": 22, "company": {"$all": []}}, explain=True) self.assertEqual(resp["index"]["type"], klass.INDEX_TYPE) docs = self.db.find({"age": 22, "company": {"$all": []}}) assert len(docs) == 0 @@ -62,7 +58,7 @@ def make_empty_selector_suite(klass): {"age": 22, "$and": [{"company": {"$all": []}}]}, explain=True ) self.assertEqual(resp["index"]["type"], klass.INDEX_TYPE) - docs = self.db.find( {"age": 22, "$and": [{"company": {"$all": []}}]}) + docs = self.db.find({"age": 22, "$and": [{"company": {"$all": []}}]}) assert len(docs) == 0 def test_empty_arrays_complex(self): -- cgit v1.2.1 From b7ca42d6ba9cddc8878a09498a2167d36ddb71b8 Mon Sep 17 00:00:00 2001 From: Joan Touzet Date: Mon, 27 Apr 2020 13:28:21 -0400 Subject: Ensure python black runs on all .py files (#2827) --- Makefile | 4 ++-- Makefile.win | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index fff1df528..53cea3bc8 100644 --- a/Makefile +++ b/Makefile @@ -213,7 +213,7 @@ python-black: .venv/bin/black @python3 -c "import sys; exit(1 if sys.version_info >= (3,6) else 0)" || \ LC_ALL=C.UTF-8 LANG=C.UTF-8 .venv/bin/black --check \ --exclude="build/|buck-out/|dist/|_build/|\.git/|\.hg/|\.mypy_cache/|\.nox/|\.tox/|\.venv/|src/rebar/pr2relnotes.py|src/fauxton" \ - . dev/run test/javascript/run src/mango src/docs + build-aux/*.py dev/run test/javascript/run src/mango/test/*.py src/docs/src/conf.py src/docs/ext/*.py . python-black-update: .venv/bin/black @python3 -c "import sys; exit(1 if sys.version_info < (3,6) else 0)" || \ @@ -221,7 +221,7 @@ python-black-update: .venv/bin/black @python3 -c "import sys; exit(1 if sys.version_info >= (3,6) else 0)" || \ LC_ALL=C.UTF-8 LANG=C.UTF-8 .venv/bin/black \ --exclude="build/|buck-out/|dist/|_build/|\.git/|\.hg/|\.mypy_cache/|\.nox/|\.tox/|\.venv/|src/rebar/pr2relnotes.py|src/fauxton" \ - . dev/run test/javascript/run src/mango src/docs + build-aux/*.py dev/run test/javascript/run src/mango/test/*.py src/docs/src/conf.py src/docs/ext/*.py . .PHONY: elixir elixir: export MIX_ENV=integration diff --git a/Makefile.win b/Makefile.win index 0fc4d91c7..6c160e8fd 100644 --- a/Makefile.win +++ b/Makefile.win @@ -190,7 +190,7 @@ python-black: .venv/bin/black @python.exe -c "import sys; exit(1 if sys.version_info >= (3,6) else 0)" || \ .venv\Scripts\black.exe --check \ --exclude="build/|buck-out/|dist/|_build/|\.git/|\.hg/|\.mypy_cache/|\.nox/|\.tox/|\.venv/|src/rebar/pr2relnotes.py|src/fauxton" \ - . dev\run test\javascript\run src\mango src\docs + build-aux\*.py dev\run test\javascript\run src\mango\test\*.py src\docs\src\conf.py src\docs\ext\*.py . python-black-update: .venv/bin/black @python.exe -c "import sys; exit(1 if sys.version_info < (3,6) else 0)" || \ @@ -198,7 +198,7 @@ python-black-update: .venv/bin/black @python.exe -c "import sys; exit(1 if sys.version_info >= (3,6) else 0)" || \ .venv\Scripts\black.exe \ --exclude="build/|buck-out/|dist/|_build/|\.git/|\.hg/|\.mypy_cache/|\.nox/|\.tox/|\.venv/|src/rebar/pr2relnotes.py|src/fauxton" \ - . dev\run test\javascript\run src\mango src\docs + build-aux\*.py dev\run test\javascript\run src\mango\test\*.py src\docs\src\conf.py src\docs\ext\*.py . .PHONY: elixir elixir: export MIX_ENV=integration -- cgit v1.2.1 From 0a74954070a68b0cc883494b859321adbd8c0a3e Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Fri, 24 Apr 2020 15:35:04 -0400 Subject: Update erlfdb to v1.1.0 https://github.com/apache/couchdb-erlfdb/releases/tag/v1.1.0 --- rebar.config.script | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rebar.config.script b/rebar.config.script index 2badaba2d..b0d2f15d5 100644 --- a/rebar.config.script +++ b/rebar.config.script @@ -152,7 +152,7 @@ DepDescs = [ %% Independent Apps {config, "config", {tag, "2.1.7"}}, {b64url, "b64url", {tag, "1.0.2"}}, -{erlfdb, "erlfdb", {tag, "v1.0.0"}}, +{erlfdb, "erlfdb", {tag, "v1.1.0"}}, {ets_lru, "ets-lru", {tag, "1.1.0"}}, {khash, "khash", {tag, "1.1.0"}}, {snappy, "snappy", {tag, "CouchDB-1.0.4"}}, -- cgit v1.2.1 From b07a6292a95f1145567a27e7041e609bda5f2877 Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Fri, 24 Apr 2020 19:37:55 -0400 Subject: Allow specifying FDB transaction options With the latest erlfdb release v1.1.0 we have the ability to set default transaction options on the database handle. Once set, those are inherited by every transaction started from that handle. Use this feature to give advanced users a way to experiment with various transaction options. Descriptions of those options in the default.ini file have been mostly a copy and paste from the fdb_c_option.g.h file from the client library. In addition, specify some safer default values for transaction timeouts (1min) and retry limit (100). These quite conservative and are basically something less that "infinity". In the future these may be adjusted lower. --- rel/overlay/etc/default.ini | 56 ++++++++++++++- src/fabric/include/fabric2.hrl | 1 + src/fabric/src/fabric2_server.erl | 97 ++++++++++++++++++++++--- src/fabric/test/fabric2_tx_options_tests.erl | 103 +++++++++++++++++++++++++++ 4 files changed, 245 insertions(+), 12 deletions(-) create mode 100644 src/fabric/test/fabric2_tx_options_tests.erl diff --git a/rel/overlay/etc/default.ini b/rel/overlay/etc/default.ini index dfc67f7fb..a1e3c5851 100644 --- a/rel/overlay/etc/default.ini +++ b/rel/overlay/etc/default.ini @@ -709,4 +709,58 @@ compaction = false [couch_rate.views] limiter = couch_rate_limiter -opts = #{budget => 100, target => 2500, window => 60000, sensitivity => 1000} \ No newline at end of file +opts = #{budget => 100, target => 2500, window => 60000, sensitivity => 1000} + + +; Some low-level FDB transaction options. These options will be applied to the +; database handle and inherited by each transaction started with that handle. +; The description of these can be found in fdb_c_option.g.h include file from +; the client libraries. The default values which were not specified the +; fdb_c_option.g.h file were not included here either. + +[fdb_tx_options] +; Specify the machine ID that was passed to fdbserver processes running on the +; same machine as this client, for better location-aware load balancing. +; Type is a hexadecimal string, less than 16 bytes in size. +;machine_id = + +; Specify the datacenter ID that was passed to fdbserver processes running in +; the same datacenter as this client, for better location-aware load balancing. +; Type is hexadecimal string, less than 16 bytes in size. +;datacenter_id = + +; Sets the maximum escaped length of key and value fields to be logged to the +; trace file via the LOG_TRANSACTION option, after which the field will be +; truncated. A negative value disables truncation. +;transaction_logging_max_field_length = + +; Set a timeout in milliseconds which, when elapsed, will cause the transaction +; automatically to be cancelled. Valid parameter values are [0, INT_MAX]. +; If set to 0, will disable all timeouts. All pending and any future uses of +; the transaction will throw an exception. The transaction can be used again +; after it is reset. +;timeout = 60000 + +; Set a maximum number of retries after which additional calls to 'on_error` +; will throw the most recently seen error code. Valid parameter values are +; [-1, INT_MAX]. If set to -1, will disable the retry limit. +;retry_limit = 100 + +; Set the maximum amount of backoff delay incurred in the call to 'on_error' +; if the error is retryable. Defaults to 1000 ms. Valid parameter values are +; [0, INT_MAX]. If the maximum retry delay is less than the current retry +; delay of the transaction, then the current retry delay will be clamped to the +; maximum retry delay. The retry limit is not reset after an +; 'on_erro' call. +;max_retry_delay = 1000 + +; Set the transaction size limit in bytes. The size is calculated by combining +; the sizes of all keys and values written or mutated, all key ranges cleared, +; and all read and write conflict ranges. (In other words, it includes the +; total size of all data included in the request to the cluster to commit the +; transaction.) Large transactions can cause performance problems on +; FoundationDB clusters, so setting this limit to a smaller value than the +; default can help prevent the client from accidentally degrading the cluster's +; performance. This value must be at least 10000 and cannot be set to higher than +; 10000000, the default transaction size limit. +;size_limit = 10000000 diff --git a/src/fabric/include/fabric2.hrl b/src/fabric/include/fabric2.hrl index 234c5291e..27f3d61c2 100644 --- a/src/fabric/include/fabric2.hrl +++ b/src/fabric/include/fabric2.hrl @@ -77,6 +77,7 @@ -define(FUTURE_VERSION, 1009). -define(COMMIT_UNKNOWN_RESULT, 1021). -define(TRANSACTION_CANCELLED, 1025). +-define(TRANSACTION_TOO_LARGE, 2101). -define(DEFAULT_BINARY_CHUNK_SIZE, 100000). diff --git a/src/fabric/src/fabric2_server.erl b/src/fabric/src/fabric2_server.erl index 1de60f798..204246ae2 100644 --- a/src/fabric/src/fabric2_server.erl +++ b/src/fabric/src/fabric2_server.erl @@ -42,6 +42,21 @@ -define(FDB_DIRECTORY, fdb_directory). -define(FDB_CLUSTER, fdb_cluster). -define(DEFAULT_FDB_DIRECTORY, <<"couchdb">>). +-define(TX_OPTIONS_SECTION, "fdb_tx_options"). +-define(RELISTEN_DELAY, 1000). + +-define(DEFAULT_TIMEOUT_MSEC, "60000"). +-define(DEFAULT_RETRY_LIMIT, "100"). + +-define(TX_OPTIONS, #{ + machine_id => {binary, undefined}, + datacenter_id => {binary, undefined}, + transaction_logging_max_field_length => {integer, undefined}, + timeout => {integer, ?DEFAULT_TIMEOUT_MSEC}, + retry_limit => {integer, ?DEFAULT_RETRY_LIMIT}, + max_retry_delay => {integer, undefined}, + size_limit => {integer, undefined} +}). start_link() -> @@ -79,16 +94,7 @@ init(_) -> {read_concurrency, true}, {write_concurrency, true} ]), - - {Cluster, Db} = case application:get_env(fabric, eunit_run) of - {ok, true} -> - {<<"eunit_test">>, erlfdb_util:get_test_db([empty])}; - undefined -> - ClusterFileStr = config:get("erlfdb", "cluster_file", ?CLUSTER_FILE), - {ok, ConnectionStr} = file:read_file(ClusterFileStr), - DbHandle = erlfdb:open(iolist_to_binary(ClusterFileStr)), - {string:trim(ConnectionStr), DbHandle} - end, + {Cluster, Db} = get_db_and_cluster([empty]), application:set_env(fabric, ?FDB_CLUSTER, Cluster), application:set_env(fabric, db, Db), @@ -99,7 +105,7 @@ init(_) -> [?DEFAULT_FDB_DIRECTORY] end, application:set_env(fabric, ?FDB_DIRECTORY, Dir), - + config:subscribe_for_changes([?TX_OPTIONS_SECTION]), {ok, nil}. @@ -115,6 +121,27 @@ handle_cast(Msg, St) -> {stop, {bad_cast, Msg}, St}. +handle_info({config_change, ?TX_OPTIONS_SECTION, _K, deleted, _}, St) -> + % Since we don't know the exact default values to reset the options + % to we recreate the db handle instead which will start with a default + % handle and re-apply all the options + {_Cluster, NewDb} = get_db_and_cluster([]), + application:set_env(fabric, db, NewDb), + {noreply, St}; + +handle_info({config_change, ?TX_OPTIONS_SECTION, K, V, _}, St) -> + {ok, Db} = application:get_env(fabric, db), + apply_tx_options(Db, [{K, V}]), + {noreply, St}; + +handle_info({gen_event_EXIT, _Handler, _Reason}, St) -> + erlang:send_after(?RELISTEN_DELAY, self(), restart_config_listener), + {noreply, St}; + +handle_info(restart_config_listener, St) -> + config:subscribe_for_changes([?TX_OPTIONS_SECTION]), + {noreply, St}; + handle_info(Msg, St) -> {stop, {bad_info, Msg}, St}. @@ -142,3 +169,51 @@ get_env(Key) -> Value -> Value end. + + +get_db_and_cluster(EunitDbOpts) -> + {Cluster, Db} = case application:get_env(fabric, eunit_run) of + {ok, true} -> + {<<"eunit_test">>, erlfdb_util:get_test_db(EunitDbOpts)}; + undefined -> + ClusterFileStr = config:get("erlfdb", "cluster_file", ?CLUSTER_FILE), + {ok, ConnectionStr} = file:read_file(ClusterFileStr), + DbHandle = erlfdb:open(iolist_to_binary(ClusterFileStr)), + {string:trim(ConnectionStr), DbHandle} + end, + apply_tx_options(Db, config:get(?TX_OPTIONS_SECTION)), + {Cluster, Db}. + + +apply_tx_options(Db, Cfg) -> + maps:map(fun(Option, {Type, Default}) -> + case lists:keyfind(atom_to_list(Option), 1, Cfg) of + false -> + case Default of + undefined -> ok; + _Defined -> apply_tx_option(Db, Option, Default, Type) + end; + {_K, Val} -> + apply_tx_option(Db, Option, Val, Type) + end + end, ?TX_OPTIONS). + + +apply_tx_option(Db, Option, Val, integer) -> + try + erlfdb:set_option(Db, Option, list_to_integer(Val)) + catch + error:badarg -> + Msg = "~p : Invalid integer tx option ~p = ~p", + couch_log:error(Msg, [?MODULE, Option, Val]) + end; + +apply_tx_option(Db, Option, Val, binary) -> + BinVal = list_to_binary(Val), + case size(BinVal) < 16 of + true -> + erlfdb:set_option(Db, Option, BinVal); + false -> + Msg = "~p : String tx option ~p is larger than 16 bytes", + couch_log:error(Msg, [?MODULE, Option]) + end. diff --git a/src/fabric/test/fabric2_tx_options_tests.erl b/src/fabric/test/fabric2_tx_options_tests.erl new file mode 100644 index 000000000..34cb6e180 --- /dev/null +++ b/src/fabric/test/fabric2_tx_options_tests.erl @@ -0,0 +1,103 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(fabric2_tx_options_tests). + + +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("eunit/include/eunit.hrl"). +-include_lib("couch/include/couch_db.hrl"). +-include("fabric2_test.hrl"). +-include("fabric2.hrl"). + + +fdb_tx_options_test_() -> + { + "Test setting default transaction options", + setup, + fun() -> + meck:new(erlfdb, [passthrough]), + % erlfdb, rexi and mem3 are all dependent apps for fabric. We make + % sure to start them so when fabric is started during the test it + % already has its dependencies + test_util:start_couch([erlfdb, rexi, mem3, ctrace, fabric]) + end, + fun(Ctx) -> + meck:unload(), + + config:delete("fdb_tx_options", "size_limit", false), + config:delete("fdb_tx_options", "max_retry_delay", false), + config:delete("fdb_tx_options", "machine_id", false), + config:delete("fdb_tx_options", "datacenter_id", false), + + test_util:stop_couch(Ctx) + end, + with([ + ?TDEF(options_take_effect), + ?TDEF(can_configure_options_at_runtime) + ]) + }. + + +options_take_effect(_) -> + ok = application:stop(fabric), + + % Try one of each type including some invalid values + config:set("fdb_tx_options", "size_limit", "150000", false), + config:set("fdb_tx_options", "max_retry_delay", "badness", false), + config:set("fdb_tx_options", "machine_id", "123abc", false), + TooLong = ["x" || _ <- lists:seq(1, 1000)], + config:set("fdb_tx_options", "datacenter_id", TooLong, false), + ok = application:start(fabric), + + DbName = ?tempdb(), + {ok, Db} = fabric2_db:create(DbName, [?ADMIN_CTX]), + ?assertError({erlfdb_error, ?TRANSACTION_TOO_LARGE}, + add_large_doc(Db, 200000)), + ok = fabric2_db:delete(DbName, [?ADMIN_CTX]). + + +can_configure_options_at_runtime(_) -> + meck:expect(erlfdb, set_option, fun(Fdb, Option, Val) -> + meck:passthrough([Fdb, Option, Val]) + end), + + meck:reset(erlfdb), + + config:set("fdb_tx_options", "size_limit", "150000", false), + meck:wait(erlfdb, set_option, ['_', size_limit, 150000], 4000), + + DbName = ?tempdb(), + + {ok, Db} = fabric2_db:create(DbName, [?ADMIN_CTX]), + ?assertError({erlfdb_error, ?TRANSACTION_TOO_LARGE}, + add_large_doc(Db, 200000)), + + meck:reset(erlfdb), + + config:delete("fdb_tx_options", "size_limit", false), + % Assert that we get a new handle and are setting our default values + meck:wait(erlfdb, set_option, ['_', timeout, '_'], 4000), + erase(?PDICT_DB_KEY), + + {ok, Db1} = fabric2_db:open(DbName, [?ADMIN_CTX]), + ?assertMatch({ok, _}, add_large_doc(Db1, 200000)), + + ok = fabric2_db:delete(DbName, [?ADMIN_CTX]). + + +add_large_doc(Db, Size) -> + Doc = #doc{ + id = fabric2_util:uuid(), + body = {[{<<"x">>, crypto:strong_rand_bytes(Size)}]} + }, + fabric2_db:update_doc(Db, Doc). -- cgit v1.2.1 From e889cf070a7b09a08ed7ae1ac1f7ffb716e6ef02 Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Mon, 27 Apr 2020 14:37:51 -0500 Subject: Fix mango test suite --- src/mango/src/mango_idx.erl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/mango/src/mango_idx.erl b/src/mango/src/mango_idx.erl index 9907cc781..37b6e03eb 100644 --- a/src/mango/src/mango_idx.erl +++ b/src/mango/src/mango_idx.erl @@ -405,7 +405,8 @@ index(SelectorName, Selector) -> {[{<<"fields">>,{[{<<"location">>,<<"asc">>}]}}, {SelectorName,{Selector}}]}, [{<<"def">>,{[{<<"fields">>,[<<"location">>]}]}}], - <<"ready">> + <<"ready">>, + false }. get_partial_filter_all_docs_test() -> -- cgit v1.2.1 From 63e2d08e6d580f89f95778e46d8c5a3f76ffa052 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Mon, 16 Mar 2020 22:39:16 +0000 Subject: View is partitioned if db and view are partitioned We've seen a crash if DbPartitioned is false and ViewPartitioned is true, which is obviously nonsense. The effect of the `nocase` is the termination of the couch_index_server gen_server, which is a serious amplification of a small (user-initiated) oddity. --- src/couch_mrview/src/couch_mrview_index.erl | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/src/couch_mrview/src/couch_mrview_index.erl b/src/couch_mrview/src/couch_mrview_index.erl index cc013c5bd..68f1d2322 100644 --- a/src/couch_mrview/src/couch_mrview_index.erl +++ b/src/couch_mrview/src/couch_mrview_index.erl @@ -258,16 +258,7 @@ set_partitioned(Db, State) -> DbPartitioned = couch_db:is_partitioned(Db), ViewPartitioned = couch_util:get_value( <<"partitioned">>, DesignOpts, DbPartitioned), - IsPartitioned = case {DbPartitioned, ViewPartitioned} of - {true, true} -> - true; - {true, false} -> - false; - {false, false} -> - false; - _ -> - throw({bad_request, <<"invalid partition option">>}) - end, + IsPartitioned = DbPartitioned andalso ViewPartitioned, State#mrst{partitioned = IsPartitioned}. -- cgit v1.2.1 From 19d8582ef708d18408d1fde176d43602140b65ba Mon Sep 17 00:00:00 2001 From: Garren Smith Date: Tue, 28 Apr 2020 10:22:58 +0200 Subject: Remove etag from changes and _list_dbs --- src/chttpd/src/chttpd_db.erl | 22 ++++++++-------------- src/chttpd/src/chttpd_misc.erl | 19 +++++-------------- 2 files changed, 13 insertions(+), 28 deletions(-) diff --git a/src/chttpd/src/chttpd_db.erl b/src/chttpd/src/chttpd_db.erl index 078009590..7cafabcc8 100644 --- a/src/chttpd/src/chttpd_db.erl +++ b/src/chttpd/src/chttpd_db.erl @@ -42,7 +42,6 @@ % Accumulator for changes_callback function -record(cacc, { - etag, feed, mochi, prepend = "", @@ -103,16 +102,12 @@ handle_changes_req_tx(#httpd{}=Req, Db) -> Max = chttpd:chunked_response_buffer_size(), case ChangesArgs#changes_args.feed of "normal" -> - Etag = <<"foo">>, - chttpd:etag_respond(Req, Etag, fun() -> - Acc0 = #cacc{ - feed = normal, - etag = Etag, - mochi = Req, - threshold = Max - }, - ChangesFun({fun changes_callback/2, Acc0}) - end); + Acc0 = #cacc{ + feed = normal, + mochi = Req, + threshold = Max + }, + ChangesFun({fun changes_callback/2, Acc0}); Feed when Feed =:= "continuous"; Feed =:= "longpoll"; Feed =:= "eventsource" -> couch_stats:increment_counter([couchdb, httpd, clients_requesting_changes]), Acc0 = #cacc{ @@ -183,10 +178,9 @@ changes_callback({stop, _EndSeq}, #cacc{feed = eventsource} = Acc) -> % callbacks for longpoll and normal (single JSON Object) changes_callback(start, #cacc{feed = normal} = Acc) -> - #cacc{etag = Etag, mochi = Req} = Acc, + #cacc{mochi = Req} = Acc, FirstChunk = "{\"results\":[\n", - {ok, Resp} = chttpd:start_delayed_json_response(Req, 200, - [{"ETag",Etag}], FirstChunk), + {ok, Resp} = chttpd:start_delayed_json_response(Req, 200, [], FirstChunk), {ok, Acc#cacc{mochi = Resp, responding = true}}; changes_callback(start, Acc) -> #cacc{mochi = Req} = Acc, diff --git a/src/chttpd/src/chttpd_misc.erl b/src/chttpd/src/chttpd_misc.erl index 843c3fe7e..565b1210c 100644 --- a/src/chttpd/src/chttpd_misc.erl +++ b/src/chttpd/src/chttpd_misc.erl @@ -130,20 +130,11 @@ handle_all_dbs_req(#httpd{method='GET'}=Req) -> {skip, Skip} ], - % Eventually the Etag for this request will be derived - % from the \xFFmetadataVersion key in fdb - Etag = <<"foo">>, - - {ok, Resp} = chttpd:etag_respond(Req, Etag, fun() -> - {ok, Resp} = chttpd:start_delayed_json_response(Req, 200, [{"ETag",Etag}]), - Callback = fun all_dbs_callback/2, - Acc = #vacc{req=Req,resp=Resp}, - fabric2_db:list_dbs(Callback, Acc, Options) - end), - case is_record(Resp, vacc) of - true -> {ok, Resp#vacc.resp}; - _ -> {ok, Resp} - end; + {ok, Resp} = chttpd:start_delayed_json_response(Req, 200, []), + Callback = fun all_dbs_callback/2, + Acc = #vacc{req=Req,resp=Resp}, + {ok, Acc1} = fabric2_db:list_dbs(Callback, Acc, Options), + {ok, Acc1#vacc.resp}; handle_all_dbs_req(Req) -> send_method_not_allowed(Req, "GET,HEAD"). -- cgit v1.2.1 From c6d3a7bdd4a43c069414f6c576f8be2e116d9474 Mon Sep 17 00:00:00 2001 From: Eric Avdey Date: Tue, 28 Apr 2020 11:32:20 -0300 Subject: Temporary disable fabric2_tx_options_tests --- src/fabric/test/fabric2_tx_options_tests.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fabric/test/fabric2_tx_options_tests.erl b/src/fabric/test/fabric2_tx_options_tests.erl index 34cb6e180..2cffedc20 100644 --- a/src/fabric/test/fabric2_tx_options_tests.erl +++ b/src/fabric/test/fabric2_tx_options_tests.erl @@ -20,7 +20,7 @@ -include("fabric2.hrl"). -fdb_tx_options_test_() -> +fdb_tx_options_test_DISABLE() -> { "Test setting default transaction options", setup, -- cgit v1.2.1 From 607f4c1f1095101980fe764550e7e3ca2b8f39b8 Mon Sep 17 00:00:00 2001 From: Joan Touzet Date: Tue, 28 Apr 2020 13:46:14 -0400 Subject: Suppress offsetof gcc warnings for SM60 Mozilla did this years ago: https://hg.mozilla.org/mozilla-central/rev/41d9d32ab5a7 --- src/couch/rebar.config.script | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/couch/rebar.config.script b/src/couch/rebar.config.script index ad897e8e3..320584b59 100644 --- a/src/couch/rebar.config.script +++ b/src/couch/rebar.config.script @@ -122,7 +122,7 @@ end. }; {unix, _} when SMVsn == "60" -> { - "-DXP_UNIX -I/usr/include/mozjs-60 -I/usr/local/include/mozjs-60 -std=c++14", + "-DXP_UNIX -I/usr/include/mozjs-60 -I/usr/local/include/mozjs-60 -std=c++14 -Wno-invalid-offsetof", "-L/usr/local/lib -std=c++14 -lmozjs-60 -lm" }; {unix, _} when SMVsn == "68" -> -- cgit v1.2.1 From cf0b032380b0b416b500f740512f4755ea299717 Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Tue, 28 Apr 2020 14:28:11 -0400 Subject: Re-enable the tx options tests And an extra level of error checking to erlfdb:set_option since it could fail if we forget to update erlfdb dependency or fdb server version is too old. That operation can fail with an error:badarg which is exactly how list_to_integer fails and result in a confusing log message. --- src/fabric/src/fabric2_server.erl | 16 ++++++++++++++-- src/fabric/test/fabric2_tx_options_tests.erl | 2 +- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/src/fabric/src/fabric2_server.erl b/src/fabric/src/fabric2_server.erl index 204246ae2..957efff3b 100644 --- a/src/fabric/src/fabric2_server.erl +++ b/src/fabric/src/fabric2_server.erl @@ -201,7 +201,7 @@ apply_tx_options(Db, Cfg) -> apply_tx_option(Db, Option, Val, integer) -> try - erlfdb:set_option(Db, Option, list_to_integer(Val)) + set_option(Db, Option, list_to_integer(Val)) catch error:badarg -> Msg = "~p : Invalid integer tx option ~p = ~p", @@ -212,8 +212,20 @@ apply_tx_option(Db, Option, Val, binary) -> BinVal = list_to_binary(Val), case size(BinVal) < 16 of true -> - erlfdb:set_option(Db, Option, BinVal); + set_option(Db, Option, BinVal); false -> Msg = "~p : String tx option ~p is larger than 16 bytes", couch_log:error(Msg, [?MODULE, Option]) end. + + +set_option(Db, Option, Val) -> + try + erlfdb:set_option(Db, Option, Val) + catch + % This could happen if the option is not supported by erlfdb or + % fdbsever. + error:badarg -> + Msg = "~p : Could not set fdb tx option ~p = ~p", + couch_log:error(Msg, [?MODULE, Option, Val]) + end. diff --git a/src/fabric/test/fabric2_tx_options_tests.erl b/src/fabric/test/fabric2_tx_options_tests.erl index 2cffedc20..34cb6e180 100644 --- a/src/fabric/test/fabric2_tx_options_tests.erl +++ b/src/fabric/test/fabric2_tx_options_tests.erl @@ -20,7 +20,7 @@ -include("fabric2.hrl"). -fdb_tx_options_test_DISABLE() -> +fdb_tx_options_test_() -> { "Test setting default transaction options", setup, -- cgit v1.2.1 From 97227c4f3a8cdfdcf15fd4ff4a6983873595544b Mon Sep 17 00:00:00 2001 From: Jay Doane Date: Sun, 26 Apr 2020 15:22:04 -0700 Subject: Improve robustness of couch expiring cache test In its current incarnation, the so-called "simple lifecycle" test is prone to numerous failures in the CI system [1], doubtless because it's riddled with race conditions. The original author makes many assumptions about how quickly an (actual, unmocked) FDB instance will respond to a request. The primary goal is to stop failing CI builds, while other considerations include: keeping the run time of the test as low as possible, keeping the code coverage high, and documenting the known races. Specifically: - Increase the `stale` and `expired` times by a factor of 5 to decrease sensitivity to poor FDB performance. - Change default timer from `erlang:system_time/1` to `os:timestamp` on the assumption that the latter is less prone to warping [2]. - Decrease the period of the cache server reaper by half to increase accuracy of eviction time. - Inline and modify the `test_util:wait` code to make the timer explicit, and emphasize that `timer:delay/1` only works with millisecond resolution. - Don't fail the test if it can't get a fresh lookup immediately after insertion, but let it continue on to the next race, at least to the point of expiration and deletion, which continue to be asserted. - Factor `Timeout` and `Interval` to allow declarations near the other hard-coded parameters. - Move cache server `Opts` into `setup/0` and eliminate `start_link/0`. - Double the overall test timeout to 20 seconds. This has soaked for hundreds of runs on a 5 year old laptop, but the real test is the CI system. Should this test continue to fail CI builds, additional improvements could include mocking the timer and/or FDB layer to eliminate the variability of an integrated system. [1] https://ci-couchdb.apache.org/blue/organizations/jenkins/jenkins-cm1%2FPullRequests/detail/PR-2813/10/pipeline [2] http://erlang.org/doc/apps/erts/time_correction.html#terminology --- .../src/couch_expiring_cache_server.erl | 10 +- .../test/couch_expiring_cache_tests.erl | 101 ++++++++++++++------- 2 files changed, 77 insertions(+), 34 deletions(-) diff --git a/src/couch_expiring_cache/src/couch_expiring_cache_server.erl b/src/couch_expiring_cache/src/couch_expiring_cache_server.erl index eb74e6eb2..74c432e25 100644 --- a/src/couch_expiring_cache/src/couch_expiring_cache_server.erl +++ b/src/couch_expiring_cache/src/couch_expiring_cache_server.erl @@ -17,6 +17,7 @@ -callback start_link() -> {ok, pid()} | ignore | {error, term()}. -export([ + now_ts/0, start_link/2 ]). @@ -80,10 +81,10 @@ handle_info(remove_expired, St) -> largest_elapsed := LargestElapsed } = St, - NowTS = erlang:system_time(?TIME_UNIT), + NowTS = now_ts(), OldestTS = max(OldestTS0, couch_expiring_cache_fdb:clear_range_to(Name, NowTS, BatchSize)), - Elapsed = erlang:system_time(?TIME_UNIT) - NowTS, + Elapsed = now_ts() - NowTS, {noreply, St#{ timer_ref := schedule_remove_expired(Period, MaxJitter), @@ -108,6 +109,11 @@ code_change(_OldVsn, St, _Extra) -> {ok, St}. +now_ts() -> + {Mega, Sec, Micro} = os:timestamp(), + ((Mega * 1000000) + Sec) * 1000 + Micro div 1000. + + %% Private diff --git a/src/couch_expiring_cache/test/couch_expiring_cache_tests.erl b/src/couch_expiring_cache/test/couch_expiring_cache_tests.erl index 2e06fcc5a..0780b8847 100644 --- a/src/couch_expiring_cache/test/couch_expiring_cache_tests.erl +++ b/src/couch_expiring_cache/test/couch_expiring_cache_tests.erl @@ -18,16 +18,9 @@ -include_lib("couch_expiring_cache/include/couch_expiring_cache.hrl"). --define(CACHE_NAME, <<"test">>). +-define(CACHE_NAME, atom_to_binary(?MODULE, utf8)). - -start_link() -> - Opts = #{ - cache_name => ?CACHE_NAME, - period => 20, - max_jitter => 0 - }, - couch_expiring_cache_server:start_link(?MODULE, Opts). +-define(FOREVER, 576460752303423488). % max int 64 bit couch_expiring_cache_basic_test_() -> @@ -56,7 +49,11 @@ teardown_couch(Ctx) -> setup() -> - {ok, Pid} = start_link(), + Opts = #{ + cache_name => ?CACHE_NAME, + period => 10, + max_jitter => 0}, + {ok, Pid} = couch_expiring_cache_server:start_link(?MODULE, Opts), true = unlink(Pid), #{pid => Pid}. @@ -66,10 +63,18 @@ teardown(#{pid := Pid}) -> simple_lifecycle(_) -> - {timeout, 10, ?_test(begin - Now = erlang:system_time(?TIME_UNIT), - StaleTS = Now + 100, - ExpiresTS = Now + 200, + % The entire test is racing against FDB being faster than timeout seconds + {timeout, 20, ?_test(begin + Start = couch_expiring_cache_server:now_ts(), + % Race Alert! + % We're betting on FDB returning a lookup faster than these: + Stale = 500, + Expires = 1000, + Timeout = 5000, + Interval = 5, + + StaleTS = Start + Stale, + ExpiresTS = Start + Expires, Name = ?CACHE_NAME, Key = <<"key">>, Val = <<"val">>, @@ -77,34 +82,66 @@ simple_lifecycle(_) -> ?assertEqual(ok, couch_expiring_cache_fdb:clear_all(Name)), ?assertEqual(not_found, couch_expiring_cache:lookup(Name, Key)), ?assertEqual([], entries(Name)), - ?assertEqual(ok, - couch_expiring_cache:insert(Name, Key, Val, StaleTS, ExpiresTS)), - ?assertEqual({fresh, Val}, couch_expiring_cache:lookup(Name, Key)), - ok = wait_lookup(Name, Key, {stale, Val}), + ?assertEqual(ok, couch_expiring_cache:insert(Name, Key, Val, + StaleTS, ExpiresTS)), + ok = attempt_fresh_and_stale_lookups(Name, Key, Timeout, Interval), % Refresh the existing key with updated timestamps - ?assertEqual(ok, - couch_expiring_cache:insert(Name, Key, Val, - StaleTS + 100, ExpiresTS + 100)), - ?assertEqual({fresh, Val}, couch_expiring_cache:lookup(Name, Key)), + Refresh = couch_expiring_cache_server:now_ts(), + ?assertEqual(ok, couch_expiring_cache:insert(Name, Key, Val, + Refresh + Stale, Refresh + Expires)), + ok = attempt_fresh_and_stale_lookups(Name, Key, Timeout, Interval), ?assertEqual(1, length(entries(Name))), - ok = wait_lookup(Name, Key, {stale, Val}), - ok = wait_lookup(Name, Key, expired), - ok = wait_lookup(Name, Key, not_found), - ?assertEqual([], entries(Name)), - ?assertEqual(not_found, couch_expiring_cache:lookup(Name, Key)) + % These last 2 are also races, betting on FDB to be reasonably + % fast on the home stretch + ok = wait_lookup(Name, Key, expired, Timeout, Interval), + ok = wait_lookup(Name, Key, not_found, Timeout, Interval), + ?assertEqual([], entries(Name)) end)}. +% In this race we're betting on FDB to take less than `Stale` and then +% `Expired` milliseconds to respond +attempt_fresh_and_stale_lookups(Name, Key, Timeout, Interval) -> + case couch_expiring_cache:lookup(Name, Key) of + {fresh, Val} -> + % We won that race, let's bet on another! + ok = wait_lookup(Name, Key, {stale, Val}, Timeout, Interval); + _ -> + % Unlucky! But don't fail the test just yet... + ok + end. + + entries(Name) -> - FarFuture = erlang:system_time(?TIME_UNIT) * 2, - couch_expiring_cache_fdb:get_range_to(Name, FarFuture, _Limit=100). + couch_expiring_cache_fdb:get_range_to(Name, ?FOREVER, _Limit=100). -wait_lookup(Name, Key, Expect) -> - test_util:wait(fun() -> +% This lookup races against Timeout +wait_lookup(Name, Key, Expect, Timeout, Interval) -> + wait(fun() -> case couch_expiring_cache:lookup(Name, Key) of Expect -> ok; _ -> wait end - end, _Timeout = 1000, _PollingInterval = 10). + end, Timeout, Interval). + + +wait(Fun, Timeout, Delay) -> + Now = couch_expiring_cache_server:now_ts(), + wait(Fun, Timeout, Delay, Now, Now). + + +wait(_Fun, Timeout, _Delay, Started, Prev) when Prev - Started > Timeout -> + timeout; + +wait(Fun, Timeout, Delay, Started, _Prev) -> + case Fun() of + wait -> + % http://erlang.org/doc/man/timer.html#sleep-1 + ok = timer:sleep(Delay), % always millisecond + wait(Fun, Timeout, Delay, Started, + couch_expiring_cache_server:now_ts()); + Else -> + Else + end. -- cgit v1.2.1 From 44e0f0fcb06727f0e41e1995fc7b24421457439d Mon Sep 17 00:00:00 2001 From: Joan Touzet Date: Tue, 28 Apr 2020 14:31:06 -0400 Subject: Drop os_mon from dependencies --- INSTALL.Unix.md | 2 +- rebar.config.script | 2 +- rel/reltool.config | 2 -- src/couch/src/couch.app.src | 1 - src/couch/src/couch.erl | 1 - 5 files changed, 2 insertions(+), 6 deletions(-) diff --git a/INSTALL.Unix.md b/INSTALL.Unix.md index 1934e9be9..cb45e9ad4 100644 --- a/INSTALL.Unix.md +++ b/INSTALL.Unix.md @@ -90,7 +90,7 @@ You can install the dependencies by running: sudo yum install autoconf autoconf-archive automake \ curl-devel erlang-asn1 erlang-erts erlang-eunit \ - erlang-os_mon erlang-xmerl help2man \ + erlang-xmerl help2man \ js-devel-1.8.5 libicu-devel libtool perl-Test-Harness You can install the Node.JS dependencies via [NodeSource](https://github.com/nodesource/distributions#rpminstall). diff --git a/rebar.config.script b/rebar.config.script index 0e9c9781c..02d0df003 100644 --- a/rebar.config.script +++ b/rebar.config.script @@ -206,7 +206,7 @@ AddConfig = [ {plt_location, local}, {plt_location, COUCHDB_ROOT}, {plt_extra_apps, [ - asn1, compiler, crypto, inets, kernel, os_mon, runtime_tools, + asn1, compiler, crypto, inets, kernel, runtime_tools, sasl, setup, ssl, stdlib, syntax_tools, xmerl]}, {warnings, [unmatched_returns, error_handling, race_conditions]}]}, {post_hooks, [{compile, "escript support/build_js.escript"}]} diff --git a/rel/reltool.config b/rel/reltool.config index 796019298..6acba378b 100644 --- a/rel/reltool.config +++ b/rel/reltool.config @@ -19,7 +19,6 @@ crypto, inets, kernel, - os_mon, runtime_tools, sasl, ssl, @@ -77,7 +76,6 @@ {app, crypto, [{incl_cond, include}]}, {app, inets, [{incl_cond, include}]}, {app, kernel, [{incl_cond, include}]}, - {app, os_mon, [{incl_cond, include}]}, {app, public_key, [{incl_cond, include}]}, {app, runtime_tools, [{incl_cond, include}]}, {app, sasl, [{incl_cond, include}]}, diff --git a/src/couch/src/couch.app.src b/src/couch/src/couch.app.src index 12ec29e12..6116c79ba 100644 --- a/src/couch/src/couch.app.src +++ b/src/couch/src/couch.app.src @@ -33,7 +33,6 @@ sasl, inets, ssl, - os_mon, % Upstream deps ibrowse, diff --git a/src/couch/src/couch.erl b/src/couch/src/couch.erl index 60a8b6626..1c912ac2a 100644 --- a/src/couch/src/couch.erl +++ b/src/couch/src/couch.erl @@ -23,7 +23,6 @@ deps() -> [ sasl, inets, - os_mon, crypto, public_key, ssl, -- cgit v1.2.1 From e48da9279b660f9e67d04dc504f17e5e1b9855d5 Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Wed, 29 Apr 2020 13:52:04 -0400 Subject: Fix a flaky fdbcore index test --- src/fabric/test/fabric2_index_tests.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/fabric/test/fabric2_index_tests.erl b/src/fabric/test/fabric2_index_tests.erl index e0c3e8ba9..fa3a14d61 100644 --- a/src/fabric/test/fabric2_index_tests.erl +++ b/src/fabric/test/fabric2_index_tests.erl @@ -35,7 +35,7 @@ index_test_() -> ?TDEF(single_update), ?TDEF(multiple_updates), ?TDEF(skip_db_if_no_ddocs), - ?TDEF(ignore_deleted_dbs), + ?TDEF(ignore_deleted_dbs, 10), ?TDEF(check_gen_server_messages) ]) } @@ -163,7 +163,7 @@ ignore_deleted_dbs(#{}) -> lists:foreach(fun(_) -> RandomDbName = fabric2_util:uuid(), fabric2_index:db_updated(RandomDbName) - end, lists:seq(1, 10000)), + end, lists:seq(1, 1000)), test_util:wait(fun() -> case table_sizes() =:= 0 of -- cgit v1.2.1 From 45a899aad444e3898fe94506f6f50991b873ccce Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Wed, 29 Apr 2020 14:01:32 -0400 Subject: Fix list_dbs_info_tx_too_old flaky test On CI creating a 100 dbs in a row was too much to do in 5 seconds so bump it to 15. --- src/fabric/test/fabric2_db_crud_tests.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fabric/test/fabric2_db_crud_tests.erl b/src/fabric/test/fabric2_db_crud_tests.erl index d5025b987..9deb8dd26 100644 --- a/src/fabric/test/fabric2_db_crud_tests.erl +++ b/src/fabric/test/fabric2_db_crud_tests.erl @@ -46,7 +46,7 @@ crud_test_() -> ?TDEF_FE(list_dbs_info), ?TDEF_FE(list_dbs_info_partial), ?TDEF_FE(list_dbs_tx_too_old), - ?TDEF_FE(list_dbs_info_tx_too_old), + ?TDEF_FE(list_dbs_info_tx_too_old, 15), ?TDEF_FE(list_deleted_dbs_info), ?TDEF_FE(list_deleted_dbs_info_user_fun), ?TDEF_FE(list_deleted_dbs_info_user_fun_partial), -- cgit v1.2.1 From ba9fc3c3606fe6ae6d361b9a4322ecf07e4dcec5 Mon Sep 17 00:00:00 2001 From: Joan Touzet Date: Tue, 28 Apr 2020 22:08:17 +0000 Subject: Add Ubuntu Focal (20.04) + SM68 to Jenkins --- .gitignore | 1 + build-aux/Jenkinsfile.full | 49 ++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 48 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index 645817b76..6223d7322 100644 --- a/.gitignore +++ b/.gitignore @@ -14,6 +14,7 @@ .eunit/ cover/ core +debian/ log apache-couchdb-*/ bin/ diff --git a/build-aux/Jenkinsfile.full b/build-aux/Jenkinsfile.full index d88525415..b3b477bea 100644 --- a/build-aux/Jenkinsfile.full +++ b/build-aux/Jenkinsfile.full @@ -33,7 +33,7 @@ mkdir couchdb cp ${WORKSPACE}/apache-couchdb-*.tar.gz couchdb tar -xf ${WORKSPACE}/apache-couchdb-*.tar.gz -C couchdb cd couchdb-pkg -make ${platform} PLATFORM=${platform} +make ''' cleanup_and_save = ''' @@ -417,6 +417,50 @@ pipeline { } // post } // stage + stage('Ubuntu Focal') { + agent { + docker { + image 'couchdbdev/ubuntu-focal-erlang-20.3.8.25-1:latest' + label 'docker' + alwaysPull true + args "${DOCKER_ARGS}" + } + } + environment { + platform = 'focal' + sm_ver = '68' + } + stages { + stage('Build from tarball & test') { + steps { + unstash 'tarball' + sh( script: build_and_test ) + } + post { + always { + junit '**/.eunit/*.xml, **/_build/*/lib/couchdbtest/*.xml, **/src/mango/nosetests.xml, **/test/javascript/junit.xml' + } + } + } + stage('Build CouchDB packages') { + steps { + sh( script: make_packages ) + sh( script: cleanup_and_save ) + } + post { + success { + archiveArtifacts artifacts: 'pkgs/**', fingerprint: true + } + } + } + } // stages + post { + cleanup { + sh 'rm -rf ${WORKSPACE}/*' + } + } // post + } // stage + stage('Debian Stretch') { agent { docker { @@ -697,11 +741,12 @@ pipeline { cp js/debian-stretch/*.deb pkgs/stretch reprepro -b couchdb-pkg/repo includedeb stretch pkgs/stretch/*.deb cp js/debian-buster/*.deb pkgs/stretch - reprepro -b couchdb-pkg/repo includedeb stretch pkgs/buster/*.deb + reprepro -b couchdb-pkg/repo includedeb buster pkgs/buster/*.deb cp js/ubuntu-xenial/*.deb pkgs/xenial reprepro -b couchdb-pkg/repo includedeb xenial pkgs/xenial/*.deb cp js/ubuntu-bionic/*.deb pkgs/bionic reprepro -b couchdb-pkg/repo includedeb bionic pkgs/bionic/*.deb + reprepro -b couchdb-pkg/repo includedeb focal pkgs/focal/*.deb ''' echo 'Building CentOS repos...' -- cgit v1.2.1 From 4f3d5aef254cf24224d05072ba175a281a3b1df6 Mon Sep 17 00:00:00 2001 From: Joan Touzet Date: Wed, 29 Apr 2020 19:53:30 -0400 Subject: Bump docs, fauxton --- rebar.config.script | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rebar.config.script b/rebar.config.script index 02d0df003..d8afc10e3 100644 --- a/rebar.config.script +++ b/rebar.config.script @@ -151,9 +151,9 @@ DepDescs = [ %% Non-Erlang deps {docs, {url, "https://github.com/apache/couchdb-documentation"}, - {tag, "3.0.0"}, [raw]}, + {tag, "3.1.0-RC1"}, [raw]}, {fauxton, {url, "https://github.com/apache/couchdb-fauxton"}, - {tag, "v1.2.3"}, [raw]}, + {tag, "v1.2.4"}, [raw]}, %% Third party deps {folsom, "folsom", {tag, "CouchDB-0.8.3"}}, {hyper, "hyper", {tag, "CouchDB-2.2.0-6"}}, -- cgit v1.2.1 From ebdfbba7dff8f1cac0440e79052ada81d675d50a Mon Sep 17 00:00:00 2001 From: Joan Touzet Date: Thu, 30 Apr 2020 09:11:20 -0700 Subject: Fix python-black target for Windows --- Makefile.win | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile.win b/Makefile.win index 6c160e8fd..7e14a53cc 100644 --- a/Makefile.win +++ b/Makefile.win @@ -190,7 +190,7 @@ python-black: .venv/bin/black @python.exe -c "import sys; exit(1 if sys.version_info >= (3,6) else 0)" || \ .venv\Scripts\black.exe --check \ --exclude="build/|buck-out/|dist/|_build/|\.git/|\.hg/|\.mypy_cache/|\.nox/|\.tox/|\.venv/|src/rebar/pr2relnotes.py|src/fauxton" \ - build-aux\*.py dev\run test\javascript\run src\mango\test\*.py src\docs\src\conf.py src\docs\ext\*.py . + build-aux dev\run test\javascript\run src\mango\test src\docs\src\conf.py src\docs\ext . python-black-update: .venv/bin/black @python.exe -c "import sys; exit(1 if sys.version_info < (3,6) else 0)" || \ @@ -198,7 +198,7 @@ python-black-update: .venv/bin/black @python.exe -c "import sys; exit(1 if sys.version_info >= (3,6) else 0)" || \ .venv\Scripts\black.exe \ --exclude="build/|buck-out/|dist/|_build/|\.git/|\.hg/|\.mypy_cache/|\.nox/|\.tox/|\.venv/|src/rebar/pr2relnotes.py|src/fauxton" \ - build-aux\*.py dev\run test\javascript\run src\mango\test\*.py src\docs\src\conf.py src\docs\ext\*.py . + build-aux dev\run test\javascript\run src\mango\test src\docs\src\conf.py src\docs\ext . .PHONY: elixir elixir: export MIX_ENV=integration -- cgit v1.2.1 From baba64bfe47ab548231375f95b1e1a2a68d95bcc Mon Sep 17 00:00:00 2001 From: Juanjo Rodriguez Date: Sun, 26 Apr 2020 19:57:01 +0200 Subject: Port replicator db tests to elixir --- test/elixir/README.md | 4 +- test/elixir/test/replicator_db_bad_rep_id_test.exs | 81 ++++++++++++++ test/elixir/test/replicator_db_by_doc_id_test.exs | 121 +++++++++++++++++++++ test/javascript/tests/replicator_db_bad_rep_id.js | 1 + test/javascript/tests/replicator_db_by_doc_id.js | 1 + 5 files changed, 206 insertions(+), 2 deletions(-) create mode 100644 test/elixir/test/replicator_db_bad_rep_id_test.exs create mode 100644 test/elixir/test/replicator_db_by_doc_id_test.exs diff --git a/test/elixir/README.md b/test/elixir/README.md index 32add2aba..bb9b4d2da 100644 --- a/test/elixir/README.md +++ b/test/elixir/README.md @@ -69,8 +69,8 @@ X means done, - means partially - [ ] Port reduce_false_temp.js - [X] Port reduce.js - [X] Port replication.js - - [ ] Port replicator_db_bad_rep_id.js - - [ ] Port replicator_db_by_doc_id.js + - [X] Port replicator_db_bad_rep_id.js + - [X] Port replicator_db_by_doc_id.js - [ ] Port replicator_db_compact_rep_db.js - [ ] Port replicator_db_continuous.js - [ ] Port replicator_db_credential_delegation.js diff --git a/test/elixir/test/replicator_db_bad_rep_id_test.exs b/test/elixir/test/replicator_db_bad_rep_id_test.exs new file mode 100644 index 000000000..693c9d85d --- /dev/null +++ b/test/elixir/test/replicator_db_bad_rep_id_test.exs @@ -0,0 +1,81 @@ +defmodule ReplicationBadIdTest do + use CouchTestCase + + @moduledoc """ + This is a port of the replicator_db_bad_rep_id.js suite + """ + + @docs [ + %{ + _id: "foo1", + value: 11 + }, + %{ + _id: "foo2", + value: 22 + }, + %{ + _id: "foo3", + value: 33 + } + ] + + test "replication doc with bad rep id" do + name = random_db_name() + src_db_name = name <> "_src" + tgt_db_name = name <> "_tgt" + + create_db(src_db_name) + bulk_save(src_db_name, @docs) + create_db(tgt_db_name) + delete_db_on_exit([src_db_name, tgt_db_name]) + + src_db_url = Couch.process_url("/#{src_db_name}") + tgt_db_url = Couch.process_url("/#{tgt_db_name}") + + replication_doc = %{ + _id: "foo_rep_#{name}", + source: src_db_url, + target: tgt_db_url, + replication_id: "1234abc" + } + + {:ok, repdoc} = create_doc("_replicator", replication_doc) + delete_doc_on_exit("_replicator", repdoc.body["id"]) + + retry_until(fn -> + resp = Couch.get("/_replicator/#{replication_doc[:_id]}") + assert resp.body["_replication_state"] == "completed" + resp + end) + + Enum.each(@docs, fn doc -> + copy_resp = Couch.get("/#{tgt_db_name}/#{doc[:_id]}") + assert copy_resp.status_code == 200 + assert copy_resp.body["value"] === doc.value + end) + + resp = Couch.get("/_replicator/#{replication_doc[:_id]}") + assert resp.status_code == 200 + assert resp.body["source"] == replication_doc.source + assert resp.body["target"] == replication_doc.target + assert resp.body["_replication_state"] == "completed" + {:ok, _, _} = DateTime.from_iso8601(resp.body["_replication_state_time"]) + assert resp.body["_replication_id"] == nil + end + + def delete_db_on_exit(db_names) when is_list(db_names) do + on_exit(fn -> + Enum.each(db_names, fn name -> + delete_db(name) + end) + end) + end + + def delete_doc_on_exit(db_name, doc_id) do + on_exit(fn -> + resp = Couch.get("/#{db_name}/#{doc_id}") + Couch.delete("/#{db_name}/#{doc_id}?rev=#{resp.body["_rev"]}") + end) + end +end diff --git a/test/elixir/test/replicator_db_by_doc_id_test.exs b/test/elixir/test/replicator_db_by_doc_id_test.exs new file mode 100644 index 000000000..2e68f2ca9 --- /dev/null +++ b/test/elixir/test/replicator_db_by_doc_id_test.exs @@ -0,0 +1,121 @@ +defmodule ReplicatorDBByDocIdTest do + use CouchTestCase + + @moduledoc """ + This is a port of the replicator_db_by_doc_id.js suite + """ + + @docs [ + %{ + _id: "foo1", + value: 11 + }, + %{ + _id: "foo2", + value: 22 + }, + %{ + _id: "foo3", + value: 33 + } + ] + + test "replicatior db by doc id" do + name = random_db_name() + src_db_name = name <> "_src" + tgt_db_name = name <> "_tgt" + + create_db(src_db_name) + create_db(tgt_db_name) + delete_db_on_exit([src_db_name, tgt_db_name]) + + # Populate src DB + ddocs = [ + %{ + _id: "_design/mydesign", + language: "javascript" + } + ] + + docs = @docs ++ ddocs + bulk_save(src_db_name, docs) + + src_db_url = Couch.process_url("/#{src_db_name}") + tgt_db_url = build_tgt_uri(tgt_db_name) + + replication_doc = %{ + _id: "foo_cont_rep_#{name}", + source: src_db_url, + target: tgt_db_url, + doc_ids: ["foo666", "foo3", "_design/mydesign", "foo999", "foo1"] + } + + {:ok, repdoc} = create_doc("_replicator", replication_doc) + delete_doc_on_exit("_replicator", repdoc.body["id"]) + + retry_until(fn -> + resp = Couch.get("/_replicator/#{replication_doc[:_id]}") + assert resp.body["_replication_state"] == "completed" + resp + end) + + copy_resp = Couch.get("/#{tgt_db_name}/foo1") + assert copy_resp.status_code == 200 + assert copy_resp.body["value"] === 11 + + copy_resp = Couch.get("/#{tgt_db_name}/foo2") + assert copy_resp.status_code == 404 + + copy_resp = Couch.get("/#{tgt_db_name}/foo3") + assert copy_resp.status_code == 200 + assert copy_resp.body["value"] === 33 + + copy_resp = Couch.get("/#{tgt_db_name}/foo666") + assert copy_resp.status_code == 404 + + copy_resp = Couch.get("/#{tgt_db_name}/foo999") + assert copy_resp.status_code == 404 + + # Javascript test suite was executed with admin party + # the design doc was created during replication. + # Elixir test suite is executed configuring an admin. + # The auth info should be provided for the tgt db in order to + # create the design doc during replication + copy_resp = Couch.get("/#{tgt_db_name}/_design/mydesign") + assert copy_resp.status_code == 200 + + resp = Couch.get("/_replicator/#{replication_doc[:_id]}") + assert resp.status_code == 200 + assert resp.body["_replication_stats"]["revisions_checked"] == 3 + assert resp.body["_replication_stats"]["missing_revisions_found"] == 3 + assert resp.body["_replication_stats"]["docs_read"] == 3 + assert resp.body["_replication_stats"]["docs_written"] == 3 + assert resp.body["_replication_stats"]["doc_write_failures"] == 0 + end + + defp build_tgt_uri(db_name) do + username = System.get_env("EX_USERNAME") || "adm" + password = System.get_env("EX_PASSWORD") || "pass" + + "/#{db_name}" + |> Couch.process_url() + |> URI.parse() + |> Map.put(:userinfo, "#{username}:#{password}") + |> URI.to_string() + end + + def delete_db_on_exit(db_names) when is_list(db_names) do + on_exit(fn -> + Enum.each(db_names, fn name -> + delete_db(name) + end) + end) + end + + def delete_doc_on_exit(db_name, doc_id) do + on_exit(fn -> + resp = Couch.get("/#{db_name}/#{doc_id}") + Couch.delete("/#{db_name}/#{doc_id}?rev=#{resp.body["_rev"]}") + end) + end +end diff --git a/test/javascript/tests/replicator_db_bad_rep_id.js b/test/javascript/tests/replicator_db_bad_rep_id.js index 30a124505..0912c1bc0 100644 --- a/test/javascript/tests/replicator_db_bad_rep_id.js +++ b/test/javascript/tests/replicator_db_bad_rep_id.js @@ -10,6 +10,7 @@ // License for the specific language governing permissions and limitations under // the License. +couchTests.elixir = true; couchTests.replicator_db_bad_rep_id = function(debug) { //return console.log('TODO'); if (debug) debugger; diff --git a/test/javascript/tests/replicator_db_by_doc_id.js b/test/javascript/tests/replicator_db_by_doc_id.js index d9de0f119..bc15b03d2 100644 --- a/test/javascript/tests/replicator_db_by_doc_id.js +++ b/test/javascript/tests/replicator_db_by_doc_id.js @@ -10,6 +10,7 @@ // License for the specific language governing permissions and limitations under // the License. +couchTests.elixir = true; couchTests.replicator_db_by_doc_id = function(debug) { //return console.log('TODO'); -- cgit v1.2.1 From 69f6b8686c936585dfe23a4c2ae671a989167611 Mon Sep 17 00:00:00 2001 From: Juanjo Rodriguez Date: Sat, 2 May 2020 23:12:02 +0200 Subject: Quit test run without checking that couchdb is running --- test/javascript/cli_runner.js | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/test/javascript/cli_runner.js b/test/javascript/cli_runner.js index 73467626b..a35348f20 100644 --- a/test/javascript/cli_runner.js +++ b/test/javascript/cli_runner.js @@ -11,6 +11,19 @@ // the License. // +/* + * Quit current test execution if it is tagged as skipped or ported to elixir + */ +function quitIfSkippedOrPorted() { + if(couchTests.skip) { + quit(2); + } + + if(couchTests.elixir) { + quit(3); + } +} + /* * Futon test suite was designed to be able to run all tests populated into * couchTests. Here we should only be loading one test, so we'll pop the first @@ -22,14 +35,6 @@ function runTest() { var count = 0; var start = new Date().getTime(); - if(couchTests.skip) { - quit(2); - } - - if(couchTests.elixir) { - quit(3); - } - for(var name in couchTests) { count++; } @@ -51,6 +56,8 @@ function runTest() { } } +quitIfSkippedOrPorted(); + waitForSuccess(CouchDB.isRunning, 'isRunning'); runTest(); -- cgit v1.2.1 From bd44fc6a44148fe2327962341c1e3d4bb8f7db27 Mon Sep 17 00:00:00 2001 From: Garren Smith Date: Mon, 4 May 2020 15:16:05 +0200 Subject: return correct not implemented for reduce --- src/couch_views/src/couch_views.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/couch_views/src/couch_views.erl b/src/couch_views/src/couch_views.erl index 9d518ebce..3ea4d54be 100644 --- a/src/couch_views/src/couch_views.erl +++ b/src/couch_views/src/couch_views.erl @@ -50,7 +50,7 @@ query(Db, DDoc, ViewName, Callback, Acc0, Args0) -> Args3 = couch_mrview_util:validate_args(Args2), ok = check_range(Args3), case is_reduce_view(Args3) of - true -> throw({not_implemented}); + true -> throw(not_implemented); false -> ok end, -- cgit v1.2.1 From 7e7a3f6651ea33643ea506d6786fe30f17ef4617 Mon Sep 17 00:00:00 2001 From: Tony Sun Date: Tue, 5 May 2020 08:46:05 -0700 Subject: add test to make sure type <<"text">> design docs are ignored (#2866) --- src/mango/test/16-index-selectors-test.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/mango/test/16-index-selectors-test.py b/src/mango/test/16-index-selectors-test.py index 4510065f5..cde8438fc 100644 --- a/src/mango/test/16-index-selectors-test.py +++ b/src/mango/test/16-index-selectors-test.py @@ -246,6 +246,14 @@ class IndexSelectorJson(mango.DbPerClass): docs = self.db.find(selector, use_index="oldschooltext") self.assertEqual(len(docs), 3) + def test_text_old_index_not_used(self): + selector = {"location": {"$gte": "FRA"}} + self.db.save_doc(oldschoolddoctext) + resp = self.db.find(selector, explain=True) + self.assertEqual(resp["index"]["name"], "_all_docs") + docs = self.db.find(selector) + self.assertEqual(len(docs), 3) + @unittest.skipUnless(mango.has_text_service(), "requires text service") def test_text_old_selector_still_supported_via_api(self): selector = {"location": {"$gte": "FRA"}} -- cgit v1.2.1 From 577be6561f1791f05e2b29f2fd1f58858ef1d967 Mon Sep 17 00:00:00 2001 From: ILYA Khlopotov Date: Thu, 7 May 2020 01:51:57 -0700 Subject: Re-enable ExUnit tests --- Makefile | 3 ++- Makefile.win | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 2e3b33889..eaa8b3d47 100644 --- a/Makefile +++ b/Makefile @@ -165,7 +165,7 @@ check: all @$(MAKE) emilio make eunit apps=couch_eval,couch_expiring_cache,ctrace,couch_jobs,couch_views,fabric,mango,chttpd make elixir tests=test/elixir/test/basics_test.exs,test/elixir/test/replication_test.exs,test/elixir/test/map_test.exs,test/elixir/test/all_docs_test.exs,test/elixir/test/bulk_docs_test.exs - make exunit tests=src/couch_rate/test/exunit/ + make exunit apps=couch_rate,chttpd make mango-test .PHONY: eunit @@ -188,6 +188,7 @@ exunit: export MIX_ENV=test exunit: export ERL_LIBS = $(shell pwd)/src exunit: export ERL_AFLAGS = -config $(shell pwd)/rel/files/eunit.config exunit: export COUCHDB_QUERY_SERVER_JAVASCRIPT = $(shell pwd)/bin/couchjs $(shell pwd)/share/server/main.js +exunit: export COUCHDB_TEST_ADMIN_PARTY_OVERRIDE=1 exunit: couch elixir-init setup-eunit elixir-check-formatted elixir-credo @mix test --cover --trace $(EXUNIT_OPTS) diff --git a/Makefile.win b/Makefile.win index 885b7741c..265cdf339 100644 --- a/Makefile.win +++ b/Makefile.win @@ -163,6 +163,7 @@ exunit: export MIX_ENV=test exunit: export ERL_LIBS = $(shell echo %cd%)\src exunit: export ERL_AFLAGS = -config $(shell echo %cd%)/rel/files/eunit.config exunit: export COUCHDB_QUERY_SERVER_JAVASCRIPT = $(shell echo %cd%)/bin/couchjs $(shell echo %cd%)/share/server/main.js +exunit: export COUCHDB_TEST_ADMIN_PARTY_OVERRIDE=1 exunit: couch elixir-init setup-eunit elixir-check-formatted elixir-credo @mix test --cover --trace $(EXUNIT_OPTS) -- cgit v1.2.1 From 51b8cc1e56ecfd5ce3a75347bf868dfd8deceaaf Mon Sep 17 00:00:00 2001 From: ILYA Khlopotov Date: Thu, 7 May 2020 02:45:33 -0700 Subject: Update erlfdb --- rebar.config.script | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rebar.config.script b/rebar.config.script index b0d2f15d5..03c380f46 100644 --- a/rebar.config.script +++ b/rebar.config.script @@ -152,7 +152,7 @@ DepDescs = [ %% Independent Apps {config, "config", {tag, "2.1.7"}}, {b64url, "b64url", {tag, "1.0.2"}}, -{erlfdb, "erlfdb", {tag, "v1.1.0"}}, +{erlfdb, "erlfdb", {tag, "v1.2.0"}}, {ets_lru, "ets-lru", {tag, "1.1.0"}}, {khash, "khash", {tag, "1.1.0"}}, {snappy, "snappy", {tag, "CouchDB-1.0.4"}}, -- cgit v1.2.1 From 1d0a1027dd7d3579ee12d5f5a3df54203b461caf Mon Sep 17 00:00:00 2001 From: Eric Avdey Date: Tue, 5 May 2020 16:38:57 -0300 Subject: Convert aegis key cach to LRU with hard expiration time --- rel/overlay/etc/default.ini | 13 +++ src/aegis/src/aegis_server.erl | 177 +++++++++++++++++++++++++++++++---- src/aegis/test/aegis_server_test.erl | 149 +++++++++++++++++++++++++++++ src/fabric/src/fabric2_util.erl | 8 ++ 4 files changed, 327 insertions(+), 20 deletions(-) diff --git a/rel/overlay/etc/default.ini b/rel/overlay/etc/default.ini index a1e3c5851..66680a4e8 100644 --- a/rel/overlay/etc/default.ini +++ b/rel/overlay/etc/default.ini @@ -764,3 +764,16 @@ opts = #{budget => 100, target => 2500, window => 60000, sensitivity => 1000} ; performance. This value must be at least 10000 and cannot be set to higher than ; 10000000, the default transaction size limit. ;size_limit = 10000000 + +[aegis] +; The maximum number of entries in the key cache. +; Once the limit reached the least recently used entries are eviceted. +;cache_limit = 100000 + +; The period in seconds for how long each entry kept in cache. +; This is not affected by access time, i.e. the keys are always removed +; once expired and re-fetched on a next encrypt/decrypt operation. +;cache_max_age_sec = 1800 + +; The interval in seconds of how often the expiration check runs. +;cache_expiration_check_sec = 10 diff --git a/src/aegis/src/aegis_server.erl b/src/aegis/src/aegis_server.erl index be8202ced..21932626c 100644 --- a/src/aegis/src/aegis_server.erl +++ b/src/aegis/src/aegis_server.erl @@ -44,9 +44,13 @@ -define(KEY_CHECK, aegis_key_check). -define(INIT_TIMEOUT, 60000). -define(TIMEOUT, 10000). +-define(CACHE_LIMIT, 100000). +-define(CACHE_MAX_AGE_SEC, 1800). +-define(CACHE_EXPIRATION_CHECK_SEC, 10). +-define(LAST_ACCESSED_INACTIVITY_SEC, 10). --record(entry, {uuid, encryption_key}). +-record(entry, {uuid, encryption_key, counter, last_accessed, expires_at}). start_link() -> @@ -84,7 +88,7 @@ encrypt(#{} = Db, Key, Value) when is_binary(Key), is_binary(Value) -> uuid := UUID } = Db, - case ets:member(?KEY_CHECK, UUID) of + case is_key_fresh(UUID) of true -> case gen_server:call(?MODULE, {encrypt, Db, Key, Value}) of CipherText when is_binary(CipherText) -> @@ -109,7 +113,7 @@ decrypt(#{} = Db, Key, Value) when is_binary(Key), is_binary(Value) -> uuid := UUID } = Db, - case ets:member(?KEY_CHECK, UUID) of + case is_key_fresh(UUID) of true -> case gen_server:call(?MODULE, {decrypt, Db, Key, Value}) of PlainText when is_binary(PlainText) -> @@ -133,10 +137,16 @@ decrypt(#{} = Db, Key, Value) when is_binary(Key), is_binary(Value) -> init([]) -> process_flag(sensitive, true), Cache = ets:new(?MODULE, [set, private, {keypos, #entry.uuid}]), + ByAccess = ets:new(?MODULE, + [ordered_set, private, {keypos, #entry.counter}]), ets:new(?KEY_CHECK, [named_table, protected, {read_concurrency, true}]), + erlang:send_after(0, self(), maybe_remove_expired), + St = #{ - cache => Cache + cache => Cache, + by_access => ByAccess, + counter => 0 }, {ok, St, ?INIT_TIMEOUT}. @@ -146,15 +156,18 @@ terminate(_Reason, _St) -> handle_call({insert_key, UUID, DbKey}, _From, #{cache := Cache} = St) -> - ok = insert(Cache, UUID, DbKey), - {reply, ok, St, ?TIMEOUT}; + case ets:lookup(Cache, UUID) of + [#entry{uuid = UUID} = Entry] -> + delete(St, Entry); + [] -> + ok + end, + NewSt = insert(St, UUID, DbKey), + {reply, ok, NewSt, ?TIMEOUT}; handle_call({encrypt, #{uuid := UUID} = Db, Key, Value}, From, St) -> - #{ - cache := Cache - } = St, - {ok, DbKey} = lookup(Cache, UUID), + {ok, DbKey} = lookup(St, UUID), erlang:spawn(fun() -> process_flag(sensitive, true), @@ -172,11 +185,8 @@ handle_call({encrypt, #{uuid := UUID} = Db, Key, Value}, From, St) -> {noreply, St, ?TIMEOUT}; handle_call({decrypt, #{uuid := UUID} = Db, Key, Value}, From, St) -> - #{ - cache := Cache - } = St, - {ok, DbKey} = lookup(Cache, UUID), + {ok, DbKey} = lookup(St, UUID), erlang:spawn(fun() -> process_flag(sensitive, true), @@ -197,10 +207,22 @@ handle_call(_Msg, _From, St) -> {noreply, St}. +handle_cast({accessed, UUID}, St) -> + NewSt = bump_last_accessed(St, UUID), + {noreply, NewSt}; + + handle_cast(_Msg, St) -> {noreply, St}. +handle_info(maybe_remove_expired, St) -> + remove_expired_entries(St), + CheckInterval = erlang:convert_time_unit( + expiration_check_interval(), second, millisecond), + erlang:send_after(CheckInterval, self(), maybe_remove_expired), + {noreply, St}; + handle_info(_Msg, St) -> {noreply, St}. @@ -257,19 +279,134 @@ do_decrypt(DbKey, #{uuid := UUID}, Key, Value) -> end. +is_key_fresh(UUID) -> + Now = fabric2_util:now(sec), + + case ets:lookup(?KEY_CHECK, UUID) of + [{UUID, ExpiresAt}] when ExpiresAt >= Now -> true; + _ -> false + end. + + %% cache functions -insert(Cache, UUID, DbKey) -> - Entry = #entry{uuid = UUID, encryption_key = DbKey}, +insert(St, UUID, DbKey) -> + #{ + cache := Cache, + by_access := ByAccess, + counter := Counter + } = St, + + Now = fabric2_util:now(sec), + ExpiresAt = Now + max_age(), + + Entry = #entry{ + uuid = UUID, + encryption_key = DbKey, + counter = Counter, + last_accessed = Now, + expires_at = ExpiresAt + }, + true = ets:insert(Cache, Entry), - true = ets:insert(?KEY_CHECK, {UUID, true}), - ok. + true = ets:insert_new(ByAccess, Entry), + true = ets:insert(?KEY_CHECK, {UUID, ExpiresAt}), + + CacheLimit = cache_limit(), + CacheSize = ets:info(Cache, size), + case CacheSize > CacheLimit of + true -> + LRUKey = ets:first(ByAccess), + [LRUEntry] = ets:lookup(ByAccess, LRUKey), + delete(St, LRUEntry); + false -> + ok + end, -lookup(Cache, UUID) -> + St#{counter := Counter + 1}. + + +lookup(#{cache := Cache}, UUID) -> case ets:lookup(Cache, UUID) of - [#entry{uuid = UUID, encryption_key = DbKey}] -> + [#entry{uuid = UUID, encryption_key = DbKey} = Entry] -> + maybe_bump_last_accessed(Entry), {ok, DbKey}; [] -> {error, not_found} end. + + +delete(St, #entry{uuid = UUID} = Entry) -> + #{ + cache := Cache, + by_access := ByAccess + } = St, + + true = ets:delete(?KEY_CHECK, UUID), + true = ets:delete_object(Cache, Entry), + true = ets:delete_object(ByAccess, Entry). + + +maybe_bump_last_accessed(#entry{last_accessed = LastAccessed} = Entry) -> + case fabric2_util:now(sec) > LastAccessed + ?LAST_ACCESSED_INACTIVITY_SEC of + true -> + gen_server:cast(?MODULE, {accessed, Entry#entry.uuid}); + false -> + ok + end. + + +bump_last_accessed(St, UUID) -> + #{ + cache := Cache, + by_access := ByAccess, + counter := Counter + } = St, + + + [#entry{counter = OldCounter} = Entry0] = ets:lookup(Cache, UUID), + + Entry = Entry0#entry{ + last_accessed = fabric2_util:now(sec), + counter = Counter + }, + + true = ets:insert(Cache, Entry), + true = ets:insert_new(ByAccess, Entry), + + ets:delete(ByAccess, OldCounter), + + St#{counter := Counter + 1}. + + +remove_expired_entries(St) -> + #{ + cache := Cache, + by_access := ByAccess + } = St, + + MatchConditions = [{'=<', '$1', fabric2_util:now(sec)}], + + KeyCheckMatchHead = {'_', '$1'}, + KeyCheckExpired = [{KeyCheckMatchHead, MatchConditions, [true]}], + Count = ets:select_delete(?KEY_CHECK, KeyCheckExpired), + + CacheMatchHead = #entry{expires_at = '$1', _ = '_'}, + CacheExpired = [{CacheMatchHead, MatchConditions, [true]}], + Count = ets:select_delete(Cache, CacheExpired), + Count = ets:select_delete(ByAccess, CacheExpired). + + + +max_age() -> + config:get_integer("aegis", "cache_max_age_sec", ?CACHE_MAX_AGE_SEC). + + +expiration_check_interval() -> + config:get_integer( + "aegis", "cache_expiration_check_sec", ?CACHE_EXPIRATION_CHECK_SEC). + + +cache_limit() -> + config:get_integer("aegis", "cache_limit", ?CACHE_LIMIT). diff --git a/src/aegis/test/aegis_server_test.erl b/src/aegis/test/aegis_server_test.erl index 0f23a3fd9..0f96798b7 100644 --- a/src/aegis/test/aegis_server_test.erl +++ b/src/aegis/test/aegis_server_test.erl @@ -163,3 +163,152 @@ test_disabled_decrypt() -> Db = ?DB#{is_encrypted => aegis_server:open_db(?DB)}, Decrypted = aegis:decrypt(Db, <<1:64>>, ?ENCRYPTED), ?assertEqual(?ENCRYPTED, Decrypted). + + + +lru_cache_with_expiration_test_() -> + { + foreach, + fun() -> + %% this has to be be set before start of aegis server + %% for config param "cache_expiration_check_sec" to be picked up + meck:new([config, aegis_server, fabric2_util], [passthrough]), + ok = meck:expect(config, get_integer, fun + ("aegis", "cache_limit", _) -> 5; + ("aegis", "cache_max_age_sec", _) -> 130; + ("aegis", "cache_expiration_check_sec", _) -> 1; + (_, _, Default) -> Default + end), + Ctx = setup(), + ok = meck:expect(fabric2_util, now, fun(sec) -> + get(time) == undefined andalso put(time, 10), + Now = get(time), + put(time, Now + 10), + Now + end), + Ctx + end, + fun teardown/1, + [ + {"counter moves forward on access bump", + {timeout, ?TIMEOUT, fun test_advance_counter/0}}, + {"oldest entries evicted", + {timeout, ?TIMEOUT, fun test_evict_old_entries/0}}, + {"access bump preserves entries", + {timeout, ?TIMEOUT, fun test_bump_accessed/0}}, + {"expired entries removed", + {timeout, ?TIMEOUT, fun test_remove_expired/0}} + ] + }. + + +test_advance_counter() -> + ?assertEqual(0, meck:num_calls(?AEGIS_KEY_MANAGER, open_db, 1)), + + ok = meck:expect(aegis_server, handle_cast, fun({accessed, _} = Msg, St) -> + #{counter := Counter} = St, + get(counter) == undefined andalso put(counter, 0), + OldCounter = get(counter), + put(counter, Counter), + ?assert(Counter > OldCounter), + meck:passthrough([Msg, St]) + end), + + lists:foreach(fun(I) -> + Db = ?DB#{uuid => <>}, + aegis_server:encrypt(Db, <>, ?VALUE), + aegis_server:encrypt(Db, <<(I+1):64>>, ?VALUE) + end, lists:seq(1, 10)), + + ?assertEqual(10, meck:num_calls(?AEGIS_KEY_MANAGER, open_db, 1)). + + +test_evict_old_entries() -> + ?assertEqual(0, meck:num_calls(?AEGIS_KEY_MANAGER, open_db, 1)), + + %% overflow cache + lists:foreach(fun(I) -> + Db = ?DB#{uuid => <>}, + aegis_server:encrypt(Db, <>, ?VALUE) + end, lists:seq(1, 10)), + + ?assertEqual(10, meck:num_calls(?AEGIS_KEY_MANAGER, open_db, 1)), + + %% confirm that newest keys are still in cache + lists:foreach(fun(I) -> + Db = ?DB#{uuid => <>}, + aegis_server:encrypt(Db, <<(I+1):64>>, ?VALUE) + end, lists:seq(6, 10)), + + ?assertEqual(10, meck:num_calls(?AEGIS_KEY_MANAGER, open_db, 1)), + + %% confirm that oldest keys been eviced and needed re-fetch + lists:foreach(fun(I) -> + Db = ?DB#{uuid => <>}, + aegis_server:encrypt(Db, <<(I+1):64>>, ?VALUE) + end, lists:seq(1, 5)), + + ?assertEqual(15, meck:num_calls(?AEGIS_KEY_MANAGER, open_db, 1)). + + +test_bump_accessed() -> + ?assertEqual(0, meck:num_calls(?AEGIS_KEY_MANAGER, open_db, 1)), + + %% fill the cache + lists:foreach(fun(I) -> + Db = ?DB#{uuid => <>}, + aegis_server:encrypt(Db, <>, ?VALUE) + end, lists:seq(1, 5)), + + ?assertEqual(5, meck:num_calls(?AEGIS_KEY_MANAGER, open_db, 1)), + + %% bump oldest key and then insert a new key to trigger eviction + aegis_server:encrypt(?DB#{uuid => <<1:64>>}, <<1:64>>, ?VALUE), + aegis_server:encrypt(?DB#{uuid => <<6:64>>}, <<6:64>>, ?VALUE), + ?assertEqual(6, meck:num_calls(?AEGIS_KEY_MANAGER, open_db, 1)), + + %% confirm that former oldest key is still in cache + aegis_server:encrypt(?DB#{uuid => <<1:64>>}, <<2:64>>, ?VALUE), + ?assertEqual(6, meck:num_calls(?AEGIS_KEY_MANAGER, open_db, 1)), + + %% confirm that the second oldest key been evicted by the new insert + aegis_server:encrypt(?DB#{uuid => <<2:64>>}, <<3:64>>, ?VALUE), + ?assertEqual(7, meck:num_calls(?AEGIS_KEY_MANAGER, open_db, 1)). + + +test_remove_expired() -> + ?assertEqual(0, meck:num_calls(?AEGIS_KEY_MANAGER, open_db, 1)), + + %% to detect when maybe_remove_expired called + ok = meck:expect(aegis_server, handle_info,fun + (maybe_remove_expired, St) -> + meck:passthrough([maybe_remove_expired, St]) + end), + + %% fill the cache. first key expires a 140, last at 180 of "our" time + lists:foreach(fun(I) -> + Db = ?DB#{uuid => <>}, + aegis_server:encrypt(Db, <>, ?VALUE) + end, lists:seq(1, 5)), + + ?assertEqual(5, meck:num_calls(?AEGIS_KEY_MANAGER, open_db, 1)), + + %% confirm enties are still in cache and wind up our "clock" to 160 + lists:foreach(fun(I) -> + Db = ?DB#{uuid => <>}, + aegis_server:encrypt(Db, <>, ?VALUE) + end, lists:seq(1, 5)), + + ?assertEqual(5, meck:num_calls(?AEGIS_KEY_MANAGER, open_db, 1)), + + %% wait for remove_expired_entries to be triggered + meck:reset(aegis_server), + meck:wait(aegis_server, handle_info, [maybe_remove_expired, '_'], 2500), + + %% 3 "oldest" entries should be removed, 2 yet to expire still in cache + lists:foreach(fun(I) -> + Db = ?DB#{uuid => <>}, + aegis_server:encrypt(Db, <>, ?VALUE) + end, lists:seq(1, 5)), + + ?assertEqual(8, meck:num_calls(?AEGIS_KEY_MANAGER, open_db, 1)). diff --git a/src/fabric/src/fabric2_util.erl b/src/fabric/src/fabric2_util.erl index 9b6d18c58..136762b34 100644 --- a/src/fabric/src/fabric2_util.erl +++ b/src/fabric/src/fabric2_util.erl @@ -41,6 +41,7 @@ all_docs_view_opts/1, iso8601_timestamp/0, + now/1, do_recovery/0, pmap/2, @@ -348,6 +349,13 @@ iso8601_timestamp() -> io_lib:format(Format, [Year, Month, Date, Hour, Minute, Second]). +now(ms) -> + {Mega, Sec, Micro} = os:timestamp(), + (Mega * 1000000 + Sec) * 1000 + round(Micro / 1000); +now(sec) -> + now(ms) div 1000. + + do_recovery() -> config:get_boolean("couchdb", "enable_database_recovery", false). -- cgit v1.2.1 From b2f2a45122cd1d0315ef20068bf38ef2ec4afc71 Mon Sep 17 00:00:00 2001 From: Garren Smith Date: Wed, 6 May 2020 15:07:25 +0200 Subject: add local_docs to fold_doc with docids --- src/fabric/src/fabric2_db.erl | 63 ++++++--- src/fabric/src/fabric2_fdb.erl | 38 +++++- .../test/fabric2_db_fold_doc_docids_tests.erl | 150 +++++++++++++++++++++ test/elixir/test/all_docs_test.exs | 64 +++++++++ 4 files changed, 292 insertions(+), 23 deletions(-) create mode 100644 src/fabric/test/fabric2_db_fold_doc_docids_tests.erl diff --git a/src/fabric/src/fabric2_db.erl b/src/fabric/src/fabric2_db.erl index 740f9abf6..8764d4e18 100644 --- a/src/fabric/src/fabric2_db.erl +++ b/src/fabric/src/fabric2_db.erl @@ -976,16 +976,6 @@ fold_docs(Db, DocIds, UserFun, UserAcc0, Options) -> NeedsTreeOpts = [revs_info, conflicts, deleted_conflicts], NeedsTree = (Options -- NeedsTreeOpts /= Options), - FetchRevs = case NeedsTree of - true -> - fun(DocId) -> - fabric2_fdb:get_all_revs_future(TxDb, DocId) - end; - false -> - fun(DocId) -> - fabric2_fdb:get_winning_revs_future(TxDb, DocId, 1) - end - end, InitAcc = #{ revs_q => queue:new(), revs_count => 0, @@ -1001,7 +991,7 @@ fold_docs(Db, DocIds, UserFun, UserAcc0, Options) -> revs_q := RevsQ, revs_count := RevsCount } = Acc, - Future = FetchRevs(DocId), + Future = fold_docs_get_revs(TxDb, DocId, NeedsTree), NewAcc = Acc#{ revs_q := queue:in({DocId, Future}, RevsQ), revs_count := RevsCount + 1 @@ -1262,6 +1252,47 @@ drain_all_deleted_info_futures(FutureQ, UserFun, Acc) -> end. +fold_docs_get_revs(Db, <> = DocId, _) -> + fabric2_fdb:get_local_doc_rev_future(Db, DocId); + +fold_docs_get_revs(Db, DocId, true) -> + fabric2_fdb:get_all_revs_future(Db, DocId); + +fold_docs_get_revs(Db, DocId, false) -> + fabric2_fdb:get_winning_revs_future(Db, DocId, 1). + + +fold_docs_get_revs_wait(_Db, <>, RevsFuture) -> + Rev = fabric2_fdb:get_local_doc_rev_wait(RevsFuture), + [Rev]; + +fold_docs_get_revs_wait(Db, _DocId, RevsFuture) -> + fabric2_fdb:get_revs_wait(Db, RevsFuture). + + +fold_docs_get_doc_body_future(Db, <> = DocId, + [Rev]) -> + fabric2_fdb:get_local_doc_body_future(Db, DocId, Rev); + +fold_docs_get_doc_body_future(Db, DocId, Revs) -> + Winner = get_rev_winner(Revs), + fabric2_fdb:get_doc_body_future(Db, DocId, Winner). + + +fold_docs_get_doc_body_wait(Db, <> = DocId, [Rev], + _DocOpts, BodyFuture) -> + case fabric2_fdb:get_local_doc_body_wait(Db, DocId, Rev, BodyFuture) of + {not_found, missing} -> {not_found, missing}; + Doc -> {ok, Doc} + end; + +fold_docs_get_doc_body_wait(Db, DocId, Revs, DocOpts, BodyFuture) -> + RevInfo = get_rev_winner(Revs), + Base = fabric2_fdb:get_doc_body_wait(Db, DocId, RevInfo, + BodyFuture), + apply_open_doc_opts(Base, Revs, DocOpts). + + drain_fold_docs_revs_futures(_TxDb, #{revs_count := C} = Acc) when C < 100 -> Acc; drain_fold_docs_revs_futures(TxDb, Acc) -> @@ -1284,13 +1315,12 @@ drain_one_fold_docs_revs_future(TxDb, Acc) -> } = Acc, {{value, {DocId, RevsFuture}}, RestRevsQ} = queue:out(RevsQ), - Revs = fabric2_fdb:get_revs_wait(TxDb, RevsFuture), + Revs = fold_docs_get_revs_wait(TxDb, DocId, RevsFuture), DocFuture = case Revs of [] -> {DocId, [], not_found}; [_ | _] -> - Winner = get_rev_winner(Revs), - BodyFuture = fabric2_fdb:get_doc_body_future(TxDb, DocId, Winner), + BodyFuture = fold_docs_get_doc_body_future(TxDb, DocId, Revs), {DocId, Revs, BodyFuture} end, NewAcc = Acc#{ @@ -1328,10 +1358,7 @@ drain_one_fold_docs_body_future(TxDb, Acc) -> not_found -> {not_found, missing}; _ -> - RevInfo = get_rev_winner(Revs), - Base = fabric2_fdb:get_doc_body_wait(TxDb, DocId, RevInfo, - BodyFuture), - apply_open_doc_opts(Base, Revs, DocOpts) + fold_docs_get_doc_body_wait(TxDb, DocId, Revs, DocOpts, BodyFuture) end, NewUserAcc = maybe_stop(UserFun(DocId, Doc, UserAcc)), Acc#{ diff --git a/src/fabric/src/fabric2_fdb.erl b/src/fabric/src/fabric2_fdb.erl index ba57e646d..8264e8a60 100644 --- a/src/fabric/src/fabric2_fdb.erl +++ b/src/fabric/src/fabric2_fdb.erl @@ -51,6 +51,11 @@ get_doc_body/3, get_doc_body_future/3, get_doc_body_wait/4, + + get_local_doc_rev_future/2, + get_local_doc_rev_wait/1, + get_local_doc_body_future/3, + get_local_doc_body_wait/4, get_local_doc/2, get_local_doc_rev/3, @@ -712,22 +717,45 @@ get_doc_body_wait(#{} = Db0, DocId, RevInfo, Future) -> fdb_to_doc(Db, DocId, RevPos, [Rev | RevPath], BodyRows). -get_local_doc(#{} = Db0, <> = DocId) -> +get_local_doc_rev_future(Db, DocId) -> #{ tx := Tx, db_prefix := DbPrefix - } = Db = ensure_current(Db0), + } = ensure_current(Db), Key = erlfdb_tuple:pack({?DB_LOCAL_DOCS, DocId}, DbPrefix), - Rev = erlfdb:wait(erlfdb:get(Tx, Key)), + erlfdb:get(Tx, Key). + + +get_local_doc_rev_wait(Future) -> + erlfdb:wait(Future). + + +get_local_doc_body_future(#{} = Db, DocId, Rev) -> + #{ + tx := Tx, + db_prefix := DbPrefix + } = ensure_current(Db), Prefix = erlfdb_tuple:pack({?DB_LOCAL_DOC_BODIES, DocId}, DbPrefix), - Future = erlfdb:get_range_startswith(Tx, Prefix), - {_, Chunks} = lists:unzip(aegis:decrypt(Db, erlfdb:wait(Future))), + erlfdb:get_range_startswith(Tx, Prefix). + + +get_local_doc_body_wait(#{} = Db0, DocId, Rev, Future) -> + Db = ensure_current(Db0), + {_, Chunks} = lists:unzip(aegis:decrypt(Db, erlfdb:wait(Future))), fdb_to_local_doc(Db, DocId, Rev, Chunks). +get_local_doc(#{} = Db, <> = DocId) -> + RevFuture = get_local_doc_rev_future(Db, DocId), + Rev = get_local_doc_rev_wait(RevFuture), + + BodyFuture = get_local_doc_body_future(Db, DocId, Rev), + get_local_doc_body_wait(Db, DocId, Rev, BodyFuture). + + get_local_doc_rev(_Db0, <> = DocId, Val) -> case Val of <<255, RevBin/binary>> -> diff --git a/src/fabric/test/fabric2_db_fold_doc_docids_tests.erl b/src/fabric/test/fabric2_db_fold_doc_docids_tests.erl new file mode 100644 index 000000000..b55da5363 --- /dev/null +++ b/src/fabric/test/fabric2_db_fold_doc_docids_tests.erl @@ -0,0 +1,150 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(fabric2_db_fold_doc_docids_tests). + + +-include_lib("couch/include/couch_db.hrl"). +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("eunit/include/eunit.hrl"). +-include("fabric2_test.hrl"). + +doc_fold_test_() -> + { + "Test document fold operations", + { + setup, + fun setup_all/0, + fun teardown_all/1, + { + foreach, + fun setup/0, + fun cleanup/1, + [ + ?TDEF_FE(fold_docs_simple), + ?TDEF_FE(fold_docs_lots), + ?TDEF_FE(fold_docs_local), + ?TDEF_FE(fold_docs_mixed) +] + } + } + }. + + +setup_all() -> + test_util:start_couch([fabric]). + + +teardown_all(Ctx) -> + test_util:stop_couch(Ctx). + + +setup() -> + {ok, Db} = fabric2_db:create(?tempdb(), [{user_ctx, ?ADMIN_USER}]), + Db. + + +cleanup(Db) -> + ok = fabric2_db:delete(fabric2_db:name(Db), []). + + +fold_docs_simple(Db) -> + Docs = create_docs(Db, 10), + run_fold(Db, Docs). + + +fold_docs_lots(Db) -> + Docs = create_docs(Db, 110), + run_fold(Db, Docs). + + +fold_docs_local(Db) -> + Docs = create_local_docs(Db, 10), + run_fold(Db, Docs). + + +fold_docs_mixed(Db) -> + Docs = create_mixed_docs(Db, 200), + run_fold(Db, Docs). + + +run_fold(Db, Docs) -> + SortedIds = get_ids(Docs), + Ids = shuffle(SortedIds), + Returned = fabric2_fdb:transactional(Db, fun (TxDb) -> + fold_docs_return_ids(TxDb, Ids) + end), + ?assertEqual(Returned, Ids). + + +fold_docs_return_ids(TxDb, Ids) -> + CB = fun(DocId, _Doc, Acc) -> + {ok, Acc ++ [DocId]} + end, + {ok, Acc} = fabric2_db:fold_docs(TxDb, Ids, CB, [], []), + Acc. + +get_ids(Docs) -> + lists:map(fun (#doc{id = Id}) -> Id end, Docs). + + +create_mixed_docs(Db, Size) -> + fabric2_fdb:transactional(Db, fun (TxDb) -> + Docs = lists:map(fun (Id) -> + case Id rem 3 == 0 of + true -> create_local_doc(Id); + false -> create_doc(Id) + end + end, lists:seq(0, Size)), + {ok, _} = fabric2_db:update_docs(TxDb, Docs), + Docs + end). + + +create_local_docs(Db, Size) -> + fabric2_fdb:transactional(Db, fun (TxDb) -> + Docs = lists:map(fun (Id) -> + create_local_doc(Id) + end, lists:seq(0, Size)), + {ok, _} = fabric2_db:update_docs(TxDb, Docs), + Docs + end). + + +create_docs(Db, Size) -> + fabric2_fdb:transactional(Db, fun (TxDb) -> + Docs = lists:map(fun (Id) -> + create_doc(Id) + end, lists:seq(0, Size)), + {ok, _} = fabric2_db:update_docs(TxDb, Docs), + Docs + end). + + +create_doc(Id) -> + couch_doc:from_json_obj({[ + {<<"_id">>, list_to_binary([<<"doc-">>, integer_to_binary(Id)])}, + {<<"value">>, 1} + ]}). + + +create_local_doc(Id) -> + couch_doc:from_json_obj({[ + {<<"_id">>, list_to_binary([<<"_local/doc-">>, integer_to_binary(Id)])}, + {<<"value">>, 1} + ]}). + + +shuffle(List) when is_list(List) -> + Tagged = [{rand:uniform(), Item} || Item <- List], + {_, Randomized} = lists:unzip(lists:sort(Tagged)), + Randomized. diff --git a/test/elixir/test/all_docs_test.exs b/test/elixir/test/all_docs_test.exs index d41d046b8..46ab1f8b3 100644 --- a/test/elixir/test/all_docs_test.exs +++ b/test/elixir/test/all_docs_test.exs @@ -275,6 +275,70 @@ defmodule AllDocsTest do assert doc["string"] == "1" end + @tag :with_db + test "_local_docs POST with keys and limit", context do + expected = [ + %{ + "doc" => %{"_id" => "_local/one", "_rev" => "0-1", "value" => "one"}, + "id" => "_local/one", + "key" => "_local/one", + "value" => %{"rev" => "0-1"} + }, + %{ + "doc" => %{"_id" => "_local/two", "_rev" => "0-1", "value" => "two"}, + "id" => "_local/two", + "key" => "_local/two", + "value" => %{"rev" => "0-1"} + }, + %{ + "doc" => %{ + "_id" => "three", + "_rev" => "1-878d3724976748bc881841046a276ceb", + "value" => "three" + }, + "id" => "three", + "key" => "three", + "value" => %{"rev" => "1-878d3724976748bc881841046a276ceb"} + }, + %{"error" => "not_found", "key" => "missing"}, + %{"error" => "not_found", "key" => "_local/missing"} + ] + + db_name = context[:db_name] + + docs = [ + %{ + _id: "_local/one", + value: "one" + }, + %{ + _id: "_local/two", + value: "two" + }, + %{ + _id: "three", + value: "three" + } + ] + + resp = Couch.post("/#{db_name}/_bulk_docs", body: %{docs: docs}) + assert resp.status_code in [201, 202] + + resp = + Couch.post( + "/#{db_name}/_all_docs", + body: %{ + :keys => ["_local/one", "_local/two", "three", "missing", "_local/missing"], + :include_docs => true + } + ) + + assert resp.status_code == 200 + rows = resp.body["rows"] + assert length(rows) == 5 + assert rows == expected + end + @tag :with_db test "POST with query parameter and JSON body", context do db_name = context[:db_name] -- cgit v1.2.1 From d66e95afef1b25d13bad21e5597ef5a17209628a Mon Sep 17 00:00:00 2001 From: Garren Smith Date: Wed, 6 May 2020 15:09:17 +0200 Subject: mix format all_docs_test.exs --- test/elixir/test/all_docs_test.exs | 123 ++++++++++++++++++++----------------- 1 file changed, 65 insertions(+), 58 deletions(-) diff --git a/test/elixir/test/all_docs_test.exs b/test/elixir/test/all_docs_test.exs index 46ab1f8b3..21c136d39 100644 --- a/test/elixir/test/all_docs_test.exs +++ b/test/elixir/test/all_docs_test.exs @@ -188,34 +188,36 @@ defmodule AllDocsTest do test "GET with one key", context do db_name = context[:db_name] - {:ok, _} = create_doc( - db_name, - %{ - _id: "foo", - bar: "baz" - } - ) + {:ok, _} = + create_doc( + db_name, + %{ + _id: "foo", + bar: "baz" + } + ) - {:ok, _} = create_doc( - db_name, - %{ - _id: "foo2", - bar: "baz2" - } - ) + {:ok, _} = + create_doc( + db_name, + %{ + _id: "foo2", + bar: "baz2" + } + ) - resp = Couch.get( - "/#{db_name}/_all_docs", - query: %{ - :key => "\"foo\"", - } - ) + resp = + Couch.get( + "/#{db_name}/_all_docs", + query: %{ + :key => "\"foo\"" + } + ) assert resp.status_code == 200 assert length(Map.get(resp, :body)["rows"]) == 1 end - @tag :with_db test "POST with empty body", context do db_name = context[:db_name] @@ -223,10 +225,11 @@ defmodule AllDocsTest do resp = Couch.post("/#{db_name}/_bulk_docs", body: %{docs: create_docs(0..2)}) assert resp.status_code in [201, 202] - resp = Couch.post( - "/#{db_name}/_all_docs", - body: %{} - ) + resp = + Couch.post( + "/#{db_name}/_all_docs", + body: %{} + ) assert resp.status_code == 200 assert length(Map.get(resp, :body)["rows"]) == 3 @@ -239,12 +242,13 @@ defmodule AllDocsTest do resp = Couch.post("/#{db_name}/_bulk_docs", body: %{docs: create_docs(0..3)}) assert resp.status_code in [201, 202] - resp = Couch.post( - "/#{db_name}/_all_docs", - body: %{ - :keys => [1] - } - ) + resp = + Couch.post( + "/#{db_name}/_all_docs", + body: %{ + :keys => [1] + } + ) assert resp.status_code == 200 rows = resp.body["rows"] @@ -259,14 +263,15 @@ defmodule AllDocsTest do resp = Couch.post("/#{db_name}/_bulk_docs", body: %{docs: create_docs(0..3)}) assert resp.status_code in [201, 202] - resp = Couch.post( - "/#{db_name}/_all_docs", - body: %{ - :keys => ["1", "2"], - :limit => 1, - :include_docs => true - } - ) + resp = + Couch.post( + "/#{db_name}/_all_docs", + body: %{ + :keys => ["1", "2"], + :limit => 1, + :include_docs => true + } + ) assert resp.status_code == 200 rows = resp.body["rows"] @@ -346,15 +351,16 @@ defmodule AllDocsTest do resp = Couch.post("/#{db_name}/_bulk_docs", body: %{docs: create_docs(0..3)}) assert resp.status_code in [201, 202] - resp = Couch.post( - "/#{db_name}/_all_docs", - query: %{ - :limit => 1 - }, - body: %{ - :keys => [1, 2] - } - ) + resp = + Couch.post( + "/#{db_name}/_all_docs", + query: %{ + :limit => 1 + }, + body: %{ + :keys => [1, 2] + } + ) assert resp.status_code == 200 assert length(Map.get(resp, :body)["rows"]) == 1 @@ -367,16 +373,17 @@ defmodule AllDocsTest do resp = Couch.post("/#{db_name}/_bulk_docs", body: %{docs: create_docs(0..3)}) assert resp.status_code in [201, 202] - resp = Couch.post( - "/#{db_name}/_all_docs", - query: %{ - :limit => 1 - }, - body: %{ - :keys => [1, 2], - :limit => 2 - } - ) + resp = + Couch.post( + "/#{db_name}/_all_docs", + query: %{ + :limit => 1 + }, + body: %{ + :keys => [1, 2], + :limit => 2 + } + ) assert resp.status_code == 200 assert length(Map.get(resp, :body)["rows"]) == 1 -- cgit v1.2.1 From 968def848f1251bc9e8d1d5c0d388d803d8837b2 Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Fri, 8 May 2020 12:09:33 -0400 Subject: Fix couch_views updater_running info result Previously we always returned `false` because the result from `couch_jobs:get_job_state` was expected to be just `Status`, but it is `{ok, Status}`. That part is now explicit so we account for every possible job state and would fail on a clause match if we get something else there. Moved `job_state/2` function to `couch_view_jobs` to avoid duplicating the logic on how to calculate job_id and keep it all in one module. Tests were updated to explicitly check for each state job state. --- src/couch_views/src/couch_views.erl | 22 ++++------ src/couch_views/src/couch_views_jobs.erl | 8 +++- src/couch_views/test/couch_views_info_test.erl | 60 +++++++++++++++++--------- 3 files changed, 56 insertions(+), 34 deletions(-) diff --git a/src/couch_views/src/couch_views.erl b/src/couch_views/src/couch_views.erl index 3ea4d54be..d9ba0c16b 100644 --- a/src/couch_views/src/couch_views.erl +++ b/src/couch_views/src/couch_views.erl @@ -99,21 +99,17 @@ get_info(Db, DDoc) -> DbName = fabric2_db:name(Db), {ok, Mrst} = couch_views_util:ddoc_to_mrst(DbName, DDoc), Sig = fabric2_util:to_hex(Mrst#mrst.sig), - JobId = <>, - {UpdateSeq, DataSize, Status0} = fabric2_fdb:transactional(Db, fun(TxDb) -> - #{ - tx := Tx - } = TxDb, + {UpdateSeq, DataSize, Status} = fabric2_fdb:transactional(Db, fun(TxDb) -> Seq = couch_views_fdb:get_update_seq(TxDb, Mrst), DataSize = get_total_view_size(TxDb, Mrst), - Status = couch_jobs:get_job_state(Tx, ?INDEX_JOB_TYPE, JobId), - {Seq, DataSize, Status} + JobStatus = case couch_views_jobs:job_state(TxDb, Mrst) of + {ok, pending} -> true; + {ok, running} -> true; + {ok, finished} -> false; + {error, not_found} -> false + end, + {Seq, DataSize, JobStatus} end), - Status1 = case Status0 of - pending -> true; - running -> true; - _ -> false - end, UpdateOptions = get_update_options(Mrst), {ok, [ {language, Mrst#mrst.language}, @@ -122,7 +118,7 @@ get_info(Db, DDoc) -> {active, DataSize} ]}}, {update_seq, UpdateSeq}, - {updater_running, Status1}, + {updater_running, Status}, {update_options, UpdateOptions} ]}. diff --git a/src/couch_views/src/couch_views_jobs.erl b/src/couch_views/src/couch_views_jobs.erl index d0de44ea8..909e9234f 100644 --- a/src/couch_views/src/couch_views_jobs.erl +++ b/src/couch_views/src/couch_views_jobs.erl @@ -16,7 +16,8 @@ set_timeout/0, build_view/3, build_view_async/2, - remove/2 + remove/2, + job_state/2 ]). -ifdef(TEST). @@ -67,6 +68,11 @@ remove(TxDb, Sig) -> couch_jobs:remove(TxDb, ?INDEX_JOB_TYPE, JobId). +job_state(#{} = TxDb, #mrst{} = Mrst) -> + JobId = job_id(TxDb, Mrst), + couch_jobs:get_job_state(TxDb, ?INDEX_JOB_TYPE, JobId). + + ensure_correct_tx(#{tx := undefined} = TxDb) -> TxDb; diff --git a/src/couch_views/test/couch_views_info_test.erl b/src/couch_views/test/couch_views_info_test.erl index 777cdb3dc..993801a0d 100644 --- a/src/couch_views/test/couch_views_info_test.erl +++ b/src/couch_views/test/couch_views_info_test.erl @@ -45,8 +45,7 @@ foreach_setup() -> {ok, _} = fabric2_db:update_doc(Db, Doc1, []), run_query(Db, DDoc, ?MAP_FUN1), - {ok, Info} = couch_views:get_info(Db, DDoc), - {Db, Info}. + {Db, DDoc}. foreach_teardown({Db, _}) -> @@ -66,41 +65,62 @@ views_info_test_() -> fun foreach_setup/0, fun foreach_teardown/1, [ - fun sig_is_binary/1, - fun language_is_js/1, - fun update_seq_is_binary/1, - fun updater_running_is_boolean/1, - fun active_size_is_non_neg_int/1, - fun update_opts_is_bin_list/1 + ?TDEF_FE(sig_is_binary), + ?TDEF_FE(language_is_js), + ?TDEF_FE(update_seq_is_binary), + ?TDEF_FE(updater_running_is_boolean), + ?TDEF_FE(active_size_is_non_neg_int), + ?TDEF_FE(update_opts_is_bin_list) ] } } }. -sig_is_binary({_, Info}) -> - ?_assert(is_binary(prop(signature, Info))). +sig_is_binary({Db, DDoc}) -> + {ok, Info} = couch_views:get_info(Db, DDoc), + ?assert(is_binary(prop(signature, Info))). + + +language_is_js({Db, DDoc}) -> + {ok, Info} = couch_views:get_info(Db, DDoc), + ?assertEqual(<<"javascript">>, prop(language, Info)). + +active_size_is_non_neg_int({Db, DDoc}) -> + {ok, Info} = couch_views:get_info(Db, DDoc), + ?assert(check_non_neg_int([sizes, active], Info)). -language_is_js({_, Info}) -> - ?_assertEqual(<<"javascript">>, prop(language, Info)). +updater_running_is_boolean({Db, DDoc}) -> + meck:new(couch_jobs, [passthrough]), -active_size_is_non_neg_int({_, Info}) -> - ?_assert(check_non_neg_int([sizes, active], Info)). + meck:expect(couch_jobs, get_job_state, 3, meck:val({ok, running})), + {ok, Info1} = couch_views:get_info(Db, DDoc), + ?assert(prop(updater_running, Info1)), + meck:expect(couch_jobs, get_job_state, 3, meck:val({ok, pending})), + {ok, Info2} = couch_views:get_info(Db, DDoc), + ?assert(prop(updater_running, Info2)), -updater_running_is_boolean({_, Info}) -> - ?_assert(is_boolean(prop(updater_running, Info))). + meck:expect(couch_jobs, get_job_state, 3, meck:val({ok, finished})), + {ok, Info3} = couch_views:get_info(Db, DDoc), + ?assert(not prop(updater_running, Info3)), + meck:expect(couch_jobs, get_job_state, 3, meck:val({error, not_found})), + {ok, Info4} = couch_views:get_info(Db, DDoc), + ?assert(not prop(updater_running, Info4)). -update_seq_is_binary({_, Info}) -> - ?_assert(is_binary(prop(update_seq, Info))). +update_seq_is_binary({Db, DDoc}) -> + {ok, Info} = couch_views:get_info(Db, DDoc), + ?assert(is_binary(prop(update_seq, Info))). -update_opts_is_bin_list({_, Info}) -> + +update_opts_is_bin_list({Db, DDoc}) -> + {ok, Info} = couch_views:get_info(Db, DDoc), Opts = prop(update_options, Info), - ?_assert(is_list(Opts) andalso + ?assert(is_list(Opts) andalso (Opts == [] orelse lists:all([is_binary(B) || B <- Opts]))). -- cgit v1.2.1 From 91becc17bc65efbb4d014a33b7a7f511d7860e5d Mon Sep 17 00:00:00 2001 From: jiangph Date: Thu, 23 Apr 2020 20:41:40 +0800 Subject: background deletion for soft-deleted database allow background job to delete soft-deleted database according to specified criteria to release space. Once database is hard-deleted, the data can't be fetched back. Co-authored-by: Nick Vatamaniuc --- src/fabric/include/fabric2.hrl | 1 + src/fabric/src/fabric2_db_expiration.erl | 246 ++++++++++++++++++++++++++++++ src/fabric/src/fabric2_sup.erl | 10 +- src/fabric/test/fabric2_db_crud_tests.erl | 104 ++++++++++++- 4 files changed, 358 insertions(+), 3 deletions(-) create mode 100644 src/fabric/src/fabric2_db_expiration.erl diff --git a/src/fabric/include/fabric2.hrl b/src/fabric/include/fabric2.hrl index 27f3d61c2..2e71787c3 100644 --- a/src/fabric/include/fabric2.hrl +++ b/src/fabric/include/fabric2.hrl @@ -13,6 +13,7 @@ -define(uint2bin(I), binary:encode_unsigned(I, little)). -define(bin2uint(I), binary:decode_unsigned(I, little)). +-define(bin2int(V), binary_to_integer(V)). -define(METADATA_VERSION_KEY, <<16#FF, "/metadataVersion">>). % Prefix Definitions diff --git a/src/fabric/src/fabric2_db_expiration.erl b/src/fabric/src/fabric2_db_expiration.erl new file mode 100644 index 000000000..3363d2427 --- /dev/null +++ b/src/fabric/src/fabric2_db_expiration.erl @@ -0,0 +1,246 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(fabric2_db_expiration). + + +-behaviour(gen_server). + + +-export([ + start_link/0, + cleanup/1, + process_expirations/2 +]). + +-export([ + init/1, + handle_call/3, + handle_cast/2, + handle_info/2, + terminate/2, + code_change/3 +]). + + +-include_lib("couch/include/couch_db.hrl"). +-include_lib("fabric/include/fabric2.hrl"). + +-define(JOB_TYPE, <<"db_expiration">>). +-define(JOB_ID, <<"db_expiration_job">>). +-define(DEFAULT_JOB_Version, 1). +-define(DEFAULT_RETENTION_SEC, 172800). % 48 hours +-define(DEFAULT_SCHEDULE_SEC, 3600). % 1 hour +-define(ERROR_RESCHEDULE_SEC, 5). +-define(CHECK_ENABLED_SEC, 2). +-define(JOB_TIMEOUT_SEC, 30). + + +-record(st, { + job +}). + + +start_link() -> + gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). + + +init(_) -> + process_flag(trap_exit, true), + {ok, #st{job = undefined}, 0}. + + +terminate(_M, _St) -> + ok. + + +handle_call(Msg, _From, St) -> + {stop, {bad_call, Msg}, {bad_call, Msg}, St}. + + +handle_cast(Msg, St) -> + {stop, {bad_cast, Msg}, St}. + + +handle_info(timeout, #st{job = undefined} = St) -> + ok = wait_for_couch_jobs_app(), + ok = couch_jobs:set_type_timeout(?JOB_TYPE, ?JOB_TIMEOUT_SEC), + ok = maybe_add_job(), + Pid = spawn_link(?MODULE, cleanup, [is_enabled()]), + {noreply, St#st{job = Pid}}; + +handle_info({'EXIT', Pid, Exit}, #st{job = Pid} = St) -> + case Exit of + normal -> ok; + Error -> couch_log:error("~p : job error ~p", [?MODULE, Error]) + end, + NewPid = spawn_link(?MODULE, cleanup, [is_enabled()]), + {noreply, St#st{job = NewPid}}; + +handle_info(Msg, St) -> + {stop, {bad_info, Msg}, St}. + + +code_change(_OldVsn, St, _Extra) -> + {ok, St}. + + +wait_for_couch_jobs_app() -> + % Because of a circular dependency between couch_jobs and fabric apps, wait + % for couch_jobs to initialize before continuing. If we refactor the + % commits FDB utilities out we can remove this bit of code. + case lists:keysearch(couch_jobs, 1, application:which_applications()) of + {value, {couch_jobs, _, _}} -> + ok; + false -> + timer:sleep(100), + wait_for_couch_jobs_app() + end. + + +maybe_add_job() -> + case couch_jobs:get_job_data(undefined, ?JOB_TYPE, job_id()) of + {error, not_found} -> + Now = erlang:system_time(second), + ok = couch_jobs:add(undefined, ?JOB_TYPE, job_id(), #{}, Now); + {ok, _JobData} -> + ok + end. + + +cleanup(false) -> + timer:sleep(?CHECK_ENABLED_SEC * 1000), + exit(normal); + +cleanup(true) -> + Now = erlang:system_time(second), + ScheduleSec = schedule_sec(), + Opts = #{max_sched_time => Now + min(ScheduleSec div 3, 15)}, + case couch_jobs:accept(?JOB_TYPE, Opts) of + {ok, Job, Data} -> + try + {ok, Job1, Data1} = ?MODULE:process_expirations(Job, Data), + ok = resubmit_job(Job1, Data1, schedule_sec()) + catch + _Tag:Error -> + Stack = erlang:get_stacktrace(), + couch_log:error("~p : processing error ~p ~p ~p", + [?MODULE, Job, Error, Stack]), + ok = resubmit_job(Job, Data, ?ERROR_RESCHEDULE_SEC), + exit({job_error, Error, Stack}) + end; + {error, not_found} -> + timer:sleep(?CHECK_ENABLED_SEC * 1000), + ?MODULE:cleanup(is_enabled()) + end. + + +resubmit_job(Job, Data, After) -> + Now = erlang:system_time(second), + SchedTime = Now + After, + couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(), fun(JTx) -> + {ok, Job1} = couch_jobs:resubmit(JTx, Job, SchedTime), + ok = couch_jobs:finish(JTx, Job1, Data) + end), + ok. + + +process_expirations(#{} = Job, #{} = Data) -> + Start = now_sec(), + Callback = fun(Value, LastUpdateAt) -> + case Value of + {meta, _} -> ok; + {row, DbInfo} -> process_row(DbInfo); + complete -> ok + end, + {ok, maybe_report_progress(Job, LastUpdateAt)} + end, + {ok, _Infos} = fabric2_db:list_deleted_dbs_info( + Callback, + Start, + [{restart_tx, true}] + ), + {ok, Job, Data}. + + +process_row(DbInfo) -> + DbName = proplists:get_value(db_name, DbInfo), + TimeStamp = proplists:get_value(timestamp, DbInfo), + Now = now_sec(), + Retention = retention_sec(), + Since = Now - Retention, + case Since >= timestamp_to_sec(TimeStamp) of + true -> + couch_log:notice("Permanently deleting ~p database with" + " timestamp ~p", [DbName, TimeStamp]), + ok = fabric2_db:delete(DbName, [{deleted_at, TimeStamp}]); + false -> + ok + end. + + +maybe_report_progress(Job, LastUpdateAt) -> + % Update periodically the job so it doesn't expire + Now = now_sec(), + Progress = #{ + <<"processed_at">> => Now + + }, + case (Now - LastUpdateAt) > (?JOB_TIMEOUT_SEC div 2) of + true -> + couch_jobs:update(undefined, Job, Progress), + Now; + false -> + LastUpdateAt + end. + + +job_id() -> + JobVersion = job_version(), + <>. + + +now_sec() -> + Now = os:timestamp(), + Nowish = calendar:now_to_universal_time(Now), + calendar:datetime_to_gregorian_seconds(Nowish). + + +timestamp_to_sec(TimeStamp) -> + <> = TimeStamp, + + calendar:datetime_to_gregorian_seconds( + {{?bin2int(Year), ?bin2int(Month), ?bin2int(Day)}, + {?bin2int(Hour), ?bin2int(Minutes), ?bin2int(Second)}} + ). + + +is_enabled() -> + config:get_boolean("couchdb", "db_expiration_enabled", false). + + +job_version() -> + config:get_integer("couchdb", "db_expiration_job_version", + ?DEFAULT_JOB_Version). + + +retention_sec() -> + config:get_integer("couchdb", "db_expiration_retention_sec", + ?DEFAULT_RETENTION_SEC). + + +schedule_sec() -> + config:get_integer("couchdb", "db_expiration_schedule_sec", + ?DEFAULT_SCHEDULE_SEC). diff --git a/src/fabric/src/fabric2_sup.erl b/src/fabric/src/fabric2_sup.erl index e8201b4ee..874a8c240 100644 --- a/src/fabric/src/fabric2_sup.erl +++ b/src/fabric/src/fabric2_sup.erl @@ -30,7 +30,7 @@ start_link(Args) -> init([]) -> config:enable_feature(fdb), - Flags = {one_for_one, 1, 5}, + Flags = {rest_for_one, 1, 5}, Children = [ { fabric2_server, @@ -55,6 +55,14 @@ init([]) -> 5000, worker, [fabric2_index] + }, + { + fabric2_db_expiration, + {fabric2_db_expiration, start_link, []}, + permanent, + 5000, + worker, + [fabric2_db_expiration] } ], ChildrenWithEpi = couch_epi:register_service(fabric2_epi, Children), diff --git a/src/fabric/test/fabric2_db_crud_tests.erl b/src/fabric/test/fabric2_db_crud_tests.erl index 9deb8dd26..b1e15aa2e 100644 --- a/src/fabric/test/fabric2_db_crud_tests.erl +++ b/src/fabric/test/fabric2_db_crud_tests.erl @@ -13,6 +13,7 @@ -module(fabric2_db_crud_tests). +-include_lib("fabric/include/fabric2.hrl"). -include_lib("couch/include/couch_eunit.hrl"). -include_lib("eunit/include/eunit.hrl"). -include("fabric2_test.hrl"). @@ -39,6 +40,8 @@ crud_test_() -> ?TDEF_FE(recreate_db), ?TDEF_FE(undelete_db), ?TDEF_FE(remove_deleted_db), + ?TDEF_FE(scheduled_remove_deleted_db), + ?TDEF_FE(scheduled_remove_deleted_dbs), ?TDEF_FE(old_db_handle), ?TDEF_FE(list_dbs), ?TDEF_FE(list_dbs_user_fun), @@ -59,14 +62,42 @@ crud_test_() -> }. +scheduled_db_remove_error_test_() -> + { + "Test scheduled database remove operations", + { + setup, + fun setup_all/0, + fun teardown_all/1, + { + foreach, + fun setup/0, + fun cleanup/1, + [ + ?TDEF_FE(scheduled_remove_deleted_dbs_with_error) + ] + } + } + }. + + setup_all() -> - Ctx = test_util:start_couch([fabric]), + meck:new(config, [passthrough]), + meck:expect(config, get_integer, fun + ("couchdb", "db_expiration_schedule_sec", _) -> 2; + ("couchdb", "db_expiration_retention_sec", _) -> 0; + (_, _, Default) -> Default + end), + Ctx = test_util:start_couch([fabric, couch_jobs]), meck:new(erlfdb, [passthrough]), + meck:new(fabric2_db_expiration, [passthrough]), Ctx. teardown_all(Ctx) -> - meck:unload(), + meck:unload(erlfdb), + meck:unload(config), + meck:unload(fabric2_db_expiration), test_util:stop_couch(Ctx). @@ -75,9 +106,12 @@ setup() -> cleanup(_) -> + ok = config:set("couchdb", "db_expiration_enabled", "false", false), ok = config:set("couchdb", "enable_database_recovery", "false", false), fabric2_test_util:tx_too_old_reset_errors(), reset_fail_erfdb_wait(), + meck:reset([fabric2_db_expiration]), + meck:reset([config]), meck:reset([erlfdb]). @@ -205,6 +239,60 @@ remove_deleted_db(_) -> ?assert(not lists:member(DbName, DeletedDbs)). +scheduled_remove_deleted_db(_) -> + ok = config:set("couchdb", "db_expiration_enabled", "true", false), + ok = config:set("couchdb", "enable_database_recovery", "true", false), + DbName = ?tempdb(), + ?assertError(database_does_not_exist, fabric2_db:delete(DbName, [])), + + ?assertMatch({ok, _}, fabric2_db:create(DbName, [])), + ?assertEqual(true, ets:member(fabric2_server, DbName)), + + ?assertEqual(ok, fabric2_db:delete(DbName, [])), + ?assertEqual(false, ets:member(fabric2_server, DbName)), + + meck:wait(fabric2_db_expiration, process_expirations, '_', 5000), + + {ok, Infos} = fabric2_db:list_deleted_dbs_info(), + DeletedDbs = [proplists:get_value(db_name, Info) || Info <- Infos], + ?assert(not lists:member(DbName, DeletedDbs)). + + +scheduled_remove_deleted_dbs(_) -> + ok = config:set("couchdb", "db_expiration_enabled", "true", false), + ok = config:set("couchdb", "db_expiration_batch", "2", false), + ok = config:set("couchdb", "enable_database_recovery", "true", false), + DbNameList = [create_and_delete_db() || _I <- lists:seq(1, 5)], + meck:wait(fabric2_db_expiration, process_expirations, '_', 5000), + + {ok, Infos} = fabric2_db:list_deleted_dbs_info(), + DeletedDbs = [proplists:get_value(db_name, Info) || Info <- Infos], + lists:map(fun(DbName) -> + ?assert(not lists:member(DbName, DeletedDbs)) + end, DbNameList). + + +scheduled_remove_deleted_dbs_with_error(_) -> + meck:expect(fabric2_db_expiration, process_expirations, fun(_, _) -> + throw(process_expirations_error) + end), + + {Pid, Ref} = spawn_monitor(fun() -> + fabric2_db_expiration:cleanup(true) + end), + receive + {'DOWN', Ref, process, Pid, Error} -> + ?assertMatch({job_error, process_expirations_error, _}, Error) + end, + JobType = <<"db_expiration">>, + JobId = <<"db_expiration_job">>, + FQJobId = <>, + + ?assertMatch({ok, _}, couch_jobs:get_job_data(undefined, JobType, FQJobId)), + {ok, JobState} = couch_jobs:get_job_state(undefined, JobType, FQJobId), + ?assert(lists:member(JobState, [pending, running])). + + old_db_handle(_) -> % db hard deleted DbName1 = ?tempdb(), @@ -615,3 +703,15 @@ get_deleted_dbs(DeletedDbInfos) -> DbName = fabric2_util:get_value(db_name, DbInfo), [DbName | Acc] end, [], DeletedDbInfos). + + +create_and_delete_db() -> + DbName = ?tempdb(), + ?assertError(database_does_not_exist, fabric2_db:delete(DbName, [])), + + ?assertMatch({ok, _}, fabric2_db:create(DbName, [])), + ?assertEqual(true, ets:member(fabric2_server, DbName)), + + ?assertEqual(ok, fabric2_db:delete(DbName, [])), + ?assertEqual(false, ets:member(fabric2_server, DbName)), + DbName. -- cgit v1.2.1 From f0040c79d5437aefa60791f64abb9eb8cd2a2e54 Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Wed, 13 May 2020 16:23:42 -0400 Subject: Fix a few flaky tests in fabric2_db Add some longer timeouts and fix a race condition in db cleanup tests (Thanks to @jdoane for the patch) --- src/fabric/test/fabric2_db_crud_tests.erl | 25 +++++++++++++++---------- src/fabric/test/fabric2_dir_prefix_tests.erl | 4 ++-- src/fabric/test/fabric2_index_tests.erl | 6 +++--- src/fabric/test/fabric2_tx_options_tests.erl | 4 ++-- 4 files changed, 22 insertions(+), 17 deletions(-) diff --git a/src/fabric/test/fabric2_db_crud_tests.erl b/src/fabric/test/fabric2_db_crud_tests.erl index b1e15aa2e..b529935be 100644 --- a/src/fabric/test/fabric2_db_crud_tests.erl +++ b/src/fabric/test/fabric2_db_crud_tests.erl @@ -40,8 +40,8 @@ crud_test_() -> ?TDEF_FE(recreate_db), ?TDEF_FE(undelete_db), ?TDEF_FE(remove_deleted_db), - ?TDEF_FE(scheduled_remove_deleted_db), - ?TDEF_FE(scheduled_remove_deleted_dbs), + ?TDEF_FE(scheduled_remove_deleted_db, 15), + ?TDEF_FE(scheduled_remove_deleted_dbs, 15), ?TDEF_FE(old_db_handle), ?TDEF_FE(list_dbs), ?TDEF_FE(list_dbs_user_fun), @@ -95,9 +95,7 @@ setup_all() -> teardown_all(Ctx) -> - meck:unload(erlfdb), - meck:unload(config), - meck:unload(fabric2_db_expiration), + meck:unload(), test_util:stop_couch(Ctx). @@ -251,11 +249,17 @@ scheduled_remove_deleted_db(_) -> ?assertEqual(ok, fabric2_db:delete(DbName, [])), ?assertEqual(false, ets:member(fabric2_server, DbName)), - meck:wait(fabric2_db_expiration, process_expirations, '_', 5000), + meck:reset(fabric2_db_expiration), + meck:wait(fabric2_db_expiration, process_expirations, '_', 7000), - {ok, Infos} = fabric2_db:list_deleted_dbs_info(), - DeletedDbs = [proplists:get_value(db_name, Info) || Info <- Infos], - ?assert(not lists:member(DbName, DeletedDbs)). + ?assertEqual(ok, test_util:wait(fun() -> + {ok, Infos} = fabric2_db:list_deleted_dbs_info(), + DeletedDbs = [proplists:get_value(db_name, Info) || Info <- Infos], + case lists:member(DbName, DeletedDbs) of + true -> wait; + false -> ok + end + end)). scheduled_remove_deleted_dbs(_) -> @@ -263,7 +267,8 @@ scheduled_remove_deleted_dbs(_) -> ok = config:set("couchdb", "db_expiration_batch", "2", false), ok = config:set("couchdb", "enable_database_recovery", "true", false), DbNameList = [create_and_delete_db() || _I <- lists:seq(1, 5)], - meck:wait(fabric2_db_expiration, process_expirations, '_', 5000), + meck:reset(fabric2_db_expiration), + meck:wait(fabric2_db_expiration, process_expirations, '_', 7000), {ok, Infos} = fabric2_db:list_deleted_dbs_info(), DeletedDbs = [proplists:get_value(db_name, Info) || Info <- Infos], diff --git a/src/fabric/test/fabric2_dir_prefix_tests.erl b/src/fabric/test/fabric2_dir_prefix_tests.erl index 75d68a80f..2943d6533 100644 --- a/src/fabric/test/fabric2_dir_prefix_tests.erl +++ b/src/fabric/test/fabric2_dir_prefix_tests.erl @@ -33,8 +33,8 @@ dir_prefix_test_() -> test_util:stop_couch(Ctx) end, with([ - ?TDEF(default_prefix), - ?TDEF(custom_prefix) + ?TDEF(default_prefix, 15), + ?TDEF(custom_prefix, 15) ]) }. diff --git a/src/fabric/test/fabric2_index_tests.erl b/src/fabric/test/fabric2_index_tests.erl index fa3a14d61..8a4acb77d 100644 --- a/src/fabric/test/fabric2_index_tests.erl +++ b/src/fabric/test/fabric2_index_tests.erl @@ -50,8 +50,8 @@ index_process_cleanup_test_() -> fun setup/0, fun cleanup/1, [ - ?TDEF_FE(updater_processes_start), - ?TDEF_FE(updater_processes_stop), + ?TDEF_FE(updater_processes_start, 15), + ?TDEF_FE(updater_processes_stop, 15), ?TDEF_FE(indexing_can_be_disabled), ?TDEF_FE(handle_indexer_blowing_up) ] @@ -206,7 +206,7 @@ updater_processes_stop(#{}) -> lists:foreach(fun(Ref) -> receive {'DOWN', Ref, _, _, _} -> ok - after 3000 -> + after 5000 -> ?assert(false) end end, Refs). diff --git a/src/fabric/test/fabric2_tx_options_tests.erl b/src/fabric/test/fabric2_tx_options_tests.erl index 34cb6e180..b93cc3d69 100644 --- a/src/fabric/test/fabric2_tx_options_tests.erl +++ b/src/fabric/test/fabric2_tx_options_tests.erl @@ -42,8 +42,8 @@ fdb_tx_options_test_() -> test_util:stop_couch(Ctx) end, with([ - ?TDEF(options_take_effect), - ?TDEF(can_configure_options_at_runtime) + ?TDEF(options_take_effect, 15), + ?TDEF(can_configure_options_at_runtime, 15) ]) }. -- cgit v1.2.1 From c9cbcb41a86d72b8b9d54bba55c8b89cd4f7779f Mon Sep 17 00:00:00 2001 From: Jay Doane Date: Wed, 13 May 2020 16:48:03 -0700 Subject: Fix compiler warning --- src/fabric/src/fabric2_fdb.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fabric/src/fabric2_fdb.erl b/src/fabric/src/fabric2_fdb.erl index 8264e8a60..f274aa606 100644 --- a/src/fabric/src/fabric2_fdb.erl +++ b/src/fabric/src/fabric2_fdb.erl @@ -731,7 +731,7 @@ get_local_doc_rev_wait(Future) -> erlfdb:wait(Future). -get_local_doc_body_future(#{} = Db, DocId, Rev) -> +get_local_doc_body_future(#{} = Db, DocId, _Rev) -> #{ tx := Tx, db_prefix := DbPrefix -- cgit v1.2.1 From 3846af79aad5a1ee16164fc50eeedfd39f269385 Mon Sep 17 00:00:00 2001 From: ILYA Khlopotov Date: Fri, 8 May 2020 13:04:23 -0700 Subject: Fix variable shadowing --- src/chttpd/src/chttpd_db.erl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/chttpd/src/chttpd_db.erl b/src/chttpd/src/chttpd_db.erl index 7cafabcc8..4776ac10d 100644 --- a/src/chttpd/src/chttpd_db.erl +++ b/src/chttpd/src/chttpd_db.erl @@ -831,10 +831,10 @@ multi_all_docs_view(Req, Db, OP, Queries) -> prepend = "\r\n" }, VAcc1 = lists:foldl(fun - (#mrargs{keys = undefined} = Args, Acc0) -> - send_all_docs(Db, Args, Acc0); - (#mrargs{keys = Keys} = Args, Acc0) when is_list(Keys) -> - Acc1 = send_all_docs_keys(Db, Args, Acc0), + (#mrargs{keys = undefined} = ArgsIn, Acc0) -> + send_all_docs(Db, ArgsIn, Acc0); + (#mrargs{keys = Keys} = ArgsIn, Acc0) when is_list(Keys) -> + Acc1 = send_all_docs_keys(Db, ArgsIn, Acc0), {ok, Acc2} = view_cb(complete, Acc1), Acc2 end, VAcc0, ArgQueries), -- cgit v1.2.1 From 404174118f9eb38d44543c6e430e938e865cc269 Mon Sep 17 00:00:00 2001 From: ILYA Khlopotov Date: Thu, 14 May 2020 15:14:37 -0700 Subject: Move not_implemented check down to allow testing of validation --- src/couch_views/src/couch_views_util.erl | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/couch_views/src/couch_views_util.erl b/src/couch_views/src/couch_views_util.erl index 24e2f8a2d..395660c02 100644 --- a/src/couch_views/src/couch_views_util.erl +++ b/src/couch_views/src/couch_views_util.erl @@ -86,11 +86,6 @@ validate_args(#mrargs{} = Args) -> GroupLevel = determine_group_level(Args), Reduce = Args#mrargs.reduce, - case Reduce =/= undefined orelse Args#mrargs.view_type == red of - true -> throw(not_implemented); - false -> ok - end, - case Reduce == undefined orelse is_boolean(Reduce) of true -> ok; _ -> mrverror(<<"Invalid `reduce` value.">>) @@ -198,6 +193,12 @@ validate_args(#mrargs{} = Args) -> _ -> mrverror(<<"Invalid value for `sorted`.">>) end, + case {Reduce, Args#mrargs.view_type} of + {false, _} -> ok; + {_, red} -> throw(not_implemented); + _ -> ok + end, + Args#mrargs{group_level=GroupLevel}. -- cgit v1.2.1 From af502eae5aab6282ce9509105d69acc9835d529c Mon Sep 17 00:00:00 2001 From: ILYA Khlopotov Date: Mon, 11 May 2020 08:55:08 -0700 Subject: Add tests for legacy API before refactoring --- src/chttpd/test/exunit/pagination_test.exs | 302 +++++++++++++++++++++++++++++ 1 file changed, 302 insertions(+) create mode 100644 src/chttpd/test/exunit/pagination_test.exs diff --git a/src/chttpd/test/exunit/pagination_test.exs b/src/chttpd/test/exunit/pagination_test.exs new file mode 100644 index 000000000..4b12c8b2f --- /dev/null +++ b/src/chttpd/test/exunit/pagination_test.exs @@ -0,0 +1,302 @@ +defmodule Couch.Test.Pagination do + use ExUnit.Case + import Couch.DBTest, only: [retry_until: 1] + alias Couch.DBTest, as: Utils + + defp create_admin(user_name, password) do + hashed = String.to_charlist(:couch_passwords.hash_admin_password(password)) + :config.set('admins', String.to_charlist(user_name), hashed, false) + end + + defp base_url() do + addr = :config.get('chttpd', 'bind_address', '127.0.0.1') + port = :mochiweb_socket_server.get(:chttpd, :port) + "http://#{addr}:#{port}" + end + + setup_all do + test_ctx = + :test_util.start_couch([:chttpd, :couch_jobs, :couch_views, :couch_eval, :couch_js]) + + :ok = create_admin("adm", "pass") + + on_exit(fn -> + :test_util.stop_couch(test_ctx) + end) + + %{ + base_url: base_url(), + user: "adm", + pass: "pass" + } + end + + defp with_session(context) do + session = Couch.login(context.user, context.pass, base_url: context.base_url) + %{session: session} + end + + defp random_db(context) do + db_name = Utils.random_db_name("db") + + on_exit(fn -> + delete_db(context.session, db_name) + end) + + create_db(context.session, db_name) + %{db_name: db_name} + end + + defp with_docs(context) do + assert Map.has_key?(context, :n_docs), "Please define '@describetag n_docs: 10'" + %{docs: create_docs(context.session, context.db_name, 1..context.n_docs)} + end + + defp with_view(context) do + ddoc_id = "simple" + + ddoc = %{ + _id: "_design/#{ddoc_id}", + views: %{ + all: %{ + map: "function(doc) { emit(doc.string, doc) }" + } + } + } + + create_doc(context.session, context.db_name, ddoc) + %{view_name: "all", ddoc_id: ddoc_id} + end + + def create_db(session, db_name, opts \\ []) do + retry_until(fn -> + resp = Couch.Session.put(session, "/#{db_name}", opts) + assert resp.status_code in [201, 202], "got error #{inspect(resp.body)}" + assert resp.body == %{"ok" => true} + {:ok, resp} + end) + end + + defp delete_db(session, db_name) do + retry_until(fn -> + resp = Couch.Session.delete(session, "/#{db_name}") + assert resp.status_code in [200, 202, 404], "got error #{inspect(resp.body)}" + {:ok, resp} + end) + end + + defp create_doc(session, db_name, body) do + {:ok, body} = + retry_until(fn -> + resp = Couch.Session.post(session, "/#{db_name}", body: body) + assert resp.status_code in [201, 202], "got error #{inspect(resp.body)}" + assert resp.body["ok"] + {:ok, resp.body} + end) + + Map.delete(body, "ok") + end + + defp create_docs(session, db_name, range) do + docs = make_docs(range) + + docs + |> Enum.map(fn doc -> + create_doc(session, db_name, doc) + end) + end + + defp docid(id) do + id |> Integer.to_string() |> String.pad_leading(3, "0") + end + + defp make_docs(id_range) do + for id <- id_range do + str_id = docid(id) + %{"_id" => str_id, "integer" => id, "string" => str_id} + end + end + + describe "Legacy API (10 docs)" do + @describetag n_docs: 10 + setup [:with_session, :random_db, :with_docs] + + test ": _all_docs/queries", ctx do + queries = %{ + queries: [%{descending: false}, %{descending: true}] + } + + resp = + Couch.Session.post(ctx.session, "/#{ctx.db_name}/_all_docs/queries", + body: :jiffy.encode(queries) + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + [q1, q2] = resp.body["results"] + assert q1["rows"] == Enum.reverse(q2["rows"]) + end + end + + for descending <- [false, true] do + describe "Legacy API (10 docs) : _all_docs?descending=#{descending}" do + @describetag n_docs: 10 + @describetag descending: descending + setup [:with_session, :random_db, :with_docs] + + test "total_rows matches the length of rows array", ctx do + resp = + Couch.Session.get(ctx.session, "/#{ctx.db_name}/_all_docs", + query: %{descending: ctx.descending} + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + body = resp.body + assert body["total_rows"] == length(body["rows"]) + end + + test "the rows are correctly sorted", ctx do + resp = + Couch.Session.get(ctx.session, "/#{ctx.db_name}/_all_docs", + query: %{descending: ctx.descending} + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + body = resp.body + ids = Enum.map(body["rows"], fn row -> row["id"] end) + + if ctx.descending do + assert Enum.reverse(Enum.sort(ids)) == ids + else + assert Enum.sort(ids) == ids + end + end + + test "start_key is respected", ctx do + head_pos = 2 + tail_pos = ctx.n_docs - head_pos + doc_ids = Enum.map(ctx.docs, fn doc -> doc["id"] end) + + {start_pos, doc_ids} = + if ctx.descending do + {head_pos, Enum.reverse(Enum.drop(Enum.sort(doc_ids), -tail_pos))} + else + {tail_pos, Enum.drop(Enum.sort(doc_ids), tail_pos - 1)} + end + + start_key = ~s("#{docid(start_pos)}") + + resp = + Couch.Session.get(ctx.session, "/#{ctx.db_name}/_all_docs", + query: %{descending: ctx.descending, start_key: start_key} + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + ids = Enum.map(resp.body["rows"], fn row -> row["id"] end) + assert doc_ids == ids + end + + test "end_key is respected", ctx do + head_pos = 2 + tail_pos = ctx.n_docs - head_pos + doc_ids = Enum.map(ctx.docs, fn doc -> doc["id"] end) + + {end_pos, doc_ids} = + if ctx.descending do + {tail_pos, Enum.reverse(Enum.drop(Enum.sort(doc_ids), tail_pos - 1))} + else + {head_pos, Enum.drop(Enum.sort(doc_ids), -tail_pos)} + end + + end_key = ~s("#{docid(end_pos)}") + + resp = + Couch.Session.get(ctx.session, "/#{ctx.db_name}/_all_docs", + query: %{descending: ctx.descending, end_key: end_key} + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + ids = Enum.map(resp.body["rows"], fn row -> row["id"] end) + assert doc_ids == ids + end + + test "range between start_key and end_key works", ctx do + head_pos = 2 + slice_size = 3 + doc_ids = Enum.sort(Enum.map(ctx.docs, fn doc -> doc["id"] end)) + # -1 due to 0 based indexing + # -2 is due to 0 based indexing and inclusive end + slice = Enum.slice(doc_ids, (head_pos - 1)..(head_pos + slice_size - 2)) + + {start_key, end_key, doc_ids} = + if ctx.descending do + reversed = Enum.reverse(slice) + [first | _] = reversed + [last | _] = slice + {~s("#{first}"), ~s("#{last}"), reversed} + else + [first | _] = slice + [last | _] = Enum.reverse(slice) + {~s("#{first}"), ~s("#{last}"), slice} + end + + assert length(doc_ids) == slice_size + + resp = + Couch.Session.get(ctx.session, "/#{ctx.db_name}/_all_docs", + query: %{descending: ctx.descending, start_key: start_key, end_key: end_key} + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + ids = Enum.map(resp.body["rows"], fn row -> row["id"] end) + assert doc_ids == ids + end + end + end + + describe "Legacy API (10 docs) : /{db}/_design/{ddoc}/_view" do + @describetag n_docs: 10 + @describetag descending: false + @describetag page_size: 4 + setup [:with_session, :random_db, :with_view, :with_docs] + + test "total_rows matches the length of rows array", ctx do + resp = + Couch.Session.get( + ctx.session, + "/#{ctx.db_name}/_design/#{ctx.ddoc_id}/_view/#{ctx.view_name}", + query: %{descending: ctx.descending} + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + body = resp.body + assert body["total_rows"] == length(body["rows"]) + end + end + + describe "Legacy API (10 docs) : /{db}/_design/{ddoc}/_view/queries" do + @describetag n_docs: 10 + @describetag page_size: 4 + setup [:with_session, :random_db, :with_view, :with_docs] + + test "descending is respected", ctx do + queries = %{ + queries: [%{descending: false}, %{descending: true}] + } + + resp = + Couch.Session.post( + ctx.session, + "/#{ctx.db_name}/_design/#{ctx.ddoc_id}/_view/#{ctx.view_name}/queries", + body: :jiffy.encode(queries) + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + + [q1, q2] = resp.body["results"] + q1 = Enum.map(q1["rows"], fn row -> row["id"] end) + q2 = Enum.map(q2["rows"], fn row -> row["id"] end) + assert q1 == Enum.reverse(q2) + assert q1 == Enum.sort(q1) + end + end +end -- cgit v1.2.1 From b8a13a531fd682329572e61eb17b3acdb35a6a13 Mon Sep 17 00:00:00 2001 From: ILYA Khlopotov Date: Mon, 4 May 2020 04:02:43 -0700 Subject: Implement pagination API --- src/chttpd/src/chttpd_db.erl | 99 +++++++--- src/chttpd/src/chttpd_view.erl | 162 ++++++++++++++-- src/couch_mrview/include/couch_mrview.hrl | 8 +- src/couch_mrview/src/couch_mrview_http.erl | 24 ++- src/couch_views/src/couch_views_http.erl | 292 +++++++++++++++++++++++++++++ src/couch_views/src/couch_views_util.erl | 60 +++++- 6 files changed, 600 insertions(+), 45 deletions(-) create mode 100644 src/couch_views/src/couch_views_http.erl diff --git a/src/chttpd/src/chttpd_db.erl b/src/chttpd/src/chttpd_db.erl index 4776ac10d..5cfbd1d5f 100644 --- a/src/chttpd/src/chttpd_db.erl +++ b/src/chttpd/src/chttpd_db.erl @@ -812,14 +812,20 @@ db_req(#httpd{path_parts=[_, DocId | FileNameParts]}=Req, Db) -> db_attachment_req(Req, Db, DocId, FileNameParts). multi_all_docs_view(Req, Db, OP, Queries) -> - Args0 = couch_mrview_http:parse_params(Req, undefined), + Args = couch_views_http:parse_params(Req, undefined), + case couch_views_util:is_paginated(Args) of + false -> + stream_multi_all_docs_view(Req, Db, OP, Args, Queries); + true -> + paginate_multi_all_docs_view(Req, Db, OP, Args, Queries) + end. + + +stream_multi_all_docs_view(Req, Db, OP, Args0, Queries) -> Args1 = Args0#mrargs{view_type=map}, - ArgQueries = lists:map(fun({Query}) -> - QueryArg1 = couch_mrview_http:parse_params(Query, undefined, - Args1, [decoded]), - QueryArgs2 = couch_views_util:validate_args(QueryArg1), - set_namespace(OP, QueryArgs2) - end, Queries), + ArgQueries = chttpd_view:parse_queries(Req, Args1, Queries, fun(QArgs) -> + set_namespace(OP, QArgs) + end), Max = chttpd:chunked_response_buffer_size(), First = "{\"results\":[", {ok, Resp0} = chttpd:start_delayed_json_response(Req, 200, [], First), @@ -842,8 +848,34 @@ multi_all_docs_view(Req, Db, OP, Queries) -> chttpd:end_delayed_json_response(Resp1). +paginate_multi_all_docs_view(Req, Db, OP, Args0, Queries) -> + Args1 = Args0#mrargs{view_type=map}, + ArgQueries = chttpd_view:parse_queries(Req, Args1, Queries, fun(QArgs) -> + set_namespace(OP, QArgs) + end), + KeyFun = fun({Props}) -> couch_util:get_value(id, Props) end, + #mrargs{page_size = PageSize} = Args0, + #httpd{path_parts = Parts} = Req, + UpdateSeq = fabric2_db:get_update_seq(Db), + EtagTerm = {Parts, UpdateSeq, Args0}, + Response = couch_views_http:paginated( + Req, EtagTerm, PageSize, ArgQueries, KeyFun, + fun(Args) -> + all_docs_paginated_cb(Db, Args) + end), + chttpd:send_json(Req, Response). + + all_docs_view(Req, Db, Keys, OP) -> - Args0 = couch_mrview_http:parse_body_and_query(Req, Keys), + Args = couch_views_http:parse_body_and_query(Req, Keys), + case couch_views_util:is_paginated(Args) of + false -> + stream_all_docs_view(Req, Db, Args, OP); + true -> + paginate_all_docs_view(Req, Db, Args, OP) + end. + +stream_all_docs_view(Req, Db, Args0, OP) -> Args1 = Args0#mrargs{view_type=map}, Args2 = couch_views_util:validate_args(Args1), Args3 = set_namespace(OP, Args2), @@ -864,15 +896,46 @@ all_docs_view(Req, Db, Keys, OP) -> end. +paginate_all_docs_view(Req, Db, Args0, OP) -> + Args1 = Args0#mrargs{view_type=map}, + Args2 = chttpd_view:validate_args(Req, Args1), + Args3 = set_namespace(OP, Args2), + KeyFun = fun({Props}) -> couch_util:get_value(id, Props) end, + #httpd{path_parts = Parts} = Req, + UpdateSeq = fabric2_db:get_update_seq(Db), + EtagTerm = {Parts, UpdateSeq, Args3}, + Response = couch_views_http:paginated( + Req, EtagTerm, Args3, KeyFun, + fun(Args) -> + all_docs_paginated_cb(Db, Args) + end), + chttpd:send_json(Req, Response). + + +all_docs_paginated_cb(Db, Args) -> + #vacc{meta=MetaMap, buffer=Items} = case Args#mrargs.keys of + undefined -> + send_all_docs(Db, Args, #vacc{paginated=true}); + Keys when is_list(Keys) -> + send_all_docs_keys(Db, Args, #vacc{paginated=true}) + end, + {MetaMap, Items}. + + send_all_docs(Db, #mrargs{keys = undefined} = Args, VAcc0) -> Opts0 = fabric2_util:all_docs_view_opts(Args), - Opts = Opts0 ++ [{restart_tx, true}], - NS = couch_util:get_value(namespace, Opts), + NS = couch_util:get_value(namespace, Opts0), FoldFun = case NS of <<"_all_docs">> -> fold_docs; <<"_design">> -> fold_design_docs; <<"_local">> -> fold_local_docs end, + Opts = case couch_views_util:is_paginated(Args) of + false -> + Opts0 ++ [{restart_tx, true}]; + true -> + Opts0 + end, ViewCb = fun view_cb/2, Acc = {iter, Db, Args, VAcc0}, {ok, {iter, _, _, VAcc1}} = fabric2_db:FoldFun(Db, ViewCb, Acc, Opts), @@ -980,25 +1043,15 @@ view_cb({row, Row}, {iter, Db, Args, VAcc}) -> Row end, chttpd_stats:incr_rows(), - {Go, NewVAcc} = couch_mrview_http:view_cb({row, NewRow}, VAcc), + {Go, NewVAcc} = couch_views_http:view_cb({row, NewRow}, VAcc), {Go, {iter, Db, Args, NewVAcc}}; view_cb(Msg, {iter, Db, Args, VAcc}) -> - {Go, NewVAcc} = couch_mrview_http:view_cb(Msg, VAcc), + {Go, NewVAcc} = couch_views_http:view_cb(Msg, VAcc), {Go, {iter, Db, Args, NewVAcc}}; -view_cb({row, Row} = Msg, Acc) -> - case lists:keymember(doc, 1, Row) of - true -> - chttpd_stats:incr_reads(); - false -> - ok - end, - chttpd_stats:incr_rows(), - couch_mrview_http:view_cb(Msg, Acc); - view_cb(Msg, Acc) -> - couch_mrview_http:view_cb(Msg, Acc). + couch_views_http:view_cb(Msg, Acc). db_doc_req(#httpd{method='DELETE'}=Req, Db, DocId) -> % check for the existence of the doc to handle the 404 case. diff --git a/src/chttpd/src/chttpd_view.erl b/src/chttpd/src/chttpd_view.erl index c9340fbe2..8e2a08e2b 100644 --- a/src/chttpd/src/chttpd_view.erl +++ b/src/chttpd/src/chttpd_view.erl @@ -14,18 +14,32 @@ -include_lib("couch/include/couch_db.hrl"). -include_lib("couch_mrview/include/couch_mrview.hrl"). --export([handle_view_req/3]). +-export([ + handle_view_req/3, + validate_args/2, + parse_queries/4, + view_cb/2 +]). + +-define(DEFAULT_ALL_DOCS_PAGE_SIZE, 2000). +-define(DEFAULT_VIEWS_PAGE_SIZE, 2000). multi_query_view(Req, Db, DDoc, ViewName, Queries) -> - Args0 = couch_mrview_http:parse_params(Req, undefined), + Args = couch_views_http:parse_params(Req, undefined), + case couch_views_util:is_paginated(Args) of + false -> + stream_multi_query_view(Req, Db, DDoc, ViewName, Args, Queries); + true -> + paginate_multi_query_view(Req, Db, DDoc, ViewName, Args, Queries) + end. + + +stream_multi_query_view(Req, Db, DDoc, ViewName, Args0, Queries) -> {ok, #mrst{views=Views}} = couch_mrview_util:ddoc_to_mrst(Db, DDoc), Args1 = couch_mrview_util:set_view_type(Args0, ViewName, Views), - ArgQueries = lists:map(fun({Query}) -> - QueryArg = couch_mrview_http:parse_params(Query, undefined, - Args1, [decoded]), - QueryArg1 = couch_mrview_util:set_view_type(QueryArg, ViewName, Views), - fabric_util:validate_args(Db, DDoc, QueryArg1) - end, Queries), + ArgQueries = parse_queries(Req, Args1, Queries, fun(QueryArg) -> + couch_mrview_util:set_view_type(QueryArg, ViewName, Views) + end), VAcc0 = #vacc{db=Db, req=Req, prepend="\r\n"}, FirstChunk = "{\"results\":[", {ok, Resp0} = chttpd:start_delayed_json_response(VAcc0#vacc.req, 200, [], FirstChunk), @@ -38,15 +52,46 @@ multi_query_view(Req, Db, DDoc, ViewName, Queries) -> {ok, Resp1} = chttpd:send_delayed_chunk(VAcc2#vacc.resp, "\r\n]}"), chttpd:end_delayed_json_response(Resp1). + +paginate_multi_query_view(Req, Db, DDoc, ViewName, Args0, Queries) -> + {ok, #mrst{views=Views}} = couch_mrview_util:ddoc_to_mrst(Db, DDoc), + ArgQueries = parse_queries(Req, Args0, Queries, fun(QueryArg) -> + couch_mrview_util:set_view_type(QueryArg, ViewName, Views) + end), + KeyFun = fun({Props}) -> couch_util:get_value(id, Props) end, + #mrargs{page_size = PageSize} = Args0, + #httpd{path_parts = Parts} = Req, + UpdateSeq = fabric2_db:get_update_seq(Db), + EtagTerm = {Parts, UpdateSeq, Args0}, + Response = couch_views_http:paginated( + Req, EtagTerm, PageSize, ArgQueries, KeyFun, + fun(Args) -> + {ok, #vacc{meta=MetaMap, buffer=Items}} = couch_views:query( + Db, DDoc, ViewName, fun view_cb/2, #vacc{paginated=true}, Args), + {MetaMap, Items} + end), + chttpd:send_json(Req, Response). + + design_doc_post_view(Req, Props, Db, DDoc, ViewName, Keys) -> Args = couch_mrview_http:parse_body_and_query(Req, Props, Keys), fabric_query_view(Db, Req, DDoc, ViewName, Args). design_doc_view(Req, Db, DDoc, ViewName, Keys) -> - Args = couch_mrview_http:parse_params(Req, Keys), + Args = couch_views_http:parse_params(Req, Keys), fabric_query_view(Db, Req, DDoc, ViewName, Args). + fabric_query_view(Db, Req, DDoc, ViewName, Args) -> + case couch_views_util:is_paginated(Args) of + false -> + stream_fabric_query_view(Db, Req, DDoc, ViewName, Args); + true -> + paginate_fabric_query_view(Db, Req, DDoc, ViewName, Args) + end. + + +stream_fabric_query_view(Db, Req, DDoc, ViewName, Args) -> Max = chttpd:chunked_response_buffer_size(), Fun = fun view_cb/2, VAcc = #vacc{db=Db, req=Req, threshold=Max}, @@ -54,16 +99,31 @@ fabric_query_view(Db, Req, DDoc, ViewName, Args) -> {ok, Resp#vacc.resp}. +paginate_fabric_query_view(Db, Req, DDoc, ViewName, Args0) -> + KeyFun = fun({Props}) -> couch_util:get_value(id, Props) end, + #httpd{path_parts = Parts} = Req, + UpdateSeq = fabric2_db:get_update_seq(Db), + ETagTerm = {Parts, UpdateSeq, Args0}, + Response = couch_views_http:paginated( + Req, ETagTerm, Args0, KeyFun, + fun(Args) -> + VAcc0 = #vacc{paginated=true}, + {ok, VAcc1} = couch_views:query(Db, DDoc, ViewName, fun view_cb/2, VAcc0, Args), + #vacc{meta=Meta, buffer=Items} = VAcc1, + {Meta, Items} + end), + chttpd:send_json(Req, Response). + view_cb({row, Row} = Msg, Acc) -> case lists:keymember(doc, 1, Row) of true -> chttpd_stats:incr_reads(); false -> ok end, chttpd_stats:incr_rows(), - couch_mrview_http:view_cb(Msg, Acc); + couch_views_http:view_cb(Msg, Acc); view_cb(Msg, Acc) -> - couch_mrview_http:view_cb(Msg, Acc). + couch_views_http:view_cb(Msg, Acc). handle_view_req(#httpd{method='POST', @@ -111,6 +171,86 @@ assert_no_queries_param(_) -> }). +validate_args(Req, #mrargs{page_size = PageSize} = Args) when is_integer(PageSize) -> + MaxPageSize = max_page_size(Req), + couch_views_util:validate_args(Args, [{page_size, MaxPageSize}]); + +validate_args(_Req, #mrargs{} = Args) -> + couch_views_util:validate_args(Args, []). + + +max_page_size(#httpd{path_parts=[_Db, <<"_all_docs">>, <<"queries">>]}) -> + config:get_integer( + "request_limits", "_all_docs/queries", ?DEFAULT_ALL_DOCS_PAGE_SIZE); + +max_page_size(#httpd{path_parts=[_Db, <<"_all_docs">>]}) -> + config:get_integer( + "request_limits", "_all_docs", ?DEFAULT_ALL_DOCS_PAGE_SIZE); + +max_page_size(#httpd{path_parts=[_Db, <<"_local_docs">>, <<"queries">>]}) -> + config:get_integer( + "request_limits", "_all_docs/queries", ?DEFAULT_ALL_DOCS_PAGE_SIZE); + +max_page_size(#httpd{path_parts=[_Db, <<"_local_docs">>]}) -> + config:get_integer( + "request_limits", "_all_docs", ?DEFAULT_ALL_DOCS_PAGE_SIZE); + +max_page_size(#httpd{path_parts=[_Db, <<"_design_docs">>, <<"queries">>]}) -> + config:get_integer( + "request_limits", "_all_docs/queries", ?DEFAULT_ALL_DOCS_PAGE_SIZE); + +max_page_size(#httpd{path_parts=[_Db, <<"_design_docs">>]}) -> + config:get_integer( + "request_limits", "_all_docs", ?DEFAULT_ALL_DOCS_PAGE_SIZE); + +max_page_size(#httpd{path_parts=[ + _Db, <<"_design">>, _DDocName, <<"_view">>, _View, <<"queries">>]}) -> + config:get_integer( + "request_limits", "_view/queries", ?DEFAULT_VIEWS_PAGE_SIZE); + +max_page_size(#httpd{path_parts=[ + _Db, <<"_design">>, _DDocName, <<"_view">>, _View]}) -> + config:get_integer( + "request_limits", "_view", ?DEFAULT_VIEWS_PAGE_SIZE). + + +parse_queries(Req, #mrargs{page_size = PageSize} = Args0, Queries, Fun) + when is_integer(PageSize) -> + MaxPageSize = max_page_size(Req), + if length(Queries) < PageSize -> ok; true -> + throw({ + query_parse_error, + <<"Provided number of queries is more than given page_size">> + }) + end, + couch_views_util:validate_args(Fun(Args0), [{page_size, MaxPageSize}]), + Args = Args0#mrargs{page_size = undefined}, + lists:map(fun({Query}) -> + Args1 = couch_views_http:parse_params(Query, undefined, Args, [decoded]), + if not is_integer(Args1#mrargs.page_size) -> ok; true -> + throw({ + query_parse_error, + <<"You cannot specify `page_size` inside the query">> + }) + end, + Args2 = maybe_set_page_size(Args1, MaxPageSize), + couch_views_util:validate_args(Fun(Args2), [{page_size, MaxPageSize}]) + end, Queries); + +parse_queries(_Req, #mrargs{} = Args, Queries, Fun) -> + lists:map(fun({Query}) -> + Args1 = couch_views_http:parse_params(Query, undefined, Args, [decoded]), + couch_views_util:validate_args(Fun(Args1)) + end, Queries). + + +maybe_set_page_size(#mrargs{page_size = undefined} = Args, MaxPageSize) -> + Args#mrargs{page_size = MaxPageSize}; + +maybe_set_page_size(#mrargs{} = Args, _MaxPageSize) -> + Args. + + -ifdef(TEST). -include_lib("eunit/include/eunit.hrl"). diff --git a/src/couch_mrview/include/couch_mrview.hrl b/src/couch_mrview/include/couch_mrview.hrl index bb0ab0b46..e0f80df81 100644 --- a/src/couch_mrview/include/couch_mrview.hrl +++ b/src/couch_mrview/include/couch_mrview.hrl @@ -81,7 +81,9 @@ conflicts, callback, sorted = true, - extra = [] + extra = [], + page_size = undefined, + bookmark=nil }). -record(vacc, { @@ -95,7 +97,9 @@ bufsize = 0, threshold = 1490, row_sent = false, - meta_sent = false + meta_sent = false, + paginated = false, + meta = #{} }). -record(lacc, { diff --git a/src/couch_mrview/src/couch_mrview_http.erl b/src/couch_mrview/src/couch_mrview_http.erl index 3cf8833d7..e1ba9d656 100644 --- a/src/couch_mrview/src/couch_mrview_http.erl +++ b/src/couch_mrview/src/couch_mrview_http.erl @@ -35,6 +35,8 @@ parse_params/3, parse_params/4, view_cb/2, + row_to_obj/1, + row_to_obj/2, row_to_json/1, row_to_json/2, check_view_etag/3 @@ -413,11 +415,19 @@ prepend_val(#vacc{prepend=Prepend}) -> row_to_json(Row) -> + ?JSON_ENCODE(row_to_obj(Row)). + + +row_to_json(Kind, Row) -> + ?JSON_ENCODE(row_to_obj(Kind, Row)). + + +row_to_obj(Row) -> Id = couch_util:get_value(id, Row), - row_to_json(Id, Row). + row_to_obj(Id, Row). -row_to_json(error, Row) -> +row_to_obj(error, Row) -> % Special case for _all_docs request with KEYS to % match prior behavior. Key = couch_util:get_value(key, Row), @@ -426,9 +436,8 @@ row_to_json(error, Row) -> ReasonProp = if Reason == undefined -> []; true -> [{reason, Reason}] end, - Obj = {[{key, Key}, {error, Val}] ++ ReasonProp}, - ?JSON_ENCODE(Obj); -row_to_json(Id0, Row) -> + {[{key, Key}, {error, Val}] ++ ReasonProp}; +row_to_obj(Id0, Row) -> Id = case Id0 of undefined -> []; Id0 -> [{id, Id0}] @@ -439,8 +448,7 @@ row_to_json(Id0, Row) -> undefined -> []; Doc0 -> [{doc, Doc0}] end, - Obj = {Id ++ [{key, Key}, {value, Val}] ++ Doc}, - ?JSON_ENCODE(Obj). + {Id ++ [{key, Key}, {value, Val}] ++ Doc}. parse_params(#httpd{}=Req, Keys) -> @@ -523,6 +531,8 @@ parse_param(Key, Val, Args, IsDecoded) -> Args#mrargs{end_key_docid=couch_util:to_binary(Val)}; "limit" -> Args#mrargs{limit=parse_pos_int(Val)}; + "page_size" -> + Args#mrargs{page_size=parse_pos_int(Val)}; "stale" when Val == "ok" orelse Val == <<"ok">> -> Args#mrargs{stable=true, update=false}; "stale" when Val == "update_after" orelse Val == <<"update_after">> -> diff --git a/src/couch_views/src/couch_views_http.erl b/src/couch_views/src/couch_views_http.erl new file mode 100644 index 000000000..ae6725649 --- /dev/null +++ b/src/couch_views/src/couch_views_http.erl @@ -0,0 +1,292 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_views_http). + +-include_lib("couch/include/couch_db.hrl"). +-include_lib("couch_mrview/include/couch_mrview.hrl"). + +-export([ + parse_body_and_query/2, + parse_body_and_query/3, + parse_params/2, + parse_params/4, + row_to_obj/1, + row_to_obj/2, + view_cb/2, + paginated/5, + paginated/6 +]). + +-define(BOOKMARK_VSN, 1). + +parse_body_and_query(#httpd{method='POST'} = Req, Keys) -> + Props = chttpd:json_body_obj(Req), + parse_body_and_query(Req, Props, Keys); + +parse_body_and_query(Req, Keys) -> + parse_params(chttpd:qs(Req), Keys, #mrargs{keys=Keys, group=undefined, + group_level=undefined}, [keep_group_level]). + +parse_body_and_query(Req, {Props}, Keys) -> + Args = #mrargs{keys=Keys, group=undefined, group_level=undefined}, + BodyArgs = parse_params(Props, Keys, Args, [decoded]), + parse_params(chttpd:qs(Req), Keys, BodyArgs, [keep_group_level]). + +parse_params(#httpd{}=Req, Keys) -> + parse_params(chttpd:qs(Req), Keys); +parse_params(Props, Keys) -> + Args = #mrargs{}, + parse_params(Props, Keys, Args). + + +parse_params(Props, Keys, Args) -> + parse_params(Props, Keys, Args, []). + + +parse_params([{"bookmark", Bookmark}], _Keys, #mrargs{}, _Options) -> + bookmark_decode(Bookmark); + +parse_params(Props, Keys, #mrargs{}=Args, Options) -> + case couch_util:get_value("bookmark", Props, nil) of + nil -> + ok; + _ -> + throw({bad_request, "Cannot use `bookmark` with other options"}) + end, + couch_mrview_http:parse_params(Props, Keys, Args, Options). + + +row_to_obj(Row) -> + Id = couch_util:get_value(id, Row), + row_to_obj(Id, Row). + + +row_to_obj(Id, Row) -> + couch_mrview_http:row_to_obj(Id, Row). + + +view_cb(Msg, #vacc{paginated = false}=Acc) -> + couch_mrview_http:view_cb(Msg, Acc); +view_cb(Msg, #vacc{paginated = true}=Acc) -> + paginated_cb(Msg, Acc). + + +paginated_cb({row, Row}, #vacc{buffer=Buf}=Acc) -> + {ok, Acc#vacc{buffer = [row_to_obj(Row) | Buf]}}; + +paginated_cb({error, Reason}, #vacc{}=_Acc) -> + throw({error, Reason}); + +paginated_cb(complete, #vacc{buffer=Buf}=Acc) -> + {ok, Acc#vacc{buffer=lists:reverse(Buf)}}; + +paginated_cb({meta, Meta}, #vacc{}=VAcc) -> + MetaMap = lists:foldl(fun(MetaData, Acc) -> + case MetaData of + {_Key, undefined} -> + Acc; + {total, _Value} -> + %% We set total_rows elsewere + Acc; + {Key, Value} -> + maps:put(list_to_binary(atom_to_list(Key)), Value, Acc) + end + end, #{}, Meta), + {ok, VAcc#vacc{meta=MetaMap}}. + + +paginated(Req, EtagTerm, #mrargs{page_size = PageSize} = Args, KeyFun, Fun) -> + Etag = couch_httpd:make_etag(EtagTerm), + chttpd:etag_respond(Req, Etag, fun() -> + hd(do_paginated(PageSize, [set_limit(Args)], KeyFun, Fun)) + end). + + +paginated(Req, EtagTerm, PageSize, QueriesArgs, KeyFun, Fun) when is_list(QueriesArgs) -> + Etag = couch_httpd:make_etag(EtagTerm), + chttpd:etag_respond(Req, Etag, fun() -> + Results = do_paginated(PageSize, QueriesArgs, KeyFun, Fun), + #{results => Results} + end). + + +do_paginated(PageSize, QueriesArgs, KeyFun, Fun) when is_list(QueriesArgs) -> + {_N, Results} = lists:foldl(fun(Args0, {Limit, Acc}) -> + case Limit > 0 of + true -> + Args = set_limit(Args0#mrargs{page_size = Limit}), + {Meta, Items} = Fun(Args), + Result = maybe_add_bookmark( + PageSize, Args, Meta, Items, KeyFun), + #{total_rows := Total} = Result, + {Limit - Total, [Result | Acc]}; + false -> + Bookmark = bookmark_encode(Args0), + Result = #{ + rows => [], + next => Bookmark, + total_rows => 0 + }, + {Limit, [Result | Acc]} + end + end, {PageSize, []}, QueriesArgs), + lists:reverse(Results). + + +maybe_add_bookmark(PageSize, Args0, Response, Items, KeyFun) -> + #mrargs{page_size = Limit} = Args0, + Args = Args0#mrargs{page_size = PageSize}, + case check_completion(Limit, Items) of + {Rows, nil} -> + maps:merge(Response, #{ + rows => Rows, + total_rows => length(Rows) + }); + {Rows, Next} -> + NextKey = KeyFun(Next), + if is_binary(NextKey) -> ok; true -> + throw("Provided KeyFun should return binary") + end, + Bookmark = bookmark_encode(Args#mrargs{start_key=NextKey}), + maps:merge(Response, #{ + rows => Rows, + next => Bookmark, + total_rows => length(Rows) + }) + end. + + +set_limit(#mrargs{page_size = PageSize, limit = Limit} = Args) + when is_integer(PageSize) andalso Limit > PageSize -> + Args#mrargs{limit = PageSize + 1}; + +set_limit(#mrargs{page_size = PageSize, limit = Limit} = Args) + when is_integer(PageSize) -> + Args#mrargs{limit = Limit + 1}. + + +check_completion(Limit, Items) when length(Items) > Limit -> + case lists:split(Limit, Items) of + {Head, [NextItem | _]} -> + {Head, NextItem}; + {Head, []} -> + {Head, nil} + end; + +check_completion(_Limit, Items) -> + {Items, nil}. + + +bookmark_encode(Args0) -> + Defaults = #mrargs{}, + {RevTerms, Mask, _} = lists:foldl(fun(Value, {Acc, Mask, Idx}) -> + case element(Idx, Defaults) of + Value -> + {Acc, Mask, Idx + 1}; + _Default when Idx == #mrargs.bookmark -> + {Acc, Mask, Idx + 1}; + _Default -> + % Its `(Idx - 1)` because the initial `1` + % value already accounts for one bit. + {[Value | Acc], (1 bsl (Idx - 1)) bor Mask, Idx + 1} + end + end, {[], 0, 1}, tuple_to_list(Args0)), + Terms = lists:reverse(RevTerms), + TermBin = term_to_binary(Terms, [compressed, {minor_version, 2}]), + MaskBin = binary:encode_unsigned(Mask), + RawBookmark = <>, + couch_util:encodeBase64Url(RawBookmark). + + +bookmark_decode(Bookmark) -> + try + RawBin = couch_util:decodeBase64Url(Bookmark), + <> = RawBin, + Mask = binary:decode_unsigned(MaskBin), + Index = mask_to_index(Mask, 1, []), + Terms = binary_to_term(TermBin, [safe]), + lists:foldl(fun({Idx, Value}, Acc) -> + setelement(Idx, Acc, Value) + end, #mrargs{}, lists:zip(Index, Terms)) + catch _:_ -> + throw({bad_request, <<"Invalid bookmark">>}) + end. + + +mask_to_index(0, _Pos, Acc) -> + lists:reverse(Acc); +mask_to_index(Mask, Pos, Acc) when is_integer(Mask), Mask > 0 -> + NewAcc = case Mask band 1 of + 0 -> Acc; + 1 -> [Pos | Acc] + end, + mask_to_index(Mask bsr 1, Pos + 1, NewAcc). + + +-ifdef(TEST). + +-include_lib("eunit/include/eunit.hrl"). + +bookmark_encode_decode_test() -> + ?assertEqual( + #mrargs{page_size = 5}, + bookmark_decode(bookmark_encode(#mrargs{page_size = 5})) + ), + + Randomized = lists:foldl(fun(Idx, Acc) -> + if Idx == #mrargs.bookmark -> Acc; true -> + setelement(Idx, Acc, couch_uuids:random()) + end + end, #mrargs{}, lists:seq(1, record_info(size, mrargs))), + + ?assertEqual( + Randomized, + bookmark_decode(bookmark_encode(Randomized)) + ). + + +check_completion_test() -> + ?assertEqual( + {[], nil}, + check_completion(1, []) + ), + ?assertEqual( + {[1], nil}, + check_completion(1, [1]) + ), + ?assertEqual( + {[1], 2}, + check_completion(1, [1, 2]) + ), + ?assertEqual( + {[1], 2}, + check_completion(1, [1, 2, 3]) + ), + ?assertEqual( + {[1, 2], nil}, + check_completion(3, [1, 2]) + ), + ?assertEqual( + {[1, 2, 3], nil}, + check_completion(3, [1, 2, 3]) + ), + ?assertEqual( + {[1, 2, 3], 4}, + check_completion(3, [1, 2, 3, 4]) + ), + ?assertEqual( + {[1, 2, 3], 4}, + check_completion(3, [1, 2, 3, 4, 5]) + ), + ok. +-endif. \ No newline at end of file diff --git a/src/couch_views/src/couch_views_util.erl b/src/couch_views/src/couch_views_util.erl index 395660c02..154e9e270 100644 --- a/src/couch_views/src/couch_views_util.erl +++ b/src/couch_views/src/couch_views_util.erl @@ -15,7 +15,9 @@ -export([ ddoc_to_mrst/2, - validate_args/1 + validate_args/1, + validate_args/2, + is_paginated/1 ]). @@ -79,10 +81,14 @@ ddoc_to_mrst(DbName, #doc{id=Id, body={Fields}}) -> {ok, IdxState#mrst{sig=couch_hash:md5_hash(term_to_binary(SigInfo))}}. +validate_args(Args) -> + validate_args(Args, []). + + % This is mostly a copy of couch_mrview_util:validate_args/1 but it doesn't % update start / end keys and also throws a not_implemented error for reduce % -validate_args(#mrargs{} = Args) -> +validate_args(#mrargs{} = Args, Opts) -> GroupLevel = determine_group_level(Args), Reduce = Args#mrargs.reduce, @@ -193,6 +199,24 @@ validate_args(#mrargs{} = Args) -> _ -> mrverror(<<"Invalid value for `sorted`.">>) end, + MaxPageSize = couch_util:get_value(page_size, Opts, 0), + case {Args#mrargs.page_size, MaxPageSize} of + {_, 0} -> ok; + {Value, _} -> validate_limit(<<"page_size">>, Value, 1, MaxPageSize) + end, + + case {Args#mrargs.skip, MaxPageSize} of + {_, 0} -> ok; + {Skip, _} -> validate_limit(<<"skip">>, Skip, 0, MaxPageSize) + end, + + case {is_list(Args#mrargs.keys), is_integer(Args#mrargs.page_size)} of + {true, true} -> + mrverror(<<"`page_size` is incompatible with `keys`">>); + _ -> + ok + end, + case {Reduce, Args#mrargs.view_type} of {false, _} -> ok; {_, red} -> throw(not_implemented); @@ -201,6 +225,31 @@ validate_args(#mrargs{} = Args) -> Args#mrargs{group_level=GroupLevel}. +validate_limit(Name, Value, _Min, _Max) when not is_integer(Value) -> + mrverror(<<"`", Name/binary, "` should be an integer">>); + +validate_limit(Name, Value, Min, Max) when Value > Max -> + range_error_msg(Name, Min, Max); + +validate_limit(Name, Value, Min, Max) when Value < Min -> + range_error_msg(Name, Min, Max); + +validate_limit(_Name, _Value, _Min, _Max) -> + ok. + +range_error_msg(Name, Min, Max) -> + MinBin = list_to_binary(integer_to_list(Min)), + MaxBin = list_to_binary(integer_to_list(Max)), + mrverror(<< + "`", + Name/binary, + "` should be an integer in range [", + MinBin/binary, + " .. ", + MaxBin/binary, + "]" + >>). + determine_group_level(#mrargs{group=undefined, group_level=undefined}) -> 0; @@ -220,3 +269,10 @@ determine_group_level(#mrargs{group_level=GroupLevel}) -> mrverror(Mesg) -> throw({query_parse_error, Mesg}). + + +is_paginated(#mrargs{page_size = PageSize}) when is_integer(PageSize) -> + true; + +is_paginated(_) -> + false. -- cgit v1.2.1 From 02e4c3e96aad6f653af9a9550c8c7a9340fc2e58 Mon Sep 17 00:00:00 2001 From: ILYA Khlopotov Date: Thu, 14 May 2020 12:47:29 -0700 Subject: Add tests for pagination API --- src/chttpd/test/exunit/pagination_test.exs | 771 +++++++++++++++++++++++++++++ 1 file changed, 771 insertions(+) diff --git a/src/chttpd/test/exunit/pagination_test.exs b/src/chttpd/test/exunit/pagination_test.exs index 4b12c8b2f..fcb8f9add 100644 --- a/src/chttpd/test/exunit/pagination_test.exs +++ b/src/chttpd/test/exunit/pagination_test.exs @@ -68,6 +68,52 @@ defmodule Couch.Test.Pagination do %{view_name: "all", ddoc_id: ddoc_id} end + defp all_docs(context) do + assert Map.has_key?(context, :page_size), "Please define '@describetag page_size: 4'" + + assert Map.has_key?(context, :descending), + "Please define '@describetag descending: false'" + + resp = + Couch.Session.get(context.session, "/#{context.db_name}/_all_docs", + query: %{page_size: context.page_size, descending: context.descending} + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + + %{ + response: resp.body + } + end + + defp paginate(context) do + if Map.has_key?(context.response, "next") do + bookmark = context.response["next"] + pages = Map.get(context, :pages, [context.response]) + assert length(pages) < div(context.n_docs, context.page_size) + 1 + + resp = + Couch.Session.get(context.session, "/#{context.db_name}/_all_docs", + query: %{bookmark: bookmark} + ) + + context = + Map.merge(context, %{ + pages: [resp.body | pages], + response: resp.body + }) + + paginate(context) + else + context = + Map.update(context, :pages, [], fn acc -> + Enum.reverse(acc) + end) + + context + end + end + def create_db(session, db_name, opts \\ []) do retry_until(fn -> resp = Couch.Session.put(session, "/#{db_name}", opts) @@ -298,5 +344,730 @@ defmodule Couch.Test.Pagination do assert q1 == Enum.reverse(q2) assert q1 == Enum.sort(q1) end + + test "ensure we paginate starting from first query", ctx do + queries = %{ + queries: [%{descending: false}, %{descending: true}] + } + + resp = + Couch.Session.post( + ctx.session, + "/#{ctx.db_name}/_design/#{ctx.ddoc_id}/_view/#{ctx.view_name}/queries", + query: %{page_size: ctx.page_size}, + body: :jiffy.encode(queries) + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + + [q1, q2] = resp.body["results"] + q1 = Enum.map(q1["rows"], fn row -> row["id"] end) + q2 = Enum.map(q2["rows"], fn row -> row["id"] end) + assert ctx.page_size == length(q1) + assert q2 == [] + end + end + + describe "Pagination API (10 docs)" do + @describetag n_docs: 10 + @describetag page_size: 4 + setup [:with_session, :random_db, :with_docs] + + test ": _all_docs?page_size=4", ctx do + %{session: session, db_name: db_name} = ctx + + resp = + Couch.Session.get(session, "/#{db_name}/_all_docs", + query: %{page_size: ctx.page_size} + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + end + + test ": _all_docs/queries should limit number of queries", ctx do + queries = %{ + queries: [%{}, %{}, %{}, %{}, %{}] + } + + resp = + Couch.Session.post(ctx.session, "/#{ctx.db_name}/_all_docs/queries", + query: %{page_size: ctx.page_size}, + body: :jiffy.encode(queries) + ) + + assert resp.status_code == 400 + + assert resp.body["reason"] == + "Provided number of queries is more than given page_size" + end + + test ": _all_docs/queries should forbid `page_size` in queries", ctx do + queries = %{ + queries: [%{page_size: 3}] + } + + resp = + Couch.Session.post(ctx.session, "/#{ctx.db_name}/_all_docs/queries", + query: %{page_size: ctx.page_size}, + body: :jiffy.encode(queries) + ) + + assert resp.status_code == 400 + + assert resp.body["reason"] == + "You cannot specify `page_size` inside the query" + end + + test ": _all_docs should forbid `page_size` and `keys`", ctx do + body = %{ + page_size: 3, + keys: [ + "002", + "004" + ] + } + + resp = + Couch.Session.post(ctx.session, "/#{ctx.db_name}/_all_docs", + body: :jiffy.encode(body) + ) + + assert resp.status_code == 400 + + assert resp.body["reason"] == + "`page_size` is incompatible with `keys`" + end + + test ": _all_docs should limit 'skip' parameter", ctx do + resp = + Couch.Session.get(ctx.session, "/#{ctx.db_name}/_all_docs", + query: %{page_size: ctx.page_size, skip: 3000} + ) + + assert resp.status_code == 400 + + assert resp.body["reason"] == + "`skip` should be an integer in range [0 .. 2000]" + end + + test ": _all_docs should forbid extra parameters when 'bookmark' is present", ctx do + resp = + Couch.Session.get(ctx.session, "/#{ctx.db_name}/_all_docs", + query: %{page_size: ctx.page_size, skip: 3000, bookmark: ""} + ) + + assert resp.status_code == 400 + + assert resp.body["reason"] == + "Cannot use `bookmark` with other options" + end + end + + for descending <- [false, true] do + for n <- [4, 9] do + describe "Pagination API (10 docs) : _all_docs?page_size=#{n}&descending=#{ + descending + }" do + @describetag n_docs: 10 + @describetag descending: descending + @describetag page_size: n + setup [:with_session, :random_db, :with_docs, :all_docs] + + test "should return 'next' bookmark", ctx do + body = ctx.response + assert Map.has_key?(body, "next") + end + + test "total_rows matches the length of rows array", ctx do + body = ctx.response + assert body["total_rows"] == length(body["rows"]) + end + + test "total_rows matches the requested page_size", ctx do + body = ctx.response + assert body["total_rows"] == ctx.page_size + end + + test "can use 'next' bookmark to get remaining results", ctx do + bookmark = ctx.response["next"] + + resp = + Couch.Session.get(ctx.session, "/#{ctx.db_name}/_all_docs", + query: %{bookmark: bookmark} + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + body = resp.body + assert body["total_rows"] == length(body["rows"]) + assert body["total_rows"] <= ctx.page_size + end + end + + describe "Pagination API (10 docs) : _all_docs?page_size=#{n}&descending=#{ + descending + } : range" do + @describetag n_docs: 10 + @describetag descending: descending + @describetag page_size: n + setup [:with_session, :random_db, :with_docs] + + test "start_key is respected", ctx do + head_pos = 2 + tail_pos = ctx.n_docs - head_pos + doc_ids = Enum.map(ctx.docs, fn doc -> doc["id"] end) + + {start_pos, doc_ids} = + if ctx.descending do + {head_pos, Enum.reverse(Enum.drop(Enum.sort(doc_ids), -tail_pos))} + else + {tail_pos, Enum.drop(Enum.sort(doc_ids), tail_pos - 1)} + end + + start_key = ~s("#{docid(start_pos)}") + + resp = + Couch.Session.get(ctx.session, "/#{ctx.db_name}/_all_docs", + query: %{descending: ctx.descending, start_key: start_key} + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + ids = Enum.map(resp.body["rows"], fn row -> row["id"] end) + assert doc_ids == ids + end + + test "end_key is respected", ctx do + head_pos = 2 + tail_pos = ctx.n_docs - head_pos + doc_ids = Enum.map(ctx.docs, fn doc -> doc["id"] end) + + {end_pos, doc_ids} = + if ctx.descending do + {tail_pos, Enum.reverse(Enum.drop(Enum.sort(doc_ids), tail_pos - 1))} + else + {head_pos, Enum.drop(Enum.sort(doc_ids), -tail_pos)} + end + + end_key = ~s("#{docid(end_pos)}") + + resp = + Couch.Session.get(ctx.session, "/#{ctx.db_name}/_all_docs", + query: %{descending: ctx.descending, end_key: end_key} + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + ids = Enum.map(resp.body["rows"], fn row -> row["id"] end) + assert doc_ids == ids + end + + test "range between start_key and end_key works", ctx do + head_pos = 2 + slice_size = 3 + doc_ids = Enum.sort(Enum.map(ctx.docs, fn doc -> doc["id"] end)) + # -1 due to 0 based indexing + # -2 is due to 0 based indexing and inclusive end + slice = Enum.slice(doc_ids, (head_pos - 1)..(head_pos + slice_size - 2)) + + {start_key, end_key, doc_ids} = + if ctx.descending do + reversed = Enum.reverse(slice) + [first | _] = reversed + [last | _] = slice + {~s("#{first}"), ~s("#{last}"), reversed} + else + [first | _] = slice + [last | _] = Enum.reverse(slice) + {~s("#{first}"), ~s("#{last}"), slice} + end + + assert length(doc_ids) == slice_size + + resp = + Couch.Session.get(ctx.session, "/#{ctx.db_name}/_all_docs", + query: %{descending: ctx.descending, start_key: start_key, end_key: end_key} + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + ids = Enum.map(resp.body["rows"], fn row -> row["id"] end) + assert doc_ids == ids + end + end + end + end + + for descending <- [false, true] do + for n <- [4, 9] do + describe "Pagination API (10 docs) : _all_docs?page_size=#{n}&descending=#{ + descending + } : pages" do + @describetag n_docs: 10 + @describetag descending: descending + @describetag page_size: n + setup [:with_session, :random_db, :with_docs, :all_docs, :paginate] + + test "final page doesn't include 'next' bookmark", ctx do + assert not Map.has_key?(ctx.response, "next") + assert ctx.response["total_rows"] == rem(ctx.n_docs, ctx.page_size) + end + + test "each but last page has page_size rows", ctx do + pages = Enum.drop(ctx.pages, -1) + + assert Enum.all?(pages, fn resp -> + length(resp["rows"]) == ctx.page_size + end) + end + + test "sum of rows on all pages is equal to number of documents", ctx do + pages = ctx.pages + n = Enum.reduce(pages, 0, fn resp, acc -> acc + length(resp["rows"]) end) + assert n == ctx.n_docs + end + + test "the rows are correctly sorted", ctx do + pages = ctx.pages + + ids = + Enum.reduce(pages, [], fn resp, acc -> + acc ++ Enum.map(resp["rows"], fn row -> row["id"] end) + end) + + if ctx.descending do + assert Enum.reverse(Enum.sort(ids)) == ids + else + assert Enum.sort(ids) == ids + end + end + end + end + end + + for n <- 10..11 do + describe "Pagination API (10 docs) : _all_docs?page_size=#{n}" do + @describetag n_docs: 10 + @describetag descending: false + @describetag page_size: n + setup [:with_session, :random_db, :with_docs, :all_docs] + + test "should not return 'next' bookmark", ctx do + body = ctx.response + assert not Map.has_key?(body, "next") + end + + test "total_rows matches the length of rows array", ctx do + body = ctx.response + assert body["total_rows"] == length(body["rows"]) + end + + test "total_rows less than the requested page_size", ctx do + body = ctx.response + assert body["total_rows"] <= ctx.page_size + end + end + end + + for descending <- [false, true] do + for n <- [4, 9] do + describe "Pagination API (10 docs) : _all_docs/queries?page_size=#{n}&descending=#{ + descending + } : pages" do + @describetag n_docs: 10 + @describetag descending: descending + @describetag page_size: n + + @describetag queries: %{ + queries: [ + %{ + descending: true + }, + %{ + limit: n + 1, + skip: 2 + } + ] + } + + setup [:with_session, :random_db, :with_docs] + + test "one of the results contains 'next' bookmark", ctx do + resp = + Couch.Session.post(ctx.session, "/#{ctx.db_name}/_all_docs/queries", + query: %{page_size: ctx.page_size, descending: ctx.descending}, + body: :jiffy.encode(ctx.queries) + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + results = resp.body["results"] + assert Enum.any?(results, fn result -> Map.has_key?(result, "next") end) + end + + test "each 'next' bookmark is working", ctx do + resp = + Couch.Session.post(ctx.session, "/#{ctx.db_name}/_all_docs/queries", + query: %{page_size: ctx.page_size, descending: ctx.descending}, + body: :jiffy.encode(ctx.queries) + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + results = resp.body["results"] + + bookmarks = + results + |> Enum.filter(fn result -> Map.has_key?(result, "next") end) + |> Enum.map(fn result -> Map.get(result, "next") end) + + assert [] != bookmarks + + Enum.each(bookmarks, fn bookmark -> + resp = + Couch.Session.get(ctx.session, "/#{ctx.db_name}/_all_docs", + query: %{bookmark: bookmark} + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + assert [] != resp.body["rows"] + end) + + assert Enum.any?(results, fn result -> Map.has_key?(result, "next") end) + end + + test "can post bookmarks to queries", ctx do + resp = + Couch.Session.post(ctx.session, "/#{ctx.db_name}/_all_docs/queries", + query: %{page_size: ctx.page_size, descending: ctx.descending}, + body: :jiffy.encode(ctx.queries) + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + results = resp.body["results"] + + queries = + results + |> Enum.filter(fn result -> Map.has_key?(result, "next") end) + |> Enum.map(fn result -> %{bookmark: Map.get(result, "next")} end) + + resp = + Couch.Session.post(ctx.session, "/#{ctx.db_name}/_all_docs/queries", + body: :jiffy.encode(%{queries: queries}) + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + + Enum.each(resp.body["results"], fn result -> + assert [] != result["rows"] + end) + end + + test "respect request page_size", ctx do + resp = + Couch.Session.post(ctx.session, "/#{ctx.db_name}/_all_docs/queries", + query: %{page_size: ctx.page_size, descending: ctx.descending}, + body: :jiffy.encode(ctx.queries) + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + results = resp.body["results"] + + Enum.each(results ++ resp.body["results"], fn result -> + assert length(result["rows"]) <= ctx.page_size + end) + end + + test "independent page_size in the bookmark", ctx do + resp = + Couch.Session.post(ctx.session, "/#{ctx.db_name}/_all_docs/queries", + query: %{page_size: ctx.page_size, descending: ctx.descending}, + body: :jiffy.encode(ctx.queries) + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + + queries = + resp.body["results"] + |> Enum.filter(fn result -> Map.has_key?(result, "next") end) + |> Enum.map(fn result -> %{bookmark: Map.get(result, "next")} end) + + resp = + Couch.Session.post(ctx.session, "/#{ctx.db_name}/_all_docs/queries", + body: :jiffy.encode(%{queries: queries}) + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + + Enum.each(resp.body["results"], fn result -> + assert length(result["rows"]) > ctx.page_size + end) + end + end + end + end + + for descending <- [false, true] do + for n <- [4, 9] do + describe "Pagination API (10 docs) : /{db}/_design/{ddoc}/_view?page_size=#{n}&descending=#{ + descending + }" do + @describetag n_docs: 10 + @describetag descending: descending + @describetag page_size: n + setup [:with_session, :random_db, :with_view, :with_docs] + + test "should return 'next' bookmark", ctx do + resp = + Couch.Session.get( + ctx.session, + "/#{ctx.db_name}/_design/#{ctx.ddoc_id}/_view/#{ctx.view_name}", + query: %{page_size: ctx.page_size, descending: ctx.descending} + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + assert Map.has_key?(resp.body, "next") + end + + test "total_rows matches the length of rows array", ctx do + resp = + Couch.Session.get( + ctx.session, + "/#{ctx.db_name}/_design/#{ctx.ddoc_id}/_view/#{ctx.view_name}", + query: %{page_size: ctx.page_size, descending: ctx.descending} + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + body = resp.body + assert body["total_rows"] == length(body["rows"]) + end + + test "total_rows matches the requested page_size", ctx do + resp = + Couch.Session.get( + ctx.session, + "/#{ctx.db_name}/_design/#{ctx.ddoc_id}/_view/#{ctx.view_name}", + query: %{page_size: ctx.page_size, descending: ctx.descending} + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + assert resp.body["total_rows"] == ctx.page_size + end + + test "can use 'next' bookmark to get remaining results", ctx do + resp = + Couch.Session.get( + ctx.session, + "/#{ctx.db_name}/_design/#{ctx.ddoc_id}/_view/#{ctx.view_name}", + query: %{page_size: ctx.page_size, descending: ctx.descending} + ) + + bookmark = resp.body["next"] + + resp = + Couch.Session.get( + ctx.session, + "/#{ctx.db_name}/_design/#{ctx.ddoc_id}/_view/#{ctx.view_name}", + query: %{bookmark: bookmark} + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + body = resp.body + assert body["total_rows"] == length(body["rows"]) + assert body["total_rows"] <= ctx.page_size + end + end + end + end + + for n <- 10..11 do + describe "Pagination API (10 docs) : /{db}/_design/{ddoc}/_view?page_size=#{n}" do + @describetag n_docs: 10 + @describetag descending: false + @describetag page_size: n + setup [:with_session, :random_db, :with_view, :with_docs] + + test "should not return 'next' bookmark", ctx do + resp = + Couch.Session.get( + ctx.session, + "/#{ctx.db_name}/_design/#{ctx.ddoc_id}/_view/#{ctx.view_name}", + query: %{page_size: ctx.page_size, descending: ctx.descending} + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + assert not Map.has_key?(resp.body, "next") + end + + test "total_rows matches the length of rows array", ctx do + resp = + Couch.Session.get( + ctx.session, + "/#{ctx.db_name}/_design/#{ctx.ddoc_id}/_view/#{ctx.view_name}", + query: %{page_size: ctx.page_size, descending: ctx.descending} + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + body = resp.body + assert body["total_rows"] == length(body["rows"]) + end + + test "total_rows less than the requested page_size", ctx do + resp = + Couch.Session.get( + ctx.session, + "/#{ctx.db_name}/_design/#{ctx.ddoc_id}/_view/#{ctx.view_name}", + query: %{page_size: ctx.page_size, descending: ctx.descending} + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + assert resp.body["total_rows"] <= ctx.page_size + end + end + end + + for descending <- [false, true] do + for n <- [4, 9] do + describe "Pagination API (10 docs) : /{db}/_design/{ddoc}/_view/queries?page_size=#{ + n + }&descending=#{descending} : pages" do + @describetag n_docs: 10 + @describetag descending: descending + @describetag page_size: n + + @describetag queries: %{ + queries: [ + %{ + descending: true + }, + %{ + limit: n + 1, + skip: 2 + } + ] + } + setup [:with_session, :random_db, :with_view, :with_docs] + + test "one of the results contains 'next' bookmark", ctx do + resp = + Couch.Session.post( + ctx.session, + "/#{ctx.db_name}/_design/#{ctx.ddoc_id}/_view/#{ctx.view_name}/queries", + query: %{page_size: ctx.page_size, descending: ctx.descending}, + body: :jiffy.encode(ctx.queries) + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + results = resp.body["results"] + assert Enum.any?(results, fn result -> Map.has_key?(result, "next") end) + end + + test "each 'next' bookmark is working", ctx do + resp = + Couch.Session.post( + ctx.session, + "/#{ctx.db_name}/_design/#{ctx.ddoc_id}/_view/#{ctx.view_name}/queries", + query: %{page_size: ctx.page_size, descending: ctx.descending}, + body: :jiffy.encode(ctx.queries) + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + results = resp.body["results"] + + bookmarks = + results + |> Enum.filter(fn result -> Map.has_key?(result, "next") end) + |> Enum.map(fn result -> Map.get(result, "next") end) + + assert [] != bookmarks + + Enum.each(bookmarks, fn bookmark -> + resp = + Couch.Session.get( + ctx.session, + "/#{ctx.db_name}/_design/#{ctx.ddoc_id}/_view/#{ctx.view_name}", + query: %{bookmark: bookmark} + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + assert [] != resp.body["rows"] + end) + + assert Enum.any?(results, fn result -> Map.has_key?(result, "next") end) + end + + test "can post bookmarks to queries", ctx do + resp = + Couch.Session.post( + ctx.session, + "/#{ctx.db_name}/_design/#{ctx.ddoc_id}/_view/#{ctx.view_name}/queries", + query: %{page_size: ctx.page_size, descending: ctx.descending}, + body: :jiffy.encode(ctx.queries) + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + results = resp.body["results"] + + queries = + results + |> Enum.filter(fn result -> Map.has_key?(result, "next") end) + |> Enum.map(fn result -> %{bookmark: Map.get(result, "next")} end) + + resp = + Couch.Session.post( + ctx.session, + "/#{ctx.db_name}/_design/#{ctx.ddoc_id}/_view/#{ctx.view_name}/queries", + body: :jiffy.encode(%{queries: queries}) + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + + Enum.each(resp.body["results"], fn result -> + assert [] != result["rows"] + end) + end + + test "respect request page_size", ctx do + resp = + Couch.Session.post( + ctx.session, + "/#{ctx.db_name}/_design/#{ctx.ddoc_id}/_view/#{ctx.view_name}/queries", + query: %{page_size: ctx.page_size, descending: ctx.descending}, + body: :jiffy.encode(ctx.queries) + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + results = resp.body["results"] + + Enum.each(results ++ resp.body["results"], fn result -> + assert length(result["rows"]) <= ctx.page_size + end) + end + + test "independent page_size in the bookmark", ctx do + resp = + Couch.Session.post( + ctx.session, + "/#{ctx.db_name}/_design/#{ctx.ddoc_id}/_view/#{ctx.view_name}/queries", + query: %{page_size: ctx.page_size, descending: ctx.descending}, + body: :jiffy.encode(ctx.queries) + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + + queries = + resp.body["results"] + |> Enum.filter(fn result -> Map.has_key?(result, "next") end) + |> Enum.map(fn result -> %{bookmark: Map.get(result, "next")} end) + + resp = + Couch.Session.post( + ctx.session, + "/#{ctx.db_name}/_design/#{ctx.ddoc_id}/_view/#{ctx.view_name}/queries", + body: :jiffy.encode(%{queries: queries}) + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + + Enum.each(resp.body["results"], fn result -> + assert length(result["rows"]) > ctx.page_size + end) + end + end + end end end -- cgit v1.2.1 From 6f2417e1af712b3720cf6c07713d7751cbc9fbef Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Fri, 15 May 2020 17:17:50 -0400 Subject: Fix flaky couch_jobs type monitor test Sometimes this test fails on Jenkins but doesn't fail locally. The attempted fix is to make sure to simply retry a few times for the number of children in the supervisor to be the expected values. Also extend the timeout to 15 seconds. --- src/couch_jobs/test/couch_jobs_tests.erl | 38 ++++++++++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/src/couch_jobs/test/couch_jobs_tests.erl b/src/couch_jobs/test/couch_jobs_tests.erl index fbe4e93a0..11572a4b9 100644 --- a/src/couch_jobs/test/couch_jobs_tests.erl +++ b/src/couch_jobs/test/couch_jobs_tests.erl @@ -207,33 +207,67 @@ resubmit_as_job_creator(#{t1 := T, j1 := J}) -> type_timeouts_and_server(#{t1 := T, t1_timeout := T1Timeout}) -> - ?_test(begin + {timeout, 15, ?_test(begin + + WaitForActivityMonitors = fun(N) -> + test_util:wait(fun() -> + Pids = couch_jobs_activity_monitor_sup:get_child_pids(), + case length(Pids) == N of + true -> ok; + false -> wait + end + end) + end, + + WaitForNotifiers = fun(N) -> + test_util:wait(fun() -> + Pids = couch_jobs_notifier_sup:get_child_pids(), + case length(Pids) == N of + true -> ok; + false -> wait + end + end) + end, + couch_jobs_server:force_check_types(), ?assertEqual(T1Timeout, couch_jobs:get_type_timeout(T)), + WaitForActivityMonitors(2), ?assertEqual(2, length(couch_jobs_activity_monitor_sup:get_child_pids())), + + WaitForNotifiers(2), ?assertEqual(2, length(couch_jobs_notifier_sup:get_child_pids())), + ?assertMatch({ok, _}, couch_jobs_server:get_notifier_server(T)), ?assertEqual(ok, couch_jobs:set_type_timeout(<<"t3">>, 8)), couch_jobs_server:force_check_types(), + + WaitForActivityMonitors(3), ?assertEqual(3, length(couch_jobs_activity_monitor_sup:get_child_pids())), + + WaitForNotifiers(3), ?assertEqual(3, length(couch_jobs_notifier_sup:get_child_pids())), ?assertEqual(ok, couch_jobs:clear_type_timeout(<<"t3">>)), couch_jobs_server:force_check_types(), + + WaitForActivityMonitors(2), ?assertEqual(2, length(couch_jobs_activity_monitor_sup:get_child_pids())), + + WaitForNotifiers(2), ?assertEqual(2, length(couch_jobs_notifier_sup:get_child_pids())), + ?assertMatch({error, _}, couch_jobs_server:get_notifier_server(<<"t3">>)), ?assertEqual(not_found, couch_jobs:get_type_timeout(<<"t3">>)) - end). + end)}. dead_notifier_restarts_jobs_server(#{}) -> -- cgit v1.2.1 From 03992009e788d631b1a09aff3cfb88f97f73ce23 Mon Sep 17 00:00:00 2001 From: Joan Touzet Date: Sun, 17 May 2020 03:17:04 -0400 Subject: Fix license file --- LICENSE | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/LICENSE b/LICENSE index 048ee41a5..e578d3654 100644 --- a/LICENSE +++ b/LICENSE @@ -187,7 +187,7 @@ same "printed page" as the copyright notice for easier identification within third-party archives. - Copyright 2020 The Apache Foundation + Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. -- cgit v1.2.1 From 4f7d1d97fd7d960f7ef6e9f1764bfd6e55ba8e0c Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Thu, 14 May 2020 16:17:58 +0100 Subject: allow configurability of JWT claims that require a value e.g; [jwt] required_claims = {iss, "https://example.com/issuer"} --- rel/overlay/etc/default.ini | 4 +- src/couch/src/couch_httpd.erl | 2 + src/couch/src/couch_httpd_auth.erl | 19 +++++++--- test/elixir/test/jwtauth_test.exs | 77 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 96 insertions(+), 6 deletions(-) diff --git a/rel/overlay/etc/default.ini b/rel/overlay/etc/default.ini index 6fe2260b4..057ed4c1c 100644 --- a/rel/overlay/etc/default.ini +++ b/rel/overlay/etc/default.ini @@ -142,7 +142,9 @@ max_db_number_for_dbs_info_req = 100 ;[jwt_auth] ; List of claims to validate -; required_claims = +; can be the name of a claim like "exp" or a tuple if the claim requires +; a parameter +; required_claims = exp, {iss, "IssuerNameHere"} ; ; [jwt_keys] ; Configure at least one key here if using the JWT auth handler. diff --git a/src/couch/src/couch_httpd.erl b/src/couch/src/couch_httpd.erl index ef90d6b2a..8f7fedd5e 100644 --- a/src/couch/src/couch_httpd.erl +++ b/src/couch/src/couch_httpd.erl @@ -931,6 +931,8 @@ error_info({error, {illegal_database_name, Name}}) -> {400, <<"illegal_database_name">>, Message}; error_info({missing_stub, Reason}) -> {412, <<"missing_stub">>, Reason}; +error_info({misconfigured_server, Reason}) -> + {500, <<"misconfigured_server">>, couch_util:to_binary(Reason)}; error_info({Error, Reason}) -> {500, couch_util:to_binary(Error), couch_util:to_binary(Reason)}; error_info(Error) -> diff --git a/src/couch/src/couch_httpd_auth.erl b/src/couch/src/couch_httpd_auth.erl index 2383be798..0d3add0c8 100644 --- a/src/couch/src/couch_httpd_auth.erl +++ b/src/couch/src/couch_httpd_auth.erl @@ -209,13 +209,22 @@ jwt_authentication_handler(Req) -> get_configured_claims() -> Claims = config:get("jwt_auth", "required_claims", ""), - case re:split(Claims, "\s*,\s*", [{return, list}]) of - [[]] -> - []; %% if required_claims is the empty string. - List -> - [list_to_existing_atom(C) || C <- List] + Re = "((?[a-z]+)|{(?[a-z]+)\s*,\s*\"(?[^\"]+)\"})", + case re:run(Claims, Re, [global, {capture, [key1, key2, val], binary}]) of + nomatch when Claims /= "" -> + couch_log:error("[jwt_auth] required_claims is set to an invalid value.", []), + throw({misconfigured_server, <<"JWT is not configured correctly">>}); + nomatch -> + []; + {match, Matches} -> + lists:map(fun to_claim/1, Matches) end. +to_claim([Key, <<>>, <<>>]) -> + binary_to_atom(Key, latin1); +to_claim([<<>>, Key, Value]) -> + {binary_to_atom(Key, latin1), Value}. + cookie_authentication_handler(Req) -> cookie_authentication_handler(Req, couch_auth_cache). diff --git a/test/elixir/test/jwtauth_test.exs b/test/elixir/test/jwtauth_test.exs index 2fb89c3af..7281ed146 100644 --- a/test/elixir/test/jwtauth_test.exs +++ b/test/elixir/test/jwtauth_test.exs @@ -137,4 +137,81 @@ defmodule JwtAuthTest do assert resp.body["userCtx"]["name"] == "adm" assert resp.body["info"]["authenticated"] == "default" end + + test "jwt auth with required iss claim", _context do + + secret = "zxczxc12zxczxc12" + + server_config = [ + %{ + :section => "jwt_auth", + :key => "required_claims", + :value => "{iss, \"hello\"}" + }, + %{ + :section => "jwt_keys", + :key => "hmac:_default", + :value => :base64.encode(secret) + }, + %{ + :section => "jwt_auth", + :key => "allowed_algorithms", + :value => "HS256, HS384, HS512" + } + ] + + run_on_modified_server(server_config, fn -> good_iss("HS256", secret) end) + run_on_modified_server(server_config, fn -> bad_iss("HS256", secret) end) + end + + def good_iss(alg, key) do + {:ok, token} = :jwtf.encode( + { + [ + {"alg", alg}, + {"typ", "JWT"} + ] + }, + { + [ + {"iss", "hello"}, + {"sub", "couch@apache.org"}, + {"_couchdb.roles", ["testing"] + } + ] + }, key) + + resp = Couch.get("/_session", + headers: [authorization: "Bearer #{token}"] + ) + + assert resp.body["userCtx"]["name"] == "couch@apache.org" + assert resp.body["userCtx"]["roles"] == ["testing"] + assert resp.body["info"]["authenticated"] == "jwt" + end + + def bad_iss(alg, key) do + {:ok, token} = :jwtf.encode( + { + [ + {"alg", alg}, + {"typ", "JWT"} + ] + }, + { + [ + {"iss", "goodbye"}, + {"sub", "couch@apache.org"}, + {"_couchdb.roles", ["testing"] + } + ] + }, key) + + resp = Couch.get("/_session", + headers: [authorization: "Bearer #{token}"] + ) + + assert resp.status_code == 400 + end + end -- cgit v1.2.1 From fad38281474813f8479c7fb71862555b7f381755 Mon Sep 17 00:00:00 2001 From: ILYA Khlopotov Date: Wed, 20 May 2020 12:50:46 -0700 Subject: Fix handling of limit query parameter --- src/chttpd/test/exunit/pagination_test.exs | 49 +++++++++++++++++++++++ src/couch_views/src/couch_views_http.erl | 63 +++++++++++++++++++++--------- 2 files changed, 93 insertions(+), 19 deletions(-) diff --git a/src/chttpd/test/exunit/pagination_test.exs b/src/chttpd/test/exunit/pagination_test.exs index fcb8f9add..140a5dc88 100644 --- a/src/chttpd/test/exunit/pagination_test.exs +++ b/src/chttpd/test/exunit/pagination_test.exs @@ -384,6 +384,55 @@ defmodule Couch.Test.Pagination do assert resp.status_code == 200, "got error #{inspect(resp.body)}" end + test ": _all_docs?page_size=4 should respect limit", ctx do + %{session: session, db_name: db_name} = ctx + + resp = + Couch.Session.get(session, "/#{db_name}/_all_docs", + query: %{page_size: ctx.page_size, limit: ctx.page_size - 2} + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + assert length(resp.body["rows"]) == ctx.page_size - 2 + assert not Map.has_key?(resp.body, "next") + + resp = + Couch.Session.get(session, "/#{db_name}/_all_docs", + query: %{page_size: ctx.page_size, limit: ctx.page_size - 1} + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + assert length(resp.body["rows"]) == ctx.page_size - 1 + assert not Map.has_key?(resp.body, "next") + + resp = + Couch.Session.get(session, "/#{db_name}/_all_docs", + query: %{page_size: ctx.page_size, limit: ctx.page_size} + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + assert length(resp.body["rows"]) == ctx.page_size + assert not Map.has_key?(resp.body, "next") + + resp = + Couch.Session.get(session, "/#{db_name}/_all_docs", + query: %{page_size: ctx.page_size, limit: ctx.page_size + 1} + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + assert length(resp.body["rows"]) == ctx.page_size + assert Map.has_key?(resp.body, "next") + + resp = + Couch.Session.get(session, "/#{db_name}/_all_docs", + query: %{page_size: ctx.page_size, limit: ctx.page_size + 2} + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + assert length(resp.body["rows"]) == ctx.page_size + assert Map.has_key?(resp.body, "next") + end + test ": _all_docs/queries should limit number of queries", ctx do queries = %{ queries: [%{}, %{}, %{}, %{}, %{}] diff --git a/src/couch_views/src/couch_views_http.erl b/src/couch_views/src/couch_views_http.erl index ae6725649..b9bc2b3c0 100644 --- a/src/couch_views/src/couch_views_http.erl +++ b/src/couch_views/src/couch_views_http.erl @@ -108,7 +108,7 @@ paginated_cb({meta, Meta}, #vacc{}=VAcc) -> paginated(Req, EtagTerm, #mrargs{page_size = PageSize} = Args, KeyFun, Fun) -> Etag = couch_httpd:make_etag(EtagTerm), chttpd:etag_respond(Req, Etag, fun() -> - hd(do_paginated(PageSize, [set_limit(Args)], KeyFun, Fun)) + hd(do_paginated(PageSize, [Args], KeyFun, Fun)) end). @@ -124,10 +124,10 @@ do_paginated(PageSize, QueriesArgs, KeyFun, Fun) when is_list(QueriesArgs) -> {_N, Results} = lists:foldl(fun(Args0, {Limit, Acc}) -> case Limit > 0 of true -> - Args = set_limit(Args0#mrargs{page_size = Limit}), + {OriginalLimit, Args} = set_limit(Args0#mrargs{page_size = Limit}), {Meta, Items} = Fun(Args), Result = maybe_add_bookmark( - PageSize, Args, Meta, Items, KeyFun), + OriginalLimit, PageSize, Args, Meta, Items, KeyFun), #{total_rows := Total} = Result, {Limit - Total, [Result | Acc]}; false -> @@ -143,10 +143,9 @@ do_paginated(PageSize, QueriesArgs, KeyFun, Fun) when is_list(QueriesArgs) -> lists:reverse(Results). -maybe_add_bookmark(PageSize, Args0, Response, Items, KeyFun) -> - #mrargs{page_size = Limit} = Args0, - Args = Args0#mrargs{page_size = PageSize}, - case check_completion(Limit, Items) of +maybe_add_bookmark(OriginalLimit, PageSize, Args0, Response, Items, KeyFun) -> + #mrargs{page_size = RequestedLimit} = Args0, + case check_completion(OriginalLimit, RequestedLimit, Items) of {Rows, nil} -> maps:merge(Response, #{ rows => Rows, @@ -157,6 +156,7 @@ maybe_add_bookmark(PageSize, Args0, Response, Items, KeyFun) -> if is_binary(NextKey) -> ok; true -> throw("Provided KeyFun should return binary") end, + Args = Args0#mrargs{page_size = PageSize}, Bookmark = bookmark_encode(Args#mrargs{start_key=NextKey}), maps:merge(Response, #{ rows => Rows, @@ -168,14 +168,23 @@ maybe_add_bookmark(PageSize, Args0, Response, Items, KeyFun) -> set_limit(#mrargs{page_size = PageSize, limit = Limit} = Args) when is_integer(PageSize) andalso Limit > PageSize -> - Args#mrargs{limit = PageSize + 1}; + {Limit, Args#mrargs{limit = PageSize + 1}}; set_limit(#mrargs{page_size = PageSize, limit = Limit} = Args) when is_integer(PageSize) -> - Args#mrargs{limit = Limit + 1}. + {Limit, Args#mrargs{limit = Limit + 1}}. -check_completion(Limit, Items) when length(Items) > Limit -> +check_completion(OriginalLimit, RequestedLimit, Items) + when is_integer(OriginalLimit) andalso OriginalLimit =< RequestedLimit -> + {Rows, _} = split(OriginalLimit, Items), + {Rows, nil}; + +check_completion(_OriginalLimit, RequestedLimit, Items) -> + split(RequestedLimit, Items). + + +split(Limit, Items) when length(Items) > Limit -> case lists:split(Limit, Items) of {Head, [NextItem | _]} -> {Head, NextItem}; @@ -183,7 +192,7 @@ check_completion(Limit, Items) when length(Items) > Limit -> {Head, nil} end; -check_completion(_Limit, Items) -> +split(_Limit, Items) -> {Items, nil}. @@ -258,35 +267,51 @@ bookmark_encode_decode_test() -> check_completion_test() -> ?assertEqual( {[], nil}, - check_completion(1, []) + check_completion(100, 1, []) ), ?assertEqual( {[1], nil}, - check_completion(1, [1]) + check_completion(100, 1, [1]) ), ?assertEqual( {[1], 2}, - check_completion(1, [1, 2]) + check_completion(100, 1, [1, 2]) ), ?assertEqual( {[1], 2}, - check_completion(1, [1, 2, 3]) + check_completion(100, 1, [1, 2, 3]) ), ?assertEqual( {[1, 2], nil}, - check_completion(3, [1, 2]) + check_completion(100, 3, [1, 2]) ), ?assertEqual( {[1, 2, 3], nil}, - check_completion(3, [1, 2, 3]) + check_completion(100, 3, [1, 2, 3]) ), ?assertEqual( {[1, 2, 3], 4}, - check_completion(3, [1, 2, 3, 4]) + check_completion(100, 3, [1, 2, 3, 4]) ), ?assertEqual( {[1, 2, 3], 4}, - check_completion(3, [1, 2, 3, 4, 5]) + check_completion(100, 3, [1, 2, 3, 4, 5]) + ), + ?assertEqual( + {[1], nil}, + check_completion(1, 1, [1]) + ), + ?assertEqual( + {[1, 2], nil}, + check_completion(2, 3, [1, 2]) + ), + ?assertEqual( + {[1, 2], nil}, + check_completion(2, 3, [1, 2, 3]) + ), + ?assertEqual( + {[1, 2], nil}, + check_completion(2, 3, [1, 2, 3, 4, 5]) ), ok. -endif. \ No newline at end of file -- cgit v1.2.1 From 51694f74911683ca009af42779e6fc362c6fdc5a Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Tue, 19 May 2020 12:52:02 -0400 Subject: Bulk docs transaction batching * Interactive (regular) requests are split into smaller transactions, so larger updates won't fail with either timeout so or transaction too large FDB errors. * Non-interactive (replicated) requests can now batch their updates in a few transaction and gain extra performance. Batch size is configurable: ``` [fabric] update_docs_batch_size = 5000000 ``` --- rebar.config.script | 2 +- rel/overlay/etc/default.ini | 3 + src/fabric/src/fabric2_db.erl | 173 ++++++++++++++++---- src/fabric/src/fabric2_fdb.erl | 8 + src/fabric/test/fabric2_update_docs_tests.erl | 222 ++++++++++++++++++++++++++ 5 files changed, 379 insertions(+), 29 deletions(-) create mode 100644 src/fabric/test/fabric2_update_docs_tests.erl diff --git a/rebar.config.script b/rebar.config.script index 03c380f46..c145566a3 100644 --- a/rebar.config.script +++ b/rebar.config.script @@ -152,7 +152,7 @@ DepDescs = [ %% Independent Apps {config, "config", {tag, "2.1.7"}}, {b64url, "b64url", {tag, "1.0.2"}}, -{erlfdb, "erlfdb", {tag, "v1.2.0"}}, +{erlfdb, "erlfdb", {tag, "v1.2.1"}}, {ets_lru, "ets-lru", {tag, "1.1.0"}}, {khash, "khash", {tag, "1.1.0"}}, {snappy, "snappy", {tag, "CouchDB-1.0.4"}}, diff --git a/rel/overlay/etc/default.ini b/rel/overlay/etc/default.ini index 66680a4e8..35e5147b2 100644 --- a/rel/overlay/etc/default.ini +++ b/rel/overlay/etc/default.ini @@ -242,6 +242,9 @@ port = 6984 ; ; Byte size of binary chunks written to FDB values. Defaults to FDB max limit. ;binary_chunk_size = 100000 +; +; Bulk docs transaction batch size in bytes +;update_docs_batch_size = 5000000 ; [rexi] ; buffer_count = 2000 diff --git a/src/fabric/src/fabric2_db.erl b/src/fabric/src/fabric2_db.erl index 8764d4e18..6540e0b6d 100644 --- a/src/fabric/src/fabric2_db.erl +++ b/src/fabric/src/fabric2_db.erl @@ -156,6 +156,19 @@ -define(RETURN(Term), throw({?MODULE, Term})). +-define(DEFAULT_UPDATE_DOCS_BATCH_SIZE, 5000000). + + +-record(bacc, { + db, + docs, + batch_size, + options, + rev_futures, + seen, + results +}). + create(DbName, Options) -> case validate_dbname(DbName) of @@ -861,18 +874,8 @@ update_docs(Db, Docs0, Options) -> Docs1 = apply_before_doc_update(Db, Docs0, Options), try validate_atomic_update(Docs0, lists:member(all_or_nothing, Options)), - Resps0 = case lists:member(replicated_changes, Options) of - false -> - fabric2_fdb:transactional(Db, fun(TxDb) -> - update_docs_interactive(TxDb, Docs1, Options) - end); - true -> - lists:map(fun(Doc) -> - fabric2_fdb:transactional(Db, fun(TxDb) -> - update_doc_int(TxDb, Doc, Options) - end) - end, Docs1) - end, + + Resps0 = batch_update_docs(Db, Docs1, Options), % Notify index builder fabric2_index:db_updated(name(Db)), @@ -895,7 +898,7 @@ update_docs(Db, Docs0, Options) -> Else end end, Resps0), - case lists:member(replicated_changes, Options) of + case is_replicated(Options) of true -> {ok, lists:flatmap(fun(R) -> case R of @@ -1647,9 +1650,8 @@ update_doc_int(#{} = Db, #doc{} = Doc, Options) -> <> -> true; _ -> false end, - IsReplicated = lists:member(replicated_changes, Options), try - case {IsLocal, IsReplicated} of + case {IsLocal, is_replicated(Options)} of {false, false} -> update_doc_interactive(Db, Doc, Options); {false, true} -> update_doc_replicated(Db, Doc, Options); {true, _} -> update_local_doc(Db, Doc, Options) @@ -1659,17 +1661,119 @@ update_doc_int(#{} = Db, #doc{} = Doc, Options) -> end. -update_docs_interactive(Db, Docs0, Options) -> - Docs = tag_docs(Docs0), - Futures = get_winning_rev_futures(Db, Docs), - {Result, _} = lists:mapfoldl(fun(Doc, SeenIds) -> - try - update_docs_interactive(Db, Doc, Options, Futures, SeenIds) - catch throw:{?MODULE, Return} -> - {Return, SeenIds} +batch_update_docs(Db, Docs, Options) -> + BAcc = #bacc{ + db = Db, + docs = Docs, + batch_size = get_batch_size(Options), + options = Options, + rev_futures = #{}, + seen = [], + results = [] + }, + #bacc{results = Res} = batch_update_docs(BAcc), + lists:reverse(Res). + + +batch_update_docs(#bacc{docs = []} = BAcc) -> + BAcc; + +batch_update_docs(#bacc{db = Db} = BAcc) -> + #bacc{ + db = Db, + docs = Docs, + options = Options + } = BAcc, + + BAccTx2 = fabric2_fdb:transactional(Db, fun(TxDb) -> + BAccTx = BAcc#bacc{db = TxDb}, + case is_replicated(Options) of + false -> + Tagged = tag_docs(Docs), + RevFutures = get_winning_rev_futures(TxDb, Tagged), + BAccTx1 = BAccTx#bacc{ + docs = Tagged, + rev_futures = RevFutures + }, + batch_update_interactive_tx(BAccTx1); + true -> + BAccTx1 = batch_update_replicated_tx(BAccTx), + % For replicated updates reset `seen` after every transaction + BAccTx1#bacc{seen = []} end - end, [], Docs), - Result. + end), + + % Clean up after the transaction ends so we can recurse with a clean state + maps:map(fun(Tag, RangeFuture) when is_reference(Tag) -> + ok = erlfdb:cancel(RangeFuture, [flush]) + end, BAccTx2#bacc.rev_futures), + + BAcc1 = BAccTx2#bacc{ + db = Db, + rev_futures = #{} + }, + + batch_update_docs(BAcc1). + + +batch_update_interactive_tx(#bacc{docs = []} = BAcc) -> + BAcc; + +batch_update_interactive_tx(#bacc{} = BAcc) -> + #bacc{ + db = TxDb, + docs = [Doc | Docs], + options = Options, + batch_size = MaxSize, + rev_futures = RevFutures, + seen = Seen, + results = Results + } = BAcc, + {Res, Seen1} = try + update_docs_interactive(TxDb, Doc, Options, RevFutures, Seen) + catch throw:{?MODULE, Return} -> + {Return, Seen} + end, + BAcc1 = BAcc#bacc{ + docs = Docs, + results = [Res | Results], + seen = Seen1 + }, + case fabric2_fdb:get_approximate_tx_size(TxDb) > MaxSize of + true -> BAcc1; + false -> batch_update_interactive_tx(BAcc1) + end. + + +batch_update_replicated_tx(#bacc{docs = []} = BAcc) -> + BAcc; + +batch_update_replicated_tx(#bacc{} = BAcc) -> + #bacc{ + db = TxDb, + docs = [Doc | Docs], + options = Options, + batch_size = MaxSize, + seen = Seen, + results = Results + } = BAcc, + case lists:member(Doc#doc.id, Seen) of + true -> + % If we already updated this doc in the current transaction, wait + % till the next transaction to update it again. + BAcc; + false -> + Res = update_doc_int(TxDb, Doc, Options), + BAcc1 = BAcc#bacc{ + docs = Docs, + results = [Res | Results], + seen = [Doc#doc.id | Seen] + }, + case fabric2_fdb:get_approximate_tx_size(TxDb) > MaxSize of + true -> BAcc1; + false -> batch_update_replicated_tx(BAcc1) + end + end. update_docs_interactive(Db, #doc{id = <>} = Doc, @@ -2122,9 +2226,8 @@ doc_to_revid(#doc{revs = Revs}) -> tag_docs([]) -> []; tag_docs([#doc{meta = Meta} = Doc | Rest]) -> - NewDoc = Doc#doc{ - meta = [{ref, make_ref()} | Meta] - }, + Meta1 = lists:keystore(ref, 1, Meta, {ref, make_ref()}), + NewDoc = Doc#doc{meta = Meta1}, [NewDoc | tag_docs(Rest)]. @@ -2226,3 +2329,17 @@ get_cached_db(#{} = Db, Opts) when is_list(Opts) -> fabric2_fdb:ensure_current(TxDb) end) end. + + +is_replicated(Options) when is_list(Options) -> + lists:member(replicated_changes, Options). + + +get_batch_size(Options) -> + case fabric2_util:get_value(batch_size, Options) of + undefined -> + config:get_integer("fabric", "update_docs_batch_size", + ?DEFAULT_UPDATE_DOCS_BATCH_SIZE); + Val when is_integer(Val) -> + Val + end. diff --git a/src/fabric/src/fabric2_fdb.erl b/src/fabric/src/fabric2_fdb.erl index f274aa606..e8f6e0daa 100644 --- a/src/fabric/src/fabric2_fdb.erl +++ b/src/fabric/src/fabric2_fdb.erl @@ -75,6 +75,8 @@ new_versionstamp/1, + get_approximate_tx_size/1, + debug_cluster/0, debug_cluster/2 ]). @@ -1159,6 +1161,12 @@ new_versionstamp(Tx) -> {versionstamp, 16#FFFFFFFFFFFFFFFF, 16#FFFF, TxId}. +get_approximate_tx_size(#{} = TxDb) -> + require_transaction(TxDb), + #{tx := Tx} = TxDb, + erlfdb:wait(erlfdb:get_approximate_size(Tx)). + + debug_cluster() -> debug_cluster(<<>>, <<16#FE, 16#FF, 16#FF>>). diff --git a/src/fabric/test/fabric2_update_docs_tests.erl b/src/fabric/test/fabric2_update_docs_tests.erl new file mode 100644 index 000000000..5a2389abf --- /dev/null +++ b/src/fabric/test/fabric2_update_docs_tests.erl @@ -0,0 +1,222 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(fabric2_update_docs_tests). + + +-include_lib("couch/include/couch_db.hrl"). +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("eunit/include/eunit.hrl"). +-include("fabric2_test.hrl"). + + +update_docs_test_() -> + { + "Test update_docs", + { + setup, + fun setup_all/0, + fun teardown_all/1, + { + foreach, + fun setup/0, + fun cleanup/1, + [ + ?TDEF_FE(update_docs), + ?TDEF_FE(update_docs_replicated), + ?TDEF_FE(update_docs_batches), + ?TDEF_FE(update_docs_replicated_batches), + ?TDEF_FE(update_docs_duplicate_ids_conflict), + ?TDEF_FE(update_docs_duplicate_ids_with_batches), + ?TDEF_FE(update_docs_replicate_batches_duplicate_id) + ] + } + } + }. + + +setup_all() -> + test_util:start_couch([fabric]). + + +teardown_all(Ctx) -> + test_util:stop_couch(Ctx). + + +setup() -> + {ok, Db} = fabric2_db:create(?tempdb(), [{user_ctx, ?ADMIN_USER}]), + meck:new(erlfdb, [passthrough]), + Db. + + +cleanup(#{} = Db) -> + meck:unload(), + ok = fabric2_db:delete(fabric2_db:name(Db), []). + + +update_docs(Db) -> + ?assertEqual({ok, []}, fabric2_db:update_docs(Db, [])), + + Doc1 = doc(), + Res1 = fabric2_db:update_docs(Db, [Doc1]), + ?assertMatch({ok, [_]}, Res1), + {ok, [Doc1Res]} = Res1, + ?assertMatch({ok, {1, <<_/binary>>}}, Doc1Res), + {ok, {1, Rev1}} = Doc1Res, + {ok, Doc1Open} = fabric2_db:open_doc(Db, Doc1#doc.id), + ?assertEqual(Doc1#doc{revs = {1, [Rev1]}}, Doc1Open), + + Doc2 = doc(), + Doc3 = doc(), + Res2 = fabric2_db:update_docs(Db, [Doc2, Doc3]), + ?assertMatch({ok, [_, _]}, Res2), + {ok, [Doc2Res, Doc3Res]} = Res2, + ?assertMatch({ok, {1, <<_/binary>>}}, Doc2Res), + ?assertMatch({ok, {1, <<_/binary>>}}, Doc3Res). + + +update_docs_replicated(Db) -> + Opts = [replicated_changes], + + ?assertEqual({ok, []}, fabric2_db:update_docs(Db, [], Opts)), + + Doc1 = doc(10, {1, [rev()]}), + ?assertMatch({ok, []}, fabric2_db:update_docs(Db, [Doc1], Opts)), + {ok, Doc1Open} = fabric2_db:open_doc(Db, Doc1#doc.id), + ?assertEqual(Doc1, Doc1Open), + + Doc2 = doc(10, {1, [rev()]}), + Doc3 = doc(10, {1, [rev()]}), + ?assertMatch({ok, []}, fabric2_db:update_docs(Db, [Doc2, Doc3], Opts)), + {ok, Doc2Open} = fabric2_db:open_doc(Db, Doc2#doc.id), + ?assertEqual(Doc2, Doc2Open), + {ok, Doc3Open} = fabric2_db:open_doc(Db, Doc3#doc.id), + ?assertEqual(Doc3, Doc3Open). + + +update_docs_batches(Db) -> + Opts = [{batch_size, 5000}], + + Docs1 = [doc(9000), doc(9000)], + + meck:reset(erlfdb), + ?assertMatch({ok, [_ | _]}, fabric2_db:update_docs(Db, Docs1, Opts)), + ?assertEqual(2, meck:num_calls(erlfdb, transactional, 2)), + + lists:foreach(fun(#doc{} = Doc) -> + ?assertMatch({ok, #doc{}}, fabric2_db:open_doc(Db, Doc#doc.id)) + end, Docs1), + + Docs2 = [doc(10), doc(10), doc(9000), doc(10)], + + meck:reset(erlfdb), + ?assertMatch({ok, [_ | _]}, fabric2_db:update_docs(Db, Docs2, Opts)), + ?assertEqual(2, meck:num_calls(erlfdb, transactional, 2)), + + lists:foreach(fun(#doc{} = Doc) -> + ?assertMatch({ok, #doc{}}, fabric2_db:open_doc(Db, Doc#doc.id)) + end, Docs2). + + +update_docs_replicated_batches(Db) -> + Opts = [{batch_size, 5000}, replicated_changes], + + Docs1 = [doc(Size, {1, [rev()]}) || Size <- [9000, 9000]], + + meck:reset(erlfdb), + ?assertMatch({ok, []}, fabric2_db:update_docs(Db, Docs1, Opts)), + ?assertEqual(2, meck:num_calls(erlfdb, transactional, 2)), + + lists:foreach(fun(#doc{} = Doc) -> + ?assertEqual({ok, Doc}, fabric2_db:open_doc(Db, Doc#doc.id)) + end, Docs1), + + Docs2 = [doc(Size, {1, [rev()]}) || Size <- [10, 10, 9000, 10]], + + meck:reset(erlfdb), + ?assertMatch({ok, []}, fabric2_db:update_docs(Db, Docs2, Opts)), + ?assertEqual(2, meck:num_calls(erlfdb, transactional, 2)), + + lists:foreach(fun(#doc{} = Doc) -> + ?assertEqual({ok, Doc}, fabric2_db:open_doc(Db, Doc#doc.id)) + end, Docs2). + + +update_docs_duplicate_ids_conflict(Db) -> + Doc = doc(), + + Res = fabric2_db:update_docs(Db, [Doc, doc(), Doc]), + ?assertMatch({ok, [_, _, _]}, Res), + + {ok, [Doc1Res, Doc2Res, Doc3Res]} = Res, + ?assertMatch({ok, {1, <<_/binary>>}}, Doc1Res), + ?assertMatch({ok, {1, <<_/binary>>}}, Doc2Res), + ?assertMatch(conflict, Doc3Res). + + +update_docs_duplicate_ids_with_batches(Db) -> + Opts = [{batch_size, 5000}], + + Doc = doc(9000), + + meck:reset(erlfdb), + Res = fabric2_db:update_docs(Db, [Doc, doc(9000), Doc], Opts), + ?assertMatch({ok, [_, _, _]}, Res), + ?assertEqual(3, meck:num_calls(erlfdb, transactional, 2)), + + {ok, [Doc1Res, Doc2Res, Doc3Res]} = Res, + ?assertMatch({ok, {1, <<_/binary>>}}, Doc1Res), + ?assertMatch({ok, {1, <<_/binary>>}}, Doc2Res), + ?assertMatch(conflict, Doc3Res). + + +update_docs_replicate_batches_duplicate_id(Db) -> + Opts = [replicated_changes], + + Doc = doc(10, {1, [rev()]}), + Docs = [Doc, Doc], + + meck:reset(erlfdb), + ?assertMatch({ok, []}, fabric2_db:update_docs(Db, Docs, Opts)), + ?assertEqual(2, meck:num_calls(erlfdb, transactional, 2)), + + ?assertEqual({ok, Doc}, fabric2_db:open_doc(Db, Doc#doc.id)). + + +% Utility functions + +doc() -> + doc(2). + + +doc(Size) -> + doc(Size, undefined). + + +doc(Size, Revs) -> + Doc = #doc{ + id = fabric2_util:uuid(), + body = doc_body(Size) + }, + case Revs of + undefined -> Doc; + _ -> Doc#doc{revs = Revs} + end. + + +rev() -> + fabric2_util:to_hex(crypto:strong_rand_bytes(16)). + + +doc_body(Size) when is_integer(Size), Size >= 2 -> + Val = fabric2_util:to_hex(crypto:strong_rand_bytes(Size div 2)), + {[{<<"x">>, Val}]}. -- cgit v1.2.1 From 44935aca8fdbf5f38027d6cb3bb11c678b3c9330 Mon Sep 17 00:00:00 2001 From: jiangph Date: Thu, 21 May 2020 10:36:31 +0800 Subject: Improve log of permanently deleting databases --- src/fabric/src/fabric2_db_expiration.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/fabric/src/fabric2_db_expiration.erl b/src/fabric/src/fabric2_db_expiration.erl index 3363d2427..92f22e749 100644 --- a/src/fabric/src/fabric2_db_expiration.erl +++ b/src/fabric/src/fabric2_db_expiration.erl @@ -180,8 +180,8 @@ process_row(DbInfo) -> Since = Now - Retention, case Since >= timestamp_to_sec(TimeStamp) of true -> - couch_log:notice("Permanently deleting ~p database with" - " timestamp ~p", [DbName, TimeStamp]), + couch_log:notice("Permanently deleting ~s database with" + " timestamp ~s", [DbName, TimeStamp]), ok = fabric2_db:delete(DbName, [{deleted_at, TimeStamp}]); false -> ok -- cgit v1.2.1 From 850cc1268574c24520111cd9b7e1d896b2474c6e Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Thu, 21 May 2020 16:03:50 +0100 Subject: make jwtf_keystore compatible with erlang 19 --- src/jwtf/src/jwtf_keystore.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/jwtf/src/jwtf_keystore.erl b/src/jwtf/src/jwtf_keystore.erl index be261e67c..3f7d2dc9c 100644 --- a/src/jwtf/src/jwtf_keystore.erl +++ b/src/jwtf/src/jwtf_keystore.erl @@ -140,7 +140,7 @@ get_from_config(Kty, KID) -> end. pem_decode(PEM) -> - BinPEM = iolist_to_binary(string:replace(PEM, "\\n", "\n", all)), + BinPEM = iolist_to_binary(lists:join("\n", string:split(PEM, "\\n", all))), case public_key:pem_decode(BinPEM) of [PEMEntry] -> public_key:pem_entry_decode(PEMEntry); -- cgit v1.2.1 From e245aa017015291c3e8e83f418c513c75372c3c5 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Thu, 21 May 2020 18:34:02 +0100 Subject: make jwtf_keystore compatible with erlang 19 for real this time --- src/jwtf/src/jwtf_keystore.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/jwtf/src/jwtf_keystore.erl b/src/jwtf/src/jwtf_keystore.erl index 3f7d2dc9c..c2d80b9cb 100644 --- a/src/jwtf/src/jwtf_keystore.erl +++ b/src/jwtf/src/jwtf_keystore.erl @@ -140,7 +140,7 @@ get_from_config(Kty, KID) -> end. pem_decode(PEM) -> - BinPEM = iolist_to_binary(lists:join("\n", string:split(PEM, "\\n", all))), + BinPEM = re:replace(PEM, "\\\\n", "\n", [global, {return, binary}]), case public_key:pem_decode(BinPEM) of [PEMEntry] -> public_key:pem_entry_decode(PEMEntry); -- cgit v1.2.1 From 5bade6fde84c46c472ab64967563cb07a239f026 Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Thu, 21 May 2020 18:40:55 -0400 Subject: Improve load handling in couch_jobs and couch_views Increase couch_views job timeout by 20 seconds. This will set a larger jitter when multiple nodes concurrently check and re-equeue jobs. It would reduce the chance of them bumping into each other and conflicting. If they do conflict in activity monitor, catch the error and emit an error log. We gain some more robustness under load for a longer timeout for jobs with workers that have suddenly died getting re-enqueued. --- src/couch_jobs/src/couch_jobs_activity_monitor.erl | 9 ++++++++- src/couch_views/src/couch_views_jobs.erl | 2 +- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/couch_jobs/src/couch_jobs_activity_monitor.erl b/src/couch_jobs/src/couch_jobs_activity_monitor.erl index ef82e6bd9..6f50d9653 100644 --- a/src/couch_jobs/src/couch_jobs_activity_monitor.erl +++ b/src/couch_jobs/src/couch_jobs_activity_monitor.erl @@ -65,7 +65,14 @@ handle_cast(Msg, St) -> handle_info(check_activity, St) -> - St1 = check_activity(St), + St1 = try + check_activity(St) + catch + {error, {erlfdb_error, 1020}} -> + LogMsg = "~p : type:~p got 1020 error, possibly from overload", + couch_log:error(LogMsg, [?MODULE, St#st.type]), + St + end, St2 = schedule_check(St1), {noreply, St2}; diff --git a/src/couch_views/src/couch_views_jobs.erl b/src/couch_views/src/couch_views_jobs.erl index 909e9234f..a9ca168ee 100644 --- a/src/couch_views/src/couch_views_jobs.erl +++ b/src/couch_views/src/couch_views_jobs.erl @@ -31,7 +31,7 @@ set_timeout() -> - couch_jobs:set_type_timeout(?INDEX_JOB_TYPE, 6). + couch_jobs:set_type_timeout(?INDEX_JOB_TYPE, 26). build_view(TxDb, Mrst, UpdateSeq) -> -- cgit v1.2.1 From a0814126bdae274f4260edd87e8b23736370885e Mon Sep 17 00:00:00 2001 From: ILYA Khlopotov Date: Fri, 22 May 2020 07:12:32 -0700 Subject: Add support for previous bookmark --- src/chttpd/test/exunit/pagination_test.exs | 47 ++++++++++++++++++++++++++++++ src/couch_views/src/couch_views_http.erl | 43 +++++++++++++++++++++++---- 2 files changed, 85 insertions(+), 5 deletions(-) diff --git a/src/chttpd/test/exunit/pagination_test.exs b/src/chttpd/test/exunit/pagination_test.exs index 140a5dc88..7fd962381 100644 --- a/src/chttpd/test/exunit/pagination_test.exs +++ b/src/chttpd/test/exunit/pagination_test.exs @@ -872,6 +872,18 @@ defmodule Couch.Test.Pagination do assert Map.has_key?(resp.body, "next") end + test "first page should not return 'previous' bookmark", ctx do + resp = + Couch.Session.get( + ctx.session, + "/#{ctx.db_name}/_design/#{ctx.ddoc_id}/_view/#{ctx.view_name}", + query: %{page_size: ctx.page_size, descending: ctx.descending} + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + assert not Map.has_key?(resp.body, "previous") + end + test "total_rows matches the length of rows array", ctx do resp = Couch.Session.get( @@ -919,6 +931,41 @@ defmodule Couch.Test.Pagination do assert body["total_rows"] == length(body["rows"]) assert body["total_rows"] <= ctx.page_size end + + test "can use 'previous' bookmark", ctx do + resp = + Couch.Session.get( + ctx.session, + "/#{ctx.db_name}/_design/#{ctx.ddoc_id}/_view/#{ctx.view_name}", + query: %{page_size: ctx.page_size, descending: ctx.descending} + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + next_bookmark = resp.body["next"] + + first_page_ids = Enum.map(resp.body["rows"], fn row -> row["id"] end) + + resp = + Couch.Session.get( + ctx.session, + "/#{ctx.db_name}/_design/#{ctx.ddoc_id}/_view/#{ctx.view_name}", + query: %{bookmark: next_bookmark} + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + assert Map.has_key?(resp.body, "previous") + + resp = + Couch.Session.get( + ctx.session, + "/#{ctx.db_name}/_design/#{ctx.ddoc_id}/_view/#{ctx.view_name}", + query: %{bookmark: resp.body["previous"]} + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + ids = Enum.map(resp.body["rows"], fn row -> row["id"] end) + assert first_page_ids == ids + end end end end diff --git a/src/couch_views/src/couch_views_http.erl b/src/couch_views/src/couch_views_http.erl index b9bc2b3c0..8e12b2476 100644 --- a/src/couch_views/src/couch_views_http.erl +++ b/src/couch_views/src/couch_views_http.erl @@ -126,8 +126,9 @@ do_paginated(PageSize, QueriesArgs, KeyFun, Fun) when is_list(QueriesArgs) -> true -> {OriginalLimit, Args} = set_limit(Args0#mrargs{page_size = Limit}), {Meta, Items} = Fun(Args), - Result = maybe_add_bookmark( + Result0 = maybe_add_next_bookmark( OriginalLimit, PageSize, Args, Meta, Items, KeyFun), + Result = maybe_add_previous_bookmark(Args, Result0, KeyFun), #{total_rows := Total} = Result, {Limit - Total, [Result | Acc]}; false -> @@ -143,8 +144,11 @@ do_paginated(PageSize, QueriesArgs, KeyFun, Fun) when is_list(QueriesArgs) -> lists:reverse(Results). -maybe_add_bookmark(OriginalLimit, PageSize, Args0, Response, Items, KeyFun) -> - #mrargs{page_size = RequestedLimit} = Args0, +maybe_add_next_bookmark(OriginalLimit, PageSize, Args0, Response, Items, KeyFun) -> + #mrargs{ + page_size = RequestedLimit, + extra = Extra + } = Args0, case check_completion(OriginalLimit, RequestedLimit, Items) of {Rows, nil} -> maps:merge(Response, #{ @@ -152,12 +156,17 @@ maybe_add_bookmark(OriginalLimit, PageSize, Args0, Response, Items, KeyFun) -> total_rows => length(Rows) }); {Rows, Next} -> + FirstKey = first_key(KeyFun, Rows), NextKey = KeyFun(Next), if is_binary(NextKey) -> ok; true -> throw("Provided KeyFun should return binary") end, - Args = Args0#mrargs{page_size = PageSize}, - Bookmark = bookmark_encode(Args#mrargs{start_key=NextKey}), + Args = Args0#mrargs{ + page_size = PageSize, + start_key = NextKey, + extra = lists:keystore(fk, 1, Extra, {fk, FirstKey}) + }, + Bookmark = bookmark_encode(Args), maps:merge(Response, #{ rows => Rows, next => Bookmark, @@ -166,6 +175,30 @@ maybe_add_bookmark(OriginalLimit, PageSize, Args0, Response, Items, KeyFun) -> end. +maybe_add_previous_bookmark(#mrargs{extra = Extra} = Args, #{rows := Rows} = Result, KeyFun) -> + StartKey = couch_util:get_value(fk, Extra), + case first_key(KeyFun, Rows) of + undefined -> + Result; + EndKey -> + Bookmark = bookmark_encode( + Args#mrargs{ + start_key = StartKey, + end_key = EndKey, + inclusive_end = false + } + ), + maps:put(previous, Bookmark, Result) + end. + + +first_key(_KeyFun, []) -> + undefined; + +first_key(KeyFun, [First | _]) -> + KeyFun(First). + + set_limit(#mrargs{page_size = PageSize, limit = Limit} = Args) when is_integer(PageSize) andalso Limit > PageSize -> {Limit, Args#mrargs{limit = PageSize + 1}}; -- cgit v1.2.1 From d0689449301017385f6d44212376614fa5143d9b Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Fri, 22 May 2020 11:46:19 -0400 Subject: Remove erlfdb mock from update_docs/2,3 test In a constrained CI environment transactions could retry multiple times so we cannot rely on precisely counting erlfdb:transactional/2 calls. --- src/fabric/test/fabric2_update_docs_tests.erl | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/src/fabric/test/fabric2_update_docs_tests.erl b/src/fabric/test/fabric2_update_docs_tests.erl index 5a2389abf..469fa0d1b 100644 --- a/src/fabric/test/fabric2_update_docs_tests.erl +++ b/src/fabric/test/fabric2_update_docs_tests.erl @@ -54,12 +54,10 @@ teardown_all(Ctx) -> setup() -> {ok, Db} = fabric2_db:create(?tempdb(), [{user_ctx, ?ADMIN_USER}]), - meck:new(erlfdb, [passthrough]), Db. cleanup(#{} = Db) -> - meck:unload(), ok = fabric2_db:delete(fabric2_db:name(Db), []). @@ -108,9 +106,7 @@ update_docs_batches(Db) -> Docs1 = [doc(9000), doc(9000)], - meck:reset(erlfdb), ?assertMatch({ok, [_ | _]}, fabric2_db:update_docs(Db, Docs1, Opts)), - ?assertEqual(2, meck:num_calls(erlfdb, transactional, 2)), lists:foreach(fun(#doc{} = Doc) -> ?assertMatch({ok, #doc{}}, fabric2_db:open_doc(Db, Doc#doc.id)) @@ -118,9 +114,7 @@ update_docs_batches(Db) -> Docs2 = [doc(10), doc(10), doc(9000), doc(10)], - meck:reset(erlfdb), ?assertMatch({ok, [_ | _]}, fabric2_db:update_docs(Db, Docs2, Opts)), - ?assertEqual(2, meck:num_calls(erlfdb, transactional, 2)), lists:foreach(fun(#doc{} = Doc) -> ?assertMatch({ok, #doc{}}, fabric2_db:open_doc(Db, Doc#doc.id)) @@ -132,9 +126,7 @@ update_docs_replicated_batches(Db) -> Docs1 = [doc(Size, {1, [rev()]}) || Size <- [9000, 9000]], - meck:reset(erlfdb), ?assertMatch({ok, []}, fabric2_db:update_docs(Db, Docs1, Opts)), - ?assertEqual(2, meck:num_calls(erlfdb, transactional, 2)), lists:foreach(fun(#doc{} = Doc) -> ?assertEqual({ok, Doc}, fabric2_db:open_doc(Db, Doc#doc.id)) @@ -142,9 +134,7 @@ update_docs_replicated_batches(Db) -> Docs2 = [doc(Size, {1, [rev()]}) || Size <- [10, 10, 9000, 10]], - meck:reset(erlfdb), ?assertMatch({ok, []}, fabric2_db:update_docs(Db, Docs2, Opts)), - ?assertEqual(2, meck:num_calls(erlfdb, transactional, 2)), lists:foreach(fun(#doc{} = Doc) -> ?assertEqual({ok, Doc}, fabric2_db:open_doc(Db, Doc#doc.id)) @@ -168,10 +158,8 @@ update_docs_duplicate_ids_with_batches(Db) -> Doc = doc(9000), - meck:reset(erlfdb), Res = fabric2_db:update_docs(Db, [Doc, doc(9000), Doc], Opts), ?assertMatch({ok, [_, _, _]}, Res), - ?assertEqual(3, meck:num_calls(erlfdb, transactional, 2)), {ok, [Doc1Res, Doc2Res, Doc3Res]} = Res, ?assertMatch({ok, {1, <<_/binary>>}}, Doc1Res), @@ -185,9 +173,7 @@ update_docs_replicate_batches_duplicate_id(Db) -> Doc = doc(10, {1, [rev()]}), Docs = [Doc, Doc], - meck:reset(erlfdb), ?assertMatch({ok, []}, fabric2_db:update_docs(Db, Docs, Opts)), - ?assertEqual(2, meck:num_calls(erlfdb, transactional, 2)), ?assertEqual({ok, Doc}, fabric2_db:open_doc(Db, Doc#doc.id)). -- cgit v1.2.1 From 08a0c6b6ff39045c0df7f40b57777afb6dbbd89f Mon Sep 17 00:00:00 2001 From: Juanjo Rodriguez Date: Sat, 9 May 2020 20:31:15 +0200 Subject: Port rev_stemming into elixir --- test/elixir/README.md | 2 +- test/elixir/test/rev_stemming_test.exs | 193 +++++++++++++++++++++++++++++++++ test/javascript/tests/rev_stemming.js | 1 + 3 files changed, 195 insertions(+), 1 deletion(-) create mode 100644 test/elixir/test/rev_stemming_test.exs diff --git a/test/elixir/README.md b/test/elixir/README.md index bb9b4d2da..dfa4c62b3 100644 --- a/test/elixir/README.md +++ b/test/elixir/README.md @@ -87,7 +87,7 @@ X means done, - means partially - [ ] Port replicator_db_update_security.js - [ ] Port replicator_db_user_ctx.js - [ ] Port replicator_db_write_auth.js - - [ ] Port rev_stemming.js + - [X] Port rev_stemming.js - [X] Port rewrite.js - [ ] Port rewrite_js.js - [X] Port security_validation.js diff --git a/test/elixir/test/rev_stemming_test.exs b/test/elixir/test/rev_stemming_test.exs new file mode 100644 index 000000000..51e959b48 --- /dev/null +++ b/test/elixir/test/rev_stemming_test.exs @@ -0,0 +1,193 @@ +defmodule RevStemmingTest do + use CouchTestCase + + @moduletag :revs + + @moduledoc """ + This is a port of the rev_stemming.js suite + """ + + @new_limit 5 + + @tag :with_db + test "revs limit update", context do + db_name = context[:db_name] + + resp = Couch.get("/#{db_name}/_revs_limit") + assert resp.body == 1000 + + create_rev_doc(db_name, "foo", @new_limit + 1) + resp = Couch.get("/#{db_name}/foo?revs=true") + assert length(resp.body["_revisions"]["ids"]) == @new_limit + 1 + + resp = + Couch.put("/#{db_name}/_revs_limit", + body: "#{@new_limit}", + headers: ["Content-type": "application/json"] + ) + + assert resp.status_code == 200 + + create_rev_doc(db_name, "foo", @new_limit + 1) + resp = Couch.get("/#{db_name}/foo?revs=true") + assert length(resp.body["_revisions"]["ids"]) == @new_limit + end + + @tag :with_db + test "revs limit produces replication conflict ", context do + db_name = context[:db_name] + + db_name_b = "#{db_name}_b" + create_db(db_name_b) + delete_db_on_exit([db_name_b]) + + resp = + Couch.put("/#{db_name}/_revs_limit", + body: "#{@new_limit}", + headers: ["Content-type": "application/json"] + ) + + assert resp.status_code == 200 + + create_rev_doc(db_name, "foo", @new_limit + 1) + resp = Couch.get("/#{db_name}/foo?revs=true") + assert length(resp.body["_revisions"]["ids"]) == @new_limit + + # If you replicate after you make more edits than the limit, you'll + # cause a spurious edit conflict. + replicate(db_name, db_name_b) + resp = Couch.get("/#{db_name_b}/foo?conflicts=true") + assert not Map.has_key?(resp.body, "_conflicts") + + create_rev_doc(db_name, "foo", @new_limit - 1) + + # one less edit than limit, no conflict + replicate(db_name, db_name_b) + resp = Couch.get("/#{db_name_b}/foo?conflicts=true") + assert not Map.has_key?(resp.body, "_conflicts") + prev_conflicted_rev = resp.body["_rev"] + + # now we hit the limit + create_rev_doc(db_name, "foo", @new_limit + 1) + + replicate(db_name, db_name_b) + resp = Couch.get("/#{db_name_b}/foo?conflicts=true") + assert Map.has_key?(resp.body, "_conflicts") + + conflicted_rev = + resp.body["_conflicts"] + |> Enum.at(0) + + # we have a conflict, but the previous replicated rev is always the losing + # conflict + assert conflicted_rev == prev_conflicted_rev + end + + @tag :with_db + test "revs limit is kept after compaction", context do + db_name = context[:db_name] + + create_rev_doc(db_name, "bar", @new_limit + 1) + resp = Couch.get("/#{db_name}/bar?revs=true") + assert length(resp.body["_revisions"]["ids"]) == @new_limit + 1 + + resp = + Couch.put("/#{db_name}/_revs_limit", + body: "#{@new_limit}", + headers: ["Content-type": "application/json"] + ) + + assert resp.status_code == 200 + + # We having already updated bar before setting the limit, so it's still got + # a long rev history. compact to stem the revs. + resp = Couch.get("/#{db_name}/bar?revs=true") + assert length(resp.body["_revisions"]["ids"]) == @new_limit + + compact(db_name) + wait_until_compact_complete(db_name) + + # force reload because ETags don't honour compaction + resp = + Couch.get("/#{db_name}/bar?revs=true", + headers: ["if-none-match": "pommes"] + ) + + assert length(resp.body["_revisions"]["ids"]) == @new_limit + end + + # function to create a doc with multiple revisions + defp create_rev_doc(db_name, id, num_revs) do + resp = Couch.get("/#{db_name}/#{id}") + + doc = + if resp.status_code == 200 do + resp.body + else + %{_id: id, count: 0} + end + + {:ok, resp} = create_doc(db_name, doc) + create_rev_doc(db_name, id, num_revs, [Map.put(doc, :_rev, resp.body["rev"])]) + end + + defp create_rev_doc(db_name, id, num_revs, revs) do + if length(revs) < num_revs do + doc = %{_id: id, _rev: Enum.at(revs, -1)[:_rev], count: length(revs)} + {:ok, resp} = create_doc(db_name, doc) + + create_rev_doc( + db_name, + id, + num_revs, + revs ++ [Map.put(doc, :_rev, resp.body["rev"])] + ) + else + revs + end + end + + defp build_uri(db_name) do + username = System.get_env("EX_USERNAME") || "adm" + password = System.get_env("EX_PASSWORD") || "pass" + + "/#{db_name}" + |> Couch.process_url() + |> URI.parse() + |> Map.put(:userinfo, "#{username}:#{password}") + |> URI.to_string() + end + + defp replicate(src, tgt) do + src_uri = build_uri(src) + tgt_uri = build_uri(tgt) + + body = %{source: src_uri, target: tgt_uri} + + resp = Couch.post("/_replicate", body: body) + assert resp.status_code == 200 + resp.body + end + + def delete_db_on_exit(db_names) when is_list(db_names) do + on_exit(fn -> + Enum.each(db_names, fn name -> + delete_db(name) + end) + end) + end + + defp compact(db_name) do + resp = Couch.post("/#{db_name}/_compact") + assert resp.status_code == 202 + resp.body + end + + defp wait_until_compact_complete(db_name) do + retry_until( + fn -> Map.get(info(db_name), "compact_running") == false end, + 200, + 10_000 + ) + end +end diff --git a/test/javascript/tests/rev_stemming.js b/test/javascript/tests/rev_stemming.js index 238868f60..725c0f1c9 100644 --- a/test/javascript/tests/rev_stemming.js +++ b/test/javascript/tests/rev_stemming.js @@ -10,6 +10,7 @@ // License for the specific language governing permissions and limitations under // the License. +couchTests.elixir = true; couchTests.rev_stemming = function(debug) { var db_name_orig = get_random_db_name(); -- cgit v1.2.1 From 4e64f5b492990b03f7f58a47ec173d048a3f381f Mon Sep 17 00:00:00 2001 From: Juanjo Rodriguez Date: Mon, 25 May 2020 08:11:45 +0200 Subject: move compact and replicate functions into CouchTestCase shared module --- test/elixir/lib/couch/db_test.ex | 56 +++++++++++++++++++++++++- test/elixir/test/auth_cache_test.exs | 15 ------- test/elixir/test/compact_test.exs | 12 ------ test/elixir/test/partition_size_limit_test.exs | 12 ------ test/elixir/test/purge_test.exs | 20 +-------- test/elixir/test/replication_test.exs | 38 ----------------- test/elixir/test/rev_stemming_test.exs | 36 ----------------- test/elixir/test/users_db_test.exs | 22 ---------- 8 files changed, 56 insertions(+), 155 deletions(-) diff --git a/test/elixir/lib/couch/db_test.ex b/test/elixir/lib/couch/db_test.ex index e3f32f839..a61db1424 100644 --- a/test/elixir/lib/couch/db_test.ex +++ b/test/elixir/lib/couch/db_test.ex @@ -278,6 +278,60 @@ defmodule Couch.DBTest do resp.body end + def compact(db_name) do + resp = Couch.post("/#{db_name}/_compact") + assert resp.status_code == 202 + + retry_until( + fn -> Map.get(info(db_name), "compact_running") == false end, + 200, + 10_000 + ) + + resp.body + end + + def replicate(src, tgt, options \\ []) do + username = System.get_env("EX_USERNAME") || "adm" + password = System.get_env("EX_PASSWORD") || "pass" + + {userinfo, options} = Keyword.pop(options, :userinfo) + + userinfo = + if userinfo == nil do + "#{username}:#{password}" + else + userinfo + end + + src = set_user(src, userinfo) + tgt = set_user(tgt, userinfo) + + defaults = [headers: [], body: %{}, timeout: 30_000] + options = defaults |> Keyword.merge(options) |> Enum.into(%{}) + + %{body: body} = options + body = [source: src, target: tgt] |> Enum.into(body) + options = Map.put(options, :body, body) + + resp = Couch.post("/_replicate", Enum.to_list(options)) + assert HTTPotion.Response.success?(resp), "#{inspect(resp)}" + resp.body + end + + defp set_user(uri, userinfo) do + case URI.parse(uri) do + %{scheme: nil} -> + uri + + %{userinfo: nil} = uri -> + URI.to_string(Map.put(uri, :userinfo, userinfo)) + + _ -> + uri + end + end + def view(db_name, view_name, options \\ nil, keys \\ nil) do [view_root, view_name] = String.split(view_name, "/") @@ -423,7 +477,7 @@ defmodule Couch.DBTest do Enum.each(setting.nodes, fn node_value -> node = elem(node_value, 0) value = elem(node_value, 1) - + if value == ~s(""\\n) or value == "" or value == nil do resp = Couch.delete( diff --git a/test/elixir/test/auth_cache_test.exs b/test/elixir/test/auth_cache_test.exs index 2ba396de7..8b7c29c71 100644 --- a/test/elixir/test/auth_cache_test.exs +++ b/test/elixir/test/auth_cache_test.exs @@ -66,14 +66,6 @@ defmodule AuthCacheTest do sess end - defp wait_until_compact_complete(db_name) do - retry_until( - fn -> Map.get(info(db_name), "compact_running") == false end, - 200, - 10_000 - ) - end - defp assert_cache(event, user, password, expect \\ :expect_login_success) do hits_before = hits() misses_before = misses() @@ -112,12 +104,6 @@ defmodule AuthCacheTest do end end - defp compact(db_name) do - resp = Couch.post("/#{db_name}/_compact") - assert resp.status_code == 202 - resp.body - end - def save_doc(db_name, body) do resp = Couch.put("/#{db_name}/#{body["_id"]}", body: body) assert resp.status_code in [201, 202] @@ -206,7 +192,6 @@ defmodule AuthCacheTest do # there was a cache hit assert_cache(:expect_hit, "johndoe", "123456") compact(db_name) - wait_until_compact_complete(db_name) assert_cache(:expect_hit, "johndoe", "123456") end end diff --git a/test/elixir/test/compact_test.exs b/test/elixir/test/compact_test.exs index d99a7a78e..461a1d347 100644 --- a/test/elixir/test/compact_test.exs +++ b/test/elixir/test/compact_test.exs @@ -82,18 +82,6 @@ defmodule CompactTest do assert Couch.post("/#{db}/_ensure_full_commit").body["ok"] == true end - defp compact(db) do - assert Couch.post("/#{db}/_compact").status_code == 202 - - retry_until( - fn -> - Couch.get("/#{db}").body["compact_running"] == false - end, - 200, - 20_000 - ) - end - defp get_info(db) do Couch.get("/#{db}").body end diff --git a/test/elixir/test/partition_size_limit_test.exs b/test/elixir/test/partition_size_limit_test.exs index 5141d0d8b..6ef686611 100644 --- a/test/elixir/test/partition_size_limit_test.exs +++ b/test/elixir/test/partition_size_limit_test.exs @@ -68,18 +68,6 @@ defmodule PartitionSizeLimitTest do assert resp.status_code in [201, 202] end - defp compact(db) do - assert Couch.post("/#{db}/_compact").status_code == 202 - - retry_until( - fn -> - Couch.get("/#{db}").body["compact_running"] == false - end, - 200, - 20_000 - ) - end - test "fill partition manually", context do db_name = context[:db_name] partition = "foo" diff --git a/test/elixir/test/purge_test.exs b/test/elixir/test/purge_test.exs index 3920b3f26..5fc03f16b 100644 --- a/test/elixir/test/purge_test.exs +++ b/test/elixir/test/purge_test.exs @@ -53,12 +53,7 @@ defmodule PurgeTest do test_all_docs_twice(db_name, num_docs, 0, 2) # purge sequences are preserved after compaction (COUCHDB-1021) - resp = Couch.post("/#{db_name}/_compact") - assert resp.status_code == 202 - - retry_until(fn -> - info(db_name)["compact_running"] == false - end) + compact(db_name) compacted_info = info(db_name) assert compacted_info["purge_seq"] == purged_info["purge_seq"] @@ -127,19 +122,6 @@ defmodule PurgeTest do delete_db(db_name_b) end - def replicate(src, tgt, options \\ []) do - defaults = [headers: [], body: %{}, timeout: 30_000] - options = defaults |> Keyword.merge(options) |> Enum.into(%{}) - - %{body: body} = options - body = [source: src, target: tgt] |> Enum.into(body) - options = Map.put(options, :body, body) - - resp = Couch.post("/_replicate", Enum.to_list(options)) - assert HTTPotion.Response.success?(resp), "#{inspect(resp)}" - resp.body - end - defp open_doc(db_name, id, expect \\ 200) do resp = Couch.get("/#{db_name}/#{id}") assert resp.status_code == expect diff --git a/test/elixir/test/replication_test.exs b/test/elixir/test/replication_test.exs index bdd683e97..075f65bfa 100644 --- a/test/elixir/test/replication_test.exs +++ b/test/elixir/test/replication_test.exs @@ -7,7 +7,6 @@ defmodule ReplicationTest do """ # TODO: Parameterize these - @admin_account "adm:pass" @db_pairs_prefixes [ {"remote-to-remote", "http://127.0.0.1:15984/", "http://127.0.0.1:15984/"} ] @@ -1584,30 +1583,6 @@ defmodule ReplicationTest do resp.body end - def replicate(src, tgt, options \\ []) do - {userinfo, options} = Keyword.pop(options, :userinfo) - - userinfo = - if userinfo == nil do - @admin_account - else - userinfo - end - - src = set_user(src, userinfo) - tgt = set_user(tgt, userinfo) - - defaults = [headers: [], body: %{}, timeout: 30_000] - options = defaults |> Keyword.merge(options) |> Enum.into(%{}) - - %{body: body} = options - body = [source: src, target: tgt] |> Enum.into(body) - options = Map.put(options, :body, body) - - resp = Couch.post("/_replicate", Enum.to_list(options)) - assert HTTPotion.Response.success?(resp), "#{inspect(resp)}" - resp.body - end def cancel_replication(src, tgt) do body = %{:cancel => true} @@ -1737,19 +1712,6 @@ defmodule ReplicationTest do end) end - def set_user(uri, userinfo) do - case URI.parse(uri) do - %{scheme: nil} -> - uri - - %{userinfo: nil} = uri -> - URI.to_string(Map.put(uri, :userinfo, userinfo)) - - _ -> - uri - end - end - def get_att1_data do File.read!(Path.expand("data/lorem.txt", __DIR__)) end diff --git a/test/elixir/test/rev_stemming_test.exs b/test/elixir/test/rev_stemming_test.exs index 51e959b48..9a16d481d 100644 --- a/test/elixir/test/rev_stemming_test.exs +++ b/test/elixir/test/rev_stemming_test.exs @@ -105,7 +105,6 @@ defmodule RevStemmingTest do assert length(resp.body["_revisions"]["ids"]) == @new_limit compact(db_name) - wait_until_compact_complete(db_name) # force reload because ETags don't honour compaction resp = @@ -147,28 +146,6 @@ defmodule RevStemmingTest do end end - defp build_uri(db_name) do - username = System.get_env("EX_USERNAME") || "adm" - password = System.get_env("EX_PASSWORD") || "pass" - - "/#{db_name}" - |> Couch.process_url() - |> URI.parse() - |> Map.put(:userinfo, "#{username}:#{password}") - |> URI.to_string() - end - - defp replicate(src, tgt) do - src_uri = build_uri(src) - tgt_uri = build_uri(tgt) - - body = %{source: src_uri, target: tgt_uri} - - resp = Couch.post("/_replicate", body: body) - assert resp.status_code == 200 - resp.body - end - def delete_db_on_exit(db_names) when is_list(db_names) do on_exit(fn -> Enum.each(db_names, fn name -> @@ -177,17 +154,4 @@ defmodule RevStemmingTest do end) end - defp compact(db_name) do - resp = Couch.post("/#{db_name}/_compact") - assert resp.status_code == 202 - resp.body - end - - defp wait_until_compact_complete(db_name) do - retry_until( - fn -> Map.get(info(db_name), "compact_running") == false end, - 200, - 10_000 - ) - end end diff --git a/test/elixir/test/users_db_test.exs b/test/elixir/test/users_db_test.exs index 1d34d8c9e..62877d542 100644 --- a/test/elixir/test/users_db_test.exs +++ b/test/elixir/test/users_db_test.exs @@ -50,28 +50,6 @@ defmodule UsersDbTest do create_db(@users_db_name) end - defp replicate(source, target, rep_options \\ []) do - headers = Keyword.get(rep_options, :headers, []) - body = Keyword.get(rep_options, :body, %{}) - - body = - body - |> Map.put("source", source) - |> Map.put("target", target) - - retry_until( - fn -> - resp = Couch.post("/_replicate", headers: headers, body: body, timeout: 10_000) - assert HTTPotion.Response.success?(resp) - assert resp.status_code == 200 - assert resp.body["ok"] - resp - end, - 500, - 20_000 - ) - end - defp save_as(db_name, doc, options) do session = Keyword.get(options, :use_session) expect_response = Keyword.get(options, :expect_response, [201, 202]) -- cgit v1.2.1 From 19c040f65120898a3cfb54b643e2e49bdd192d02 Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Wed, 27 May 2020 13:01:34 -0400 Subject: Lower the default batch size for update_docs to 2.5MB Observed a number of timeouts with the previous default --- rel/overlay/etc/default.ini | 2 +- src/fabric/src/fabric2_db.erl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/rel/overlay/etc/default.ini b/rel/overlay/etc/default.ini index 35e5147b2..3630259a1 100644 --- a/rel/overlay/etc/default.ini +++ b/rel/overlay/etc/default.ini @@ -244,7 +244,7 @@ port = 6984 ;binary_chunk_size = 100000 ; ; Bulk docs transaction batch size in bytes -;update_docs_batch_size = 5000000 +;update_docs_batch_size = 2500000 ; [rexi] ; buffer_count = 2000 diff --git a/src/fabric/src/fabric2_db.erl b/src/fabric/src/fabric2_db.erl index 6540e0b6d..667cf35c6 100644 --- a/src/fabric/src/fabric2_db.erl +++ b/src/fabric/src/fabric2_db.erl @@ -156,7 +156,7 @@ -define(RETURN(Term), throw({?MODULE, Term})). --define(DEFAULT_UPDATE_DOCS_BATCH_SIZE, 5000000). +-define(DEFAULT_UPDATE_DOCS_BATCH_SIZE, 2500000). -record(bacc, { -- cgit v1.2.1 From 56738359ac92e10187e908e1620fef13476862fe Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Wed, 27 May 2020 13:56:44 -0400 Subject: Introduce _bulk_docs max_doc_count limit Let users specify the maximum document count for the _bulk_docs requests. If the document count exceeds the maximum it would return a 413 HTTP error. This would also signal the replicator to try to bisect the _bulk_docs array into smaller batches. --- rel/overlay/etc/default.ini | 4 ++++ src/chttpd/src/chttpd.erl | 2 ++ src/chttpd/src/chttpd_db.erl | 5 +++++ src/chttpd/test/eunit/chttpd_db_doc_size_tests.erl | 22 +++++++++++++++++++++- 4 files changed, 32 insertions(+), 1 deletion(-) diff --git a/rel/overlay/etc/default.ini b/rel/overlay/etc/default.ini index 3630259a1..43e1c0ba3 100644 --- a/rel/overlay/etc/default.ini +++ b/rel/overlay/etc/default.ini @@ -55,6 +55,10 @@ changes_doc_ids_optimization_threshold = 100 ; for size calculation instead of 7. max_document_size = 8000000 ; bytes ; +; Maximum number of documents in a _bulk_docs request. Anything larger +; returns a 413 error for the whole request +;max_bulk_docs_count = 10000 +; ; Maximum attachment size. ; max_attachment_size = infinity ; diff --git a/src/chttpd/src/chttpd.erl b/src/chttpd/src/chttpd.erl index 699601c0e..e8639ed8d 100644 --- a/src/chttpd/src/chttpd.erl +++ b/src/chttpd/src/chttpd.erl @@ -956,6 +956,8 @@ error_info(request_entity_too_large) -> {413, <<"too_large">>, <<"the request entity is too large">>}; error_info({request_entity_too_large, {attachment, AttName}}) -> {413, <<"attachment_too_large">>, AttName}; +error_info({request_entity_too_large, {bulk_docs, Max}}) when is_integer(Max) -> + {413, <<"max_bulk_docs_count_exceeded">>, integer_to_binary(Max)}; error_info({request_entity_too_large, DocID}) -> {413, <<"document_too_large">>, DocID}; error_info({error, security_migration_updates_disabled}) -> diff --git a/src/chttpd/src/chttpd_db.erl b/src/chttpd/src/chttpd_db.erl index 5cfbd1d5f..5af98fe3a 100644 --- a/src/chttpd/src/chttpd_db.erl +++ b/src/chttpd/src/chttpd_db.erl @@ -484,6 +484,11 @@ db_req(#httpd{method='POST',path_parts=[_,<<"_bulk_docs">>]}=Req, Db) -> DocsArray0 -> DocsArray0 end, + MaxDocs = config:get_integer("couchdb", "max_bulk_docs_count", 10000), + case length(DocsArray) =< MaxDocs of + true -> ok; + false -> throw({request_entity_too_large, {bulk_docs, MaxDocs}}) + end, couch_stats:update_histogram([couchdb, httpd, bulk_docs], length(DocsArray)), Options = case chttpd:header_value(Req, "X-Couch-Full-Commit") of "true" -> diff --git a/src/chttpd/test/eunit/chttpd_db_doc_size_tests.erl b/src/chttpd/test/eunit/chttpd_db_doc_size_tests.erl index 88e2797a3..2b04050a2 100644 --- a/src/chttpd/test/eunit/chttpd_db_doc_size_tests.erl +++ b/src/chttpd/test/eunit/chttpd_db_doc_size_tests.erl @@ -29,6 +29,7 @@ setup() -> Hashed = couch_passwords:hash_admin_password(?PASS), ok = config:set("admins", ?USER, ?b2l(Hashed), _Persist=false), ok = config:set("couchdb", "max_document_size", "50"), + ok = config:set("couchdb", "max_bulk_docs_count", "2"), TmpDb = ?tempdb(), Addr = config:get("chttpd", "bind_address", "127.0.0.1"), Port = mochiweb_socket_server:get(chttpd, port), @@ -39,7 +40,8 @@ setup() -> teardown(Url) -> delete_db(Url), ok = config:delete("admins", ?USER, _Persist=false), - ok = config:delete("couchdb", "max_document_size"). + ok = config:delete("couchdb", "max_document_size"), + ok = config:delete("couchdb", "max_bulk_docs_count"). create_db(Url) -> {ok, Status, _, _} = test_request:put(Url, [?CONTENT_JSON, ?AUTH], "{}"), @@ -67,6 +69,7 @@ all_test_() -> fun post_single_doc/1, fun put_single_doc/1, fun bulk_doc/1, + fun bulk_docs_too_many_docs/1, fun put_post_doc_attach_inline/1, fun put_multi_part_related/1, fun post_multi_part_form/1 @@ -100,6 +103,23 @@ bulk_doc(Url) -> Expect = {[{<<"error">>,<<"document_too_large">>},{<<"reason">>,<<>>}]}, ?_assertEqual(Expect, ResultJson). + +bulk_docs_too_many_docs(Url) -> + Docs = "{\"docs\": [" + "{\"doc1\": \"{}\"}, " + "{\"doc2\": \"{}\"}, " + "{\"doc3\": \"{}\"}" + "]}", + {ok, Code, _, ResultBody} = test_request:post(Url ++ "/_bulk_docs/", + [?CONTENT_JSON, ?AUTH], Docs), + ResultJson = ?JSON_DECODE(ResultBody), + ExpectJson = {[ + {<<"error">>,<<"max_bulk_docs_count_exceeded">>}, + {<<"reason">>,<<"2">>} + ]}, + ?_assertEqual({413, ExpectJson}, {Code, ResultJson}). + + put_post_doc_attach_inline(Url) -> Body1 = "{\"body\":\"This is a body.\",", Body2 = lists:concat(["{\"body\":\"This is a body it should fail", -- cgit v1.2.1 From 10559eae51ded2371f4150836311cfe7517a230f Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Thu, 28 May 2020 14:52:34 -0400 Subject: Don't skip over docs in mango indices on erlfdb errors --- src/couch_views/src/couch_views_updater.erl | 5 +++++ src/couch_views/test/couch_views_updater_test.erl | 12 +++++++++++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/src/couch_views/src/couch_views_updater.erl b/src/couch_views/src/couch_views_updater.erl index 30dfac326..a87fab117 100644 --- a/src/couch_views/src/couch_views_updater.erl +++ b/src/couch_views/src/couch_views_updater.erl @@ -31,6 +31,11 @@ index(Db, #doc{id = Id, revs = Revs} = Doc, _NewWinner, _OldWinner, NewRevId, index_int(Db, Doc, Seq) end catch + error:{erlfdb, ErrCode} when is_integer(ErrCode) -> + DbName = fabric2_db:name(Db), + couch_log:error("Mango index erlfdb error Db ~s Doc ~p ~p", + [DbName, Id, ErrCode]), + erlang:raise(error, {erlfdb, ErrCode}, erlang:get_stacktrace()); Error:Reason -> DbName = fabric2_db:name(Db), couch_log:error("Mango index error for Db ~s Doc ~p ~p ~p", diff --git a/src/couch_views/test/couch_views_updater_test.erl b/src/couch_views/test/couch_views_updater_test.erl index e45622512..0dfc57461 100644 --- a/src/couch_views/test/couch_views_updater_test.erl +++ b/src/couch_views/test/couch_views_updater_test.erl @@ -35,7 +35,8 @@ indexer_test_() -> ?TDEF_FE(index_docs), ?TDEF_FE(update_doc), ?TDEF_FE(delete_doc), - ?TDEF_FE(includes_design_docs) + ?TDEF_FE(includes_design_docs), + ?TDEF_FE(handle_erlfdb_errors) ] } } @@ -68,10 +69,12 @@ foreach_setup() -> Docs = make_docs(3), fabric2_db:update_docs(Db, Docs), + meck:new(couch_views_fdb, [passthrough]), {Db, DDoc}. foreach_teardown({Db, _}) -> + meck:unload(), ok = fabric2_db:delete(fabric2_db:name(Db), []). @@ -131,6 +134,13 @@ includes_design_docs({Db, _}) -> ], Docs). +handle_erlfdb_errors({Db, _}) -> + meck:expect(couch_views_fdb, write_doc, fun(_, _, _, _) -> + error({erlfdb, 1009}) + end), + ?assertError({erlfdb, 1009}, fabric2_db:update_docs(Db, [doc(4)])). + + run_query(Db, DDoc) -> Args = #mrargs{ view_type = map, -- cgit v1.2.1 From ef4cfb447c2835dd45139dc9eaded361bb87ced6 Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Thu, 28 May 2020 15:16:18 -0400 Subject: Fix mango erlfdb error catch clause erlfdb -> erlfdb_error --- src/couch_views/src/couch_views_updater.erl | 5 +++-- src/couch_views/test/couch_views_updater_test.erl | 6 +++--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/couch_views/src/couch_views_updater.erl b/src/couch_views/src/couch_views_updater.erl index a87fab117..ba9fadb51 100644 --- a/src/couch_views/src/couch_views_updater.erl +++ b/src/couch_views/src/couch_views_updater.erl @@ -31,11 +31,12 @@ index(Db, #doc{id = Id, revs = Revs} = Doc, _NewWinner, _OldWinner, NewRevId, index_int(Db, Doc, Seq) end catch - error:{erlfdb, ErrCode} when is_integer(ErrCode) -> + error:{erlfdb_error, ErrCode} when is_integer(ErrCode) -> + Stack = erlang:get_stacktrace(), DbName = fabric2_db:name(Db), couch_log:error("Mango index erlfdb error Db ~s Doc ~p ~p", [DbName, Id, ErrCode]), - erlang:raise(error, {erlfdb, ErrCode}, erlang:get_stacktrace()); + erlang:raise(error, {erlfdb_error, ErrCode}, Stack); Error:Reason -> DbName = fabric2_db:name(Db), couch_log:error("Mango index error for Db ~s Doc ~p ~p ~p", diff --git a/src/couch_views/test/couch_views_updater_test.erl b/src/couch_views/test/couch_views_updater_test.erl index 0dfc57461..89c341a17 100644 --- a/src/couch_views/test/couch_views_updater_test.erl +++ b/src/couch_views/test/couch_views_updater_test.erl @@ -36,7 +36,7 @@ indexer_test_() -> ?TDEF_FE(update_doc), ?TDEF_FE(delete_doc), ?TDEF_FE(includes_design_docs), - ?TDEF_FE(handle_erlfdb_errors) + ?TDEF_FE(handle_erlfdb_errors, 15) ] } } @@ -136,9 +136,9 @@ includes_design_docs({Db, _}) -> handle_erlfdb_errors({Db, _}) -> meck:expect(couch_views_fdb, write_doc, fun(_, _, _, _) -> - error({erlfdb, 1009}) + error({erlfdb_error, 1009}) end), - ?assertError({erlfdb, 1009}, fabric2_db:update_docs(Db, [doc(4)])). + ?assertError({erlfdb_error, 1009}, fabric2_db:update_docs(Db, [doc(4)])). run_query(Db, DDoc) -> -- cgit v1.2.1 From 0152c1e428b1b0a0bebe3a3e4cba331079d99255 Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Fri, 29 May 2020 14:06:04 -0400 Subject: Fix bad catch statement in couch_jobs activity monitor --- src/couch_jobs/src/couch_jobs_activity_monitor.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/couch_jobs/src/couch_jobs_activity_monitor.erl b/src/couch_jobs/src/couch_jobs_activity_monitor.erl index 6f50d9653..abb8982d9 100644 --- a/src/couch_jobs/src/couch_jobs_activity_monitor.erl +++ b/src/couch_jobs/src/couch_jobs_activity_monitor.erl @@ -68,7 +68,7 @@ handle_info(check_activity, St) -> St1 = try check_activity(St) catch - {error, {erlfdb_error, 1020}} -> + error:{erlfdb_error, 1020} -> LogMsg = "~p : type:~p got 1020 error, possibly from overload", couch_log:error(LogMsg, [?MODULE, St#st.type]), St -- cgit v1.2.1 From b9aa2c6b1d2ded7fa1920551b747758e5eebc7bb Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Fri, 29 May 2020 14:17:20 -0400 Subject: Protect couch_jobs activity monitor against timeouts as well --- src/couch_jobs/src/couch_jobs_activity_monitor.erl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/couch_jobs/src/couch_jobs_activity_monitor.erl b/src/couch_jobs/src/couch_jobs_activity_monitor.erl index abb8982d9..9802f5798 100644 --- a/src/couch_jobs/src/couch_jobs_activity_monitor.erl +++ b/src/couch_jobs/src/couch_jobs_activity_monitor.erl @@ -68,9 +68,9 @@ handle_info(check_activity, St) -> St1 = try check_activity(St) catch - error:{erlfdb_error, 1020} -> - LogMsg = "~p : type:~p got 1020 error, possibly from overload", - couch_log:error(LogMsg, [?MODULE, St#st.type]), + error:{erlfdb_error, Err} when Err =:= 1020 orelse Err =:= 1031 -> + LogMsg = "~p : type:~p got ~p error, possibly from overload", + couch_log:error(LogMsg, [?MODULE, St#st.type, Err]), St end, St2 = schedule_check(St1), -- cgit v1.2.1 From 6501709000117d14cd49c5bbfeaf27efd508a5e9 Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Fri, 29 May 2020 16:59:35 -0400 Subject: Guard couch_jobs:accept_loop timing out And also against too many conflicts during overload --- src/couch_jobs/src/couch_jobs.erl | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/couch_jobs/src/couch_jobs.erl b/src/couch_jobs/src/couch_jobs.erl index d9ea0fbfa..adc1b464e 100644 --- a/src/couch_jobs/src/couch_jobs.erl +++ b/src/couch_jobs/src/couch_jobs.erl @@ -297,9 +297,17 @@ accept_loop(Type, NoSched, MaxSchedTime, Timeout) -> TxFun = fun(JTx) -> couch_jobs_fdb:accept(JTx, Type, MaxSchedTime, NoSched) end, - case couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(), TxFun) of + AcceptResult = try + couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(), TxFun) + catch + error:{erlfdb_error, Err} when Err =:= 1020 orelse Err =:= 1031 -> + retry + end, + case AcceptResult of {ok, Job, Data} -> {ok, Job, Data}; + retry -> + accept_loop(Type, NoSched, MaxSchedTime, Timeout); {not_found, PendingWatch} -> case wait_pending(PendingWatch, MaxSchedTime, Timeout) of {error, not_found} -> -- cgit v1.2.1 From 0be139a8e20d1be0cf63e611159015b8bc6c6e1a Mon Sep 17 00:00:00 2001 From: Simon Klassen <6997477+sklassen@users.noreply.github.com> Date: Sun, 31 May 2020 07:50:19 +0800 Subject: 2906 couchjs sm version (#2911) Closes #2906 * Added a suffix to the first line of couchjs with the (static) version number compiled * Update rebar.config.script * In couchjs -h replaced the link to jira with a link to github Co-authored-by: simon.klassen Co-authored-by: Jan Lehnardt Date: Tue, 2 Jun 2020 12:03:58 -0400 Subject: Prevent eviction of newer handles from fabric_server cache Check metadata versions to ensure newer handles are not clobbered. The same thing is done for removal, `maybe_remove/1` removes handle only if there isn't a newer handle already there. --- src/fabric/src/fabric2_server.erl | 62 ++++++++++++++++++++++++++----- src/fabric/test/fabric2_db_misc_tests.erl | 17 +++++++++ 2 files changed, 70 insertions(+), 9 deletions(-) diff --git a/src/fabric/src/fabric2_server.erl b/src/fabric/src/fabric2_server.erl index 957efff3b..b557da8c7 100644 --- a/src/fabric/src/fabric2_server.erl +++ b/src/fabric/src/fabric2_server.erl @@ -17,9 +17,15 @@ -export([ start_link/0, + fetch/2, + store/1, + maybe_update/1, + remove/1, + maybe_remove/1, + fdb_directory/0, fdb_cluster/0 ]). @@ -66,27 +72,57 @@ start_link() -> fetch(DbName, UUID) when is_binary(DbName) -> case {UUID, ets:lookup(?MODULE, DbName)} of {_, []} -> undefined; - {undefined, [{DbName, #{} = Db}]} -> Db; - {<<_/binary>>, [{DbName, #{uuid := UUID} = Db}]} -> Db; - {<<_/binary>>, [{DbName, #{} = _Db}]} -> undefined + {undefined, [{DbName, _UUID, _, #{} = Db}]} -> Db; + {<<_/binary>>, [{DbName, UUID, _, #{} = Db}]} -> Db; + {<<_/binary>>, [{DbName, _UUID, _, #{} = _Db}]} -> undefined end. store(#{name := DbName} = Db0) when is_binary(DbName) -> - Db1 = Db0#{ - tx := undefined, - user_ctx := #user_ctx{}, - security_fun := undefined - }, - true = ets:insert(?MODULE, {DbName, Db1}), + #{ + uuid := UUID, + md_version := MDVer + } = Db0, + Db1 = sanitize(Db0), + case ets:insert_new(?MODULE, {DbName, UUID, MDVer, Db1}) of + true -> ok; + false -> maybe_update(Db1) + end, ok. +maybe_update(#{name := DbName} = Db0) when is_binary(DbName) -> + #{ + uuid := UUID, + md_version := MDVer + } = Db0, + Db1 = sanitize(Db0), + Head = {DbName, UUID, '$1', '_'}, + Guard = {'=<', '$1', MDVer}, + Body = {DbName, UUID, MDVer, {const, Db1}}, + try + 1 =:= ets:select_replace(?MODULE, [{Head, [Guard], [{Body}]}]) + catch + error:badarg -> + false + end. + + remove(DbName) when is_binary(DbName) -> true = ets:delete(?MODULE, DbName), ok. +maybe_remove(#{name := DbName} = Db) when is_binary(DbName) -> + #{ + uuid := UUID, + md_version := MDVer + } = Db, + Head = {DbName, UUID, '$1', '_'}, + Guard = {'=<', '$1', MDVer}, + 1 =:= ets:select_delete(?MODULE, [{Head, [Guard], [true]}]). + + init(_) -> ets:new(?MODULE, [ public, @@ -229,3 +265,11 @@ set_option(Db, Option, Val) -> Msg = "~p : Could not set fdb tx option ~p = ~p", couch_log:error(Msg, [?MODULE, Option, Val]) end. + + +sanitize(#{} = Db) -> + Db#{ + tx := undefined, + user_ctx := #user_ctx{}, + security_fun := undefined + }. diff --git a/src/fabric/test/fabric2_db_misc_tests.erl b/src/fabric/test/fabric2_db_misc_tests.erl index 9c95ca565..2fad73f1f 100644 --- a/src/fabric/test/fabric2_db_misc_tests.erl +++ b/src/fabric/test/fabric2_db_misc_tests.erl @@ -50,6 +50,7 @@ misc_test_() -> ?TDEF(ensure_full_commit), ?TDEF(metadata_bump), ?TDEF(db_version_bump), + ?TDEF(db_cache_doesnt_evict_newer_handles), ?TDEF(events_listener) ]) } @@ -375,6 +376,22 @@ db_version_bump({DbName, _, _}) -> ?assertMatch(#{db_version := NewDbVersion}, Db2). +db_cache_doesnt_evict_newer_handles({DbName, _, _}) -> + {ok, Db} = fabric2_db:open(DbName, [{user_ctx, ?ADMIN_USER}]), + CachedDb = fabric2_server:fetch(DbName, undefined), + + StaleDb = Db#{md_version := <<0>>}, + + ok = fabric2_server:store(StaleDb), + ?assertEqual(CachedDb, fabric2_server:fetch(DbName, undefined)), + + ?assert(not fabric2_server:maybe_update(StaleDb)), + ?assertEqual(CachedDb, fabric2_server:fetch(DbName, undefined)), + + ?assert(not fabric2_server:maybe_remove(StaleDb)), + ?assertEqual(CachedDb, fabric2_server:fetch(DbName, undefined)). + + events_listener({DbName, Db, _}) -> Opts = [ {dbname, DbName}, -- cgit v1.2.1 From 9f8b4fdb26414b556edeaaa1d56bf13d418e0c87 Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Tue, 2 Jun 2020 12:07:44 -0400 Subject: Remove on_commit handler from fabric2_fdb Update db handles right away as soon we db verison is checked. This ensures concurrent requests will get access to the current handle as soon as possible and may avoid doing extra version checks and re-opens. --- src/fabric/include/fabric2.hrl | 1 - src/fabric/src/fabric2_fdb.erl | 72 +++++++++++++++--------------------------- 2 files changed, 26 insertions(+), 47 deletions(-) diff --git a/src/fabric/include/fabric2.hrl b/src/fabric/include/fabric2.hrl index 2e71787c3..29f3b029e 100644 --- a/src/fabric/include/fabric2.hrl +++ b/src/fabric/include/fabric2.hrl @@ -70,7 +70,6 @@ -define(PDICT_CHECKED_MD_IS_CURRENT, '$fabric_checked_md_is_current'). -define(PDICT_TX_ID_KEY, '$fabric_tx_id'). -define(PDICT_TX_RES_KEY, '$fabric_tx_result'). --define(PDICT_ON_COMMIT_FUN, '$fabric_on_commit_fun'). -define(PDICT_FOLD_ACC_STATE, '$fabric_fold_acc_state'). % Let's keep these in ascending order diff --git a/src/fabric/src/fabric2_fdb.erl b/src/fabric/src/fabric2_fdb.erl index e8f6e0daa..7e736b041 100644 --- a/src/fabric/src/fabric2_fdb.erl +++ b/src/fabric/src/fabric2_fdb.erl @@ -171,11 +171,7 @@ do_transaction(Fun, LayerPrefix) when is_function(Fun, 1) -> true -> get_previous_transaction_result(); false -> - try - execute_transaction(Tx, Fun, LayerPrefix) - after - erase({?PDICT_ON_COMMIT_FUN, Tx}) - end + execute_transaction(Tx, Fun, LayerPrefix) end end) after @@ -1311,18 +1307,14 @@ check_db_version(#{} = Db, CheckDbVersion) -> } = Db, AlreadyChecked = get(?PDICT_CHECKED_DB_IS_CURRENT), - if not CheckDbVersion orelse AlreadyChecked == true -> Db; true -> + if not CheckDbVersion orelse AlreadyChecked == true -> current; true -> DbVersionKey = erlfdb_tuple:pack({?DB_VERSION}, DbPrefix), case erlfdb:wait(erlfdb:get(Tx, DbVersionKey)) of DbVersion -> put(?PDICT_CHECKED_DB_IS_CURRENT, true), - Now = erlang:monotonic_time(millisecond), - Db1 = Db#{check_current_ts := Now}, - on_commit(Tx, fun() -> fabric2_server:store(Db1) end), - Db1; + current; _NewDBVersion -> - fabric2_server:remove(maps:get(name, Db)), - throw({?MODULE, reopen}) + stale end end. @@ -1830,10 +1822,6 @@ fold_range_cb({K, V}, #fold_acc{} = Acc) -> restart_fold(Tx, #fold_acc{} = Acc) -> erase(?PDICT_CHECKED_MD_IS_CURRENT), - % Not actually committing anyting so we skip on-commit handlers here. Those - % are usually to refresh db handles in the cache. If the iterator runs for - % a while it might be inserting a stale handle in there anyway. - erase({?PDICT_ON_COMMIT_FUN, Tx}), ok = erlfdb:reset(Tx), @@ -1871,17 +1859,31 @@ ensure_current(Db) -> ensure_current(#{} = Db0, CheckDbVersion) -> require_transaction(Db0), - Db2 = case check_metadata_version(Db0) of - {current, Db1} -> Db1; - {stale, Db1} -> check_db_version(Db1, CheckDbVersion) + Db3 = case check_metadata_version(Db0) of + {current, Db1} -> + Db1; + {stale, Db1} -> + case check_db_version(Db1, CheckDbVersion) of + current -> + % If db version is current, update cache with the latest + % metadata so other requests can immediately see the + % refreshed db handle. + Now = erlang:monotonic_time(millisecond), + Db2 = Db1#{check_current_ts := Now}, + fabric2_server:maybe_update(Db2), + Db2; + stale -> + fabric2_server:maybe_remove(Db1), + throw({?MODULE, reopen}) + end end, - case maps:get(security_fun, Db2) of + case maps:get(security_fun, Db3) of SecurityFun when is_function(SecurityFun, 2) -> - #{security_doc := SecDoc} = Db2, - ok = SecurityFun(Db2, SecDoc), - Db2#{security_fun := undefined}; + #{security_doc := SecDoc} = Db3, + ok = SecurityFun(Db3, SecDoc), + Db3#{security_fun := undefined}; undefined -> - Db2 + Db3 end. @@ -1928,7 +1930,6 @@ execute_transaction(Tx, Fun, LayerPrefix) -> erlfdb:set(Tx, get_transaction_id(Tx, LayerPrefix), <<>>), put(?PDICT_TX_RES_KEY, Result) end, - ok = run_on_commit_fun(Tx), Result. @@ -1963,27 +1964,6 @@ get_transaction_id(Tx, LayerPrefix) -> end. -on_commit(Tx, Fun) when is_function(Fun, 0) -> - % Here we rely on Tx objects matching. However they contain a nif resource - % object. Before Erlang 20.0 those would have been represented as empty - % binaries and would have compared equal to each other. See - % http://erlang.org/doc/man/erl_nif.html for more info. We assume we run on - % Erlang 20+ here and don't worry about that anymore. - case get({?PDICT_ON_COMMIT_FUN, Tx}) of - undefined -> put({?PDICT_ON_COMMIT_FUN, Tx}, Fun); - _ -> error({?MODULE, on_commit_function_already_set}) - end. - - -run_on_commit_fun(Tx) -> - case get({?PDICT_ON_COMMIT_FUN, Tx}) of - undefined -> - ok; - Fun when is_function(Fun, 0) -> - Fun(), - ok - end. - with_span(Operation, ExtraTags, Fun) -> case ctrace:has_span() of true -> -- cgit v1.2.1 From b3fe0902683af5c65813cb0623a791cd8d9b8873 Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Tue, 2 Jun 2020 16:57:35 -0400 Subject: Handle error:{timeout, _} exception in couch_jobs:accept Under load accept loop can blow up with timeout error from `erlfdb:wait(...)`(https://github.com/apache/couchdb-erlfdb/blob/master/src/erlfdb.erl#L255) so guard against it just like we do for fdb transaction timeout (1031) errors. --- src/couch_jobs/src/couch_jobs.erl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/couch_jobs/src/couch_jobs.erl b/src/couch_jobs/src/couch_jobs.erl index adc1b464e..5f79407c5 100644 --- a/src/couch_jobs/src/couch_jobs.erl +++ b/src/couch_jobs/src/couch_jobs.erl @@ -300,6 +300,8 @@ accept_loop(Type, NoSched, MaxSchedTime, Timeout) -> AcceptResult = try couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(), TxFun) catch + error:{timeout, _} -> + retry; error:{erlfdb_error, Err} when Err =:= 1020 orelse Err =:= 1031 -> retry end, -- cgit v1.2.1 From b417bc1ef75115f9766822559dec91ce43e21b74 Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Tue, 2 Jun 2020 17:20:19 -0400 Subject: Improve efficiency of couch_jobs:accept/2 for views Use the `no_schedule` option to speed up job dequeuing. This optimization allows dequeuing jobs more efficiently if these conditions are met: 1) Job IDs start with a random prefix 2) No time-based scheduling is used Both of those can be true for views job IDs can be generated such that signature comes before the db name part, which is what this commit does. The way the optimization works, is random IDs are generating in pending jobs range, then, a key selection is used to pick either a job before or after it. That reduces each dequeue attempt to just 1 read instead of reading up to 1000 jobs. --- src/couch_views/src/couch_views_indexer.erl | 3 ++- src/couch_views/src/couch_views_jobs.erl | 4 +++- src/couch_views/test/couch_views_cleanup_test.erl | 2 +- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/couch_views/src/couch_views_indexer.erl b/src/couch_views/src/couch_views_indexer.erl index bd1bd4de6..4d09fdb6d 100644 --- a/src/couch_views/src/couch_views_indexer.erl +++ b/src/couch_views/src/couch_views_indexer.erl @@ -44,7 +44,8 @@ spawn_link() -> init() -> - {ok, Job, Data0} = couch_jobs:accept(?INDEX_JOB_TYPE, #{}), + Opts = #{no_schedule => true}, + {ok, Job, Data0} = couch_jobs:accept(?INDEX_JOB_TYPE, Opts), Data = upgrade_data(Data0), #{ <<"db_name">> := DbName, diff --git a/src/couch_views/src/couch_views_jobs.erl b/src/couch_views/src/couch_views_jobs.erl index a9ca168ee..4b0aa2660 100644 --- a/src/couch_views/src/couch_views_jobs.erl +++ b/src/couch_views/src/couch_views_jobs.erl @@ -134,7 +134,9 @@ job_id(#{name := DbName}, #mrst{sig = Sig}) -> job_id(DbName, Sig) -> HexSig = fabric2_util:to_hex(Sig), - <>. + % Put signature first in order to be able to use the no_schedule + % couch_jobs:accept/2 option + <>. job_data(Db, Mrst) -> diff --git a/src/couch_views/test/couch_views_cleanup_test.erl b/src/couch_views/test/couch_views_cleanup_test.erl index b5e081a98..e4dcdceea 100644 --- a/src/couch_views/test/couch_views_cleanup_test.erl +++ b/src/couch_views/test/couch_views_cleanup_test.erl @@ -408,4 +408,4 @@ job_id(Db, DDoc) -> DbName = fabric2_db:name(Db), {ok, #mrst{sig = Sig}} = couch_views_util:ddoc_to_mrst(DbName, DDoc), HexSig = fabric2_util:to_hex(Sig), - <>. + <>. -- cgit v1.2.1 From 2e938cae2083a16173b39f03b909c0d22201a443 Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Wed, 3 Jun 2020 12:40:24 -0400 Subject: Fix couch_jobs accept timeout when no_schedule option is used When waiting to accept jobs and scheduling was used, timeout is limited based on the time scheduling parameter. When no_schedule option is used, time scheduling parameter is set to 0 always, and so in that case, we have to special-case the limit to return `infinity`. Later on when we wait for the watch to fire, the actual timeout can still be limited, by a separate user specified timeout option, but if user specifies `infinity` there and sets `#{no_schedule => true}` then we should respect and never return `{error, not_found}` in response. --- src/couch_jobs/src/couch_jobs.erl | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/src/couch_jobs/src/couch_jobs.erl b/src/couch_jobs/src/couch_jobs.erl index 5f79407c5..88b4bf470 100644 --- a/src/couch_jobs/src/couch_jobs.erl +++ b/src/couch_jobs/src/couch_jobs.erl @@ -311,7 +311,7 @@ accept_loop(Type, NoSched, MaxSchedTime, Timeout) -> retry -> accept_loop(Type, NoSched, MaxSchedTime, Timeout); {not_found, PendingWatch} -> - case wait_pending(PendingWatch, MaxSchedTime, Timeout) of + case wait_pending(PendingWatch, MaxSchedTime, Timeout, NoSched) of {error, not_found} -> {error, not_found}; retry -> @@ -326,14 +326,14 @@ job(Type, JobId) -> #{job => true, type => Type, id => JobId}. -wait_pending(PendingWatch, _MaxSTime, 0) -> +wait_pending(PendingWatch, _MaxSTime, _UserTimeout = 0, _NoSched) -> erlfdb:cancel(PendingWatch, [flush]), {error, not_found}; -wait_pending(PendingWatch, MaxSTime, UserTimeout) -> +wait_pending(PendingWatch, MaxSTime, UserTimeout, NoSched) -> NowMSec = erlang:system_time(millisecond), Timeout0 = max(?MIN_ACCEPT_WAIT_MSEC, MaxSTime * 1000 - NowMSec), - Timeout = min(limit_timeout(Timeout0), UserTimeout), + Timeout = min(limit_timeout(Timeout0, NoSched), UserTimeout), try erlfdb:wait(PendingWatch, [{timeout, Timeout}]), ok @@ -348,7 +348,7 @@ wait_pending(PendingWatch, MaxSTime, UserTimeout) -> wait_any(Subs, Timeout0, ResendQ) when is_list(Subs) -> - Timeout = limit_timeout(Timeout0), + Timeout = limit_timeout(Timeout0, false), receive {?COUCH_JOBS_EVENT, Ref, Type, Id, State, Data0} = Msg -> case lists:keyfind(Ref, 2, Subs) of @@ -365,7 +365,7 @@ wait_any(Subs, Timeout0, ResendQ) when is_list(Subs) -> wait_any(Subs, State, Timeout0, ResendQ) when is_list(Subs) -> - Timeout = limit_timeout(Timeout0), + Timeout = limit_timeout(Timeout0, false), receive {?COUCH_JOBS_EVENT, Ref, Type, Id, MsgState, Data0} = Msg -> case lists:keyfind(Ref, 2, Subs) of @@ -385,10 +385,13 @@ wait_any(Subs, State, Timeout0, ResendQ) when end. -limit_timeout(Timeout) when is_integer(Timeout), Timeout < 16#FFFFFFFF -> +limit_timeout(_Timeout, true) -> + infinity; + +limit_timeout(Timeout, false) when is_integer(Timeout), Timeout < 16#FFFFFFFF -> Timeout; -limit_timeout(_Timeout) -> +limit_timeout(_Timeout, false) -> infinity. -- cgit v1.2.1 From 19ae50815ca1016719f94a2757e08757b37fb949 Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Thu, 4 Jun 2020 12:19:10 -0400 Subject: Include database uuid in db info result As per ML [discussion](https://lists.apache.org/thread.html/rb328513fb932e231cf8793f92dd1cc2269044cb73cb43a6662c464a1%40%3Cdev.couchdb.apache.org%3E) add a `uuid` field to db info results in order to be able to uniquely identify a particular instance of a database. When a database is deleted and re-created with the same name, it will return a new `uuid` value. --- src/fabric/src/fabric2_fdb.erl | 12 ++++++++++-- src/fabric/test/fabric2_db_crud_tests.erl | 4 ++-- src/fabric/test/fabric2_db_misc_tests.erl | 5 ++++- 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/src/fabric/src/fabric2_fdb.erl b/src/fabric/src/fabric2_fdb.erl index 7e736b041..f721ca4ab 100644 --- a/src/fabric/src/fabric2_fdb.erl +++ b/src/fabric/src/fabric2_fdb.erl @@ -107,6 +107,7 @@ db_prefix, changes_future, meta_future, + uuid_future, retries = 0 }). @@ -494,6 +495,9 @@ get_info_future(Tx, DbPrefix) -> {reverse, true} ]), + UUIDKey = erlfdb_tuple:pack({?DB_CONFIG, <<"uuid">>}, DbPrefix), + UUIDFuture = erlfdb:get(Tx, UUIDKey), + StatsPrefix = erlfdb_tuple:pack({?DB_STATS}, DbPrefix), MetaFuture = erlfdb:get_range_startswith(Tx, StatsPrefix), @@ -508,7 +512,8 @@ get_info_future(Tx, DbPrefix) -> tx = SaveTx, db_prefix = DbPrefix, changes_future = ChangesFuture, - meta_future = MetaFuture + meta_future = MetaFuture, + uuid_future = UUIDFuture }. @@ -1986,6 +1991,7 @@ get_info_wait_int(#info_future{} = InfoFuture) -> #info_future{ db_prefix = DbPrefix, changes_future = ChangesFuture, + uuid_future = UUIDFuture, meta_future = MetaFuture } = InfoFuture, @@ -1998,6 +2004,8 @@ get_info_wait_int(#info_future{} = InfoFuture) -> end, CProp = {update_seq, RawSeq}, + UUIDProp = {uuid, erlfdb:wait(UUIDFuture)}, + MProps = lists:foldl(fun({K, V}, Acc) -> case erlfdb_tuple:unpack(K, DbPrefix) of {?DB_STATS, <<"doc_count">>} -> @@ -2014,7 +2022,7 @@ get_info_wait_int(#info_future{} = InfoFuture) -> end end, [{sizes, {[]}}], erlfdb:wait(MetaFuture)), - [CProp | MProps]. + [CProp, UUIDProp | MProps]. binary_chunk_size() -> diff --git a/src/fabric/test/fabric2_db_crud_tests.erl b/src/fabric/test/fabric2_db_crud_tests.erl index b529935be..000f3709c 100644 --- a/src/fabric/test/fabric2_db_crud_tests.erl +++ b/src/fabric/test/fabric2_db_crud_tests.erl @@ -631,7 +631,7 @@ get_info_wait_retry_on_tx_too_old(_) -> ok = erlfdb:set_option(Tx, disallow_writes), InfoF = fabric2_fdb:get_info_future(Tx, DbPrefix), - {info_future, _, _, ChangesF, _, _} = InfoF, + {info_future, _, _, ChangesF, _, _, _} = InfoF, raise_in_erlfdb_wait(ChangesF, {erlfdb_error, 1007}, 3), ?assertError({erlfdb_error, 1007}, fabric2_fdb:get_info_wait(InfoF)), @@ -659,7 +659,7 @@ get_info_wait_retry_on_tx_abort(_)-> ok = erlfdb:set_option(Tx, disallow_writes), InfoF = fabric2_fdb:get_info_future(Tx, DbPrefix), - {info_future, _, _, ChangesF, _, _} = InfoF, + {info_future, _, _, ChangesF, _, _, _} = InfoF, raise_in_erlfdb_wait(ChangesF, {erlfdb_error, 1025}, 3), ?assertError({erlfdb_error, 1025}, fabric2_fdb:get_info_wait(InfoF)), diff --git a/src/fabric/test/fabric2_db_misc_tests.erl b/src/fabric/test/fabric2_db_misc_tests.erl index 2fad73f1f..23532144d 100644 --- a/src/fabric/test/fabric2_db_misc_tests.erl +++ b/src/fabric/test/fabric2_db_misc_tests.erl @@ -75,7 +75,10 @@ empty_db_info({DbName, Db, _}) -> ?assertEqual(DbName, fabric2_util:get_value(db_name, Info)), ?assertEqual(0, fabric2_util:get_value(doc_count, Info)), ?assertEqual(0, fabric2_util:get_value(doc_del_count, Info)), - ?assert(is_binary(fabric2_util:get_value(update_seq, Info))). + ?assert(is_binary(fabric2_util:get_value(update_seq, Info))), + InfoUUID = fabric2_util:get_value(uuid, Info), + UUID = fabric2_db:get_uuid(Db), + ?assertEqual(UUID, InfoUUID). accessors({DbName, Db, _}) -> -- cgit v1.2.1 From ab93b155095643ab13f3779abc6c948fe273fe4b Mon Sep 17 00:00:00 2001 From: mauroporras Date: Thu, 28 May 2020 08:53:25 -0500 Subject: feat(auth): Allow a custom JWT claim for roles --- rel/overlay/etc/default.ini | 1 + src/couch/src/couch_httpd_auth.erl | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/rel/overlay/etc/default.ini b/rel/overlay/etc/default.ini index 057ed4c1c..f3f12ca96 100644 --- a/rel/overlay/etc/default.ini +++ b/rel/overlay/etc/default.ini @@ -145,6 +145,7 @@ max_db_number_for_dbs_info_req = 100 ; can be the name of a claim like "exp" or a tuple if the claim requires ; a parameter ; required_claims = exp, {iss, "IssuerNameHere"} +; roles_claim_name = https://example.com/roles ; ; [jwt_keys] ; Configure at least one key here if using the JWT auth handler. diff --git a/src/couch/src/couch_httpd_auth.erl b/src/couch/src/couch_httpd_auth.erl index 0d3add0c8..45a82bd0f 100644 --- a/src/couch/src/couch_httpd_auth.erl +++ b/src/couch/src/couch_httpd_auth.erl @@ -198,7 +198,7 @@ jwt_authentication_handler(Req) -> false -> throw({unauthorized, <<"Token missing sub claim.">>}); {_, User} -> Req#httpd{user_ctx=#user_ctx{ name = User, - roles = couch_util:get_value(<<"_couchdb.roles">>, Claims, []) + roles = couch_util:get_value(?l2b(config:get("jwt_auth", "roles_claim_name", "_couchdb.roles")), Claims, []) }} end; {error, Reason} -> -- cgit v1.2.1 From 10fae610f3463e215f37296acc40df1c62cbd8c4 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Fri, 5 Jun 2020 12:40:08 +0100 Subject: Report if FIPS mode is enabled This will only report "fips" in the welcome message if FIPS mode was enabled at boot (i.e, in vm.args). --- src/couch/src/couch_server.erl | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/couch/src/couch_server.erl b/src/couch/src/couch_server.erl index b2f8fdead..6db3f7448 100644 --- a/src/couch/src/couch_server.erl +++ b/src/couch/src/couch_server.erl @@ -246,6 +246,16 @@ init([]) -> % Mark being able to receive documents with an _access property as a supported feature config:enable_feature('access-ready'), + % Mark if fips is enabled + case + erlang:function_exported(crypto, info_fips, 0) andalso + crypto:info_fips() == enabled of + true -> + config:enable_feature('fips'); + false -> + ok + end, + % read config and register for configuration changes % just stop if one of the config settings change. couch_server_sup -- cgit v1.2.1 From 3536ad87088ad0901cf28213f006ddf7c52b85b5 Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Thu, 4 Jun 2020 17:57:41 -0400 Subject: Split couch_views acceptors and workers Optimize couch_views by using a separate set of acceptors and workers. Previously, all `max_workers` where spawned on startup, and were to waiting to accept jobs in parallel. In a setup with a large number of pods, and 100 workers per pod, that could lead to a lot of conflicts being generated when all those workers race to accept the same job at the same time. The improvement is to spawn only a limited number of acceptors (5, by default), then, spawn more after some of them become workers. Also, when some workers finish or die with an error, check if more acceptors could be spawned. As an example, here is what might happen with `max_acceptors = 5` and `max_workers = 100` (`A` and `W` are the current counts of acceptors and workers, respectively): 1. Starting out: `A = 5, W = 0` 2. After 2 acceptors start running: `A = 3, W = 2` Then immediately 2 more acceptors are spawned: `A = 5, W = 2` 3. After 95 workers are started: `A = 5, W = 95` 4. Now if 3 acceptors accept, it would look like: `A = 2, W = 98` But no more acceptors would be started. 5. If the last 2 acceptors also accept jobs: `A = 0, W = 100` At this point no more indexing jobs can be accepted and started until at least one of the workers finish and exit. 6. If 1 worker exits: `A = 0, W = 99` An acceptor will be immediately spawned `A = 1, W = 99` 7. If all 99 workers exit, it will go back to: `A = 5, W = 0` --- rel/overlay/etc/default.ini | 12 +- src/couch_views/src/couch_views_indexer.erl | 3 + src/couch_views/src/couch_views_server.erl | 96 +++++++-- src/couch_views/test/couch_views_server_test.erl | 218 +++++++++++++++++++++ .../test/couch_views_trace_index_test.erl | 2 + 5 files changed, 309 insertions(+), 22 deletions(-) create mode 100644 src/couch_views/test/couch_views_server_test.erl diff --git a/rel/overlay/etc/default.ini b/rel/overlay/etc/default.ini index 43e1c0ba3..40a3b3179 100644 --- a/rel/overlay/etc/default.ini +++ b/rel/overlay/etc/default.ini @@ -294,11 +294,17 @@ iterations = 10 ; iterations for password hashing ; Settings for view indexing [couch_views] -; max_workers = 100 +; Maximum acceptors waiting to accept view indexing jobs +;max_acceptors = 5 +; +; Maximum number of view indexing workers +;max_workers = 100 +; ; The maximum allowed key size emitted from a view for a document (in bytes) -; key_size_limit = 8000 +;key_size_limit = 8000 +; ; The maximum allowed value size emitted from a view for a document (in bytes) -; value_size_limit = 64000 +;value_size_limit = 64000 ; CSP (Content Security Policy) Support for _utils [csp] diff --git a/src/couch_views/src/couch_views_indexer.erl b/src/couch_views/src/couch_views_indexer.erl index 4d09fdb6d..31868d9c0 100644 --- a/src/couch_views/src/couch_views_indexer.erl +++ b/src/couch_views/src/couch_views_indexer.erl @@ -46,6 +46,9 @@ spawn_link() -> init() -> Opts = #{no_schedule => true}, {ok, Job, Data0} = couch_jobs:accept(?INDEX_JOB_TYPE, Opts), + + couch_views_server:accepted(self()), + Data = upgrade_data(Data0), #{ <<"db_name">> := DbName, diff --git a/src/couch_views/src/couch_views_server.erl b/src/couch_views/src/couch_views_server.erl index d14216e40..e45a9f315 100644 --- a/src/couch_views/src/couch_views_server.erl +++ b/src/couch_views/src/couch_views_server.erl @@ -20,6 +20,9 @@ start_link/0 ]). +-export([ + accepted/1 +]). -export([ init/1, @@ -30,7 +33,7 @@ code_change/3 ]). - +-define(MAX_ACCEPTORS, 5). -define(MAX_WORKERS, 100). @@ -38,20 +41,44 @@ start_link() -> gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). +accepted(Worker) when is_pid(Worker) -> + gen_server:call(?MODULE, {accepted, Worker}, infinity). + + init(_) -> process_flag(trap_exit, true), couch_views_jobs:set_timeout(), St = #{ + acceptors => #{}, workers => #{}, + max_acceptors => max_acceptors(), max_workers => max_workers() }, - {ok, spawn_workers(St)}. + {ok, spawn_acceptors(St)}. terminate(_, _St) -> ok. +handle_call({accepted, Pid}, _From, St) -> + #{ + acceptors := Acceptors, + workers := Workers + } = St, + case maps:is_key(Pid, Acceptors) of + true -> + St1 = St#{ + acceptors := maps:remove(Pid, Acceptors), + workers := Workers#{Pid => true} + }, + {reply, ok, spawn_acceptors(St1)}; + false -> + LogMsg = "~p : unknown acceptor processs ~p", + couch_log:error(LogMsg, [?MODULE, Pid]), + {stop, {unknown_acceptor_pid, Pid}, St} + end; + handle_call(Msg, _From, St) -> {stop, {bad_call, Msg}, {bad_call, Msg}, St}. @@ -61,19 +88,16 @@ handle_cast(Msg, St) -> handle_info({'EXIT', Pid, Reason}, St) -> - #{workers := Workers} = St, - case maps:is_key(Pid, Workers) of - true -> - if Reason == normal -> ok; true -> - LogMsg = "~p : indexer process ~p exited with ~p", - couch_log:error(LogMsg, [?MODULE, Pid, Reason]) - end, - NewWorkers = maps:remove(Pid, Workers), - {noreply, spawn_workers(St#{workers := NewWorkers})}; - false -> - LogMsg = "~p : unknown process ~p exited with ~p", - couch_log:error(LogMsg, [?MODULE, Pid, Reason]), - {stop, {unknown_pid_exit, Pid}, St} + #{ + acceptors := Acceptors, + workers := Workers + } = St, + + % In Erlang 21+ could check map keys directly in the function head + case {maps:is_key(Pid, Acceptors), maps:is_key(Pid, Workers)} of + {true, false} -> handle_acceptor_exit(St, Pid, Reason); + {false, true} -> handle_worker_exit(St, Pid, Reason); + {false, false} -> handle_unknown_exit(St, Pid, Reason) end; handle_info(Msg, St) -> @@ -84,20 +108,54 @@ code_change(_OldVsn, St, _Extra) -> {ok, St}. -spawn_workers(St) -> +% Worker process exit handlers + +handle_acceptor_exit(#{acceptors := Acceptors} = St, Pid, Reason) -> + St1 = St#{acceptors := maps:remove(Pid, Acceptors)}, + LogMsg = "~p : acceptor process ~p exited with ~p", + couch_log:error(LogMsg, [?MODULE, Pid, Reason]), + {noreply, spawn_acceptors(St1)}. + + +handle_worker_exit(#{workers := Workers} = St, Pid, normal) -> + St1 = St#{workers := maps:remove(Pid, Workers)}, + {noreply, spawn_acceptors(St1)}; + +handle_worker_exit(#{workers := Workers} = St, Pid, Reason) -> + St1 = St#{workers := maps:remove(Pid, Workers)}, + LogMsg = "~p : indexer process ~p exited with ~p", + couch_log:error(LogMsg, [?MODULE, Pid, Reason]), + {noreply, spawn_acceptors(St1)}. + + +handle_unknown_exit(St, Pid, Reason) -> + LogMsg = "~p : unknown process ~p exited with ~p", + couch_log:error(LogMsg, [?MODULE, Pid, Reason]), + {stop, {unknown_pid_exit, Pid}, St}. + + +spawn_acceptors(St) -> #{ workers := Workers, + acceptors := Acceptors, + max_acceptors := MaxAcceptors, max_workers := MaxWorkers } = St, - case maps:size(Workers) < MaxWorkers of + ACnt = maps:size(Acceptors), + WCnt = maps:size(Workers), + case ACnt < MaxAcceptors andalso (ACnt + WCnt) < MaxWorkers of true -> Pid = couch_views_indexer:spawn_link(), - NewSt = St#{workers := Workers#{Pid => true}}, - spawn_workers(NewSt); + NewSt = St#{acceptors := Acceptors#{Pid => true}}, + spawn_acceptors(NewSt); false -> St end. +max_acceptors() -> + config:get_integer("couch_views", "max_acceptors", ?MAX_ACCEPTORS). + + max_workers() -> config:get_integer("couch_views", "max_workers", ?MAX_WORKERS). diff --git a/src/couch_views/test/couch_views_server_test.erl b/src/couch_views/test/couch_views_server_test.erl new file mode 100644 index 000000000..23c807cc2 --- /dev/null +++ b/src/couch_views/test/couch_views_server_test.erl @@ -0,0 +1,218 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_views_server_test). + +-include_lib("eunit/include/eunit.hrl"). +-include_lib("fabric/test/fabric2_test.hrl"). + + +couch_views_server_test_() -> + { + "Test couch_views_server", + { + setup, + fun setup/0, + fun cleanup/1, + { + foreach, + fun foreach_setup/0, + fun foreach_teardown/1, + [ + ?TDEF_FE(max_acceptors_started), + ?TDEF_FE(acceptors_become_workers), + ?TDEF_FE(handle_worker_death), + ?TDEF_FE(handle_acceptor_death), + ?TDEF_FE(handle_unknown_process_death), + ?TDEF_FE(max_workers_limit_works), + ?TDEF_FE(max_acceptors_greater_than_max_workers) + ] + } + } + }. + + +setup() -> + Ctx = test_util:start_couch([ + fabric, + couch_jobs, + couch_rate, + couch_js, + couch_eval + ]), + Ctx. + + +cleanup(Ctx) -> + test_util:stop_couch(Ctx). + + +foreach_setup() -> + config:set("couch_views", "max_acceptors", "2", false), + config:set("couch_views", "max_workers", "4", false), + meck:new(couch_views_server, [passthrough]), + meck:new(couch_views_indexer, [passthrough]), + meck:expect(couch_views_indexer, init, fun() -> + receive pls_accept -> ok end, + couch_views_server:accepted(self()), + receive pls_die -> ok end + end), + ok = application:start(couch_views). + + +foreach_teardown(_) -> + ok = application:stop(couch_views), + meck:unload(), + config:delete("couch_views", "max_acceptors", false), + config:delete("couch_views", "max_workers", false), + ok. + + +max_acceptors_started(_) -> + #{max_acceptors := MaxAcceptors, max_workers := MaxWorkers} = get_state(), + ?assertEqual(2, MaxAcceptors), + ?assertEqual(4, MaxWorkers), + + ?assertEqual(0, maps:size(workers())), + + [Pid1, Pid2] = maps:keys(acceptors()), + ?assert(is_pid(Pid1)), + ?assert(is_pid(Pid2)), + ?assert(is_process_alive(Pid1)), + ?assert(is_process_alive(Pid2)). + + +acceptors_become_workers(_) -> + ?assertEqual(0, maps:size(workers())), + + InitAcceptors = acceptors(), + accept_all(), + + ?assertEqual(2, maps:size(acceptors())), + ?assertEqual(2, maps:size(workers())), + + ?assertEqual(InitAcceptors, workers()). + + +handle_worker_death(_) -> + [Pid1, Pid2] = maps:keys(acceptors()), + accept_all(), + + % One worker exits normal + finish_normal([Pid1]), + ?assertEqual(2, maps:size(acceptors())), + ?assertEqual(1, maps:size(workers())), + + % The other blows up with an error + finish_error([Pid2]), + ?assertEqual(2, maps:size(acceptors())), + ?assertEqual(0, maps:size(workers())). + + +handle_acceptor_death(_) -> + [Pid1, Pid2] = maps:keys(acceptors()), + finish_error([Pid1]), + + NewAcceptors = acceptors(), + ?assertEqual(2, maps:size(NewAcceptors)), + ?assert(lists:member(Pid2, maps:keys(NewAcceptors))), + ?assert(not lists:member(Pid1, maps:keys(NewAcceptors))). + + +handle_unknown_process_death(_) -> + meck:reset(couch_views_server), + Pid = self(), + whereis(couch_views_server) ! {'EXIT', Pid, blah}, + meck:wait(1, couch_views_server, terminate, + [{unknown_pid_exit, Pid}, '_'], 5000). + + +max_workers_limit_works(_) -> + % Accept 2 jobs -> 2 workers + accept_all(), + ?assertEqual(2, maps:size(workers())), + + % Accept 2 more jobs -> 4 workers + accept_all(), + ?assertEqual(0, maps:size(acceptors())), + ?assertEqual(4, maps:size(workers())), + + % Kill 1 worker -> 1 acceptor and 3 workers + [Worker1 | _] = maps:keys(workers()), + finish_normal([Worker1]), + ?assertEqual(1, maps:size(acceptors())), + ?assertEqual(3, maps:size(workers())), + + % Kill 2 more workers -> 2 acceptors and 1 worker + [Worker2, Worker3 | _] = maps:keys(workers()), + finish_normal([Worker2, Worker3]), + ?assertEqual(2, maps:size(acceptors())), + ?assertEqual(1, maps:size(workers())), + + % Kill 1 last worker -> 2 acceptors and 0 workers + [Worker4] = maps:keys(workers()), + finish_normal([Worker4]), + ?assertEqual(2, maps:size(acceptors())), + ?assertEqual(0, maps:size(workers())). + +max_acceptors_greater_than_max_workers(_) -> + [Pid1, Pid2] = maps:keys(acceptors()), + + sys:replace_state(couch_views_server, fun(#{} = St) -> + St#{max_workers := 1} + end), + + accept_all(), + + finish_normal([Pid1]), + finish_normal([Pid2]), + + % Only 1 acceptor should start as it is effectively limited by max_workers + ?assertEqual(1, maps:size(acceptors())), + ?assertEqual(0, maps:size(workers())). + + +% Utility functions + +accept_all() -> + Acceptors = acceptors(), + meck:reset(couch_views_server), + [Pid ! pls_accept || Pid <- maps:keys(Acceptors)], + meck:wait(maps:size(Acceptors), couch_views_server, handle_call, 3, 5000). + + +acceptors() -> + #{acceptors := Acceptors} = get_state(), + Acceptors. + + +workers() -> + #{workers := Workers} = get_state(), + Workers. + + +get_state() -> + sys:get_state(couch_views_server, infinity). + + +finish_normal(Workers) when is_list(Workers) -> + meck:reset(couch_views_server), + [Pid ! pls_die || Pid <- Workers], + meck:wait(length(Workers), couch_views_server, handle_info, + [{'_', '_', normal}, '_'], 5000). + + +finish_error(Workers) when is_list(Workers) -> + meck:reset(couch_views_server), + [exit(Pid, badness) || Pid <- Workers], + meck:wait(length(Workers), couch_views_server, handle_info, + [{'_', '_', badness}, '_'], 5000). diff --git a/src/couch_views/test/couch_views_trace_index_test.erl b/src/couch_views/test/couch_views_trace_index_test.erl index 5b15a4ce2..f5ea37982 100644 --- a/src/couch_views/test/couch_views_trace_index_test.erl +++ b/src/couch_views/test/couch_views_trace_index_test.erl @@ -88,6 +88,8 @@ trace_single_doc(Db) -> meck:expect(couch_jobs, accept, 2, {ok, job, JobData}), meck:expect(couch_jobs, update, 3, {ok, job}), meck:expect(couch_jobs, finish, 3, ok), + meck:expect(couch_views_server, accepted, 1, ok), + put(erlfdb_trace, <<"views_write_one_doc">>), couch_views_indexer:init(), -- cgit v1.2.1 From a7803fb2023b72b684f8d2f1198363b9d6723400 Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Tue, 9 Jun 2020 17:53:56 -0400 Subject: In replicator, when rescheduling, pick only pending jobs which are not running Previously, when pending jobs were picked in the `ets:foldl` traversal, both running and non-running jobs were considered and a large number of running jobs could displace pending jobs in the accumulator. In the worst case, no crashed jobs would be restarted during rescheduling. --- src/couch_replicator/src/couch_replicator_scheduler.erl | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/couch_replicator/src/couch_replicator_scheduler.erl b/src/couch_replicator/src/couch_replicator_scheduler.erl index 53c040e8c..641443a7c 100644 --- a/src/couch_replicator/src/couch_replicator_scheduler.erl +++ b/src/couch_replicator/src/couch_replicator_scheduler.erl @@ -456,6 +456,9 @@ pending_jobs(Count) when is_integer(Count), Count > 0 -> [Job || {_Started, Job} <- gb_sets:to_list(Set1)]. +pending_fold(#job{pid = Pid}, Acc) when is_pid(Pid) -> + Acc; + pending_fold(Job, {Set, Now, Count, HealthThreshold}) -> Set1 = case {not_recently_crashed(Job, Now, HealthThreshold), gb_sets:size(Set) >= Count} of @@ -1051,6 +1054,7 @@ scheduler_test_() -> [ t_pending_jobs_simple(), t_pending_jobs_skip_crashed(), + t_pending_jobs_skip_running(), t_one_job_starts(), t_no_jobs_start_if_max_is_0(), t_one_job_starts_if_max_is_1(), @@ -1112,6 +1116,18 @@ t_pending_jobs_skip_crashed() -> end). +t_pending_jobs_skip_running() -> + ?_test(begin + Job1 = continuous(1), + Job2 = continuous_running(2), + Job3 = oneshot(3), + Job4 = oneshot_running(4), + Jobs = [Job1, Job2, Job3, Job4], + setup_jobs(Jobs), + ?assertEqual([Job1, Job3], pending_jobs(4)) + end). + + t_one_job_starts() -> ?_test(begin setup_jobs([oneshot(1)]), -- cgit v1.2.1 From b17bc49aac9e20696ca530bb09e21cefa4e4d6d1 Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Wed, 10 Jun 2020 17:22:36 -0400 Subject: Handle transaction and future timeouts in couch_jobs notifiers In an overload scenario do not let notifiers crash and lose their subscribers, instead make them more robust and let them retry on future or transaction timeouts. --- src/couch_jobs/src/couch_jobs_notifier.erl | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/couch_jobs/src/couch_jobs_notifier.erl b/src/couch_jobs/src/couch_jobs_notifier.erl index 1c554a0c0..ff4492bc5 100644 --- a/src/couch_jobs/src/couch_jobs_notifier.erl +++ b/src/couch_jobs/src/couch_jobs_notifier.erl @@ -115,7 +115,7 @@ handle_cast(Msg, St) -> handle_info({type_updated, VS}, St) -> VSMax = flush_type_updated_messages(VS), - {noreply, notify_subscribers(VSMax, St)}; + {noreply, try_notify_subscribers(VSMax, St)}; handle_info({Ref, ready}, St) when is_reference(Ref) -> % Don't crash out couch_jobs_server and the whole application would need to @@ -228,6 +228,15 @@ get_active_since(#st{jtx = JTx, type = Type, subs = Subs}, VS) -> end, maps:with(maps:keys(Subs), AllUpdated)). +try_notify_subscribers(ActiveVS, #st{} = St) -> + try + notify_subscribers(ActiveVS, St) + catch + error:{timeout, _} -> try_notify_subscribers(ActiveVS, St); + error:{erlfdb_error, 1031} -> try_notify_subscribers(ActiveVS, St) + end. + + notify_subscribers(_, #st{subs = Subs} = St) when map_size(Subs) =:= 0 -> St; -- cgit v1.2.1 From fa16e6a8ca1344a609421c7b2fe3a14fa2c85eaa Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Fri, 12 Jun 2020 13:35:57 -0400 Subject: Bump erlfdb to v1.2.2 https://github.com/apache/couchdb-erlfdb/releases/tag/v1.2.2 --- rebar.config.script | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rebar.config.script b/rebar.config.script index c145566a3..caf69131d 100644 --- a/rebar.config.script +++ b/rebar.config.script @@ -152,7 +152,7 @@ DepDescs = [ %% Independent Apps {config, "config", {tag, "2.1.7"}}, {b64url, "b64url", {tag, "1.0.2"}}, -{erlfdb, "erlfdb", {tag, "v1.2.1"}}, +{erlfdb, "erlfdb", {tag, "v1.2.2"}}, {ets_lru, "ets-lru", {tag, "1.1.0"}}, {khash, "khash", {tag, "1.1.0"}}, {snappy, "snappy", {tag, "CouchDB-1.0.4"}}, -- cgit v1.2.1 From 3423d1936d918598fbe434a3382dd93ae4d0228a Mon Sep 17 00:00:00 2001 From: Tony Sun Date: Fri, 12 Jun 2020 11:02:48 -0700 Subject: add back r and w options --- src/mango/src/mango_opts.erl | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/mango/src/mango_opts.erl b/src/mango/src/mango_opts.erl index e35767600..7bae9c90d 100644 --- a/src/mango/src/mango_opts.erl +++ b/src/mango/src/mango_opts.erl @@ -64,6 +64,12 @@ validate_idx_create({Props}) -> {optional, true}, {default, auto_name}, {validator, fun validate_idx_name/1} + ]}, + {<<"w">>, [ + {tag, w}, + {optional, true}, + {default, 2}, + {validator, fun is_pos_integer/1} ]} ], validate(Props, Opts). @@ -111,6 +117,12 @@ validate_find({Props}) -> {default, []}, {validator, fun validate_fields/1} ]}, + {<<"r">>, [ + {tag, r}, + {optional, true}, + {default, 1}, + {validator, fun mango_opts:is_pos_integer/1} + ]}, {<<"conflicts">>, [ {tag, conflicts}, {optional, true}, -- cgit v1.2.1 From 7e008d0fa3d4ba0fcc31989d3ded853683ecaf81 Mon Sep 17 00:00:00 2001 From: Eric Avdey Date: Tue, 16 Jun 2020 22:55:50 -0300 Subject: Reserve aegis namespace under ?CLUSTER_CONFIG --- src/fabric/include/fabric2.hrl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/fabric/include/fabric2.hrl b/src/fabric/include/fabric2.hrl index 29f3b029e..2eba4d5eb 100644 --- a/src/fabric/include/fabric2.hrl +++ b/src/fabric/include/fabric2.hrl @@ -28,6 +28,10 @@ -define(EXPIRING_CACHE, 53). -define(TX_IDS, 255). +% Cluster Level: (LayerPrefix, ?CLUSTER_CONFIG, X, ...) + +-define(AEGIS, 0). + % Database Level: (LayerPrefix, ?DBS, DbPrefix, X, ...) -define(DB_VERSION, 0). -- cgit v1.2.1 From 6659dbbd7c556b8dc00c075e331d7b106d44088d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bessenyei=20Bal=C3=A1zs=20Don=C3=A1t?= Date: Wed, 17 Jun 2020 20:09:11 +0200 Subject: Make restricted partition search parameters return bad request According to https://docs.couchdb.org/en/master/ddocs/search.html there are parameters for searches that are not allowed for partitioned queries. Those restrictions were not enforced, thus making the software and docs inconsistent. This commit adds them to validation so that the behavior matches the one described in the docs. --- src/dreyfus/src/dreyfus_httpd.erl | 22 ++++++++++--- .../test/elixir/test/partition_search_test.exs | 36 +++++++++++++++++++--- 2 files changed, 50 insertions(+), 8 deletions(-) diff --git a/src/dreyfus/src/dreyfus_httpd.erl b/src/dreyfus/src/dreyfus_httpd.erl index 346f5ede6..f0a130ef2 100644 --- a/src/dreyfus/src/dreyfus_httpd.erl +++ b/src/dreyfus/src/dreyfus_httpd.erl @@ -447,10 +447,15 @@ validate_search_restrictions(Db, DDoc, Args) -> q = Query, partition = Partition, grouping = Grouping, - limit = Limit + limit = Limit, + counts = Counts, + drilldown = Drilldown, + ranges = Ranges } = Args, #grouping{ - by = GroupBy + by = GroupBy, + limit = GroupLimit, + sort = GroupSort } = Grouping, case Query of @@ -496,9 +501,18 @@ validate_search_restrictions(Db, DDoc, Args) -> parse_non_negative_int_param("limit", Limit, "max_limit", MaxLimit) end, - case GroupBy /= nil andalso is_binary(Partition) of + DefaultArgs = #index_query_args{}, + + case is_binary(Partition) andalso ( + Counts /= DefaultArgs#index_query_args.counts + orelse Drilldown /= DefaultArgs#index_query_args.drilldown + orelse Ranges /= DefaultArgs#index_query_args.ranges + orelse GroupSort /= DefaultArgs#index_query_args.grouping#grouping.sort + orelse GroupBy /= DefaultArgs#index_query_args.grouping#grouping.by + orelse GroupLimit /= DefaultArgs#index_query_args.grouping#grouping.limit + ) of true -> - Msg5 = <<"`group_by` and `partition` are incompatible">>, + Msg5 = <<"`partition` and any of `drilldown`, `ranges`, `group_field`, `group_sort`, `group_limit` or `group_by` are incompatible">>, throw({bad_request, Msg5}); false -> ok diff --git a/src/dreyfus/test/elixir/test/partition_search_test.exs b/src/dreyfus/test/elixir/test/partition_search_test.exs index 19a915ad3..121995449 100644 --- a/src/dreyfus/test/elixir/test/partition_search_test.exs +++ b/src/dreyfus/test/elixir/test/partition_search_test.exs @@ -21,7 +21,7 @@ defmodule PartitionSearchTest do } end - resp = Couch.post("/#{db_name}/_bulk_docs", body: %{:docs => docs}, query: %{w: 3}) + resp = Couch.post("/#{db_name}/_bulk_docs", headers: ["Content-Type": "application/json"], body: %{:docs => docs}, query: %{w: 3}) assert resp.status_code in [201, 202] end @@ -166,7 +166,7 @@ defmodule PartitionSearchTest do resp = Couch.get(url, query: %{q: "some:field"}) assert resp.status_code == 200 ids = get_ids(resp) - assert ids == ["bar:1", "bar:5", "bar:9", "foo:2", "bar:3", "foo:4", "foo:6", "bar:7", "foo:8", "foo:10"] + assert Enum.sort(ids) == Enum.sort(["bar:1", "bar:5", "bar:9", "foo:2", "bar:3", "foo:4", "foo:6", "bar:7", "foo:8", "foo:10"]) end @tag :with_db @@ -179,7 +179,7 @@ defmodule PartitionSearchTest do resp = Couch.get(url, query: %{q: "some:field"}) assert resp.status_code == 200 ids = get_ids(resp) - assert ids == ["bar:1", "bar:5", "bar:9", "foo:2", "bar:3", "foo:4", "foo:6", "bar:7", "foo:8", "foo:10"] + assert Enum.sort(ids) == Enum.sort(["bar:1", "bar:5", "bar:9", "foo:2", "bar:3", "foo:4", "foo:6", "bar:7", "foo:8", "foo:10"]) end @tag :with_db @@ -192,7 +192,7 @@ defmodule PartitionSearchTest do resp = Couch.get(url, query: %{q: "some:field", limit: 3}) assert resp.status_code == 200 ids = get_ids(resp) - assert ids == ["bar:1", "bar:5", "bar:9"] + assert Enum.sort(ids) == Enum.sort(["bar:1", "bar:5", "bar:9"]) end @tag :with_db @@ -216,4 +216,32 @@ defmodule PartitionSearchTest do resp = Couch.post(url, body: %{q: "some:field", partition: "bar"}) assert resp.status_code == 400 end + + @tag :with_partitioned_db + test "restricted parameters are not allowed in query or body", context do + db_name = context[:db_name] + create_search_docs(db_name) + create_ddoc(db_name) + + body = %{q: "some:field", partition: "foo"} + + Enum.each( + [ + {:counts, "[\"type\"]"}, + {:group_field, "some"}, + {:ranges, :jiffy.encode(%{price: %{cheap: "[0 TO 100]"}})}, + {:drilldown, "[\"key\",\"a\"]"}, + ], + fn {key, value} -> + url = "/#{db_name}/_partition/foo/_design/library/_search/books" + bannedparam = Map.put(body, key, value) + get_resp = Couch.get(url, query: bannedparam) + %{:body => %{"reason" => get_reason}} = get_resp + assert Regex.match?(~r/are incompatible/, get_reason) + post_resp = Couch.post(url, body: bannedparam) + %{:body => %{"reason" => post_reason}} = post_resp + assert Regex.match?(~r/are incompatible/, post_reason) + end + ) + end end -- cgit v1.2.1 From 34baa46002a4ede723961a7d768eb25977965157 Mon Sep 17 00:00:00 2001 From: Jan Lehnardt Date: Thu, 18 Jun 2020 14:55:38 +0200 Subject: fix: send CSP header to make Fauxotn work fully Co-authored-by: Robert Newson --- src/chttpd/src/chttpd_auth.erl.orig | 89 ++++++++++++++++++++++++++++++ src/chttpd/src/chttpd_misc.erl | 2 +- src/chttpd/test/eunit/chttpd_csp_tests.erl | 2 +- 3 files changed, 91 insertions(+), 2 deletions(-) create mode 100644 src/chttpd/src/chttpd_auth.erl.orig diff --git a/src/chttpd/src/chttpd_auth.erl.orig b/src/chttpd/src/chttpd_auth.erl.orig new file mode 100644 index 000000000..607f09a8a --- /dev/null +++ b/src/chttpd/src/chttpd_auth.erl.orig @@ -0,0 +1,89 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(chttpd_auth). + +-export([authenticate/2]). +-export([authorize/2]). + +-export([default_authentication_handler/1]). +-export([cookie_authentication_handler/1]). +-export([proxy_authentication_handler/1]). +-export([party_mode_handler/1]). + +-export([handle_session_req/1]). + +-include_lib("couch/include/couch_db.hrl"). + +-define(SERVICE_ID, chttpd_auth). + + +%% ------------------------------------------------------------------ +%% API Function Definitions +%% ------------------------------------------------------------------ + +authenticate(HttpReq, Default) -> + maybe_handle(authenticate, [HttpReq], Default). + +authorize(HttpReq, Default) -> + maybe_handle(authorize, [HttpReq], Default). + + +%% ------------------------------------------------------------------ +%% Default callbacks +%% ------------------------------------------------------------------ + +default_authentication_handler(Req) -> + couch_httpd_auth:default_authentication_handler(Req, chttpd_auth_cache). + +cookie_authentication_handler(Req) -> + couch_httpd_auth:cookie_authentication_handler(Req, chttpd_auth_cache). + +proxy_authentication_handler(Req) -> + couch_httpd_auth:proxy_authentication_handler(Req). + +party_mode_handler(#httpd{method='POST', path_parts=[<<"_session">>]} = Req) -> + % See #1947 - users should always be able to attempt a login + Req#httpd{user_ctx=#user_ctx{}}; +party_mode_handler(Req) -> + RequireValidUser = config:get_boolean("chttpd", "require_valid_user", false), + ExceptUp = config:get_boolean("chttpd", "require_valid_user_except_for_up", true), + case RequireValidUser andalso not ExceptUp of + true -> + throw({unauthorized, <<"Authentication required.">>}); + false -> + case config:get("admins") of + [] -> + Req#httpd{user_ctx = ?ADMIN_USER}; + _ -> + Req#httpd{user_ctx=#user_ctx{}} + end + end. + +handle_session_req(Req) -> + couch_httpd_auth:handle_session_req(Req, chttpd_auth_cache). + + +%% ------------------------------------------------------------------ +%% Internal Function Definitions +%% ------------------------------------------------------------------ + +maybe_handle(Func, Args, Default) -> + Handle = couch_epi:get_handle(?SERVICE_ID), + case couch_epi:decide(Handle, ?SERVICE_ID, Func, Args, []) of + no_decision when is_function(Default) -> + apply(Default, Args); + no_decision -> + Default; + {decided, Result} -> + Result + end. diff --git a/src/chttpd/src/chttpd_misc.erl b/src/chttpd/src/chttpd_misc.erl index ffb5295b5..830fea378 100644 --- a/src/chttpd/src/chttpd_misc.erl +++ b/src/chttpd/src/chttpd_misc.erl @@ -105,7 +105,7 @@ handle_utils_dir_req(Req, _) -> send_method_not_allowed(Req, "GET,HEAD"). maybe_add_csp_headers(Headers, "true") -> - DefaultValues = "default-src 'self'; img-src 'self' data:; font-src 'self'; " + DefaultValues = "child-src 'self' data: blob:; default-src 'self'; img-src 'self' data:; font-src 'self'; " "script-src 'self' 'unsafe-eval'; style-src 'self' 'unsafe-inline';", Value = config:get("csp", "header_value", DefaultValues), [{"Content-Security-Policy", Value} | Headers]; diff --git a/src/chttpd/test/eunit/chttpd_csp_tests.erl b/src/chttpd/test/eunit/chttpd_csp_tests.erl index e86436254..b80e3fee6 100644 --- a/src/chttpd/test/eunit/chttpd_csp_tests.erl +++ b/src/chttpd/test/eunit/chttpd_csp_tests.erl @@ -56,7 +56,7 @@ should_not_return_any_csp_headers_when_disabled(Url) -> should_apply_default_policy(Url) -> ?_assertEqual( - "default-src 'self'; img-src 'self' data:; font-src 'self'; " + "child-src 'self' data: blob:; default-src 'self'; img-src 'self' data:; font-src 'self'; " "script-src 'self' 'unsafe-eval'; style-src 'self' 'unsafe-inline';", begin {ok, _, Headers, _} = test_request:get(Url), -- cgit v1.2.1 From 074789f20ffc65411d01d58a3d007cbae83bf58c Mon Sep 17 00:00:00 2001 From: Alessio Biancalana Date: Mon, 18 May 2020 23:16:26 +0200 Subject: Upgrade Credo to 1.4.0 --- mix.exs | 2 +- mix.lock | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/mix.exs b/mix.exs index bab22f12f..ae42af5d6 100644 --- a/mix.exs +++ b/mix.exs @@ -70,7 +70,7 @@ defmodule CouchDBTest.Mixfile do {:jwtf, path: Path.expand("src/jwtf", __DIR__)}, {:ibrowse, path: Path.expand("src/ibrowse", __DIR__), override: true, compile: false}, - {:credo, "~> 1.3.1", only: [:dev, :test, :integration], runtime: false} + {:credo, "~> 1.4.0", only: [:dev, :test, :integration], runtime: false} ] end diff --git a/mix.lock b/mix.lock index 29151a77e..8b6489f0c 100644 --- a/mix.lock +++ b/mix.lock @@ -1,13 +1,13 @@ %{ "bunt": {:hex, :bunt, "0.2.0", "951c6e801e8b1d2cbe58ebbd3e616a869061ddadcc4863d0a2182541acae9a38", [:mix], [], "hexpm", "7af5c7e09fe1d40f76c8e4f9dd2be7cebd83909f31fee7cd0e9eadc567da8353"}, "certifi": {:hex, :certifi, "2.5.1", "867ce347f7c7d78563450a18a6a28a8090331e77fa02380b4a21962a65d36ee5", [:rebar3], [{:parse_trans, "~>3.3", [hex: :parse_trans, repo: "hexpm", optional: false]}], "hexpm", "805abd97539caf89ec6d4732c91e62ba9da0cda51ac462380bbd28ee697a8c42"}, - "credo": {:hex, :credo, "1.3.1", "082e8d9268a489becf8e7aa75671a7b9088b1277cd6c1b13f40a55554b3f5126", [:mix], [{:bunt, "~> 0.2.0", [hex: :bunt, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}], "hexpm", "0da816ed52fa520b9ea0e5d18a0d3ca269e0bd410b1174d88d8abd94be6cce3c"}, + "credo": {:hex, :credo, "1.4.0", "92339d4cbadd1e88b5ee43d427b639b68a11071b6f73854e33638e30a0ea11f5", [:mix], [{:bunt, "~> 0.2.0", [hex: :bunt, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}], "hexpm", "1fd3b70dce216574ce3c18bdf510b57e7c4c85c2ec9cad4bff854abaf7e58658"}, "excoveralls": {:hex, :excoveralls, "0.12.1", "a553c59f6850d0aff3770e4729515762ba7c8e41eedde03208182a8dc9d0ce07", [:mix], [{:hackney, "~> 1.0", [hex: :hackney, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}], "hexpm", "5c1f717066a299b1b732249e736c5da96bb4120d1e55dc2e6f442d251e18a812"}, "hackney": {:hex, :hackney, "1.15.2", "07e33c794f8f8964ee86cebec1a8ed88db5070e52e904b8f12209773c1036085", [:rebar3], [{:certifi, "2.5.1", [hex: :certifi, repo: "hexpm", optional: false]}, {:idna, "6.0.0", [hex: :idna, repo: "hexpm", optional: false]}, {:metrics, "1.0.1", [hex: :metrics, repo: "hexpm", optional: false]}, {:mimerl, "~>1.1", [hex: :mimerl, repo: "hexpm", optional: false]}, {:ssl_verify_fun, "1.1.5", [hex: :ssl_verify_fun, repo: "hexpm", optional: false]}], "hexpm", "e0100f8ef7d1124222c11ad362c857d3df7cb5f4204054f9f0f4a728666591fc"}, "httpotion": {:hex, :httpotion, "3.1.3", "fdaf1e16b9318dcb722de57e75ac368c93d4c6e3c9125f93e960f953a750fb77", [:mix], [{:ibrowse, "== 4.4.0", [hex: :ibrowse, repo: "hexpm", optional: false]}], "hexpm", "e420172ef697a0f1f4dc40f89a319d5a3aad90ec51fa424f08c115f04192ae43"}, "ibrowse": {:hex, :ibrowse, "4.4.0", "2d923325efe0d2cb09b9c6a047b2835a5eda69d8a47ed6ff8bc03628b764e991", [:rebar3], [], "hexpm"}, "idna": {:hex, :idna, "6.0.0", "689c46cbcdf3524c44d5f3dde8001f364cd7608a99556d8fbd8239a5798d4c10", [:rebar3], [{:unicode_util_compat, "0.4.1", [hex: :unicode_util_compat, repo: "hexpm", optional: false]}], "hexpm", "4bdd305eb64e18b0273864920695cb18d7a2021f31a11b9c5fbcd9a253f936e2"}, - "jason": {:hex, :jason, "1.2.0", "10043418c42d2493d0ee212d3fddd25d7ffe484380afad769a0a38795938e448", [:mix], [{:decimal, "~> 1.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm", "116747dbe057794c3a3e4e143b7c8390b29f634e16c78a7f59ba75bfa6852e7f"}, + "jason": {:hex, :jason, "1.2.1", "12b22825e22f468c02eb3e4b9985f3d0cb8dc40b9bd704730efa11abd2708c44", [:mix], [{:decimal, "~> 1.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm", "b659b8571deedf60f79c5a608e15414085fa141344e2716fbd6988a084b5f993"}, "jiffy": {:hex, :jiffy, "0.15.2", "de266c390111fd4ea28b9302f0bc3d7472468f3b8e0aceabfbefa26d08cd73b7", [:rebar3], [], "hexpm"}, "junit_formatter": {:hex, :junit_formatter, "3.0.0", "13950d944dbd295da7d8cc4798b8faee808a8bb9b637c88069954eac078ac9da", [:mix], [], "hexpm", "d77b7b9a1601185b18dfe7682b27c46d5d12721f12fdc75180a6fc573b4e64b1"}, "metrics": {:hex, :metrics, "1.0.1", "25f094dea2cda98213cecc3aeff09e940299d950904393b2a29d191c346a8486", [:rebar3], [], "hexpm", "69b09adddc4f74a40716ae54d140f93beb0fb8978d8636eaded0c31b6f099f16"}, -- cgit v1.2.1 From 42403914a8c86a26cf58363f0eaf35551400aa30 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bessenyei=20Bal=C3=A1zs=20Don=C3=A1t?= Date: Fri, 19 Jun 2020 19:31:37 +0200 Subject: Allow drilldown for search to always be specified as list of lists To use multiple `drilldown` parameters users had to define `drilldown` multiple times to be able supply them. This caused interoperability issues as most languages require defining query parameters and request bodies as associative arrays, maps or dictionaries where the keys are unique. This change enables defining `drilldown` as a list of lists so that other languages can define multiple drilldown keys and values. Co-authored-by: Robert Newson --- src/dreyfus/src/dreyfus_httpd.erl | 2 + src/dreyfus/test/elixir/test/search_test.exs | 201 +++++++++++++++++++++++++++ 2 files changed, 203 insertions(+) create mode 100644 src/dreyfus/test/elixir/test/search_test.exs diff --git a/src/dreyfus/src/dreyfus_httpd.erl b/src/dreyfus/src/dreyfus_httpd.erl index f0a130ef2..007dace8f 100644 --- a/src/dreyfus/src/dreyfus_httpd.erl +++ b/src/dreyfus/src/dreyfus_httpd.erl @@ -239,6 +239,8 @@ validate_index_query(counts, Value, Args) -> Args#index_query_args{counts=Value}; validate_index_query(ranges, Value, Args) -> Args#index_query_args{ranges=Value}; +validate_index_query(drilldown, [[_|_]|_] = Value, Args) -> + Args#index_query_args{drilldown=Value}; validate_index_query(drilldown, Value, Args) -> DrillDown = Args#index_query_args.drilldown, Args#index_query_args{drilldown=[Value|DrillDown]}; diff --git a/src/dreyfus/test/elixir/test/search_test.exs b/src/dreyfus/test/elixir/test/search_test.exs new file mode 100644 index 000000000..e524a5cf4 --- /dev/null +++ b/src/dreyfus/test/elixir/test/search_test.exs @@ -0,0 +1,201 @@ +defmodule SearchTest do + use CouchTestCase + + @moduletag :search + + @moduledoc """ + Test search + """ + + def create_search_docs(db_name) do + resp = Couch.post("/#{db_name}/_bulk_docs", + headers: ["Content-Type": "application/json"], + body: %{:docs => [ + %{"item" => "apple", "place" => "kitchen", "state" => "new"}, + %{"item" => "banana", "place" => "kitchen", "state" => "new"}, + %{"item" => "carrot", "place" => "kitchen", "state" => "old"}, + %{"item" => "date", "place" => "lobby", "state" => "unknown"}, + ]} + ) + assert resp.status_code in [201, 202] + end + + def create_ddoc(db_name, opts \\ %{}) do + default_ddoc = %{ + indexes: %{ + fruits: %{ + analyzer: %{name: "standard"}, + index: "function (doc) {\n index(\"item\", doc.item, {facet: true});\n index(\"place\", doc.place, {facet: true});\n index(\"state\", doc.state, {facet: true});\n}" + } + } + } + + ddoc = Enum.into(opts, default_ddoc) + + resp = Couch.put("/#{db_name}/_design/inventory", body: ddoc) + assert resp.status_code in [201, 202] + assert Map.has_key?(resp.body, "ok") == true + end + + def get_items (resp) do + %{:body => %{"rows" => rows}} = resp + Enum.map(rows, fn row -> row["doc"]["item"] end) + end + + @tag :with_db + test "search returns all items for GET", context do + db_name = context[:db_name] + create_search_docs(db_name) + create_ddoc(db_name) + + url = "/#{db_name}/_design/inventory/_search/fruits" + resp = Couch.get(url, query: %{q: "*:*", include_docs: true}) + assert resp.status_code == 200 + ids = get_items(resp) + assert Enum.sort(ids) == Enum.sort(["apple", "banana", "carrot", "date"]) + end + + @tag :with_db + test "drilldown single key single value for GET", context do + db_name = context[:db_name] + create_search_docs(db_name) + create_ddoc(db_name) + + url = "/#{db_name}/_design/inventory/_search/fruits" + resp = Couch.get(url, query: %{q: "*:*", drilldown: :jiffy.encode(["place", "kitchen"]), include_docs: true}) + assert resp.status_code == 200 + ids = get_items(resp) + assert Enum.sort(ids) == Enum.sort(["apple", "banana", "carrot"]) + end + + @tag :with_db + test "drilldown single key multiple values for GET", context do + db_name = context[:db_name] + create_search_docs(db_name) + create_ddoc(db_name) + + url = "/#{db_name}/_design/inventory/_search/fruits" + resp = Couch.get(url, query: %{q: "*:*", drilldown: :jiffy.encode(["state", "new", "unknown"]), include_docs: true}) + assert resp.status_code == 200 + ids = get_items(resp) + assert Enum.sort(ids) == Enum.sort(["apple", "banana", "date"]) + end + + @tag :with_db + test "drilldown multiple keys single values for GET", context do + db_name = context[:db_name] + create_search_docs(db_name) + create_ddoc(db_name) + + url = "/#{db_name}/_design/inventory/_search/fruits" + resp = Couch.get(url, query: %{q: "*:*", drilldown: :jiffy.encode([["state", "old"], ["item", "apple"]]), include_docs: true}) + assert resp.status_code == 200 + ids = get_items(resp) + assert Enum.sort(ids) == [] + end + + @tag :with_db + test "drilldown multiple query definitions for GET", context do + db_name = context[:db_name] + create_search_docs(db_name) + create_ddoc(db_name) + + url = "/#{db_name}/_design/inventory/_search/fruits?q=*:*&drilldown=[\"state\",\"old\"]&drilldown=[\"item\",\"apple\"]&include_docs=true" + resp = Couch.get(url) + assert resp.status_code == 200 + ids = get_items(resp) + assert Enum.sort(ids) == [] + end + + + @tag :with_db + test "search returns all items for POST", context do + db_name = context[:db_name] + create_search_docs(db_name) + create_ddoc(db_name) + + url = "/#{db_name}/_design/inventory/_search/fruits" + resp = Couch.post(url, body: %{q: "*:*", include_docs: true}) + assert resp.status_code == 200 + ids = get_items(resp) + assert Enum.sort(ids) == Enum.sort(["apple", "banana", "carrot", "date"]) + end + + @tag :with_db + test "drilldown single key single value for POST", context do + db_name = context[:db_name] + create_search_docs(db_name) + create_ddoc(db_name) + + url = "/#{db_name}/_design/inventory/_search/fruits" + resp = Couch.post(url, body: %{query: "*:*", drilldown: ["place", "kitchen"], include_docs: true}) + assert resp.status_code == 200 + ids = get_items(resp) + assert Enum.sort(ids) == Enum.sort(["apple", "banana", "carrot"]) + end + + @tag :with_db + test "drilldown single key multiple values for POST", context do + db_name = context[:db_name] + create_search_docs(db_name) + create_ddoc(db_name) + + url = "/#{db_name}/_design/inventory/_search/fruits" + resp = Couch.post(url, body: %{query: "*:*", drilldown: ["state", "new", "unknown"], include_docs: true}) + assert resp.status_code == 200 + ids = get_items(resp) + assert Enum.sort(ids) == Enum.sort(["apple", "banana", "date"]) + end + + @tag :with_db + test "drilldown multiple keys single values for POST", context do + db_name = context[:db_name] + create_search_docs(db_name) + create_ddoc(db_name) + + url = "/#{db_name}/_design/inventory/_search/fruits" + resp = Couch.post(url, body: %{q: "*:*", drilldown: [["state", "old"], ["item", "apple"]], include_docs: true}) + assert resp.status_code == 200 + ids = get_items(resp) + assert Enum.sort(ids) == [] + end + + @tag :with_db + test "drilldown three keys single values for POST", context do + db_name = context[:db_name] + create_search_docs(db_name) + create_ddoc(db_name) + + url = "/#{db_name}/_design/inventory/_search/fruits" + resp = Couch.post(url, body: %{q: "*:*", drilldown: [["place", "kitchen"], ["state", "new"], ["item", "apple"]], include_docs: true}) + assert resp.status_code == 200 + ids = get_items(resp) + assert Enum.sort(ids) == ["apple"] + end + + @tag :with_db + test "drilldown multiple keys multiple values for POST", context do + db_name = context[:db_name] + create_search_docs(db_name) + create_ddoc(db_name) + + url = "/#{db_name}/_design/inventory/_search/fruits" + resp = Couch.post(url, body: %{q: "*:*", drilldown: [["state", "old", "new"], ["item", "apple"]], include_docs: true}) + assert resp.status_code == 200 + ids = get_items(resp) + assert Enum.sort(ids) == ["apple"] + end + + @tag :with_db + test "drilldown multiple query definitions for POST", context do + db_name = context[:db_name] + create_search_docs(db_name) + create_ddoc(db_name) + + url = "/#{db_name}/_design/inventory/_search/fruits" + resp = Couch.post(url, body: "{\"include_docs\": true, \"q\": \"*:*\", \"drilldown\": [\"state\", \"old\"], \"drilldown\": [\"item\", \"apple\"]}") + assert resp.status_code == 200 + ids = get_items(resp) + assert Enum.sort(ids) == ["apple"] + end +end -- cgit v1.2.1 From afbe32ed13c316acb188b2fdd6648e7991a04d07 Mon Sep 17 00:00:00 2001 From: ILYA Khlopotov Date: Mon, 22 Jun 2020 14:04:00 -0700 Subject: Add max_bulk_get_count configuration option --- rel/overlay/etc/default.ini | 4 ++++ src/chttpd/src/chttpd.erl | 2 ++ src/chttpd/src/chttpd_db.erl | 5 +++++ src/chttpd/test/eunit/chttpd_db_doc_size_tests.erl | 24 +++++++++++++++++++++- 4 files changed, 34 insertions(+), 1 deletion(-) diff --git a/rel/overlay/etc/default.ini b/rel/overlay/etc/default.ini index 40a3b3179..1c37765be 100644 --- a/rel/overlay/etc/default.ini +++ b/rel/overlay/etc/default.ini @@ -59,6 +59,10 @@ max_document_size = 8000000 ; bytes ; returns a 413 error for the whole request ;max_bulk_docs_count = 10000 ; +; Maximum number of documents in a _bulk_get request. Anything larger +; returns a 413 error for the whole request +;max_bulk_get_count = 10000 +; ; Maximum attachment size. ; max_attachment_size = infinity ; diff --git a/src/chttpd/src/chttpd.erl b/src/chttpd/src/chttpd.erl index e8639ed8d..eca936fed 100644 --- a/src/chttpd/src/chttpd.erl +++ b/src/chttpd/src/chttpd.erl @@ -958,6 +958,8 @@ error_info({request_entity_too_large, {attachment, AttName}}) -> {413, <<"attachment_too_large">>, AttName}; error_info({request_entity_too_large, {bulk_docs, Max}}) when is_integer(Max) -> {413, <<"max_bulk_docs_count_exceeded">>, integer_to_binary(Max)}; +error_info({request_entity_too_large, {bulk_get, Max}}) when is_integer(Max) -> + {413, <<"max_bulk_get_count_exceeded">>, integer_to_binary(Max)}; error_info({request_entity_too_large, DocID}) -> {413, <<"document_too_large">>, DocID}; error_info({error, security_migration_updates_disabled}) -> diff --git a/src/chttpd/src/chttpd_db.erl b/src/chttpd/src/chttpd_db.erl index 5af98fe3a..fdaf4af8c 100644 --- a/src/chttpd/src/chttpd_db.erl +++ b/src/chttpd/src/chttpd_db.erl @@ -560,6 +560,11 @@ db_req(#httpd{method='POST', path_parts=[_, <<"_bulk_get">>], undefined -> throw({bad_request, <<"Missing JSON list of 'docs'.">>}); Docs -> + MaxDocs = config:get_integer("couchdb", "max_bulk_get_count", 10000), + case length(Docs) =< MaxDocs of + true -> ok; + false -> throw({request_entity_too_large, {bulk_get, MaxDocs}}) + end, #doc_query_args{ options = Options } = bulk_get_parse_doc_query(Req), diff --git a/src/chttpd/test/eunit/chttpd_db_doc_size_tests.erl b/src/chttpd/test/eunit/chttpd_db_doc_size_tests.erl index 2b04050a2..2826cda24 100644 --- a/src/chttpd/test/eunit/chttpd_db_doc_size_tests.erl +++ b/src/chttpd/test/eunit/chttpd_db_doc_size_tests.erl @@ -30,6 +30,7 @@ setup() -> ok = config:set("admins", ?USER, ?b2l(Hashed), _Persist=false), ok = config:set("couchdb", "max_document_size", "50"), ok = config:set("couchdb", "max_bulk_docs_count", "2"), + ok = config:set("couchdb", "max_bulk_get_count", "2"), TmpDb = ?tempdb(), Addr = config:get("chttpd", "bind_address", "127.0.0.1"), Port = mochiweb_socket_server:get(chttpd, port), @@ -41,7 +42,9 @@ teardown(Url) -> delete_db(Url), ok = config:delete("admins", ?USER, _Persist=false), ok = config:delete("couchdb", "max_document_size"), - ok = config:delete("couchdb", "max_bulk_docs_count"). + ok = config:delete("couchdb", "max_bulk_docs_count"), + ok = config:delete("couchdb", "max_bulk_get_count"), + ok. create_db(Url) -> {ok, Status, _, _} = test_request:put(Url, [?CONTENT_JSON, ?AUTH], "{}"), @@ -70,6 +73,7 @@ all_test_() -> fun put_single_doc/1, fun bulk_doc/1, fun bulk_docs_too_many_docs/1, + fun bulk_get_too_many_docs/1, fun put_post_doc_attach_inline/1, fun put_multi_part_related/1, fun post_multi_part_form/1 @@ -120,6 +124,24 @@ bulk_docs_too_many_docs(Url) -> ?_assertEqual({413, ExpectJson}, {Code, ResultJson}). +bulk_get_too_many_docs(Url) -> + Docs = lists:map(fun(_) -> + {ok, 201, _, Body} = test_request:post(Url, + [?CONTENT_JSON, ?AUTH], "{}"), + {Props} = ?JSON_DECODE(Body), + {lists:keydelete(<<"ok">>, 1, Props)} + end, [1, 2, 3, 4]), + + {ok, Code, _, ResultBody} = test_request:post(Url ++ "/_bulk_get/", + [?CONTENT_JSON, ?AUTH], ?JSON_ENCODE({[{<<"docs">>, Docs}]})), + ResultJson = ?JSON_DECODE(ResultBody), + ExpectJson = {[ + {<<"error">>,<<"max_bulk_get_count_exceeded">>}, + {<<"reason">>,<<"2">>} + ]}, + ?_assertEqual({413, ExpectJson}, {Code, ResultJson}). + + put_post_doc_attach_inline(Url) -> Body1 = "{\"body\":\"This is a body.\",", Body2 = lists:concat(["{\"body\":\"This is a body it should fail", -- cgit v1.2.1 From c155bd544f106589e6137753e492e2329dfd1fb9 Mon Sep 17 00:00:00 2001 From: Juanjo Rodriguez Date: Fri, 26 Jun 2020 17:29:45 +0200 Subject: Tests already ported to elixir --- test/javascript/tests/reduce_builtin.js | 1 + test/javascript/tests/reduce_false.js | 1 + 2 files changed, 2 insertions(+) diff --git a/test/javascript/tests/reduce_builtin.js b/test/javascript/tests/reduce_builtin.js index 4686841e3..77d8d1b34 100644 --- a/test/javascript/tests/reduce_builtin.js +++ b/test/javascript/tests/reduce_builtin.js @@ -10,6 +10,7 @@ // License for the specific language governing permissions and limitations under // the License. +couchTests.elixir = true; couchTests.reduce_builtin = function(debug) { var db_name = get_random_db_name(); var db = new CouchDB(db_name, {"X-Couch-Full-Commit":"false"}); diff --git a/test/javascript/tests/reduce_false.js b/test/javascript/tests/reduce_false.js index 81b4c8a4f..69d8b0cf4 100644 --- a/test/javascript/tests/reduce_false.js +++ b/test/javascript/tests/reduce_false.js @@ -10,6 +10,7 @@ // License for the specific language governing permissions and limitations under // the License. +couchTests.elixir = true; couchTests.reduce_false = function(debug) { var db_name = get_random_db_name(); var db = new CouchDB(db_name, {"X-Couch-Full-Commit":"false"}); -- cgit v1.2.1 From 5c49e0fbb36feec77d43bc8e23693796b250b887 Mon Sep 17 00:00:00 2001 From: Juanjo Rodriguez Date: Fri, 26 Jun 2020 17:34:00 +0200 Subject: Skip tests as temporary views are not supported --- test/javascript/tests/reduce_false_temp.js | 1 + 1 file changed, 1 insertion(+) diff --git a/test/javascript/tests/reduce_false_temp.js b/test/javascript/tests/reduce_false_temp.js index 51b23bd6b..a13b4ab18 100644 --- a/test/javascript/tests/reduce_false_temp.js +++ b/test/javascript/tests/reduce_false_temp.js @@ -10,6 +10,7 @@ // License for the specific language governing permissions and limitations under // the License. +couchTests.skip = true; couchTests.reduce_false_temp = function(debug) { var db_name = get_random_db_name(); var db = new CouchDB(db_name, {"X-Couch-Full-Commit":"false"}); -- cgit v1.2.1 From 8d20399ceddd3325de1cec027c50497277620876 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Mon, 29 Jun 2020 10:42:15 +0100 Subject: Initial commit A B+Tree (all values stored in leaves) with configurable order, where all data is stored in FoundationDB. The tree is balanced at all times. A bidirectional linked list is maintained between leaf nodes for efficient range queries in either direction. The FoundationDB keys are currently random UUID's. TODO 1. Rewrite inner node ids (non-root, non-leaf) so we can safely cache them outside of a transaction. 2. Store reduction values on inner nodes. 3. Chunkify large values over multiple rows? 4. Sequential node ids? --- rebar.config | 4 + src/ebtree.erl | 625 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 629 insertions(+) create mode 100644 rebar.config create mode 100644 src/ebtree.erl diff --git a/rebar.config b/rebar.config new file mode 100644 index 000000000..66db28481 --- /dev/null +++ b/rebar.config @@ -0,0 +1,4 @@ +{erl_opts, [debug_info]}. +{deps, [ + {erlfdb, {git, "https://github.com/apache/couchdb-erlfdb", {tag, "v1.2.2"}}} +]}. diff --git a/src/ebtree.erl b/src/ebtree.erl new file mode 100644 index 000000000..7c586b57b --- /dev/null +++ b/src/ebtree.erl @@ -0,0 +1,625 @@ +-module(ebtree). + +-export([ + init/3, + open/2, + insert/4, + delete/3, + lookup/3, + range/6, + reverse_range/6, + validate_tree/2 +]). + +-record(node, { + id, + level = 0, + prev, + next, + members = [] %% [{Key0, Value0 | Pointer0}, {Key1, Value1 | Pointer1}, ...] +}). + +-record(tree, { + prefix, + min, + max +}). + +-define(META, 0). +-define(META_ORDER, 0). + +-define(NODE, 1). +-define(NODE_ROOT_ID, <<0>>). + +-define(underflow(Tree, Node), Tree#tree.min > length(Node#node.members)). +-define(at_min(Tree, Node), Tree#tree.min == length(Node#node.members)). +-define(is_full(Tree, Node), Tree#tree.max == length(Node#node.members)). + +-compile(export_all). + +init(Db, Prefix, Order) when is_binary(Prefix), is_integer(Order), Order > 2, Order rem 2 == 0 -> + erlfdb:transactional(Db, fun(Tx) -> + erlfdb:clear_range_startswith(Tx, Prefix), + set_meta(Tx, Prefix, ?META_ORDER, Order), + set_node(Tx, to_tree(Prefix, Order), #node{id = ?NODE_ROOT_ID}), + ok + end). + + +open(Db, Prefix) -> + erlfdb:transactional(Db, fun(Tx) -> + Order = get_meta(Tx, Prefix, ?META_ORDER), + to_tree(Prefix, Order) + end). + + +%% lookup + +lookup(Db, #tree{} = Tree, Key) -> + erlfdb:transactional(Db, fun(Tx) -> + lookup(Tx, Tree, get_node_wait(Tx, Tree, ?NODE_ROOT_ID), Key) + end). + +lookup(_Tx, #tree{} = _Tree, #node{level = 0} = Node, Key) -> + find_value(Node, Key); + +lookup(Tx, #tree{} = Tree, #node{} = Node, Key) -> + ChildId = find_child_id(Node, Key), + lookup(Tx, Tree, get_node_wait(Tx, Tree, ChildId), Key). + +%% range (inclusive of both ends) + +range(Db, #tree{} = Tree, StartKey, EndKey, Fun, Acc0) -> + erlfdb:transactional(Db, fun(Tx) -> + range(Tx, Tree, get_node_wait(Tx, Tree, ?NODE_ROOT_ID), StartKey, EndKey, Fun, Acc0) + end). + +range(Tx, #tree{} = Tree, #node{level = 0} = Node, StartKey, EndKey, Fun, Acc0) -> + InRange = [{K, V} || {K, V} <- Node#node.members, K >= StartKey, K =< EndKey], + Acc1 = Fun(InRange, Acc0), + LastKey = last_key(Node), + case Node#node.next /= undefined andalso EndKey >= LastKey of + true -> + range(Tx, Tree, get_node_wait(Tx, Tree, Node#node.next), StartKey, EndKey, Fun, Acc1); + false -> + Acc1 + end; + +range(Tx, #tree{} = Tree, #node{} = Node, StartKey, EndKey, Fun, Acc) -> + ChildId = find_child_id(Node, StartKey), + range(Tx, Tree, get_node_wait(Tx, Tree, ChildId), StartKey, EndKey, Fun, Acc). + +%% reverse range (inclusive of both ends) + +reverse_range(Db, #tree{} = Tree, StartKey, EndKey, Fun, Acc0) -> + erlfdb:transactional(Db, fun(Tx) -> + reverse_range(Tx, Tree, get_node_wait(Tx, Tree, ?NODE_ROOT_ID), StartKey, EndKey, Fun, Acc0) + end). + +reverse_range(Tx, #tree{} = Tree, #node{level = 0} = Node, StartKey, EndKey, Fun, Acc0) -> + InRange = [{K, V} || {K, V} <- Node#node.members, K >= StartKey, K =< EndKey], + Acc1 = Fun(lists:reverse(InRange), Acc0), + {FirstKey, _} = hd(Node#node.members), + case Node#node.prev /= undefined andalso StartKey =< FirstKey of + true -> + reverse_range(Tx, Tree, get_node_wait(Tx, Tree, Node#node.prev), StartKey, EndKey, Fun, Acc1); + false -> + Acc1 + end; + +reverse_range(Tx, #tree{} = Tree, #node{} = Node, StartKey, EndKey, Fun, Acc) -> + ChildId = find_child_id(Node, EndKey), + reverse_range(Tx, Tree, get_node_wait(Tx, Tree, ChildId), StartKey, EndKey, Fun, Acc). + + +%% insert + +insert(Db, #tree{} = Tree, Key, Value) -> + erlfdb:transactional(Db, fun(Tx) -> + Root0 = get_node_wait(Tx, Tree, ?NODE_ROOT_ID), + case ?is_full(Tree, Root0) of + true -> + OldRoot = Root0#node{id = new_node_id()}, + LastKey = last_key(OldRoot), + Root1 = #node{ + id = ?NODE_ROOT_ID, + level = Root0#node.level + 1, + members = [{LastKey, OldRoot#node.id}]}, + Root2 = split_child(Tx, Tree, Root1, OldRoot), + insert_nonfull(Tx, Tree, Root2, Key, Value); + false -> + insert_nonfull(Tx, Tree, Root0, Key, Value) + end + end), + Tree. + +split_child(Tx, #tree{} = Tree, #node{} = Parent0, #node{} = Child) -> + {LeftMembers, RightMembers} = lists:split(Tree#tree.min, Child#node.members), + + LeftId = new_node_id(), + RightId = new_node_id(), + + LeftChild = remove_pointers_if_not_leaf(#node{ + id = LeftId, + level = Child#node.level, + prev = Child#node.prev, + next = RightId, + members = LeftMembers + }), + + RightChild = remove_pointers_if_not_leaf(#node{ + id = RightId, + level = Child#node.level, + prev = LeftId, + next = Child#node.next, + members = RightMembers + }), + + update_prev_neighbour(Tx, Tree, LeftChild), + update_next_neighbour(Tx, Tree, RightChild), + + %% adjust parent members + LastLeftKey = last_key(LeftMembers), + LastRightKey = last_key(RightMembers), + + Parent1 = Parent0#node{ + members = lists:keymerge(1, [{LastLeftKey, LeftId}, {LastRightKey, RightId}], + lists:keydelete(Child#node.id, 2, Parent0#node.members)) + }, + + clear_node(Tx, Tree, Child), + set_nodes(Tx, Tree, [LeftChild, RightChild, Parent1]), + Parent1. + + +update_prev_neighbour(_Tx, #tree{} = _Tree, #node{prev = undefined} = _Node) -> + ok; + +update_prev_neighbour(Tx, #tree{} = Tree, #node{} = Node) -> + Left = get_node_wait(Tx, Tree, Node#node.prev), + set_node(Tx, Tree, Left#node{next = Node#node.id}). + + +update_next_neighbour(_Tx, #tree{} = _Tree, #node{next = undefined} = _Node) -> + ok; + +update_next_neighbour(Tx, #tree{} = Tree, #node{} = Node) -> + Left = get_node_wait(Tx, Tree, Node#node.next), + set_node(Tx, Tree, Left#node{prev = Node#node.id}). + + +insert_nonfull(Tx, #tree{} = Tree, #node{level = 0} = Node0, Key, Value) -> + Node1 = Node0#node{ + members = lists:ukeymerge(1, [{Key, Value}], Node0#node.members) + }, + set_node(Tx, Tree, Node1); + +insert_nonfull(Tx, #tree{} = Tree, #node{} = Node0, Key, Value) -> + ChildId0 = find_child_id(Node0, Key), + Child0 = get_node_wait(Tx, Tree, ChildId0), + Node1 = case ?is_full(Tree, Child0) of + true -> + split_child(Tx, Tree, Node0, Child0); + false -> + Node0 + end, + ChildId1 = find_child_id(Node1, Key), + Child1 = get_node_wait(Tx, Tree, ChildId1), + insert_nonfull(Tx, Tree, Child1, Key, Value), + {CurrentKey, ChildId1} = lists:keyfind(ChildId1, 2, Node1#node.members), + Node2 = Node1#node{ + members = lists:keyreplace(ChildId1, 2, Node1#node.members, {max(Key, CurrentKey), ChildId1}) + }, + set_node(Tx, Tree, Node2). + +%% delete + +delete(Db, #tree{} = Tree, Key) -> + erlfdb:transactional(Db, fun(Tx) -> + Root0 = get_node_wait(Tx, Tree, ?NODE_ROOT_ID), + case delete(Tx, Tree, Root0, Key) of + % if only one child, make it the new root. + #node{level = L, members = [_]} = Root1 when L > 0 -> + [{_, ChildId}] = Root1#node.members, + Root2 = get_node_wait(Tx, Tree, ChildId), + clear_node(Tx, Tree, Root2), + set_node(Tx, Tree, Root2#node{id = ?NODE_ROOT_ID}); + Root1 -> + set_node(Tx, Tree, Root1) + end + end), + Tree. + + +delete(Tx, #tree{} = Tree, #node{level = 0} = Node, Key) -> + Node#node{ + members = lists:keydelete(Key, 1, Node#node.members) + }; + +delete(Tx, #tree{} = Tree, #node{} = Parent0, Key) -> + ChildId0 = find_child_id(Parent0, Key), + Child0 = get_node_wait(Tx, Tree, ChildId0), + Child1 = delete(Tx, Tree, Child0, Key), + case ?underflow(Tree, Child1) of + true -> + SiblingId = find_sibling_id(Parent0, ChildId0, Key), + Sibling = get_node_wait(Tx, Tree, SiblingId), + NewNodes = case ?at_min(Tree, Sibling) of + true -> + Merged = merge(Child1, Sibling), + update_prev_neighbour(Tx, Tree, Merged), + update_next_neighbour(Tx, Tree, Merged), + [Merged]; + false -> + {Left, Right} = rebalance(Child1, Sibling), + update_prev_neighbour(Tx, Tree, Left), + update_next_neighbour(Tx, Tree, Right), + [Left, Right] + end, + + %% remove old nodes and insert new nodes + Members0 = Parent0#node.members, + Members1 = lists:keydelete(ChildId0, 2, Members0), + Members2 = lists:keydelete(Sibling#node.id, 2, Members1), + Members3 = lists:foldl(fun(N, Acc) -> + lists:merge([{last_key(N), N#node.id}], Acc) + end, Members2, NewNodes), + Parent1 = Parent0#node{ + %% TODO change id + members = Members3 + }, + + clear_nodes(Tx, Tree, [Child0, Sibling]), + set_nodes(Tx, Tree, NewNodes), + Parent1; + false -> + set_node(Tx, Tree, Child1), + Parent0 + end. + + +merge(#node{members = RightM} = Right, #node{members = LeftM} = Left) when RightM > LeftM -> + merge(Left, Right); + +merge(#node{level = Level} = Left, #node{level = Level} = Right) -> + #node{ + id = new_node_id(), + level = Level, + prev = Left#node.prev, + next = Right#node.next, + members = lists:append(Left#node.members, Right#node.members) + }. + + +rebalance(#node{members = RightM} = Right, #node{members = LeftM} = Left) when RightM > LeftM -> + rebalance(Left, Right); + +rebalance(#node{level = Level} = Left0, #node{level = Level} = Right0) -> + Members = lists:append(Left0#node.members, Right0#node.members), + {LeftMembers, RightMembers} = lists:split(length(Members) div 2, Members), + + Left1Id = new_node_id(), + Right1Id = new_node_id(), + + Left1 = Left0#node{ + id = Left1Id, + next = Right1Id, + members = LeftMembers + }, + Right1 = Right0#node{ + id = Right1Id, + prev = Left1Id, + members = RightMembers + }, + {Left1, Right1}. + + +%% lookup functions + +find_value(#node{level = 0} = Node, Key) -> + lists:keyfind(Key, 1, Node#node.members). + + +find_child_id(#node{level = L} = Node, Key) when L > 0 -> + find_child_id_int(Node#node.members, Key). + +find_child_id_int([{K, V}], Key) when Key > K -> + V; + +find_child_id_int([{K, V} | _Rest], Key) when Key =< K -> + V; + +find_child_id_int([_ | Rest], Key) -> + find_child_id_int(Rest, Key). + + +find_sibling_id(#node{level = L} = Node0, Id, Key) when L > 0 -> + Node1 = Node0#node{members = lists:keydelete(Id, 2, Node0#node.members)}, + find_child_id(Node1, Key). + +%% metadata functions + +get_meta(Tx, #tree{} = Tree, MetaKey) -> + get_meta(Tx, Tree#tree.prefix, MetaKey); + +get_meta(Tx, Prefix, MetaKey) when is_binary(Prefix) -> + decode_value(erlfdb:wait(erlfdb:get(Tx, meta_key(Prefix, MetaKey)))). + + +set_meta(Tx, Prefix, MetaKey, MetaValue) -> + erlfdb:set( + Tx, + meta_key(Prefix, MetaKey), + encode_value(MetaValue) + ). + +meta_key(Prefix, MetaKey) when is_binary(Prefix) -> + erlfdb_tuple:pack({?META, MetaKey}, Prefix). + +%% node persistence functions + +get_nodes(Tx, #tree{} = Tree, [Ids]) -> + Futures = lists:map(fun(Id) -> {Id, get_node_future(Tx, Tree, Id)} end, Ids), + lists:map(fun({Id, Future}) -> get_node(Id, Future) end, Futures). + + +get_node_wait(Tx, #tree{} = Tree, Id) -> + get_node(Id, get_node_future(Tx, Tree, Id)). + + +get_node(Id, Future) -> + decode_node(Id, erlfdb:wait(Future)). + + +get_node_future(Tx, #tree{} = Tree, Id) -> + Key = node_key(Tree#tree.prefix, Id), + erlfdb:get(Tx, Key). + + +clear_nodes(Tx, #tree{} = Tree, Nodes) -> + lists:foreach(fun(Node) -> + clear_node(Tx, Tree, Node) + end, Nodes). + + +clear_node(Tx, #tree{} = Tree, #node{} = Node) -> + Key = node_key(Tree#tree.prefix, Node#node.id), + erlfdb:clear(Tx, Key). + + +set_nodes(Tx, #tree{} = Tree, Nodes) -> + lists:foreach(fun(Node) -> + set_node(Tx, Tree, Node) + end, Nodes). + + +set_node(Tx, #tree{} = Tree, #node{} = Node) -> + validate_node(Tree, Node), + Key = node_key(Tree#tree.prefix, Node#node.id), + Value = encode_node(Node), + erlfdb:set(Tx, Key, Value). + + +node_key(Prefix, Id) when is_binary(Prefix), is_binary(Id) -> + erlfdb_tuple:pack({?NODE, Id}, Prefix). + + +validate_tree(Db, #tree{} = Tree) -> + erlfdb:transactional(Db, fun(Tx) -> + Root = get_node_wait(Db, Tree, ?NODE_ROOT_ID), + validate_tree(Tx, Tree, Root) + end). + +validate_tree(_Tx, #tree{} = Tree, #node{level = 0} = Node) -> + print_node(Node), + validate_node(Tree, Node); + +validate_tree(Tx, #tree{} = Tree, #node{} = Node) -> + print_node(Node), + validate_node(Tree, Node), + validate_tree(Tx, Tree, Node#node.members); + +validate_tree(_Tx, #tree{} = _Tree, []) -> + ok; + +validate_tree(Tx, #tree{} = Tree, [{_, NodeId} | Rest]) -> + Node = get_node_wait(Tx, Tree, NodeId), + validate_tree(Tx, Tree, Node), + validate_tree(Tx, Tree, Rest). + + +validate_node(#tree{} = Tree, #node{} = Node) -> + NumKeys = length(Node#node.members), + IsRoot = ?NODE_ROOT_ID == Node#node.id, + if + Node#node.id == undefined -> + erlang:error({node_without_id, Node}); + not IsRoot andalso NumKeys < Tree#tree.min -> + erlang:error({too_few_keys, Node}); + NumKeys > Tree#tree.max -> + erlang:error({too_many_keys, Node}); + true -> + ok + end. + + +%% data marshalling functions (encodes unnecesary fields as a NIL_REF) + +encode_node(#node{prev = undefined} = Node) -> + encode_node(Node#node{prev = []}); + +encode_node(#node{next = undefined} = Node) -> + encode_node(Node#node{next = []}); + +encode_node(#node{} = Node) -> + encode_value(Node#node{id = []}). + + +decode_node(Id, Bin) when is_binary(Bin) -> + decode_node(Id, decode_value(Bin)); + +decode_node(Id, #node{prev = []} = Node) -> + decode_node(Id, Node#node{prev = undefined}); + +decode_node(Id, #node{next = []} = Node) -> + decode_node(Id, Node#node{next = undefined}); + +decode_node(Id, #node{} = Node) -> + Node#node{id = Id}. + + +encode_value(Value) -> + term_to_binary(Value, [compressed, {minor_version, 2}]). + + +decode_value(Bin) when is_binary(Bin) -> + binary_to_term(Bin, [safe]). + +%% private functions + +to_tree(Prefix, Order) when is_binary(Prefix), is_integer(Order), Order > 2, Order rem 2 == 0 -> + #tree{ + prefix = Prefix, + min = Order div 2, + max = Order + }. + + +last_key(#node{} = Node) -> + last_key(Node#node.members); + +last_key(Members) when is_list(Members) -> + element(1, lists:last(Members)). + + +new_node_id() -> + crypto:strong_rand_bytes(16). + + +new_node_id_unless_root(#node{id = ?NODE_ROOT_ID} = Node) -> + Node; + +new_node_id_unless_root(#node{} = Node) -> + Node#node{id = new_node_id()}. + + +%% remove prev/next pointers for nonleaf nodes +remove_pointers_if_not_leaf(#node{level = 0} = Node) -> + Node; + +remove_pointers_if_not_leaf(#node{} = Node) -> + Node#node{prev = undefined, next = undefined}. + + +print_node(#node{level = 0} = Node) -> + io:format("#node{id = ~s, level = ~w, prev = ~s, next = ~s, members = ~w}~n~n", + [b64(Node#node.id), Node#node.level, b64(Node#node.prev), b64(Node#node.next), Node#node.members]); + +print_node(#node{} = Node) -> + io:format("#node{id = ~s, level = ~w, prev = ~s, next = ~s, members = ~s}~n~n", + [base64:encode(Node#node.id), Node#node.level, b64(Node#node.prev), b64(Node#node.next), + [io_lib:format("{~w, ~s} ", [K, b64(V)]) || {K, V} <- Node#node.members]]). + + +b64(undefined) -> + undefined; + +b64(Bin) -> + base64:encode(Bin). + +%% tests + +-ifdef(TEST). +-include_lib("eunit/include/eunit.hrl"). + +lookup_test() -> + Db = erlfdb_util:get_test_db([empty]), + ?MODULE:init(Db, <<1,2,3>>, 4), + Tree = ?MODULE:open(Db, <<1,2,3>>), + Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, 100)])], + lists:foreach(fun(Key) -> ?MODULE:insert(Db, Tree, Key, Key + 1) end, Keys), + lists:foreach(fun(Key) -> ?assertEqual({Key, Key + 1}, ?MODULE:lookup(Db, Tree, Key)) end, Keys). + + +delete_test() -> + Db = erlfdb_util:get_test_db([empty]), + ?MODULE:init(Db, <<1,2,3>>, 4), + Tree = ?MODULE:open(Db, <<1,2,3>>), + Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, 100)])], + lists:foreach(fun(Key) -> ?MODULE:insert(Db, Tree, Key, Key + 1) end, Keys), + lists:foreach(fun(Key) -> ?assertEqual({Key, Key + 1}, ?MODULE:lookup(Db, Tree, Key)) end, Keys), + lists:foreach(fun(Key) -> ?MODULE:delete(Db, Tree, Key) end, Keys), + lists:foreach(fun(Key) -> ?assertEqual(false, ?MODULE:lookup(Db, Tree, Key)) end, Keys). + + +range_after_delete_test() -> + Db = erlfdb_util:get_test_db([empty]), + ?MODULE:init(Db, <<1,2,3>>, 4), + Tree = ?MODULE:open(Db, <<1,2,3>>), + Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, 100)])], + lists:foreach(fun(Key) -> ?MODULE:insert(Db, Tree, Key, Key + 1) end, Keys), + lists:foreach(fun(Key) -> ?assertEqual({Key, Key + 1}, ?MODULE:lookup(Db, Tree, Key)) end, Keys), + lists:foreach(fun(Key) -> ?MODULE:delete(Db, Tree, Key) end, lists:seq(1, 100, 2)), + ?assertEqual(50, ?MODULE:range(Db, Tree, 1, 100, fun(E, A) -> length(E) + A end, 0)), + ?assertEqual(50, ?MODULE:reverse_range(Db, Tree, 1, 100, fun(E, A) -> length(E) + A end, 0)). + + +intense_lookup_test_() -> + [ + {timeout, 1000, fun() -> lookup_test_fun(1000, 20) end}, + {timeout, 1000, fun() -> lookup_test_fun(1000, 50) end}, + {timeout, 1000, fun() -> lookup_test_fun(1000, 500) end} + ]. + + +lookup_test_fun(Max, Order) -> + Db = erlfdb_util:get_test_db([empty]), + ?MODULE:init(Db, <<1,2,3>>, Order), + Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, Max, 2)])], + T0 = erlang:monotonic_time(), + Tree = lists:foldl(fun(Key, T) -> ?MODULE:insert(Db, T, Key, Key + 1) end, ?MODULE:open(Db, <<1,2,3>>), Keys), + T1 = erlang:monotonic_time(), + lists:foreach(fun(Key) -> ?assertEqual({Key, Key + 1}, ?MODULE:lookup(Db, Tree, Key)) end, Keys), + T2 = erlang:monotonic_time(), + ?debugFmt("~B order. ~B iterations. insert rate: ~.2f/s, lookup rate: ~.2f/s", + [Order, Max, Max / sec(T1 - T0), Max / sec(T2 - T1)]). + + +range_test_() -> + {timeout, 1000, fun() -> + Db = erlfdb_util:get_test_db([empty]), + ?MODULE:init(Db, <<1,2,3>>, 10), + Max = 1000, + Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, Max)])], + Tree = lists:foldl(fun(Key, T) -> ?MODULE:insert(Db, T, Key, Key + 1) end, ?MODULE:open(Db, <<1,2,3>>), Keys), + lists:foreach( + fun(_) -> + [StartKey, EndKey] = lists:sort([rand:uniform(Max), rand:uniform(Max)]), + ?assertEqual(EndKey - StartKey + 1, + ?MODULE:range(Db, Tree, StartKey, EndKey, fun(E, A) -> length(E) + A end, 0) + ) end, + lists:seq(1, 1000)) + end}. + + +reverse_range_test_() -> + {timeout, 1000, fun() -> + Db = erlfdb_util:get_test_db([empty]), + ?MODULE:init(Db, <<1,2,3>>, 10), + Max = 1000, + Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, Max)])], + Tree = lists:foldl(fun(Key, T) -> ?MODULE:insert(Db, T, Key, Key + 1) end, ?MODULE:open(Db, <<1,2,3>>), Keys), + lists:foreach( + fun(_) -> + [StartKey, EndKey] = lists:sort([rand:uniform(Max), rand:uniform(Max)]), + ?assertEqual(EndKey - StartKey + 1, + ?MODULE:reverse_range(Db, Tree, StartKey, EndKey, fun(E, A) -> length(E) + A end, 0) + ) end, + lists:seq(1, 1000)) + end}. + + +sec(Native) -> + max(1, erlang:convert_time_unit(Native, native, second)). + +-endif. -- cgit v1.2.1 From 6044f0adca38abea54f06dc61eeb96ee3d6059a7 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Mon, 29 Jun 2020 10:46:32 +0100 Subject: readme --- README.md | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 README.md diff --git a/README.md b/README.md new file mode 100644 index 000000000..874fc0fe6 --- /dev/null +++ b/README.md @@ -0,0 +1,16 @@ +A B+Tree (all values stored in leaves) with configurable order, where +all data is stored in FoundationDB. + +The tree is balanced at all times. A bidirectional linked list is +maintained between leaf nodes for efficient range queries in either +direction. + +The FoundationDB keys are currently random UUID's. + +TODO + +1. Rewrite inner node ids (non-root, non-leaf) so we can safely cache + them outside of a transaction. +2. Store reduction values on inner nodes. +3. Chunkify large values over multiple rows? +4. Sequential node ids? -- cgit v1.2.1 From cbacdecf69c1773565004c1d4ab0be8971d09cec Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Mon, 29 Jun 2020 10:48:58 +0100 Subject: Update README.md --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 874fc0fe6..0fe04c338 100644 --- a/README.md +++ b/README.md @@ -14,3 +14,4 @@ TODO 2. Store reduction values on inner nodes. 3. Chunkify large values over multiple rows? 4. Sequential node ids? +5. encode values in a non-erlang way? -- cgit v1.2.1 From 51fd72aff8d2a73cfd4beae43968766d03dd4bf8 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Mon, 29 Jun 2020 10:50:13 +0100 Subject: Update README.md --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 0fe04c338..9f9b9e2ba 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,8 @@ all data is stored in FoundationDB. The tree is balanced at all times. A bidirectional linked list is maintained between leaf nodes for efficient range queries in either -direction. +direction. You can pass in an fdb Db or open Tx, the latter is vastly +efficient for multiple inserts, so batch if you can. The FoundationDB keys are currently random UUID's. -- cgit v1.2.1 From 5b77d53f856290bf45bec19d16a2535f995337e8 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Mon, 29 Jun 2020 10:54:48 +0100 Subject: oops forgot the .app --- src/ebtree.app.src | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 src/ebtree.app.src diff --git a/src/ebtree.app.src b/src/ebtree.app.src new file mode 100644 index 000000000..bd5a00663 --- /dev/null +++ b/src/ebtree.app.src @@ -0,0 +1,14 @@ +{application, ebtree, + [{description, "An OTP library"}, + {vsn, "0.1.0"}, + {registered, []}, + {applications, + [kernel, + stdlib + ]}, + {env,[]}, + {modules, []}, + + {licenses, ["Apache 2.0"]}, + {links, []} + ]}. -- cgit v1.2.1 From b66184a6e09de0bbb823be35b46ce0014cf8f468 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Mon, 29 Jun 2020 10:56:33 +0100 Subject: get vsn from git --- src/ebtree.app.src | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ebtree.app.src b/src/ebtree.app.src index bd5a00663..995238785 100644 --- a/src/ebtree.app.src +++ b/src/ebtree.app.src @@ -1,6 +1,6 @@ {application, ebtree, [{description, "An OTP library"}, - {vsn, "0.1.0"}, + {vsn, git}, {registered, []}, {applications, [kernel, -- cgit v1.2.1 From d36e80126bb71caabb4974d0152fbde403f0a9fb Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Mon, 29 Jun 2020 11:01:29 +0100 Subject: fix compilation warnings --- src/ebtree.erl | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/src/ebtree.erl b/src/ebtree.erl index 7c586b57b..aa59b26b7 100644 --- a/src/ebtree.erl +++ b/src/ebtree.erl @@ -35,7 +35,6 @@ -define(at_min(Tree, Node), Tree#tree.min == length(Node#node.members)). -define(is_full(Tree, Node), Tree#tree.max == length(Node#node.members)). --compile(export_all). init(Db, Prefix, Order) when is_binary(Prefix), is_integer(Order), Order > 2, Order rem 2 == 0 -> erlfdb:transactional(Db, fun(Tx) -> @@ -231,7 +230,7 @@ delete(Db, #tree{} = Tree, Key) -> Tree. -delete(Tx, #tree{} = Tree, #node{level = 0} = Node, Key) -> +delete(_Tx, #tree{} = _Tree, #node{level = 0} = Node, Key) -> Node#node{ members = lists:keydelete(Key, 1, Node#node.members) }; @@ -358,11 +357,6 @@ meta_key(Prefix, MetaKey) when is_binary(Prefix) -> %% node persistence functions -get_nodes(Tx, #tree{} = Tree, [Ids]) -> - Futures = lists:map(fun(Id) -> {Id, get_node_future(Tx, Tree, Id)} end, Ids), - lists:map(fun({Id, Future}) -> get_node(Id, Future) end, Futures). - - get_node_wait(Tx, #tree{} = Tree, Id) -> get_node(Id, get_node_future(Tx, Tree, Id)). @@ -496,13 +490,6 @@ new_node_id() -> crypto:strong_rand_bytes(16). -new_node_id_unless_root(#node{id = ?NODE_ROOT_ID} = Node) -> - Node; - -new_node_id_unless_root(#node{} = Node) -> - Node#node{id = new_node_id()}. - - %% remove prev/next pointers for nonleaf nodes remove_pointers_if_not_leaf(#node{level = 0} = Node) -> Node; -- cgit v1.2.1 From 844ee87d8c2ecbc71a0979047bb4906bce8ddfbd Mon Sep 17 00:00:00 2001 From: ILYA Khlopotov Date: Mon, 29 Jun 2020 08:28:37 -0700 Subject: Start all required deps automatically in test --- src/couch_epi/test/eunit/couch_epi_tests.erl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/couch_epi/test/eunit/couch_epi_tests.erl b/src/couch_epi/test/eunit/couch_epi_tests.erl index 12d8610c1..23b9e6103 100644 --- a/src/couch_epi/test/eunit/couch_epi_tests.erl +++ b/src/couch_epi/test/eunit/couch_epi_tests.erl @@ -162,7 +162,8 @@ start_epi(Plugins) -> Module end, Plugins), application:set_env(couch_epi, plugins, PluginsModules), - application:start(couch_epi). + {ok, _} = application:ensure_all_started(couch_epi), + ok. setup(data_file) -> error_logger:tty(false), -- cgit v1.2.1 From c6940d857d86c83c1aa69f068b1c503428b7b6e8 Mon Sep 17 00:00:00 2001 From: Juanjo Rodriguez Date: Sun, 28 Jun 2020 20:51:26 +0200 Subject: Port reader_acl test into elixir test suite --- test/elixir/README.md | 4 +- test/elixir/test/reader_acl_test.exs | 254 +++++++++++++++++++++++++++++++++++ test/javascript/tests/reader_acl.js | 1 + 3 files changed, 257 insertions(+), 2 deletions(-) create mode 100644 test/elixir/test/reader_acl_test.exs diff --git a/test/elixir/README.md b/test/elixir/README.md index dfa4c62b3..80879afdc 100644 --- a/test/elixir/README.md +++ b/test/elixir/README.md @@ -62,11 +62,11 @@ X means done, - means partially - [X] Port multiple_rows.js - [X] Port proxyauth.js - [X] Port purge.js - - [ ] Port reader_acl.js + - [X] Port reader_acl.js - [X] Port recreate_doc.js - [X] Port reduce_builtin.js - [X] Port reduce_false.js - - [ ] Port reduce_false_temp.js + - [ ] ~~Port reduce_false_temp.js~~ - [X] Port reduce.js - [X] Port replication.js - [X] Port replicator_db_bad_rep_id.js diff --git a/test/elixir/test/reader_acl_test.exs b/test/elixir/test/reader_acl_test.exs new file mode 100644 index 000000000..f65e7cbf6 --- /dev/null +++ b/test/elixir/test/reader_acl_test.exs @@ -0,0 +1,254 @@ +defmodule ReaderACLTest do + use CouchTestCase + + @moduletag :authentication + + @users_db_name "custom-users" + @password "funnybone" + + @moduletag config: [ + { + "chttpd_auth", + "authentication_db", + @users_db_name + }, + { + "couch_httpd_auth", + "authentication_db", + @users_db_name + } + ] + setup do + # Create db if not exists + Couch.put("/#{@users_db_name}") + + # create a user with top-secret-clearance + user_doc = + prepare_user_doc([ + {:name, "bond@apache.org"}, + {:password, @password}, + {:roles, ["top-secret"]} + ]) + + {:ok, _} = create_doc(@users_db_name, user_doc) + + # create a user with top-secret-clearance + user_doc = + prepare_user_doc([ + {:name, "juanjo@apache.org"}, + {:password, @password} + ]) + + {:ok, _} = create_doc(@users_db_name, user_doc) + + on_exit(&tear_down/0) + + :ok + end + + defp tear_down do + delete_db(@users_db_name) + end + + defp login(user, password) do + sess = Couch.login(user, password) + assert sess.cookie, "Login correct is expected" + sess + end + + defp logout(session) do + assert Couch.Session.logout(session).body["ok"] + end + + defp open_as(db_name, doc_id, options) do + use_session = Keyword.get(options, :use_session) + user = Keyword.get(options, :user) + expect_response = Keyword.get(options, :expect_response, 200) + expect_message = Keyword.get(options, :error_message) + + session = use_session || login(user, @password) + + resp = + Couch.Session.get( + session, + "/#{db_name}/#{URI.encode(doc_id)}" + ) + + if use_session == nil do + logout(session) + end + + assert resp.status_code == expect_response + + if expect_message != nil do + assert resp.body["error"] == expect_message + end + + resp.body + end + + defp set_security(db_name, security, expect_response \\ 200) do + resp = Couch.put("/#{db_name}/_security", body: security) + assert resp.status_code == expect_response + end + + @tag :with_db + test "unrestricted db can be read", context do + db_name = context[:db_name] + + doc = %{_id: "baz", foo: "bar"} + {:ok, _} = create_doc(db_name, doc) + + # any user can read unrestricted db + open_as(db_name, "baz", user: "juanjo@apache.org") + open_as(db_name, "baz", user: "bond@apache.org") + end + + @tag :with_db + test "restricted db can be read by authorized users", context do + db_name = context[:db_name] + + doc = %{_id: "baz", foo: "bar"} + {:ok, _} = create_doc(db_name, doc) + + security = %{ + members: %{ + roles: ["super-secret-club"], + names: ["joe", "barb"] + } + } + + set_security(db_name, security) + + # can't read it as bond is missing the needed role + open_as(db_name, "baz", user: "bond@apache.org", expect_response: 403) + + # make anyone with the top-secret role an admin + # db admins are automatically members + security = %{ + admins: %{ + roles: ["top-secret"], + names: [] + }, + members: %{ + roles: ["super-secret-club"], + names: ["joe", "barb"] + } + } + + set_security(db_name, security) + + # db admin can read + open_as(db_name, "baz", user: "bond@apache.org") + + # admin now adds the top-secret role to the db's members + # and removes db-admins + security = %{ + admins: %{ + roles: [], + names: [] + }, + members: %{ + roles: ["super-secret-club", "top-secret"], + names: ["joe", "barb"] + } + } + + set_security(db_name, security) + + # server _admin can always read + resp = Couch.get("/#{db_name}/baz") + assert resp.status_code == 200 + + open_as(db_name, "baz", user: "bond@apache.org") + end + + @tag :with_db + test "works with readers (backwards compat with 1.0)", context do + db_name = context[:db_name] + + doc = %{_id: "baz", foo: "bar"} + {:ok, _} = create_doc(db_name, doc) + + security = %{ + admins: %{ + roles: [], + names: [] + }, + readers: %{ + roles: ["super-secret-club", "top-secret"], + names: ["joe", "barb"] + } + } + + set_security(db_name, security) + open_as(db_name, "baz", user: "bond@apache.org") + end + + @tag :with_db + test "can't set non string reader names or roles", context do + db_name = context[:db_name] + + security = %{ + members: %{ + roles: ["super-secret-club", %{"top-secret": "awesome"}], + names: ["joe", "barb"] + } + } + + set_security(db_name, security, 500) + + security = %{ + members: %{ + roles: ["super-secret-club", "top-secret"], + names: ["joe", 22] + } + } + + set_security(db_name, security, 500) + + security = %{ + members: %{ + roles: ["super-secret-club", "top-secret"], + names: "joe" + } + } + + set_security(db_name, security, 500) + end + + @tag :with_db + test "members can query views", context do + db_name = context[:db_name] + + doc = %{_id: "baz", foo: "bar"} + {:ok, _} = create_doc(db_name, doc) + + security = %{ + admins: %{ + roles: [], + names: [] + }, + members: %{ + roles: ["super-secret-club", "top-secret"], + names: ["joe", "barb"] + } + } + + set_security(db_name, security) + + view = %{ + _id: "_design/foo", + views: %{ + bar: %{ + map: "function(doc){emit(null, null)}" + } + } + } + + {:ok, _} = create_doc(db_name, view) + + # members can query views + open_as(db_name, "_design/foo/_view/bar", user: "bond@apache.org") + end +end diff --git a/test/javascript/tests/reader_acl.js b/test/javascript/tests/reader_acl.js index 8dc28aae9..d5a923549 100644 --- a/test/javascript/tests/reader_acl.js +++ b/test/javascript/tests/reader_acl.js @@ -10,6 +10,7 @@ // License for the specific language governing permissions and limitations under // the License. +couchTests.elixir = true; couchTests.reader_acl = function(debug) { // this tests read access control -- cgit v1.2.1 From 2528903f38a5a2ac57b87bf46141d8298587da1c Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Mon, 29 Jun 2020 18:47:37 +0100 Subject: Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 9f9b9e2ba..d2c7a304e 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ all data is stored in FoundationDB. The tree is balanced at all times. A bidirectional linked list is maintained between leaf nodes for efficient range queries in either direction. You can pass in an fdb Db or open Tx, the latter is vastly -efficient for multiple inserts, so batch if you can. +more efficient for multiple inserts, so batch if you can. The FoundationDB keys are currently random UUID's. -- cgit v1.2.1 From 2e6cfa26034c3638a9acc8c49d9038e097691ab8 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Mon, 29 Jun 2020 10:59:13 +0100 Subject: Add incremental reduce on non-leaf nodes. --- README.md | 7 ++- src/ebtree.erl | 161 ++++++++++++++++++++++++++++++++++++++++++++++++--------- 2 files changed, 140 insertions(+), 28 deletions(-) diff --git a/README.md b/README.md index 9f9b9e2ba..2821f9043 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,6 @@ TODO 1. Rewrite inner node ids (non-root, non-leaf) so we can safely cache them outside of a transaction. -2. Store reduction values on inner nodes. -3. Chunkify large values over multiple rows? -4. Sequential node ids? -5. encode values in a non-erlang way? +2. Chunkify large values over multiple rows? +3. Sequential node ids? +4. encode values in a non-erlang way? diff --git a/src/ebtree.erl b/src/ebtree.erl index aa59b26b7..62911a33f 100644 --- a/src/ebtree.erl +++ b/src/ebtree.erl @@ -3,26 +3,36 @@ -export([ init/3, open/2, + open/3, insert/4, delete/3, lookup/3, range/6, reverse_range/6, + reduce/2, validate_tree/2 ]). +%% built-in reduce functions +-export([ + reduce_sum/2, + reduce_count/2, + reduce_stats/2 +]). + -record(node, { id, level = 0, prev, next, - members = [] %% [{Key0, Value0 | Pointer0}, {Key1, Value1 | Pointer1}, ...] + members = [] %% [{Key0, Value0} | {Key0, Pointer0, Reduction0}, ...] }). -record(tree, { prefix, min, - max + max, + reduce_fun }). -define(META, 0). @@ -40,15 +50,18 @@ init(Db, Prefix, Order) when is_binary(Prefix), is_integer(Order), Order > 2, Or erlfdb:transactional(Db, fun(Tx) -> erlfdb:clear_range_startswith(Tx, Prefix), set_meta(Tx, Prefix, ?META_ORDER, Order), - set_node(Tx, to_tree(Prefix, Order), #node{id = ?NODE_ROOT_ID}), + set_node(Tx, to_tree(Prefix, Order, undefined), #node{id = ?NODE_ROOT_ID}), ok end). open(Db, Prefix) -> + open(Db, Prefix, fun reduce_noop/2). + +open(Db, Prefix, ReduceFun) when is_function(ReduceFun, 2) -> erlfdb:transactional(Db, fun(Tx) -> Order = get_meta(Tx, Prefix, ?META_ORDER), - to_tree(Prefix, Order) + to_tree(Prefix, Order, ReduceFun) end). @@ -66,6 +79,15 @@ lookup(Tx, #tree{} = Tree, #node{} = Node, Key) -> ChildId = find_child_id(Node, Key), lookup(Tx, Tree, get_node_wait(Tx, Tree, ChildId), Key). +%% reduce lookup + +reduce(Db, #tree{} = Tree) -> + erlfdb:transactional(Db, fun(Tx) -> + Root = get_node_wait(Tx, Tree, ?NODE_ROOT_ID), + reduce_node(Tree, Root) + end). + + %% range (inclusive of both ends) range(Db, #tree{} = Tree, StartKey, EndKey, Fun, Acc0) -> @@ -161,11 +183,14 @@ split_child(Tx, #tree{} = Tree, #node{} = Parent0, #node{} = Child) -> LastLeftKey = last_key(LeftMembers), LastRightKey = last_key(RightMembers), + %% adjust parent reductions + LeftReduction = reduce_node(Tree, LeftChild), + RightReduction = reduce_node(Tree, RightChild), + Parent1 = Parent0#node{ - members = lists:keymerge(1, [{LastLeftKey, LeftId}, {LastRightKey, RightId}], + members = lists:keymerge(1, [{LastLeftKey, LeftId, LeftReduction}, {LastRightKey, RightId, RightReduction}], lists:keydelete(Child#node.id, 2, Parent0#node.members)) }, - clear_node(Tx, Tree, Child), set_nodes(Tx, Tree, [LeftChild, RightChild, Parent1]), Parent1. @@ -191,7 +216,8 @@ insert_nonfull(Tx, #tree{} = Tree, #node{level = 0} = Node0, Key, Value) -> Node1 = Node0#node{ members = lists:ukeymerge(1, [{Key, Value}], Node0#node.members) }, - set_node(Tx, Tree, Node1); + set_node(Tx, Tree, Node1), + reduce_node(Tree, Node1); insert_nonfull(Tx, #tree{} = Tree, #node{} = Node0, Key, Value) -> ChildId0 = find_child_id(Node0, Key), @@ -204,12 +230,15 @@ insert_nonfull(Tx, #tree{} = Tree, #node{} = Node0, Key, Value) -> end, ChildId1 = find_child_id(Node1, Key), Child1 = get_node_wait(Tx, Tree, ChildId1), - insert_nonfull(Tx, Tree, Child1, Key, Value), - {CurrentKey, ChildId1} = lists:keyfind(ChildId1, 2, Node1#node.members), + NewReduction = insert_nonfull(Tx, Tree, Child1, Key, Value), + {CurrentKey, ChildId1, _OldReduction} = lists:keyfind(ChildId1, 2, Node1#node.members), Node2 = Node1#node{ - members = lists:keyreplace(ChildId1, 2, Node1#node.members, {max(Key, CurrentKey), ChildId1}) + members = lists:keyreplace(ChildId1, 2, Node1#node.members, + {erlang:max(Key, CurrentKey), ChildId1, NewReduction}) }, - set_node(Tx, Tree, Node2). + set_node(Tx, Tree, Node2), + reduce_node(Tree, Node2). + %% delete @@ -219,7 +248,7 @@ delete(Db, #tree{} = Tree, Key) -> case delete(Tx, Tree, Root0, Key) of % if only one child, make it the new root. #node{level = L, members = [_]} = Root1 when L > 0 -> - [{_, ChildId}] = Root1#node.members, + [{_, ChildId, _}] = Root1#node.members, Root2 = get_node_wait(Tx, Tree, ChildId), clear_node(Tx, Tree, Root2), set_node(Tx, Tree, Root2#node{id = ?NODE_ROOT_ID}); @@ -256,13 +285,14 @@ delete(Tx, #tree{} = Tree, #node{} = Parent0, Key) -> [Left, Right] end, - %% remove old nodes and insert new nodes + %% remove old members and insert new members Members0 = Parent0#node.members, Members1 = lists:keydelete(ChildId0, 2, Members0), Members2 = lists:keydelete(Sibling#node.id, 2, Members1), Members3 = lists:foldl(fun(N, Acc) -> - lists:merge([{last_key(N), N#node.id}], Acc) + lists:merge([{last_key(N), N#node.id, reduce_node(Tree, N)}], Acc) end, Members2, NewNodes), + Parent1 = Parent0#node{ %% TODO change id members = Members3 @@ -273,7 +303,11 @@ delete(Tx, #tree{} = Tree, #node{} = Parent0, Key) -> Parent1; false -> set_node(Tx, Tree, Child1), - Parent0 + {ChildKey, ChildId0, _OldReduction} = lists:keyfind(ChildId0, 2, Parent0#node.members), + Parent0#node{ + members = lists:keyreplace(ChildId0, 1, Parent0#node.members, + {ChildKey, Child1#node.id, reduce_node(Tree, Child1)}) + } end. @@ -322,10 +356,10 @@ find_value(#node{level = 0} = Node, Key) -> find_child_id(#node{level = L} = Node, Key) when L > 0 -> find_child_id_int(Node#node.members, Key). -find_child_id_int([{K, V}], Key) when Key > K -> +find_child_id_int([{K, V, _R}], Key) when Key > K -> V; -find_child_id_int([{K, V} | _Rest], Key) when Key =< K -> +find_child_id_int([{K, V, _R} | _Rest], Key) when Key =< K -> V; find_child_id_int([_ | Rest], Key) -> @@ -416,8 +450,8 @@ validate_tree(Tx, #tree{} = Tree, #node{} = Node) -> validate_tree(_Tx, #tree{} = _Tree, []) -> ok; -validate_tree(Tx, #tree{} = Tree, [{_, NodeId} | Rest]) -> - Node = get_node_wait(Tx, Tree, NodeId), +validate_tree(Tx, #tree{} = Tree, [NodeTuple | Rest]) -> + Node = get_node_wait(Tx, Tree, element(2, NodeTuple)), validate_tree(Tx, Tree, Node), validate_tree(Tx, Tree, Rest). @@ -469,13 +503,70 @@ encode_value(Value) -> decode_value(Bin) when is_binary(Bin) -> binary_to_term(Bin, [safe]). + +%% built-in reduce functions. + +reduce_noop(_KVs, _Rereduce) -> + []. + + +reduce_sum(KVs, false) -> + {_, Vs} = lists:unzip(KVs), + lists:sum(Vs); + +reduce_sum(Rs, true) -> + lists:sum(Rs). + + +reduce_count(KVs, false) -> + length(KVs); + +reduce_count(Rs, true) -> + lists:sum(Rs). + + +reduce_stats(KVs, false) -> + {_, Vs} = lists:unzip(KVs), + { + lists:sum(Vs), + lists:min(Vs), + lists:max(Vs), + length(Vs), + lists:sum([V * V || V <- Vs]) + }; + +reduce_stats(Rs, true) -> + lists:foldl( + fun({Sum, Min, Max, Count, SumSqr}, + {SumAcc, MinAcc, MaxAcc, CountAcc, SumSqrAcc}) -> + { + Sum + SumAcc, + erlang:min(Min, MinAcc), + erlang:max(Max, MaxAcc), + Count + CountAcc, + SumSqr + SumSqrAcc + } end, hd(Rs), tl(Rs)). + + +reduce_node(#tree{} = Tree, #node{level = 0} = Node) -> + #tree{reduce_fun = ReduceFun} = Tree, + ReduceFun(Node#node.members, false); + +reduce_node(#tree{} = Tree, #node{} = Node) -> + #tree{reduce_fun = ReduceFun} = Tree, + Rs = [R || {_K, _V, R} <- Node#node.members], + ReduceFun(Rs, true). + + %% private functions -to_tree(Prefix, Order) when is_binary(Prefix), is_integer(Order), Order > 2, Order rem 2 == 0 -> +to_tree(Prefix, Order, ReduceFun) + when is_binary(Prefix), is_integer(Order), Order > 2, Order rem 2 == 0 -> #tree{ prefix = Prefix, min = Order div 2, - max = Order + max = Order, + reduce_fun = ReduceFun }. @@ -500,12 +591,13 @@ remove_pointers_if_not_leaf(#node{} = Node) -> print_node(#node{level = 0} = Node) -> io:format("#node{id = ~s, level = ~w, prev = ~s, next = ~s, members = ~w}~n~n", - [b64(Node#node.id), Node#node.level, b64(Node#node.prev), b64(Node#node.next), Node#node.members]); + [b64(Node#node.id), Node#node.level, b64(Node#node.prev), b64(Node#node.next), + Node#node.members]); print_node(#node{} = Node) -> io:format("#node{id = ~s, level = ~w, prev = ~s, next = ~s, members = ~s}~n~n", [base64:encode(Node#node.id), Node#node.level, b64(Node#node.prev), b64(Node#node.next), - [io_lib:format("{~w, ~s} ", [K, b64(V)]) || {K, V} <- Node#node.members]]). + [io_lib:format("{~w, ~s, ~w}, ", [K, b64(V), R]) || {K, V, R} <- Node#node.members]]). b64(undefined) -> @@ -551,6 +643,27 @@ range_after_delete_test() -> ?assertEqual(50, ?MODULE:reverse_range(Db, Tree, 1, 100, fun(E, A) -> length(E) + A end, 0)). +reduce_test() -> + Db = erlfdb_util:get_test_db([empty]), + ?MODULE:init(Db, <<1,2,3>>, 4), + Tree = ?MODULE:open(Db, <<1,2,3>>, fun reduce_sum/2), + Max = 100, + Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, Max)])], + lists:foreach(fun(Key) -> ?MODULE:insert(Db, Tree, Key, Key) end, Keys), + ?assertEqual(round(Max * ((1 + Max) / 2)), ?MODULE:reduce(Db, Tree)). + + +reduce_after_delete_test() -> + Db = erlfdb_util:get_test_db([empty]), + ?MODULE:init(Db, <<1,2,3>>, 4), + Tree = ?MODULE:open(Db, <<1,2,3>>, fun reduce_sum/2), + Max = 100, + Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, Max)])], + lists:foreach(fun(Key) -> ?MODULE:insert(Db, Tree, Key, Key) end, Keys), + ?assertEqual(round(Max * ((1 + Max) / 2)), ?MODULE:reduce(Db, Tree)), + lists:foreach(fun(Key) -> ?MODULE:delete(Db, Tree, Key) end, Keys), + ?assertEqual(0, ?MODULE:reduce(Db, Tree)). + intense_lookup_test_() -> [ {timeout, 1000, fun() -> lookup_test_fun(1000, 20) end}, @@ -607,6 +720,6 @@ reverse_range_test_() -> sec(Native) -> - max(1, erlang:convert_time_unit(Native, native, second)). + erlang:max(1, erlang:convert_time_unit(Native, native, second)). -endif. -- cgit v1.2.1 From eaf6e744bf286cdca8b07ea63303dd3920bcff2a Mon Sep 17 00:00:00 2001 From: Juanjo Rodriguez Date: Mon, 29 Jun 2020 18:02:39 +0200 Subject: Port view_update_seq.js into elixir --- test/elixir/lib/couch/db_test.ex | 2 +- test/elixir/test/view_update_seq_test.exs | 142 ++++++++++++++++++++++++++++++ test/javascript/tests/view_update_seq.js | 1 + 3 files changed, 144 insertions(+), 1 deletion(-) create mode 100644 test/elixir/test/view_update_seq_test.exs diff --git a/test/elixir/lib/couch/db_test.ex b/test/elixir/lib/couch/db_test.ex index a61db1424..23f10937d 100644 --- a/test/elixir/lib/couch/db_test.ex +++ b/test/elixir/lib/couch/db_test.ex @@ -341,7 +341,7 @@ defmodule Couch.DBTest do Couch.get("/#{db_name}/_design/#{view_root}/_view/#{view_name}", query: options) _ -> - Couch.post("/#{db_name}/_design/#{view_root}/_view/#{view_name}", + Couch.post("/#{db_name}/_design/#{view_root}/_view/#{view_name}", query: options, body: %{"keys" => keys} ) end diff --git a/test/elixir/test/view_update_seq_test.exs b/test/elixir/test/view_update_seq_test.exs new file mode 100644 index 000000000..38b42c7a7 --- /dev/null +++ b/test/elixir/test/view_update_seq_test.exs @@ -0,0 +1,142 @@ +defmodule ViewUpdateSeqTest do + use CouchTestCase + + @moduletag :view_update_seq + + @moduledoc """ + This is a port of the view_update_seq.js test suite. + """ + + @design_doc %{ + _id: "_design/test", + language: "javascript", + autoupdate: false, + views: %{ + all_docs: %{ + map: "function(doc) { emit(doc.integer, doc.string) }" + }, + summate: %{ + map: + "function (doc) { if (typeof doc.integer === 'number') { emit(doc.integer, doc.integer)}; }", + reduce: "function (keys, values) { return sum(values); };" + } + } + } + + defp seq_int(seq) do + {int, _} = + seq + |> String.split("-") + |> Enum.at(0) + |> Integer.parse() + + int + end + + @tag :with_db + test "db info update seq", context do + db_name = context[:db_name] + + info = info(db_name) + assert seq_int(info["update_seq"]) == 0 + + create_doc(db_name, @design_doc) + + info = info(db_name) + assert seq_int(info["update_seq"]) == 1 + end + + @tag :with_db + test "_all_docs update seq", context do + db_name = context[:db_name] + + resp = Couch.get("/#{db_name}/_all_docs", query: %{:update_seq => true}) + assert seq_int(resp.body["update_seq"]) == 0 + + create_doc(db_name, @design_doc) + + resp = Couch.get("/#{db_name}/_all_docs", query: %{:update_seq => true}) + assert length(resp.body["rows"]) == 1 + assert seq_int(resp.body["update_seq"]) == 1 + + docs = make_docs(0..99) + bulk_save(db_name, docs) + + resp = Couch.get("/#{db_name}/_all_docs", query: %{:limit => 1}) + assert length(resp.body["rows"]) == 1 + assert Map.has_key?(resp.body, "update_seq") == false + + resp = Couch.get("/#{db_name}/_all_docs", query: %{:limit => 1, :update_seq => true}) + assert length(resp.body["rows"]) == 1 + assert seq_int(resp.body["update_seq"]) == 101 + end + + @tag :with_db + test "view update seq", context do + db_name = context[:db_name] + + create_doc(db_name, @design_doc) + docs = make_docs(0..99) + bulk_save(db_name, docs) + + resp = view(db_name, "test/all_docs", %{:limit => 1, :update_seq => true}) + assert length(resp.body["rows"]) == 1 + assert seq_int(resp.body["update_seq"]) == 101 + + resp = view(db_name, "test/all_docs", %{:limit => 1, :update_seq => false}) + assert length(resp.body["rows"]) == 1 + assert Map.has_key?(resp.body, "update_seq") == false + + resp = view(db_name, "test/summate", %{:update_seq => true}) + assert length(resp.body["rows"]) == 1 + assert seq_int(resp.body["update_seq"]) == 101 + + save(db_name, %{"_id" => "A", "integer" => 1}) + + resp = + view(db_name, "test/all_docs", %{:limit => 1, :stale => "ok", :update_seq => true}) + + assert length(resp.body["rows"]) == 1 + assert seq_int(resp.body["update_seq"]) == 101 + + save(db_name, %{"_id" => "AA", "integer" => 2}) + + resp = + view(db_name, "test/all_docs", %{ + :limit => 1, + :stale => "update_after", + :update_seq => true + }) + + assert length(resp.body["rows"]) == 1 + assert seq_int(resp.body["update_seq"]) == 101 + + retry_until(fn -> + resp = + view(db_name, "test/all_docs", %{:limit => 1, :stale => "ok", :update_seq => true}) + + assert length(resp.body["rows"]) == 1 + seq_int(resp.body["update_seq"]) == 103 + end) + + resp = + view(db_name, "test/all_docs", %{:limit => 1, :stale => "ok", :update_seq => true}) + + assert length(resp.body["rows"]) == 1 + assert seq_int(resp.body["update_seq"]) == 103 + + resp = view(db_name, "test/all_docs", %{:limit => 1, :update_seq => true}) + + assert length(resp.body["rows"]) == 1 + assert seq_int(resp.body["update_seq"]) == 103 + + resp = view(db_name, "test/all_docs", %{:update_seq => true}, ["0", "1"]) + assert seq_int(resp.body["update_seq"]) == 103 + + resp = view(db_name, "test/all_docs", %{:update_seq => true}, ["0", "1"]) + assert seq_int(resp.body["update_seq"]) == 103 + + resp = view(db_name, "test/summate", %{:group => true, :update_seq => true}, [0, 1]) + assert seq_int(resp.body["update_seq"]) == 103 + end +end diff --git a/test/javascript/tests/view_update_seq.js b/test/javascript/tests/view_update_seq.js index c14453f05..8b3a3fb84 100644 --- a/test/javascript/tests/view_update_seq.js +++ b/test/javascript/tests/view_update_seq.js @@ -10,6 +10,7 @@ // License for the specific language governing permissions and limitations under // the License. +couchTests.elixir = true; couchTests.view_update_seq = function(debug) { var db_name = get_random_db_name(); var db = new CouchDB(db_name, {"X-Couch-Full-Commit":"false"}); -- cgit v1.2.1 From b6446c38b1b876b3f162172e752682dbfb0ec5df Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Tue, 30 Jun 2020 12:59:13 +0100 Subject: Update README.md --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index a2335b7f1..a2ddbb0db 100644 --- a/README.md +++ b/README.md @@ -15,3 +15,4 @@ TODO 2. Chunkify large values over multiple rows? 3. Sequential node ids? 4. encode values in a non-erlang way? +5. custom ordering (couch_ejson_compare:less_json_ids etc) -- cgit v1.2.1 From cbf60dc83c22a04d450b32feda6e361b83f13997 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Tue, 30 Jun 2020 14:01:01 +0100 Subject: fix bug in reduce handling during delete --- src/ebtree.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ebtree.erl b/src/ebtree.erl index 62911a33f..1344e88fb 100644 --- a/src/ebtree.erl +++ b/src/ebtree.erl @@ -305,7 +305,7 @@ delete(Tx, #tree{} = Tree, #node{} = Parent0, Key) -> set_node(Tx, Tree, Child1), {ChildKey, ChildId0, _OldReduction} = lists:keyfind(ChildId0, 2, Parent0#node.members), Parent0#node{ - members = lists:keyreplace(ChildId0, 1, Parent0#node.members, + members = lists:keyreplace(ChildId0, 2, Parent0#node.members, {ChildKey, Child1#node.id, reduce_node(Tree, Child1)}) } end. -- cgit v1.2.1 From 4b726ddce39b93bca86cd54ad5931627c7f55e39 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Tue, 30 Jun 2020 15:04:43 +0100 Subject: add gitignore --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 000000000..04f4f25d7 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +.erlfdb/ +_build/ +rebar.lock -- cgit v1.2.1 From 75bab7c58b9651db6fb262d933c5a601b0f1a0dc Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Tue, 30 Jun 2020 17:21:05 +0100 Subject: Pluggable collation --- README.md | 1 - src/ebtree.erl | 128 ++++++++++++++++++++++++++++++++++++++++++--------------- 2 files changed, 96 insertions(+), 33 deletions(-) diff --git a/README.md b/README.md index a2ddbb0db..a2335b7f1 100644 --- a/README.md +++ b/README.md @@ -15,4 +15,3 @@ TODO 2. Chunkify large values over multiple rows? 3. Sequential node ids? 4. encode values in a non-erlang way? -5. custom ordering (couch_ejson_compare:less_json_ids etc) diff --git a/src/ebtree.erl b/src/ebtree.erl index 1344e88fb..ceb34e751 100644 --- a/src/ebtree.erl +++ b/src/ebtree.erl @@ -32,6 +32,7 @@ prefix, min, max, + collate_fun, reduce_fun }). @@ -50,18 +51,25 @@ init(Db, Prefix, Order) when is_binary(Prefix), is_integer(Order), Order > 2, Or erlfdb:transactional(Db, fun(Tx) -> erlfdb:clear_range_startswith(Tx, Prefix), set_meta(Tx, Prefix, ?META_ORDER, Order), - set_node(Tx, to_tree(Prefix, Order, undefined), #node{id = ?NODE_ROOT_ID}), + set_node(Tx, init_tree(Prefix, Order), #node{id = ?NODE_ROOT_ID}), ok end). open(Db, Prefix) -> - open(Db, Prefix, fun reduce_noop/2). + open(Db, Prefix, []). -open(Db, Prefix, ReduceFun) when is_function(ReduceFun, 2) -> - erlfdb:transactional(Db, fun(Tx) -> +open(Db, Prefix, Options) -> + ReduceFun = proplists:get_value(reduce_fun, Options, fun reduce_noop/2), + CollateFun = proplists:get_value(collate_fun, Options, fun collate_raw/2), + + erlfdb:transactional(Db, fun(Tx) -> Order = get_meta(Tx, Prefix, ?META_ORDER), - to_tree(Prefix, Order, ReduceFun) + Tree = init_tree(Prefix, Order), + Tree#tree{ + reduce_fun = ReduceFun, + collate_fun = CollateFun + } end). @@ -76,7 +84,7 @@ lookup(_Tx, #tree{} = _Tree, #node{level = 0} = Node, Key) -> find_value(Node, Key); lookup(Tx, #tree{} = Tree, #node{} = Node, Key) -> - ChildId = find_child_id(Node, Key), + ChildId = find_child_id(Tree, Node, Key), lookup(Tx, Tree, get_node_wait(Tx, Tree, ChildId), Key). %% reduce lookup @@ -107,7 +115,7 @@ range(Tx, #tree{} = Tree, #node{level = 0} = Node, StartKey, EndKey, Fun, Acc0) end; range(Tx, #tree{} = Tree, #node{} = Node, StartKey, EndKey, Fun, Acc) -> - ChildId = find_child_id(Node, StartKey), + ChildId = find_child_id(Tree, Node, StartKey), range(Tx, Tree, get_node_wait(Tx, Tree, ChildId), StartKey, EndKey, Fun, Acc). %% reverse range (inclusive of both ends) @@ -129,7 +137,7 @@ reverse_range(Tx, #tree{} = Tree, #node{level = 0} = Node, StartKey, EndKey, Fun end; reverse_range(Tx, #tree{} = Tree, #node{} = Node, StartKey, EndKey, Fun, Acc) -> - ChildId = find_child_id(Node, EndKey), + ChildId = find_child_id(Tree, Node, EndKey), reverse_range(Tx, Tree, get_node_wait(Tx, Tree, ChildId), StartKey, EndKey, Fun, Acc). @@ -188,8 +196,10 @@ split_child(Tx, #tree{} = Tree, #node{} = Parent0, #node{} = Child) -> RightReduction = reduce_node(Tree, RightChild), Parent1 = Parent0#node{ - members = lists:keymerge(1, [{LastLeftKey, LeftId, LeftReduction}, {LastRightKey, RightId, RightReduction}], - lists:keydelete(Child#node.id, 2, Parent0#node.members)) + members = + merge(Tree, [{LastLeftKey, LeftId, LeftReduction}], + merge(Tree, [{LastRightKey, RightId, RightReduction}], + lists:keydelete(Child#node.id, 2, Parent0#node.members))) }, clear_node(Tx, Tree, Child), set_nodes(Tx, Tree, [LeftChild, RightChild, Parent1]), @@ -214,13 +224,13 @@ update_next_neighbour(Tx, #tree{} = Tree, #node{} = Node) -> insert_nonfull(Tx, #tree{} = Tree, #node{level = 0} = Node0, Key, Value) -> Node1 = Node0#node{ - members = lists:ukeymerge(1, [{Key, Value}], Node0#node.members) + members = merge(Tree, [{Key, Value}], Node0#node.members) }, set_node(Tx, Tree, Node1), reduce_node(Tree, Node1); insert_nonfull(Tx, #tree{} = Tree, #node{} = Node0, Key, Value) -> - ChildId0 = find_child_id(Node0, Key), + ChildId0 = find_child_id(Tree, Node0, Key), Child0 = get_node_wait(Tx, Tree, ChildId0), Node1 = case ?is_full(Tree, Child0) of true -> @@ -228,13 +238,14 @@ insert_nonfull(Tx, #tree{} = Tree, #node{} = Node0, Key, Value) -> false -> Node0 end, - ChildId1 = find_child_id(Node1, Key), + ChildId1 = find_child_id(Tree, Node1, Key), Child1 = get_node_wait(Tx, Tree, ChildId1), NewReduction = insert_nonfull(Tx, Tree, Child1, Key, Value), {CurrentKey, ChildId1, _OldReduction} = lists:keyfind(ChildId1, 2, Node1#node.members), + [_, NewKey] = sort(Tree, [Key, CurrentKey]), Node2 = Node1#node{ members = lists:keyreplace(ChildId1, 2, Node1#node.members, - {erlang:max(Key, CurrentKey), ChildId1, NewReduction}) + {NewKey, ChildId1, NewReduction}) }, set_node(Tx, Tree, Node2), reduce_node(Tree, Node2). @@ -265,12 +276,12 @@ delete(_Tx, #tree{} = _Tree, #node{level = 0} = Node, Key) -> }; delete(Tx, #tree{} = Tree, #node{} = Parent0, Key) -> - ChildId0 = find_child_id(Parent0, Key), + ChildId0 = find_child_id(Tree, Parent0, Key), Child0 = get_node_wait(Tx, Tree, ChildId0), Child1 = delete(Tx, Tree, Child0, Key), case ?underflow(Tree, Child1) of true -> - SiblingId = find_sibling_id(Parent0, ChildId0, Key), + SiblingId = find_sibling_id(Tree, Parent0, ChildId0, Key), Sibling = get_node_wait(Tx, Tree, SiblingId), NewNodes = case ?at_min(Tree, Sibling) of true -> @@ -290,7 +301,7 @@ delete(Tx, #tree{} = Tree, #node{} = Parent0, Key) -> Members1 = lists:keydelete(ChildId0, 2, Members0), Members2 = lists:keydelete(Sibling#node.id, 2, Members1), Members3 = lists:foldl(fun(N, Acc) -> - lists:merge([{last_key(N), N#node.id, reduce_node(Tree, N)}], Acc) + merge(Tree, [{last_key(N), N#node.id, reduce_node(Tree, N)}], Acc) end, Members2, NewNodes), Parent1 = Parent0#node{ @@ -353,22 +364,25 @@ find_value(#node{level = 0} = Node, Key) -> lists:keyfind(Key, 1, Node#node.members). -find_child_id(#node{level = L} = Node, Key) when L > 0 -> - find_child_id_int(Node#node.members, Key). - -find_child_id_int([{K, V, _R}], Key) when Key > K -> - V; +find_child_id(#tree{} = Tree, #node{level = L} = Node, Key) when L > 0 -> + find_child_id_int(Tree, Node#node.members, Key). -find_child_id_int([{K, V, _R} | _Rest], Key) when Key =< K -> +find_child_id_int(#tree{} = _Tree, [{_K, V, _R}], _Key) -> V; -find_child_id_int([_ | Rest], Key) -> - find_child_id_int(Rest, Key). +find_child_id_int(#tree{} = Tree, [{K, V, _R} | Rest], Key) -> + #tree{collate_fun = CollateFun} = Tree, + case CollateFun(Key, K) of + true -> + V; + false -> + find_child_id_int(Tree, Rest, Key) + end. -find_sibling_id(#node{level = L} = Node0, Id, Key) when L > 0 -> +find_sibling_id(#tree{} = Tree, #node{level = L} = Node0, Id, Key) when L > 0 -> Node1 = Node0#node{members = lists:keydelete(Id, 2, Node0#node.members)}, - find_child_id(Node1, Key). + find_child_id(Tree, Node1, Key). %% metadata functions @@ -558,15 +572,40 @@ reduce_node(#tree{} = Tree, #node{} = Node) -> ReduceFun(Rs, true). +%% collation functions + +merge(#tree{} = Tree, List1, List2) -> + #tree{collate_fun = CollateFun} = Tree, + lists:merge(collation_wrapper_fun(CollateFun), List1, List2). + +sort(#tree{} = Tree, List) -> + #tree{collate_fun = CollateFun} = Tree, + lists:sort(collation_wrapper_fun(CollateFun), List). + + +collation_wrapper_fun(CollateFun) -> + fun + ({K1, _V1}, {K2, _V2}) -> + CollateFun(K1, K2); + ({K1, _V1, _R1}, {K2, _V2, _R2}) -> + CollateFun(K1, K2); + (K1, K2) -> + CollateFun(K1, K2) + end. + + +collate_raw(K1, K2) -> + K1 =< K2. + + %% private functions -to_tree(Prefix, Order, ReduceFun) +init_tree(Prefix, Order) when is_binary(Prefix), is_integer(Order), Order > 2, Order rem 2 == 0 -> #tree{ prefix = Prefix, min = Order div 2, - max = Order, - reduce_fun = ReduceFun + max = Order }. @@ -646,7 +685,7 @@ range_after_delete_test() -> reduce_test() -> Db = erlfdb_util:get_test_db([empty]), ?MODULE:init(Db, <<1,2,3>>, 4), - Tree = ?MODULE:open(Db, <<1,2,3>>, fun reduce_sum/2), + Tree = ?MODULE:open(Db, <<1,2,3>>, [{reduce_fun, fun reduce_sum/2}]), Max = 100, Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, Max)])], lists:foreach(fun(Key) -> ?MODULE:insert(Db, Tree, Key, Key) end, Keys), @@ -656,7 +695,7 @@ reduce_test() -> reduce_after_delete_test() -> Db = erlfdb_util:get_test_db([empty]), ?MODULE:init(Db, <<1,2,3>>, 4), - Tree = ?MODULE:open(Db, <<1,2,3>>, fun reduce_sum/2), + Tree = ?MODULE:open(Db, <<1,2,3>>, [{reduce_fun, fun reduce_sum/2}]), Max = 100, Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, Max)])], lists:foreach(fun(Key) -> ?MODULE:insert(Db, Tree, Key, Key) end, Keys), @@ -664,6 +703,31 @@ reduce_after_delete_test() -> lists:foreach(fun(Key) -> ?MODULE:delete(Db, Tree, Key) end, Keys), ?assertEqual(0, ?MODULE:reduce(Db, Tree)). + +raw_collation_test() -> + Db = erlfdb_util:get_test_db([empty]), + ?MODULE:init(Db, <<1,2,3>>, 4), + Tree = ?MODULE:open(Db, <<1,2,3>>), + ?MODULE:insert(Db, Tree, null, null), + ?MODULE:insert(Db, Tree, 1, 1), + ?assertEqual([{1, 1}, {null, null}], ?MODULE:range(Db, Tree, 1, null, fun(E, A) -> A ++ E end, [])). + + +custom_collation_test() -> + Db = erlfdb_util:get_test_db([empty]), + ?MODULE:init(Db, <<1,2,3>>, 4), + CollateFun = fun + (null, 1) -> + true; + (1, null) -> + false + end, + Tree = ?MODULE:open(Db, <<1,2,3>>, [{collate_fun, CollateFun}]), + ?MODULE:insert(Db, Tree, null, null), + ?MODULE:insert(Db, Tree, 1, 1), + ?assertEqual([{null, null}, {1, 1}], ?MODULE:range(Db, Tree, 1, null, fun(E, A) -> A ++ E end, [])). + + intense_lookup_test_() -> [ {timeout, 1000, fun() -> lookup_test_fun(1000, 20) end}, -- cgit v1.2.1 From 33b610efdbcd72ca16e87563c7dec0fa2a218b04 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Tue, 30 Jun 2020 23:09:08 +0100 Subject: nah --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index a2335b7f1..4d8c4ce30 100644 --- a/README.md +++ b/README.md @@ -14,4 +14,4 @@ TODO them outside of a transaction. 2. Chunkify large values over multiple rows? 3. Sequential node ids? -4. encode values in a non-erlang way? + -- cgit v1.2.1 From 692d7473e267d0e9f36af2b2d34d710b2b68dfa8 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Wed, 1 Jul 2020 11:42:34 +0100 Subject: remove ?MODULE: nonsense --- src/ebtree.erl | 92 +++++++++++++++++++++++++++++----------------------------- 1 file changed, 46 insertions(+), 46 deletions(-) diff --git a/src/ebtree.erl b/src/ebtree.erl index ceb34e751..301c3fd17 100644 --- a/src/ebtree.erl +++ b/src/ebtree.erl @@ -652,80 +652,80 @@ b64(Bin) -> lookup_test() -> Db = erlfdb_util:get_test_db([empty]), - ?MODULE:init(Db, <<1,2,3>>, 4), - Tree = ?MODULE:open(Db, <<1,2,3>>), + init(Db, <<1,2,3>>, 4), + Tree = open(Db, <<1,2,3>>), Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, 100)])], - lists:foreach(fun(Key) -> ?MODULE:insert(Db, Tree, Key, Key + 1) end, Keys), - lists:foreach(fun(Key) -> ?assertEqual({Key, Key + 1}, ?MODULE:lookup(Db, Tree, Key)) end, Keys). + lists:foreach(fun(Key) -> insert(Db, Tree, Key, Key + 1) end, Keys), + lists:foreach(fun(Key) -> ?assertEqual({Key, Key + 1}, lookup(Db, Tree, Key)) end, Keys). delete_test() -> Db = erlfdb_util:get_test_db([empty]), - ?MODULE:init(Db, <<1,2,3>>, 4), - Tree = ?MODULE:open(Db, <<1,2,3>>), + init(Db, <<1,2,3>>, 4), + Tree = open(Db, <<1,2,3>>), Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, 100)])], - lists:foreach(fun(Key) -> ?MODULE:insert(Db, Tree, Key, Key + 1) end, Keys), - lists:foreach(fun(Key) -> ?assertEqual({Key, Key + 1}, ?MODULE:lookup(Db, Tree, Key)) end, Keys), - lists:foreach(fun(Key) -> ?MODULE:delete(Db, Tree, Key) end, Keys), - lists:foreach(fun(Key) -> ?assertEqual(false, ?MODULE:lookup(Db, Tree, Key)) end, Keys). + lists:foreach(fun(Key) -> insert(Db, Tree, Key, Key + 1) end, Keys), + lists:foreach(fun(Key) -> ?assertEqual({Key, Key + 1}, lookup(Db, Tree, Key)) end, Keys), + lists:foreach(fun(Key) -> delete(Db, Tree, Key) end, Keys), + lists:foreach(fun(Key) -> ?assertEqual(false, lookup(Db, Tree, Key)) end, Keys). range_after_delete_test() -> Db = erlfdb_util:get_test_db([empty]), - ?MODULE:init(Db, <<1,2,3>>, 4), - Tree = ?MODULE:open(Db, <<1,2,3>>), + init(Db, <<1,2,3>>, 4), + Tree = open(Db, <<1,2,3>>), Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, 100)])], - lists:foreach(fun(Key) -> ?MODULE:insert(Db, Tree, Key, Key + 1) end, Keys), - lists:foreach(fun(Key) -> ?assertEqual({Key, Key + 1}, ?MODULE:lookup(Db, Tree, Key)) end, Keys), - lists:foreach(fun(Key) -> ?MODULE:delete(Db, Tree, Key) end, lists:seq(1, 100, 2)), - ?assertEqual(50, ?MODULE:range(Db, Tree, 1, 100, fun(E, A) -> length(E) + A end, 0)), - ?assertEqual(50, ?MODULE:reverse_range(Db, Tree, 1, 100, fun(E, A) -> length(E) + A end, 0)). + lists:foreach(fun(Key) -> insert(Db, Tree, Key, Key + 1) end, Keys), + lists:foreach(fun(Key) -> ?assertEqual({Key, Key + 1}, lookup(Db, Tree, Key)) end, Keys), + lists:foreach(fun(Key) -> delete(Db, Tree, Key) end, lists:seq(1, 100, 2)), + ?assertEqual(50, range(Db, Tree, 1, 100, fun(E, A) -> length(E) + A end, 0)), + ?assertEqual(50, reverse_range(Db, Tree, 1, 100, fun(E, A) -> length(E) + A end, 0)). reduce_test() -> Db = erlfdb_util:get_test_db([empty]), - ?MODULE:init(Db, <<1,2,3>>, 4), - Tree = ?MODULE:open(Db, <<1,2,3>>, [{reduce_fun, fun reduce_sum/2}]), + init(Db, <<1,2,3>>, 4), + Tree = open(Db, <<1,2,3>>, [{reduce_fun, fun reduce_sum/2}]), Max = 100, Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, Max)])], - lists:foreach(fun(Key) -> ?MODULE:insert(Db, Tree, Key, Key) end, Keys), - ?assertEqual(round(Max * ((1 + Max) / 2)), ?MODULE:reduce(Db, Tree)). + lists:foreach(fun(Key) -> insert(Db, Tree, Key, Key) end, Keys), + ?assertEqual(round(Max * ((1 + Max) / 2)), reduce(Db, Tree)). reduce_after_delete_test() -> Db = erlfdb_util:get_test_db([empty]), - ?MODULE:init(Db, <<1,2,3>>, 4), - Tree = ?MODULE:open(Db, <<1,2,3>>, [{reduce_fun, fun reduce_sum/2}]), + init(Db, <<1,2,3>>, 4), + Tree = open(Db, <<1,2,3>>, [{reduce_fun, fun reduce_sum/2}]), Max = 100, Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, Max)])], - lists:foreach(fun(Key) -> ?MODULE:insert(Db, Tree, Key, Key) end, Keys), - ?assertEqual(round(Max * ((1 + Max) / 2)), ?MODULE:reduce(Db, Tree)), - lists:foreach(fun(Key) -> ?MODULE:delete(Db, Tree, Key) end, Keys), - ?assertEqual(0, ?MODULE:reduce(Db, Tree)). + lists:foreach(fun(Key) -> insert(Db, Tree, Key, Key) end, Keys), + ?assertEqual(round(Max * ((1 + Max) / 2)), reduce(Db, Tree)), + lists:foreach(fun(Key) -> delete(Db, Tree, Key) end, Keys), + ?assertEqual(0, reduce(Db, Tree)). raw_collation_test() -> Db = erlfdb_util:get_test_db([empty]), - ?MODULE:init(Db, <<1,2,3>>, 4), - Tree = ?MODULE:open(Db, <<1,2,3>>), - ?MODULE:insert(Db, Tree, null, null), - ?MODULE:insert(Db, Tree, 1, 1), - ?assertEqual([{1, 1}, {null, null}], ?MODULE:range(Db, Tree, 1, null, fun(E, A) -> A ++ E end, [])). + init(Db, <<1,2,3>>, 4), + Tree = open(Db, <<1,2,3>>), + insert(Db, Tree, null, null), + insert(Db, Tree, 1, 1), + ?assertEqual([{1, 1}, {null, null}], range(Db, Tree, 1, null, fun(E, A) -> A ++ E end, [])). custom_collation_test() -> Db = erlfdb_util:get_test_db([empty]), - ?MODULE:init(Db, <<1,2,3>>, 4), + init(Db, <<1,2,3>>, 4), CollateFun = fun (null, 1) -> true; (1, null) -> false end, - Tree = ?MODULE:open(Db, <<1,2,3>>, [{collate_fun, CollateFun}]), - ?MODULE:insert(Db, Tree, null, null), - ?MODULE:insert(Db, Tree, 1, 1), - ?assertEqual([{null, null}, {1, 1}], ?MODULE:range(Db, Tree, 1, null, fun(E, A) -> A ++ E end, [])). + Tree = open(Db, <<1,2,3>>, [{collate_fun, CollateFun}]), + insert(Db, Tree, null, null), + insert(Db, Tree, 1, 1), + ?assertEqual([{null, null}, {1, 1}], range(Db, Tree, 1, null, fun(E, A) -> A ++ E end, [])). intense_lookup_test_() -> @@ -738,12 +738,12 @@ intense_lookup_test_() -> lookup_test_fun(Max, Order) -> Db = erlfdb_util:get_test_db([empty]), - ?MODULE:init(Db, <<1,2,3>>, Order), + init(Db, <<1,2,3>>, Order), Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, Max, 2)])], T0 = erlang:monotonic_time(), - Tree = lists:foldl(fun(Key, T) -> ?MODULE:insert(Db, T, Key, Key + 1) end, ?MODULE:open(Db, <<1,2,3>>), Keys), + Tree = lists:foldl(fun(Key, T) -> insert(Db, T, Key, Key + 1) end, open(Db, <<1,2,3>>), Keys), T1 = erlang:monotonic_time(), - lists:foreach(fun(Key) -> ?assertEqual({Key, Key + 1}, ?MODULE:lookup(Db, Tree, Key)) end, Keys), + lists:foreach(fun(Key) -> ?assertEqual({Key, Key + 1}, lookup(Db, Tree, Key)) end, Keys), T2 = erlang:monotonic_time(), ?debugFmt("~B order. ~B iterations. insert rate: ~.2f/s, lookup rate: ~.2f/s", [Order, Max, Max / sec(T1 - T0), Max / sec(T2 - T1)]). @@ -752,15 +752,15 @@ lookup_test_fun(Max, Order) -> range_test_() -> {timeout, 1000, fun() -> Db = erlfdb_util:get_test_db([empty]), - ?MODULE:init(Db, <<1,2,3>>, 10), + init(Db, <<1,2,3>>, 10), Max = 1000, Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, Max)])], - Tree = lists:foldl(fun(Key, T) -> ?MODULE:insert(Db, T, Key, Key + 1) end, ?MODULE:open(Db, <<1,2,3>>), Keys), + Tree = lists:foldl(fun(Key, T) -> insert(Db, T, Key, Key + 1) end, open(Db, <<1,2,3>>), Keys), lists:foreach( fun(_) -> [StartKey, EndKey] = lists:sort([rand:uniform(Max), rand:uniform(Max)]), ?assertEqual(EndKey - StartKey + 1, - ?MODULE:range(Db, Tree, StartKey, EndKey, fun(E, A) -> length(E) + A end, 0) + range(Db, Tree, StartKey, EndKey, fun(E, A) -> length(E) + A end, 0) ) end, lists:seq(1, 1000)) end}. @@ -769,15 +769,15 @@ range_test_() -> reverse_range_test_() -> {timeout, 1000, fun() -> Db = erlfdb_util:get_test_db([empty]), - ?MODULE:init(Db, <<1,2,3>>, 10), + init(Db, <<1,2,3>>, 10), Max = 1000, Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, Max)])], - Tree = lists:foldl(fun(Key, T) -> ?MODULE:insert(Db, T, Key, Key + 1) end, ?MODULE:open(Db, <<1,2,3>>), Keys), + Tree = lists:foldl(fun(Key, T) -> insert(Db, T, Key, Key + 1) end, open(Db, <<1,2,3>>), Keys), lists:foreach( fun(_) -> [StartKey, EndKey] = lists:sort([rand:uniform(Max), rand:uniform(Max)]), ?assertEqual(EndKey - StartKey + 1, - ?MODULE:reverse_range(Db, Tree, StartKey, EndKey, fun(E, A) -> length(E) + A end, 0) + reverse_range(Db, Tree, StartKey, EndKey, fun(E, A) -> length(E) + A end, 0) ) end, lists:seq(1, 1000)) end}. -- cgit v1.2.1 From 16e7a442f22cdea863526088d911dbd442d91ba3 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Wed, 1 Jul 2020 11:38:25 +0100 Subject: improve range tests --- src/ebtree.erl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/ebtree.erl b/src/ebtree.erl index 301c3fd17..9ec4f2517 100644 --- a/src/ebtree.erl +++ b/src/ebtree.erl @@ -759,8 +759,8 @@ range_test_() -> lists:foreach( fun(_) -> [StartKey, EndKey] = lists:sort([rand:uniform(Max), rand:uniform(Max)]), - ?assertEqual(EndKey - StartKey + 1, - range(Db, Tree, StartKey, EndKey, fun(E, A) -> length(E) + A end, 0) + ?assertEqual([{K, K + 1} || K <- lists:seq(StartKey, EndKey)], + range(Db, Tree, StartKey, EndKey, fun(E, A) -> A ++ E end, []) ) end, lists:seq(1, 1000)) end}. @@ -776,8 +776,8 @@ reverse_range_test_() -> lists:foreach( fun(_) -> [StartKey, EndKey] = lists:sort([rand:uniform(Max), rand:uniform(Max)]), - ?assertEqual(EndKey - StartKey + 1, - reverse_range(Db, Tree, StartKey, EndKey, fun(E, A) -> length(E) + A end, 0) + ?assertEqual([{K, K + 1} || K <- lists:seq(EndKey, StartKey, -1)], + reverse_range(Db, Tree, StartKey, EndKey, fun(E, A) -> A ++ E end, []) ) end, lists:seq(1, 1000)) end}. -- cgit v1.2.1 From 88371b5aed63e5b3e02f2e5b1b6d17cb049aefae Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Wed, 1 Jul 2020 12:50:44 +0100 Subject: fix range when using non-standard collation --- src/ebtree.erl | 47 +++++++++++++++++++++++++++++++++++++---------- 1 file changed, 37 insertions(+), 10 deletions(-) diff --git a/src/ebtree.erl b/src/ebtree.erl index 9ec4f2517..44907a749 100644 --- a/src/ebtree.erl +++ b/src/ebtree.erl @@ -104,10 +104,11 @@ range(Db, #tree{} = Tree, StartKey, EndKey, Fun, Acc0) -> end). range(Tx, #tree{} = Tree, #node{level = 0} = Node, StartKey, EndKey, Fun, Acc0) -> - InRange = [{K, V} || {K, V} <- Node#node.members, K >= StartKey, K =< EndKey], + InRange = [{K, V} || {K, V} <- Node#node.members, + less_than_or_equal(Tree, StartKey, K), less_than_or_equal(Tree, K, EndKey)], Acc1 = Fun(InRange, Acc0), LastKey = last_key(Node), - case Node#node.next /= undefined andalso EndKey >= LastKey of + case Node#node.next /= undefined andalso less_than_or_equal(Tree, LastKey, EndKey) of true -> range(Tx, Tree, get_node_wait(Tx, Tree, Node#node.next), StartKey, EndKey, Fun, Acc1); false -> @@ -574,6 +575,11 @@ reduce_node(#tree{} = Tree, #node{} = Node) -> %% collation functions +less_than_or_equal(#tree{} = Tree, A, B) -> + #tree{collate_fun = CollateFun} = Tree, + CollateFun(A, B). + + merge(#tree{} = Tree, List1, List2) -> #tree{collate_fun = CollateFun} = Tree, lists:merge(collation_wrapper_fun(CollateFun), List1, List2). @@ -716,16 +722,11 @@ raw_collation_test() -> custom_collation_test() -> Db = erlfdb_util:get_test_db([empty]), init(Db, <<1,2,3>>, 4), - CollateFun = fun - (null, 1) -> - true; - (1, null) -> - false - end, + CollateFun = fun(A, B) -> B =< A end, Tree = open(Db, <<1,2,3>>, [{collate_fun, CollateFun}]), - insert(Db, Tree, null, null), insert(Db, Tree, 1, 1), - ?assertEqual([{null, null}, {1, 1}], range(Db, Tree, 1, null, fun(E, A) -> A ++ E end, [])). + insert(Db, Tree, 2, 2), + ?assertEqual([{2, 2}, {1, 1}], range(Db, Tree, 3, 0, fun(E, A) -> A ++ E end, [])). intense_lookup_test_() -> @@ -783,6 +784,32 @@ reverse_range_test_() -> end}. +custom_collation_range_test_() -> + {timeout, 1000, fun() -> + Db = erlfdb_util:get_test_db([empty]), + init(Db, <<1,2,3>>, 10), + Max = 1000, + Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, Max)])], + CollateFun = fun(A, B) -> B =< A end, + Tree = open(Db, <<1,2,3>>, [{collate_fun, CollateFun}]), + lists:foldl(fun(Key, T) -> insert(Db, T, Key, Key + 1) end, Tree, Keys), + lists:foreach( + fun(_) -> + [StartKey, EndKey] = sort(Tree, [rand:uniform(Max), rand:uniform(Max)]), + Seq = if + StartKey < EndKey -> + lists:seq(StartKey, EndKey); + true -> + lists:seq(StartKey, EndKey, -1) + end, + ?assertEqual([{K, K + 1} || K <- Seq], + range(Db, Tree, StartKey, EndKey, fun(E, A) -> A ++ E end, []) + ) end, + lists:seq(1, 1000)) + end}. + + + sec(Native) -> erlang:max(1, erlang:convert_time_unit(Native, native, second)). -- cgit v1.2.1 From b95cbb880a646283b03a71fda2f058621598a462 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Wed, 1 Jul 2020 12:59:48 +0100 Subject: fix reverse_range when using non-standard collation --- src/ebtree.erl | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/src/ebtree.erl b/src/ebtree.erl index 44907a749..0734cbf2d 100644 --- a/src/ebtree.erl +++ b/src/ebtree.erl @@ -127,10 +127,11 @@ reverse_range(Db, #tree{} = Tree, StartKey, EndKey, Fun, Acc0) -> end). reverse_range(Tx, #tree{} = Tree, #node{level = 0} = Node, StartKey, EndKey, Fun, Acc0) -> - InRange = [{K, V} || {K, V} <- Node#node.members, K >= StartKey, K =< EndKey], + InRange = [{K, V} || {K, V} <- Node#node.members, + less_than_or_equal(Tree, StartKey, K), less_than_or_equal(Tree, K, EndKey)], Acc1 = Fun(lists:reverse(InRange), Acc0), {FirstKey, _} = hd(Node#node.members), - case Node#node.prev /= undefined andalso StartKey =< FirstKey of + case Node#node.prev /= undefined andalso less_than_or_equal(Tree, StartKey, FirstKey) of true -> reverse_range(Tx, Tree, get_node_wait(Tx, Tree, Node#node.prev), StartKey, EndKey, Fun, Acc1); false -> @@ -809,6 +810,30 @@ custom_collation_range_test_() -> end}. +custom_collation_reverse_range_test_() -> + {timeout, 1000, fun() -> + Db = erlfdb_util:get_test_db([empty]), + init(Db, <<1,2,3>>, 10), + Max = 1000, + Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, Max)])], + CollateFun = fun(A, B) -> B =< A end, + Tree = open(Db, <<1,2,3>>, [{collate_fun, CollateFun}]), + lists:foldl(fun(Key, T) -> insert(Db, T, Key, Key + 1) end, Tree, Keys), + lists:foreach( + fun(_) -> + [StartKey, EndKey] = sort(Tree, [rand:uniform(Max), rand:uniform(Max)]), + Seq = if + StartKey < EndKey -> + lists:seq(StartKey, EndKey); + true -> + lists:seq(StartKey, EndKey, -1) + end, + ?assertEqual([{K, K + 1} || K <- lists:reverse(Seq)], + reverse_range(Db, Tree, StartKey, EndKey, fun(E, A) -> A ++ E end, []) + ) end, + lists:seq(1, 1000)) + end}. + sec(Native) -> erlang:max(1, erlang:convert_time_unit(Native, native, second)). -- cgit v1.2.1 From 4e176c30a8bca2bcfa45408c5f87030d4b9f0ff2 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Fri, 3 Jul 2020 07:53:34 +0100 Subject: Accidentally introduced duplicates with custom collation --- src/ebtree.erl | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/src/ebtree.erl b/src/ebtree.erl index 0734cbf2d..e4ac14cbc 100644 --- a/src/ebtree.erl +++ b/src/ebtree.erl @@ -199,8 +199,8 @@ split_child(Tx, #tree{} = Tree, #node{} = Parent0, #node{} = Child) -> Parent1 = Parent0#node{ members = - merge(Tree, [{LastLeftKey, LeftId, LeftReduction}], - merge(Tree, [{LastRightKey, RightId, RightReduction}], + umerge(Tree, [{LastLeftKey, LeftId, LeftReduction}], + umerge(Tree, [{LastRightKey, RightId, RightReduction}], lists:keydelete(Child#node.id, 2, Parent0#node.members))) }, clear_node(Tx, Tree, Child), @@ -226,7 +226,7 @@ update_next_neighbour(Tx, #tree{} = Tree, #node{} = Node) -> insert_nonfull(Tx, #tree{} = Tree, #node{level = 0} = Node0, Key, Value) -> Node1 = Node0#node{ - members = merge(Tree, [{Key, Value}], Node0#node.members) + members = umerge(Tree, [{Key, Value}], Node0#node.members) }, set_node(Tx, Tree, Node1), reduce_node(Tree, Node1); @@ -303,7 +303,7 @@ delete(Tx, #tree{} = Tree, #node{} = Parent0, Key) -> Members1 = lists:keydelete(ChildId0, 2, Members0), Members2 = lists:keydelete(Sibling#node.id, 2, Members1), Members3 = lists:foldl(fun(N, Acc) -> - merge(Tree, [{last_key(N), N#node.id, reduce_node(Tree, N)}], Acc) + umerge(Tree, [{last_key(N), N#node.id, reduce_node(Tree, N)}], Acc) end, Members2, NewNodes), Parent1 = Parent0#node{ @@ -475,6 +475,8 @@ validate_tree(Tx, #tree{} = Tree, [NodeTuple | Rest]) -> validate_node(#tree{} = Tree, #node{} = Node) -> NumKeys = length(Node#node.members), IsRoot = ?NODE_ROOT_ID == Node#node.id, + OutOfOrder = Node#node.members /= sort(Tree, Node#node.members), + Duplicates = Node#node.members /= usort(Tree, Node#node.members), if Node#node.id == undefined -> erlang:error({node_without_id, Node}); @@ -482,6 +484,10 @@ validate_node(#tree{} = Tree, #node{} = Node) -> erlang:error({too_few_keys, Node}); NumKeys > Tree#tree.max -> erlang:error({too_many_keys, Node}); + OutOfOrder -> + erlang:error({out_of_order, Node}); + Duplicates -> + erlang:error({duplicates, Node}); true -> ok end. @@ -581,15 +587,20 @@ less_than_or_equal(#tree{} = Tree, A, B) -> CollateFun(A, B). -merge(#tree{} = Tree, List1, List2) -> +umerge(#tree{} = Tree, List1, List2) -> #tree{collate_fun = CollateFun} = Tree, - lists:merge(collation_wrapper_fun(CollateFun), List1, List2). + lists:umerge(collation_wrapper_fun(CollateFun), List1, List2). + sort(#tree{} = Tree, List) -> #tree{collate_fun = CollateFun} = Tree, lists:sort(collation_wrapper_fun(CollateFun), List). +usort(#tree{} = Tree, List) -> + #tree{collate_fun = CollateFun} = Tree, + lists:usort(collation_wrapper_fun(CollateFun), List). + collation_wrapper_fun(CollateFun) -> fun ({K1, _V1}, {K2, _V2}) -> -- cgit v1.2.1 From fb378e59f174640eaa0e3adefd21fafe5a8613e5 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Fri, 3 Jul 2020 14:00:40 +0100 Subject: fix delete with custom collation --- src/ebtree.erl | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/src/ebtree.erl b/src/ebtree.erl index e4ac14cbc..b4af12716 100644 --- a/src/ebtree.erl +++ b/src/ebtree.erl @@ -287,12 +287,12 @@ delete(Tx, #tree{} = Tree, #node{} = Parent0, Key) -> Sibling = get_node_wait(Tx, Tree, SiblingId), NewNodes = case ?at_min(Tree, Sibling) of true -> - Merged = merge(Child1, Sibling), + Merged = merge(Tree, Child1, Sibling), update_prev_neighbour(Tx, Tree, Merged), update_next_neighbour(Tx, Tree, Merged), [Merged]; false -> - {Left, Right} = rebalance(Child1, Sibling), + {Left, Right} = rebalance(Tree, Child1, Sibling), update_prev_neighbour(Tx, Tree, Left), update_next_neighbour(Tx, Tree, Right), [Left, Right] @@ -324,10 +324,9 @@ delete(Tx, #tree{} = Tree, #node{} = Parent0, Key) -> end. -merge(#node{members = RightM} = Right, #node{members = LeftM} = Left) when RightM > LeftM -> - merge(Left, Right); +merge(#tree{} = Tree, #node{level = Level} = Node1, #node{level = Level} = Node2) -> + [Left, Right] = sort(Tree, [Node1, Node2]), -merge(#node{level = Level} = Left, #node{level = Level} = Right) -> #node{ id = new_node_id(), level = Level, @@ -337,10 +336,9 @@ merge(#node{level = Level} = Left, #node{level = Level} = Right) -> }. -rebalance(#node{members = RightM} = Right, #node{members = LeftM} = Left) when RightM > LeftM -> - rebalance(Left, Right); +rebalance(#tree{} = Tree, #node{level = Level} = Node1, #node{level = Level} = Node2) -> + [Left0, Right0] = sort(Tree, [Node1, Node2]), -rebalance(#node{level = Level} = Left0, #node{level = Level} = Right0) -> Members = lists:append(Left0#node.members, Right0#node.members), {LeftMembers, RightMembers} = lists:split(length(Members) div 2, Members), @@ -603,6 +601,8 @@ usort(#tree{} = Tree, List) -> collation_wrapper_fun(CollateFun) -> fun + (#node{} = N1, #node{} = N2) -> + CollateFun(first_key(N1), first_key(N2)); ({K1, _V1}, {K2, _V2}) -> CollateFun(K1, K2); ({K1, _V1, _R1}, {K2, _V2, _R2}) -> @@ -627,6 +627,13 @@ init_tree(Prefix, Order) }. +first_key(#node{} = Node) -> + first_key(Node#node.members); + +first_key(Members) when is_list(Members) -> + element(1, hd(Members)). + + last_key(#node{} = Node) -> last_key(Node#node.members); -- cgit v1.2.1 From 9d691c282f0e6a1b57682ba9db85a15ab705e308 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Fri, 3 Jul 2020 07:22:34 +0100 Subject: remember firstkey of subtree --- src/ebtree.erl | 69 +++++++++++++++++++++++++++++++++------------------------- 1 file changed, 39 insertions(+), 30 deletions(-) diff --git a/src/ebtree.erl b/src/ebtree.erl index b4af12716..5114abc87 100644 --- a/src/ebtree.erl +++ b/src/ebtree.erl @@ -25,7 +25,7 @@ level = 0, prev, next, - members = [] %% [{Key0, Value0} | {Key0, Pointer0, Reduction0}, ...] + members = [] %% [{Key0, Value0} | {FirstKey0, LastKey0, Pointer0, Reduction0}, ...] }). -record(tree, { @@ -130,7 +130,7 @@ reverse_range(Tx, #tree{} = Tree, #node{level = 0} = Node, StartKey, EndKey, Fun InRange = [{K, V} || {K, V} <- Node#node.members, less_than_or_equal(Tree, StartKey, K), less_than_or_equal(Tree, K, EndKey)], Acc1 = Fun(lists:reverse(InRange), Acc0), - {FirstKey, _} = hd(Node#node.members), + FirstKey = first_key(Node), case Node#node.prev /= undefined andalso less_than_or_equal(Tree, StartKey, FirstKey) of true -> reverse_range(Tx, Tree, get_node_wait(Tx, Tree, Node#node.prev), StartKey, EndKey, Fun, Acc1); @@ -151,11 +151,12 @@ insert(Db, #tree{} = Tree, Key, Value) -> case ?is_full(Tree, Root0) of true -> OldRoot = Root0#node{id = new_node_id()}, + FirstKey = first_key(OldRoot), LastKey = last_key(OldRoot), Root1 = #node{ id = ?NODE_ROOT_ID, level = Root0#node.level + 1, - members = [{LastKey, OldRoot#node.id}]}, + members = [{FirstKey, LastKey, OldRoot#node.id, []}]}, Root2 = split_child(Tx, Tree, Root1, OldRoot), insert_nonfull(Tx, Tree, Root2, Key, Value); false -> @@ -190,7 +191,9 @@ split_child(Tx, #tree{} = Tree, #node{} = Parent0, #node{} = Child) -> update_next_neighbour(Tx, Tree, RightChild), %% adjust parent members + FirstLeftKey = first_key(LeftMembers), LastLeftKey = last_key(LeftMembers), + FirstRightKey = first_key(RightMembers), LastRightKey = last_key(RightMembers), %% adjust parent reductions @@ -199,9 +202,9 @@ split_child(Tx, #tree{} = Tree, #node{} = Parent0, #node{} = Child) -> Parent1 = Parent0#node{ members = - umerge(Tree, [{LastLeftKey, LeftId, LeftReduction}], - umerge(Tree, [{LastRightKey, RightId, RightReduction}], - lists:keydelete(Child#node.id, 2, Parent0#node.members))) + umerge(Tree, [{FirstLeftKey, LastLeftKey, LeftId, LeftReduction}], + umerge(Tree, [{FirstRightKey, LastRightKey, RightId, RightReduction}], + lists:keydelete(Child#node.id, 3, Parent0#node.members))) }, clear_node(Tx, Tree, Child), set_nodes(Tx, Tree, [LeftChild, RightChild, Parent1]), @@ -243,11 +246,12 @@ insert_nonfull(Tx, #tree{} = Tree, #node{} = Node0, Key, Value) -> ChildId1 = find_child_id(Tree, Node1, Key), Child1 = get_node_wait(Tx, Tree, ChildId1), NewReduction = insert_nonfull(Tx, Tree, Child1, Key, Value), - {CurrentKey, ChildId1, _OldReduction} = lists:keyfind(ChildId1, 2, Node1#node.members), - [_, NewKey] = sort(Tree, [Key, CurrentKey]), + {CurrentFirstKey, CurrentLastKey, ChildId1, _OldReduction} = lists:keyfind(ChildId1, 3, Node1#node.members), + [NewFirstKey, _] = sort(Tree, [Key, CurrentFirstKey]), + [_, NewLastKey] = sort(Tree, [Key, CurrentLastKey]), Node2 = Node1#node{ - members = lists:keyreplace(ChildId1, 2, Node1#node.members, - {NewKey, ChildId1, NewReduction}) + members = lists:keyreplace(ChildId1, 3, Node1#node.members, + {NewFirstKey, NewLastKey, ChildId1, NewReduction}) }, set_node(Tx, Tree, Node2), reduce_node(Tree, Node2). @@ -261,7 +265,7 @@ delete(Db, #tree{} = Tree, Key) -> case delete(Tx, Tree, Root0, Key) of % if only one child, make it the new root. #node{level = L, members = [_]} = Root1 when L > 0 -> - [{_, ChildId, _}] = Root1#node.members, + [{_, _, ChildId, _}] = Root1#node.members, Root2 = get_node_wait(Tx, Tree, ChildId), clear_node(Tx, Tree, Root2), set_node(Tx, Tree, Root2#node{id = ?NODE_ROOT_ID}); @@ -300,10 +304,10 @@ delete(Tx, #tree{} = Tree, #node{} = Parent0, Key) -> %% remove old members and insert new members Members0 = Parent0#node.members, - Members1 = lists:keydelete(ChildId0, 2, Members0), - Members2 = lists:keydelete(Sibling#node.id, 2, Members1), + Members1 = lists:keydelete(ChildId0, 3, Members0), + Members2 = lists:keydelete(Sibling#node.id, 3, Members1), Members3 = lists:foldl(fun(N, Acc) -> - umerge(Tree, [{last_key(N), N#node.id, reduce_node(Tree, N)}], Acc) + umerge(Tree, [{first_key(N), last_key(N), N#node.id, reduce_node(Tree, N)}], Acc) end, Members2, NewNodes), Parent1 = Parent0#node{ @@ -316,10 +320,10 @@ delete(Tx, #tree{} = Tree, #node{} = Parent0, Key) -> Parent1; false -> set_node(Tx, Tree, Child1), - {ChildKey, ChildId0, _OldReduction} = lists:keyfind(ChildId0, 2, Parent0#node.members), + {_OldFirstKey, _OldLastKey, ChildId0, _OldReduction} = lists:keyfind(ChildId0, 3, Parent0#node.members), Parent0#node{ - members = lists:keyreplace(ChildId0, 2, Parent0#node.members, - {ChildKey, Child1#node.id, reduce_node(Tree, Child1)}) + members = lists:keyreplace(ChildId0, 3, Parent0#node.members, + {first_key(Child1), last_key(Child1), Child1#node.id, reduce_node(Tree, Child1)}) } end. @@ -367,21 +371,21 @@ find_value(#node{level = 0} = Node, Key) -> find_child_id(#tree{} = Tree, #node{level = L} = Node, Key) when L > 0 -> find_child_id_int(Tree, Node#node.members, Key). -find_child_id_int(#tree{} = _Tree, [{_K, V, _R}], _Key) -> - V; +find_child_id_int(#tree{} = _Tree, [{_F, _L, P, _R}], _Key) -> + P; -find_child_id_int(#tree{} = Tree, [{K, V, _R} | Rest], Key) -> +find_child_id_int(#tree{} = Tree, [{_F, L, P, _R} | Rest], Key) -> #tree{collate_fun = CollateFun} = Tree, - case CollateFun(Key, K) of + case CollateFun(Key, L) of true -> - V; + P; false -> find_child_id_int(Tree, Rest, Key) end. find_sibling_id(#tree{} = Tree, #node{level = L} = Node0, Id, Key) when L > 0 -> - Node1 = Node0#node{members = lists:keydelete(Id, 2, Node0#node.members)}, + Node1 = Node0#node{members = lists:keydelete(Id, 3, Node0#node.members)}, find_child_id(Tree, Node1, Key). %% metadata functions @@ -464,8 +468,8 @@ validate_tree(Tx, #tree{} = Tree, #node{} = Node) -> validate_tree(_Tx, #tree{} = _Tree, []) -> ok; -validate_tree(Tx, #tree{} = Tree, [NodeTuple | Rest]) -> - Node = get_node_wait(Tx, Tree, element(2, NodeTuple)), +validate_tree(Tx, #tree{} = Tree, [{_F, _L, P, _R} | Rest]) -> + Node = get_node_wait(Tx, Tree, P), validate_tree(Tx, Tree, Node), validate_tree(Tx, Tree, Rest). @@ -574,7 +578,7 @@ reduce_node(#tree{} = Tree, #node{level = 0} = Node) -> reduce_node(#tree{} = Tree, #node{} = Node) -> #tree{reduce_fun = ReduceFun} = Tree, - Rs = [R || {_K, _V, R} <- Node#node.members], + Rs = [R || {_F, _L, _V, R} <- Node#node.members], ReduceFun(Rs, true). @@ -605,8 +609,8 @@ collation_wrapper_fun(CollateFun) -> CollateFun(first_key(N1), first_key(N2)); ({K1, _V1}, {K2, _V2}) -> CollateFun(K1, K2); - ({K1, _V1, _R1}, {K2, _V2, _R2}) -> - CollateFun(K1, K2); + ({_F1, L1, _V1, _R1}, {_F2, L2, _V2, _R2}) -> + CollateFun(L1, L2); (K1, K2) -> CollateFun(K1, K2) end. @@ -638,7 +642,12 @@ last_key(#node{} = Node) -> last_key(Node#node.members); last_key(Members) when is_list(Members) -> - element(1, lists:last(Members)). + case lists:last(Members) of + {K, _V} -> + K; + {_F, L, _P, _R} -> + L + end. new_node_id() -> @@ -661,7 +670,7 @@ print_node(#node{level = 0} = Node) -> print_node(#node{} = Node) -> io:format("#node{id = ~s, level = ~w, prev = ~s, next = ~s, members = ~s}~n~n", [base64:encode(Node#node.id), Node#node.level, b64(Node#node.prev), b64(Node#node.next), - [io_lib:format("{~w, ~s, ~w}, ", [K, b64(V), R]) || {K, V, R} <- Node#node.members]]). + [io_lib:format("{~w, ~w, ~s, ~w}, ", [F, L, b64(V), R]) || {F, L, V, R} <- Node#node.members]]). b64(undefined) -> -- cgit v1.2.1 From 9054e749df529249842860c72aa97a3d44f0767e Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Wed, 1 Jul 2020 13:11:26 +0100 Subject: document existence of reduce functionality --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index 4d8c4ce30..edcf80e73 100644 --- a/README.md +++ b/README.md @@ -6,6 +6,9 @@ maintained between leaf nodes for efficient range queries in either direction. You can pass in an fdb Db or open Tx, the latter is vastly more efficient for multiple inserts, so batch if you can. +A reduction function can be specified, the B+Tree calculates and stores +intermediate reduction values on the inner nodes for performance. + The FoundationDB keys are currently random UUID's. TODO -- cgit v1.2.1 From e6b7a57f6d71f4f23621e9c84f13457976dcc401 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Wed, 1 Jul 2020 13:15:18 +0100 Subject: rename reduce to full_reduce to better represent its function --- src/ebtree.erl | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/ebtree.erl b/src/ebtree.erl index 5114abc87..84884394f 100644 --- a/src/ebtree.erl +++ b/src/ebtree.erl @@ -9,7 +9,7 @@ lookup/3, range/6, reverse_range/6, - reduce/2, + full_reduce/2, validate_tree/2 ]). @@ -87,9 +87,9 @@ lookup(Tx, #tree{} = Tree, #node{} = Node, Key) -> ChildId = find_child_id(Tree, Node, Key), lookup(Tx, Tree, get_node_wait(Tx, Tree, ChildId), Key). -%% reduce lookup +%% full reduce -reduce(Db, #tree{} = Tree) -> +full_reduce(Db, #tree{} = Tree) -> erlfdb:transactional(Db, fun(Tx) -> Root = get_node_wait(Tx, Tree, ?NODE_ROOT_ID), reduce_node(Tree, Root) @@ -716,26 +716,26 @@ range_after_delete_test() -> ?assertEqual(50, reverse_range(Db, Tree, 1, 100, fun(E, A) -> length(E) + A end, 0)). -reduce_test() -> +full_reduce_test() -> Db = erlfdb_util:get_test_db([empty]), init(Db, <<1,2,3>>, 4), Tree = open(Db, <<1,2,3>>, [{reduce_fun, fun reduce_sum/2}]), Max = 100, Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, Max)])], lists:foreach(fun(Key) -> insert(Db, Tree, Key, Key) end, Keys), - ?assertEqual(round(Max * ((1 + Max) / 2)), reduce(Db, Tree)). + ?assertEqual(round(Max * ((1 + Max) / 2)), full_reduce(Db, Tree)). -reduce_after_delete_test() -> +full_reduce_after_delete_test() -> Db = erlfdb_util:get_test_db([empty]), init(Db, <<1,2,3>>, 4), Tree = open(Db, <<1,2,3>>, [{reduce_fun, fun reduce_sum/2}]), Max = 100, Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, Max)])], lists:foreach(fun(Key) -> insert(Db, Tree, Key, Key) end, Keys), - ?assertEqual(round(Max * ((1 + Max) / 2)), reduce(Db, Tree)), + ?assertEqual(round(Max * ((1 + Max) / 2)), full_reduce(Db, Tree)), lists:foreach(fun(Key) -> delete(Db, Tree, Key) end, Keys), - ?assertEqual(0, reduce(Db, Tree)). + ?assertEqual(0, full_reduce(Db, Tree)). raw_collation_test() -> -- cgit v1.2.1 From 0eedd8bcd4f7d2a86d5caded3a0bc0a963c24dc7 Mon Sep 17 00:00:00 2001 From: Jan Lehnardt Date: Thu, 2 Jul 2020 21:06:24 +0200 Subject: fix: set gen_server:call() timeout to infinity on ioq bypass MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Before the bypass existed, ioq would call `gen_server:call()` on hehalf of it calling module with the queueing logic in between. Commit e641a740 introduced a way to bypass any queues, but the delegated `gen_server:call()` there was added without a timeout parameter, leading to a default timeout of 5000ms. A problem manifests here when operations that are sent through ioq that take longer than that 5000ms timeout. In practice, these operations should be very rare and this timeout should be a help on overloaded systems. However, one sure-fire way to cause an issue on an otherwise idle machine is raise the max_document_size and store unreasonably large documents, think 50MB+ of raw JSON). Not that we recommend this, but folks have run this fine on 2.x before the ioq changes and it isn’t too hard to support here. By adding an `infinity` timeout delegated `gen_server:call()` in the queue bypasse case, this no longer applies. Thanks to Joan @woahli Touzet, Bob @rnewson Newson and Paul @davisp Davis for helping to track this down. --- src/ioq/src/ioq.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ioq/src/ioq.erl b/src/ioq/src/ioq.erl index 81d94a36f..99b3ce385 100644 --- a/src/ioq/src/ioq.erl +++ b/src/ioq/src/ioq.erl @@ -45,7 +45,7 @@ call(Fd, Msg, Metadata) -> Priority = io_class(Msg, Metadata), case bypass(Priority) of true -> - gen_server:call(Fd, Msg); + gen_server:call(Fd, Msg, infinity); false -> queued_call(Fd, Msg, Priority) end. -- cgit v1.2.1 From 46d9c82661bf22f79b3a405cf571171ffe2b747f Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Thu, 2 Jul 2020 10:59:32 +0100 Subject: arbitrary reductions --- src/ebtree.erl | 109 +++++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 94 insertions(+), 15 deletions(-) diff --git a/src/ebtree.erl b/src/ebtree.erl index 84884394f..8dcc0464f 100644 --- a/src/ebtree.erl +++ b/src/ebtree.erl @@ -9,6 +9,7 @@ lookup/3, range/6, reverse_range/6, + reduce/4, full_reduce/2, validate_tree/2 ]). @@ -95,6 +96,51 @@ full_reduce(Db, #tree{} = Tree) -> reduce_node(Tree, Root) end). +%% arbitrary range reduce + +reduce(Db, #tree{} = Tree, StartKey, EndKey) -> + erlfdb:transactional(Db, fun(Tx) -> + Root = get_node_wait(Tx, Tree, ?NODE_ROOT_ID), + reduce(Db, Tree, Root, StartKey, EndKey) + end). + + +reduce(_Db, #tree{} = Tree, #node{level = 0} = Node, StartKey, EndKey) -> + %% calculate the partial reduce for in-range members. + InRange = [{K, V} || {K, V} <- Node#node.members, + less_than_or_equal(Tree, StartKey, K), less_than_or_equal(Tree, K, EndKey)], + reduce_values(Tree, InRange, false); + +reduce(Db, #tree{} = Tree, #node{} = Node, StartKey, EndKey) -> + StartChildKey = find_child_key(Tree, Node, StartKey), + EndChildKey = find_child_key(Tree, Node, EndKey), + InRange = [{K, P, R} || {K, P, R} <- Node#node.members, + less_than_or_equal(Tree, StartChildKey, K), less_than_or_equal(Tree, K, EndChildKey)], + {_K, P, _R} = hd(InRange), + FirstNode = get_node_wait(Db, Tree, P), + FirstReduce = reduce(Db, Tree, FirstNode, StartKey, EndKey), + Reductions = reduce_inner(Db, Tree, StartKey, EndKey, tl(InRange), []), + reduce_values(Tree, [FirstReduce | Reductions], true). + + +reduce_inner(_Db, #tree{} = _Tree, _StartKey, _EndKey, [], Acc) -> + Acc; + +%% maybe descend into last item +reduce_inner(Db, #tree{} = Tree, StartKey, EndKey, [{K, P, R}], Acc) -> + case less_than_or_equal(Tree, EndKey, K) of + true -> + Node = get_node_wait(Db, Tree, P), + [reduce(Db, Tree, Node, StartKey, EndKey) | Acc]; + false -> + [R | Acc] + end; + + +%% use the reduction for any fully-spanned node +reduce_inner(Db, #tree{} = Tree, StartKey, EndKey, [{_K, _P, R} | Rest], Acc) -> + reduce_inner(Db, Tree, StartKey, EndKey, Rest, [R | Acc]). + %% range (inclusive of both ends) @@ -368,26 +414,38 @@ find_value(#node{level = 0} = Node, Key) -> lists:keyfind(Key, 1, Node#node.members). -find_child_id(#tree{} = Tree, #node{level = L} = Node, Key) when L > 0 -> - find_child_id_int(Tree, Node#node.members, Key). +find_child_id(#tree{} = Tree, #node{} = Node, Key) -> + {_F, _L, P, _R} = find_child(Tree, Node, Key), + P. + + +find_child_key(#tree{} = Tree, #node{} = Node, Key) -> + {_F, L, _P, _R} = find_child(Tree, Node, Key), + L. + + +find_sibling_id(#tree{} = Tree, #node{level = L} = Node0, Id, Key) when L > 0 -> + Node1 = Node0#node{members = lists:keydelete(Id, 3, Node0#node.members)}, + find_child_id(Tree, Node1, Key). -find_child_id_int(#tree{} = _Tree, [{_F, _L, P, _R}], _Key) -> - P; -find_child_id_int(#tree{} = Tree, [{_F, L, P, _R} | Rest], Key) -> +find_child(#tree{} = Tree, #node{level = L} = Node, Key) when L > 0 -> + find_child_int(Tree, Node#node.members, Key). + + +find_child_int(#tree{} = _Tree, [Child], _Key) -> + Child; + +find_child_int(#tree{} = Tree, [{_F, L, P, _R} = Child| Rest], Key) -> #tree{collate_fun = CollateFun} = Tree, case CollateFun(Key, L) of true -> - P; + Child; false -> - find_child_id_int(Tree, Rest, Key) + find_child_int(Tree, Rest, Key) end. -find_sibling_id(#tree{} = Tree, #node{level = L} = Node0, Id, Key) when L > 0 -> - Node1 = Node0#node{members = lists:keydelete(Id, 3, Node0#node.members)}, - find_child_id(Tree, Node1, Key). - %% metadata functions get_meta(Tx, #tree{} = Tree, MetaKey) -> @@ -573,13 +631,16 @@ reduce_stats(Rs, true) -> reduce_node(#tree{} = Tree, #node{level = 0} = Node) -> - #tree{reduce_fun = ReduceFun} = Tree, - ReduceFun(Node#node.members, false); + reduce_values(Tree, Node#node.members, false); reduce_node(#tree{} = Tree, #node{} = Node) -> + Rs = [R || {_F, _L, _P, R} <- Node#node.members], + reduce_values(Tree, Rs, true). + + +reduce_values(#tree{} = Tree, Values, Rereduce) when is_list(Values) -> #tree{reduce_fun = ReduceFun} = Tree, - Rs = [R || {_F, _L, _V, R} <- Node#node.members], - ReduceFun(Rs, true). + ReduceFun(Values, Rereduce). %% collation functions @@ -738,6 +799,24 @@ full_reduce_after_delete_test() -> ?assertEqual(0, full_reduce(Db, Tree)). +reduce_test_() -> + Db = erlfdb_util:get_test_db([empty]), + init(Db, <<1,2,3>>, 4), + Tree = open(Db, <<1,2,3>>, [{reduce_fun, fun reduce_sum/2}]), + Max = 100, + Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, Max)])], + lists:foreach(fun(Key) -> insert(Db, Tree, Key, Key) end, Keys), + Expected = fun(S, E) -> lists:sum(lists:seq(S, E)) end, + [ + ?_test(?assertEqual(Expected(1, 5), reduce(Db, Tree, 1, 5))), + ?_test(?assertEqual(Expected(50, 60), reduce(Db, Tree, 50, 60))), + ?_test(?assertEqual(Expected(21, 83), reduce(Db, Tree, 21, 83))), + ?_test(?assertEqual(Expected(1, 1), reduce(Db, Tree, 1, 1))), + ?_test(?assertEqual(Expected(1, 100), reduce(Db, Tree, 0, 200))), + ?_test(?assertEqual(Expected(5, 7), reduce(Db, Tree, 5, 7))) + ]. + + raw_collation_test() -> Db = erlfdb_util:get_test_db([empty]), init(Db, <<1,2,3>>, 4), -- cgit v1.2.1 From 7f82e50a53ace172e6cb54ac36375ab7f6825f5c Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Thu, 2 Jul 2020 19:26:34 +0100 Subject: add fold/4 --- src/ebtree.erl | 63 +++++++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 51 insertions(+), 12 deletions(-) diff --git a/src/ebtree.erl b/src/ebtree.erl index 8dcc0464f..9d5e50351 100644 --- a/src/ebtree.erl +++ b/src/ebtree.erl @@ -9,6 +9,7 @@ lookup/3, range/6, reverse_range/6, + fold/4, reduce/4, full_reduce/2, validate_tree/2 @@ -96,7 +97,47 @@ full_reduce(Db, #tree{} = Tree) -> reduce_node(Tree, Root) end). -%% arbitrary range reduce +%% fold + +fold(Db, #tree{} = Tree, Fun, Acc) -> + {_, Reduce} = erlfdb:transactional(Db, fun(Tx) -> + Root = get_node_wait(Tx, Tree, ?NODE_ROOT_ID), + fold(Db, Tree, Root, Fun, Acc) + end), + Reduce. + +fold(Db, #tree{} = Tree, #node{} = Node, Fun, Acc) -> + fold(Db, #tree{} = Tree, Node#node.members, Fun, Acc); + + +fold(_Db, #tree{} = _Tree, [], _Fun, Acc) -> + {ok, Acc}; + +fold(Db, #tree{} = Tree, [{K, V} | Rest], Fun, Acc0) -> + case Fun({visit, K, V}, Acc0) of + {ok, Acc1} -> + fold(Db, Tree, Rest, Fun, Acc1); + {stop, Acc1} -> + {stop, Acc1} + end; + +fold(Db, #tree{} = Tree, [{F, L, P, R} | Rest], Fun, Acc0) -> + case Fun({traverse, F, L, R}, Acc0) of + {ok, Acc1} -> + Node = get_node_wait(Db, Tree, P), + case fold(Db, Tree, Node, Fun, Acc1) of + {ok, Acc2} -> + fold(Db, Tree, Rest, Fun, Acc2); + {stop, Acc2} -> + {stop, Acc2} + end; + {skip, Acc1} -> + fold(Db, Tree, Rest, Fun, Acc1); + {stop, Acc1} -> + {stop, Acc1} + end. + +%% reduce reduce(Db, #tree{} = Tree, StartKey, EndKey) -> erlfdb:transactional(Db, fun(Tx) -> @@ -114,9 +155,9 @@ reduce(_Db, #tree{} = Tree, #node{level = 0} = Node, StartKey, EndKey) -> reduce(Db, #tree{} = Tree, #node{} = Node, StartKey, EndKey) -> StartChildKey = find_child_key(Tree, Node, StartKey), EndChildKey = find_child_key(Tree, Node, EndKey), - InRange = [{K, P, R} || {K, P, R} <- Node#node.members, - less_than_or_equal(Tree, StartChildKey, K), less_than_or_equal(Tree, K, EndChildKey)], - {_K, P, _R} = hd(InRange), + InRange = [{F, L, P, R} || {F, L, P, R} <- Node#node.members, + less_than_or_equal(Tree, StartChildKey, L), less_than_or_equal(Tree, L, EndChildKey)], + {_F, _L, P, _R} = hd(InRange), FirstNode = get_node_wait(Db, Tree, P), FirstReduce = reduce(Db, Tree, FirstNode, StartKey, EndKey), Reductions = reduce_inner(Db, Tree, StartKey, EndKey, tl(InRange), []), @@ -127,8 +168,8 @@ reduce_inner(_Db, #tree{} = _Tree, _StartKey, _EndKey, [], Acc) -> Acc; %% maybe descend into last item -reduce_inner(Db, #tree{} = Tree, StartKey, EndKey, [{K, P, R}], Acc) -> - case less_than_or_equal(Tree, EndKey, K) of +reduce_inner(Db, #tree{} = Tree, StartKey, EndKey, [{_F, L, P, R}], Acc) -> + case less_than_or_equal(Tree, EndKey, L) of true -> Node = get_node_wait(Db, Tree, P), [reduce(Db, Tree, Node, StartKey, EndKey) | Acc]; @@ -138,7 +179,7 @@ reduce_inner(Db, #tree{} = Tree, StartKey, EndKey, [{K, P, R}], Acc) -> %% use the reduction for any fully-spanned node -reduce_inner(Db, #tree{} = Tree, StartKey, EndKey, [{_K, _P, R} | Rest], Acc) -> +reduce_inner(Db, #tree{} = Tree, StartKey, EndKey, [{_F, _L, _P, R} | Rest], Acc) -> reduce_inner(Db, Tree, StartKey, EndKey, Rest, [R | Acc]). @@ -415,13 +456,11 @@ find_value(#node{level = 0} = Node, Key) -> find_child_id(#tree{} = Tree, #node{} = Node, Key) -> - {_F, _L, P, _R} = find_child(Tree, Node, Key), - P. + element(3, find_child(Tree, Node, Key)). find_child_key(#tree{} = Tree, #node{} = Node, Key) -> - {_F, L, _P, _R} = find_child(Tree, Node, Key), - L. + element(2, find_child(Tree, Node, Key)). find_sibling_id(#tree{} = Tree, #node{level = L} = Node0, Id, Key) when L > 0 -> @@ -436,7 +475,7 @@ find_child(#tree{} = Tree, #node{level = L} = Node, Key) when L > 0 -> find_child_int(#tree{} = _Tree, [Child], _Key) -> Child; -find_child_int(#tree{} = Tree, [{_F, L, P, _R} = Child| Rest], Key) -> +find_child_int(#tree{} = Tree, [{_F, L, _P, _R} = Child| Rest], Key) -> #tree{collate_fun = CollateFun} = Tree, case CollateFun(Key, L) of true -> -- cgit v1.2.1 From 3828d8b10a6cbf58f1a158a0e6c038cf5eead443 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Fri, 3 Jul 2020 18:47:35 +0100 Subject: implement full_reduce using fold --- src/ebtree.erl | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/ebtree.erl b/src/ebtree.erl index 9d5e50351..237addf6a 100644 --- a/src/ebtree.erl +++ b/src/ebtree.erl @@ -92,10 +92,14 @@ lookup(Tx, #tree{} = Tree, #node{} = Node, Key) -> %% full reduce full_reduce(Db, #tree{} = Tree) -> - erlfdb:transactional(Db, fun(Tx) -> - Root = get_node_wait(Tx, Tree, ?NODE_ROOT_ID), - reduce_node(Tree, Root) - end). + Fun = fun + ({visit, _K, V}, {Acc, _}) -> + {ok, {[V | Acc], false}}; + ({traverse, _F, _L, R}, {Acc, _}) -> + {skip, {[R | Acc], true}} + end, + {Values, Rereduce} = fold(Db, Tree, Fun, {[], false}), + reduce_values(Tree, Values, Rereduce). %% fold -- cgit v1.2.1 From 03e1c6decabe3cf69a805167732e369dabf4adc9 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Fri, 3 Jul 2020 19:33:53 +0100 Subject: handy comparison funs --- src/ebtree.erl | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/ebtree.erl b/src/ebtree.erl index 237addf6a..f7d536daf 100644 --- a/src/ebtree.erl +++ b/src/ebtree.erl @@ -688,6 +688,24 @@ reduce_values(#tree{} = Tree, Values, Rereduce) when is_list(Values) -> %% collation functions +greater_than(#tree{} = Tree, A, B) -> + not less_than_or_equal(Tree, A, B). + + +greater_than_or_equal(#tree{} = Tree, A, A) -> + true; + +greater_than_or_equal(#tree{} = Tree, A, B) -> + greater_than(Tree, A, B). + + +less_than(#tree{} = Tree, A, A) -> + false; + +less_than(#tree{} = Tree, A, B) -> + less_than_or_equal(Tree, A, B). + + less_than_or_equal(#tree{} = Tree, A, B) -> #tree{collate_fun = CollateFun} = Tree, CollateFun(A, B). -- cgit v1.2.1 From 38269f5f3f5c74310f37f004220c1c4841577713 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Sat, 4 Jul 2020 10:39:30 +0100 Subject: reimplement lookup as a fold --- src/ebtree.erl | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/src/ebtree.erl b/src/ebtree.erl index f7d536daf..5dfcb5f77 100644 --- a/src/ebtree.erl +++ b/src/ebtree.erl @@ -78,16 +78,27 @@ open(Db, Prefix, Options) -> %% lookup lookup(Db, #tree{} = Tree, Key) -> - erlfdb:transactional(Db, fun(Tx) -> - lookup(Tx, Tree, get_node_wait(Tx, Tree, ?NODE_ROOT_ID), Key) - end). - -lookup(_Tx, #tree{} = _Tree, #node{level = 0} = Node, Key) -> - find_value(Node, Key); - -lookup(Tx, #tree{} = Tree, #node{} = Node, Key) -> - ChildId = find_child_id(Tree, Node, Key), - lookup(Tx, Tree, get_node_wait(Tx, Tree, ChildId), Key). + Fun = fun + ({visit, K, V}, Acc) when K =:= Key -> + {stop, {K, V}}; + ({visit, K, V}, Acc) -> + case greater_than(Tree, K, Key) of + true -> + {stop, Acc}; + false -> + {ok, Acc} + end; + ({traverse, F, L, R}, Acc) -> + case {greater_than(Tree, F, Key), less_than_or_equal(Tree, Key, L)} of + {true, _} -> + {stop, Acc}; + {false, true} -> + {ok, Acc}; + {false, false} -> + {skip, Acc} + end + end, + fold(Db, Tree, Fun, false). %% full reduce -- cgit v1.2.1 From 5b3537808a47db0a60cac7d1cdd198fcbdfed1cd Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Sat, 4 Jul 2020 10:39:41 +0100 Subject: unused variables --- src/ebtree.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ebtree.erl b/src/ebtree.erl index 5dfcb5f77..142010866 100644 --- a/src/ebtree.erl +++ b/src/ebtree.erl @@ -703,14 +703,14 @@ greater_than(#tree{} = Tree, A, B) -> not less_than_or_equal(Tree, A, B). -greater_than_or_equal(#tree{} = Tree, A, A) -> +greater_than_or_equal(#tree{} = _Tree, A, A) -> true; greater_than_or_equal(#tree{} = Tree, A, B) -> greater_than(Tree, A, B). -less_than(#tree{} = Tree, A, A) -> +less_than(#tree{} = _Tree, A, A) -> false; less_than(#tree{} = Tree, A, B) -> -- cgit v1.2.1 From a1714f103a0b023f720443ca37b836381b36e01d Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Sat, 4 Jul 2020 18:27:15 +0100 Subject: reimplement reduce as fold --- src/ebtree.erl | 77 ++++++++++++++++++++++++++-------------------------------- 1 file changed, 34 insertions(+), 43 deletions(-) diff --git a/src/ebtree.erl b/src/ebtree.erl index 142010866..596db26d5 100644 --- a/src/ebtree.erl +++ b/src/ebtree.erl @@ -79,16 +79,16 @@ open(Db, Prefix, Options) -> lookup(Db, #tree{} = Tree, Key) -> Fun = fun - ({visit, K, V}, Acc) when K =:= Key -> + ({visit, K, V}, _Acc) when K =:= Key -> {stop, {K, V}}; - ({visit, K, V}, Acc) -> + ({visit, K, _V}, Acc) -> case greater_than(Tree, K, Key) of true -> {stop, Acc}; false -> {ok, Acc} end; - ({traverse, F, L, R}, Acc) -> + ({traverse, F, L, _R}, Acc) -> case {greater_than(Tree, F, Key), less_than_or_equal(Tree, Key, L)} of {true, _} -> {stop, Acc}; @@ -155,47 +155,38 @@ fold(Db, #tree{} = Tree, [{F, L, P, R} | Rest], Fun, Acc0) -> %% reduce reduce(Db, #tree{} = Tree, StartKey, EndKey) -> - erlfdb:transactional(Db, fun(Tx) -> - Root = get_node_wait(Tx, Tree, ?NODE_ROOT_ID), - reduce(Db, Tree, Root, StartKey, EndKey) - end). - - -reduce(_Db, #tree{} = Tree, #node{level = 0} = Node, StartKey, EndKey) -> - %% calculate the partial reduce for in-range members. - InRange = [{K, V} || {K, V} <- Node#node.members, - less_than_or_equal(Tree, StartKey, K), less_than_or_equal(Tree, K, EndKey)], - reduce_values(Tree, InRange, false); - -reduce(Db, #tree{} = Tree, #node{} = Node, StartKey, EndKey) -> - StartChildKey = find_child_key(Tree, Node, StartKey), - EndChildKey = find_child_key(Tree, Node, EndKey), - InRange = [{F, L, P, R} || {F, L, P, R} <- Node#node.members, - less_than_or_equal(Tree, StartChildKey, L), less_than_or_equal(Tree, L, EndChildKey)], - {_F, _L, P, _R} = hd(InRange), - FirstNode = get_node_wait(Db, Tree, P), - FirstReduce = reduce(Db, Tree, FirstNode, StartKey, EndKey), - Reductions = reduce_inner(Db, Tree, StartKey, EndKey, tl(InRange), []), - reduce_values(Tree, [FirstReduce | Reductions], true). - - -reduce_inner(_Db, #tree{} = _Tree, _StartKey, _EndKey, [], Acc) -> - Acc; - -%% maybe descend into last item -reduce_inner(Db, #tree{} = Tree, StartKey, EndKey, [{_F, L, P, R}], Acc) -> - case less_than_or_equal(Tree, EndKey, L) of + Fun = fun + ({visit, Key, Value}, {MapAcc, ReduceAcc}) -> + InRange = greater_than_or_equal(Tree, Key, StartKey) andalso less_than_or_equal(Tree, Key, EndKey), + case InRange of + true -> + {ok, {[{Key, Value} | MapAcc], ReduceAcc}}; + false -> + {ok, {MapAcc, ReduceAcc}} + end; + ({traverse, FirstKey, LastKey, Reduction}, {MapAcc, ReduceAcc}) -> + BeforeStart = less_than(Tree, LastKey, StartKey), + AfterEnd = greater_than(Tree, FirstKey, EndKey), + Whole = greater_than_or_equal(Tree, FirstKey, StartKey) andalso less_than_or_equal(Tree, LastKey, EndKey), + if + BeforeStart -> + {skip, {MapAcc, ReduceAcc}}; + AfterEnd -> + {stop, {MapAcc, ReduceAcc}}; + Whole -> + {skip, {MapAcc, [Reduction | ReduceAcc]}}; + true -> + {ok, {MapAcc, ReduceAcc}} + end + end, + {MapValues, ReduceValues} = fold(Db, Tree, Fun, {[], []}), + if + MapValues /= [] -> + MapReduction = reduce_values(Tree, MapValues, false), + reduce_values(Tree, [MapReduction | ReduceValues], true); true -> - Node = get_node_wait(Db, Tree, P), - [reduce(Db, Tree, Node, StartKey, EndKey) | Acc]; - false -> - [R | Acc] - end; - - -%% use the reduction for any fully-spanned node -reduce_inner(Db, #tree{} = Tree, StartKey, EndKey, [{_F, _L, _P, R} | Rest], Acc) -> - reduce_inner(Db, Tree, StartKey, EndKey, Rest, [R | Acc]). + reduce_values(Tree, ReduceValues, true) + end. %% range (inclusive of both ends) -- cgit v1.2.1 From 56686a816076a7ffba7cd3c07b08b115295eb99f Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Sat, 4 Jul 2020 18:27:25 +0100 Subject: enable coverage --- rebar.config | 1 + 1 file changed, 1 insertion(+) diff --git a/rebar.config b/rebar.config index 66db28481..b9dfae661 100644 --- a/rebar.config +++ b/rebar.config @@ -1,4 +1,5 @@ {erl_opts, [debug_info]}. +{cover_enabled, true}. {deps, [ {erlfdb, {git, "https://github.com/apache/couchdb-erlfdb", {tag, "v1.2.2"}}} ]}. -- cgit v1.2.1 From fd7b579cec61b600f88c6212e11d2d5be4813b8b Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Sat, 4 Jul 2020 18:27:43 +0100 Subject: collation fun tests --- src/ebtree.erl | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/ebtree.erl b/src/ebtree.erl index 596db26d5..f1c60c848 100644 --- a/src/ebtree.erl +++ b/src/ebtree.erl @@ -808,6 +808,22 @@ b64(Bin) -> -ifdef(TEST). -include_lib("eunit/include/eunit.hrl"). +collation_fun_test_() -> + Tree = #tree{collate_fun = fun collate_raw/2}, + [ + ?_test(?assert(greater_than(Tree, 4, 3))), + ?_test(?assertNot(greater_than(Tree, 3, 4))), + ?_test(?assert(greater_than_or_equal(Tree, 3, 3))), + ?_test(?assert(greater_than_or_equal(Tree, 3, 3))), + ?_test(?assert(less_than(Tree, 3, 4))), + ?_test(?assertNot(less_than(Tree, 3, 3))), + ?_test(?assertNot(less_than(Tree, 4, 3))), + ?_test(?assert(less_than_or_equal(Tree, 3, 3))), + ?_test(?assert(less_than_or_equal(Tree, 3, 4))), + ?_test(?assertNot(less_than_or_equal(Tree, 4, 3))) + ]. + + lookup_test() -> Db = erlfdb_util:get_test_db([empty]), init(Db, <<1,2,3>>, 4), -- cgit v1.2.1 From d6edc5e5a0161d76984344571c226401e30a166f Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Sat, 4 Jul 2020 18:29:05 +0100 Subject: remove unused functions --- src/ebtree.erl | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/ebtree.erl b/src/ebtree.erl index f1c60c848..decf51251 100644 --- a/src/ebtree.erl +++ b/src/ebtree.erl @@ -457,18 +457,10 @@ rebalance(#tree{} = Tree, #node{level = Level} = Node1, #node{level = Level} = N %% lookup functions -find_value(#node{level = 0} = Node, Key) -> - lists:keyfind(Key, 1, Node#node.members). - - find_child_id(#tree{} = Tree, #node{} = Node, Key) -> element(3, find_child(Tree, Node, Key)). -find_child_key(#tree{} = Tree, #node{} = Node, Key) -> - element(2, find_child(Tree, Node, Key)). - - find_sibling_id(#tree{} = Tree, #node{level = L} = Node0, Id, Key) when L > 0 -> Node1 = Node0#node{members = lists:keydelete(Id, 3, Node0#node.members)}, find_child_id(Tree, Node1, Key). -- cgit v1.2.1 From 2ca9f7f9be4f317145926be01024fa4951ad30d0 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Sat, 4 Jul 2020 22:34:37 +0100 Subject: Add upper bound to intermediate reduce list size --- src/ebtree.erl | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/src/ebtree.erl b/src/ebtree.erl index decf51251..cb048acca 100644 --- a/src/ebtree.erl +++ b/src/ebtree.erl @@ -160,7 +160,7 @@ reduce(Db, #tree{} = Tree, StartKey, EndKey) -> InRange = greater_than_or_equal(Tree, Key, StartKey) andalso less_than_or_equal(Tree, Key, EndKey), case InRange of true -> - {ok, {[{Key, Value} | MapAcc], ReduceAcc}}; + {ok, maybe_reduce(Tree, {[{Key, Value} | MapAcc], ReduceAcc})}; false -> {ok, {MapAcc, ReduceAcc}} end; @@ -174,7 +174,7 @@ reduce(Db, #tree{} = Tree, StartKey, EndKey) -> AfterEnd -> {stop, {MapAcc, ReduceAcc}}; Whole -> - {skip, {MapAcc, [Reduction | ReduceAcc]}}; + {skip, maybe_reduce(Tree, {MapAcc, [Reduction | ReduceAcc]})}; true -> {ok, {MapAcc, ReduceAcc}} end @@ -189,6 +189,17 @@ reduce(Db, #tree{} = Tree, StartKey, EndKey) -> end. +maybe_reduce(#tree{} = Tree, {MapAcc, ReduceAcc}) when length(MapAcc) > 100 -> + Reduction = reduce_values(Tree, MapAcc, false), + maybe_reduce(Tree, {[], [Reduction | ReduceAcc]}); + +maybe_reduce(#tree{} = Tree, {MapAcc, ReduceAcc}) when length(ReduceAcc) > 100 -> + Reduction = reduce_values(Tree, ReduceAcc, true), + {MapAcc, [Reduction]}; + +maybe_reduce(#tree{} = _Tree, Acc) -> + Acc. + %% range (inclusive of both ends) range(Db, #tree{} = Tree, StartKey, EndKey, Fun, Acc0) -> -- cgit v1.2.1 From c4cb82d868924ae462c820545169b4eda6a93ec9 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Sat, 4 Jul 2020 22:57:46 +0100 Subject: make upper bound of interactive reduce match tree maximum --- src/ebtree.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ebtree.erl b/src/ebtree.erl index cb048acca..de6012263 100644 --- a/src/ebtree.erl +++ b/src/ebtree.erl @@ -189,11 +189,11 @@ reduce(Db, #tree{} = Tree, StartKey, EndKey) -> end. -maybe_reduce(#tree{} = Tree, {MapAcc, ReduceAcc}) when length(MapAcc) > 100 -> +maybe_reduce(#tree{} = Tree, {MapAcc, ReduceAcc}) when length(MapAcc) > Tree#tree.max -> Reduction = reduce_values(Tree, MapAcc, false), maybe_reduce(Tree, {[], [Reduction | ReduceAcc]}); -maybe_reduce(#tree{} = Tree, {MapAcc, ReduceAcc}) when length(ReduceAcc) > 100 -> +maybe_reduce(#tree{} = Tree, {MapAcc, ReduceAcc}) when length(ReduceAcc) > Tree#tree.max -> Reduction = reduce_values(Tree, ReduceAcc, true), {MapAcc, [Reduction]}; -- cgit v1.2.1 From 0ea8ddd685972852f60e0a93f26f652e78a07673 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Sun, 5 Jul 2020 15:15:05 +0100 Subject: move full_reduce --- src/ebtree.erl | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/ebtree.erl b/src/ebtree.erl index de6012263..cb50de074 100644 --- a/src/ebtree.erl +++ b/src/ebtree.erl @@ -100,18 +100,6 @@ lookup(Db, #tree{} = Tree, Key) -> end, fold(Db, Tree, Fun, false). -%% full reduce - -full_reduce(Db, #tree{} = Tree) -> - Fun = fun - ({visit, _K, V}, {Acc, _}) -> - {ok, {[V | Acc], false}}; - ({traverse, _F, _L, R}, {Acc, _}) -> - {skip, {[R | Acc], true}} - end, - {Values, Rereduce} = fold(Db, Tree, Fun, {[], false}), - reduce_values(Tree, Values, Rereduce). - %% fold fold(Db, #tree{} = Tree, Fun, Acc) -> @@ -152,6 +140,18 @@ fold(Db, #tree{} = Tree, [{F, L, P, R} | Rest], Fun, Acc0) -> {stop, Acc1} end. +%% full reduce + +full_reduce(Db, #tree{} = Tree) -> + Fun = fun + ({visit, _K, V}, {Acc, _}) -> + {ok, {[V | Acc], false}}; + ({traverse, _F, _L, R}, {Acc, _}) -> + {skip, {[R | Acc], true}} + end, + {Values, Rereduce} = fold(Db, Tree, Fun, {[], false}), + reduce_values(Tree, Values, Rereduce). + %% reduce reduce(Db, #tree{} = Tree, StartKey, EndKey) -> -- cgit v1.2.1 From c6689c2ac7be58eb9835f189ba5c8b62793ae6bb Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Sun, 5 Jul 2020 15:15:20 +0100 Subject: more tests --- src/ebtree.erl | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/src/ebtree.erl b/src/ebtree.erl index cb50de074..1ed963c58 100644 --- a/src/ebtree.erl +++ b/src/ebtree.erl @@ -833,7 +833,8 @@ lookup_test() -> Tree = open(Db, <<1,2,3>>), Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, 100)])], lists:foreach(fun(Key) -> insert(Db, Tree, Key, Key + 1) end, Keys), - lists:foreach(fun(Key) -> ?assertEqual({Key, Key + 1}, lookup(Db, Tree, Key)) end, Keys). + lists:foreach(fun(Key) -> ?assertEqual({Key, Key + 1}, lookup(Db, Tree, Key)) end, Keys), + ?assertEqual(false, lookup(Db, Tree, 101)). delete_test() -> @@ -859,14 +860,19 @@ range_after_delete_test() -> ?assertEqual(50, reverse_range(Db, Tree, 1, 100, fun(E, A) -> length(E) + A end, 0)). -full_reduce_test() -> +full_reduce_test_() -> Db = erlfdb_util:get_test_db([empty]), init(Db, <<1,2,3>>, 4), Tree = open(Db, <<1,2,3>>, [{reduce_fun, fun reduce_sum/2}]), - Max = 100, - Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, Max)])], - lists:foreach(fun(Key) -> insert(Db, Tree, Key, Key) end, Keys), - ?assertEqual(round(Max * ((1 + Max) / 2)), full_reduce(Db, Tree)). + TestFun = fun(Max) -> + Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, Max)])], + lists:foreach(fun(Key) -> insert(Db, Tree, Key, Key) end, Keys), + ?assertEqual(round(Max * ((1 + Max) / 2)), full_reduce(Db, Tree)) + end, + [ + ?_test(TestFun(4)), + ?_test(TestFun(8)) + ]. full_reduce_after_delete_test() -> -- cgit v1.2.1 From 8cac976de74bbea042b5ed9d76ca7ba30bdd24f8 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Sun, 5 Jul 2020 15:38:36 +0100 Subject: fix bug in full_reduce when root is a leaf --- src/ebtree.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ebtree.erl b/src/ebtree.erl index 1ed963c58..c4b123e21 100644 --- a/src/ebtree.erl +++ b/src/ebtree.erl @@ -144,8 +144,8 @@ fold(Db, #tree{} = Tree, [{F, L, P, R} | Rest], Fun, Acc0) -> full_reduce(Db, #tree{} = Tree) -> Fun = fun - ({visit, _K, V}, {Acc, _}) -> - {ok, {[V | Acc], false}}; + ({visit, K, V}, {Acc, _}) -> + {ok, {[{K, V} | Acc], false}}; ({traverse, _F, _L, R}, {Acc, _}) -> {skip, {[R | Acc], true}} end, -- cgit v1.2.1 From cb20ab4a2d12ef43f9f0b19615047c31625a9b32 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Sun, 5 Jul 2020 15:52:53 +0100 Subject: more tests --- src/ebtree.erl | 36 +++++++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/src/ebtree.erl b/src/ebtree.erl index c4b123e21..16af6ed9f 100644 --- a/src/ebtree.erl +++ b/src/ebtree.erl @@ -887,7 +887,24 @@ full_reduce_after_delete_test() -> ?assertEqual(0, full_reduce(Db, Tree)). -reduce_test_() -> +count_reduce_test_() -> + Db = erlfdb_util:get_test_db([empty]), + init(Db, <<1,2,3>>, 4), + Tree = open(Db, <<1,2,3>>, [{reduce_fun, fun reduce_count/2}]), + Max = 100, + Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, Max)])], + lists:foreach(fun(Key) -> insert(Db, Tree, Key, Key) end, Keys), + Expected = fun(S, E) -> E - S + 1 end, + [ + ?_test(?assertEqual(Expected(1, 5), reduce(Db, Tree, 1, 5))), + ?_test(?assertEqual(Expected(50, 60), reduce(Db, Tree, 50, 60))), + ?_test(?assertEqual(Expected(21, 83), reduce(Db, Tree, 21, 83))), + ?_test(?assertEqual(Expected(1, 1), reduce(Db, Tree, 1, 1))), + ?_test(?assertEqual(Expected(1, 100), reduce(Db, Tree, 0, 200))), + ?_test(?assertEqual(Expected(5, 7), reduce(Db, Tree, 5, 7))) + ]. + +sum_reduce_test_() -> Db = erlfdb_util:get_test_db([empty]), init(Db, <<1,2,3>>, 4), Tree = open(Db, <<1,2,3>>, [{reduce_fun, fun reduce_sum/2}]), @@ -905,6 +922,23 @@ reduce_test_() -> ]. +stats_reduce_test_() -> + Db = erlfdb_util:get_test_db([empty]), + init(Db, <<1,2,3>>, 4), + Tree = open(Db, <<1,2,3>>, [{reduce_fun, fun reduce_stats/2}]), + Max = 100, + Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, Max)])], + lists:foreach(fun(Key) -> insert(Db, Tree, Key, Key) end, Keys), + [ + ?_test(?assertEqual({15,1,5,5,55}, reduce(Db, Tree, 1, 5))), + ?_test(?assertEqual({605,50,60,11,33385}, reduce(Db, Tree, 50, 60))), + ?_test(?assertEqual({3276,21,83,63,191184}, reduce(Db, Tree, 21, 83))), + ?_test(?assertEqual({1,1,1,1,1}, reduce(Db, Tree, 1, 1))), + ?_test(?assertEqual({5050,1,100,100,338350}, reduce(Db, Tree, 0, 200))), + ?_test(?assertEqual({18,5,7,3,110}, reduce(Db, Tree, 5, 7))) + ]. + + raw_collation_test() -> Db = erlfdb_util:get_test_db([empty]), init(Db, <<1,2,3>>, 4), -- cgit v1.2.1 From 60a35a978d972f1dd0e824ec2e6d1698b3f41504 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Sun, 5 Jul 2020 16:00:37 +0100 Subject: reduce every 50. flip flop. --- src/ebtree.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ebtree.erl b/src/ebtree.erl index 16af6ed9f..8894da5b2 100644 --- a/src/ebtree.erl +++ b/src/ebtree.erl @@ -189,11 +189,11 @@ reduce(Db, #tree{} = Tree, StartKey, EndKey) -> end. -maybe_reduce(#tree{} = Tree, {MapAcc, ReduceAcc}) when length(MapAcc) > Tree#tree.max -> +maybe_reduce(#tree{} = Tree, {MapAcc, ReduceAcc}) when length(MapAcc) > 50 -> Reduction = reduce_values(Tree, MapAcc, false), maybe_reduce(Tree, {[], [Reduction | ReduceAcc]}); -maybe_reduce(#tree{} = Tree, {MapAcc, ReduceAcc}) when length(ReduceAcc) > Tree#tree.max -> +maybe_reduce(#tree{} = Tree, {MapAcc, ReduceAcc}) when length(ReduceAcc) > 50 -> Reduction = reduce_values(Tree, ReduceAcc, true), {MapAcc, [Reduction]}; -- cgit v1.2.1 From 976fa8f5292e69f0eaa9c045b324dbdb1cc8b198 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Sun, 5 Jul 2020 16:12:13 +0100 Subject: follow reduce's style in full_reduce --- src/ebtree.erl | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/src/ebtree.erl b/src/ebtree.erl index 8894da5b2..4f4337b0f 100644 --- a/src/ebtree.erl +++ b/src/ebtree.erl @@ -144,13 +144,18 @@ fold(Db, #tree{} = Tree, [{F, L, P, R} | Rest], Fun, Acc0) -> full_reduce(Db, #tree{} = Tree) -> Fun = fun - ({visit, K, V}, {Acc, _}) -> - {ok, {[{K, V} | Acc], false}}; - ({traverse, _F, _L, R}, {Acc, _}) -> - {skip, {[R | Acc], true}} + ({visit, K, V}, {MapAcc, ReduceAcc}) -> + {ok, {[{K, V} | MapAcc], ReduceAcc}}; + ({traverse, _F, _L, R}, {MapAcc, ReduceAcc}) -> + {skip, {MapAcc, [R | ReduceAcc]}} end, - {Values, Rereduce} = fold(Db, Tree, Fun, {[], false}), - reduce_values(Tree, Values, Rereduce). + case fold(Db, Tree, Fun, {[], []}) of + {MapAcc, []} -> + reduce_values(Tree, MapAcc, false); + {[], ReduceAcc} -> + reduce_values(Tree, ReduceAcc, true) + end. + %% reduce -- cgit v1.2.1 From 87bb8134d7b6ce6a4d8a0debe301c227d16e3f89 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Sun, 5 Jul 2020 17:34:49 +0100 Subject: remove premature optimization even thought it was neat --- src/ebtree.erl | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/src/ebtree.erl b/src/ebtree.erl index 4f4337b0f..e6d7337fb 100644 --- a/src/ebtree.erl +++ b/src/ebtree.erl @@ -165,7 +165,7 @@ reduce(Db, #tree{} = Tree, StartKey, EndKey) -> InRange = greater_than_or_equal(Tree, Key, StartKey) andalso less_than_or_equal(Tree, Key, EndKey), case InRange of true -> - {ok, maybe_reduce(Tree, {[{Key, Value} | MapAcc], ReduceAcc})}; + {ok, {[{Key, Value} | MapAcc], ReduceAcc}}; false -> {ok, {MapAcc, ReduceAcc}} end; @@ -179,7 +179,7 @@ reduce(Db, #tree{} = Tree, StartKey, EndKey) -> AfterEnd -> {stop, {MapAcc, ReduceAcc}}; Whole -> - {skip, maybe_reduce(Tree, {MapAcc, [Reduction | ReduceAcc]})}; + {skip, {MapAcc, [Reduction | ReduceAcc]}}; true -> {ok, {MapAcc, ReduceAcc}} end @@ -194,17 +194,6 @@ reduce(Db, #tree{} = Tree, StartKey, EndKey) -> end. -maybe_reduce(#tree{} = Tree, {MapAcc, ReduceAcc}) when length(MapAcc) > 50 -> - Reduction = reduce_values(Tree, MapAcc, false), - maybe_reduce(Tree, {[], [Reduction | ReduceAcc]}); - -maybe_reduce(#tree{} = Tree, {MapAcc, ReduceAcc}) when length(ReduceAcc) > 50 -> - Reduction = reduce_values(Tree, ReduceAcc, true), - {MapAcc, [Reduction]}; - -maybe_reduce(#tree{} = _Tree, Acc) -> - Acc. - %% range (inclusive of both ends) range(Db, #tree{} = Tree, StartKey, EndKey, Fun, Acc0) -> -- cgit v1.2.1 From d598fc6212508195589a9d6ba890921100110e70 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Mon, 6 Jul 2020 11:55:20 +0100 Subject: minor optimize when visting leaf --- src/ebtree.erl | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/ebtree.erl b/src/ebtree.erl index e6d7337fb..ca47a41dc 100644 --- a/src/ebtree.erl +++ b/src/ebtree.erl @@ -162,11 +162,14 @@ full_reduce(Db, #tree{} = Tree) -> reduce(Db, #tree{} = Tree, StartKey, EndKey) -> Fun = fun ({visit, Key, Value}, {MapAcc, ReduceAcc}) -> + AfterEnd = greater_than(Tree, Key, EndKey), InRange = greater_than_or_equal(Tree, Key, StartKey) andalso less_than_or_equal(Tree, Key, EndKey), - case InRange of + if + AfterEnd -> + {stop, {MapAcc, ReduceAcc}}; + InRange -> + {ok, {[{Key, Value} | MapAcc], ReduceAcc}}; true -> - {ok, {[{Key, Value} | MapAcc], ReduceAcc}}; - false -> {ok, {MapAcc, ReduceAcc}} end; ({traverse, FirstKey, LastKey, Reduction}, {MapAcc, ReduceAcc}) -> -- cgit v1.2.1 From 465e7b29fb64673c34f1141480e8f04fb05ee600 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Mon, 6 Jul 2020 13:32:15 +0100 Subject: extract final reduce function --- src/ebtree.erl | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/ebtree.erl b/src/ebtree.erl index ca47a41dc..6730e9b03 100644 --- a/src/ebtree.erl +++ b/src/ebtree.erl @@ -188,13 +188,15 @@ reduce(Db, #tree{} = Tree, StartKey, EndKey) -> end end, {MapValues, ReduceValues} = fold(Db, Tree, Fun, {[], []}), - if - MapValues /= [] -> - MapReduction = reduce_values(Tree, MapValues, false), - reduce_values(Tree, [MapReduction | ReduceValues], true); - true -> - reduce_values(Tree, ReduceValues, true) - end. + final_reduce(Tree, MapValues, ReduceValues). + + +final_reduce(#tree{} = Tree, [], ReduceValues) when is_list(ReduceValues) -> + reduce_values(Tree, ReduceValues, true); + +final_reduce(#tree{} = Tree, MapValues, ReduceValues) when is_list(MapValues), is_list(ReduceValues) -> + final_reduce(Tree, [], [reduce_values(Tree, MapValues, false) | ReduceValues]). + %% range (inclusive of both ends) -- cgit v1.2.1 From 6ae6cc230478c43ae27b3c8203bae7152bd0962e Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Mon, 6 Jul 2020 11:42:43 +0100 Subject: Arbitrary group reduce --- src/ebtree.erl | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 68 insertions(+), 4 deletions(-) diff --git a/src/ebtree.erl b/src/ebtree.erl index 6730e9b03..c61a30d00 100644 --- a/src/ebtree.erl +++ b/src/ebtree.erl @@ -12,6 +12,7 @@ fold/4, reduce/4, full_reduce/2, + group_reduce/5, validate_tree/2 ]). @@ -188,16 +189,65 @@ reduce(Db, #tree{} = Tree, StartKey, EndKey) -> end end, {MapValues, ReduceValues} = fold(Db, Tree, Fun, {[], []}), - final_reduce(Tree, MapValues, ReduceValues). + do_reduce(Tree, MapValues, ReduceValues). -final_reduce(#tree{} = Tree, [], ReduceValues) when is_list(ReduceValues) -> +do_reduce(#tree{} = Tree, [], ReduceValues) when is_list(ReduceValues) -> reduce_values(Tree, ReduceValues, true); -final_reduce(#tree{} = Tree, MapValues, ReduceValues) when is_list(MapValues), is_list(ReduceValues) -> - final_reduce(Tree, [], [reduce_values(Tree, MapValues, false) | ReduceValues]). +do_reduce(#tree{} = Tree, MapValues, ReduceValues) when is_list(MapValues), is_list(ReduceValues) -> + do_reduce(Tree, [], [reduce_values(Tree, MapValues, false) | ReduceValues]). +%% group reduce - produces reductions for contiguous keys in the same group. + +group_reduce(Db, #tree{} = Tree, StartKey, EndKey, GroupKeyFun) -> + NoGroupYet = erlang:make_ref(), + Fun = fun + ({visit, Key, Value}, {CurrentGroup, GroupAcc, MapAcc, ReduceAcc}) -> + AfterEnd = greater_than(Tree, Key, EndKey), + InRange = greater_than_or_equal(Tree, Key, StartKey) andalso less_than_or_equal(Tree, Key, EndKey), + KeyGroup = GroupKeyFun(Key), + SameGroup = CurrentGroup =:= KeyGroup, + if + AfterEnd -> + {stop, {CurrentGroup, GroupAcc, MapAcc, ReduceAcc}}; + SameGroup -> + {ok, {CurrentGroup, GroupAcc, [{Key, Value} | MapAcc], ReduceAcc}}; + InRange andalso CurrentGroup =:= NoGroupYet -> + {ok, {KeyGroup, GroupAcc, [{Key, Value}], []}}; + InRange -> + %% implicit end of current group and start of a new one + GroupValue = do_reduce(Tree, MapAcc, ReduceAcc), + {ok, {KeyGroup, [{CurrentGroup, GroupValue} | GroupAcc], [{Key, Value}], []}}; + true -> + {ok, {CurrentGroup, GroupAcc, MapAcc, ReduceAcc}} + end; + ({traverse, FirstKey, LastKey, Reduction}, {CurrentGroup, GroupAcc, MapAcc, ReduceAcc}) -> + BeforeStart = less_than(Tree, LastKey, StartKey), + AfterEnd = greater_than(Tree, FirstKey, EndKey), + Whole = CurrentGroup =:= GroupKeyFun(FirstKey) andalso CurrentGroup =:= GroupKeyFun(LastKey), + if + BeforeStart -> + {skip, {CurrentGroup, GroupAcc, MapAcc, ReduceAcc}}; + AfterEnd -> + {stop, {CurrentGroup, GroupAcc, MapAcc, ReduceAcc}}; + Whole -> + {skip, {CurrentGroup, GroupAcc, MapAcc, [Reduction | ReduceAcc]}}; + true -> + {ok, {CurrentGroup, GroupAcc, MapAcc, ReduceAcc}} + end + end, + {CurrentGroup, GroupAcc0, MapValues, ReduceValues} = fold(Db, Tree, Fun, {NoGroupYet, [], [], []}), + GroupAcc1 = if + MapValues /= [] orelse ReduceValues /= [] -> + FinalGroup = do_reduce(Tree, MapValues, ReduceValues), + [{CurrentGroup, FinalGroup} | GroupAcc0]; + true -> + GroupAcc0 + end, + lists:reverse(GroupAcc1). + %% range (inclusive of both ends) @@ -938,6 +988,20 @@ stats_reduce_test_() -> ]. +group_reduce_test_() -> + Db = erlfdb_util:get_test_db([empty]), + init(Db, <<1,2,3>>, 4), + Tree = open(Db, <<1,2,3>>, [{reduce_fun, fun reduce_sum/2}]), + Max = 100, + Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, Max)])], + GroupKeyFun = fun(Key) -> lists:sublist(Key, 2) end, + lists:foreach(fun(Key) -> insert(Db, Tree, [Key rem 4, Key rem 3, Key], Key) end, Keys), + [ + ?_test(?assertEqual([{[1, 0], 408}, {[1, 1], 441}, {[1, 2], 376}], + group_reduce(Db, Tree, [1], [2], GroupKeyFun))) + ]. + + raw_collation_test() -> Db = erlfdb_util:get_test_db([empty]), init(Db, <<1,2,3>>, 4), -- cgit v1.2.1 From f1f8f602a4b31479688fe112c3fb3d1a69334ecc Mon Sep 17 00:00:00 2001 From: ILYA Khlopotov Date: Mon, 6 Jul 2020 08:10:57 -0700 Subject: Fix auth injection in test helper Previously the auth was injected unconidtionally. There were two problems: 1. no way to specify `basic_auth` credentials 2. no way to disable injection Due to #2 it was impossible to use other user than hardcoded `adm` () in `Couch.login/3`. --- test/elixir/lib/couch.ex | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/test/elixir/lib/couch.ex b/test/elixir/lib/couch.ex index ed5862331..a8439417d 100644 --- a/test/elixir/lib/couch.ex +++ b/test/elixir/lib/couch.ex @@ -129,7 +129,12 @@ defmodule Couch do end def set_auth_options(options) do - if Keyword.get(options, :cookie) == nil do + no_auth? = Keyword.get(options, :no_auth) == true + cookie? = Keyword.has_key?(options, :cookie) + basic_auth? = Keyword.has_key?(options, :basic_auth) + if cookie? or no_auth? or basic_auth? do + Keyword.delete(options, :no_auth) + else headers = Keyword.get(options, :headers, []) if headers[:basic_auth] != nil or headers[:authorization] != nil do @@ -139,8 +144,6 @@ defmodule Couch do password = System.get_env("EX_PASSWORD") || "pass" Keyword.put(options, :basic_auth, {username, password}) end - else - options end end @@ -177,7 +180,8 @@ defmodule Couch do Couch.post( "/_session", body: %{:username => user, :password => pass}, - base_url: base_url + base_url: base_url, + no_auth: true ) if Map.get(options, :expect, :success) == :success do -- cgit v1.2.1 From c48f4fe342bab5ebc163a863f44b65666d18d019 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Mon, 6 Jul 2020 17:30:06 +0100 Subject: sequential ids --- src/ebtree.erl | 48 ++++++++++++++++++++---------------------------- 1 file changed, 20 insertions(+), 28 deletions(-) diff --git a/src/ebtree.erl b/src/ebtree.erl index c61a30d00..1f4cd381a 100644 --- a/src/ebtree.erl +++ b/src/ebtree.erl @@ -41,9 +41,10 @@ -define(META, 0). -define(META_ORDER, 0). +-define(META_NEXT_ID, 1). -define(NODE, 1). --define(NODE_ROOT_ID, <<0>>). +-define(NODE_ROOT_ID, 0). -define(underflow(Tree, Node), Tree#tree.min > length(Node#node.members)). -define(at_min(Tree, Node), Tree#tree.min == length(Node#node.members)). @@ -54,6 +55,7 @@ init(Db, Prefix, Order) when is_binary(Prefix), is_integer(Order), Order > 2, Or erlfdb:transactional(Db, fun(Tx) -> erlfdb:clear_range_startswith(Tx, Prefix), set_meta(Tx, Prefix, ?META_ORDER, Order), + set_meta(Tx, Prefix, ?META_NEXT_ID, 1), set_node(Tx, init_tree(Prefix, Order), #node{id = ?NODE_ROOT_ID}), ok end). @@ -303,7 +305,7 @@ insert(Db, #tree{} = Tree, Key, Value) -> Root0 = get_node_wait(Tx, Tree, ?NODE_ROOT_ID), case ?is_full(Tree, Root0) of true -> - OldRoot = Root0#node{id = new_node_id()}, + OldRoot = Root0#node{id = new_node_id(Tx, Tree)}, FirstKey = first_key(OldRoot), LastKey = last_key(OldRoot), Root1 = #node{ @@ -321,8 +323,8 @@ insert(Db, #tree{} = Tree, Key, Value) -> split_child(Tx, #tree{} = Tree, #node{} = Parent0, #node{} = Child) -> {LeftMembers, RightMembers} = lists:split(Tree#tree.min, Child#node.members), - LeftId = new_node_id(), - RightId = new_node_id(), + LeftId = new_node_id(Tx, Tree), + RightId = new_node_id(Tx, Tree), LeftChild = remove_pointers_if_not_leaf(#node{ id = LeftId, @@ -444,12 +446,12 @@ delete(Tx, #tree{} = Tree, #node{} = Parent0, Key) -> Sibling = get_node_wait(Tx, Tree, SiblingId), NewNodes = case ?at_min(Tree, Sibling) of true -> - Merged = merge(Tree, Child1, Sibling), + Merged = merge(Tx, Tree, Child1, Sibling), update_prev_neighbour(Tx, Tree, Merged), update_next_neighbour(Tx, Tree, Merged), [Merged]; false -> - {Left, Right} = rebalance(Tree, Child1, Sibling), + {Left, Right} = rebalance(Tx, Tree, Child1, Sibling), update_prev_neighbour(Tx, Tree, Left), update_next_neighbour(Tx, Tree, Right), [Left, Right] @@ -481,11 +483,11 @@ delete(Tx, #tree{} = Tree, #node{} = Parent0, Key) -> end. -merge(#tree{} = Tree, #node{level = Level} = Node1, #node{level = Level} = Node2) -> +merge(Tx, #tree{} = Tree, #node{level = Level} = Node1, #node{level = Level} = Node2) -> [Left, Right] = sort(Tree, [Node1, Node2]), #node{ - id = new_node_id(), + id = new_node_id(Tx, Tree), level = Level, prev = Left#node.prev, next = Right#node.next, @@ -493,14 +495,14 @@ merge(#tree{} = Tree, #node{level = Level} = Node1, #node{level = Level} = Node2 }. -rebalance(#tree{} = Tree, #node{level = Level} = Node1, #node{level = Level} = Node2) -> +rebalance(Tx, #tree{} = Tree, #node{level = Level} = Node1, #node{level = Level} = Node2) -> [Left0, Right0] = sort(Tree, [Node1, Node2]), Members = lists:append(Left0#node.members, Right0#node.members), {LeftMembers, RightMembers} = lists:split(length(Members) div 2, Members), - Left1Id = new_node_id(), - Right1Id = new_node_id(), + Left1Id = new_node_id(Tx, Tree), + Right1Id = new_node_id(Tx, Tree), Left1 = Left0#node{ id = Left1Id, @@ -601,7 +603,7 @@ set_node(Tx, #tree{} = Tree, #node{} = Node) -> erlfdb:set(Tx, Key, Value). -node_key(Prefix, Id) when is_binary(Prefix), is_binary(Id) -> +node_key(Prefix, Id) when is_binary(Prefix), is_integer(Id) -> erlfdb_tuple:pack({?NODE, Id}, Prefix). @@ -826,8 +828,10 @@ last_key(Members) when is_list(Members) -> end. -new_node_id() -> - crypto:strong_rand_bytes(16). +new_node_id(Tx, Tree) -> + NextId = get_meta(Tx, Tree, ?META_NEXT_ID), + set_meta(Tx, Tree#tree.prefix, ?META_NEXT_ID, NextId + 1), + NextId. %% remove prev/next pointers for nonleaf nodes @@ -838,22 +842,10 @@ remove_pointers_if_not_leaf(#node{} = Node) -> Node#node{prev = undefined, next = undefined}. -print_node(#node{level = 0} = Node) -> - io:format("#node{id = ~s, level = ~w, prev = ~s, next = ~s, members = ~w}~n~n", - [b64(Node#node.id), Node#node.level, b64(Node#node.prev), b64(Node#node.next), - Node#node.members]); - print_node(#node{} = Node) -> - io:format("#node{id = ~s, level = ~w, prev = ~s, next = ~s, members = ~s}~n~n", - [base64:encode(Node#node.id), Node#node.level, b64(Node#node.prev), b64(Node#node.next), - [io_lib:format("{~w, ~w, ~s, ~w}, ", [F, L, b64(V), R]) || {F, L, V, R} <- Node#node.members]]). - - -b64(undefined) -> - undefined; + io:format("#node{id = ~w, level = ~w, prev = ~w, next = ~w, members = ~w}~n~n", + [Node#node.id, Node#node.level, Node#node.prev, Node#node.next, Node#node.members]). -b64(Bin) -> - base64:encode(Bin). %% tests -- cgit v1.2.1 From 46cc29e39f15bcf69da34b833b0f38681e0a6966 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Mon, 6 Jul 2020 19:07:01 +0100 Subject: ensure erlfdb is started before ebtree --- src/ebtree.app.src | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/ebtree.app.src b/src/ebtree.app.src index 995238785..1420a2d90 100644 --- a/src/ebtree.app.src +++ b/src/ebtree.app.src @@ -4,7 +4,8 @@ {registered, []}, {applications, [kernel, - stdlib + stdlib, + erlfdb ]}, {env,[]}, {modules, []}, -- cgit v1.2.1 From 23b4aa78e09cceb9424ab1b0b9891755ecb46ba7 Mon Sep 17 00:00:00 2001 From: Juanjo Rodriguez Date: Wed, 1 Jul 2020 08:36:25 +0200 Subject: Port view_collation_raw.js to elixir --- test/elixir/README.md | 2 +- test/elixir/test/view_collation_raw_test.exs | 159 +++++++++++++++++++++++++++ test/javascript/tests/view_collation_raw.js | 1 + 3 files changed, 161 insertions(+), 1 deletion(-) create mode 100644 test/elixir/test/view_collation_raw_test.exs diff --git a/test/elixir/README.md b/test/elixir/README.md index 80879afdc..44cca52d9 100644 --- a/test/elixir/README.md +++ b/test/elixir/README.md @@ -99,7 +99,7 @@ X means done, - means partially - [X] Port utf8.js - [X] Port uuids.js - [X] Port view_collation.js - - [ ] Port view_collation_raw.js + - [X] Port view_collation_raw.js - [ ] Port view_compaction.js - [ ] Port view_conflicts.js - [ ] Port view_errors.js diff --git a/test/elixir/test/view_collation_raw_test.exs b/test/elixir/test/view_collation_raw_test.exs new file mode 100644 index 000000000..ee272d72e --- /dev/null +++ b/test/elixir/test/view_collation_raw_test.exs @@ -0,0 +1,159 @@ +defmodule ViewCollationRawTest do + use CouchTestCase + + @moduledoc """ + Test CouchDB View Raw Collation Behavior + This is a port of the view_collation_raw.js suite + """ + + @values [ + # Then numbers + 1, + 2, + 3, + 4, + false, + :null, + true, + + # Then objects, compared each key value in the list until different. + # Larger objects sort after their subset objects + {[a: 1]}, + {[a: 2]}, + {[b: 1]}, + {[b: 2]}, + # Member order does matter for collation + {[b: 2, a: 1]}, + {[b: 2, c: 2]}, + + # Then arrays, compared element by element until different. + # Longer arrays sort after their prefixes + ["a"], + ["b"], + ["b", "c"], + ["b", "c", "a"], + ["b", "d"], + ["b", "d", "e"], + + # Then text, case sensitive + "A", + "B", + "a", + "aa", + "b", + "ba", + "bb" + ] + + setup_all do + db_name = random_db_name() + {:ok, _} = create_db(db_name) + on_exit(fn -> delete_db(db_name) end) + + {docs, _} = + Enum.flat_map_reduce(@values, 1, fn value, idx -> + doc = %{:_id => Integer.to_string(idx), :foo => value} + {[doc], idx + 1} + end) + + resp = Couch.post("/#{db_name}/_bulk_docs", body: %{:docs => docs}) + Enum.each(resp.body, &assert(&1["ok"])) + + map_fun = "function(doc) { emit(doc.foo, null); }" + + map_doc = %{ + :language => "javascript", + :views => %{:test => %{:map => map_fun, :options => %{:collation => "raw"}}} + } + + resp = Couch.put("/#{db_name}/_design/test", body: map_doc) + assert resp.body["ok"] + + {:ok, [db_name: db_name]} + end + + test "ascending collation order", context do + retry_until(fn -> + resp = Couch.get(url(context)) + pairs = Enum.zip(resp.body["rows"], @values) + + Enum.each(pairs, fn {row, value} -> + assert row["key"] == convert(value) + end) + end) + end + + test "raw semantics in key ranges", context do + retry_until(fn -> + resp = + Couch.get(url(context), + query: %{"startkey" => :jiffy.encode("Z"), "endkey" => :jiffy.encode("a")} + ) + + assert length(resp.body["rows"]) == 1 + assert Enum.at(resp.body["rows"], 0)["key"] == "a" + end) + end + + test "descending collation order", context do + retry_until(fn -> + resp = Couch.get(url(context), query: %{"descending" => "true"}) + pairs = Enum.zip(resp.body["rows"], Enum.reverse(@values)) + + Enum.each(pairs, fn {row, value} -> + assert row["key"] == convert(value) + end) + end) + end + + test "key query option", context do + Enum.each(@values, fn value -> + retry_until(fn -> + resp = Couch.get(url(context), query: %{:key => :jiffy.encode(value)}) + assert length(resp.body["rows"]) == 1 + assert Enum.at(resp.body["rows"], 0)["key"] == convert(value) + end) + end) + end + + test "inclusive_end=true", context do + query = %{:endkey => :jiffy.encode("b"), :inclusive_end => true} + resp = Couch.get(url(context), query: query) + assert Enum.at(resp.body["rows"], -1)["key"] == "b" + + query = Map.put(query, :descending, true) + resp = Couch.get(url(context), query: query) + assert Enum.at(resp.body["rows"], -1)["key"] == "b" + end + + test "inclusive_end=false", context do + query = %{:endkey => :jiffy.encode("b"), :inclusive_end => false} + resp = Couch.get(url(context), query: query) + assert Enum.at(resp.body["rows"], -1)["key"] == "aa" + + query = Map.put(query, :descending, true) + resp = Couch.get(url(context), query: query) + assert Enum.at(resp.body["rows"], -1)["key"] == "ba" + + query = %{ + :endkey => :jiffy.encode("b"), + :endkey_docid => 10, + :inclusive_end => false + } + + resp = Couch.get(url(context), query: query) + assert Enum.at(resp.body["rows"], -1)["key"] == "aa" + + query = Map.put(query, :endkey_docid, 11) + resp = Couch.get(url(context), query: query) + assert Enum.at(resp.body["rows"], -1)["key"] == "aa" + end + + def url(context) do + "/#{context[:db_name]}/_design/test/_view/test" + end + + def convert(value) do + :jiffy.decode(:jiffy.encode(value), [:return_maps]) + end +end diff --git a/test/javascript/tests/view_collation_raw.js b/test/javascript/tests/view_collation_raw.js index 9b02ff49d..ee990bc4c 100644 --- a/test/javascript/tests/view_collation_raw.js +++ b/test/javascript/tests/view_collation_raw.js @@ -10,6 +10,7 @@ // License for the specific language governing permissions and limitations under // the License. +couchTests.elixir = true; couchTests.view_collation_raw = function(debug) { var db_name = get_random_db_name(); var db = new CouchDB(db_name, {"X-Couch-Full-Commit":"false"}); -- cgit v1.2.1 From ac3f25f07a4359b4161c98e1b5fdb9e7093941ab Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Tue, 7 Jul 2020 12:00:31 +0100 Subject: Eliminate unnecessary node lookup --- src/ebtree.erl | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/src/ebtree.erl b/src/ebtree.erl index 1f4cd381a..4d799d896 100644 --- a/src/ebtree.erl +++ b/src/ebtree.erl @@ -312,7 +312,7 @@ insert(Db, #tree{} = Tree, Key, Value) -> id = ?NODE_ROOT_ID, level = Root0#node.level + 1, members = [{FirstKey, LastKey, OldRoot#node.id, []}]}, - Root2 = split_child(Tx, Tree, Root1, OldRoot), + {Root2, _, _} = split_child(Tx, Tree, Root1, OldRoot), insert_nonfull(Tx, Tree, Root2, Key, Value); false -> insert_nonfull(Tx, Tree, Root0, Key, Value) @@ -363,7 +363,7 @@ split_child(Tx, #tree{} = Tree, #node{} = Parent0, #node{} = Child) -> }, clear_node(Tx, Tree, Child), set_nodes(Tx, Tree, [LeftChild, RightChild, Parent1]), - Parent1. + {Parent1, LeftChild, RightChild}. update_prev_neighbour(_Tx, #tree{} = _Tree, #node{prev = undefined} = _Node) -> @@ -392,14 +392,21 @@ insert_nonfull(Tx, #tree{} = Tree, #node{level = 0} = Node0, Key, Value) -> insert_nonfull(Tx, #tree{} = Tree, #node{} = Node0, Key, Value) -> ChildId0 = find_child_id(Tree, Node0, Key), Child0 = get_node_wait(Tx, Tree, ChildId0), - Node1 = case ?is_full(Tree, Child0) of + {Node1, Child1} = case ?is_full(Tree, Child0) of true -> - split_child(Tx, Tree, Node0, Child0); + {Parent, LeftChild, RightChild} = split_child(Tx, Tree, Node0, Child0), + ChildId = find_child_id(Tree, Parent, Key), + Child = if + ChildId =:= LeftChild#node.id -> + LeftChild; + ChildId =:= RightChild#node.id -> + RightChild + end, + {Parent, Child}; false -> - Node0 + {Node0, Child0} end, - ChildId1 = find_child_id(Tree, Node1, Key), - Child1 = get_node_wait(Tx, Tree, ChildId1), + ChildId1 = Child1#node.id, NewReduction = insert_nonfull(Tx, Tree, Child1, Key, Value), {CurrentFirstKey, CurrentLastKey, ChildId1, _OldReduction} = lists:keyfind(ChildId1, 3, Node1#node.members), [NewFirstKey, _] = sort(Tree, [Key, CurrentFirstKey]), -- cgit v1.2.1 From ce22cbcc2c92de456f0a1d98c30d2ea17a3010c6 Mon Sep 17 00:00:00 2001 From: Juanjo Rodriguez Date: Tue, 7 Jul 2020 09:04:15 +0200 Subject: Port view_compaction test to elixir --- test/elixir/README.md | 4 +- test/elixir/lib/couch/db_test.ex | 1 + test/elixir/test/view_compaction_test.exs | 105 ++++++++++++++++++++++++++++++ test/javascript/tests/view_compaction.js | 1 + 4 files changed, 109 insertions(+), 2 deletions(-) create mode 100644 test/elixir/test/view_compaction_test.exs diff --git a/test/elixir/README.md b/test/elixir/README.md index 44cca52d9..cf529438d 100644 --- a/test/elixir/README.md +++ b/test/elixir/README.md @@ -100,7 +100,7 @@ X means done, - means partially - [X] Port uuids.js - [X] Port view_collation.js - [X] Port view_collation_raw.js - - [ ] Port view_compaction.js + - [X] Port view_compaction.js - [ ] Port view_conflicts.js - [ ] Port view_errors.js - [ ] Port view_include_docs.js @@ -110,7 +110,7 @@ X means done, - means partially - [X] Port view_offsets.js - [X] Port view_pagination.js - [ ] Port view_sandboxing.js - - [ ] Port view_update_seq.js + - [X] Port view_update_seq.js # Using ExUnit to write unit tests diff --git a/test/elixir/lib/couch/db_test.ex b/test/elixir/lib/couch/db_test.ex index 23f10937d..652fa6bb6 100644 --- a/test/elixir/lib/couch/db_test.ex +++ b/test/elixir/lib/couch/db_test.ex @@ -209,6 +209,7 @@ defmodule Couch.DBTest do ) assert resp.status_code in [201, 202] + resp end def query( diff --git a/test/elixir/test/view_compaction_test.exs b/test/elixir/test/view_compaction_test.exs new file mode 100644 index 000000000..d2bf060ba --- /dev/null +++ b/test/elixir/test/view_compaction_test.exs @@ -0,0 +1,105 @@ +defmodule ViewCompactionTest do + use CouchTestCase + + @moduledoc """ + Test CouchDB View Compaction Behavior + This is a port of the view_compaction.js suite + """ + @num_docs 1000 + + @ddoc %{ + _id: "_design/foo", + language: "javascript", + views: %{ + view1: %{ + map: "function(doc) { emit(doc._id, doc.value) }" + }, + view2: %{ + map: + "function(doc) { if (typeof(doc.integer) === 'number') {emit(doc._id, doc.integer);} }", + reduce: "function(keys, values, rereduce) { return sum(values); }" + } + } + } + + defp bulk_save_for_update(db_name, docs) do + resp = bulk_save(db_name, docs) + revs = resp.body + + Enum.map(docs, fn m -> + rev = Enum.at(revs, String.to_integer(m["_id"]))["rev"] + + m + |> Map.put("_rev", rev) + |> Map.update!("integer", &(&1 + 1)) + end) + end + + @tag :with_db + test "view compaction", context do + db_name = context[:db_name] + create_doc(db_name, @ddoc) + + docs = make_docs(0..(@num_docs - 1)) + docs = bulk_save_for_update(db_name, docs) + + resp = view(db_name, "foo/view1") + assert length(resp.body["rows"]) == @num_docs + + resp = view(db_name, "foo/view2") + assert length(resp.body["rows"]) == 1 + + resp = Couch.get("/#{db_name}/_design/foo/_info") + assert resp.body["view_index"]["update_seq"] == @num_docs + 1 + + docs = bulk_save_for_update(db_name, docs) + + resp = view(db_name, "foo/view1") + assert length(resp.body["rows"]) == @num_docs + + resp = view(db_name, "foo/view2") + assert length(resp.body["rows"]) == 1 + + resp = Couch.get("/#{db_name}/_design/foo/_info") + assert resp.body["view_index"]["update_seq"] == 2 * @num_docs + 1 + + bulk_save(db_name, docs) + resp = view(db_name, "foo/view1") + assert length(resp.body["rows"]) == @num_docs + + resp = view(db_name, "foo/view2") + assert length(resp.body["rows"]) == 1 + + resp = Couch.get("/#{db_name}/_design/foo/_info") + assert resp.body["view_index"]["update_seq"] == 3 * @num_docs + 1 + + disk_size_before_compact = resp.body["view_index"]["sizes"]["file"] + data_size_before_compact = resp.body["view_index"]["sizes"]["active"] + + assert is_integer(disk_size_before_compact) + assert data_size_before_compact < disk_size_before_compact + + resp = Couch.post("/#{db_name}/_compact/foo") + assert resp.body["ok"] == true + + retry_until(fn -> + resp = Couch.get("/#{db_name}/_design/foo/_info") + resp.body["view_index"]["compact_running"] == false + end) + + resp = view(db_name, "foo/view1") + assert length(resp.body["rows"]) == @num_docs + + resp = view(db_name, "foo/view2") + assert length(resp.body["rows"]) == 1 + + resp = Couch.get("/#{db_name}/_design/foo/_info") + assert resp.body["view_index"]["update_seq"] == 3 * @num_docs + 1 + + disk_size_after_compact = resp.body["view_index"]["sizes"]["file"] + data_size_after_compact = resp.body["view_index"]["sizes"]["active"] + assert disk_size_after_compact < disk_size_before_compact + assert is_integer(data_size_after_compact) + assert data_size_after_compact < disk_size_after_compact + end +end diff --git a/test/javascript/tests/view_compaction.js b/test/javascript/tests/view_compaction.js index d1a1e8790..f2af39058 100644 --- a/test/javascript/tests/view_compaction.js +++ b/test/javascript/tests/view_compaction.js @@ -10,6 +10,7 @@ // License for the specific language governing permissions and limitations under // the License. +couchTests.elixir = true; couchTests.view_compaction = function(debug) { if (debug) debugger; -- cgit v1.2.1 From 2da3eeea0793943ed1515bd8c1bc6d988e23511c Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Tue, 7 Jul 2020 13:51:42 +0100 Subject: swap get_node and get_node_wait Align with fabric2_ convention. --- src/ebtree.erl | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/src/ebtree.erl b/src/ebtree.erl index 4d799d896..1a1449148 100644 --- a/src/ebtree.erl +++ b/src/ebtree.erl @@ -107,7 +107,7 @@ lookup(Db, #tree{} = Tree, Key) -> fold(Db, #tree{} = Tree, Fun, Acc) -> {_, Reduce} = erlfdb:transactional(Db, fun(Tx) -> - Root = get_node_wait(Tx, Tree, ?NODE_ROOT_ID), + Root = get_node(Tx, Tree, ?NODE_ROOT_ID), fold(Db, Tree, Root, Fun, Acc) end), Reduce. @@ -130,7 +130,7 @@ fold(Db, #tree{} = Tree, [{K, V} | Rest], Fun, Acc0) -> fold(Db, #tree{} = Tree, [{F, L, P, R} | Rest], Fun, Acc0) -> case Fun({traverse, F, L, R}, Acc0) of {ok, Acc1} -> - Node = get_node_wait(Db, Tree, P), + Node = get_node(Db, Tree, P), case fold(Db, Tree, Node, Fun, Acc1) of {ok, Acc2} -> fold(Db, Tree, Rest, Fun, Acc2); @@ -255,7 +255,7 @@ group_reduce(Db, #tree{} = Tree, StartKey, EndKey, GroupKeyFun) -> range(Db, #tree{} = Tree, StartKey, EndKey, Fun, Acc0) -> erlfdb:transactional(Db, fun(Tx) -> - range(Tx, Tree, get_node_wait(Tx, Tree, ?NODE_ROOT_ID), StartKey, EndKey, Fun, Acc0) + range(Tx, Tree, get_node(Tx, Tree, ?NODE_ROOT_ID), StartKey, EndKey, Fun, Acc0) end). range(Tx, #tree{} = Tree, #node{level = 0} = Node, StartKey, EndKey, Fun, Acc0) -> @@ -265,20 +265,20 @@ range(Tx, #tree{} = Tree, #node{level = 0} = Node, StartKey, EndKey, Fun, Acc0) LastKey = last_key(Node), case Node#node.next /= undefined andalso less_than_or_equal(Tree, LastKey, EndKey) of true -> - range(Tx, Tree, get_node_wait(Tx, Tree, Node#node.next), StartKey, EndKey, Fun, Acc1); + range(Tx, Tree, get_node(Tx, Tree, Node#node.next), StartKey, EndKey, Fun, Acc1); false -> Acc1 end; range(Tx, #tree{} = Tree, #node{} = Node, StartKey, EndKey, Fun, Acc) -> ChildId = find_child_id(Tree, Node, StartKey), - range(Tx, Tree, get_node_wait(Tx, Tree, ChildId), StartKey, EndKey, Fun, Acc). + range(Tx, Tree, get_node(Tx, Tree, ChildId), StartKey, EndKey, Fun, Acc). %% reverse range (inclusive of both ends) reverse_range(Db, #tree{} = Tree, StartKey, EndKey, Fun, Acc0) -> erlfdb:transactional(Db, fun(Tx) -> - reverse_range(Tx, Tree, get_node_wait(Tx, Tree, ?NODE_ROOT_ID), StartKey, EndKey, Fun, Acc0) + reverse_range(Tx, Tree, get_node(Tx, Tree, ?NODE_ROOT_ID), StartKey, EndKey, Fun, Acc0) end). reverse_range(Tx, #tree{} = Tree, #node{level = 0} = Node, StartKey, EndKey, Fun, Acc0) -> @@ -288,21 +288,21 @@ reverse_range(Tx, #tree{} = Tree, #node{level = 0} = Node, StartKey, EndKey, Fun FirstKey = first_key(Node), case Node#node.prev /= undefined andalso less_than_or_equal(Tree, StartKey, FirstKey) of true -> - reverse_range(Tx, Tree, get_node_wait(Tx, Tree, Node#node.prev), StartKey, EndKey, Fun, Acc1); + reverse_range(Tx, Tree, get_node(Tx, Tree, Node#node.prev), StartKey, EndKey, Fun, Acc1); false -> Acc1 end; reverse_range(Tx, #tree{} = Tree, #node{} = Node, StartKey, EndKey, Fun, Acc) -> ChildId = find_child_id(Tree, Node, EndKey), - reverse_range(Tx, Tree, get_node_wait(Tx, Tree, ChildId), StartKey, EndKey, Fun, Acc). + reverse_range(Tx, Tree, get_node(Tx, Tree, ChildId), StartKey, EndKey, Fun, Acc). %% insert insert(Db, #tree{} = Tree, Key, Value) -> erlfdb:transactional(Db, fun(Tx) -> - Root0 = get_node_wait(Tx, Tree, ?NODE_ROOT_ID), + Root0 = get_node(Tx, Tree, ?NODE_ROOT_ID), case ?is_full(Tree, Root0) of true -> OldRoot = Root0#node{id = new_node_id(Tx, Tree)}, @@ -370,7 +370,7 @@ update_prev_neighbour(_Tx, #tree{} = _Tree, #node{prev = undefined} = _Node) -> ok; update_prev_neighbour(Tx, #tree{} = Tree, #node{} = Node) -> - Left = get_node_wait(Tx, Tree, Node#node.prev), + Left = get_node(Tx, Tree, Node#node.prev), set_node(Tx, Tree, Left#node{next = Node#node.id}). @@ -378,7 +378,7 @@ update_next_neighbour(_Tx, #tree{} = _Tree, #node{next = undefined} = _Node) -> ok; update_next_neighbour(Tx, #tree{} = Tree, #node{} = Node) -> - Left = get_node_wait(Tx, Tree, Node#node.next), + Left = get_node(Tx, Tree, Node#node.next), set_node(Tx, Tree, Left#node{prev = Node#node.id}). @@ -391,7 +391,7 @@ insert_nonfull(Tx, #tree{} = Tree, #node{level = 0} = Node0, Key, Value) -> insert_nonfull(Tx, #tree{} = Tree, #node{} = Node0, Key, Value) -> ChildId0 = find_child_id(Tree, Node0, Key), - Child0 = get_node_wait(Tx, Tree, ChildId0), + Child0 = get_node(Tx, Tree, ChildId0), {Node1, Child1} = case ?is_full(Tree, Child0) of true -> {Parent, LeftChild, RightChild} = split_child(Tx, Tree, Node0, Child0), @@ -423,12 +423,12 @@ insert_nonfull(Tx, #tree{} = Tree, #node{} = Node0, Key, Value) -> delete(Db, #tree{} = Tree, Key) -> erlfdb:transactional(Db, fun(Tx) -> - Root0 = get_node_wait(Tx, Tree, ?NODE_ROOT_ID), + Root0 = get_node(Tx, Tree, ?NODE_ROOT_ID), case delete(Tx, Tree, Root0, Key) of % if only one child, make it the new root. #node{level = L, members = [_]} = Root1 when L > 0 -> [{_, _, ChildId, _}] = Root1#node.members, - Root2 = get_node_wait(Tx, Tree, ChildId), + Root2 = get_node(Tx, Tree, ChildId), clear_node(Tx, Tree, Root2), set_node(Tx, Tree, Root2#node{id = ?NODE_ROOT_ID}); Root1 -> @@ -445,12 +445,12 @@ delete(_Tx, #tree{} = _Tree, #node{level = 0} = Node, Key) -> delete(Tx, #tree{} = Tree, #node{} = Parent0, Key) -> ChildId0 = find_child_id(Tree, Parent0, Key), - Child0 = get_node_wait(Tx, Tree, ChildId0), + Child0 = get_node(Tx, Tree, ChildId0), Child1 = delete(Tx, Tree, Child0, Key), case ?underflow(Tree, Child1) of true -> SiblingId = find_sibling_id(Tree, Parent0, ChildId0, Key), - Sibling = get_node_wait(Tx, Tree, SiblingId), + Sibling = get_node(Tx, Tree, SiblingId), NewNodes = case ?at_min(Tree, Sibling) of true -> Merged = merge(Tx, Tree, Child1, Sibling), @@ -573,11 +573,11 @@ meta_key(Prefix, MetaKey) when is_binary(Prefix) -> %% node persistence functions -get_node_wait(Tx, #tree{} = Tree, Id) -> - get_node(Id, get_node_future(Tx, Tree, Id)). +get_node(Tx, #tree{} = Tree, Id) -> + get_node_wait(Id, get_node_future(Tx, Tree, Id)). -get_node(Id, Future) -> +get_node_wait(Id, Future) -> decode_node(Id, erlfdb:wait(Future)). @@ -616,7 +616,7 @@ node_key(Prefix, Id) when is_binary(Prefix), is_integer(Id) -> validate_tree(Db, #tree{} = Tree) -> erlfdb:transactional(Db, fun(Tx) -> - Root = get_node_wait(Db, Tree, ?NODE_ROOT_ID), + Root = get_node(Db, Tree, ?NODE_ROOT_ID), validate_tree(Tx, Tree, Root) end). @@ -633,7 +633,7 @@ validate_tree(_Tx, #tree{} = _Tree, []) -> ok; validate_tree(Tx, #tree{} = Tree, [{_F, _L, P, _R} | Rest]) -> - Node = get_node_wait(Tx, Tree, P), + Node = get_node(Tx, Tree, P), validate_tree(Tx, Tree, Node), validate_tree(Tx, Tree, Rest). -- cgit v1.2.1 From 952cab3ea8547cf0bf1e2d0f60d488c5c76462cc Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Tue, 7 Jul 2020 15:00:37 +0100 Subject: Remove separate init function open will now initialise the btree if it is not already set up. It is a fatal error to try to open an existing ebtree with the wrong order value. --- src/ebtree.erl | 104 +++++++++++++++++++++++++++------------------------------ 1 file changed, 49 insertions(+), 55 deletions(-) diff --git a/src/ebtree.erl b/src/ebtree.erl index 1a1449148..12861dadc 100644 --- a/src/ebtree.erl +++ b/src/ebtree.erl @@ -1,9 +1,8 @@ -module(ebtree). -export([ - init/3, - open/2, open/3, + open/4, insert/4, delete/3, lookup/3, @@ -51,31 +50,31 @@ -define(is_full(Tree, Node), Tree#tree.max == length(Node#node.members)). -init(Db, Prefix, Order) when is_binary(Prefix), is_integer(Order), Order > 2, Order rem 2 == 0 -> - erlfdb:transactional(Db, fun(Tx) -> - erlfdb:clear_range_startswith(Tx, Prefix), - set_meta(Tx, Prefix, ?META_ORDER, Order), - set_meta(Tx, Prefix, ?META_NEXT_ID, 1), - set_node(Tx, init_tree(Prefix, Order), #node{id = ?NODE_ROOT_ID}), - ok - end). - +open(Db, Prefix, Order) -> + open(Db, Prefix, Order, []). -open(Db, Prefix) -> - open(Db, Prefix, []). - -open(Db, Prefix, Options) -> +open(Db, Prefix, Order, Options) when is_binary(Prefix), is_integer(Order), Order > 2, Order rem 2 == 0 -> ReduceFun = proplists:get_value(reduce_fun, Options, fun reduce_noop/2), CollateFun = proplists:get_value(collate_fun, Options, fun collate_raw/2), erlfdb:transactional(Db, fun(Tx) -> - Order = get_meta(Tx, Prefix, ?META_ORDER), - Tree = init_tree(Prefix, Order), - Tree#tree{ - reduce_fun = ReduceFun, - collate_fun = CollateFun - } - end). + case get_meta(Tx, Prefix, ?META_ORDER) of + not_found -> + erlfdb:clear_range_startswith(Tx, Prefix), + set_meta(Tx, Prefix, ?META_ORDER, Order), + set_meta(Tx, Prefix, ?META_NEXT_ID, 1), + set_node(Tx, init_tree(Prefix, Order), #node{id = ?NODE_ROOT_ID}); + Order -> + ok; + Else -> + erlang:error({order_mismatch, Else}) + end + end), + Tree = init_tree(Prefix, Order), + Tree#tree{ + reduce_fun = ReduceFun, + collate_fun = CollateFun + }. %% lookup @@ -557,8 +556,19 @@ find_child_int(#tree{} = Tree, [{_F, L, _P, _R} = Child| Rest], Key) -> get_meta(Tx, #tree{} = Tree, MetaKey) -> get_meta(Tx, Tree#tree.prefix, MetaKey); + get_meta(Tx, Prefix, MetaKey) when is_binary(Prefix) -> - decode_value(erlfdb:wait(erlfdb:get(Tx, meta_key(Prefix, MetaKey)))). + Future = get_meta_future(Tx, Prefix, MetaKey), + case erlfdb:wait(Future) of + not_found -> + not_found; + Value -> + decode_value(Value) + end. + + +get_meta_future(Tx, Prefix, MetaKey) -> + erlfdb:get(Tx, meta_key(Prefix, MetaKey)). set_meta(Tx, Prefix, MetaKey, MetaValue) -> @@ -877,8 +887,7 @@ collation_fun_test_() -> lookup_test() -> Db = erlfdb_util:get_test_db([empty]), - init(Db, <<1,2,3>>, 4), - Tree = open(Db, <<1,2,3>>), + Tree = open(Db, <<1,2,3>>, 4), Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, 100)])], lists:foreach(fun(Key) -> insert(Db, Tree, Key, Key + 1) end, Keys), lists:foreach(fun(Key) -> ?assertEqual({Key, Key + 1}, lookup(Db, Tree, Key)) end, Keys), @@ -887,8 +896,7 @@ lookup_test() -> delete_test() -> Db = erlfdb_util:get_test_db([empty]), - init(Db, <<1,2,3>>, 4), - Tree = open(Db, <<1,2,3>>), + Tree = open(Db, <<1,2,3>>, 4), Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, 100)])], lists:foreach(fun(Key) -> insert(Db, Tree, Key, Key + 1) end, Keys), lists:foreach(fun(Key) -> ?assertEqual({Key, Key + 1}, lookup(Db, Tree, Key)) end, Keys), @@ -898,8 +906,7 @@ delete_test() -> range_after_delete_test() -> Db = erlfdb_util:get_test_db([empty]), - init(Db, <<1,2,3>>, 4), - Tree = open(Db, <<1,2,3>>), + Tree = open(Db, <<1,2,3>>, 4), Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, 100)])], lists:foreach(fun(Key) -> insert(Db, Tree, Key, Key + 1) end, Keys), lists:foreach(fun(Key) -> ?assertEqual({Key, Key + 1}, lookup(Db, Tree, Key)) end, Keys), @@ -910,8 +917,7 @@ range_after_delete_test() -> full_reduce_test_() -> Db = erlfdb_util:get_test_db([empty]), - init(Db, <<1,2,3>>, 4), - Tree = open(Db, <<1,2,3>>, [{reduce_fun, fun reduce_sum/2}]), + Tree = open(Db, <<1,2,3>>, 4, [{reduce_fun, fun reduce_sum/2}]), TestFun = fun(Max) -> Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, Max)])], lists:foreach(fun(Key) -> insert(Db, Tree, Key, Key) end, Keys), @@ -925,8 +931,7 @@ full_reduce_test_() -> full_reduce_after_delete_test() -> Db = erlfdb_util:get_test_db([empty]), - init(Db, <<1,2,3>>, 4), - Tree = open(Db, <<1,2,3>>, [{reduce_fun, fun reduce_sum/2}]), + Tree = open(Db, <<1,2,3>>, 4, [{reduce_fun, fun reduce_sum/2}]), Max = 100, Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, Max)])], lists:foreach(fun(Key) -> insert(Db, Tree, Key, Key) end, Keys), @@ -937,8 +942,7 @@ full_reduce_after_delete_test() -> count_reduce_test_() -> Db = erlfdb_util:get_test_db([empty]), - init(Db, <<1,2,3>>, 4), - Tree = open(Db, <<1,2,3>>, [{reduce_fun, fun reduce_count/2}]), + Tree = open(Db, <<1,2,3>>, 4, [{reduce_fun, fun reduce_count/2}]), Max = 100, Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, Max)])], lists:foreach(fun(Key) -> insert(Db, Tree, Key, Key) end, Keys), @@ -954,8 +958,7 @@ count_reduce_test_() -> sum_reduce_test_() -> Db = erlfdb_util:get_test_db([empty]), - init(Db, <<1,2,3>>, 4), - Tree = open(Db, <<1,2,3>>, [{reduce_fun, fun reduce_sum/2}]), + Tree = open(Db, <<1,2,3>>, 4, [{reduce_fun, fun reduce_sum/2}]), Max = 100, Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, Max)])], lists:foreach(fun(Key) -> insert(Db, Tree, Key, Key) end, Keys), @@ -972,8 +975,7 @@ sum_reduce_test_() -> stats_reduce_test_() -> Db = erlfdb_util:get_test_db([empty]), - init(Db, <<1,2,3>>, 4), - Tree = open(Db, <<1,2,3>>, [{reduce_fun, fun reduce_stats/2}]), + Tree = open(Db, <<1,2,3>>, 4, [{reduce_fun, fun reduce_stats/2}]), Max = 100, Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, Max)])], lists:foreach(fun(Key) -> insert(Db, Tree, Key, Key) end, Keys), @@ -989,8 +991,7 @@ stats_reduce_test_() -> group_reduce_test_() -> Db = erlfdb_util:get_test_db([empty]), - init(Db, <<1,2,3>>, 4), - Tree = open(Db, <<1,2,3>>, [{reduce_fun, fun reduce_sum/2}]), + Tree = open(Db, <<1,2,3>>, 4, [{reduce_fun, fun reduce_sum/2}]), Max = 100, Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, Max)])], GroupKeyFun = fun(Key) -> lists:sublist(Key, 2) end, @@ -1003,8 +1004,7 @@ group_reduce_test_() -> raw_collation_test() -> Db = erlfdb_util:get_test_db([empty]), - init(Db, <<1,2,3>>, 4), - Tree = open(Db, <<1,2,3>>), + Tree = open(Db, <<1,2,3>>, 4), insert(Db, Tree, null, null), insert(Db, Tree, 1, 1), ?assertEqual([{1, 1}, {null, null}], range(Db, Tree, 1, null, fun(E, A) -> A ++ E end, [])). @@ -1012,9 +1012,8 @@ raw_collation_test() -> custom_collation_test() -> Db = erlfdb_util:get_test_db([empty]), - init(Db, <<1,2,3>>, 4), CollateFun = fun(A, B) -> B =< A end, - Tree = open(Db, <<1,2,3>>, [{collate_fun, CollateFun}]), + Tree = open(Db, <<1,2,3>>, 4, [{collate_fun, CollateFun}]), insert(Db, Tree, 1, 1), insert(Db, Tree, 2, 2), ?assertEqual([{2, 2}, {1, 1}], range(Db, Tree, 3, 0, fun(E, A) -> A ++ E end, [])). @@ -1030,10 +1029,9 @@ intense_lookup_test_() -> lookup_test_fun(Max, Order) -> Db = erlfdb_util:get_test_db([empty]), - init(Db, <<1,2,3>>, Order), Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, Max, 2)])], T0 = erlang:monotonic_time(), - Tree = lists:foldl(fun(Key, T) -> insert(Db, T, Key, Key + 1) end, open(Db, <<1,2,3>>), Keys), + Tree = lists:foldl(fun(Key, T) -> insert(Db, T, Key, Key + 1) end, open(Db, <<1,2,3>>, Order), Keys), T1 = erlang:monotonic_time(), lists:foreach(fun(Key) -> ?assertEqual({Key, Key + 1}, lookup(Db, Tree, Key)) end, Keys), T2 = erlang:monotonic_time(), @@ -1044,10 +1042,9 @@ lookup_test_fun(Max, Order) -> range_test_() -> {timeout, 1000, fun() -> Db = erlfdb_util:get_test_db([empty]), - init(Db, <<1,2,3>>, 10), Max = 1000, Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, Max)])], - Tree = lists:foldl(fun(Key, T) -> insert(Db, T, Key, Key + 1) end, open(Db, <<1,2,3>>), Keys), + Tree = lists:foldl(fun(Key, T) -> insert(Db, T, Key, Key + 1) end, open(Db, <<1,2,3>>, 10), Keys), lists:foreach( fun(_) -> [StartKey, EndKey] = lists:sort([rand:uniform(Max), rand:uniform(Max)]), @@ -1061,10 +1058,9 @@ range_test_() -> reverse_range_test_() -> {timeout, 1000, fun() -> Db = erlfdb_util:get_test_db([empty]), - init(Db, <<1,2,3>>, 10), Max = 1000, Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, Max)])], - Tree = lists:foldl(fun(Key, T) -> insert(Db, T, Key, Key + 1) end, open(Db, <<1,2,3>>), Keys), + Tree = lists:foldl(fun(Key, T) -> insert(Db, T, Key, Key + 1) end, open(Db, <<1,2,3>>, 10), Keys), lists:foreach( fun(_) -> [StartKey, EndKey] = lists:sort([rand:uniform(Max), rand:uniform(Max)]), @@ -1078,11 +1074,10 @@ reverse_range_test_() -> custom_collation_range_test_() -> {timeout, 1000, fun() -> Db = erlfdb_util:get_test_db([empty]), - init(Db, <<1,2,3>>, 10), Max = 1000, Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, Max)])], CollateFun = fun(A, B) -> B =< A end, - Tree = open(Db, <<1,2,3>>, [{collate_fun, CollateFun}]), + Tree = open(Db, <<1,2,3>>, 10, [{collate_fun, CollateFun}]), lists:foldl(fun(Key, T) -> insert(Db, T, Key, Key + 1) end, Tree, Keys), lists:foreach( fun(_) -> @@ -1103,11 +1098,10 @@ custom_collation_range_test_() -> custom_collation_reverse_range_test_() -> {timeout, 1000, fun() -> Db = erlfdb_util:get_test_db([empty]), - init(Db, <<1,2,3>>, 10), Max = 1000, Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, Max)])], CollateFun = fun(A, B) -> B =< A end, - Tree = open(Db, <<1,2,3>>, [{collate_fun, CollateFun}]), + Tree = open(Db, <<1,2,3>>, 10, [{collate_fun, CollateFun}]), lists:foldl(fun(Key, T) -> insert(Db, T, Key, Key + 1) end, Tree, Keys), lists:foreach( fun(_) -> -- cgit v1.2.1 From 382096d082bb92d34baea9969750dac074bbc9b2 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Tue, 7 Jul 2020 17:47:02 +0100 Subject: Introduce min and max keys for open ranges Use ebtree:min() for startkey or ebtree:max() for endkey to include all keys on that end. use both to include all possible keys. It is not permitted to insert either special value into ebtree. --- src/ebtree.erl | 39 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 37 insertions(+), 2 deletions(-) diff --git a/src/ebtree.erl b/src/ebtree.erl index 12861dadc..c481e5a40 100644 --- a/src/ebtree.erl +++ b/src/ebtree.erl @@ -3,6 +3,8 @@ -export([ open/3, open/4, + min/0, + max/0, insert/4, delete/3, lookup/3, @@ -49,6 +51,10 @@ -define(at_min(Tree, Node), Tree#tree.min == length(Node#node.members)). -define(is_full(Tree, Node), Tree#tree.max == length(Node#node.members)). +%% two special 1-bit bitstrings that cannot appear in valid keys. +-define(MIN, <<0:1>>). +-define(MAX, <<1:1>>). + open(Db, Prefix, Order) -> open(Db, Prefix, Order, []). @@ -77,6 +83,13 @@ open(Db, Prefix, Order, Options) when is_binary(Prefix), is_integer(Order), Orde }. +min() -> + ?MIN. + + +max() -> + ?MAX. + %% lookup lookup(Db, #tree{} = Tree, Key) -> @@ -203,7 +216,7 @@ do_reduce(#tree{} = Tree, MapValues, ReduceValues) when is_list(MapValues), is_l %% group reduce - produces reductions for contiguous keys in the same group. group_reduce(Db, #tree{} = Tree, StartKey, EndKey, GroupKeyFun) -> - NoGroupYet = erlang:make_ref(), + NoGroupYet = ?MIN, Fun = fun ({visit, Key, Value}, {CurrentGroup, GroupAcc, MapAcc, ReduceAcc}) -> AfterEnd = greater_than(Tree, Key, EndKey), @@ -299,6 +312,12 @@ reverse_range(Tx, #tree{} = Tree, #node{} = Node, StartKey, EndKey, Fun, Acc) -> %% insert +insert(_Db, #tree{} = _Tree, ?MIN, _Value) -> + erlang:error(min_not_allowed); + +insert(_Db, #tree{} = _Tree, ?MAX, _Value) -> + erlang:error(max_not_allowed); + insert(Db, #tree{} = Tree, Key, Value) -> erlfdb:transactional(Db, fun(Tx) -> Root0 = get_node(Tx, Tree, ?NODE_ROOT_ID), @@ -779,6 +798,18 @@ less_than(#tree{} = Tree, A, B) -> less_than_or_equal(Tree, A, B). +less_than_or_equal(#tree{} = _Tree, ?MIN, _B) -> + true; + +less_than_or_equal(#tree{} = _Tree, _A, ?MIN) -> + false; + +less_than_or_equal(#tree{} = _Tree, ?MAX, _B) -> + false; + +less_than_or_equal(#tree{} = _Tree, _A, ?MAX) -> + true; + less_than_or_equal(#tree{} = Tree, A, B) -> #tree{collate_fun = CollateFun} = Tree, CollateFun(A, B). @@ -998,7 +1029,11 @@ group_reduce_test_() -> lists:foreach(fun(Key) -> insert(Db, Tree, [Key rem 4, Key rem 3, Key], Key) end, Keys), [ ?_test(?assertEqual([{[1, 0], 408}, {[1, 1], 441}, {[1, 2], 376}], - group_reduce(Db, Tree, [1], [2], GroupKeyFun))) + group_reduce(Db, Tree, [1], [2], GroupKeyFun))), + + ?_test(?assertEqual([{[0,0],432}, {[0,1],468}, {[0,2],400}, {[1,0],408}, {[1,1],441}, {[1,2],376}, + {[2,0],384}, {[2,1],416}, {[2,2],450}, {[3,0],459}, {[3,1],392}, {[3,2],424}], + group_reduce(Db, Tree, ebtree:min(), ebtree:max(), GroupKeyFun))) ]. -- cgit v1.2.1 From a3aa8cb95ee6376cb37f71a5bdf17bd85e8ec2a7 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Tue, 7 Jul 2020 18:15:01 +0100 Subject: Update README.md this changed from uuid with the 'sequential' PR merge. --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index edcf80e73..b21c65f9f 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,8 @@ more efficient for multiple inserts, so batch if you can. A reduction function can be specified, the B+Tree calculates and stores intermediate reduction values on the inner nodes for performance. -The FoundationDB keys are currently random UUID's. +The FoundationDB keys start with a user defined prefix and the opaque +node id. TODO -- cgit v1.2.1 From b2d7aa3c86d775d8815ae7066f91bee5767e2162 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Tue, 7 Jul 2020 21:29:34 +0100 Subject: reuse do_reduce in full_reduce --- src/ebtree.erl | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/ebtree.erl b/src/ebtree.erl index c481e5a40..a21b3435d 100644 --- a/src/ebtree.erl +++ b/src/ebtree.erl @@ -164,12 +164,8 @@ full_reduce(Db, #tree{} = Tree) -> ({traverse, _F, _L, R}, {MapAcc, ReduceAcc}) -> {skip, {MapAcc, [R | ReduceAcc]}} end, - case fold(Db, Tree, Fun, {[], []}) of - {MapAcc, []} -> - reduce_values(Tree, MapAcc, false); - {[], ReduceAcc} -> - reduce_values(Tree, ReduceAcc, true) - end. + {MapValues, ReduceValues} = fold(Db, Tree, Fun, {[], []}), + do_reduce(Tree, MapValues, ReduceValues). %% reduce -- cgit v1.2.1 From b602a961435608865218b16242107f84648a2615 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Wed, 8 Jul 2020 16:08:15 +0100 Subject: clarify the meaning of Fun in range/reverse_range --- src/ebtree.erl | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/src/ebtree.erl b/src/ebtree.erl index a21b3435d..2a4cd31c0 100644 --- a/src/ebtree.erl +++ b/src/ebtree.erl @@ -261,49 +261,49 @@ group_reduce(Db, #tree{} = Tree, StartKey, EndKey, GroupKeyFun) -> %% range (inclusive of both ends) -range(Db, #tree{} = Tree, StartKey, EndKey, Fun, Acc0) -> +range(Db, #tree{} = Tree, StartKey, EndKey, AccFun, Acc0) -> erlfdb:transactional(Db, fun(Tx) -> - range(Tx, Tree, get_node(Tx, Tree, ?NODE_ROOT_ID), StartKey, EndKey, Fun, Acc0) + range(Tx, Tree, get_node(Tx, Tree, ?NODE_ROOT_ID), StartKey, EndKey, AccFun, Acc0) end). -range(Tx, #tree{} = Tree, #node{level = 0} = Node, StartKey, EndKey, Fun, Acc0) -> +range(Tx, #tree{} = Tree, #node{level = 0} = Node, StartKey, EndKey, AccFun, Acc0) -> InRange = [{K, V} || {K, V} <- Node#node.members, less_than_or_equal(Tree, StartKey, K), less_than_or_equal(Tree, K, EndKey)], - Acc1 = Fun(InRange, Acc0), + Acc1 = AccFun(InRange, Acc0), LastKey = last_key(Node), case Node#node.next /= undefined andalso less_than_or_equal(Tree, LastKey, EndKey) of true -> - range(Tx, Tree, get_node(Tx, Tree, Node#node.next), StartKey, EndKey, Fun, Acc1); + range(Tx, Tree, get_node(Tx, Tree, Node#node.next), StartKey, EndKey, AccFun, Acc1); false -> Acc1 end; -range(Tx, #tree{} = Tree, #node{} = Node, StartKey, EndKey, Fun, Acc) -> +range(Tx, #tree{} = Tree, #node{} = Node, StartKey, EndKey, AccFun, Acc) -> ChildId = find_child_id(Tree, Node, StartKey), - range(Tx, Tree, get_node(Tx, Tree, ChildId), StartKey, EndKey, Fun, Acc). + range(Tx, Tree, get_node(Tx, Tree, ChildId), StartKey, EndKey, AccFun, Acc). %% reverse range (inclusive of both ends) -reverse_range(Db, #tree{} = Tree, StartKey, EndKey, Fun, Acc0) -> +reverse_range(Db, #tree{} = Tree, StartKey, EndKey, AccFun, Acc0) -> erlfdb:transactional(Db, fun(Tx) -> - reverse_range(Tx, Tree, get_node(Tx, Tree, ?NODE_ROOT_ID), StartKey, EndKey, Fun, Acc0) + reverse_range(Tx, Tree, get_node(Tx, Tree, ?NODE_ROOT_ID), StartKey, EndKey, AccFun, Acc0) end). -reverse_range(Tx, #tree{} = Tree, #node{level = 0} = Node, StartKey, EndKey, Fun, Acc0) -> +reverse_range(Tx, #tree{} = Tree, #node{level = 0} = Node, StartKey, EndKey, AccFun, Acc0) -> InRange = [{K, V} || {K, V} <- Node#node.members, less_than_or_equal(Tree, StartKey, K), less_than_or_equal(Tree, K, EndKey)], - Acc1 = Fun(lists:reverse(InRange), Acc0), + Acc1 = AccFun(lists:reverse(InRange), Acc0), FirstKey = first_key(Node), case Node#node.prev /= undefined andalso less_than_or_equal(Tree, StartKey, FirstKey) of true -> - reverse_range(Tx, Tree, get_node(Tx, Tree, Node#node.prev), StartKey, EndKey, Fun, Acc1); + reverse_range(Tx, Tree, get_node(Tx, Tree, Node#node.prev), StartKey, EndKey, AccFun, Acc1); false -> Acc1 end; -reverse_range(Tx, #tree{} = Tree, #node{} = Node, StartKey, EndKey, Fun, Acc) -> +reverse_range(Tx, #tree{} = Tree, #node{} = Node, StartKey, EndKey, AccFun, Acc) -> ChildId = find_child_id(Tree, Node, EndKey), - reverse_range(Tx, Tree, get_node(Tx, Tree, ChildId), StartKey, EndKey, Fun, Acc). + reverse_range(Tx, Tree, get_node(Tx, Tree, ChildId), StartKey, EndKey, AccFun, Acc). %% insert -- cgit v1.2.1 From 06c1f2f9bcd7816af2e48fe27fcab4431bf7cdb2 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Wed, 8 Jul 2020 16:22:09 +0100 Subject: switch group_reduce to a user-defined acc function --- src/ebtree.erl | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/src/ebtree.erl b/src/ebtree.erl index 2a4cd31c0..024fbd375 100644 --- a/src/ebtree.erl +++ b/src/ebtree.erl @@ -13,7 +13,7 @@ fold/4, reduce/4, full_reduce/2, - group_reduce/5, + group_reduce/7, validate_tree/2 ]). @@ -211,52 +211,51 @@ do_reduce(#tree{} = Tree, MapValues, ReduceValues) when is_list(MapValues), is_l %% group reduce - produces reductions for contiguous keys in the same group. -group_reduce(Db, #tree{} = Tree, StartKey, EndKey, GroupKeyFun) -> +group_reduce(Db, #tree{} = Tree, StartKey, EndKey, GroupKeyFun, UserAccFun, UserAcc0) -> NoGroupYet = ?MIN, Fun = fun - ({visit, Key, Value}, {CurrentGroup, GroupAcc, MapAcc, ReduceAcc}) -> + ({visit, Key, Value}, {CurrentGroup, UserAcc, MapAcc, ReduceAcc}) -> AfterEnd = greater_than(Tree, Key, EndKey), InRange = greater_than_or_equal(Tree, Key, StartKey) andalso less_than_or_equal(Tree, Key, EndKey), KeyGroup = GroupKeyFun(Key), SameGroup = CurrentGroup =:= KeyGroup, if AfterEnd -> - {stop, {CurrentGroup, GroupAcc, MapAcc, ReduceAcc}}; + {stop, {CurrentGroup, UserAcc, MapAcc, ReduceAcc}}; SameGroup -> - {ok, {CurrentGroup, GroupAcc, [{Key, Value} | MapAcc], ReduceAcc}}; + {ok, {CurrentGroup, UserAcc, [{Key, Value} | MapAcc], ReduceAcc}}; InRange andalso CurrentGroup =:= NoGroupYet -> - {ok, {KeyGroup, GroupAcc, [{Key, Value}], []}}; + {ok, {KeyGroup, UserAcc, [{Key, Value}], []}}; InRange -> %% implicit end of current group and start of a new one GroupValue = do_reduce(Tree, MapAcc, ReduceAcc), - {ok, {KeyGroup, [{CurrentGroup, GroupValue} | GroupAcc], [{Key, Value}], []}}; + {ok, {KeyGroup, UserAccFun({CurrentGroup, GroupValue}, UserAcc), [{Key, Value}], []}}; true -> - {ok, {CurrentGroup, GroupAcc, MapAcc, ReduceAcc}} + {ok, {CurrentGroup, UserAcc, MapAcc, ReduceAcc}} end; - ({traverse, FirstKey, LastKey, Reduction}, {CurrentGroup, GroupAcc, MapAcc, ReduceAcc}) -> + ({traverse, FirstKey, LastKey, Reduction}, {CurrentGroup, UserAcc, MapAcc, ReduceAcc}) -> BeforeStart = less_than(Tree, LastKey, StartKey), AfterEnd = greater_than(Tree, FirstKey, EndKey), Whole = CurrentGroup =:= GroupKeyFun(FirstKey) andalso CurrentGroup =:= GroupKeyFun(LastKey), if BeforeStart -> - {skip, {CurrentGroup, GroupAcc, MapAcc, ReduceAcc}}; + {skip, {CurrentGroup, UserAcc, MapAcc, ReduceAcc}}; AfterEnd -> - {stop, {CurrentGroup, GroupAcc, MapAcc, ReduceAcc}}; + {stop, {CurrentGroup, UserAcc, MapAcc, ReduceAcc}}; Whole -> - {skip, {CurrentGroup, GroupAcc, MapAcc, [Reduction | ReduceAcc]}}; + {skip, {CurrentGroup, UserAcc, MapAcc, [Reduction | ReduceAcc]}}; true -> - {ok, {CurrentGroup, GroupAcc, MapAcc, ReduceAcc}} + {ok, {CurrentGroup, UserAcc, MapAcc, ReduceAcc}} end end, - {CurrentGroup, GroupAcc0, MapValues, ReduceValues} = fold(Db, Tree, Fun, {NoGroupYet, [], [], []}), - GroupAcc1 = if + {CurrentGroup, UserAcc1, MapValues, ReduceValues} = fold(Db, Tree, Fun, {NoGroupYet, UserAcc0, [], []}), + if MapValues /= [] orelse ReduceValues /= [] -> FinalGroup = do_reduce(Tree, MapValues, ReduceValues), - [{CurrentGroup, FinalGroup} | GroupAcc0]; + UserAccFun({CurrentGroup, FinalGroup}, UserAcc1); true -> - GroupAcc0 - end, - lists:reverse(GroupAcc1). + UserAcc1 + end. %% range (inclusive of both ends) @@ -1022,14 +1021,15 @@ group_reduce_test_() -> Max = 100, Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, Max)])], GroupKeyFun = fun(Key) -> lists:sublist(Key, 2) end, + UserAccFun = fun({K,V}, Acc) -> Acc ++ [{K, V}] end, lists:foreach(fun(Key) -> insert(Db, Tree, [Key rem 4, Key rem 3, Key], Key) end, Keys), [ ?_test(?assertEqual([{[1, 0], 408}, {[1, 1], 441}, {[1, 2], 376}], - group_reduce(Db, Tree, [1], [2], GroupKeyFun))), + group_reduce(Db, Tree, [1], [2], GroupKeyFun, UserAccFun, []))), ?_test(?assertEqual([{[0,0],432}, {[0,1],468}, {[0,2],400}, {[1,0],408}, {[1,1],441}, {[1,2],376}, {[2,0],384}, {[2,1],416}, {[2,2],450}, {[3,0],459}, {[3,1],392}, {[3,2],424}], - group_reduce(Db, Tree, ebtree:min(), ebtree:max(), GroupKeyFun))) + group_reduce(Db, Tree, ebtree:min(), ebtree:max(), GroupKeyFun, UserAccFun, []))) ]. -- cgit v1.2.1 From e2175aa1e24ea0a7f6594067165792b0ad5fb1af Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Thu, 9 Jul 2020 18:03:00 +0100 Subject: Ensure all keys are in range during group_reduce --- src/ebtree.erl | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/src/ebtree.erl b/src/ebtree.erl index 024fbd375..a2ad0224c 100644 --- a/src/ebtree.erl +++ b/src/ebtree.erl @@ -216,7 +216,7 @@ group_reduce(Db, #tree{} = Tree, StartKey, EndKey, GroupKeyFun, UserAccFun, User Fun = fun ({visit, Key, Value}, {CurrentGroup, UserAcc, MapAcc, ReduceAcc}) -> AfterEnd = greater_than(Tree, Key, EndKey), - InRange = greater_than_or_equal(Tree, Key, StartKey) andalso less_than_or_equal(Tree, Key, EndKey), + InRange = in_range(Tree, StartKey, Key, EndKey), KeyGroup = GroupKeyFun(Key), SameGroup = CurrentGroup =:= KeyGroup, if @@ -237,12 +237,14 @@ group_reduce(Db, #tree{} = Tree, StartKey, EndKey, GroupKeyFun, UserAccFun, User BeforeStart = less_than(Tree, LastKey, StartKey), AfterEnd = greater_than(Tree, FirstKey, EndKey), Whole = CurrentGroup =:= GroupKeyFun(FirstKey) andalso CurrentGroup =:= GroupKeyFun(LastKey), + FirstInRange = in_range(Tree, StartKey, FirstKey, EndKey), + LastInRange = in_range(Tree, StartKey, LastKey, EndKey), if BeforeStart -> {skip, {CurrentGroup, UserAcc, MapAcc, ReduceAcc}}; AfterEnd -> {stop, {CurrentGroup, UserAcc, MapAcc, ReduceAcc}}; - Whole -> + Whole andalso FirstInRange andalso LastInRange -> {skip, {CurrentGroup, UserAcc, MapAcc, [Reduction | ReduceAcc]}}; true -> {ok, {CurrentGroup, UserAcc, MapAcc, ReduceAcc}} @@ -775,6 +777,10 @@ reduce_values(#tree{} = Tree, Values, Rereduce) when is_list(Values) -> %% collation functions +in_range(#tree{} = Tree, StartOfRange, Key, EndOfRange) -> + greater_than_or_equal(Tree, Key, StartOfRange) andalso less_than_or_equal(Tree, Key, EndOfRange). + + greater_than(#tree{} = Tree, A, B) -> not less_than_or_equal(Tree, A, B). @@ -1015,7 +1021,7 @@ stats_reduce_test_() -> ]. -group_reduce_test_() -> +group_reduce_level_test_() -> Db = erlfdb_util:get_test_db([empty]), Tree = open(Db, <<1,2,3>>, 4, [{reduce_fun, fun reduce_sum/2}]), Max = 100, @@ -1033,6 +1039,22 @@ group_reduce_test_() -> ]. +group_reduce_int_test_() -> + Db = erlfdb_util:get_test_db([empty]), + Tree = open(Db, <<1,2,3>>, 4, [{reduce_fun, fun reduce_count/2}]), + Max = 100, + Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, Max)])], + GroupKeyFun = fun(_Key) -> null end, + UserAccFun = fun({K,V}, Acc) -> Acc ++ [{K, V}] end, + lists:foreach(fun(Key) -> insert(Db, Tree, Key, Key) end, Keys), + [ + ?_test(?assertEqual([{null, 100}], group_reduce(Db, Tree, + ebtree:min(), ebtree:max(), GroupKeyFun, UserAccFun, []))), + ?_test(?assertEqual([{null, 99}], group_reduce(Db, Tree, 2, ebtree:max(), GroupKeyFun, UserAccFun, []))), + ?_test(?assertEqual([{null, 96}], group_reduce(Db, Tree, 3, 98, GroupKeyFun, UserAccFun, []))) + ]. + + raw_collation_test() -> Db = erlfdb_util:get_test_db([empty]), Tree = open(Db, <<1,2,3>>, 4), -- cgit v1.2.1 From 387b7c122a1685c03b5a0868df5f81424c2e47a0 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Thu, 9 Jul 2020 20:41:19 +0100 Subject: assert that non-leafs have no prev/next pointers --- src/ebtree.erl | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/ebtree.erl b/src/ebtree.erl index a2ad0224c..827d6fe5c 100644 --- a/src/ebtree.erl +++ b/src/ebtree.erl @@ -666,6 +666,7 @@ validate_tree(Tx, #tree{} = Tree, [{_F, _L, P, _R} | Rest]) -> validate_node(#tree{} = Tree, #node{} = Node) -> NumKeys = length(Node#node.members), + IsLeaf = Node#node.level =:= 0, IsRoot = ?NODE_ROOT_ID == Node#node.id, OutOfOrder = Node#node.members /= sort(Tree, Node#node.members), Duplicates = Node#node.members /= usort(Tree, Node#node.members), @@ -676,6 +677,10 @@ validate_node(#tree{} = Tree, #node{} = Node) -> erlang:error({too_few_keys, Node}); NumKeys > Tree#tree.max -> erlang:error({too_many_keys, Node}); + not IsLeaf andalso Node#node.prev /= undefined -> + erlang:error({non_leaf_with_prev, Node}); + not IsLeaf andalso Node#node.next /= undefined -> + erlang:error({non_leaf_with_next, Node}); OutOfOrder -> erlang:error({out_of_order, Node}); Duplicates -> -- cgit v1.2.1 From 7485892beb99b8ff793e4796322ef88bfd4b064d Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Thu, 9 Jul 2020 20:54:33 +0100 Subject: and then fix the bug where they got one --- src/ebtree.erl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/ebtree.erl b/src/ebtree.erl index 827d6fe5c..64a6257cb 100644 --- a/src/ebtree.erl +++ b/src/ebtree.erl @@ -526,16 +526,16 @@ rebalance(Tx, #tree{} = Tree, #node{level = Level} = Node1, #node{level = Level} Left1Id = new_node_id(Tx, Tree), Right1Id = new_node_id(Tx, Tree), - Left1 = Left0#node{ + Left1 = remove_pointers_if_not_leaf(Left0#node{ id = Left1Id, next = Right1Id, members = LeftMembers - }, - Right1 = Right0#node{ + }), + Right1 = remove_pointers_if_not_leaf(Right0#node{ id = Right1Id, prev = Left1Id, members = RightMembers - }, + }), {Left1, Right1}. -- cgit v1.2.1 From d7b005123dd31950692bd26ef246263d6dac7811 Mon Sep 17 00:00:00 2001 From: ILYA Khlopotov Date: Fri, 10 Jul 2020 08:18:43 -0700 Subject: Do not fail when emilio detects errors --- Makefile | 2 +- Makefile.win | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index eaa8b3d47..2e3cc8acb 100644 --- a/Makefile +++ b/Makefile @@ -210,7 +210,7 @@ soak-eunit: couch while [ $$? -eq 0 ] ; do $(REBAR) -r eunit $(EUNIT_OPTS) ; done emilio: - @bin/emilio -c emilio.config src/ | bin/warnings_in_scope -s 3 + @bin/emilio -c emilio.config src/ | bin/warnings_in_scope -s 3 || exit 0 .venv/bin/black: @python3 -m venv .venv diff --git a/Makefile.win b/Makefile.win index 265cdf339..16cf2ca35 100644 --- a/Makefile.win +++ b/Makefile.win @@ -178,7 +178,7 @@ just-eunit: @$(REBAR) -r eunit $(EUNIT_OPTS) emilio: - @bin\emilio -c emilio.config src\ | python.exe bin\warnings_in_scope -s 3 + @bin\emilio -c emilio.config src\ | python.exe bin\warnings_in_scope -s 3 || exit 0 .venv/bin/black: @python.exe -m venv .venv -- cgit v1.2.1 From ddbaf4f0e88558ea00a9fb4e4cf9eb2365af9ef7 Mon Sep 17 00:00:00 2001 From: ILYA Khlopotov Date: Thu, 9 Jul 2020 10:47:31 -0700 Subject: Simplify using `req_body` for JSON requests Currently the EPI plugins have no easy way to modify body of the document in before request. There are complicated approaches via overiding compression header. This is due to the fact that `chttp:json_body/1` expects compressed body. We can rely on the fact that `MochiReq:recv_body/1` returns binary to allow passing of already parsed JSON terms (objects and lists). --- src/chttpd/src/chttpd.erl | 10 ++++++---- src/chttpd/src/chttpd_misc.erl | 3 +-- src/chttpd/test/eunit/chttpd_handlers_tests.erl | 2 +- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/chttpd/src/chttpd.erl b/src/chttpd/src/chttpd.erl index eca936fed..5a3e3fa38 100644 --- a/src/chttpd/src/chttpd.erl +++ b/src/chttpd/src/chttpd.erl @@ -423,8 +423,7 @@ possibly_hack(#httpd{path_parts=[<<"_replicate">>]}=Req) -> {Props0} = chttpd:json_body_obj(Req), Props1 = fix_uri(Req, Props0, <<"source">>), Props2 = fix_uri(Req, Props1, <<"target">>), - put(post_body, {Props2}), - Req; + Req#httpd{req_body={Props2}}; possibly_hack(Req) -> Req. @@ -677,13 +676,16 @@ body(#httpd{mochi_req=MochiReq, req_body=ReqBody}) -> validate_ctype(Req, Ctype) -> couch_httpd:validate_ctype(Req, Ctype). -json_body(Httpd) -> +json_body(#httpd{req_body=undefined} = Httpd) -> case body(Httpd) of undefined -> throw({bad_request, "Missing request body"}); Body -> ?JSON_DECODE(maybe_decompress(Httpd, Body)) - end. + end; + +json_body(#httpd{req_body=ReqBody}) -> + ReqBody. json_body_obj(Httpd) -> case json_body(Httpd) of diff --git a/src/chttpd/src/chttpd_misc.erl b/src/chttpd/src/chttpd_misc.erl index 565b1210c..07d53714a 100644 --- a/src/chttpd/src/chttpd_misc.erl +++ b/src/chttpd/src/chttpd_misc.erl @@ -302,10 +302,9 @@ handle_task_status_req(#httpd{method='GET'}=Req) -> handle_task_status_req(Req) -> send_method_not_allowed(Req, "GET,HEAD"). -handle_replicate_req(#httpd{method='POST', user_ctx=Ctx} = Req) -> +handle_replicate_req(#httpd{method='POST', user_ctx=Ctx, req_body=PostBody} = Req) -> chttpd:validate_ctype(Req, "application/json"), %% see HACK in chttpd.erl about replication - PostBody = get(post_body), case replicate(PostBody, Ctx) of {ok, {continuous, RepId}} -> send_json(Req, 202, {[{ok, true}, {<<"_local_id">>, RepId}]}); diff --git a/src/chttpd/test/eunit/chttpd_handlers_tests.erl b/src/chttpd/test/eunit/chttpd_handlers_tests.erl index f3e8f5dcd..649d82e86 100644 --- a/src/chttpd/test/eunit/chttpd_handlers_tests.erl +++ b/src/chttpd/test/eunit/chttpd_handlers_tests.erl @@ -70,7 +70,7 @@ request_replicate(Url, Body) -> Headers = [{"Content-Type", "application/json"}], Handler = {chttpd_misc, handle_replicate_req}, request(post, Url, Headers, Body, Handler, fun(Req) -> - chttpd:send_json(Req, 200, get(post_body)) + chttpd:send_json(Req, 200, Req#httpd.req_body) end). request(Method, Url, Headers, Body, {M, F}, MockFun) -> -- cgit v1.2.1 From 9e8629a3ac2dadf25d938be5f5aa31096bf8f622 Mon Sep 17 00:00:00 2001 From: Garren Smith Date: Mon, 13 Jul 2020 14:47:14 +0200 Subject: adjust rebar.config to work with rebar2 and rebar3 --- rebar.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rebar.config b/rebar.config index b9dfae661..c9750ce96 100644 --- a/rebar.config +++ b/rebar.config @@ -1,5 +1,5 @@ {erl_opts, [debug_info]}. {cover_enabled, true}. {deps, [ - {erlfdb, {git, "https://github.com/apache/couchdb-erlfdb", {tag, "v1.2.2"}}} + {erlfdb, ".*", {git, "https://github.com/apache/couchdb-erlfdb", {tag, "v1.2.2"}}} ]}. -- cgit v1.2.1 From ec5a3801eb5caa12bacd4c93ca06ce1e69b5253b Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Mon, 13 Jul 2020 18:14:23 +0100 Subject: fix how lookup rate is calculated in tests --- src/ebtree.erl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/ebtree.erl b/src/ebtree.erl index 64a6257cb..d3929fb26 100644 --- a/src/ebtree.erl +++ b/src/ebtree.erl @@ -1094,7 +1094,7 @@ lookup_test_fun(Max, Order) -> lists:foreach(fun(Key) -> ?assertEqual({Key, Key + 1}, lookup(Db, Tree, Key)) end, Keys), T2 = erlang:monotonic_time(), ?debugFmt("~B order. ~B iterations. insert rate: ~.2f/s, lookup rate: ~.2f/s", - [Order, Max, Max / sec(T1 - T0), Max / sec(T2 - T1)]). + [Order, Max, 1000 * (Max / msec(T1 - T0)), 1000 * (Max / msec(T2 - T1))]). range_test_() -> @@ -1177,7 +1177,7 @@ custom_collation_reverse_range_test_() -> end}. -sec(Native) -> - erlang:max(1, erlang:convert_time_unit(Native, native, second)). +msec(Native) -> + erlang:max(1, erlang:convert_time_unit(Native, native, millisecond)). -endif. -- cgit v1.2.1 From 0b244096f101446f082de9d02bc226136c057e1e Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Sat, 18 Jul 2020 23:31:24 +0100 Subject: honor ?MIN and ?MAX in find_child_int This fixes range and reverse range when using the special min() and max() values. --- src/ebtree.erl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/ebtree.erl b/src/ebtree.erl index d3929fb26..b1db6c2a5 100644 --- a/src/ebtree.erl +++ b/src/ebtree.erl @@ -558,8 +558,7 @@ find_child_int(#tree{} = _Tree, [Child], _Key) -> Child; find_child_int(#tree{} = Tree, [{_F, L, _P, _R} = Child| Rest], Key) -> - #tree{collate_fun = CollateFun} = Tree, - case CollateFun(Key, L) of + case less_than_or_equal(Tree, Key, L) of true -> Child; false -> -- cgit v1.2.1 From a24e76f6704840d5196d39d95255550667232ce0 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Sun, 19 Jul 2020 10:32:17 +0100 Subject: update README.md to reflect progress --- README.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/README.md b/README.md index b21c65f9f..9ce79a0c6 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,5 @@ node id. TODO 1. Rewrite inner node ids (non-root, non-leaf) so we can safely cache - them outside of a transaction. + them outside of a transaction. (see "immutable" branch) 2. Chunkify large values over multiple rows? -3. Sequential node ids? - -- cgit v1.2.1 From ffc07e92e13149f33777f28025555b9158b977db Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Fri, 17 Jul 2020 13:08:04 +0100 Subject: don't export reduce funs, they're just for tests now --- src/ebtree.erl | 83 +++++++++++++++++++++++++++------------------------------- 1 file changed, 38 insertions(+), 45 deletions(-) diff --git a/src/ebtree.erl b/src/ebtree.erl index b1db6c2a5..6aaaad246 100644 --- a/src/ebtree.erl +++ b/src/ebtree.erl @@ -17,13 +17,6 @@ validate_tree/2 ]). -%% built-in reduce functions --export([ - reduce_sum/2, - reduce_count/2, - reduce_stats/2 -]). - -record(node, { id, level = 0, @@ -728,44 +721,6 @@ reduce_noop(_KVs, _Rereduce) -> []. -reduce_sum(KVs, false) -> - {_, Vs} = lists:unzip(KVs), - lists:sum(Vs); - -reduce_sum(Rs, true) -> - lists:sum(Rs). - - -reduce_count(KVs, false) -> - length(KVs); - -reduce_count(Rs, true) -> - lists:sum(Rs). - - -reduce_stats(KVs, false) -> - {_, Vs} = lists:unzip(KVs), - { - lists:sum(Vs), - lists:min(Vs), - lists:max(Vs), - length(Vs), - lists:sum([V * V || V <- Vs]) - }; - -reduce_stats(Rs, true) -> - lists:foldl( - fun({Sum, Min, Max, Count, SumSqr}, - {SumAcc, MinAcc, MaxAcc, CountAcc, SumSqrAcc}) -> - { - Sum + SumAcc, - erlang:min(Min, MinAcc), - erlang:max(Max, MaxAcc), - Count + CountAcc, - SumSqr + SumSqrAcc - } end, hd(Rs), tl(Rs)). - - reduce_node(#tree{} = Tree, #node{level = 0} = Node) -> reduce_values(Tree, Node#node.members, false); @@ -905,6 +860,44 @@ print_node(#node{} = Node) -> -ifdef(TEST). -include_lib("eunit/include/eunit.hrl"). +reduce_sum(KVs, false) -> + {_, Vs} = lists:unzip(KVs), + lists:sum(Vs); + +reduce_sum(Rs, true) -> + lists:sum(Rs). + + +reduce_count(KVs, false) -> + length(KVs); + +reduce_count(Rs, true) -> + lists:sum(Rs). + + +reduce_stats(KVs, false) -> + {_, Vs} = lists:unzip(KVs), + { + lists:sum(Vs), + lists:min(Vs), + lists:max(Vs), + length(Vs), + lists:sum([V * V || V <- Vs]) + }; + +reduce_stats(Rs, true) -> + lists:foldl( + fun({Sum, Min, Max, Count, SumSqr}, + {SumAcc, MinAcc, MaxAcc, CountAcc, SumSqrAcc}) -> + { + Sum + SumAcc, + erlang:min(Min, MinAcc), + erlang:max(Max, MaxAcc), + Count + CountAcc, + SumSqr + SumSqrAcc + } end, hd(Rs), tl(Rs)). + + collation_fun_test_() -> Tree = #tree{collate_fun = fun collate_raw/2}, [ -- cgit v1.2.1 From 0a2eca50dc29cb3a8f9048e734e61857a475868a Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Fri, 17 Jul 2020 13:37:49 +0100 Subject: add spec/edoc to all public functions --- src/ebtree.erl | 99 +++++++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 84 insertions(+), 15 deletions(-) diff --git a/src/ebtree.erl b/src/ebtree.erl index 6aaaad246..b37963a18 100644 --- a/src/ebtree.erl +++ b/src/ebtree.erl @@ -49,9 +49,19 @@ -define(MAX, <<1:1>>). +%% @equiv open(Db, Prefix, Order, []) +-spec open(term(), binary(), pos_integer()) -> #tree{}. open(Db, Prefix, Order) -> open(Db, Prefix, Order, []). + +%% @doc Open a new ebtree, initialising it if doesn't already exist. +%% @param Db An erlfdb database or transaction. +%% @param Prefix The key prefix applied to all ebtree keys. +%% @param Order The maximum number of items allowed in an ebtree node (must be an even number). +%% @param Options Supported options are {reduce_fun, Fun} and {collate_fun, Fun}. +%% @returns A data structure representing the ebtree, to be passed to all other functions. +-spec open(term(), binary(), pos_integer(), list()) -> #tree{}. open(Db, Prefix, Order, Options) when is_binary(Prefix), is_integer(Order), Order > 2, Order rem 2 == 0 -> ReduceFun = proplists:get_value(reduce_fun, Options, fun reduce_noop/2), CollateFun = proplists:get_value(collate_fun, Options, fun collate_raw/2), @@ -76,15 +86,22 @@ open(Db, Prefix, Order, Options) when is_binary(Prefix), is_integer(Order), Orde }. +%% @doc a special value guaranteed to be smaller than any value in an ebtree. min() -> ?MIN. +%% @doc a special value guaranteed to be larger than any value in an ebtree. max() -> ?MAX. -%% lookup - +%% @doc Lookup a specific key in the ebtree. +%% @param Db An erlfdb database or transaction. +%% @param Tree the ebtree. +%% @param Key the key to lookup +%% @returns A key-value tuple if found, false if not present in the ebtree. +-spec lookup(Db :: term(), Tree :: #tree{}, Key :: term()) -> + {Key :: term(), Value :: term()} | false. lookup(Db, #tree{} = Tree, Key) -> Fun = fun ({visit, K, V}, _Acc) when K =:= Key -> @@ -108,8 +125,14 @@ lookup(Db, #tree{} = Tree, Key) -> end, fold(Db, Tree, Fun, false). -%% fold +%% @doc Custom traversal of the ebtree. +%% @param Db An erlfdb database or transaction. +%% @param Tree the ebtree. +%% @param Fun A callback function as nodes are loaded that directs the traversal. +%% @param Acc The initial accumulator. +%% @returns the final accumulator. +-spec fold(Db :: term(), Tree :: #tree{}, Fun :: fun(), Acc :: term()) -> term(). fold(Db, #tree{} = Tree, Fun, Acc) -> {_, Reduce} = erlfdb:transactional(Db, fun(Tx) -> Root = get_node(Tx, Tree, ?NODE_ROOT_ID), @@ -148,8 +171,12 @@ fold(Db, #tree{} = Tree, [{F, L, P, R} | Rest], Fun, Acc0) -> {stop, Acc1} end. -%% full reduce +%% @doc Calculate the final reduce value for the whole ebtree. +%% @param Db An erlfdb database or transaction. +%% @param Tree the ebtree. +%% @returns the final reduce value +-spec full_reduce(Db :: term(), Tree :: #tree{}) -> term(). full_reduce(Db, #tree{} = Tree) -> Fun = fun ({visit, K, V}, {MapAcc, ReduceAcc}) -> @@ -161,8 +188,13 @@ full_reduce(Db, #tree{} = Tree) -> do_reduce(Tree, MapValues, ReduceValues). -%% reduce - +%% @doc Calculate the reduce value for all keys in the specified range. +%% @param Db An erlfdb database or transaction. +%% @param Tree The ebtree. +%% @param StartKey The beginning of the range +%% @param EndKey The end of the range +%% @returns the reduce value for the specified range +-spec reduce(Db :: term(), Tree :: #tree{}, StartKey :: term(), EndKey :: term()) -> term(). reduce(Db, #tree{} = Tree, StartKey, EndKey) -> Fun = fun ({visit, Key, Value}, {MapAcc, ReduceAcc}) -> @@ -202,8 +234,17 @@ do_reduce(#tree{} = Tree, MapValues, ReduceValues) when is_list(MapValues), is_l do_reduce(Tree, [], [reduce_values(Tree, MapValues, false) | ReduceValues]). -%% group reduce - produces reductions for contiguous keys in the same group. - +%% @doc Calculate the reduce value for all groups in the specified range. +%% @param Db An erlfdb database or transaction. +%% @param Tree The ebtree. +%% @param StartKey The beginning of the range +%% @param EndKey The end of the range +%% @param GroupKeyFun A function that takes a key as a parameter and returns the group key. +%% @param UserAccFun A function called when a new group reduction is calculated and returns an acc. +%% @param UserAcc0 The initial accumulator. +%% @returns the final accumulator. +-spec group_reduce(Db :: term(), Tree :: #tree{}, StartKey :: term(), EndKey :: term(), + GroupKeyFun :: fun(), UserAccFun :: fun(), UserAcc0 :: term()) -> term(). group_reduce(Db, #tree{} = Tree, StartKey, EndKey, GroupKeyFun, UserAccFun, UserAcc0) -> NoGroupYet = ?MIN, Fun = fun @@ -253,8 +294,16 @@ group_reduce(Db, #tree{} = Tree, StartKey, EndKey, GroupKeyFun, UserAccFun, User end. -%% range (inclusive of both ends) - +%% @doc Finds all key-value pairs for the specified range in forward order. +%% @param Db An erlfdb database or transaction. +%% @param Tree The ebtree. +%% @param StartKey The beginning of the range +%% @param EndKey The end of the range +%% @param AccFun A function that is called when a key-value pair is found, returning an accumulator. +%% @param Acc0 The initial accumulator +%% @returns the final accumulator +-spec range(Db :: term(), Tree :: #tree{}, StartKey :: term(), EndKey :: term(), + AccFun :: fun(), Acc0 :: term()) -> term(). range(Db, #tree{} = Tree, StartKey, EndKey, AccFun, Acc0) -> erlfdb:transactional(Db, fun(Tx) -> range(Tx, Tree, get_node(Tx, Tree, ?NODE_ROOT_ID), StartKey, EndKey, AccFun, Acc0) @@ -276,8 +325,17 @@ range(Tx, #tree{} = Tree, #node{} = Node, StartKey, EndKey, AccFun, Acc) -> ChildId = find_child_id(Tree, Node, StartKey), range(Tx, Tree, get_node(Tx, Tree, ChildId), StartKey, EndKey, AccFun, Acc). -%% reverse range (inclusive of both ends) +%% @doc Finds all key-value pairs for the specified range in reverse order. +%% @param Db An erlfdb database or transaction. +%% @param Tree The ebtree. +%% @param StartKey The beginning of the range +%% @param EndKey The end of the range +%% @param AccFun A function that is called when a key-value pair is found, returning an accumulator. +%% @param Acc0 The initial accumulator +%% @returns the final accumulator +-spec reverse_range(Db :: term(), Tree :: #tree{}, StartKey :: term(), EndKey :: term(), + AccFun :: fun(), Acc0 :: term()) -> term(). reverse_range(Db, #tree{} = Tree, StartKey, EndKey, AccFun, Acc0) -> erlfdb:transactional(Db, fun(Tx) -> reverse_range(Tx, Tree, get_node(Tx, Tree, ?NODE_ROOT_ID), StartKey, EndKey, AccFun, Acc0) @@ -300,8 +358,13 @@ reverse_range(Tx, #tree{} = Tree, #node{} = Node, StartKey, EndKey, AccFun, Acc) reverse_range(Tx, Tree, get_node(Tx, Tree, ChildId), StartKey, EndKey, AccFun, Acc). -%% insert - +%% @doc Inserts or updates a value in the ebtree +%% @param Db An erlfdb database or transaction. +%% @param Tree The ebtree. +%% @param Key The key to store the value under. +%% @param Value The value to store. +%% @returns the tree. +-spec insert(Db :: term(), Tree :: #tree{}, Key :: term(), Value :: term()) -> #tree{}. insert(_Db, #tree{} = _Tree, ?MIN, _Value) -> erlang:error(min_not_allowed); @@ -427,8 +490,12 @@ insert_nonfull(Tx, #tree{} = Tree, #node{} = Node0, Key, Value) -> reduce_node(Tree, Node2). -%% delete - +%% @doc Deletes an entry from the ebtree +%% @param Db An erlfdb database or transaction. +%% @param Tree The ebtree. +%% @param Key The key of the entry to delete. +%% @returns the tree. +-spec delete(Db :: term(), Tree :: #tree{}, Key :: term()) -> #tree{}. delete(Db, #tree{} = Tree, Key) -> erlfdb:transactional(Db, fun(Tx) -> Root0 = get_node(Tx, Tree, ?NODE_ROOT_ID), @@ -632,6 +699,8 @@ node_key(Prefix, Id) when is_binary(Prefix), is_integer(Id) -> erlfdb_tuple:pack({?NODE, Id}, Prefix). +%% @doc Walks the whole tree and checks it for consistency. +%% It also prints it to screen. validate_tree(Db, #tree{} = Tree) -> erlfdb:transactional(Db, fun(Tx) -> Root = get_node(Db, Tree, ?NODE_ROOT_ID), -- cgit v1.2.1 From 3f1e6a5339f9081a7eadef99a0313f1ac2123acd Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Tue, 14 Jul 2020 20:42:47 +0100 Subject: Allow fold in fwd and rev direction --- src/ebtree.erl | 38 ++++++++++++++++++++++++++------------ 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/src/ebtree.erl b/src/ebtree.erl index b37963a18..ca020cc07 100644 --- a/src/ebtree.erl +++ b/src/ebtree.erl @@ -11,6 +11,7 @@ range/6, reverse_range/6, fold/4, + fold/5, reduce/4, full_reduce/2, group_reduce/7, @@ -126,47 +127,60 @@ lookup(Db, #tree{} = Tree, Key) -> fold(Db, Tree, Fun, false). +%% @equiv fold(Db, Tree, Fun, Acc, []) +-spec fold(Db :: term(), Tree :: #tree{}, Fun :: fun(), Acc :: term()) -> term(). +fold(Db, #tree{} = Tree, Fun, Acc) -> + fold(Db, Tree, Fun, Acc, []). + + %% @doc Custom traversal of the ebtree. %% @param Db An erlfdb database or transaction. %% @param Tree the ebtree. %% @param Fun A callback function as nodes are loaded that directs the traversal. %% @param Acc The initial accumulator. +%% @param Options Currently supported options are [{dir, fwd}] and [{dir, rev}] %% @returns the final accumulator. --spec fold(Db :: term(), Tree :: #tree{}, Fun :: fun(), Acc :: term()) -> term(). -fold(Db, #tree{} = Tree, Fun, Acc) -> +-spec fold(Db :: term(), Tree :: #tree{}, Fun :: fun(), Acc :: term(), Options :: list()) -> term(). +fold(Db, #tree{} = Tree, Fun, Acc, Options) -> {_, Reduce} = erlfdb:transactional(Db, fun(Tx) -> Root = get_node(Tx, Tree, ?NODE_ROOT_ID), - fold(Db, Tree, Root, Fun, Acc) + fold(Db, Tree, Root, Fun, Acc, Options) end), Reduce. -fold(Db, #tree{} = Tree, #node{} = Node, Fun, Acc) -> - fold(Db, #tree{} = Tree, Node#node.members, Fun, Acc); + +fold(Db, #tree{} = Tree, #node{} = Node, Fun, Acc, Options) -> + Dir = proplists:get_value(dir, Options, fwd), + Members = case Dir of + fwd -> Node#node.members; + rev -> lists:reverse(Node#node.members) + end, + fold(Db, #tree{} = Tree, Members, Fun, Acc, Options); -fold(_Db, #tree{} = _Tree, [], _Fun, Acc) -> +fold(_Db, #tree{} = _Tree, [], _Fun, Acc, _Options) -> {ok, Acc}; -fold(Db, #tree{} = Tree, [{K, V} | Rest], Fun, Acc0) -> +fold(Db, #tree{} = Tree, [{K, V} | Rest], Fun, Acc0, Options) -> case Fun({visit, K, V}, Acc0) of {ok, Acc1} -> - fold(Db, Tree, Rest, Fun, Acc1); + fold(Db, Tree, Rest, Fun, Acc1, Options); {stop, Acc1} -> {stop, Acc1} end; -fold(Db, #tree{} = Tree, [{F, L, P, R} | Rest], Fun, Acc0) -> +fold(Db, #tree{} = Tree, [{F, L, P, R} | Rest], Fun, Acc0, Options) -> case Fun({traverse, F, L, R}, Acc0) of {ok, Acc1} -> Node = get_node(Db, Tree, P), - case fold(Db, Tree, Node, Fun, Acc1) of + case fold(Db, Tree, Node, Fun, Acc1, Options) of {ok, Acc2} -> - fold(Db, Tree, Rest, Fun, Acc2); + fold(Db, Tree, Rest, Fun, Acc2, Options); {stop, Acc2} -> {stop, Acc2} end; {skip, Acc1} -> - fold(Db, Tree, Rest, Fun, Acc1); + fold(Db, Tree, Rest, Fun, Acc1, Options); {stop, Acc1} -> {stop, Acc1} end. -- cgit v1.2.1 From 842c116a4e76ff48e7478fbf0799eed770a0a6fb Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Mon, 20 Jul 2020 11:16:14 +0100 Subject: Add support for group_reduce in reverse order --- src/ebtree.erl | 35 +++++++++++++++++++++++++++++------ 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/src/ebtree.erl b/src/ebtree.erl index ca020cc07..94e757b3e 100644 --- a/src/ebtree.erl +++ b/src/ebtree.erl @@ -15,6 +15,7 @@ reduce/4, full_reduce/2, group_reduce/7, + group_reduce/8, validate_tree/2 ]). @@ -248,6 +249,13 @@ do_reduce(#tree{} = Tree, MapValues, ReduceValues) when is_list(MapValues), is_l do_reduce(Tree, [], [reduce_values(Tree, MapValues, false) | ReduceValues]). +%% @equiv group_reduce(Db, Tree, StartKey, EndKey, GroupKeyFun, UserAccFun, UserAcc0, []) +-spec group_reduce(Db :: term(), Tree :: #tree{}, StartKey :: term(), EndKey :: term(), + GroupKeyFun :: fun(), UserAccFun :: fun(), UserAcc0 :: term()) -> term(). +group_reduce(Db, #tree{} = Tree, StartKey, EndKey, GroupKeyFun, UserAccFun, UserAcc0) -> + group_reduce(Db, Tree, StartKey, EndKey, GroupKeyFun, UserAccFun, UserAcc0, []). + + %% @doc Calculate the reduce value for all groups in the specified range. %% @param Db An erlfdb database or transaction. %% @param Tree The ebtree. @@ -256,19 +264,24 @@ do_reduce(#tree{} = Tree, MapValues, ReduceValues) when is_list(MapValues), is_l %% @param GroupKeyFun A function that takes a key as a parameter and returns the group key. %% @param UserAccFun A function called when a new group reduction is calculated and returns an acc. %% @param UserAcc0 The initial accumulator. +%% @param Options Currently supported options are [{dir, fwd}] and [{dir, rev}] %% @returns the final accumulator. -spec group_reduce(Db :: term(), Tree :: #tree{}, StartKey :: term(), EndKey :: term(), - GroupKeyFun :: fun(), UserAccFun :: fun(), UserAcc0 :: term()) -> term(). -group_reduce(Db, #tree{} = Tree, StartKey, EndKey, GroupKeyFun, UserAccFun, UserAcc0) -> + GroupKeyFun :: fun(), UserAccFun :: fun(), UserAcc0 :: term(), Options :: list()) -> term(). +group_reduce(Db, #tree{} = Tree, StartKey, EndKey, GroupKeyFun, UserAccFun, UserAcc0, Options) -> + Dir = proplists:get_value(dir, Options, fwd), NoGroupYet = ?MIN, Fun = fun ({visit, Key, Value}, {CurrentGroup, UserAcc, MapAcc, ReduceAcc}) -> + BeforeStart = less_than(Tree, Key, StartKey), AfterEnd = greater_than(Tree, Key, EndKey), InRange = in_range(Tree, StartKey, Key, EndKey), KeyGroup = GroupKeyFun(Key), SameGroup = CurrentGroup =:= KeyGroup, if - AfterEnd -> + Dir == fwd andalso AfterEnd -> + {stop, {CurrentGroup, UserAcc, MapAcc, ReduceAcc}}; + Dir == rev andalso BeforeStart -> {stop, {CurrentGroup, UserAcc, MapAcc, ReduceAcc}}; SameGroup -> {ok, {CurrentGroup, UserAcc, [{Key, Value} | MapAcc], ReduceAcc}}; @@ -288,9 +301,13 @@ group_reduce(Db, #tree{} = Tree, StartKey, EndKey, GroupKeyFun, UserAccFun, User FirstInRange = in_range(Tree, StartKey, FirstKey, EndKey), LastInRange = in_range(Tree, StartKey, LastKey, EndKey), if - BeforeStart -> + Dir == fwd andalso BeforeStart -> {skip, {CurrentGroup, UserAcc, MapAcc, ReduceAcc}}; - AfterEnd -> + Dir == rev andalso AfterEnd -> + {skip, {CurrentGroup, UserAcc, MapAcc, ReduceAcc}}; + Dir == fwd andalso AfterEnd -> + {stop, {CurrentGroup, UserAcc, MapAcc, ReduceAcc}}; + Dir == rev andalso BeforeStart -> {stop, {CurrentGroup, UserAcc, MapAcc, ReduceAcc}}; Whole andalso FirstInRange andalso LastInRange -> {skip, {CurrentGroup, UserAcc, MapAcc, [Reduction | ReduceAcc]}}; @@ -298,7 +315,7 @@ group_reduce(Db, #tree{} = Tree, StartKey, EndKey, GroupKeyFun, UserAccFun, User {ok, {CurrentGroup, UserAcc, MapAcc, ReduceAcc}} end end, - {CurrentGroup, UserAcc1, MapValues, ReduceValues} = fold(Db, Tree, Fun, {NoGroupYet, UserAcc0, [], []}), + {CurrentGroup, UserAcc1, MapValues, ReduceValues} = fold(Db, Tree, Fun, {NoGroupYet, UserAcc0, [], []}, Options), if MapValues /= [] orelse ReduceValues /= [] -> FinalGroup = do_reduce(Tree, MapValues, ReduceValues), @@ -1113,6 +1130,12 @@ group_reduce_level_test_() -> ?_test(?assertEqual([{[1, 0], 408}, {[1, 1], 441}, {[1, 2], 376}], group_reduce(Db, Tree, [1], [2], GroupKeyFun, UserAccFun, []))), + ?_test(?assertEqual([{[1, 0], 408}, {[1, 1], 441}, {[1, 2], 376}], + group_reduce(Db, Tree, [1], [2], GroupKeyFun, UserAccFun, [], [{dir, fwd}]))), + + ?_test(?assertEqual([{[1, 2], 376}, {[1, 1], 441}, {[1, 0], 408}], + group_reduce(Db, Tree, [1], [2], GroupKeyFun, UserAccFun, [], [{dir, rev}]))), + ?_test(?assertEqual([{[0,0],432}, {[0,1],468}, {[0,2],400}, {[1,0],408}, {[1,1],441}, {[1,2],376}, {[2,0],384}, {[2,1],416}, {[2,2],450}, {[3,0],459}, {[3,1],392}, {[3,2],424}], group_reduce(Db, Tree, ebtree:min(), ebtree:max(), GroupKeyFun, UserAccFun, []))) -- cgit v1.2.1 From e347ad2099d8847153aaed5a4eed7a7f73227ea7 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Mon, 20 Jul 2020 12:33:16 +0100 Subject: two blank lines between different functions --- src/ebtree.erl | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/ebtree.erl b/src/ebtree.erl index 94e757b3e..3b4315975 100644 --- a/src/ebtree.erl +++ b/src/ebtree.erl @@ -340,6 +340,7 @@ range(Db, #tree{} = Tree, StartKey, EndKey, AccFun, Acc0) -> range(Tx, Tree, get_node(Tx, Tree, ?NODE_ROOT_ID), StartKey, EndKey, AccFun, Acc0) end). + range(Tx, #tree{} = Tree, #node{level = 0} = Node, StartKey, EndKey, AccFun, Acc0) -> InRange = [{K, V} || {K, V} <- Node#node.members, less_than_or_equal(Tree, StartKey, K), less_than_or_equal(Tree, K, EndKey)], @@ -372,6 +373,7 @@ reverse_range(Db, #tree{} = Tree, StartKey, EndKey, AccFun, Acc0) -> reverse_range(Tx, Tree, get_node(Tx, Tree, ?NODE_ROOT_ID), StartKey, EndKey, AccFun, Acc0) end). + reverse_range(Tx, #tree{} = Tree, #node{level = 0} = Node, StartKey, EndKey, AccFun, Acc0) -> InRange = [{K, V} || {K, V} <- Node#node.members, less_than_or_equal(Tree, StartKey, K), less_than_or_equal(Tree, K, EndKey)], @@ -422,6 +424,7 @@ insert(Db, #tree{} = Tree, Key, Value) -> end), Tree. + split_child(Tx, #tree{} = Tree, #node{} = Parent0, #node{} = Child) -> {LeftMembers, RightMembers} = lists:split(Tree#tree.min, Child#node.members), @@ -738,6 +741,7 @@ validate_tree(Db, #tree{} = Tree) -> validate_tree(Tx, Tree, Root) end). + validate_tree(_Tx, #tree{} = Tree, #node{level = 0} = Node) -> print_node(Node), validate_node(Tree, Node); @@ -889,6 +893,7 @@ usort(#tree{} = Tree, List) -> #tree{collate_fun = CollateFun} = Tree, lists:usort(collation_wrapper_fun(CollateFun), List). + collation_wrapper_fun(CollateFun) -> fun (#node{} = N1, #node{} = N2) -> -- cgit v1.2.1 From 11f46e5da1d42bc63b254430a71a5242b43a70d7 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Mon, 20 Jul 2020 14:41:17 +0100 Subject: refine typespec for callback functions --- src/ebtree.erl | 47 +++++++++++++++++++++++++++++++++++++---------- 1 file changed, 37 insertions(+), 10 deletions(-) diff --git a/src/ebtree.erl b/src/ebtree.erl index 3b4315975..1b6ebf374 100644 --- a/src/ebtree.erl +++ b/src/ebtree.erl @@ -125,11 +125,10 @@ lookup(Db, #tree{} = Tree, Key) -> {skip, Acc} end end, - fold(Db, Tree, Fun, false). + fold(Db, Tree, Fun, false, []). %% @equiv fold(Db, Tree, Fun, Acc, []) --spec fold(Db :: term(), Tree :: #tree{}, Fun :: fun(), Acc :: term()) -> term(). fold(Db, #tree{} = Tree, Fun, Acc) -> fold(Db, Tree, Fun, Acc, []). @@ -139,9 +138,22 @@ fold(Db, #tree{} = Tree, Fun, Acc) -> %% @param Tree the ebtree. %% @param Fun A callback function as nodes are loaded that directs the traversal. %% @param Acc The initial accumulator. -%% @param Options Currently supported options are [{dir, fwd}] and [{dir, rev}] +%% @param Options Options that control how the fold is executed. %% @returns the final accumulator. --spec fold(Db :: term(), Tree :: #tree{}, Fun :: fun(), Acc :: term(), Options :: list()) -> term(). + +-type fold_args() :: + {visit, Key :: term(), Value :: term()} | + {traverse, First :: term(), Last :: term(), Reduction :: term()}. + +-type fold_option() :: [{dir, fwd | rev}]. + +-spec fold(Db, Tree, Fun, Acc0, Options) -> Acc1 when + Db :: term(), + Tree :: #tree{}, + Fun :: fun((fold_args(), Acc0) -> {ok | skip | stop, Acc1}), + Acc0 :: term(), + Options :: [fold_option()], + Acc1 :: term(). fold(Db, #tree{} = Tree, Fun, Acc, Options) -> {_, Reduce} = erlfdb:transactional(Db, fun(Tx) -> Root = get_node(Tx, Tree, ?NODE_ROOT_ID), @@ -199,7 +211,7 @@ full_reduce(Db, #tree{} = Tree) -> ({traverse, _F, _L, R}, {MapAcc, ReduceAcc}) -> {skip, {MapAcc, [R | ReduceAcc]}} end, - {MapValues, ReduceValues} = fold(Db, Tree, Fun, {[], []}), + {MapValues, ReduceValues} = fold(Db, Tree, Fun, {[], []}, []), do_reduce(Tree, MapValues, ReduceValues). @@ -238,7 +250,7 @@ reduce(Db, #tree{} = Tree, StartKey, EndKey) -> {ok, {MapAcc, ReduceAcc}} end end, - {MapValues, ReduceValues} = fold(Db, Tree, Fun, {[], []}), + {MapValues, ReduceValues} = fold(Db, Tree, Fun, {[], []}, []), do_reduce(Tree, MapValues, ReduceValues). @@ -250,8 +262,14 @@ do_reduce(#tree{} = Tree, MapValues, ReduceValues) when is_list(MapValues), is_l %% @equiv group_reduce(Db, Tree, StartKey, EndKey, GroupKeyFun, UserAccFun, UserAcc0, []) --spec group_reduce(Db :: term(), Tree :: #tree{}, StartKey :: term(), EndKey :: term(), - GroupKeyFun :: fun(), UserAccFun :: fun(), UserAcc0 :: term()) -> term(). +-spec group_reduce( + Db :: term(), + Tree :: #tree{}, + StartKey :: term(), + EndKey :: term(), + GroupKeyFun :: fun((term()) -> group_key()), + UserAccFun :: fun(({group_key(), GroupValue :: term()}, Acc0 :: term()) -> Acc1 :: term()), + UserAcc0 :: term()) -> Acc1 :: term(). group_reduce(Db, #tree{} = Tree, StartKey, EndKey, GroupKeyFun, UserAccFun, UserAcc0) -> group_reduce(Db, Tree, StartKey, EndKey, GroupKeyFun, UserAccFun, UserAcc0, []). @@ -266,8 +284,17 @@ group_reduce(Db, #tree{} = Tree, StartKey, EndKey, GroupKeyFun, UserAccFun, User %% @param UserAcc0 The initial accumulator. %% @param Options Currently supported options are [{dir, fwd}] and [{dir, rev}] %% @returns the final accumulator. --spec group_reduce(Db :: term(), Tree :: #tree{}, StartKey :: term(), EndKey :: term(), - GroupKeyFun :: fun(), UserAccFun :: fun(), UserAcc0 :: term(), Options :: list()) -> term(). +-type group_key() :: term(). + +-spec group_reduce( + Db :: term(), + Tree :: #tree{}, + StartKey :: term(), + EndKey :: term(), + GroupKeyFun :: fun((term()) -> group_key()), + UserAccFun :: fun(({group_key(), GroupValue :: term()}, Acc0 :: term()) -> Acc1 :: term()), + UserAcc0 :: term(), + Options :: [fold_option()]) -> Acc1 :: term(). group_reduce(Db, #tree{} = Tree, StartKey, EndKey, GroupKeyFun, UserAccFun, UserAcc0, Options) -> Dir = proplists:get_value(dir, Options, fwd), NoGroupYet = ?MIN, -- cgit v1.2.1 From fc6dbeebb430db3d46cd3b3b7082ec167d46b679 Mon Sep 17 00:00:00 2001 From: Joan Touzet Date: Mon, 20 Jul 2020 16:03:00 +0000 Subject: New cname for couchdb-vm2, see INFRA-20435 (#2982) --- build-aux/Jenkinsfile.full | 10 +++++----- build-aux/logfile-uploader.py | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/build-aux/Jenkinsfile.full b/build-aux/Jenkinsfile.full index b3b477bea..cc13f9d16 100644 --- a/build-aux/Jenkinsfile.full +++ b/build-aux/Jenkinsfile.full @@ -727,12 +727,12 @@ pipeline { unstash 'tarball' unarchive mapping: ['pkgs/' : '.'] - echo 'Retrieving & cleaning current couchdb-vm2 tree...' + echo 'Retrieving & cleaning current repo-nightly tree...' sh ''' - rsync -avz -e "ssh -o StrictHostKeyChecking=no -i $KEY" jenkins@couchdb-vm2.apache.org:/var/www/html/$BRANCH_NAME . || mkdir -p $BRANCH_NAME + rsync -avz -e "ssh -o StrictHostKeyChecking=no -i $KEY" jenkins@repo-nightly.couchdb.org:/var/www/html/$BRANCH_NAME . || mkdir -p $BRANCH_NAME rm -rf $BRANCH_NAME/debian/* $BRANCH_NAME/el6/* $BRANCH_NAME/el7/* $BRANCH_NAME/el8/* mkdir -p $BRANCH_NAME/debian $BRANCH_NAME/el6 $BRANCH_NAME/el7 $BRANCH_NAME/el8 $BRANCH_NAME/source - rsync -avz -e "ssh -o StrictHostKeyChecking=no -i $KEY" jenkins@couchdb-vm2.apache.org:/var/www/html/js . + rsync -avz -e "ssh -o StrictHostKeyChecking=no -i $KEY" jenkins@repo-nightly.couchdb.org:/var/www/html/js . ''' echo 'Building Debian repo...' @@ -772,9 +772,9 @@ pipeline { cd ../.. ''' - echo 'rsyncing tree to couchdb-vm2...' + echo 'rsyncing tree to repo-nightly...' sh ''' - rsync -avz --delete -e "ssh -o StrictHostKeyChecking=no -i $KEY" $BRANCH_NAME jenkins@couchdb-vm2.apache.org:/var/www/html + rsync -avz --delete -e "ssh -o StrictHostKeyChecking=no -i $KEY" $BRANCH_NAME jenkins@repo-nightly.couchdb.org:/var/www/html rm -rf $BRANCH_NAME couchdb-pkg *.tar.gz ''' } // withCredentials diff --git a/build-aux/logfile-uploader.py b/build-aux/logfile-uploader.py index 148704cee..3df9e6c81 100755 --- a/build-aux/logfile-uploader.py +++ b/build-aux/logfile-uploader.py @@ -22,7 +22,7 @@ import time import requests -COUCH_URL = "https://couchdb-vm2.apache.org/ci_errorlogs" +COUCH_URL = "https://logs.couchdb.org/ci_errorlogs" TARFILE = "couchlog.tar.gz" -- cgit v1.2.1 From b23df65b26e68ac750d2a848f1eb3bdf345bedfc Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Fri, 17 Jul 2020 12:56:48 +0100 Subject: Add ASLv2 license --- rebar.config | 12 ++++++++++++ src/ebtree.app.src | 12 ++++++++++++ src/ebtree.erl | 12 ++++++++++++ 3 files changed, 36 insertions(+) diff --git a/rebar.config b/rebar.config index c9750ce96..edf6725c8 100644 --- a/rebar.config +++ b/rebar.config @@ -1,3 +1,15 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + {erl_opts, [debug_info]}. {cover_enabled, true}. {deps, [ diff --git a/src/ebtree.app.src b/src/ebtree.app.src index 1420a2d90..d4966f6a5 100644 --- a/src/ebtree.app.src +++ b/src/ebtree.app.src @@ -1,3 +1,15 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + {application, ebtree, [{description, "An OTP library"}, {vsn, git}, diff --git a/src/ebtree.erl b/src/ebtree.erl index 1b6ebf374..ccfd4141a 100644 --- a/src/ebtree.erl +++ b/src/ebtree.erl @@ -1,3 +1,15 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + -module(ebtree). -export([ -- cgit v1.2.1 From 6233d43b711fa83cf034f87466c8a666765f0cd6 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Tue, 21 Jul 2020 12:38:34 +0100 Subject: add ebtree to rebar / reltool.config --- rebar.config.script | 1 + rel/reltool.config | 2 ++ 2 files changed, 3 insertions(+) diff --git a/rebar.config.script b/rebar.config.script index caf69131d..f8a24163f 100644 --- a/rebar.config.script +++ b/rebar.config.script @@ -145,6 +145,7 @@ SubDirs = [ "src/rexi", "src/setup", "src/smoosh", + "src/ebtree", "rel" ], diff --git a/rel/reltool.config b/rel/reltool.config index b59c95f55..be436ded2 100644 --- a/rel/reltool.config +++ b/rel/reltool.config @@ -48,6 +48,7 @@ couch_views, ddoc_cache, dreyfus, + ebtree, ets_lru, fabric, folsom, @@ -112,6 +113,7 @@ {app, couch_views, [{incl_cond, include}]}, {app, ddoc_cache, [{incl_cond, include}]}, {app, dreyfus, [{incl_cond, include}]}, + {app, ebtree, [{incl_cond, include}]}, {app, ets_lru, [{incl_cond, include}]}, {app, fabric, [{incl_cond, include}]}, {app, folsom, [{incl_cond, include}]}, -- cgit v1.2.1 From fb6c83a2a2959346177d0fe035faf2ea8fa22867 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Tue, 21 Jul 2020 17:56:16 +0100 Subject: Allow encode/decode customisation --- src/ebtree/src/ebtree.erl | 88 +++++++++++++++++++++++++---------------------- 1 file changed, 47 insertions(+), 41 deletions(-) diff --git a/src/ebtree/src/ebtree.erl b/src/ebtree/src/ebtree.erl index ccfd4141a..f27f7d493 100644 --- a/src/ebtree/src/ebtree.erl +++ b/src/ebtree/src/ebtree.erl @@ -44,7 +44,8 @@ min, max, collate_fun, - reduce_fun + reduce_fun, + encode_fun }). -define(META, 0). @@ -79,26 +80,29 @@ open(Db, Prefix, Order) -> open(Db, Prefix, Order, Options) when is_binary(Prefix), is_integer(Order), Order > 2, Order rem 2 == 0 -> ReduceFun = proplists:get_value(reduce_fun, Options, fun reduce_noop/2), CollateFun = proplists:get_value(collate_fun, Options, fun collate_raw/2), + EncodeFun = proplists:get_value(encode_fun, Options, fun encode_erlang/2), + + Tree0 = init_tree(Prefix, Order), + Tree1 = Tree0#tree{ + reduce_fun = ReduceFun, + collate_fun = CollateFun, + encode_fun = EncodeFun + }, erlfdb:transactional(Db, fun(Tx) -> - case get_meta(Tx, Prefix, ?META_ORDER) of + case get_meta(Tx, Tree1, ?META_ORDER) of not_found -> erlfdb:clear_range_startswith(Tx, Prefix), - set_meta(Tx, Prefix, ?META_ORDER, Order), - set_meta(Tx, Prefix, ?META_NEXT_ID, 1), - set_node(Tx, init_tree(Prefix, Order), #node{id = ?NODE_ROOT_ID}); + set_meta(Tx, Tree1, ?META_ORDER, Order), + set_meta(Tx, Tree1, ?META_NEXT_ID, 1), + set_node(Tx, Tree1, #node{id = ?NODE_ROOT_ID}); Order -> ok; Else -> erlang:error({order_mismatch, Else}) end end), - Tree = init_tree(Prefix, Order), - Tree#tree{ - reduce_fun = ReduceFun, - collate_fun = CollateFun - }. - + Tree1. %% @doc a special value guaranteed to be smaller than any value in an ebtree. min() -> @@ -702,16 +706,13 @@ find_child_int(#tree{} = Tree, [{_F, L, _P, _R} = Child| Rest], Key) -> %% metadata functions get_meta(Tx, #tree{} = Tree, MetaKey) -> - get_meta(Tx, Tree#tree.prefix, MetaKey); - - -get_meta(Tx, Prefix, MetaKey) when is_binary(Prefix) -> + #tree{prefix = Prefix, encode_fun = EncodeFun} = Tree, Future = get_meta_future(Tx, Prefix, MetaKey), case erlfdb:wait(Future) of not_found -> not_found; - Value -> - decode_value(Value) + Bin when is_binary(Bin) -> + EncodeFun(decode, Bin) end. @@ -719,24 +720,26 @@ get_meta_future(Tx, Prefix, MetaKey) -> erlfdb:get(Tx, meta_key(Prefix, MetaKey)). -set_meta(Tx, Prefix, MetaKey, MetaValue) -> +set_meta(Tx, #tree{} = Tree, MetaKey, MetaValue) -> + #tree{prefix = Prefix, encode_fun = EncodeFun} = Tree, erlfdb:set( Tx, meta_key(Prefix, MetaKey), - encode_value(MetaValue) + EncodeFun(encode, MetaValue) ). + meta_key(Prefix, MetaKey) when is_binary(Prefix) -> erlfdb_tuple:pack({?META, MetaKey}, Prefix). %% node persistence functions get_node(Tx, #tree{} = Tree, Id) -> - get_node_wait(Id, get_node_future(Tx, Tree, Id)). + get_node_wait(Tree, Id, get_node_future(Tx, Tree, Id)). -get_node_wait(Id, Future) -> - decode_node(Id, erlfdb:wait(Future)). +get_node_wait(#tree{} = Tree, Id, Future) -> + decode_node(Tree, Id, erlfdb:wait(Future)). get_node_future(Tx, #tree{} = Tree, Id) -> @@ -764,7 +767,7 @@ set_nodes(Tx, #tree{} = Tree, Nodes) -> set_node(Tx, #tree{} = Tree, #node{} = Node) -> validate_node(Tree, Node), Key = node_key(Tree#tree.prefix, Node#node.id), - Value = encode_node(Node), + Value = encode_node(Tree, Node), erlfdb:set(Tx, Key, Value). @@ -827,18 +830,22 @@ validate_node(#tree{} = Tree, #node{} = Node) -> %% data marshalling functions (encodes unnecesary fields as a NIL_REF) -encode_node(#node{prev = undefined} = Node) -> - encode_node(Node#node{prev = []}); +encode_node(#tree{} = Tree, #node{prev = undefined} = Node) -> + encode_node(Tree, Node#node{prev = []}); + +encode_node(#tree{} = Tree, #node{next = undefined} = Node) -> + encode_node(Tree, Node#node{next = []}); -encode_node(#node{next = undefined} = Node) -> - encode_node(Node#node{next = []}); +encode_node(#tree{} = Tree, #node{} = Node) -> + #tree{encode_fun = EncodeFun} = Tree, + EncodeFun(encode, Node#node{id = []}). -encode_node(#node{} = Node) -> - encode_value(Node#node{id = []}). +decode_node(#tree{} = Tree, Id, Bin) when is_binary(Bin) -> + #tree{encode_fun = EncodeFun} = Tree, + Term = EncodeFun(decode, Bin), + decode_node(Id, Term). -decode_node(Id, Bin) when is_binary(Bin) -> - decode_node(Id, decode_value(Bin)); decode_node(Id, #node{prev = []} = Node) -> decode_node(Id, Node#node{prev = undefined}); @@ -849,15 +856,6 @@ decode_node(Id, #node{next = []} = Node) -> decode_node(Id, #node{} = Node) -> Node#node{id = Id}. - -encode_value(Value) -> - term_to_binary(Value, [compressed, {minor_version, 2}]). - - -decode_value(Bin) when is_binary(Bin) -> - binary_to_term(Bin, [safe]). - - %% built-in reduce functions. reduce_noop(_KVs, _Rereduce) -> @@ -949,6 +947,14 @@ collation_wrapper_fun(CollateFun) -> collate_raw(K1, K2) -> K1 =< K2. +%% encoding function + +encode_erlang(encode, Term) -> + term_to_binary(Term, [compressed, {minor_version, 2}]); + + +encode_erlang(decode, Bin) -> + binary_to_term(Bin, [safe]). %% private functions @@ -982,7 +988,7 @@ last_key(Members) when is_list(Members) -> new_node_id(Tx, Tree) -> NextId = get_meta(Tx, Tree, ?META_NEXT_ID), - set_meta(Tx, Tree#tree.prefix, ?META_NEXT_ID, NextId + 1), + set_meta(Tx, Tree, ?META_NEXT_ID, NextId + 1), NextId. -- cgit v1.2.1 From 909357e993816bd4a441a701ed97a23acaaffa2f Mon Sep 17 00:00:00 2001 From: Juanjo Rodriguez Date: Fri, 10 Jul 2020 08:30:56 +0200 Subject: port view_sandboxing.js into elixir --- test/elixir/README.md | 2 +- test/elixir/test/view_sandboxing_test.exs | 191 ++++++++++++++++++++++++++++++ test/javascript/tests/view_sandboxing.js | 1 + 3 files changed, 193 insertions(+), 1 deletion(-) create mode 100644 test/elixir/test/view_sandboxing_test.exs diff --git a/test/elixir/README.md b/test/elixir/README.md index cf529438d..7f11d87cf 100644 --- a/test/elixir/README.md +++ b/test/elixir/README.md @@ -109,7 +109,7 @@ X means done, - means partially - [ ] Port view_multi_key_temp.js - [X] Port view_offsets.js - [X] Port view_pagination.js - - [ ] Port view_sandboxing.js + - [X] Port view_sandboxing.js - [X] Port view_update_seq.js # Using ExUnit to write unit tests diff --git a/test/elixir/test/view_sandboxing_test.exs b/test/elixir/test/view_sandboxing_test.exs new file mode 100644 index 000000000..af0928efa --- /dev/null +++ b/test/elixir/test/view_sandboxing_test.exs @@ -0,0 +1,191 @@ +defmodule ViewSandboxingTest do + use CouchTestCase + + @document %{integer: 1, string: "1", array: [1, 2, 3]} + + @tag :with_db + test "attempting to change the document has no effect", context do + db_name = context[:db_name] + + {:ok, _} = create_doc(db_name, @document) + + map_fun = """ + function(doc) { + doc.integer = 2; + emit(null, doc); + } + """ + + resp = query(db_name, map_fun, nil, %{include_docs: true}) + rows = resp["rows"] + # either we have an error or our doc is unchanged + assert resp["total_rows"] == 0 or Enum.at(rows, 0)["doc"]["integer"] == 1 + + map_fun = """ + function(doc) { + doc.array[0] = 0; + emit(null, doc); + } + """ + + resp = query(db_name, map_fun, nil, %{include_docs: true}) + row = Enum.at(resp["rows"], 0) + # either we have an error or our doc is unchanged + assert resp["total_rows"] == 0 or Enum.at(row["doc"]["array"], 0) == 1 + end + + @tag :with_db + test "view cannot invoke interpreter internals", context do + db_name = context[:db_name] + {:ok, _} = create_doc(db_name, @document) + + map_fun = """ + function(doc) { + gc(); + emit(null, doc); + } + """ + + # make sure that a view cannot invoke interpreter internals such as the + # garbage collector + resp = query(db_name, map_fun) + assert resp["total_rows"] == 0 + end + + @tag :with_db + test "view cannot access the map_funs and map_results array", context do + db_name = context[:db_name] + {:ok, _} = create_doc(db_name, @document) + + map_fun = """ + function(doc) { + map_funs.push(1); + emit(null, doc); + } + """ + + resp = query(db_name, map_fun) + assert resp["total_rows"] == 0 + + map_fun = """ + function(doc) { + map_results.push(1); + emit(null, doc); + } + """ + + resp = query(db_name, map_fun) + assert resp["total_rows"] == 0 + end + + @tag :with_db + test "COUCHDB-925 - altering 'doc' variable in map function affects other map functions", + context do + db_name = context[:db_name] + + ddoc = %{ + _id: "_design/foobar", + language: "javascript", + views: %{ + view1: %{ + map: """ + function(doc) { + if (doc.values) { + doc.values = [666]; + } + if (doc.tags) { + doc.tags.push("qwerty"); + } + if (doc.tokens) { + doc.tokens["c"] = 3; + } + } + """ + }, + view2: %{ + map: """ + function(doc) { + if (doc.values) { + emit(doc._id, doc.values); + } + if (doc.tags) { + emit(doc._id, doc.tags); + } + if (doc.tokens) { + emit(doc._id, doc.tokens); + } + } + """ + } + } + } + + doc1 = %{ + _id: "doc1", + values: [1, 2, 3] + } + + doc2 = %{ + _id: "doc2", + tags: ["foo", "bar"], + tokens: %{a: 1, b: 2} + } + + {:ok, _} = create_doc(db_name, ddoc) + {:ok, _} = create_doc(db_name, doc1) + {:ok, _} = create_doc(db_name, doc2) + + resp1 = view(db_name, "foobar/view1") + resp2 = view(db_name, "foobar/view2") + + assert Enum.empty?(resp1.body["rows"]) + assert length(resp2.body["rows"]) == 3 + + assert doc1[:_id] == Enum.at(resp2.body["rows"], 0)["key"] + assert doc2[:_id] == Enum.at(resp2.body["rows"], 1)["key"] + assert doc2[:_id] == Enum.at(resp2.body["rows"], 2)["key"] + + assert length(Enum.at(resp2.body["rows"], 0)["value"]) == 3 + + row0_values = Enum.at(resp2.body["rows"], 0)["value"] + + assert Enum.at(row0_values, 0) == 1 + assert Enum.at(row0_values, 1) == 2 + assert Enum.at(row0_values, 2) == 3 + + row1_values = Enum.at(resp2.body["rows"], 1)["value"] + row2_values = Enum.at(resp2.body["rows"], 2)["value"] + + # we can't be 100% sure about the order for the same key + assert (is_map(row1_values) and row1_values["a"] == 1) or + (is_list(row1_values) and Enum.at(row1_values, 0) == "foo") + + assert (is_map(row1_values) and row1_values["b"] == 2) or + (is_list(row1_values) and Enum.at(row1_values, 1) == "bar") + + assert (is_map(row2_values) and row2_values["a"] == 1) or + (is_list(row2_values) and Enum.at(row2_values, 0) == "foo") + + assert (is_map(row2_values) and row2_values["b"] == 2) or + (is_list(row2_values) and Enum.at(row2_values, 1) == "bar") + + assert is_list(row1_values) or !Map.has_key?(row1_values, "c") + assert is_list(row2_values) or !Map.has_key?(row2_values, "c") + end + + @tag :with_db + test "runtime code evaluation can be prevented", context do + db_name = context[:db_name] + {:ok, _} = create_doc(db_name, @document) + + map_fun = """ + function(doc) { + var glob = emit.constructor('return this')(); + emit(doc._id, null); + } + """ + + resp = query(db_name, map_fun) + assert resp["total_rows"] == 0 + end +end diff --git a/test/javascript/tests/view_sandboxing.js b/test/javascript/tests/view_sandboxing.js index 1cdd815de..0e5f308a9 100644 --- a/test/javascript/tests/view_sandboxing.js +++ b/test/javascript/tests/view_sandboxing.js @@ -10,6 +10,7 @@ // License for the specific language governing permissions and limitations under // the License. +couchTests.elixir = true couchTests.view_sandboxing = function(debug) { var db_name = get_random_db_name(); var db = new CouchDB(db_name, {"X-Couch-Full-Commit":"false"}); -- cgit v1.2.1 From b518f01a4def8eac085c76599ecf1b03aeff06f8 Mon Sep 17 00:00:00 2001 From: Juanjo Rodriguez Date: Mon, 13 Jul 2020 22:44:05 +0200 Subject: port update_documents.js into elixir --- test/elixir/README.md | 2 +- test/elixir/test/update_documents_test.exs | 324 +++++++++++++++++++++++++++++ test/javascript/tests/update_documents.js | 2 +- 3 files changed, 326 insertions(+), 2 deletions(-) create mode 100644 test/elixir/test/update_documents_test.exs diff --git a/test/elixir/README.md b/test/elixir/README.md index 7f11d87cf..566364a34 100644 --- a/test/elixir/README.md +++ b/test/elixir/README.md @@ -93,7 +93,7 @@ X means done, - means partially - [X] Port security_validation.js - [ ] Port show_documents.js - [ ] Port stats.js - - [ ] Port update_documents.js + - [X] Port update_documents.js - [X] Port users_db.js - [ ] Port users_db_security.js - [X] Port utf8.js diff --git a/test/elixir/test/update_documents_test.exs b/test/elixir/test/update_documents_test.exs new file mode 100644 index 000000000..c29b31a4d --- /dev/null +++ b/test/elixir/test/update_documents_test.exs @@ -0,0 +1,324 @@ +defmodule UpdateDocumentsTest do + use CouchTestCase + + @ddoc %{ + _id: "_design/update", + language: "javascript", + updates: %{ + hello: """ + function(doc, req) { + if (!doc) { + if (req.id) { + return [ + // Creates a new document with the PUT docid, + { _id : req.id, + reqs : [req] }, + // and returns an HTML response to the client. + "

New World

"]; + }; + // + return [null, "

Empty World

"]; + }; + // we can update the document inline + doc.world = "hello"; + // we can record aspects of the request or use them in application logic. + doc.reqs && doc.reqs.push(req); + doc.edited_by = req.userCtx; + return [doc, "

hello doc

"]; + } + """, + "in-place": """ + function(doc, req) { + var field = req.query.field; + var value = req.query.value; + var message = "set "+field+" to "+value; + doc[field] = value; + return [doc, message]; + } + """, + "form-update": """ + function(doc, req) { + for (var field in req.form) { + doc[field] = req.form[field]; + } + var message = "updated doc from form"; + return [doc, message]; + } + """, + "bump-counter": """ + function(doc, req) { + if (!doc.counter) doc.counter = 0; + doc.counter += 1; + var message = "

bumped it!

"; + return [doc, message]; + } + """, + error: """ + function(doc, req) { + superFail.badCrash; + } + """, + "get-uuid": """ + function(doc, req) { + return [null, req.uuid]; + } + """, + "code-n-bump": """ + function(doc,req) { + if (!doc.counter) doc.counter = 0; + doc.counter += 1; + var message = "

bumped it!

"; + resp = {"code": 302, "body": message} + return [doc, resp]; + } + """, + "resp-code": """ + function(doc,req) { + resp = {"code": 302} + return [null, resp]; + } + """, + "resp-code-and-json": """ + function(doc,req) { + resp = {"code": 302, "json": {"ok": true}} + return [{"_id": req["uuid"]}, resp]; + } + """, + binary: """ + function(doc, req) { + var resp = { + "headers" : { + "Content-Type" : "application/octet-stream" + }, + "base64" : "aGVsbG8gd29ybGQh" // "hello world!" encoded + }; + return [doc, resp]; + } + """, + empty: """ + function(doc, req) { + return [{}, 'oops']; + } + """ + } + } + + @document %{word: "plankton", name: "Rusty"} + + @tag :with_db + test "update error invalid path", context do + db_name = context[:db_name] + create_doc(db_name, @ddoc) + + resp = Couch.post("/#{db_name}/_design/update/_update/") + assert resp.status_code == 404 + assert resp.body["reason"] == "Invalid path." + end + + @tag :with_db + test "update document", context do + db_name = context[:db_name] + create_doc(db_name, @ddoc) + {:ok, resp} = create_doc(db_name, @document) + docid = resp.body["id"] + + resp = Couch.put("/#{db_name}/_design/update/_update/hello/#{docid}") + assert resp.status_code == 201 + assert resp.body == "

hello doc

" + assert String.contains?(resp.headers["Content-Type"], "charset=utf-8") + assert resp.headers["X-Couch-Id"] == docid + + resp = Couch.get("/#{db_name}/#{docid}") + assert resp.status_code == 200 + assert resp.body["world"] == "hello" + + # Fix for COUCHDB-379 + assert String.starts_with?(resp.headers["Server"], "CouchDB") + + resp = Couch.put("/#{db_name}/_design/update/_update/hello") + assert resp.status_code == 200 + assert resp.body == "

Empty World

" + end + + @tag :with_db + test "GET is not allowed", context do + db_name = context[:db_name] + create_doc(db_name, @ddoc) + + resp = Couch.get("/#{db_name}/_design/update/_update/hello") + assert resp.body["error"] == "method_not_allowed" + end + + @tag :with_db + test "doc can be created", context do + db_name = context[:db_name] + create_doc(db_name, @ddoc) + + resp = Couch.get("/#{db_name}/nonExistingDoc") + assert resp.status_code == 404 + + resp = Couch.put("/#{db_name}/_design/update/_update/hello/nonExistingDoc") + assert resp.status_code == 201 + assert resp.body == "

New World

" + + resp = Couch.get("/#{db_name}/nonExistingDoc") + assert resp.status_code == 200 + end + + @tag :with_db + test "in place update", context do + db_name = context[:db_name] + create_doc(db_name, @ddoc) + + {:ok, resp} = create_doc(db_name, @document) + docid = resp.body["id"] + + resp = + Couch.put( + "/#{db_name}/_design/update/_update/in-place/#{docid}?field=title&value=test" + ) + + assert resp.status_code == 201 + assert resp.body == "set title to test" + resp = Couch.get("/#{db_name}/#{docid}") + assert resp.status_code == 200 + assert resp.body["title"] == "test" + end + + @tag :with_db + test "form update via application/x-www-form-urlencoded", context do + db_name = context[:db_name] + create_doc(db_name, @ddoc) + + {:ok, resp} = create_doc(db_name, @document) + docid = resp.body["id"] + + resp = + Couch.put( + "/#{db_name}/_design/update/_update/form-update/#{docid}", + headers: ["Content-Type": "application/x-www-form-urlencoded"], + body: "formfoo=bar&formbar=foo" + ) + + assert resp.status_code == 201 + assert resp.body == "updated doc from form" + + resp = Couch.get("/#{db_name}/#{docid}") + assert resp.status_code == 200 + assert resp.body["formfoo"] == "bar" + assert resp.body["formbar"] == "foo" + end + + @tag :with_db + test "bump counter", context do + db_name = context[:db_name] + create_doc(db_name, @ddoc) + + {:ok, resp} = create_doc(db_name, @document) + docid = resp.body["id"] + + resp = + Couch.put("/#{db_name}/_design/update/_update/bump-counter/#{docid}", + headers: ["X-Couch-Full-Commit": "true"] + ) + + assert resp.status_code == 201 + assert resp.body == "

bumped it!

" + + resp = Couch.get("/#{db_name}/#{docid}") + assert resp.status_code == 200 + assert resp.body["counter"] == 1 + + resp = + Couch.put("/#{db_name}/_design/update/_update/bump-counter/#{docid}", + headers: ["X-Couch-Full-Commit": "true"] + ) + + newrev = resp.headers["X-Couch-Update-NewRev"] + + resp = Couch.get("/#{db_name}/#{docid}") + assert resp.status_code == 200 + assert resp.body["counter"] == 2 + assert resp.body["_rev"] == newrev + end + + @tag :with_db + test "Server provides UUID when POSTing without an ID in the URL", context do + db_name = context[:db_name] + create_doc(db_name, @ddoc) + resp = Couch.put("/#{db_name}/_design/update/_update/get-uuid/") + assert resp.status_code == 200 + assert String.length(resp.body) == 32 + end + + @tag :with_db + test "COUCHDB-1229 - allow slashes in doc ids for update handlers", context do + db_name = context[:db_name] + create_doc(db_name, @ddoc) + + create_doc(db_name, %{_id: "with/slash", counter: 1}) + + resp = Couch.put("/#{db_name}/_design/update/_update/bump-counter/with/slash") + assert resp.status_code == 201 + assert resp.body == "

bumped it!

" + + resp = Couch.get("/#{db_name}/with%2Fslash") + assert resp.status_code == 200 + assert resp.body["counter"] == 2 + end + + @tag :with_db + test "COUCHDB-648 - the code in the JSON response should be honored", context do + db_name = context[:db_name] + create_doc(db_name, @ddoc) + + {:ok, resp} = create_doc(db_name, @document) + docid = resp.body["id"] + + Couch.put("/#{db_name}/_design/update/_update/bump-counter/#{docid}") + Couch.put("/#{db_name}/_design/update/_update/bump-counter/#{docid}") + + resp = Couch.put("/#{db_name}/_design/update/_update/code-n-bump/#{docid}") + assert resp.status_code == 302 + assert resp.body == "

bumped it!

" + + resp = Couch.get("/#{db_name}/#{docid}") + assert resp.status_code == 200 + assert resp.body["counter"] == 3 + + resp = Couch.put("/#{db_name}/_design/update/_update/resp-code/") + assert resp.status_code == 302 + + resp = Couch.put("/#{db_name}/_design/update/_update/resp-code-and-json/") + assert resp.status_code == 302 + assert resp.body["ok"] == true + end + + @tag :with_db + test "base64 response", context do + db_name = context[:db_name] + create_doc(db_name, @ddoc) + + {:ok, resp} = create_doc(db_name, @document) + docid = resp.body["id"] + + resp = + Couch.put("/#{db_name}/_design/update/_update/binary/#{docid}", + body: "rubbish" + ) + + assert resp.status_code == 201 + assert resp.body == "hello world!" + assert String.contains?(resp.headers["Content-Type"], "application/octet-stream") + end + + @tag :with_db + test "Insert doc with empty id", context do + db_name = context[:db_name] + create_doc(db_name, @ddoc) + + resp = Couch.put("/#{db_name}/_design/update/_update/empty/foo") + assert resp.status_code == 400 + assert resp.body["reason"] == "Document id must not be empty" + end +end diff --git a/test/javascript/tests/update_documents.js b/test/javascript/tests/update_documents.js index 6cd4a91d6..913c99a57 100644 --- a/test/javascript/tests/update_documents.js +++ b/test/javascript/tests/update_documents.js @@ -10,7 +10,7 @@ // License for the specific language governing permissions and limitations under // the License. - +couchTests.elixir = true couchTests.update_documents = function(debug) { var db_name = get_random_db_name(); var db = new CouchDB(db_name, {"X-Couch-Full-Commit":"false"}); -- cgit v1.2.1 From 983ff1c5a17c64e0c874374e4b0c00705c413341 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Wed, 22 Jul 2020 16:22:06 +0100 Subject: Use stored order without complaint --- src/ebtree/src/ebtree.erl | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/src/ebtree/src/ebtree.erl b/src/ebtree/src/ebtree.erl index f27f7d493..0ff8935ee 100644 --- a/src/ebtree/src/ebtree.erl +++ b/src/ebtree/src/ebtree.erl @@ -73,7 +73,8 @@ open(Db, Prefix, Order) -> %% @doc Open a new ebtree, initialising it if doesn't already exist. %% @param Db An erlfdb database or transaction. %% @param Prefix The key prefix applied to all ebtree keys. -%% @param Order The maximum number of items allowed in an ebtree node (must be an even number). +%% @param Order The maximum number of items allowed in an ebtree node (must be an even number). Ignored +%% if ebtree is already initialised. %% @param Options Supported options are {reduce_fun, Fun} and {collate_fun, Fun}. %% @returns A data structure representing the ebtree, to be passed to all other functions. -spec open(term(), binary(), pos_integer(), list()) -> #tree{}. @@ -82,27 +83,26 @@ open(Db, Prefix, Order, Options) when is_binary(Prefix), is_integer(Order), Orde CollateFun = proplists:get_value(collate_fun, Options, fun collate_raw/2), EncodeFun = proplists:get_value(encode_fun, Options, fun encode_erlang/2), - Tree0 = init_tree(Prefix, Order), - Tree1 = Tree0#tree{ + Tree = #tree{ + prefix = Prefix, reduce_fun = ReduceFun, collate_fun = CollateFun, encode_fun = EncodeFun }, erlfdb:transactional(Db, fun(Tx) -> - case get_meta(Tx, Tree1, ?META_ORDER) of + case get_meta(Tx, Tree, ?META_ORDER) of not_found -> erlfdb:clear_range_startswith(Tx, Prefix), - set_meta(Tx, Tree1, ?META_ORDER, Order), - set_meta(Tx, Tree1, ?META_NEXT_ID, 1), - set_node(Tx, Tree1, #node{id = ?NODE_ROOT_ID}); - Order -> - ok; - Else -> - erlang:error({order_mismatch, Else}) + set_meta(Tx, Tree, ?META_ORDER, Order), + set_meta(Tx, Tree, ?META_NEXT_ID, 1), + set_node(Tx, Tree, #node{id = ?NODE_ROOT_ID}), + init_order(Tree, Order); + ActualOrder when is_integer(ActualOrder) -> + init_order(Tree, ActualOrder) end - end), - Tree1. + end). + %% @doc a special value guaranteed to be smaller than any value in an ebtree. min() -> @@ -958,10 +958,9 @@ encode_erlang(decode, Bin) -> %% private functions -init_tree(Prefix, Order) - when is_binary(Prefix), is_integer(Order), Order > 2, Order rem 2 == 0 -> - #tree{ - prefix = Prefix, +init_order(#tree{} = Tree, Order) + when is_integer(Order), Order > 2, Order rem 2 == 0 -> + Tree#tree{ min = Order div 2, max = Order }. -- cgit v1.2.1 From 9b2bf52ddc9e7bf70aa0a1fd4d45f0474df32afa Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Wed, 22 Jul 2020 17:19:14 +0100 Subject: Pass fdb key to the encode_fun This commit removes the get/wait/future split as it made it much harder to pass the fdb key down. Since those functions only call each other, there is no loss of functionality. --- src/ebtree/src/ebtree.erl | 54 ++++++++++++++++++++--------------------------- 1 file changed, 23 insertions(+), 31 deletions(-) diff --git a/src/ebtree/src/ebtree.erl b/src/ebtree/src/ebtree.erl index 0ff8935ee..cb5e80541 100644 --- a/src/ebtree/src/ebtree.erl +++ b/src/ebtree/src/ebtree.erl @@ -81,7 +81,7 @@ open(Db, Prefix, Order) -> open(Db, Prefix, Order, Options) when is_binary(Prefix), is_integer(Order), Order > 2, Order rem 2 == 0 -> ReduceFun = proplists:get_value(reduce_fun, Options, fun reduce_noop/2), CollateFun = proplists:get_value(collate_fun, Options, fun collate_raw/2), - EncodeFun = proplists:get_value(encode_fun, Options, fun encode_erlang/2), + EncodeFun = proplists:get_value(encode_fun, Options, fun encode_erlang/3), Tree = #tree{ prefix = Prefix, @@ -707,25 +707,23 @@ find_child_int(#tree{} = Tree, [{_F, L, _P, _R} = Child| Rest], Key) -> get_meta(Tx, #tree{} = Tree, MetaKey) -> #tree{prefix = Prefix, encode_fun = EncodeFun} = Tree, - Future = get_meta_future(Tx, Prefix, MetaKey), + Key = meta_key(Prefix, MetaKey), + Future = erlfdb:get(Tx, Key), case erlfdb:wait(Future) of not_found -> not_found; Bin when is_binary(Bin) -> - EncodeFun(decode, Bin) + EncodeFun(decode, Key, Bin) end. -get_meta_future(Tx, Prefix, MetaKey) -> - erlfdb:get(Tx, meta_key(Prefix, MetaKey)). - - set_meta(Tx, #tree{} = Tree, MetaKey, MetaValue) -> #tree{prefix = Prefix, encode_fun = EncodeFun} = Tree, + Key = meta_key(Prefix, MetaKey), erlfdb:set( Tx, - meta_key(Prefix, MetaKey), - EncodeFun(encode, MetaValue) + Key, + EncodeFun(encode, Key, MetaValue) ). @@ -735,16 +733,10 @@ meta_key(Prefix, MetaKey) when is_binary(Prefix) -> %% node persistence functions get_node(Tx, #tree{} = Tree, Id) -> - get_node_wait(Tree, Id, get_node_future(Tx, Tree, Id)). - - -get_node_wait(#tree{} = Tree, Id, Future) -> - decode_node(Tree, Id, erlfdb:wait(Future)). - - -get_node_future(Tx, #tree{} = Tree, Id) -> Key = node_key(Tree#tree.prefix, Id), - erlfdb:get(Tx, Key). + Future = erlfdb:get(Tx, Key), + Value = erlfdb:wait(Future), + decode_node(Tree, Id, Key, Value). clear_nodes(Tx, #tree{} = Tree, Nodes) -> @@ -767,7 +759,7 @@ set_nodes(Tx, #tree{} = Tree, Nodes) -> set_node(Tx, #tree{} = Tree, #node{} = Node) -> validate_node(Tree, Node), Key = node_key(Tree#tree.prefix, Node#node.id), - Value = encode_node(Tree, Node), + Value = encode_node(Tree, Key, Node), erlfdb:set(Tx, Key, Value). @@ -830,20 +822,20 @@ validate_node(#tree{} = Tree, #node{} = Node) -> %% data marshalling functions (encodes unnecesary fields as a NIL_REF) -encode_node(#tree{} = Tree, #node{prev = undefined} = Node) -> - encode_node(Tree, Node#node{prev = []}); +encode_node(#tree{} = Tree, Key, #node{prev = undefined} = Node) -> + encode_node(Tree, Key, Node#node{prev = []}); -encode_node(#tree{} = Tree, #node{next = undefined} = Node) -> - encode_node(Tree, Node#node{next = []}); +encode_node(#tree{} = Tree, Key, #node{next = undefined} = Node) -> + encode_node(Tree, Key, Node#node{next = []}); -encode_node(#tree{} = Tree, #node{} = Node) -> +encode_node(#tree{} = Tree, Key, #node{} = Node) -> #tree{encode_fun = EncodeFun} = Tree, - EncodeFun(encode, Node#node{id = []}). + EncodeFun(encode, Key, Node#node{id = []}). -decode_node(#tree{} = Tree, Id, Bin) when is_binary(Bin) -> +decode_node(#tree{} = Tree, Id, Key, Value) when is_binary(Value) -> #tree{encode_fun = EncodeFun} = Tree, - Term = EncodeFun(decode, Bin), + Term = EncodeFun(decode, Key, Value), decode_node(Id, Term). @@ -949,12 +941,12 @@ collate_raw(K1, K2) -> %% encoding function -encode_erlang(encode, Term) -> - term_to_binary(Term, [compressed, {minor_version, 2}]); +encode_erlang(encode, _Key, Value) -> + term_to_binary(Value, [compressed, {minor_version, 2}]); -encode_erlang(decode, Bin) -> - binary_to_term(Bin, [safe]). +encode_erlang(decode, _Key, Value) -> + binary_to_term(Value, [safe]). %% private functions -- cgit v1.2.1 From 694460508b714676c352972b0c2d7020c1990ae3 Mon Sep 17 00:00:00 2001 From: Juanjo Rodriguez Date: Wed, 22 Jul 2020 13:36:05 +0200 Subject: Port view multi_key tests into elixir --- test/elixir/README.md | 6 +- test/elixir/test/view_multi_key_all_docs_test.exs | 191 +++++++++++++ test/elixir/test/view_multi_key_design_test.exs | 316 ++++++++++++++++++++++ test/javascript/tests/view_multi_key_all_docs.js | 1 + test/javascript/tests/view_multi_key_design.js | 1 + test/javascript/tests/view_multi_key_temp.js | 1 + 6 files changed, 513 insertions(+), 3 deletions(-) create mode 100644 test/elixir/test/view_multi_key_all_docs_test.exs create mode 100644 test/elixir/test/view_multi_key_design_test.exs diff --git a/test/elixir/README.md b/test/elixir/README.md index 566364a34..38c85a5e8 100644 --- a/test/elixir/README.md +++ b/test/elixir/README.md @@ -104,9 +104,9 @@ X means done, - means partially - [ ] Port view_conflicts.js - [ ] Port view_errors.js - [ ] Port view_include_docs.js - - [ ] Port view_multi_key_all_docs.js - - [ ] Port view_multi_key_design.js - - [ ] Port view_multi_key_temp.js + - [X] Port view_multi_key_all_docs.js + - [X] Port view_multi_key_design.js + - [ ] ~~Port view_multi_key_temp.js~~ - [X] Port view_offsets.js - [X] Port view_pagination.js - [X] Port view_sandboxing.js diff --git a/test/elixir/test/view_multi_key_all_docs_test.exs b/test/elixir/test/view_multi_key_all_docs_test.exs new file mode 100644 index 000000000..d9fa41e23 --- /dev/null +++ b/test/elixir/test/view_multi_key_all_docs_test.exs @@ -0,0 +1,191 @@ +defmodule ViewMultiKeyAllDocsTest do + use CouchTestCase + + @keys ["10", "15", "30", "37", "50"] + + setup_all do + db_name = random_db_name() + {:ok, _} = create_db(db_name) + on_exit(fn -> delete_db(db_name) end) + + bulk_save(db_name, make_docs(0..99)) + + {:ok, [db_name: db_name]} + end + + test "keys in POST body", context do + db_name = context[:db_name] + + resp = all_docs(db_name, nil, @keys) + assert resp.status_code == 200 + rows = resp.body["rows"] + assert length(rows) == length(@keys) + + rows_id = Enum.map(rows, & &1["id"]) + assert rows_id == @keys + end + + test "keys in GET parameters", context do + db_name = context[:db_name] + resp = all_docs(db_name, keys: :jiffy.encode(@keys)) + assert resp.status_code == 200 + rows = resp.body["rows"] + assert length(rows) == length(@keys) + rows_id = Enum.map(rows, & &1["id"]) + assert rows_id == @keys + end + + test "keys in POST body (limit)", context do + db_name = context[:db_name] + + resp = all_docs(db_name, [limit: 1], @keys) + assert resp.status_code == 200 + rows = resp.body["rows"] + assert length(rows) == 1 + assert Enum.at(rows, 0)["id"] == Enum.at(@keys, 0) + end + + test "keys in GET parameters (limit)", context do + db_name = context[:db_name] + resp = all_docs(db_name, limit: 1, keys: :jiffy.encode(@keys)) + assert resp.status_code == 200 + rows = resp.body["rows"] + assert length(rows) == 1 + assert Enum.at(rows, 0)["id"] == Enum.at(@keys, 0) + end + + test "keys in POST body (skip)", context do + db_name = context[:db_name] + + resp = all_docs(db_name, [skip: 2], @keys) + assert resp.status_code == 200 + rows = resp.body["rows"] + assert length(rows) == 3 + + rows_id = Enum.map(rows, & &1["id"]) + assert rows_id == Enum.drop(@keys, 2) + end + + test "keys in GET parameters (skip)", context do + db_name = context[:db_name] + resp = all_docs(db_name, skip: 2, keys: :jiffy.encode(@keys)) + assert resp.status_code == 200 + rows = resp.body["rows"] + assert length(rows) == 3 + rows_id = Enum.map(rows, & &1["id"]) + assert rows_id == Enum.drop(@keys, 2) + end + + test "keys in POST body (descending)", context do + db_name = context[:db_name] + + resp = all_docs(db_name, [descending: true], @keys) + assert resp.status_code == 200 + rows = resp.body["rows"] + assert length(rows) == length(@keys) + + rows_id = Enum.map(rows, & &1["id"]) + assert rows_id == Enum.reverse(@keys) + end + + test "keys in GET parameters (descending)", context do + db_name = context[:db_name] + resp = all_docs(db_name, descending: true, keys: :jiffy.encode(@keys)) + assert resp.status_code == 200 + rows = resp.body["rows"] + assert length(rows) == length(@keys) + rows_id = Enum.map(rows, & &1["id"]) + assert rows_id == Enum.reverse(@keys) + end + + test "keys in POST body (descending, skip, limit)", context do + db_name = context[:db_name] + + resp = all_docs(db_name, [descending: "true", skip: 3, limit: 1], @keys) + assert resp.status_code == 200 + rows = resp.body["rows"] + assert length(rows) == 1 + + key = + @keys + |> Enum.reverse() + |> Enum.drop(3) + |> Enum.at(0) + + assert Enum.at(rows, 0)["id"] == key + end + + test "keys in GET parameters (descending, skip, limit)", context do + db_name = context[:db_name] + + resp = + all_docs(db_name, descending: "true", skip: 3, limit: 1, keys: :jiffy.encode(@keys)) + + assert resp.status_code == 200 + rows = resp.body["rows"] + assert length(rows) == 1 + + key = + @keys + |> Enum.reverse() + |> Enum.drop(3) + |> Enum.at(0) + + assert Enum.at(rows, 0)["id"] == key + end + + test "POST - get invalid rows when the key doesn't exist", context do + db_name = context[:db_name] + + resp = all_docs(db_name, nil, ["1211", "i_dont_exist", "0"]) + assert resp.status_code == 200 + rows = resp.body["rows"] + assert length(rows) == 3 + assert Enum.at(rows, 0)["error"] == "not_found" + assert not Map.has_key?(Enum.at(rows, 0), "id") + assert Enum.at(rows, 1)["error"] == "not_found" + assert not Map.has_key?(Enum.at(rows, 1), "id") + assert Enum.at(rows, 2)["id"] == Enum.at(rows, 2)["key"] + assert Enum.at(rows, 2)["key"] == "0" + end + + test "GET - get invalid rows when the key doesn't exist", context do + db_name = context[:db_name] + + resp = all_docs(db_name, keys: :jiffy.encode(["1211", "i_dont_exist", "0"])) + assert resp.status_code == 200 + rows = resp.body["rows"] + assert length(rows) == 3 + assert Enum.at(rows, 0)["error"] == "not_found" + assert not Map.has_key?(Enum.at(rows, 0), "id") + assert Enum.at(rows, 1)["error"] == "not_found" + assert not Map.has_key?(Enum.at(rows, 1), "id") + assert Enum.at(rows, 2)["id"] == Enum.at(rows, 2)["key"] + assert Enum.at(rows, 2)["key"] == "0" + end + + test "empty keys", context do + db_name = context[:db_name] + + resp = all_docs(db_name, keys: :jiffy.encode([])) + assert resp.status_code == 200 + rows = resp.body["rows"] + assert Enum.empty?(rows) + end + + defp all_docs(db_name, options, keys \\ nil) do + resp = + case keys do + nil -> + Couch.get("/#{db_name}/_all_docs", query: options) + + _ -> + Couch.post("/#{db_name}/_all_docs", + query: options, + body: %{"keys" => keys} + ) + end + + resp + end +end diff --git a/test/elixir/test/view_multi_key_design_test.exs b/test/elixir/test/view_multi_key_design_test.exs new file mode 100644 index 000000000..ab57e89eb --- /dev/null +++ b/test/elixir/test/view_multi_key_design_test.exs @@ -0,0 +1,316 @@ +defmodule ViewMultiKeyDesignTest do + use CouchTestCase + + @keys [10, 15, 30, 37, 50] + + @ddoc %{ + _id: "_design/test", + language: "javascript", + views: %{ + all_docs: %{ + map: "function(doc) { emit(doc.integer, doc.string) }" + }, + multi_emit: %{ + map: "function(doc) {for(var i = 0 ; i < 3 ; i++) { emit(i, doc.integer) ; } }" + }, + summate: %{ + map: "function (doc) {emit(doc.integer, doc.integer)};", + reduce: "function (keys, values) { return sum(values); };" + } + } + } + + setup_all do + db_name = random_db_name() + {:ok, _} = create_db(db_name) + on_exit(fn -> delete_db(db_name) end) + + bulk_save(db_name, make_docs(0..99)) + {:ok, _} = create_doc(db_name, @ddoc) + + {:ok, [db_name: db_name]} + end + + test "that missing keys work too", context do + db_name = context[:db_name] + keys = [101, 30, 15, 37, 50] + resp = view(db_name, "test/summate", [group: true], keys) + rows = resp.body["rows"] + assert length(rows) == length(keys) - 1 + + assert Enum.all?(rows, &Enum.member?(keys, &1["key"])) + assert Enum.all?(rows, &(&1["key"] == &1["value"])) + end + + test "keys in POST body", context do + db_name = context[:db_name] + resp = view(db_name, "test/all_docs", nil, @keys) + rows = resp.body["rows"] + assert length(rows) == length(@keys) + assert Enum.all?(rows, &Enum.member?(@keys, &1["key"])) + assert Enum.all?(rows, &(&1["key"] == String.to_integer(&1["value"]))) + end + + test "keys in GET parameters", context do + db_name = context[:db_name] + resp = view(db_name, "test/all_docs", keys: :jiffy.encode(@keys)) + rows = resp.body["rows"] + assert length(rows) == length(@keys) + assert Enum.all?(rows, &Enum.member?(@keys, &1["key"])) + assert Enum.all?(rows, &(&1["key"] == String.to_integer(&1["value"]))) + end + + test "empty keys", context do + db_name = context[:db_name] + + resp = view(db_name, "test/all_docs", keys: :jiffy.encode([])) + assert resp.status_code == 200 + rows = resp.body["rows"] + assert Enum.empty?(rows) + end + + test "keys in POST body (group)", context do + db_name = context[:db_name] + resp = view(db_name, "test/summate", [group: true], @keys) + rows = resp.body["rows"] + assert length(rows) == length(@keys) + assert Enum.all?(rows, &Enum.member?(@keys, &1["key"])) + assert Enum.all?(rows, &(&1["key"] == &1["value"])) + end + + test "keys in GET body (group)", context do + db_name = context[:db_name] + resp = view(db_name, "test/summate", group: true, keys: :jiffy.encode(@keys)) + rows = resp.body["rows"] + assert length(rows) == length(@keys) + assert Enum.all?(rows, &Enum.member?(@keys, &1["key"])) + assert Enum.all?(rows, &(&1["key"] == &1["value"])) + end + + test "POST - invalid parameter combinations get rejected ", context do + db_name = context[:db_name] + + badargs = [[startkey: 0], [endkey: 0], [key: 0], [group_level: 2]] + + Enum.each(badargs, fn args -> + resp = + Couch.post("/#{db_name}/_design/test/_view/all_docs", + query: args, + body: %{"keys" => @keys} + ) + + assert resp.status_code == 400 + assert resp.body["error"] == "query_parse_error" + end) + + resp = + Couch.post("/#{db_name}/_design/test/_view/summate", + query: nil, + body: %{"keys" => @keys} + ) + + assert resp.status_code == 400 + assert resp.body["error"] == "query_parse_error" + end + + test "GET - invalid parameter combinations get rejected ", context do + db_name = context[:db_name] + + badargs = [ + [startkey: 0, keys: :jiffy.encode(@keys)], + [endkey: 0, keys: :jiffy.encode(@keys)], + [key: 0, keys: :jiffy.encode(@keys)], + [group_level: 2, keys: :jiffy.encode(@keys)] + ] + + Enum.each(badargs, fn args -> + resp = + Couch.get("/#{db_name}/_design/test/_view/all_docs", + query: args + ) + + assert resp.status_code == 400 + assert resp.body["error"] == "query_parse_error" + end) + + resp = + Couch.get("/#{db_name}/_design/test/_view/summate", + query: [keys: :jiffy.encode(@keys)], + body: %{"keys" => @keys} + ) + + assert resp.status_code == 400 + assert resp.body["error"] == "query_parse_error" + end + + test "that a map & reduce containing func support keys when reduce=false", context do + db_name = context[:db_name] + resp = view(db_name, "test/summate", [reduce: false], @keys) + assert length(resp.body["rows"]) == 5 + + resp = view(db_name, "test/summate", reduce: false, keys: :jiffy.encode(@keys)) + assert length(resp.body["rows"]) == 5 + end + + test "that limiting by startkey_docid and endkey_docid get applied", context do + db_name = context[:db_name] + + exp_key = [0, 0, 0, 2, 2, 2] + exp_val = [21, 22, 23, 21, 22, 23] + + resp = + view(db_name, "test/multi_emit", [startkey_docid: 21, endkey_docid: 23], [0, 2]) + + rows = resp.body["rows"] + rows_key = Enum.map(rows, & &1["key"]) + assert rows_key == exp_key + + rows_value = Enum.map(rows, & &1["value"]) + assert rows_value == exp_val + + resp = + view(db_name, "test/multi_emit", + startkey_docid: 21, + endkey_docid: 23, + keys: :jiffy.encode([0, 2]) + ) + + rows = resp.body["rows"] + rows_key = Enum.map(rows, & &1["key"]) + assert rows_key == exp_key + + rows_value = Enum.map(rows, & &1["value"]) + assert rows_value == exp_val + end + + test "limit works", context do + db_name = context[:db_name] + + resp = view(db_name, "test/all_docs", [limit: 1], @keys) + rows = resp.body["rows"] + assert length(rows) == 1 + assert Enum.at(rows, 0)["key"] == 10 + + resp = view(db_name, "test/all_docs", limit: 1, keys: :jiffy.encode(@keys)) + rows = resp.body["rows"] + assert length(rows) == 1 + assert Enum.at(rows, 0)["key"] == 10 + end + + test "offset works", context do + db_name = context[:db_name] + + resp = view(db_name, "test/multi_emit", [skip: 1], [0]) + rows = resp.body["rows"] + assert length(rows) == 99 + + resp = view(db_name, "test/multi_emit", skip: 1, keys: :jiffy.encode([0])) + rows = resp.body["rows"] + assert length(rows) == 99 + end + + test "dir works", context do + db_name = context[:db_name] + + resp = view(db_name, "test/multi_emit", [descending: true], [1]) + rows = resp.body["rows"] + assert length(rows) == 100 + + resp = view(db_name, "test/multi_emit", descending: true, keys: :jiffy.encode([1])) + rows = resp.body["rows"] + assert length(rows) == 100 + end + + test "argument combinations", context do + db_name = context[:db_name] + + resp = view(db_name, "test/multi_emit", [descending: true, skip: 3, limit: 2], [2]) + rows = resp.body["rows"] + assert length(rows) == 2 + + resp = + view(db_name, "test/multi_emit", + descending: true, + skip: 3, + limit: 2, + keys: :jiffy.encode([2]) + ) + + rows = resp.body["rows"] + assert length(rows) == 2 + + resp = + view(db_name, "test/multi_emit", [skip: 0, limit: 1, startkey_docid: "13"], [0]) + + rows = resp.body["rows"] + assert length(rows) == 1 + assert Enum.at(rows, 0)["value"] == 13 + + resp = + view(db_name, "test/multi_emit", [skip: 2, limit: 3, startkey_docid: "13"], [0]) + + rows = resp.body["rows"] + assert length(rows) == 3 + + resp = + view(db_name, "test/multi_emit", + skip: 2, + limit: 3, + startkey_docid: "13", + keys: :jiffy.encode([0]) + ) + + rows = resp.body["rows"] + assert length(rows) == 3 + + resp = + view( + db_name, + "test/multi_emit", + [skip: 1, limit: 5, startkey_docid: "25", endkey_docid: "27"], + [1] + ) + + rows = resp.body["rows"] + assert length(rows) == 2 + assert Enum.at(rows, 0)["value"] == 26 or assert(Enum.at(rows, 0)["value"] == 27) + + resp = + view(db_name, "test/multi_emit", + skip: 1, + limit: 5, + startkey_docid: "25", + endkey_docid: "27", + keys: :jiffy.encode([1]) + ) + + rows = resp.body["rows"] + assert length(rows) == 2 + assert Enum.at(rows, 0)["value"] == 26 or assert(Enum.at(rows, 0)["value"] == 27) + + resp = + view( + db_name, + "test/multi_emit", + [skip: 1, limit: 5, startkey_docid: "28", endkey_docid: "26", descending: true], + [1] + ) + + rows = resp.body["rows"] + assert length(rows) == 2 + assert Enum.at(rows, 0)["value"] == 26 or assert(Enum.at(rows, 0)["value"] == 27) + + resp = + view(db_name, "test/multi_emit", + skip: 1, + limit: 5, + startkey_docid: "28", + endkey_docid: "26", + descending: true, + keys: :jiffy.encode([1]) + ) + + rows = resp.body["rows"] + assert length(rows) == 2 + end +end diff --git a/test/javascript/tests/view_multi_key_all_docs.js b/test/javascript/tests/view_multi_key_all_docs.js index 6704a0ffa..8969c88c9 100644 --- a/test/javascript/tests/view_multi_key_all_docs.js +++ b/test/javascript/tests/view_multi_key_all_docs.js @@ -10,6 +10,7 @@ // License for the specific language governing permissions and limitations under // the License. +couchTests.elixir = true; couchTests.view_multi_key_all_docs = function(debug) { var db_name = get_random_db_name(); var db = new CouchDB(db_name, {"X-Couch-Full-Commit":"false"}); diff --git a/test/javascript/tests/view_multi_key_design.js b/test/javascript/tests/view_multi_key_design.js index a50d1fb9f..20e52a2d0 100644 --- a/test/javascript/tests/view_multi_key_design.js +++ b/test/javascript/tests/view_multi_key_design.js @@ -10,6 +10,7 @@ // License for the specific language governing permissions and limitations under // the License. +couchTests.elixir = true; couchTests.view_multi_key_design = function(debug) { var db_name = get_random_db_name(); var db = new CouchDB(db_name, {"X-Couch-Full-Commit":"false"}); diff --git a/test/javascript/tests/view_multi_key_temp.js b/test/javascript/tests/view_multi_key_temp.js index 25bec4b31..2bed6e7bf 100644 --- a/test/javascript/tests/view_multi_key_temp.js +++ b/test/javascript/tests/view_multi_key_temp.js @@ -10,6 +10,7 @@ // License for the specific language governing permissions and limitations under // the License. +couchTests.skip = true; couchTests.view_multi_key_temp = function(debug) { var db_name = get_random_db_name(); var db = new CouchDB(db_name, {"X-Couch-Full-Commit":"false"}); -- cgit v1.2.1 From 822f2782ca3137fcbd710e3b87e95778fda5674b Mon Sep 17 00:00:00 2001 From: ILYA Khlopotov Date: Thu, 23 Jul 2020 07:13:54 -0700 Subject: Fix 'first page should not return previous bookmark' test --- src/couch_views/src/couch_views_http.erl | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/couch_views/src/couch_views_http.erl b/src/couch_views/src/couch_views_http.erl index 8e12b2476..2aa9e9e85 100644 --- a/src/couch_views/src/couch_views_http.erl +++ b/src/couch_views/src/couch_views_http.erl @@ -177,10 +177,14 @@ maybe_add_next_bookmark(OriginalLimit, PageSize, Args0, Response, Items, KeyFun) maybe_add_previous_bookmark(#mrargs{extra = Extra} = Args, #{rows := Rows} = Result, KeyFun) -> StartKey = couch_util:get_value(fk, Extra), - case first_key(KeyFun, Rows) of - undefined -> + case {StartKey, first_key(KeyFun, Rows)} of + {undefined, _} -> Result; - EndKey -> + {_, undefined} -> + Result; + {StartKey, StartKey} -> + Result; + {StartKey, EndKey} -> Bookmark = bookmark_encode( Args#mrargs{ start_key = StartKey, -- cgit v1.2.1 From ebe62b281f9ba30bd3643181ec0b6674bedd7c03 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Fri, 24 Jul 2020 15:24:40 +0100 Subject: Only call erlfdb:set if the node changes This removes spurious conflicts and allows concurrent writing to non-overlapping parts of the tree. --- src/ebtree/src/ebtree.erl | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/ebtree/src/ebtree.erl b/src/ebtree/src/ebtree.erl index cb5e80541..38a328947 100644 --- a/src/ebtree/src/ebtree.erl +++ b/src/ebtree/src/ebtree.erl @@ -534,7 +534,7 @@ insert_nonfull(Tx, #tree{} = Tree, #node{level = 0} = Node0, Key, Value) -> Node1 = Node0#node{ members = umerge(Tree, [{Key, Value}], Node0#node.members) }, - set_node(Tx, Tree, Node1), + set_node(Tx, Tree, Node0, Node1), reduce_node(Tree, Node1); insert_nonfull(Tx, #tree{} = Tree, #node{} = Node0, Key, Value) -> @@ -563,7 +563,7 @@ insert_nonfull(Tx, #tree{} = Tree, #node{} = Node0, Key, Value) -> members = lists:keyreplace(ChildId1, 3, Node1#node.members, {NewFirstKey, NewLastKey, ChildId1, NewReduction}) }, - set_node(Tx, Tree, Node2), + set_node(Tx, Tree, Node0, Node2), reduce_node(Tree, Node2). @@ -584,7 +584,7 @@ delete(Db, #tree{} = Tree, Key) -> clear_node(Tx, Tree, Root2), set_node(Tx, Tree, Root2#node{id = ?NODE_ROOT_ID}); Root1 -> - set_node(Tx, Tree, Root1) + set_node(Tx, Tree, Root0, Root1) end end), Tree. @@ -633,7 +633,7 @@ delete(Tx, #tree{} = Tree, #node{} = Parent0, Key) -> set_nodes(Tx, Tree, NewNodes), Parent1; false -> - set_node(Tx, Tree, Child1), + set_node(Tx, Tree, Child0, Child1), {_OldFirstKey, _OldLastKey, ChildId0, _OldReduction} = lists:keyfind(ChildId0, 3, Parent0#node.members), Parent0#node{ members = lists:keyreplace(ChildId0, 3, Parent0#node.members, @@ -756,6 +756,13 @@ set_nodes(Tx, #tree{} = Tree, Nodes) -> end, Nodes). +set_node(_Tx, #tree{} = _Tree, #node{} = Same, #node{} = Same) -> + ok; + +set_node(Tx, #tree{} = Tree, #node{} = _From, #node{} = To) -> + set_node(Tx, Tree, To). + + set_node(Tx, #tree{} = Tree, #node{} = Node) -> validate_node(Tree, Node), Key = node_key(Tree#tree.prefix, Node#node.id), -- cgit v1.2.1 From 90158eaadc6a67dd2d522121f8257fc409c8168b Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Fri, 24 Jul 2020 17:25:59 +0100 Subject: separate out collation wrapper to avoid spurious comparisons Ensure we only collate nodes, members and k/v's as intended. --- src/ebtree/src/ebtree.erl | 71 ++++++++++++++++++++++++++++++----------------- 1 file changed, 45 insertions(+), 26 deletions(-) diff --git a/src/ebtree/src/ebtree.erl b/src/ebtree/src/ebtree.erl index 38a328947..f08e1e9be 100644 --- a/src/ebtree/src/ebtree.erl +++ b/src/ebtree/src/ebtree.erl @@ -505,8 +505,8 @@ split_child(Tx, #tree{} = Tree, #node{} = Parent0, #node{} = Child) -> Parent1 = Parent0#node{ members = - umerge(Tree, [{FirstLeftKey, LastLeftKey, LeftId, LeftReduction}], - umerge(Tree, [{FirstRightKey, LastRightKey, RightId, RightReduction}], + umerge_members(Tree, [{FirstLeftKey, LastLeftKey, LeftId, LeftReduction}], + umerge_members(Tree, [{FirstRightKey, LastRightKey, RightId, RightReduction}], lists:keydelete(Child#node.id, 3, Parent0#node.members))) }, clear_node(Tx, Tree, Child), @@ -532,7 +532,7 @@ update_next_neighbour(Tx, #tree{} = Tree, #node{} = Node) -> insert_nonfull(Tx, #tree{} = Tree, #node{level = 0} = Node0, Key, Value) -> Node1 = Node0#node{ - members = umerge(Tree, [{Key, Value}], Node0#node.members) + members = umerge_members(Tree, [{Key, Value}], Node0#node.members) }, set_node(Tx, Tree, Node0, Node1), reduce_node(Tree, Node1); @@ -557,8 +557,8 @@ insert_nonfull(Tx, #tree{} = Tree, #node{} = Node0, Key, Value) -> ChildId1 = Child1#node.id, NewReduction = insert_nonfull(Tx, Tree, Child1, Key, Value), {CurrentFirstKey, CurrentLastKey, ChildId1, _OldReduction} = lists:keyfind(ChildId1, 3, Node1#node.members), - [NewFirstKey, _] = sort(Tree, [Key, CurrentFirstKey]), - [_, NewLastKey] = sort(Tree, [Key, CurrentLastKey]), + [NewFirstKey, _] = sort_keys(Tree, [Key, CurrentFirstKey]), + [_, NewLastKey] = sort_keys(Tree, [Key, CurrentLastKey]), Node2 = Node1#node{ members = lists:keyreplace(ChildId1, 3, Node1#node.members, {NewFirstKey, NewLastKey, ChildId1, NewReduction}) @@ -621,7 +621,7 @@ delete(Tx, #tree{} = Tree, #node{} = Parent0, Key) -> Members1 = lists:keydelete(ChildId0, 3, Members0), Members2 = lists:keydelete(Sibling#node.id, 3, Members1), Members3 = lists:foldl(fun(N, Acc) -> - umerge(Tree, [{first_key(N), last_key(N), N#node.id, reduce_node(Tree, N)}], Acc) + umerge_members(Tree, [{first_key(N), last_key(N), N#node.id, reduce_node(Tree, N)}], Acc) end, Members2, NewNodes), Parent1 = Parent0#node{ @@ -643,7 +643,7 @@ delete(Tx, #tree{} = Tree, #node{} = Parent0, Key) -> merge(Tx, #tree{} = Tree, #node{level = Level} = Node1, #node{level = Level} = Node2) -> - [Left, Right] = sort(Tree, [Node1, Node2]), + [Left, Right] = sort_nodes(Tree, [Node1, Node2]), #node{ id = new_node_id(Tx, Tree), @@ -655,7 +655,7 @@ merge(Tx, #tree{} = Tree, #node{level = Level} = Node1, #node{level = Level} = N rebalance(Tx, #tree{} = Tree, #node{level = Level} = Node1, #node{level = Level} = Node2) -> - [Left0, Right0] = sort(Tree, [Node1, Node2]), + [Left0, Right0] = sort_nodes(Tree, [Node1, Node2]), Members = lists:append(Left0#node.members, Right0#node.members), {LeftMembers, RightMembers} = lists:split(length(Members) div 2, Members), @@ -805,8 +805,8 @@ validate_node(#tree{} = Tree, #node{} = Node) -> NumKeys = length(Node#node.members), IsLeaf = Node#node.level =:= 0, IsRoot = ?NODE_ROOT_ID == Node#node.id, - OutOfOrder = Node#node.members /= sort(Tree, Node#node.members), - Duplicates = Node#node.members /= usort(Tree, Node#node.members), + OutOfOrder = Node#node.members /= sort_members(Tree, Node#node.members), + Duplicates = Node#node.members /= usort_members(Tree, Node#node.members), if Node#node.id == undefined -> erlang:error({node_without_id, Node}); @@ -915,32 +915,51 @@ less_than_or_equal(#tree{} = Tree, A, B) -> CollateFun(A, B). -umerge(#tree{} = Tree, List1, List2) -> +umerge_members(#tree{} = Tree, List1, List2) -> #tree{collate_fun = CollateFun} = Tree, - lists:umerge(collation_wrapper_fun(CollateFun), List1, List2). + CollateWrapper = fun + ({K1, _V1}, {K2, _V2}) -> + CollateFun(K1, K2); + ({_F1, L1, _V1, _R1}, {_F2, L2, _V2, _R2}) -> + CollateFun(L1, L2) + end, + lists:umerge(CollateWrapper, List1, List2). -sort(#tree{} = Tree, List) -> +sort_keys(#tree{} = Tree, List) -> #tree{collate_fun = CollateFun} = Tree, - lists:sort(collation_wrapper_fun(CollateFun), List). + lists:sort(CollateFun, List). -usort(#tree{} = Tree, List) -> +sort_nodes(#tree{} = Tree, List) -> #tree{collate_fun = CollateFun} = Tree, - lists:usort(collation_wrapper_fun(CollateFun), List). + CollateWrapper = fun + (#node{} = N1, #node{} = N2) -> + CollateFun(first_key(N1), first_key(N2)) + end, + lists:sort(CollateWrapper, List). -collation_wrapper_fun(CollateFun) -> - fun - (#node{} = N1, #node{} = N2) -> - CollateFun(first_key(N1), first_key(N2)); +sort_members(#tree{} = Tree, List) -> + #tree{collate_fun = CollateFun} = Tree, + CollateWrapper = fun ({K1, _V1}, {K2, _V2}) -> CollateFun(K1, K2); ({_F1, L1, _V1, _R1}, {_F2, L2, _V2, _R2}) -> - CollateFun(L1, L2); - (K1, K2) -> - CollateFun(K1, K2) - end. + CollateFun(L1, L2) + end, + lists:sort(CollateWrapper, List). + + +usort_members(#tree{} = Tree, List) -> + #tree{collate_fun = CollateFun} = Tree, + CollateWrapper = fun + ({K1, _V1}, {K2, _V2}) -> + CollateFun(K1, K2); + ({_F1, L1, _V1, _R1}, {_F2, L2, _V2, _R2}) -> + CollateFun(L1, L2) + end, + lists:usort(CollateWrapper, List). collate_raw(K1, K2) -> @@ -1285,7 +1304,7 @@ custom_collation_range_test_() -> lists:foldl(fun(Key, T) -> insert(Db, T, Key, Key + 1) end, Tree, Keys), lists:foreach( fun(_) -> - [StartKey, EndKey] = sort(Tree, [rand:uniform(Max), rand:uniform(Max)]), + [StartKey, EndKey] = sort_keys(Tree, [rand:uniform(Max), rand:uniform(Max)]), Seq = if StartKey < EndKey -> lists:seq(StartKey, EndKey); @@ -1309,7 +1328,7 @@ custom_collation_reverse_range_test_() -> lists:foldl(fun(Key, T) -> insert(Db, T, Key, Key + 1) end, Tree, Keys), lists:foreach( fun(_) -> - [StartKey, EndKey] = sort(Tree, [rand:uniform(Max), rand:uniform(Max)]), + [StartKey, EndKey] = sort_keys(Tree, [rand:uniform(Max), rand:uniform(Max)]), Seq = if StartKey < EndKey -> lists:seq(StartKey, EndKey); -- cgit v1.2.1 From 0a444461920c2b95d9ef8ef3c6512ee37f9af351 Mon Sep 17 00:00:00 2001 From: Tony Sun Date: Fri, 24 Jul 2020 09:47:09 -0700 Subject: add get_active_job_ids and get_types We expose get_types in couch_jobs and also add get_active_jobs_ids to get the active job ids given a certain type. --- src/couch_jobs/src/couch_jobs.erl | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/src/couch_jobs/src/couch_jobs.erl b/src/couch_jobs/src/couch_jobs.erl index 88b4bf470..f6fb62664 100644 --- a/src/couch_jobs/src/couch_jobs.erl +++ b/src/couch_jobs/src/couch_jobs.erl @@ -19,6 +19,8 @@ remove/3, get_job_data/3, get_job_state/3, + get_active_jobs_ids/2, + get_types/1, % Job processing accept/1, @@ -104,6 +106,23 @@ get_job_state(Tx, Type, JobId) when is_binary(JobId) -> end). +-spec get_active_jobs_ids(jtx(), job_type()) -> [job_id()] | {error, + any()}. +get_active_jobs_ids(Tx, Type) -> + couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(Tx), fun(JTx) -> + Since = couch_jobs_fdb:get_active_since(JTx, Type, + {versionstamp, 0, 0}), + maps:keys(Since) + end). + + +-spec get_types(jtx()) -> [job_type()] | {error, any()}. +get_types(Tx) -> + couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(Tx), fun(JTx) -> + couch_jobs_fdb:get_types(JTx) + end). + + %% Job processor API -spec accept(job_type()) -> {ok, job(), job_data()} | {error, any()}. -- cgit v1.2.1 From fd9557a9afd6831bd4271176937bab3b932f88d9 Mon Sep 17 00:00:00 2001 From: Tony Sun Date: Fri, 24 Jul 2020 09:51:29 -0700 Subject: add support for active_tasks via fabric2 Instead of relying on couch_task_status, we use fabric2_active_tasks to construct active_task info via couch_jobs. --- src/chttpd/src/chttpd_misc.erl | 7 ++--- src/fabric/src/fabric2_active_tasks.erl | 51 +++++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+), 5 deletions(-) create mode 100644 src/fabric/src/fabric2_active_tasks.erl diff --git a/src/chttpd/src/chttpd_misc.erl b/src/chttpd/src/chttpd_misc.erl index 07d53714a..ec2435c41 100644 --- a/src/chttpd/src/chttpd_misc.erl +++ b/src/chttpd/src/chttpd_misc.erl @@ -294,11 +294,8 @@ dbs_info_callback({error, Reason}, #vacc{resp = Resp0} = Acc) -> handle_task_status_req(#httpd{method='GET'}=Req) -> ok = chttpd:verify_is_server_admin(Req), - {Replies, _BadNodes} = gen_server:multi_call(couch_task_status, all), - Response = lists:flatmap(fun({Node, Tasks}) -> - [{[{node,Node} | Task]} || Task <- Tasks] - end, Replies), - send_json(Req, lists:sort(Response)); + ActiveTasks = fabric2_active_tasks:get_active_tasks(), + send_json(Req, ActiveTasks); handle_task_status_req(Req) -> send_method_not_allowed(Req, "GET,HEAD"). diff --git a/src/fabric/src/fabric2_active_tasks.erl b/src/fabric/src/fabric2_active_tasks.erl new file mode 100644 index 000000000..2c03ec3a9 --- /dev/null +++ b/src/fabric/src/fabric2_active_tasks.erl @@ -0,0 +1,51 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + + +-module(fabric2_active_tasks). + + +-export([ + get_active_tasks/0, + get_active_task_info/1, + + update_active_task_info/2 +]). + + +-define(ACTIVE_TASK_INFO, <<"active_task_info">>). + + +get_active_tasks() -> + couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(undefined), fun(JTx) -> + Types = couch_jobs:get_types(JTx), + lists:foldl(fun(Type, TaskAcc) -> + JobIds = couch_jobs:get_active_jobs_ids(JTx, Type), + Tasks = lists:filtermap(fun(JobId) -> + {ok, Data} = couch_jobs:get_job_data(JTx, Type, JobId), + case maps:get(?ACTIVE_TASK_INFO, Data, not_found) of + not_found -> false; + Info -> {true, Info} + end + end, JobIds), + TaskAcc ++ Tasks + end, [], Types) + end). + + +get_active_task_info(JobData) -> + #{?ACTIVE_TASK_INFO:= ActiveTaskInfo} = JobData, + ActiveTaskInfo. + + +update_active_task_info(JobData, ActiveTaskInfo) -> + JobData#{?ACTIVE_TASK_INFO => ActiveTaskInfo}. -- cgit v1.2.1 From a447f074dbba1417eb902f181f8f15e0da2da856 Mon Sep 17 00:00:00 2001 From: Tony Sun Date: Fri, 24 Jul 2020 09:54:41 -0700 Subject: add active_tasks for view builds using version stamps Active Tasks requires TotalChanges and ChangesDone to show the progress of long running tasks. This requires count_changes_since to be implemented. Unfortunately, that is not easily done via with foundationdb. This commit replaces TotalChanges with the versionstamp + the number of docs as a progress indicator. This can possibly break existing api that relys on TotalChanges. ChangesDone will still exist, but instead of relying on the current changes seq it is simply a reflection of how many documents were written by the updater process. --- src/couch_views/src/couch_views_indexer.erl | 33 ++++- src/couch_views/src/couch_views_util.erl | 27 +++- .../test/couch_views_active_tasks_test.erl | 155 +++++++++++++++++++++ 3 files changed, 208 insertions(+), 7 deletions(-) create mode 100644 src/couch_views/test/couch_views_active_tasks_test.erl diff --git a/src/couch_views/src/couch_views_indexer.erl b/src/couch_views/src/couch_views_indexer.erl index 31868d9c0..9183d982e 100644 --- a/src/couch_views/src/couch_views_indexer.erl +++ b/src/couch_views/src/couch_views_indexer.erl @@ -90,6 +90,7 @@ init() -> job => Job, job_data => Data, count => 0, + changes_done => 0, limiter => Limiter, doc_acc => [], design_opts => Mrst#mrst.design_opts @@ -132,7 +133,9 @@ upgrade_data(Data) -> true -> Acc; false -> maps:put(Key, Default, Acc) end - end, Data, Defaults). + end, Data, Defaults), + % initialize active task + fabric2_active_tasks:update_active_task_info(Data, #{}). % Transaction limit exceeded don't retry @@ -191,7 +194,8 @@ do_update(Db, Mrst0, State0) -> last_seq := LastSeq, limit := Limit, limiter := Limiter, - view_vs := ViewVS + view_vs := ViewVS, + changes_done := ChangesDone0 } = State2, DocAcc1 = fetch_docs(TxDb, DocAcc), couch_rate:in(Limiter, Count), @@ -199,13 +203,16 @@ do_update(Db, Mrst0, State0) -> {Mrst1, MappedDocs} = map_docs(Mrst0, DocAcc1), WrittenDocs = write_docs(TxDb, Mrst1, MappedDocs, State2), + ChangesDone = ChangesDone0 + WrittenDocs, + couch_rate:success(Limiter, WrittenDocs), case Count < Limit of true -> maybe_set_build_status(TxDb, Mrst1, ViewVS, ?INDEX_READY), - report_progress(State2, finished), + report_progress(State2#{changes_done := ChangesDone}, + finished), {Mrst1, finished}; false -> State3 = report_progress(State2, update), @@ -213,6 +220,7 @@ do_update(Db, Mrst0, State0) -> tx_db := undefined, count := 0, doc_acc := [], + changes_done := ChangesDone, view_seq := LastSeq }} end @@ -483,7 +491,9 @@ report_progress(State, UpdateType) -> tx_db := TxDb, job := Job1, job_data := JobData, - last_seq := LastSeq + last_seq := LastSeq, + db_seq := DBSeq, + changes_done := ChangesDone } = State, #{ @@ -494,9 +504,18 @@ report_progress(State, UpdateType) -> <<"retries">> := Retries } = JobData, + ActiveTasks = fabric2_active_tasks:get_active_task_info(JobData), + TotalDone = case maps:get(<<"changes_done">>, ActiveTasks, 0) of + 0 -> ChangesDone; + N -> N + ChangesDone + end, + + NewActiveTasks = couch_views_util:active_tasks_info(TotalDone, + DbName, DDocId, LastSeq, DBSeq), + % Reconstruct from scratch to remove any % possible existing error state. - NewData = #{ + NewData0 = #{ <<"db_name">> => DbName, <<"db_uuid">> => DbUUID, <<"ddoc_id">> => DDocId, @@ -504,6 +523,8 @@ report_progress(State, UpdateType) -> <<"view_seq">> => LastSeq, <<"retries">> => Retries }, + NewData = fabric2_active_tasks:update_active_task_info(NewData0, + NewActiveTasks), case UpdateType of update -> @@ -540,4 +561,4 @@ key_size_limit() -> value_size_limit() -> - config:get_integer("couch_views", "value_size_limit", ?VALUE_SIZE_LIMIT). + config:get_integer("couch_views", "value_size_limit", ?VALUE_SIZE_LIMIT). \ No newline at end of file diff --git a/src/couch_views/src/couch_views_util.erl b/src/couch_views/src/couch_views_util.erl index 154e9e270..11bba75bd 100644 --- a/src/couch_views/src/couch_views_util.erl +++ b/src/couch_views/src/couch_views_util.erl @@ -17,7 +17,8 @@ ddoc_to_mrst/2, validate_args/1, validate_args/2, - is_paginated/1 + is_paginated/1, + active_tasks_info/5 ]). @@ -276,3 +277,27 @@ is_paginated(#mrargs{page_size = PageSize}) when is_integer(PageSize) -> is_paginated(_) -> false. + + +active_tasks_info(ChangesDone, DbName, DDocId, LastSeq, DBSeq) -> + #{ + <<"type">> => <<"indexer">>, + <<"database">> => DbName, + <<"changes_done">> => ChangesDone, + <<"design_document">> => DDocId, + <<"current_version_stamp">> => convert_seq_to_stamp(LastSeq), + <<"db_version_stamp">> => convert_seq_to_stamp(DBSeq) + }. + + +convert_seq_to_stamp(<<"0">>) -> + <<"0-0-0">>; + +convert_seq_to_stamp(undefined) -> + <<"0-0-0">>; + +convert_seq_to_stamp(Seq) -> + {_, Stamp, Batch, DocNumber} = fabric2_fdb:seq_to_vs(Seq), + VS = integer_to_list(Stamp) ++ "-" ++ integer_to_list(Batch) ++ "-" + ++ integer_to_list(DocNumber), + list_to_binary(VS). diff --git a/src/couch_views/test/couch_views_active_tasks_test.erl b/src/couch_views/test/couch_views_active_tasks_test.erl new file mode 100644 index 000000000..f87e01055 --- /dev/null +++ b/src/couch_views/test/couch_views_active_tasks_test.erl @@ -0,0 +1,155 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_views_active_tasks_test). + + +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("couch/include/couch_db.hrl"). +-include_lib("couch_views/include/couch_views.hrl"). +-include_lib("fabric/test/fabric2_test.hrl"). + + +-define(MAP_FUN1, <<"map_fun1">>). +-define(MAP_FUN2, <<"map_fun2">>). +-define(INDEX_FOO, <<"_design/foo">>). +-define(INDEX_BAR, <<"_design/bar">>). +-define(TOTAL_DOCS, 1000). + + +setup() -> + Ctx = test_util:start_couch([ + fabric, + couch_jobs, + couch_js, + couch_views + ]), + Ctx. + + +cleanup(Ctx) -> + test_util:stop_couch(Ctx). + + +foreach_setup() -> + {ok, Db} = fabric2_db:create(?tempdb(), [{user_ctx, ?ADMIN_USER}]), + + DDoc = create_ddoc(?INDEX_FOO, ?MAP_FUN1), + Docs = make_docs(?TOTAL_DOCS), + fabric2_db:update_docs(Db, [DDoc | Docs]), + + {Db, DDoc}. + + +foreach_teardown({Db, _}) -> + meck:unload(), + ok = fabric2_db:delete(fabric2_db:name(Db), []). + + +active_tasks_test_() -> + { + "Active Tasks test", + { + setup, + fun setup/0, + fun cleanup/1, + { + foreach, + fun foreach_setup/0, + fun foreach_teardown/1, + [ + ?TDEF_FE(verify_basic_active_tasks), + ?TDEF_FE(verify_muliple_active_tasks) + ] + } + } + }. + + +verify_basic_active_tasks({Db, DDoc}) -> + pause_indexer_for_changes(self()), + couch_views:build_indices(Db, [DDoc]), + {IndexerPid, {changes_done, ChangesDone}} = wait_to_reach_changes(10000), + [ActiveTask] = fabric2_active_tasks:get_active_tasks(), + ChangesDone1 = maps:get(<<"changes_done">>, ActiveTask), + IndexerPid ! continue, + % we assume the indexer has run for a bit so it has to > 0 + ?assert(ChangesDone1 > 0), + ?assert(ChangesDone1 =< ChangesDone), + ?assertEqual(ChangesDone, ?TOTAL_DOCS). + + +verify_muliple_active_tasks({Db, DDoc}) -> + DDoc2 = create_ddoc(?INDEX_BAR, ?MAP_FUN2), + fabric2_db:update_doc(Db, DDoc2, []), + pause_indexer_for_changes(self()), + couch_views:build_indices(Db, [DDoc, DDoc2]), + + {IndexerPid, {changes_done, ChangesDone}} = wait_to_reach_changes(10000), + {IndexerPid2, {changes_done, ChangesDone2}} = wait_to_reach_changes(10000), + + ActiveTasks = fabric2_active_tasks:get_active_tasks(), + + ?assertEqual(length(ActiveTasks), 2), + + IndexerPid ! continue, + IndexerPid2 ! continue, + + ?assertEqual(ChangesDone, ?TOTAL_DOCS), + ?assertEqual(ChangesDone2, ?TOTAL_DOCS). + + +create_ddoc(DDocId, IndexName) -> + couch_doc:from_json_obj({[ + {<<"_id">>, DDocId}, + {<<"views">>, {[ + {IndexName, {[ + {<<"map">>, <<"function(doc) {emit(doc.val, doc.val);}">>} + ]}} + ]}} + ]}). + + +doc(Id, Val) -> + couch_doc:from_json_obj({[ + {<<"_id">>, list_to_binary(integer_to_list(Id))}, + {<<"val">>, Val} + ]}). + + +make_docs(Count) -> + [doc(I, Count) || I <- lists:seq(1, Count)]. + + +pause_indexer_for_changes(ParentPid) -> + meck:new(couch_views_util, [passthrough]), + meck:expect(couch_views_util, active_tasks_info, fun(ChangesDone, + DbName, DDocId, LastSeq, DBSeq) -> + case ChangesDone of + ?TOTAL_DOCS -> + ParentPid ! {self(), {changes_done, ChangesDone}}, + receive continue -> ok end; + _ -> + ok + end, + meck:passthrough([ChangesDone, DbName, DDocId, LastSeq, + DBSeq]) + end). + + +wait_to_reach_changes(Timeout) -> + receive + {Pid, {changes_done, ChangesDone}} when is_pid(Pid) -> + {Pid, {changes_done, ChangesDone}} + after Timeout -> + error(timeout_in_pause_indexer_for_changes) + end. -- cgit v1.2.1 From a817e601c359e5e2792f746a9e121ede949f8b3a Mon Sep 17 00:00:00 2001 From: Steven Tang Date: Sat, 25 Jul 2020 20:51:22 +1000 Subject: fix: finish_cluster failure due to missing uuid Resolves #2858 --- src/setup/src/setup.erl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/setup/src/setup.erl b/src/setup/src/setup.erl index 4867f6096..e681864c7 100644 --- a/src/setup/src/setup.erl +++ b/src/setup/src/setup.erl @@ -198,6 +198,9 @@ setup_node(NewCredentials, NewBindAddress, NodeCount, Port) -> finish_cluster(Options) -> + % ensure that uuid is set + couch_server:get_uuid(), + ok = wait_connected(), ok = sync_admins(), ok = sync_uuid(), -- cgit v1.2.1 From f011a669b444677242024cfa1005bdefee49343b Mon Sep 17 00:00:00 2001 From: Michal Borkowski Date: Mon, 27 Jul 2020 13:03:22 +0200 Subject: added $keyMapMatch Mango operator --- src/mango/src/mango_selector.erl | 32 ++++++++++++++++++++++++++++++++ src/mango/test/03-operator-test.py | 9 +++++++++ 2 files changed, 41 insertions(+) diff --git a/src/mango/src/mango_selector.erl b/src/mango/src/mango_selector.erl index e884dc55c..fc6a6d1a7 100644 --- a/src/mango/src/mango_selector.erl +++ b/src/mango/src/mango_selector.erl @@ -138,6 +138,11 @@ norm_ops({[{<<"$allMatch">>, {_}=Arg}]}) -> norm_ops({[{<<"$allMatch">>, Arg}]}) -> ?MANGO_ERROR({bad_arg, '$allMatch', Arg}); +norm_ops({[{<<"$keyMapMatch">>, {_}=Arg}]}) -> + {[{<<"$keyMapMatch">>, norm_ops(Arg)}]}; +norm_ops({[{<<"$keyMapMatch">>, Arg}]}) -> + ?MANGO_ERROR({bad_arg, '$keyMapMatch', Arg}); + norm_ops({[{<<"$size">>, Arg}]}) when is_integer(Arg), Arg >= 0 -> {[{<<"$size">>, Arg}]}; norm_ops({[{<<"$size">>, Arg}]}) -> @@ -253,6 +258,10 @@ norm_fields({[{<<"$allMatch">>, Arg}]}, Path) -> Cond = {[{<<"$allMatch">>, norm_fields(Arg)}]}, {[{Path, Cond}]}; +norm_fields({[{<<"$keyMapMatch">>, Arg}]}, Path) -> + Cond = {[{<<"$keyMapMatch">>, norm_fields(Arg)}]}, + {[{Path, Cond}]}; + % The text operator operates against the internal % $default field. This also asserts that the $default @@ -334,6 +343,9 @@ norm_negations({[{<<"$elemMatch">>, Arg}]}) -> norm_negations({[{<<"$allMatch">>, Arg}]}) -> {[{<<"$allMatch">>, norm_negations(Arg)}]}; +norm_negations({[{<<"$keyMapMatch">>, Arg}]}) -> + {[{<<"$keyMapMatch">>, norm_negations(Arg)}]}; + % All other conditions can't introduce negations anywhere % further down the operator tree. norm_negations(Cond) -> @@ -491,6 +503,26 @@ match({[{<<"$allMatch">>, Arg}]}, [_ | _] = Values, Cmp) -> match({[{<<"$allMatch">>, _Arg}]}, _Value, _Cmp) -> false; +% Matches when any key in the map value matches the +% sub-selector Arg. +match({[{<<"$keyMapMatch">>, Arg}]}, Value, Cmp) when is_tuple(Value) -> + try + lists:foreach(fun(V) -> + case match(Arg, V, Cmp) of + true -> throw(matched); + _ -> ok + end + end, [Key || {Key, _} <- element(1, Value)]), + false + catch + throw:matched -> + true; + _:_ -> + false + end; +match({[{<<"$keyMapMatch">>, _Arg}]}, _Value, _Cmp) -> + false; + % Our comparison operators are fairly straight forward match({[{<<"$lt">>, Arg}]}, Value, Cmp) -> Cmp(Value, Arg) < 0; diff --git a/src/mango/test/03-operator-test.py b/src/mango/test/03-operator-test.py index 935f470bb..a67ef91f3 100644 --- a/src/mango/test/03-operator-test.py +++ b/src/mango/test/03-operator-test.py @@ -66,6 +66,15 @@ class OperatorTests: docs = self.db.find({"emptybang": {"$allMatch": {"foo": {"$eq": 2}}}}) self.assertEqual(len(docs), 0) + def test_keymap_match(self): + amdocs = [ + {"foo": {"aa": "bar", "bb": "bang"}}, + {"foo": {"cc": "bar", "bb": "bang"}}, + ] + self.db.save_docs(amdocs, w=3) + docs = self.db.find({"foo": {"$keyMapMatch": {"$eq": "aa"}}}) + self.assertEqual(len(docs), 1) + def test_in_operator_array(self): docs = self.db.find({"manager": True, "favorites": {"$in": ["Ruby", "Python"]}}) self.assertUserIds([2, 6, 7, 9, 11, 12], docs) -- cgit v1.2.1 From 77e1c8c2bc93e07f2508ef4fb8dd7476ccd59425 Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Tue, 28 Jul 2020 16:10:05 -0400 Subject: Use _scheduler/jobs instead of _active_tasks in replication Elixir tests After _active_tasks was implemented on FDB, single-node (previous) _active_tasks implementation, had stopped working. It turns out were were relying on it to run Elixir replication tests. To not lose test coverage, and before we implement replicator on FDB, switch the tests to use `_scheduler/jobs`. --- test/elixir/test/replication_test.exs | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/test/elixir/test/replication_test.exs b/test/elixir/test/replication_test.exs index 78f36602d..8b657d916 100644 --- a/test/elixir/test/replication_test.exs +++ b/test/elixir/test/replication_test.exs @@ -127,7 +127,7 @@ defmodule ReplicationTest do task = get_task(repl_id, 3_000) assert is_map(task) - assert task["replication_id"] == repl_id + assert task["id"] == repl_id repl_body = %{ "replication_id" => repl_id, @@ -1749,8 +1749,13 @@ defmodule ReplicationTest do def wait_for_repl(src_db_name, repl_id, expect_revs_checked, wait_left) do task = get_task(repl_id, 0) - through_seq = task["through_seq"] || "0" - revs_checked = task["revisions_checked"] + info = if task["info"] == :null do + %{"through_seq" => "0", "revisions_checked" => "0"} + else + task["info"] + end + through_seq = info["through_seq"] || "0" + revs_checked = info["revisions_checked"] || "0" changes = get_db_changes(src_db_name, %{:since => through_seq}) if length(changes["results"]) > 0 or revs_checked < expect_revs_checked do @@ -1799,13 +1804,14 @@ defmodule ReplicationTest do end def try_get_task(repl_id) do - resp = Couch.get("/_active_tasks") - assert HTTPotion.Response.success?(resp) - assert is_list(resp.body) + resp = Couch.get("/_scheduler/jobs/#{repl_id}") - Enum.find(resp.body, nil, fn task -> - task["replication_id"] == repl_id - end) + if HTTPotion.Response.success?(resp) do + assert is_map(resp.body) + resp.body + else + nil + end end def set_user(uri, userinfo) do -- cgit v1.2.1 From 5a4da506cfb2cd78864f4dfe7320b94d8599f7f7 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Tue, 28 Jul 2020 22:18:08 +0100 Subject: Replace the 'true' clauses in visit with more explicit ones --- src/ebtree/src/ebtree.erl | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/ebtree/src/ebtree.erl b/src/ebtree/src/ebtree.erl index f08e1e9be..228e1df44 100644 --- a/src/ebtree/src/ebtree.erl +++ b/src/ebtree/src/ebtree.erl @@ -241,15 +241,16 @@ full_reduce(Db, #tree{} = Tree) -> reduce(Db, #tree{} = Tree, StartKey, EndKey) -> Fun = fun ({visit, Key, Value}, {MapAcc, ReduceAcc}) -> + BeforeStart = less_than(Tree, Key, StartKey), AfterEnd = greater_than(Tree, Key, EndKey), InRange = greater_than_or_equal(Tree, Key, StartKey) andalso less_than_or_equal(Tree, Key, EndKey), if + BeforeStart -> + {ok, {MapAcc, ReduceAcc}}; AfterEnd -> {stop, {MapAcc, ReduceAcc}}; InRange -> - {ok, {[{Key, Value} | MapAcc], ReduceAcc}}; - true -> - {ok, {MapAcc, ReduceAcc}} + {ok, {[{Key, Value} | MapAcc], ReduceAcc}} end; ({traverse, FirstKey, LastKey, Reduction}, {MapAcc, ReduceAcc}) -> BeforeStart = less_than(Tree, LastKey, StartKey), @@ -322,6 +323,10 @@ group_reduce(Db, #tree{} = Tree, StartKey, EndKey, GroupKeyFun, UserAccFun, User KeyGroup = GroupKeyFun(Key), SameGroup = CurrentGroup =:= KeyGroup, if + Dir == fwd andalso BeforeStart -> + {ok, {CurrentGroup, UserAcc, MapAcc, ReduceAcc}}; + Dir == rev andalso AfterEnd -> + {ok, {CurrentGroup, UserAcc, MapAcc, ReduceAcc}}; Dir == fwd andalso AfterEnd -> {stop, {CurrentGroup, UserAcc, MapAcc, ReduceAcc}}; Dir == rev andalso BeforeStart -> @@ -333,9 +338,7 @@ group_reduce(Db, #tree{} = Tree, StartKey, EndKey, GroupKeyFun, UserAccFun, User InRange -> %% implicit end of current group and start of a new one GroupValue = do_reduce(Tree, MapAcc, ReduceAcc), - {ok, {KeyGroup, UserAccFun({CurrentGroup, GroupValue}, UserAcc), [{Key, Value}], []}}; - true -> - {ok, {CurrentGroup, UserAcc, MapAcc, ReduceAcc}} + {ok, {KeyGroup, UserAccFun({CurrentGroup, GroupValue}, UserAcc), [{Key, Value}], []}} end; ({traverse, FirstKey, LastKey, Reduction}, {CurrentGroup, UserAcc, MapAcc, ReduceAcc}) -> BeforeStart = less_than(Tree, LastKey, StartKey), -- cgit v1.2.1 From 79cb06c7bbcd2119b70fcc13a0387ee7c3e1399a Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Tue, 28 Jul 2020 20:27:50 +0100 Subject: Allow inclusive_start/end We also redefine the internal collation api for clarity. --- src/ebtree/src/ebtree.erl | 182 +++++++++++++++++++++++++--------------------- 1 file changed, 98 insertions(+), 84 deletions(-) diff --git a/src/ebtree/src/ebtree.erl b/src/ebtree/src/ebtree.erl index 228e1df44..ceb78fbf5 100644 --- a/src/ebtree/src/ebtree.erl +++ b/src/ebtree/src/ebtree.erl @@ -125,14 +125,14 @@ lookup(Db, #tree{} = Tree, Key) -> ({visit, K, V}, _Acc) when K =:= Key -> {stop, {K, V}}; ({visit, K, _V}, Acc) -> - case greater_than(Tree, K, Key) of + case collate(Tree, K, Key, [gt]) of true -> {stop, Acc}; false -> {ok, Acc} end; ({traverse, F, L, _R}, Acc) -> - case {greater_than(Tree, F, Key), less_than_or_equal(Tree, Key, L)} of + case {collate(Tree, F, Key, [gt]), collate(Tree, Key, L, [lt, eq])} of {true, _} -> {stop, Acc}; {false, true} -> @@ -231,19 +231,29 @@ full_reduce(Db, #tree{} = Tree) -> do_reduce(Tree, MapValues, ReduceValues). +%% @equiv reduce(Db, Tree, StartKey, EndKey, []) +-spec reduce(Db :: term(), Tree :: #tree{}, StartKey :: term(), EndKey :: term()) -> term(). +reduce(Db, #tree{} = Tree, StartKey, EndKey) -> + reduce(Db, Tree, StartKey, EndKey, []). + %% @doc Calculate the reduce value for all keys in the specified range. %% @param Db An erlfdb database or transaction. %% @param Tree The ebtree. %% @param StartKey The beginning of the range %% @param EndKey The end of the range %% @returns the reduce value for the specified range --spec reduce(Db :: term(), Tree :: #tree{}, StartKey :: term(), EndKey :: term()) -> term(). -reduce(Db, #tree{} = Tree, StartKey, EndKey) -> +-spec reduce(Db :: term(), Tree :: #tree{}, StartKey :: term(), + EndKey :: term(), Options :: [reduce_option()]) -> term(). +reduce(Db, #tree{} = Tree, StartKey, EndKey, Options) -> + InclusiveStart = proplists:get_value(inclusive_start, Options, true), + InclusiveEnd = proplists:get_value(inclusive_end, Options, true), + Fun = fun ({visit, Key, Value}, {MapAcc, ReduceAcc}) -> - BeforeStart = less_than(Tree, Key, StartKey), - AfterEnd = greater_than(Tree, Key, EndKey), - InRange = greater_than_or_equal(Tree, Key, StartKey) andalso less_than_or_equal(Tree, Key, EndKey), + BeforeStart = collate(Tree, Key, StartKey, if InclusiveStart -> [lt]; true -> [lt, eq] end), + AfterEnd = collate(Tree, Key, EndKey, if InclusiveEnd -> [gt]; true -> [gt, eq] end), + InRange = collate(Tree, Key, StartKey, if InclusiveStart -> [gt, eq]; true -> [gt] end) + andalso collate(Tree, Key, EndKey, if InclusiveEnd -> [lt, eq]; true -> [lt] end), if BeforeStart -> {ok, {MapAcc, ReduceAcc}}; @@ -253,9 +263,10 @@ reduce(Db, #tree{} = Tree, StartKey, EndKey) -> {ok, {[{Key, Value} | MapAcc], ReduceAcc}} end; ({traverse, FirstKey, LastKey, Reduction}, {MapAcc, ReduceAcc}) -> - BeforeStart = less_than(Tree, LastKey, StartKey), - AfterEnd = greater_than(Tree, FirstKey, EndKey), - Whole = greater_than_or_equal(Tree, FirstKey, StartKey) andalso less_than_or_equal(Tree, LastKey, EndKey), + BeforeStart = collate(Tree, LastKey, StartKey, if InclusiveStart -> [lt]; true -> [lt, eq] end), + AfterEnd = collate(Tree, FirstKey, EndKey, if InclusiveEnd -> [gt]; true -> [gt, eq] end), + Whole = collate(Tree, FirstKey, StartKey, if InclusiveStart -> [gt, eq]; true -> [gt] end) + andalso collate(Tree, LastKey, EndKey, if InclusiveEnd -> [lt, eq]; true -> [lt] end), if BeforeStart -> {skip, {MapAcc, ReduceAcc}}; @@ -299,10 +310,13 @@ group_reduce(Db, #tree{} = Tree, StartKey, EndKey, GroupKeyFun, UserAccFun, User %% @param GroupKeyFun A function that takes a key as a parameter and returns the group key. %% @param UserAccFun A function called when a new group reduction is calculated and returns an acc. %% @param UserAcc0 The initial accumulator. -%% @param Options Currently supported options are [{dir, fwd}] and [{dir, rev}] +%% @param Options Currently supported options are {dir, fwd | rev} +%% and {inclusive_start | inclusive_end, true | false} %% @returns the final accumulator. -type group_key() :: term(). +-type reduce_option() :: [{inclusive_start, boolean()} | {inclusive_end, boolean()}]. + -spec group_reduce( Db :: term(), Tree :: #tree{}, @@ -311,15 +325,19 @@ group_reduce(Db, #tree{} = Tree, StartKey, EndKey, GroupKeyFun, UserAccFun, User GroupKeyFun :: fun((term()) -> group_key()), UserAccFun :: fun(({group_key(), GroupValue :: term()}, Acc0 :: term()) -> Acc1 :: term()), UserAcc0 :: term(), - Options :: [fold_option()]) -> Acc1 :: term(). + Options :: [fold_option() | reduce_option()]) -> Acc1 :: term(). group_reduce(Db, #tree{} = Tree, StartKey, EndKey, GroupKeyFun, UserAccFun, UserAcc0, Options) -> Dir = proplists:get_value(dir, Options, fwd), + InclusiveStart = proplists:get_value(inclusive_start, Options, true), + InclusiveEnd = proplists:get_value(inclusive_end, Options, true), NoGroupYet = ?MIN, Fun = fun ({visit, Key, Value}, {CurrentGroup, UserAcc, MapAcc, ReduceAcc}) -> - BeforeStart = less_than(Tree, Key, StartKey), - AfterEnd = greater_than(Tree, Key, EndKey), - InRange = in_range(Tree, StartKey, Key, EndKey), + BeforeStart = collate(Tree, Key, StartKey, if InclusiveStart -> [lt]; true -> [lt, eq] end), + AfterEnd = collate(Tree, Key, EndKey, if InclusiveEnd -> [gt]; true -> [gt, eq] end), + InRange = + collate(Tree, Key, StartKey, if InclusiveStart -> [gt, eq]; true -> [gt] end) andalso + collate(Tree, Key, EndKey, if InclusiveEnd -> [lt, eq]; true -> [lt] end), KeyGroup = GroupKeyFun(Key), SameGroup = CurrentGroup =:= KeyGroup, if @@ -341,11 +359,15 @@ group_reduce(Db, #tree{} = Tree, StartKey, EndKey, GroupKeyFun, UserAccFun, User {ok, {KeyGroup, UserAccFun({CurrentGroup, GroupValue}, UserAcc), [{Key, Value}], []}} end; ({traverse, FirstKey, LastKey, Reduction}, {CurrentGroup, UserAcc, MapAcc, ReduceAcc}) -> - BeforeStart = less_than(Tree, LastKey, StartKey), - AfterEnd = greater_than(Tree, FirstKey, EndKey), + BeforeStart = collate(Tree, LastKey, StartKey, if InclusiveStart -> [lt]; true -> [lt, eq] end), + AfterEnd = collate(Tree, FirstKey, EndKey, if InclusiveEnd -> [gt]; true -> [gt, eq] end), Whole = CurrentGroup =:= GroupKeyFun(FirstKey) andalso CurrentGroup =:= GroupKeyFun(LastKey), - FirstInRange = in_range(Tree, StartKey, FirstKey, EndKey), - LastInRange = in_range(Tree, StartKey, LastKey, EndKey), + FirstInRange = + collate(Tree, FirstKey, StartKey, if InclusiveStart -> [gt, eq]; true -> [gt] end) andalso + collate(Tree, FirstKey, EndKey, if InclusiveEnd -> [lt, eq]; true -> [lt] end), + LastInRange = + collate(Tree, LastKey, StartKey, if InclusiveStart -> [gt, eq]; true -> [gt] end) andalso + collate(Tree, LastKey, EndKey, if InclusiveEnd -> [lt, eq]; true -> [lt] end), if Dir == fwd andalso BeforeStart -> {skip, {CurrentGroup, UserAcc, MapAcc, ReduceAcc}}; @@ -389,10 +411,10 @@ range(Db, #tree{} = Tree, StartKey, EndKey, AccFun, Acc0) -> range(Tx, #tree{} = Tree, #node{level = 0} = Node, StartKey, EndKey, AccFun, Acc0) -> InRange = [{K, V} || {K, V} <- Node#node.members, - less_than_or_equal(Tree, StartKey, K), less_than_or_equal(Tree, K, EndKey)], + collate(Tree, StartKey, K, [lt, eq]), collate(Tree, K, EndKey, [lt, eq])], Acc1 = AccFun(InRange, Acc0), LastKey = last_key(Node), - case Node#node.next /= undefined andalso less_than_or_equal(Tree, LastKey, EndKey) of + case Node#node.next /= undefined andalso collate(Tree, LastKey, EndKey, [lt, eq]) of true -> range(Tx, Tree, get_node(Tx, Tree, Node#node.next), StartKey, EndKey, AccFun, Acc1); false -> @@ -422,10 +444,10 @@ reverse_range(Db, #tree{} = Tree, StartKey, EndKey, AccFun, Acc0) -> reverse_range(Tx, #tree{} = Tree, #node{level = 0} = Node, StartKey, EndKey, AccFun, Acc0) -> InRange = [{K, V} || {K, V} <- Node#node.members, - less_than_or_equal(Tree, StartKey, K), less_than_or_equal(Tree, K, EndKey)], + collate(Tree, StartKey, K, [lt, eq]), collate(Tree, K, EndKey, [lt, eq])], Acc1 = AccFun(lists:reverse(InRange), Acc0), FirstKey = first_key(Node), - case Node#node.prev /= undefined andalso less_than_or_equal(Tree, StartKey, FirstKey) of + case Node#node.prev /= undefined andalso collate(Tree, StartKey, FirstKey, [lt, eq]) of true -> reverse_range(Tx, Tree, get_node(Tx, Tree, Node#node.prev), StartKey, EndKey, AccFun, Acc1); false -> @@ -698,7 +720,7 @@ find_child_int(#tree{} = _Tree, [Child], _Key) -> Child; find_child_int(#tree{} = Tree, [{_F, L, _P, _R} = Child| Rest], Key) -> - case less_than_or_equal(Tree, Key, L) of + case collate(Tree, Key, L, [lt, eq]) of true -> Child; false -> @@ -879,94 +901,83 @@ reduce_values(#tree{} = Tree, Values, Rereduce) when is_list(Values) -> %% collation functions -in_range(#tree{} = Tree, StartOfRange, Key, EndOfRange) -> - greater_than_or_equal(Tree, Key, StartOfRange) andalso less_than_or_equal(Tree, Key, EndOfRange). - - -greater_than(#tree{} = Tree, A, B) -> - not less_than_or_equal(Tree, A, B). - - -greater_than_or_equal(#tree{} = _Tree, A, A) -> - true; - -greater_than_or_equal(#tree{} = Tree, A, B) -> - greater_than(Tree, A, B). - - -less_than(#tree{} = _Tree, A, A) -> - false; - -less_than(#tree{} = Tree, A, B) -> - less_than_or_equal(Tree, A, B). +collate(#tree{} = _Tree, ?MIN, _B) -> + lt; -less_than_or_equal(#tree{} = _Tree, ?MIN, _B) -> - true; +collate(#tree{} = _Tree, _A, ?MIN) -> + gt; -less_than_or_equal(#tree{} = _Tree, _A, ?MIN) -> - false; +collate(#tree{} = _Tree, ?MAX, _B) -> + gt; -less_than_or_equal(#tree{} = _Tree, ?MAX, _B) -> - false; +collate(#tree{} = _Tree, _A, ?MAX) -> + lt; -less_than_or_equal(#tree{} = _Tree, _A, ?MAX) -> - true; - -less_than_or_equal(#tree{} = Tree, A, B) -> +collate(#tree{} = Tree, A, B) -> #tree{collate_fun = CollateFun} = Tree, CollateFun(A, B). +collate(#tree{} = Tree, A, B, Allowed) -> + lists:member(collate(Tree, A, B), Allowed). + + umerge_members(#tree{} = Tree, List1, List2) -> - #tree{collate_fun = CollateFun} = Tree, CollateWrapper = fun ({K1, _V1}, {K2, _V2}) -> - CollateFun(K1, K2); + collate(Tree, K1, K2, [lt, eq]); ({_F1, L1, _V1, _R1}, {_F2, L2, _V2, _R2}) -> - CollateFun(L1, L2) + collate(Tree, L1, L2, [lt, eq]) end, lists:umerge(CollateWrapper, List1, List2). sort_keys(#tree{} = Tree, List) -> - #tree{collate_fun = CollateFun} = Tree, - lists:sort(CollateFun, List). + CollateWrapper = fun + (K1, K2) -> + collate(Tree, K1, K2, [lt, eq]) + end, + lists:sort(CollateWrapper, List). sort_nodes(#tree{} = Tree, List) -> - #tree{collate_fun = CollateFun} = Tree, CollateWrapper = fun (#node{} = N1, #node{} = N2) -> - CollateFun(first_key(N1), first_key(N2)) + collate(Tree, first_key(N1), first_key(N2), [lt, eq]) end, lists:sort(CollateWrapper, List). sort_members(#tree{} = Tree, List) -> - #tree{collate_fun = CollateFun} = Tree, CollateWrapper = fun ({K1, _V1}, {K2, _V2}) -> - CollateFun(K1, K2); + collate(Tree, K1, K2, [lt, eq]); ({_F1, L1, _V1, _R1}, {_F2, L2, _V2, _R2}) -> - CollateFun(L1, L2) + collate(Tree, L1, L2, [lt, eq]) end, lists:sort(CollateWrapper, List). usort_members(#tree{} = Tree, List) -> - #tree{collate_fun = CollateFun} = Tree, CollateWrapper = fun ({K1, _V1}, {K2, _V2}) -> - CollateFun(K1, K2); + collate(Tree, K1, K2, [lt, eq]); ({_F1, L1, _V1, _R1}, {_F2, L2, _V2, _R2}) -> - CollateFun(L1, L2) + collate(Tree, L1, L2, [lt, eq]) end, lists:usort(CollateWrapper, List). -collate_raw(K1, K2) -> - K1 =< K2. +collate_raw(A, B) when A < B -> + lt; + +collate_raw(A, B) when A > B -> + gt; + +collate_raw(A, A) -> + eq. + %% encoding function @@ -1071,16 +1082,9 @@ reduce_stats(Rs, true) -> collation_fun_test_() -> Tree = #tree{collate_fun = fun collate_raw/2}, [ - ?_test(?assert(greater_than(Tree, 4, 3))), - ?_test(?assertNot(greater_than(Tree, 3, 4))), - ?_test(?assert(greater_than_or_equal(Tree, 3, 3))), - ?_test(?assert(greater_than_or_equal(Tree, 3, 3))), - ?_test(?assert(less_than(Tree, 3, 4))), - ?_test(?assertNot(less_than(Tree, 3, 3))), - ?_test(?assertNot(less_than(Tree, 4, 3))), - ?_test(?assert(less_than_or_equal(Tree, 3, 3))), - ?_test(?assert(less_than_or_equal(Tree, 3, 4))), - ?_test(?assertNot(less_than_or_equal(Tree, 4, 3))) + ?_test(?assertEqual(gt, collate(Tree, 4, 3))), + ?_test(?assertEqual(lt, collate(Tree, 3, 4))), + ?_test(?assertEqual(eq, collate(Tree, 3, 3))) ]. @@ -1152,7 +1156,13 @@ count_reduce_test_() -> ?_test(?assertEqual(Expected(21, 83), reduce(Db, Tree, 21, 83))), ?_test(?assertEqual(Expected(1, 1), reduce(Db, Tree, 1, 1))), ?_test(?assertEqual(Expected(1, 100), reduce(Db, Tree, 0, 200))), - ?_test(?assertEqual(Expected(5, 7), reduce(Db, Tree, 5, 7))) + ?_test(?assertEqual(Expected(5, 7), reduce(Db, Tree, 5, 7))), + ?_test(?assertEqual(Expected(6, 7), reduce(Db, Tree, 5, 7, + [{inclusive_start, false}]))), + ?_test(?assertEqual(Expected(5, 6), reduce(Db, Tree, 5, 7, + [{inclusive_end, false}]))), + ?_test(?assertEqual(Expected(6, 6), reduce(Db, Tree, 5, 7, + [{inclusive_start, false}, {inclusive_end, false}]))) ]. sum_reduce_test_() -> @@ -1224,7 +1234,11 @@ group_reduce_int_test_() -> ?_test(?assertEqual([{null, 100}], group_reduce(Db, Tree, ebtree:min(), ebtree:max(), GroupKeyFun, UserAccFun, []))), ?_test(?assertEqual([{null, 99}], group_reduce(Db, Tree, 2, ebtree:max(), GroupKeyFun, UserAccFun, []))), - ?_test(?assertEqual([{null, 96}], group_reduce(Db, Tree, 3, 98, GroupKeyFun, UserAccFun, []))) + ?_test(?assertEqual([{null, 96}], group_reduce(Db, Tree, 3, 98, GroupKeyFun, UserAccFun, []))), + ?_test(?assertEqual([{null, 95}], group_reduce(Db, Tree, 3, 98, GroupKeyFun, UserAccFun, [], [{inclusive_start, false}]))), + ?_test(?assertEqual([{null, 95}], group_reduce(Db, Tree, 3, 98, GroupKeyFun, UserAccFun, [], [{inclusive_end, false}]))), + ?_test(?assertEqual([{null, 94}], group_reduce(Db, Tree, 3, 98, GroupKeyFun, UserAccFun, [], + [{inclusive_start, false}, {inclusive_end, false}]))) ]. @@ -1238,7 +1252,7 @@ raw_collation_test() -> custom_collation_test() -> Db = erlfdb_util:get_test_db([empty]), - CollateFun = fun(A, B) -> B =< A end, + CollateFun = fun(A, B) -> collate_raw(B, A) end, Tree = open(Db, <<1,2,3>>, 4, [{collate_fun, CollateFun}]), insert(Db, Tree, 1, 1), insert(Db, Tree, 2, 2), @@ -1302,7 +1316,7 @@ custom_collation_range_test_() -> Db = erlfdb_util:get_test_db([empty]), Max = 1000, Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, Max)])], - CollateFun = fun(A, B) -> B =< A end, + CollateFun = fun(A, B) -> collate_raw(B, A) end, Tree = open(Db, <<1,2,3>>, 10, [{collate_fun, CollateFun}]), lists:foldl(fun(Key, T) -> insert(Db, T, Key, Key + 1) end, Tree, Keys), lists:foreach( @@ -1326,7 +1340,7 @@ custom_collation_reverse_range_test_() -> Db = erlfdb_util:get_test_db([empty]), Max = 1000, Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, Max)])], - CollateFun = fun(A, B) -> B =< A end, + CollateFun = fun(A, B) -> collate_raw(B, A) end, Tree = open(Db, <<1,2,3>>, 10, [{collate_fun, CollateFun}]), lists:foldl(fun(Key, T) -> insert(Db, T, Key, Key + 1) end, Tree, Keys), lists:foreach( -- cgit v1.2.1 From f8fdf9721e2ac932022065bc075301641568d67c Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Wed, 29 Jul 2020 21:15:01 +0100 Subject: Call collate for group equality --- src/ebtree/src/ebtree.erl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/ebtree/src/ebtree.erl b/src/ebtree/src/ebtree.erl index ceb78fbf5..bae0ff310 100644 --- a/src/ebtree/src/ebtree.erl +++ b/src/ebtree/src/ebtree.erl @@ -339,7 +339,7 @@ group_reduce(Db, #tree{} = Tree, StartKey, EndKey, GroupKeyFun, UserAccFun, User collate(Tree, Key, StartKey, if InclusiveStart -> [gt, eq]; true -> [gt] end) andalso collate(Tree, Key, EndKey, if InclusiveEnd -> [lt, eq]; true -> [lt] end), KeyGroup = GroupKeyFun(Key), - SameGroup = CurrentGroup =:= KeyGroup, + SameGroup = collate(Tree, CurrentGroup, KeyGroup, [eq]), if Dir == fwd andalso BeforeStart -> {ok, {CurrentGroup, UserAcc, MapAcc, ReduceAcc}}; @@ -361,7 +361,9 @@ group_reduce(Db, #tree{} = Tree, StartKey, EndKey, GroupKeyFun, UserAccFun, User ({traverse, FirstKey, LastKey, Reduction}, {CurrentGroup, UserAcc, MapAcc, ReduceAcc}) -> BeforeStart = collate(Tree, LastKey, StartKey, if InclusiveStart -> [lt]; true -> [lt, eq] end), AfterEnd = collate(Tree, FirstKey, EndKey, if InclusiveEnd -> [gt]; true -> [gt, eq] end), - Whole = CurrentGroup =:= GroupKeyFun(FirstKey) andalso CurrentGroup =:= GroupKeyFun(LastKey), + Whole = + collate(Tree, CurrentGroup, GroupKeyFun(FirstKey), [eq]) andalso + collate(Tree, CurrentGroup, GroupKeyFun(LastKey), [eq]), FirstInRange = collate(Tree, FirstKey, StartKey, if InclusiveStart -> [gt, eq]; true -> [gt] end) andalso collate(Tree, FirstKey, EndKey, if InclusiveEnd -> [lt, eq]; true -> [lt] end), -- cgit v1.2.1 From e1b4259a9e0714fb78f740ee763dd12f84bbca1a Mon Sep 17 00:00:00 2001 From: ILYA Khlopotov Date: Mon, 20 Jul 2020 11:37:30 -0700 Subject: Strip last_msg from logs --- src/couch_log/src/couch_log_config.erl | 11 +- src/couch_log/src/couch_log_config_dyn.erl | 3 +- src/couch_log/src/couch_log_formatter.erl | 24 ++++- src/couch_log/src/couch_log_sup.erl | 2 + src/couch_log/test/eunit/couch_log_config_test.erl | 37 ++++++- .../test/eunit/couch_log_formatter_test.erl | 114 ++++++++++++++++++++- 6 files changed, 179 insertions(+), 12 deletions(-) diff --git a/src/couch_log/src/couch_log_config.erl b/src/couch_log/src/couch_log_config.erl index 766d068a4..a7a469cc3 100644 --- a/src/couch_log/src/couch_log_config.erl +++ b/src/couch_log/src/couch_log_config.erl @@ -49,7 +49,8 @@ entries() -> [ {level, "level", "info"}, {level_int, "level", "info"}, - {max_message_size, "max_message_size", "16000"} + {max_message_size, "max_message_size", "16000"}, + {strip_last_msg, "strip_last_msg", "true"} ]. @@ -97,4 +98,10 @@ transform(max_message_size, SizeStr) -> Size -> Size catch _:_ -> 16000 - end. \ No newline at end of file + end; + +transform(strip_last_msg, "false") -> + false; + +transform(strip_last_msg, _) -> + true. \ No newline at end of file diff --git a/src/couch_log/src/couch_log_config_dyn.erl b/src/couch_log/src/couch_log_config_dyn.erl index f7541f61f..b39dcf2f5 100644 --- a/src/couch_log/src/couch_log_config_dyn.erl +++ b/src/couch_log/src/couch_log_config_dyn.erl @@ -25,4 +25,5 @@ get(level) -> info; get(level_int) -> 2; -get(max_message_size) -> 16000. +get(max_message_size) -> 16000; +get(strip_last_msg) -> true. diff --git a/src/couch_log/src/couch_log_formatter.erl b/src/couch_log/src/couch_log_formatter.erl index 4d81f184f..26997a8a6 100644 --- a/src/couch_log/src/couch_log_formatter.erl +++ b/src/couch_log/src/couch_log_formatter.erl @@ -68,7 +68,13 @@ format(Event) -> do_format({error, _GL, {Pid, "** Generic server " ++ _, Args}}) -> %% gen_server terminate - [Name, LastMsg, State, Reason | Extra] = Args, + [Name, LastMsg0, State, Reason | Extra] = Args, + LastMsg = case couch_log_config:get(strip_last_msg) of + true -> + redacted; + false -> + LastMsg0 + end, MsgFmt = "gen_server ~w terminated with reason: ~s~n" ++ " last msg: ~p~n state: ~p~n extra: ~p", MsgArgs = [Name, format_reason(Reason), LastMsg, State, Extra], @@ -76,7 +82,13 @@ do_format({error, _GL, {Pid, "** Generic server " ++ _, Args}}) -> do_format({error, _GL, {Pid, "** State machine " ++ _, Args}}) -> %% gen_fsm terminate - [Name, LastMsg, StateName, State, Reason | Extra] = Args, + [Name, LastMsg0, StateName, State, Reason | Extra] = Args, + LastMsg = case couch_log_config:get(strip_last_msg) of + true -> + redacted; + false -> + LastMsg0 + end, MsgFmt = "gen_fsm ~w in state ~w terminated with reason: ~s~n" ++ " last msg: ~p~n state: ~p~n extra: ~p", MsgArgs = [Name, StateName, format_reason(Reason), LastMsg, State, Extra], @@ -84,7 +96,13 @@ do_format({error, _GL, {Pid, "** State machine " ++ _, Args}}) -> do_format({error, _GL, {Pid, "** gen_event handler" ++ _, Args}}) -> %% gen_event handler terminate - [ID, Name, LastMsg, State, Reason] = Args, + [ID, Name, LastMsg0, State, Reason] = Args, + LastMsg = case couch_log_config:get(strip_last_msg) of + true -> + redacted; + false -> + LastMsg0 + end, MsgFmt = "gen_event ~w installed in ~w terminated with reason: ~s~n" ++ " last msg: ~p~n state: ~p", MsgArgs = [ID, Name, format_reason(Reason), LastMsg, State], diff --git a/src/couch_log/src/couch_log_sup.erl b/src/couch_log/src/couch_log_sup.erl index 6219a36e9..fc1ac7812 100644 --- a/src/couch_log/src/couch_log_sup.erl +++ b/src/couch_log/src/couch_log_sup.erl @@ -63,6 +63,8 @@ handle_config_change("log", Key, _, _, S) -> couch_log_config:reconfigure(); "max_message_size" -> couch_log_config:reconfigure(); + "strip_last_msg" -> + couch_log_config:reconfigure(); _ -> % Someone may have changed the config for % the writer so we need to re-initialize. diff --git a/src/couch_log/test/eunit/couch_log_config_test.erl b/src/couch_log/test/eunit/couch_log_config_test.erl index c4677f37f..a4c4bcff2 100644 --- a/src/couch_log/test/eunit/couch_log_config_test.erl +++ b/src/couch_log/test/eunit/couch_log_config_test.erl @@ -25,7 +25,9 @@ couch_log_config_test_() -> fun check_level/0, fun check_max_message_size/0, fun check_bad_level/0, - fun check_bad_max_message_size/0 + fun check_bad_max_message_size/0, + fun check_strip_last_msg/0, + fun check_bad_strip_last_msg/0 ] }. @@ -108,3 +110,36 @@ check_bad_max_message_size() -> couch_log_test_util:wait_for_config(), ?assertEqual(16000, couch_log_config:get(max_message_size)) end). + + +check_strip_last_msg() -> + % Default is true + ?assertEqual(true, couch_log_config:get(strip_last_msg)), + + couch_log_test_util:with_config_listener(fun() -> + config:set("log", "strip_last_msg", "false"), + couch_log_test_util:wait_for_config(), + ?assertEqual(false, couch_log_config:get(strip_last_msg)), + + config:delete("log", "strip_last_msg"), + couch_log_test_util:wait_for_config(), + ?assertEqual(true, couch_log_config:get(strip_last_msg)) + end). + +check_bad_strip_last_msg() -> + % Default is true + ?assertEqual(true, couch_log_config:get(strip_last_msg)), + + couch_log_test_util:with_config_listener(fun() -> + config:set("log", "strip_last_msg", "false"), + couch_log_test_util:wait_for_config(), + ?assertEqual(false, couch_log_config:get(strip_last_msg)), + + config:set("log", "strip_last_msg", "this is not a boolean"), + couch_log_test_util:wait_for_config(), + ?assertEqual(true, couch_log_config:get(strip_last_msg)), + + config:delete("log", "strip_last_msg"), + couch_log_test_util:wait_for_config(), + ?assertEqual(true, couch_log_config:get(strip_last_msg)) + end). diff --git a/src/couch_log/test/eunit/couch_log_formatter_test.erl b/src/couch_log/test/eunit/couch_log_formatter_test.erl index 795efcf29..24de346c6 100644 --- a/src/couch_log/test/eunit/couch_log_formatter_test.erl +++ b/src/couch_log/test/eunit/couch_log_formatter_test.erl @@ -81,7 +81,7 @@ gen_server_error_test() -> do_matches(do_format(Event), [ "gen_server a_gen_server terminated", "with reason: some_reason", - "last msg: {foo,bar}", + "last msg: redacted", "state: server_state", "extra: \\[\\]" ]). @@ -108,7 +108,7 @@ gen_server_error_with_extra_args_test() -> do_matches(do_format(Event), [ "gen_server a_gen_server terminated", "with reason: some_reason", - "last msg: {foo,bar}", + "last msg: redacted", "state: server_state", "extra: \\[sad,args\\]" ]). @@ -135,7 +135,7 @@ gen_fsm_error_test() -> do_matches(do_format(Event), [ "gen_fsm a_gen_fsm in state state_name", "with reason: barf", - "last msg: {ohai,there}", + "last msg: redacted", "state: curr_state", "extra: \\[\\]" ]). @@ -162,7 +162,7 @@ gen_fsm_error_with_extra_args_test() -> do_matches(do_format(Event), [ "gen_fsm a_gen_fsm in state state_name", "with reason: barf", - "last msg: {ohai,there}", + "last msg: redacted", "state: curr_state", "extra: \\[sad,args\\]" ]). @@ -195,7 +195,7 @@ gen_event_error_test() -> do_matches(do_format(Event), [ "gen_event handler_id installed in a_gen_event", "reason: barf", - "last msg: {ohai,there}", + "last msg: redacted", "state: curr_state" ]). @@ -850,6 +850,110 @@ coverage_test() -> }) ). +gen_server_error_with_last_msg_test() -> + Pid = self(), + Event = { + error, + erlang:group_leader(), + { + Pid, + "** Generic server and some stuff", + [a_gen_server, {foo, bar}, server_state, some_reason] + } + }, + ?assertMatch( + #log_entry{ + level = error, + pid = Pid + }, + do_format(Event) + ), + with_last(fun() -> + do_matches(do_format(Event), [ + "gen_server a_gen_server terminated", + "with reason: some_reason", + "last msg: {foo,bar}", + "state: server_state", + "extra: \\[\\]" + ]) + end). + +gen_event_error_with_last_msg_test() -> + Pid = self(), + Event = { + error, + erlang:group_leader(), + { + Pid, + "** gen_event handler did a thing", + [ + handler_id, + a_gen_event, + {ohai,there}, + curr_state, + barf + ] + } + }, + ?assertMatch( + #log_entry{ + level = error, + pid = Pid + }, + do_format(Event) + ), + with_last(fun() -> + do_matches(do_format(Event), [ + "gen_event handler_id installed in a_gen_event", + "reason: barf", + "last msg: {ohai,there}", + "state: curr_state" + ]) + end). + + +gen_fsm_error_with_last_msg_test() -> + Pid = self(), + Event = { + error, + erlang:group_leader(), + { + Pid, + "** State machine did a thing", + [a_gen_fsm, {ohai,there}, state_name, curr_state, barf] + } + }, + ?assertMatch( + #log_entry{ + level = error, + pid = Pid + }, + do_format(Event) + ), + with_last(fun() -> + do_matches(do_format(Event), [ + "gen_fsm a_gen_fsm in state state_name", + "with reason: barf", + "last msg: {ohai,there}", + "state: curr_state", + "extra: \\[\\]" + ]) + end). + + +with_last(Fun) -> + meck:new(couch_log_config_dyn, [passthrough]), + try + meck:expect(couch_log_config_dyn, get, fun(Case) -> + case Case of + strip_last_msg -> false; + Case -> meck:passthrough([Case]) + end + end), + Fun() + after + meck:unload(couch_log_config_dyn) + end. do_format(Event) -> E = couch_log_formatter:format(Event), -- cgit v1.2.1 From 97e7a95c3c25c1ece7db25ee87e6476ba0a11282 Mon Sep 17 00:00:00 2001 From: ILYA Khlopotov Date: Thu, 23 Jul 2020 08:05:12 -0700 Subject: Add format_status/2 callback in gen_server implementations --- src/couch/include/couch_db.hrl | 3 +++ src/couch/src/couch_lru.erl | 5 ++++- src/couch/src/couch_multidb_changes.erl | 14 +++++++++++- src/couch/src/couch_native_process.erl | 17 +++++++++++++- src/couch/src/couch_proc_manager.erl | 16 ++++++++++++- src/couch/src/couch_server.erl | 6 ++++- src/couch/src/couch_stream.erl | 16 ++++++++++++- src/couch/src/couch_work_queue.erl | 25 ++++++++++++++++----- src/couch_index/src/couch_index.erl | 19 +++++++++++++++- src/couch_jobs/src/couch_jobs_notifier.erl | 22 +++++++++++++++++- src/couch_js/src/couch_js_native_process.erl | 18 ++++++++++++++- src/couch_js/src/couch_js_proc_manager.erl | 16 ++++++++++++- src/couch_log/src/couch_log_config.erl | 2 +- src/couch_mrview/src/couch_mrview_index.erl | 12 ++++++++++ src/couch_peruser/src/couch_peruser.erl | 13 ++++++++++- .../src/couch_replicator_auth_session.erl | 2 +- .../src/couch_replicator_httpc_pool.erl | 14 +++++++++++- src/couch_stats/src/couch_stats_aggregator.erl | 17 +++++++++++++- src/couch_views/src/couch_views_server.erl | 17 +++++++++++++- src/ddoc_cache/src/ddoc_cache_entry.erl | 21 ++++++++++++++++- src/dreyfus/src/dreyfus_index.erl | 26 +++++++++++++++++++++- src/fabric/src/fabric2_txids.erl | 15 ++++++++++++- src/global_changes/src/global_changes_server.erl | 11 +++++++-- src/ken/src/ken_server.erl | 16 ++++++++++++- 24 files changed, 316 insertions(+), 27 deletions(-) diff --git a/src/couch/include/couch_db.hrl b/src/couch/include/couch_db.hrl index 830b9bcf4..cc1fb5def 100644 --- a/src/couch/include/couch_db.hrl +++ b/src/couch/include/couch_db.hrl @@ -219,3 +219,6 @@ -type sec_props() :: [tuple()]. -type sec_obj() :: {sec_props()}. + +-define(record_to_keyval(Name, Record), + lists:zip(record_info(fields, Name), tl(tuple_to_list(Record)))). diff --git a/src/couch/src/couch_lru.erl b/src/couch/src/couch_lru.erl index 6ad7c65cd..a3057136f 100644 --- a/src/couch/src/couch_lru.erl +++ b/src/couch/src/couch_lru.erl @@ -11,13 +11,16 @@ % the License. -module(couch_lru). --export([new/0, insert/2, update/2, close/1]). +-export([new/0, sizes/1, insert/2, update/2, close/1]). -include("couch_server_int.hrl"). new() -> {gb_trees:empty(), dict:new()}. +sizes({Tree, Dict}) -> + {gb_trees:size(Tree), dict:size(Dict)}. + insert(DbName, {Tree0, Dict0}) -> Lru = couch_util:unique_monotonic_integer(), {gb_trees:insert(Lru, DbName, Tree0), dict:store(DbName, Lru, Dict0)}. diff --git a/src/couch/src/couch_multidb_changes.erl b/src/couch/src/couch_multidb_changes.erl index e2bbda3e3..09278656e 100644 --- a/src/couch/src/couch_multidb_changes.erl +++ b/src/couch/src/couch_multidb_changes.erl @@ -24,7 +24,8 @@ handle_call/3, handle_info/2, handle_cast/2, - code_change/3 + code_change/3, + format_status/2 ]). -export([ @@ -174,6 +175,17 @@ code_change(_OldVsn, State, _Extra) -> {ok, State}. +format_status(_Opt, [_PDict, State]) -> + #state{ + pids=Pids + } = State, + Scrubbed = State#state{ + pids={length, length(Pids)} + }, + [{data, [{"State", + ?record_to_keyval(state, Scrubbed) + }]}]. + % Private functions -spec register_with_event_server(pid()) -> reference(). diff --git a/src/couch/src/couch_native_process.erl b/src/couch/src/couch_native_process.erl index eee8b2860..0a228d4c5 100644 --- a/src/couch/src/couch_native_process.erl +++ b/src/couch/src/couch_native_process.erl @@ -42,7 +42,7 @@ -vsn(1). -export([start_link/0,init/1,terminate/2,handle_call/3,handle_cast/2,code_change/3, - handle_info/2]). + handle_info/2,format_status/2]). -export([set_timeout/2, prompt/2]). -define(STATE, native_proc_state). @@ -125,6 +125,21 @@ handle_info({'EXIT',_,Reason}, State) -> terminate(_Reason, _State) -> ok. code_change(_OldVersion, State, _Extra) -> {ok, State}. +format_status(_Opt, [_PDict, State]) -> + #evstate{ + ddocs = DDocs, + funs = Functions, + query_config = QueryConfig + } = State, + Scrubbed = State#evstate{ + ddocs = {dict_size, dict:size(DDocs)}, + funs = {length, length(Functions)}, + query_config = {length, length(QueryConfig)} + }, + [{data, [{"State", + ?record_to_keyval(evstate, Scrubbed) + }]}]. + run(#evstate{list_pid=Pid}=State, [<<"list_row">>, Row]) when is_pid(Pid) -> Pid ! {self(), list_row, Row}, receive diff --git a/src/couch/src/couch_proc_manager.erl b/src/couch/src/couch_proc_manager.erl index 376e12e74..b83d78882 100644 --- a/src/couch/src/couch_proc_manager.erl +++ b/src/couch/src/couch_proc_manager.erl @@ -31,7 +31,8 @@ handle_call/3, handle_cast/2, handle_info/2, - code_change/3 + code_change/3, + format_status/2 ]). -export([ @@ -268,6 +269,19 @@ handle_info(_Msg, State) -> code_change(_OldVsn, #state{}=State, _Extra) -> {ok, State}. + +format_status(_Opt, [_PDict, State]) -> + #state{ + counts=Counts + } = State, + Scrubbed = State#state{ + counts={dict_size, dict:size(Counts)} + }, + [{data, [{"State", + ?record_to_keyval(state, Scrubbed) + }]}]. + + handle_config_terminate(_, stop, _) -> ok; handle_config_terminate(_Server, _Reason, _State) -> diff --git a/src/couch/src/couch_server.erl b/src/couch/src/couch_server.erl index 18fa3fe61..f8de56b78 100644 --- a/src/couch/src/couch_server.erl +++ b/src/couch/src/couch_server.erl @@ -18,7 +18,7 @@ -export([open/2,create/2,delete/2,get_version/0,get_version/1,get_git_sha/0,get_uuid/0]). -export([all_databases/0, all_databases/2]). -export([init/1, handle_call/3,sup_start_link/0]). --export([handle_cast/2,code_change/3,handle_info/2,terminate/2]). +-export([handle_cast/2,code_change/3,handle_info/2,terminate/2,format_status/2]). -export([dev_start/0,is_admin/2,has_admins/0,get_stats/0]). -export([close_lru/0]). -export([close_db_if_idle/1]). @@ -288,6 +288,10 @@ terminate(Reason, Srv) -> end, nil, couch_dbs), ok. +format_status(_Opt, [_PDict, Srv]) -> + Scrubbed = Srv#server{lru=couch_lru:sizes(Srv#server.lru)}, + [{data, [{"State", ?record_to_keyval(server, Scrubbed)}]}]. + handle_config_change("couchdb", "database_dir", _, _, _) -> exit(whereis(couch_server), config_change), remove_handler; diff --git a/src/couch/src/couch_stream.erl b/src/couch/src/couch_stream.erl index 2ab46d7e7..d8b7e0ffe 100644 --- a/src/couch/src/couch_stream.erl +++ b/src/couch/src/couch_stream.erl @@ -36,7 +36,8 @@ handle_call/3, handle_cast/2, handle_info/2, - code_change/3 + code_change/3, + format_status/2 ]). @@ -294,6 +295,19 @@ handle_info(_Info, State) -> {noreply, State}. +format_status(_Opt, [_PDict, Stream]) -> + #stream{ + written_pointers=Pointers, + buffer_list = Buffer + } = Stream, + Scrubbed = Stream#stream{ + written_pointers={length, length(Pointers)}, + buffer_list = {length, length(Buffer)} + }, + [{data, [{"State", + ?record_to_keyval(stream, Scrubbed) + }]}]. + do_seek({Engine, EngineState}, Offset) -> {ok, NewState} = Engine:seek(EngineState, Offset), {Engine, NewState}. diff --git a/src/couch/src/couch_work_queue.erl b/src/couch/src/couch_work_queue.erl index 5d747de82..01271bb35 100644 --- a/src/couch/src/couch_work_queue.erl +++ b/src/couch/src/couch_work_queue.erl @@ -21,7 +21,7 @@ % gen_server callbacks -export([init/1, terminate/2]). --export([handle_call/3, handle_cast/2, code_change/3, handle_info/2]). +-export([handle_call/3, handle_cast/2, code_change/3, handle_info/2, format_status/2]). -record(q, { queue = queue:new(), @@ -49,7 +49,7 @@ queue(Wq, Item) -> dequeue(Wq) -> dequeue(Wq, all). - + dequeue(Wq, MaxItems) -> try gen_server:call(Wq, {dequeue, MaxItems}, infinity) @@ -76,7 +76,7 @@ size(Wq) -> close(Wq) -> gen_server:cast(Wq, close). - + init(Options) -> Q = #q{ @@ -90,7 +90,7 @@ init(Options) -> terminate(_Reason, #q{work_waiters=Workers}) -> lists:foreach(fun({W, _}) -> gen_server:reply(W, closed) end, Workers). - + handle_call({queue, Item, Size}, From, #q{work_waiters = []} = Q0) -> Q = Q0#q{size = Q0#q.size + Size, items = Q0#q.items + 1, @@ -172,7 +172,7 @@ dequeue_items(NumItems, Size, Queue, Blocked, DequeuedAcc) -> end, dequeue_items( NumItems - 1, Size - ItemSize, Queue2, Blocked2, [Item | DequeuedAcc]). - + handle_cast(close, #q{items = 0} = Q) -> {stop, normal, Q}; @@ -186,3 +186,18 @@ code_change(_OldVsn, State, _Extra) -> handle_info(X, Q) -> {stop, X, Q}. + +format_status(_Opt, [_PDict, Queue]) -> + #q{ + queue = Q, + blocked = Blocked, + work_waiters = Waiters + } = Queue, + Scrubbed = Queue#q{ + queue = {queue_length, queue:len(Q)}, + blocked = {length, length(Blocked)}, + work_waiters = {length, length(Waiters)} + }, + [{data, [{"State", + ?record_to_keyval(q, Scrubbed) + }]}]. diff --git a/src/couch_index/src/couch_index.erl b/src/couch_index/src/couch_index.erl index cfe0d9e4f..09bd48c61 100644 --- a/src/couch_index/src/couch_index.erl +++ b/src/couch_index/src/couch_index.erl @@ -23,7 +23,7 @@ -export([compact/1, compact/2, get_compactor_pid/1]). %% gen_server callbacks --export([init/1, terminate/2, code_change/3]). +-export([init/1, terminate/2, code_change/3, format_status/2]). -export([handle_call/3, handle_cast/2, handle_info/2]). @@ -375,6 +375,23 @@ handle_info({'DOWN', _, _, _Pid, _}, #st{mod=Mod, idx_state=IdxState}=State) -> code_change(_OldVsn, State, _Extra) -> {ok, State}. +format_status(Opt, [PDict, State]) -> + #st{ + mod = Mod, + waiters = Waiters, + idx_state = IdxState + } = State, + Scrubbed = State#st{waiters = {length, length(Waiters)}}, + IdxSafeState = case erlang:function_exported(Mod, format_status, 2) of + true -> + Mod:format_status(Opt, [PDict, IdxState]); + false -> + [] + end, + [{data, [{"State", + ?record_to_keyval(st, Scrubbed) ++ IdxSafeState + }]}]. + maybe_restart_updater(#st{waiters=[]}) -> ok; maybe_restart_updater(#st{idx_state=IdxState}=State) -> diff --git a/src/couch_jobs/src/couch_jobs_notifier.erl b/src/couch_jobs/src/couch_jobs_notifier.erl index ff4492bc5..99581cb79 100644 --- a/src/couch_jobs/src/couch_jobs_notifier.erl +++ b/src/couch_jobs/src/couch_jobs_notifier.erl @@ -27,7 +27,8 @@ handle_call/3, handle_cast/2, handle_info/2, - code_change/3 + code_change/3, + format_status/2 ]). @@ -135,6 +136,25 @@ code_change(_OldVsn, St, _Extra) -> {ok, St}. +format_status(_Opt, [_PDict, State]) -> + #st{ + jtx=JTx, + type=Type, + monitor_pid=MonitorPid, + subs=Subs, + pidmap=PidMap, + refmap=RefMap + } = State, + [{data, [{"State", [ + {jtx, JTx}, + {type, Type}, + {monitor_pid, MonitorPid}, + {subs, {map_size, maps:size(Subs)}}, + {pidmap, {map_size, maps:size(PidMap)}}, + {refmap, {map_size, maps:size(RefMap)}} + ]}]}]. + + update_subs(JobId, Refs, #st{subs = Subs} = St) when map_size(Refs) =:= 0 -> St#st{subs = maps:remove(JobId, Subs)}; diff --git a/src/couch_js/src/couch_js_native_process.erl b/src/couch_js/src/couch_js_native_process.erl index d2c4c1ee0..d5ed3f94f 100644 --- a/src/couch_js/src/couch_js_native_process.erl +++ b/src/couch_js/src/couch_js_native_process.erl @@ -42,7 +42,7 @@ -vsn(1). -export([start_link/0,init/1,terminate/2,handle_call/3,handle_cast/2,code_change/3, - handle_info/2]). + handle_info/2,format_status/2]). -export([set_timeout/2, prompt/2]). -define(STATE, native_proc_state). @@ -125,6 +125,22 @@ handle_info({'EXIT',_,Reason}, State) -> terminate(_Reason, _State) -> ok. code_change(_OldVersion, State, _Extra) -> {ok, State}. +format_status(_Opt, [_PDict, State]) -> + #evstate{ + ddocs = DDocs, + funs = Funs, + query_config = Config + } = State, + Scrubbed = State#evstate{ + ddocs = {dict_size, dict:size(DDocs)}, + funs = {length, length(Funs)}, + query_config = {length, length(Config)} + }, + [{data, [{"State", + ?record_to_keyval(evstate, Scrubbed) + }]}]. + + run(#evstate{list_pid=Pid}=State, [<<"list_row">>, Row]) when is_pid(Pid) -> Pid ! {self(), list_row, Row}, receive diff --git a/src/couch_js/src/couch_js_proc_manager.erl b/src/couch_js/src/couch_js_proc_manager.erl index 45f173668..db5c492f5 100644 --- a/src/couch_js/src/couch_js_proc_manager.erl +++ b/src/couch_js/src/couch_js_proc_manager.erl @@ -30,7 +30,8 @@ handle_call/3, handle_cast/2, handle_info/2, - code_change/3 + code_change/3, + format_status/2 ]). -export([ @@ -267,6 +268,19 @@ handle_info(_Msg, State) -> code_change(_OldVsn, #state{}=State, _Extra) -> {ok, State}. + +format_status(_Opt, [_PDict, State]) -> + #state{ + counts = Counts + } = State, + Scrubbed = State#state{ + counts = {dict_size, dict:size(Counts)} + }, + [{data, [{"State", + ?record_to_keyval(state, Scrubbed) + }]}]. + + handle_config_terminate(_, stop, _) -> ok; handle_config_terminate(_Server, _Reason, _State) -> diff --git a/src/couch_log/src/couch_log_config.erl b/src/couch_log/src/couch_log_config.erl index a7a469cc3..ab076cc69 100644 --- a/src/couch_log/src/couch_log_config.erl +++ b/src/couch_log/src/couch_log_config.erl @@ -104,4 +104,4 @@ transform(strip_last_msg, "false") -> false; transform(strip_last_msg, _) -> - true. \ No newline at end of file + true. diff --git a/src/couch_mrview/src/couch_mrview_index.erl b/src/couch_mrview/src/couch_mrview_index.erl index cc013c5bd..6ae7874c9 100644 --- a/src/couch_mrview/src/couch_mrview_index.erl +++ b/src/couch_mrview/src/couch_mrview_index.erl @@ -20,6 +20,7 @@ -export([index_file_exists/1]). -export([update_local_purge_doc/2, verify_index_exists/2]). -export([ensure_local_purge_docs/2]). +-export([format_status/2]). -include_lib("couch/include/couch_db.hrl"). -include_lib("couch_mrview/include/couch_mrview.hrl"). @@ -324,3 +325,14 @@ update_local_purge_doc(Db, State, PSeq) -> BaseDoc end, couch_db:update_doc(Db, Doc, []). + +format_status(_Opt, [_PDict, State]) -> + Scrubbed = State#mrst{ + lib = nil, + views = nil, + id_btree = nil, + doc_acc = nil, + doc_queue = nil, + write_queue = nil + }, + ?record_to_keyval(mrst, Scrubbed). diff --git a/src/couch_peruser/src/couch_peruser.erl b/src/couch_peruser/src/couch_peruser.erl index 886fb4f6e..4c06e8f27 100644 --- a/src/couch_peruser/src/couch_peruser.erl +++ b/src/couch_peruser/src/couch_peruser.erl @@ -19,7 +19,7 @@ % gen_server callbacks -export([start_link/0, init/1, handle_call/3, handle_cast/2, handle_info/2, - terminate/2, code_change/3]). + terminate/2, code_change/3, format_status/2]). -export([init_changes_handler/1, changes_handler/3]). @@ -410,3 +410,14 @@ terminate(_Reason, _State) -> code_change(_OldVsn, State, _Extra) -> {ok, State}. + + format_status(_Opt, [_PDict, State]) -> + #state{ + states = States + } = State, + Scrubbed = State#state{ + states = {length, length(States)} + }, + [{data, [{"State", + ?record_to_keyval(state, Scrubbed) + }]}]. \ No newline at end of file diff --git a/src/couch_replicator/src/couch_replicator_auth_session.erl b/src/couch_replicator/src/couch_replicator_auth_session.erl index 30f499a33..a59c770b4 100644 --- a/src/couch_replicator/src/couch_replicator_auth_session.erl +++ b/src/couch_replicator/src/couch_replicator_auth_session.erl @@ -187,7 +187,7 @@ format_status(_Opt, [_PDict, State]) -> [ {epoch, State#state.epoch}, {user, State#state.user}, - {session_url, State#state.session_url}, + {session_url, couch_util:url_strip_password(State#state.session_url)}, {refresh_tstamp, State#state.refresh_tstamp} ]. diff --git a/src/couch_replicator/src/couch_replicator_httpc_pool.erl b/src/couch_replicator/src/couch_replicator_httpc_pool.erl index 90234a6a0..c63a5efa6 100644 --- a/src/couch_replicator/src/couch_replicator_httpc_pool.erl +++ b/src/couch_replicator/src/couch_replicator_httpc_pool.erl @@ -20,7 +20,7 @@ % gen_server API -export([init/1, handle_call/3, handle_info/2, handle_cast/2]). --export([code_change/3, terminate/2]). +-export([code_change/3, terminate/2, format_status/2]). -include_lib("couch/include/couch_db.hrl"). @@ -145,6 +145,18 @@ code_change(_OldVsn, #state{}=State, _Extra) -> terminate(_Reason, _State) -> ok. +format_status(_Opt, [_PDict, State]) -> + #state{ + url = Url, + proxy_url = ProxyURL, + limit = Limit + } = State, + {[ + {url, couch_util:url_strip_password(Url)}, + {proxy_url, couch_util:url_strip_password(ProxyURL)}, + {limit, Limit} + ]}. + monitor_client(Callers, Worker, {ClientPid, _}) -> [{Worker, erlang:monitor(process, ClientPid)} | Callers]. diff --git a/src/couch_stats/src/couch_stats_aggregator.erl b/src/couch_stats/src/couch_stats_aggregator.erl index 0416636c9..8d8cdf7e5 100644 --- a/src/couch_stats/src/couch_stats_aggregator.erl +++ b/src/couch_stats/src/couch_stats_aggregator.erl @@ -27,7 +27,8 @@ handle_cast/2, handle_info/2, code_change/3, - terminate/2 + terminate/2, + format_status/2 ]). @@ -88,6 +89,20 @@ terminate(_Reason, _State) -> code_change(_OldVsn, State, _Extra) -> {ok, State}. +format_status(_Opt, [_PDict, State]) -> + #st{ + descriptions=Descs, + stats=Stats, + collect_timer=CollectT, + reload_timer=ReloadT + } = State, + [{data, [{"State", [ + {descriptions, {set_size, sets:size(Descs)}}, + {stats, {length, length(Stats)}}, + {collect_timer,CollectT}, + {reload_timer,ReloadT} + ]}]}]. + comparison_set(Metrics) -> sets:from_list( [{Name, proplists:get_value(type, Props)} || {Name, Props} <- Metrics] diff --git a/src/couch_views/src/couch_views_server.erl b/src/couch_views/src/couch_views_server.erl index e45a9f315..71a4abb8d 100644 --- a/src/couch_views/src/couch_views_server.erl +++ b/src/couch_views/src/couch_views_server.erl @@ -30,7 +30,8 @@ handle_call/3, handle_cast/2, handle_info/2, - code_change/3 + code_change/3, + format_status/2 ]). -define(MAX_ACCEPTORS, 5). @@ -108,6 +109,20 @@ code_change(_OldVsn, St, _Extra) -> {ok, St}. +format_status(_Opt, [_PDict, State]) -> + #{ + workers := Workers, + acceptors := Acceptors + } = State, + Scrubbed = State#{ + workers => {map_size, maps:size(Workers)}, + acceptors => {map_size, maps:size(Acceptors)} + }, + [{data, [{"State", + Scrubbed + }]}]. + + % Worker process exit handlers handle_acceptor_exit(#{acceptors := Acceptors} = St, Pid, Reason) -> diff --git a/src/ddoc_cache/src/ddoc_cache_entry.erl b/src/ddoc_cache/src/ddoc_cache_entry.erl index 4cc3d7e52..ed0311bbd 100644 --- a/src/ddoc_cache/src/ddoc_cache_entry.erl +++ b/src/ddoc_cache/src/ddoc_cache_entry.erl @@ -34,7 +34,8 @@ handle_call/3, handle_cast/2, handle_info/2, - code_change/3 + code_change/3, + format_status/2 ]). -export([ @@ -282,6 +283,24 @@ code_change(_, St, _) -> {ok, St}. +format_status(_Opt, [_PDict, State]) -> + #st{ + key = Key, + val = Val, + opener = Opener, + waiters = Waiters, + ts = TS, + accessed = Accepted + } = State, + [{data, [{"State", [ + {key, Key}, + {val, Val}, + {opener, Opener}, + {waiters, {length, length(Waiters)}}, + {ts, TS}, + {accessed, Accepted} + ]}]}]. + spawn_opener(Key) -> {Pid, _} = erlang:spawn_monitor(?MODULE, do_open, [Key]), Pid. diff --git a/src/dreyfus/src/dreyfus_index.erl b/src/dreyfus/src/dreyfus_index.erl index 2bf560f37..7236eb16b 100644 --- a/src/dreyfus/src/dreyfus_index.erl +++ b/src/dreyfus/src/dreyfus_index.erl @@ -29,7 +29,7 @@ % gen_server api. -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, - code_change/3]). + code_change/3, format_status/2]). % private definitions. -record(state, { @@ -244,6 +244,30 @@ terminate(_Reason, _State) -> code_change(_OldVsn, State, _Extra) -> {ok, State}. +format_status(_Opt, [_PDict, #state{index = #index{} = Index} = State]) -> + #index{ + ddoc_id=Id, + name=IndexName, + sig=Sig + } = Index, + IndexScrubbed = [{ + {ddoc_id, Id}, + {name, IndexName}, + {sig, Sig} + }], + Scrubbed = State#state{ + index = IndexScrubbed, + waiting_list = {length, length(State#state.waiting_list)} + }, + ?record_to_keyval(state, Scrubbed); + +format_status(_Opt, [_PDict, #state{} = State]) -> + Scrubbed = State#state{ + index = nil, + waiting_list = {length, length(State#state.waiting_list)} + }, + ?record_to_keyval(state, Scrubbed). + % private functions. open_index(DbName, #index{analyzer=Analyzer, sig=Sig}) -> diff --git a/src/fabric/src/fabric2_txids.erl b/src/fabric/src/fabric2_txids.erl index 046a7484a..285e342ed 100644 --- a/src/fabric/src/fabric2_txids.erl +++ b/src/fabric/src/fabric2_txids.erl @@ -28,7 +28,8 @@ handle_call/3, handle_cast/2, handle_info/2, - code_change/3 + code_change/3, + format_status/2 ]). @@ -110,6 +111,18 @@ code_change(_OldVsn, St, _Extra) -> {ok, St}. +format_status(_Opt, [_PDict, State]) -> + #{ + txids := TxIds + } = State, + Scrubbed = State#{ + txids => {length, length(TxIds)} + }, + [{data, [{"State", + Scrubbed + }]}]. + + clean(St, NeedsSweep) -> #{ last_sweep := LastSweep, diff --git a/src/global_changes/src/global_changes_server.erl b/src/global_changes/src/global_changes_server.erl index 7e3062586..a116e0668 100644 --- a/src/global_changes/src/global_changes_server.erl +++ b/src/global_changes/src/global_changes_server.erl @@ -25,7 +25,8 @@ handle_call/3, handle_cast/2, handle_info/2, - code_change/3 + code_change/3, + format_status/2 ]). -export([ @@ -143,7 +144,13 @@ handle_info(_, State) -> code_change(_OldVsn, State, _Extra) -> {ok, State}. - +format_status(_Opt, [_PDict, State]) -> + Scrubbed = State#state{ + pending_updates=nil + }, + [{data, [{"State", + ?record_to_keyval(state, Scrubbed) + }]}]. flush_updates(State) -> DocIds = sets:to_list(State#state.pending_updates), diff --git a/src/ken/src/ken_server.erl b/src/ken/src/ken_server.erl index b33d01f35..74c8e25ac 100644 --- a/src/ken/src/ken_server.erl +++ b/src/ken/src/ken_server.erl @@ -16,7 +16,9 @@ -behaviour(gen_server). -vsn(1). -export([init/1, terminate/2]). --export([handle_call/3, handle_cast/2, handle_info/2, code_change/3]). +-export([ + handle_call/3, handle_cast/2, handle_info/2, code_change/3,format_status/2 +]). % Public interface -export([start_link/0]). @@ -228,6 +230,18 @@ handle_info(Msg, State) -> code_change(_OldVsn, State, _Extra) -> {ok, State}. + +format_status(_Opt, [_PDict, State]) -> + #state{ + q = Queue + } = State, + Scrubbed = State#state{ + q = {queue_length, queue:len(Queue)} + }, + [{data, [{"State", + ?record_to_keyval(state, Scrubbed) + }]}]. + %% private functions maybe_start_next_queued_job(#state{dbworker = {_,_}} = State) -> -- cgit v1.2.1 From 52d532727a3cc7b94516e9ba4b0e1c7ba447fd3c Mon Sep 17 00:00:00 2001 From: ILYA Khlopotov Date: Thu, 23 Jul 2020 08:05:50 -0700 Subject: Do not log sensitive data during _cluster_setup --- src/setup/src/setup_httpd.erl | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/src/setup/src/setup_httpd.erl b/src/setup/src/setup_httpd.erl index 949675b6a..48b1b2a5a 100644 --- a/src/setup/src/setup_httpd.erl +++ b/src/setup/src/setup_httpd.erl @@ -19,7 +19,7 @@ handle_setup_req(#httpd{method='POST'}=Req) -> ok = chttpd:verify_is_server_admin(Req), couch_httpd:validate_ctype(Req, "application/json"), Setup = get_body(Req), - couch_log:notice("Setup: ~p~n", [Setup]), + couch_log:notice("Setup: ~p~n", [remove_sensitive(Setup)]), Action = binary_to_list(couch_util:get_value(<<"action">>, Setup, <<"missing">>)), case handle_action(Action, Setup) of ok -> @@ -91,7 +91,7 @@ handle_action("enable_cluster", Setup) -> handle_action("finish_cluster", Setup) -> - couch_log:notice("finish_cluster: ~p~n", [Setup]), + couch_log:notice("finish_cluster: ~p~n", [remove_sensitive(Setup)]), Options = get_options([ {ensure_dbs_exist, <<"ensure_dbs_exist">>} @@ -105,7 +105,7 @@ handle_action("finish_cluster", Setup) -> end; handle_action("enable_single_node", Setup) -> - couch_log:notice("enable_single_node: ~p~n", [Setup]), + couch_log:notice("enable_single_node: ~p~n", [remove_sensitive(Setup)]), Options = get_options([ {ensure_dbs_exist, <<"ensure_dbs_exist">>}, @@ -125,7 +125,7 @@ handle_action("enable_single_node", Setup) -> handle_action("add_node", Setup) -> - couch_log:notice("add_node: ~p~n", [Setup]), + couch_log:notice("add_node: ~p~n", [remove_sensitive(Setup)]), Options = get_options([ {username, <<"username">>}, @@ -147,10 +147,10 @@ handle_action("add_node", Setup) -> end; handle_action("remove_node", Setup) -> - couch_log:notice("remove_node: ~p~n", [Setup]); + couch_log:notice("remove_node: ~p~n", [remove_sensitive(Setup)]); handle_action("receive_cookie", Setup) -> - couch_log:notice("receive_cookie: ~p~n", [Setup]), + couch_log:notice("receive_cookie: ~p~n", [remove_sensitive(Setup)]), Options = get_options([ {cookie, <<"cookie">>} ], Setup), @@ -173,3 +173,8 @@ get_body(Req) -> couch_log:notice("Body Fail: ~p~n", [Else]), couch_httpd:send_error(Req, 400, <<"bad_request">>, <<"Missing JSON body'">>) end. + +remove_sensitive(KVList0) -> + KVList1 = lists:keyreplace(<<"username">>, 1, KVList0, {<<"username">>, <<"****">>}), + KVList2 = lists:keyreplace(<<"password">>, 1, KVList1, {<<"password">>, <<"****">>}), + KVList2. \ No newline at end of file -- cgit v1.2.1 From 8360026459ac969325357aabc3ecb9850365d589 Mon Sep 17 00:00:00 2001 From: ILYA Khlopotov Date: Wed, 29 Jul 2020 07:11:06 -0700 Subject: Do not log admin credentials --- rebar.config.script | 2 +- src/chttpd/src/chttpd_node.erl | 4 +++- src/setup/src/setup.erl | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/rebar.config.script b/rebar.config.script index f8a24163f..007724088 100644 --- a/rebar.config.script +++ b/rebar.config.script @@ -151,7 +151,7 @@ SubDirs = [ DepDescs = [ %% Independent Apps -{config, "config", {tag, "2.1.7"}}, +{config, "config", {tag, "2.1.8"}}, {b64url, "b64url", {tag, "1.0.2"}}, {erlfdb, "erlfdb", {tag, "v1.2.2"}}, {ets_lru, "ets-lru", {tag, "1.1.0"}}, diff --git a/src/chttpd/src/chttpd_node.erl b/src/chttpd/src/chttpd_node.erl index 033abd68d..1ca4bbd5e 100644 --- a/src/chttpd/src/chttpd_node.erl +++ b/src/chttpd/src/chttpd_node.erl @@ -70,7 +70,9 @@ handle_node_req(#httpd{method='PUT', path_parts=[_, Node, <<"_config">>, Section Value = couch_util:trim(chttpd:json_body(Req)), Persist = chttpd:header_value(Req, "X-Couch-Persist") /= "false", OldValue = call_node(Node, config, get, [Section, Key, ""]), - case call_node(Node, config, set, [Section, Key, ?b2l(Value), Persist]) of + IsSensitive = Section == <<"admins">>, + Opts = #{persisit => Persist, sensitive => IsSensitive}, + case call_node(Node, config, set, [Section, Key, ?b2l(Value), Opts]) of ok -> send_json(Req, 200, list_to_binary(OldValue)); {error, Reason} -> diff --git a/src/setup/src/setup.erl b/src/setup/src/setup.erl index 3d23229b8..c748cbcdc 100644 --- a/src/setup/src/setup.erl +++ b/src/setup/src/setup.erl @@ -165,7 +165,7 @@ enable_cluster_int(Options, false) -> couch_log:debug("Enable Cluster: ~p~n", [Options]). set_admin(Username, Password) -> - config:set("admins", binary_to_list(Username), binary_to_list(Password)). + config:set("admins", binary_to_list(Username), binary_to_list(Password), #{sensitive => true}). setup_node(NewCredentials, NewBindAddress, NodeCount, Port) -> case NewCredentials of -- cgit v1.2.1 From e4555a42e35448cd0978e21d440735bd2fd5c185 Mon Sep 17 00:00:00 2001 From: ILYA Khlopotov Date: Thu, 30 Jul 2020 05:19:49 -0700 Subject: Update config app --- rebar.config.script | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rebar.config.script b/rebar.config.script index 007724088..963d97fb1 100644 --- a/rebar.config.script +++ b/rebar.config.script @@ -166,7 +166,7 @@ DepDescs = [ %% Third party deps {folsom, "folsom", {tag, "CouchDB-0.8.3"}}, {hyper, "hyper", {tag, "CouchDB-2.2.0-6"}}, -{ibrowse, "ibrowse", {tag, "CouchDB-4.0.1-1"}}, +{ibrowse, "ibrowse", {tag, "CouchDB-4.0.1-2"}}, {jaeger_passage, "jaeger-passage", {tag, "CouchDB-0.1.14-1"}}, {jiffy, "jiffy", {tag, "CouchDB-1.0.4-1"}}, {local, "local", {tag, "0.2.1"}}, -- cgit v1.2.1 From 8b49b0d63485afe68d559f3532be4827eee5bd8c Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Thu, 30 Jul 2020 17:21:25 -0400 Subject: Allow interactive requests to reopen a re-created db instance Previously, if a database was re-created on another node, a request with that database might have found the previous db instance in the cache. In that case it would have correctly reopened the db while in a transaction, but, because the old db instance was deleted it would throw a database_does_not_exist which was not the correct behavior. To prevent that from happening, introduce an interactive = true|false option when opening a database. User requests may specify that option and then when the db is re-opened, it will allow it to automatically upgrade to the new db instance instead returning an error. Background processes will still get a database_doest_not_exist error if they keep a db open which has now been re-created. The interactive option may also be used in the future to set other transaction parameters like timeouts and retries that might be different for interactive requests vs background tasks. --- src/chttpd/src/chttpd_db.erl | 3 +- src/fabric/src/fabric2_db.erl | 8 +++- src/fabric/src/fabric2_fdb.erl | 24 ++++++++---- src/fabric/src/fabric2_server.erl | 3 +- src/fabric/test/fabric2_db_crud_tests.erl | 28 ++++++++++++++ src/fabric/test/fabric2_db_security_tests.erl | 55 +++++++++++++++++++++------ 6 files changed, 99 insertions(+), 22 deletions(-) diff --git a/src/chttpd/src/chttpd_db.erl b/src/chttpd/src/chttpd_db.erl index fdaf4af8c..8acccb461 100644 --- a/src/chttpd/src/chttpd_db.erl +++ b/src/chttpd/src/chttpd_db.erl @@ -395,7 +395,8 @@ delete_db_req(#httpd{user_ctx=Ctx}=Req, DbName) -> end. do_db_req(#httpd{path_parts=[DbName|_], user_ctx=Ctx}=Req, Fun) -> - {ok, Db} = fabric2_db:open(DbName, [{user_ctx, Ctx}]), + Options = [{user_ctx, Ctx}, {interactive, true}], + {ok, Db} = fabric2_db:open(DbName, Options), Fun(Req, Db). db_req(#httpd{method='GET',path_parts=[_DbName]}=Req, Db) -> diff --git a/src/fabric/src/fabric2_db.erl b/src/fabric/src/fabric2_db.erl index 667cf35c6..4ac105589 100644 --- a/src/fabric/src/fabric2_db.erl +++ b/src/fabric/src/fabric2_db.erl @@ -201,7 +201,8 @@ open(DbName, Options) -> case fabric2_server:fetch(DbName, UUID) of #{} = Db -> Db1 = maybe_set_user_ctx(Db, Options), - {ok, require_member_check(Db1)}; + Db2 = maybe_set_interactive(Db1, Options), + {ok, require_member_check(Db2)}; undefined -> Result = fabric2_fdb:transactional(DbName, Options, fun(TxDb) -> fabric2_fdb:open(TxDb, Options) @@ -1426,6 +1427,11 @@ get_all_docs_meta(TxDb, Options) -> end ++ [{total, DocCount}, {offset, null}]. +maybe_set_interactive(#{} = Db, Options) -> + Interactive = fabric2_util:get_value(interactive, Options, false), + Db#{interactive := Interactive}. + + maybe_set_user_ctx(Db, Options) -> case fabric2_util:get_value(user_ctx, Options) of #user_ctx{} = UserCtx -> diff --git a/src/fabric/src/fabric2_fdb.erl b/src/fabric/src/fabric2_fdb.erl index f721ca4ab..52303cef1 100644 --- a/src/fabric/src/fabric2_fdb.erl +++ b/src/fabric/src/fabric2_fdb.erl @@ -243,7 +243,8 @@ create(#{} = Db0, Options) -> after_doc_read => undefined, % All other db things as we add features, - db_options => Options1 + db_options => Options1, + interactive => false }, aegis:init_db(Db2, Options). @@ -270,6 +271,9 @@ open(#{} = Db0, Options) -> UUID = fabric2_util:get_value(uuid, Options1), Options2 = lists:keydelete(uuid, 1, Options1), + Interactive = fabric2_util:get_value(interactive, Options2, false), + Options3 = lists:keydelete(interactive, 1, Options2), + Db2 = Db1#{ db_prefix => DbPrefix, db_version => DbVersion, @@ -287,7 +291,8 @@ open(#{} = Db0, Options) -> before_doc_update => undefined, after_doc_read => undefined, - db_options => Options2 + db_options => Options3, + interactive => Interactive }, Db3 = load_config(Db2), @@ -318,7 +323,8 @@ refresh(#{tx := undefined, name := DbName} = Db) -> #{md_version := Ver} = Db1 when Ver > OldVer -> Db1#{ user_ctx := maps:get(user_ctx, Db), - security_fun := maps:get(security_fun, Db) + security_fun := maps:get(security_fun, Db), + interactive := maps:get(interactive, Db) }; _ -> Db @@ -337,18 +343,20 @@ reopen(#{} = OldDb) -> uuid := UUID, db_options := Options, user_ctx := UserCtx, - security_fun := SecurityFun + security_fun := SecurityFun, + interactive := Interactive } = OldDb, Options1 = lists:keystore(user_ctx, 1, Options, {user_ctx, UserCtx}), NewDb = open(init_db(Tx, DbName, Options1), Options1), % Check if database was re-created - case maps:get(uuid, NewDb) of - UUID -> ok; - _OtherUUID -> error(database_does_not_exist) + case {Interactive, maps:get(uuid, NewDb)} of + {true, _} -> ok; + {false, UUID} -> ok; + {false, _OtherUUID} -> error(database_does_not_exist) end, - NewDb#{security_fun := SecurityFun}. + NewDb#{security_fun := SecurityFun, interactive := Interactive}. delete(#{} = Db) -> diff --git a/src/fabric/src/fabric2_server.erl b/src/fabric/src/fabric2_server.erl index b557da8c7..be674b10e 100644 --- a/src/fabric/src/fabric2_server.erl +++ b/src/fabric/src/fabric2_server.erl @@ -271,5 +271,6 @@ sanitize(#{} = Db) -> Db#{ tx := undefined, user_ctx := #user_ctx{}, - security_fun := undefined + security_fun := undefined, + interactive := false }. diff --git a/src/fabric/test/fabric2_db_crud_tests.erl b/src/fabric/test/fabric2_db_crud_tests.erl index 000f3709c..3d90c65b5 100644 --- a/src/fabric/test/fabric2_db_crud_tests.erl +++ b/src/fabric/test/fabric2_db_crud_tests.erl @@ -38,6 +38,8 @@ crud_test_() -> ?TDEF_FE(open_db), ?TDEF_FE(delete_db), ?TDEF_FE(recreate_db), + ?TDEF_FE(recreate_db_interactive), + ?TDEF_FE(recreate_db_non_interactive), ?TDEF_FE(undelete_db), ?TDEF_FE(remove_deleted_db), ?TDEF_FE(scheduled_remove_deleted_db, 15), @@ -179,6 +181,32 @@ recreate_db(_) -> ?assertError(database_does_not_exist, fabric2_db:open(DbName, BadOpts)). +recreate_db_interactive(_) -> + DbName = ?tempdb(), + ?assertMatch({ok, _}, fabric2_db:create(DbName, [])), + + {ok, Db1} = fabric2_db:open(DbName, [{interactive, true}]), + + ?assertEqual(ok, fabric2_db:delete(DbName, [])), + ?assertMatch({ok, _}, fabric2_db:create(DbName, [])), + + ?assertMatch({ok, _}, fabric2_db:get_db_info(Db1)). + + +recreate_db_non_interactive(_) -> + % This is also the default case, but we check that parsing the `false` open + % value works correctly. + DbName = ?tempdb(), + ?assertMatch({ok, _}, fabric2_db:create(DbName, [])), + + {ok, Db1} = fabric2_db:open(DbName, [{interactive, false}]), + + ?assertEqual(ok, fabric2_db:delete(DbName, [])), + ?assertMatch({ok, _}, fabric2_db:create(DbName, [])), + + ?assertError(database_does_not_exist, fabric2_db:get_db_info(Db1)). + + undelete_db(_) -> DbName = ?tempdb(), ?assertError(database_does_not_exist, fabric2_db:delete(DbName, [])), diff --git a/src/fabric/test/fabric2_db_security_tests.erl b/src/fabric/test/fabric2_db_security_tests.erl index 063979a3f..3d7167a00 100644 --- a/src/fabric/test/fabric2_db_security_tests.erl +++ b/src/fabric/test/fabric2_db_security_tests.erl @@ -40,7 +40,8 @@ security_test_() -> ?TDEF(check_set_user_ctx), ?TDEF(check_forbidden), ?TDEF(check_fail_no_opts), - ?TDEF(check_fail_name_null) + ?TDEF(check_fail_name_null), + ?TDEF(check_forbidden_with_interactive_reopen) ]) } }. @@ -51,6 +52,18 @@ setup() -> DbName = ?tempdb(), PubDbName = ?tempdb(), {ok, Db1} = fabric2_db:create(DbName, [?ADMIN_CTX]), + ok = set_test_security(Db1), + {ok, _} = fabric2_db:create(PubDbName, [?ADMIN_CTX]), + {DbName, PubDbName, Ctx}. + + +cleanup({DbName, PubDbName, Ctx}) -> + ok = fabric2_db:delete(DbName, []), + ok = fabric2_db:delete(PubDbName, []), + test_util:stop_couch(Ctx). + + +set_test_security(Db) -> SecProps = {[ {<<"admins">>, {[ {<<"names">>, [<<"admin_name1">>, <<"admin_name2">>]}, @@ -61,16 +74,7 @@ setup() -> {<<"roles">>, [<<"member_role1">>, <<"member_role2">>]} ]}} ]}, - ok = fabric2_db:set_security(Db1, SecProps), - {ok, _} = fabric2_db:create(PubDbName, [?ADMIN_CTX]), - {DbName, PubDbName, Ctx}. - - -cleanup({DbName, PubDbName, Ctx}) -> - ok = fabric2_db:delete(DbName, []), - ok = fabric2_db:delete(PubDbName, []), - test_util:stop_couch(Ctx). - + ok = fabric2_db:set_security(Db, SecProps). check_is_admin({DbName, _, _}) -> @@ -184,3 +188,32 @@ check_fail_name_null({DbName, _, _}) -> UserCtx = #user_ctx{name = null}, {ok, Db} = fabric2_db:open(DbName, [{user_ctx, UserCtx}]), ?assertThrow({unauthorized, _}, fabric2_db:get_db_info(Db)). + + +check_forbidden_with_interactive_reopen({DbName, _, _}) -> + UserCtx = #user_ctx{name = <<"foo">>}, + Options = [{user_ctx, UserCtx}, {interactive, true}], + + {ok, Db1} = fabric2_db:open(DbName, Options), + + % Verify foo is forbidden by default + ?assertThrow({forbidden, _}, fabric2_db:get_db_info(Db1)), + + % Allow foo + {ok, Db2} = fabric2_db:open(DbName, [?ADMIN_CTX]), + AllowFoo = {[ + {<<"members">>, {[ + {<<"names">>, [<<"foo">>]} + ]}} + ]}, + ok = fabric2_db:set_security(Db2, AllowFoo), + + ?assertMatch({ok, _}, fabric2_db:get_db_info(Db1)), + + % Recreate test db instance with the default security + ok = fabric2_db:delete(DbName, [?ADMIN_CTX]), + {ok, Db3} = fabric2_db:create(DbName, [?ADMIN_CTX]), + ok = set_test_security(Db3), + + % Original handle is forbidden to again + ?assertThrow({forbidden, _}, fabric2_db:get_db_info(Db1)). -- cgit v1.2.1 From f43f78a9f9b2a157c2443fea596178c7dedb7e2c Mon Sep 17 00:00:00 2001 From: Joan Touzet Date: Sat, 1 Aug 2020 23:10:46 -0400 Subject: Windows: provide full path to epmd --- rel/files/couchdb.cmd.in | 1 + 1 file changed, 1 insertion(+) diff --git a/rel/files/couchdb.cmd.in b/rel/files/couchdb.cmd.in index df9944196..244803bc8 100644 --- a/rel/files/couchdb.cmd.in +++ b/rel/files/couchdb.cmd.in @@ -31,6 +31,7 @@ IF NOT DEFINED COUCHDB_FAUXTON_DOCROOT SET COUCHDB_FAUXTON_DOCROOT={{fauxton_roo "%BINDIR%\erl" -boot "%ROOTDIR%\releases\%APP_VSN%\couchdb" ^ -args_file "%ROOTDIR%\etc\vm.args" ^ +-epmd "%BINDIR%\epmd.exe" ^ -config "%ROOTDIR%\releases\%APP_VSN%\sys.config" %* :: EXIT /B -- cgit v1.2.1 From bd536782778ff9bfd7ed2d7022915942389151b6 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Tue, 4 Aug 2020 12:18:40 +0100 Subject: Optionally add a key manager application as a dependency --- src/aegis/src/aegis.app.src | 34 ------------------------ src/aegis/src/aegis.app.src.script | 53 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+), 34 deletions(-) delete mode 100644 src/aegis/src/aegis.app.src create mode 100644 src/aegis/src/aegis.app.src.script diff --git a/src/aegis/src/aegis.app.src b/src/aegis/src/aegis.app.src deleted file mode 100644 index deb152674..000000000 --- a/src/aegis/src/aegis.app.src +++ /dev/null @@ -1,34 +0,0 @@ -% Licensed under the Apache License, Version 2.0 (the "License"); you may not -% use this file except in compliance with the License. You may obtain a copy of -% the License at -% -% http://www.apache.org/licenses/LICENSE-2.0 -% -% Unless required by applicable law or agreed to in writing, software -% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -% License for the specific language governing permissions and limitations under -% the License. - -{application, aegis, - [ - {description, "If it's good enough for Zeus, it's good enough for CouchDB"}, - {vsn, git}, - {mod, {aegis_app, []}}, - {registered, [ - aegis_server - ]}, - {applications, - [kernel, - stdlib, - crypto, - couch_log, - erlfdb - ]}, - {env,[]}, - {modules, []}, - {maintainers, []}, - {licenses, []}, - {links, []} - ] -}. diff --git a/src/aegis/src/aegis.app.src.script b/src/aegis/src/aegis.app.src.script new file mode 100644 index 000000000..f54688cf2 --- /dev/null +++ b/src/aegis/src/aegis.app.src.script @@ -0,0 +1,53 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +CouchConfig = case filelib:is_file(os:getenv("COUCHDB_CONFIG")) of + true -> + {ok, Result} = file:consult(os:getenv("COUCHDB_CONFIG")), + Result; + false -> + [] +end. + +AegisKeyManagerApp = case lists:keyfind(aegis_key_manager_app, 1, CouchConfig) of + {aegis_key_manager_app, AppName} when AppName /= "" -> + [list_to_atom(AppName)]; + _ -> + [] +end. + +BaseApplications = [ + kernel, + stdlib, + crypto, + couch_log, + erlfdb +]. + +Applications = AegisKeyManagerApp ++ BaseApplications. + +{application, aegis, + [ + {description, "If it's good enough for Zeus, it's good enough for CouchDB"}, + {vsn, git}, + {mod, {aegis_app, []}}, + {registered, [ + aegis_server + ]}, + {applications, Applications}, + {env,[]}, + {modules, []}, + {maintainers, []}, + {licenses, []}, + {links, []} + ] +}. -- cgit v1.2.1 From 683335cb4f5b50fe0af0ea12317d501e995922f7 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Tue, 4 Aug 2020 21:28:58 +0100 Subject: Validate the result from collate_fun --- src/ebtree/src/ebtree.erl | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/ebtree/src/ebtree.erl b/src/ebtree/src/ebtree.erl index bae0ff310..c28693dbc 100644 --- a/src/ebtree/src/ebtree.erl +++ b/src/ebtree/src/ebtree.erl @@ -918,7 +918,12 @@ collate(#tree{} = _Tree, _A, ?MAX) -> collate(#tree{} = Tree, A, B) -> #tree{collate_fun = CollateFun} = Tree, - CollateFun(A, B). + case CollateFun(A, B) of + lt -> lt; + eq -> eq; + gt -> gt; + _ -> error(invalid_collation_result) + end. collate(#tree{} = Tree, A, B, Allowed) -> @@ -1090,6 +1095,11 @@ collation_fun_test_() -> ]. +collate_validation_test() -> + Tree = #tree{collate_fun = fun(_A, _B) -> foo end}, + ?assertError(invalid_collation_result, collate(Tree, 1, 2)). + + lookup_test() -> Db = erlfdb_util:get_test_db([empty]), Tree = open(Db, <<1,2,3>>, 4), -- cgit v1.2.1 From 282e85814b7025da76e88e8cfc2413aacd6808b8 Mon Sep 17 00:00:00 2001 From: garren smith Date: Wed, 5 Aug 2020 09:25:29 +0200 Subject: add local_seq option to views (#3043) add local_seq option to views --- src/couch_views/src/couch_views_indexer.erl | 29 ++++++++++----- src/couch_views/test/couch_views_map_test.erl | 51 ++++++++++++++++++++++++++- test/elixir/test/map_test.exs | 27 ++++++++++++++ 3 files changed, 97 insertions(+), 10 deletions(-) diff --git a/src/couch_views/src/couch_views_indexer.erl b/src/couch_views/src/couch_views_indexer.erl index 9183d982e..9c8be6fca 100644 --- a/src/couch_views/src/couch_views_indexer.erl +++ b/src/couch_views/src/couch_views_indexer.erl @@ -195,9 +195,10 @@ do_update(Db, Mrst0, State0) -> limit := Limit, limiter := Limiter, view_vs := ViewVS, - changes_done := ChangesDone0 + changes_done := ChangesDone0, + design_opts := DesignOpts } = State2, - DocAcc1 = fetch_docs(TxDb, DocAcc), + DocAcc1 = fetch_docs(TxDb, DesignOpts, DocAcc), couch_rate:in(Limiter, Count), {Mrst1, MappedDocs} = map_docs(Mrst0, DocAcc1), @@ -379,7 +380,7 @@ write_docs(TxDb, Mrst, Docs, State) -> DocsNumber. -fetch_docs(Db, Changes) -> +fetch_docs(Db, DesignOpts, Changes) -> {Deleted, NotDeleted} = lists:partition(fun(Doc) -> #{deleted := Deleted} = Doc, Deleted @@ -407,17 +408,27 @@ fetch_docs(Db, Changes) -> } end, #{}, erlfdb:wait_for_all(RevFutures)), + AddLocalSeq = fabric2_util:get_value(<<"local_seq">>, DesignOpts, false), + BodyFutures = maps:keys(BodyState), ChangesWithDocs = lists:map(fun (BodyFuture) -> {Id, RevInfo, Change} = maps:get(BodyFuture, BodyState), Doc = fabric2_fdb:get_doc_body_wait(Db, Id, RevInfo, BodyFuture), - BranchCount = maps:get(branch_count, RevInfo, 1), - Doc1 = if BranchCount == 1 -> Doc; true -> - RevConflicts = fabric2_fdb:get_all_revs(Db, Id), - {ok, DocWithConflicts} = fabric2_db:apply_open_doc_opts(Doc, - RevConflicts, [conflicts]), - DocWithConflicts + Doc1 = case maps:get(branch_count, RevInfo, 1) of + 1 when AddLocalSeq -> + {ok, DocWithLocalSeq} = fabric2_db:apply_open_doc_opts(Doc, + [RevInfo], [local_seq]), + DocWithLocalSeq; + 1 -> + Doc; + _ -> + RevConflicts = fabric2_fdb:get_all_revs(Db, Id), + DocOpts = if not AddLocalSeq -> []; true -> [local_seq] end, + + {ok, DocWithConflicts} = fabric2_db:apply_open_doc_opts(Doc, + RevConflicts, [conflicts | DocOpts]), + DocWithConflicts end, Change#{doc => Doc1} end, erlfdb:wait_for_all(BodyFutures)), diff --git a/src/couch_views/test/couch_views_map_test.erl b/src/couch_views/test/couch_views_map_test.erl index 2b679f07c..c419546e1 100644 --- a/src/couch_views/test/couch_views_map_test.erl +++ b/src/couch_views/test/couch_views_map_test.erl @@ -58,7 +58,8 @@ map_views_test_() -> ?TDEF(should_map_with_doc_emit), ?TDEF(should_map_update_is_false), ?TDEF(should_map_update_is_lazy), - ?TDEF(should_map_wait_for_interactive) + ?TDEF(should_map_wait_for_interactive), + ?TDEF(should_map_local_seq) % fun should_give_ext_size_seq_indexed_test/1 ] } @@ -440,6 +441,36 @@ should_map_wait_for_interactive() -> ]}, Result). +should_map_local_seq() -> + ExpectedTrue = [ + {row, [{id, <<"1">>}, {key, 1}, {value, 1}]}, + {row, [{id, <<"2">>}, {key, 2}, {value, 2}]}, + {row, [{id, <<"3">>}, {key, 3}, {value, 3}]} + ], + check_local_seq(true, ExpectedTrue), + + ExpectedFalse = [], + check_local_seq(false, ExpectedFalse), + + Error = {bad_request,invalid_design_doc, + <<"`options.local_seq` field must have boolean type">>}, + ?assertThrow(Error, check_local_seq(something_else, null)). + + +check_local_seq(Val, Expected) -> + DbName = ?tempdb(), + {ok, Db} = fabric2_db:create(DbName, [{user_ctx, ?ADMIN_USER}]), + + DDoc = create_local_seq_ddoc(Val), + Docs = make_docs(5), + fabric2_db:update_docs(Db, [DDoc | Docs]), + + {ok, Result} = couch_views:query(Db, DDoc, <<"idx_01">>, fun default_cb/2, [], + #{limit => 3}), + + ?assertEqual(Expected, Result). + + % should_give_ext_size_seq_indexed_test(Db) -> % DDoc = couch_doc:from_json_obj({[ % {<<"_id">>, <<"_design/seqdoc">>}, @@ -550,6 +581,24 @@ create_interactive_ddoc() -> ]}). +create_local_seq_ddoc(Val) -> + couch_doc:from_json_obj({[ + {<<"_id">>, <<"_design/ddoc_local_seq">>}, + {<<"options">>, {[{<<"local_seq">>, Val}]}}, + {<<"language">>, <<"javascript">>}, + {<<"views">>, {[ + {<<"idx_01">>, {[ + {<<"map">>, << + "function(doc) {" + "if (doc._local_seq) {" + "emit(doc.val, doc.val);" + "}" + "}">>} + ]}} + ]}} + ]}). + + make_docs(Count) -> [doc(I) || I <- lists:seq(1, Count)]. diff --git a/test/elixir/test/map_test.exs b/test/elixir/test/map_test.exs index 84325659d..9254cc4c3 100644 --- a/test/elixir/test/map_test.exs +++ b/test/elixir/test/map_test.exs @@ -586,6 +586,33 @@ defmodule ViewMapTest do assert get_ids(resp) == ["doc-id-1"] end + test "_local_seq is supported", context do + db_name = context[:db_name] + ddoc = %{ + _id: "_design/local_seq", + views: %{ + view: %{ + map: """ + function (doc) { + emit(doc._local_seq, doc._id); + } + """ + } + }, + options: %{ + local_seq: true + } + } + + resp = Couch.post("/#{db_name}/_bulk_docs", body: %{:docs => [ddoc]}) + assert resp.status_code == 201 + + url = "/#{db_name}/_design/local_seq/_view/view" + resp = Couch.get(url, query: %{limit: 1}) + key = Enum.at(resp.body["rows"], 0)["key"] + assert key != :null + end + def update_doc_value(db_name, id, value) do resp = Couch.get("/#{db_name}/#{id}") doc = convert(resp.body) -- cgit v1.2.1 From f9259195529a144def25e3d3ba86ac26a76d4b51 Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Wed, 5 Aug 2020 13:19:30 -0500 Subject: Export reduce/5 --- src/ebtree/src/ebtree.erl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/ebtree/src/ebtree.erl b/src/ebtree/src/ebtree.erl index c28693dbc..566cf2a55 100644 --- a/src/ebtree/src/ebtree.erl +++ b/src/ebtree/src/ebtree.erl @@ -25,6 +25,7 @@ fold/4, fold/5, reduce/4, + reduce/5, full_reduce/2, group_reduce/7, group_reduce/8, -- cgit v1.2.1 From a32bc83084e8b927931fe23fb234f5c9fa3f8dbd Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Wed, 5 Aug 2020 13:19:52 -0500 Subject: Handle empty reduce batches --- src/ebtree/src/ebtree.erl | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/ebtree/src/ebtree.erl b/src/ebtree/src/ebtree.erl index 566cf2a55..cd08e753e 100644 --- a/src/ebtree/src/ebtree.erl +++ b/src/ebtree/src/ebtree.erl @@ -283,6 +283,9 @@ reduce(Db, #tree{} = Tree, StartKey, EndKey, Options) -> do_reduce(Tree, MapValues, ReduceValues). +do_reduce(#tree{} = Tree, [], []) -> + reduce_values(Tree, [], false); + do_reduce(#tree{} = Tree, [], ReduceValues) when is_list(ReduceValues) -> reduce_values(Tree, ReduceValues, true); @@ -1131,6 +1134,12 @@ range_after_delete_test() -> ?assertEqual(50, reverse_range(Db, Tree, 1, 100, fun(E, A) -> length(E) + A end, 0)). +full_reduce_empty_test() -> + Db = erlfdb_util:get_test_db([empty]), + Tree = open(Db, <<1,2,3>>, 4, [{reduce_fun, fun reduce_sum/2}]), + ?assertEqual(0, full_reduce(Db, Tree)). + + full_reduce_test_() -> Db = erlfdb_util:get_test_db([empty]), Tree = open(Db, <<1,2,3>>, 4, [{reduce_fun, fun reduce_sum/2}]), -- cgit v1.2.1 From 29d649850874735de919ad39a9e04bef29c96267 Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Wed, 5 Aug 2020 13:20:36 -0500 Subject: Fix range scans over an empty tree --- src/ebtree/src/ebtree.erl | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/src/ebtree/src/ebtree.erl b/src/ebtree/src/ebtree.erl index cd08e753e..15a21a695 100644 --- a/src/ebtree/src/ebtree.erl +++ b/src/ebtree/src/ebtree.erl @@ -415,6 +415,9 @@ range(Db, #tree{} = Tree, StartKey, EndKey, AccFun, Acc0) -> end). +range(_Tx, #tree{}, #node{id = ?NODE_ROOT_ID, members = []}, _StartKey, _EndKey, _AccFun, Acc0) -> + Acc0; + range(Tx, #tree{} = Tree, #node{level = 0} = Node, StartKey, EndKey, AccFun, Acc0) -> InRange = [{K, V} || {K, V} <- Node#node.members, collate(Tree, StartKey, K, [lt, eq]), collate(Tree, K, EndKey, [lt, eq])], @@ -448,6 +451,9 @@ reverse_range(Db, #tree{} = Tree, StartKey, EndKey, AccFun, Acc0) -> end). +reverse_range(_Tx, #tree{}, #node{id = ?NODE_ROOT_ID, members = []}, _StartKey, _EndKey, _AccFun, Acc0) -> + Acc0; + reverse_range(Tx, #tree{} = Tree, #node{level = 0} = Node, StartKey, EndKey, AccFun, Acc0) -> InRange = [{K, V} || {K, V} <- Node#node.members, collate(Tree, StartKey, K, [lt, eq]), collate(Tree, K, EndKey, [lt, eq])], @@ -1301,6 +1307,15 @@ lookup_test_fun(Max, Order) -> [Order, Max, 1000 * (Max / msec(T1 - T0)), 1000 * (Max / msec(T2 - T1))]). +empty_range_test() -> + Db = erlfdb_util:get_test_db([empty]), + Tree = open(Db, <<1, 2, 3>>, 10), + ?assertEqual( + blah, + range(Db, Tree, min(), max(), fun(_, A) -> A end, blah) + ). + + range_test_() -> {timeout, 1000, fun() -> Db = erlfdb_util:get_test_db([empty]), @@ -1317,6 +1332,15 @@ range_test_() -> end}. +empty_reverse_range_test() -> + Db = erlfdb_util:get_test_db([empty]), + Tree = open(Db, <<1, 2, 3>>, 10), + ?assertEqual( + blah, + reverse_range(Db, Tree, min(), max(), fun(_, A) -> A end, blah) + ). + + reverse_range_test_() -> {timeout, 1000, fun() -> Db = erlfdb_util:get_test_db([empty]), -- cgit v1.2.1 From 31b467c26a1ee7b34cf9cb3794815a3da90d9687 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Wed, 5 Aug 2020 21:04:22 +0100 Subject: Speed up ebtree test suite without losing coverage --- src/ebtree/src/ebtree.erl | 59 +++++++++++++++-------------------------------- 1 file changed, 18 insertions(+), 41 deletions(-) diff --git a/src/ebtree/src/ebtree.erl b/src/ebtree/src/ebtree.erl index 15a21a695..6a4f4a80e 100644 --- a/src/ebtree/src/ebtree.erl +++ b/src/ebtree/src/ebtree.erl @@ -1113,7 +1113,7 @@ collate_validation_test() -> lookup_test() -> Db = erlfdb_util:get_test_db([empty]), Tree = open(Db, <<1,2,3>>, 4), - Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, 100)])], + Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, 16)])], lists:foreach(fun(Key) -> insert(Db, Tree, Key, Key + 1) end, Keys), lists:foreach(fun(Key) -> ?assertEqual({Key, Key + 1}, lookup(Db, Tree, Key)) end, Keys), ?assertEqual(false, lookup(Db, Tree, 101)). @@ -1122,7 +1122,7 @@ lookup_test() -> delete_test() -> Db = erlfdb_util:get_test_db([empty]), Tree = open(Db, <<1,2,3>>, 4), - Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, 100)])], + Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, 16)])], lists:foreach(fun(Key) -> insert(Db, Tree, Key, Key + 1) end, Keys), lists:foreach(fun(Key) -> ?assertEqual({Key, Key + 1}, lookup(Db, Tree, Key)) end, Keys), lists:foreach(fun(Key) -> delete(Db, Tree, Key) end, Keys), @@ -1132,12 +1132,12 @@ delete_test() -> range_after_delete_test() -> Db = erlfdb_util:get_test_db([empty]), Tree = open(Db, <<1,2,3>>, 4), - Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, 100)])], + Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, 16)])], lists:foreach(fun(Key) -> insert(Db, Tree, Key, Key + 1) end, Keys), lists:foreach(fun(Key) -> ?assertEqual({Key, Key + 1}, lookup(Db, Tree, Key)) end, Keys), - lists:foreach(fun(Key) -> delete(Db, Tree, Key) end, lists:seq(1, 100, 2)), - ?assertEqual(50, range(Db, Tree, 1, 100, fun(E, A) -> length(E) + A end, 0)), - ?assertEqual(50, reverse_range(Db, Tree, 1, 100, fun(E, A) -> length(E) + A end, 0)). + lists:foreach(fun(Key) -> delete(Db, Tree, Key) end, lists:seq(1, 16, 2)), + ?assertEqual(8, range(Db, Tree, 1, 16, fun(E, A) -> length(E) + A end, 0)), + ?assertEqual(8, reverse_range(Db, Tree, 1, 16, fun(E, A) -> length(E) + A end, 0)). full_reduce_empty_test() -> @@ -1163,7 +1163,7 @@ full_reduce_test_() -> full_reduce_after_delete_test() -> Db = erlfdb_util:get_test_db([empty]), Tree = open(Db, <<1,2,3>>, 4, [{reduce_fun, fun reduce_sum/2}]), - Max = 100, + Max = 16, Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, Max)])], lists:foreach(fun(Key) -> insert(Db, Tree, Key, Key) end, Keys), ?assertEqual(round(Max * ((1 + Max) / 2)), full_reduce(Db, Tree)), @@ -1287,26 +1287,6 @@ custom_collation_test() -> ?assertEqual([{2, 2}, {1, 1}], range(Db, Tree, 3, 0, fun(E, A) -> A ++ E end, [])). -intense_lookup_test_() -> - [ - {timeout, 1000, fun() -> lookup_test_fun(1000, 20) end}, - {timeout, 1000, fun() -> lookup_test_fun(1000, 50) end}, - {timeout, 1000, fun() -> lookup_test_fun(1000, 500) end} - ]. - - -lookup_test_fun(Max, Order) -> - Db = erlfdb_util:get_test_db([empty]), - Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, Max, 2)])], - T0 = erlang:monotonic_time(), - Tree = lists:foldl(fun(Key, T) -> insert(Db, T, Key, Key + 1) end, open(Db, <<1,2,3>>, Order), Keys), - T1 = erlang:monotonic_time(), - lists:foreach(fun(Key) -> ?assertEqual({Key, Key + 1}, lookup(Db, Tree, Key)) end, Keys), - T2 = erlang:monotonic_time(), - ?debugFmt("~B order. ~B iterations. insert rate: ~.2f/s, lookup rate: ~.2f/s", - [Order, Max, 1000 * (Max / msec(T1 - T0)), 1000 * (Max / msec(T2 - T1))]). - - empty_range_test() -> Db = erlfdb_util:get_test_db([empty]), Tree = open(Db, <<1, 2, 3>>, 10), @@ -1319,7 +1299,7 @@ empty_range_test() -> range_test_() -> {timeout, 1000, fun() -> Db = erlfdb_util:get_test_db([empty]), - Max = 1000, + Max = 100, Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, Max)])], Tree = lists:foldl(fun(Key, T) -> insert(Db, T, Key, Key + 1) end, open(Db, <<1,2,3>>, 10), Keys), lists:foreach( @@ -1328,7 +1308,7 @@ range_test_() -> ?assertEqual([{K, K + 1} || K <- lists:seq(StartKey, EndKey)], range(Db, Tree, StartKey, EndKey, fun(E, A) -> A ++ E end, []) ) end, - lists:seq(1, 1000)) + lists:seq(1, 100)) end}. @@ -1344,26 +1324,26 @@ empty_reverse_range_test() -> reverse_range_test_() -> {timeout, 1000, fun() -> Db = erlfdb_util:get_test_db([empty]), - Max = 1000, + Max = 100, Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, Max)])], - Tree = lists:foldl(fun(Key, T) -> insert(Db, T, Key, Key + 1) end, open(Db, <<1,2,3>>, 10), Keys), + Tree = lists:foldl(fun(Key, T) -> insert(Db, T, Key, Key + 1) end, open(Db, <<1,2,3>>, 8), Keys), lists:foreach( fun(_) -> [StartKey, EndKey] = lists:sort([rand:uniform(Max), rand:uniform(Max)]), ?assertEqual([{K, K + 1} || K <- lists:seq(EndKey, StartKey, -1)], reverse_range(Db, Tree, StartKey, EndKey, fun(E, A) -> A ++ E end, []) ) end, - lists:seq(1, 1000)) + lists:seq(1, 100)) end}. custom_collation_range_test_() -> {timeout, 1000, fun() -> Db = erlfdb_util:get_test_db([empty]), - Max = 1000, + Max = 100, Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, Max)])], CollateFun = fun(A, B) -> collate_raw(B, A) end, - Tree = open(Db, <<1,2,3>>, 10, [{collate_fun, CollateFun}]), + Tree = open(Db, <<1,2,3>>, 6, [{collate_fun, CollateFun}]), lists:foldl(fun(Key, T) -> insert(Db, T, Key, Key + 1) end, Tree, Keys), lists:foreach( fun(_) -> @@ -1377,17 +1357,17 @@ custom_collation_range_test_() -> ?assertEqual([{K, K + 1} || K <- Seq], range(Db, Tree, StartKey, EndKey, fun(E, A) -> A ++ E end, []) ) end, - lists:seq(1, 1000)) + lists:seq(1, 100)) end}. custom_collation_reverse_range_test_() -> {timeout, 1000, fun() -> Db = erlfdb_util:get_test_db([empty]), - Max = 1000, + Max = 100, Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, Max)])], CollateFun = fun(A, B) -> collate_raw(B, A) end, - Tree = open(Db, <<1,2,3>>, 10, [{collate_fun, CollateFun}]), + Tree = open(Db, <<1,2,3>>, 6, [{collate_fun, CollateFun}]), lists:foldl(fun(Key, T) -> insert(Db, T, Key, Key + 1) end, Tree, Keys), lists:foreach( fun(_) -> @@ -1401,11 +1381,8 @@ custom_collation_reverse_range_test_() -> ?assertEqual([{K, K + 1} || K <- lists:reverse(Seq)], reverse_range(Db, Tree, StartKey, EndKey, fun(E, A) -> A ++ E end, []) ) end, - lists:seq(1, 1000)) + lists:seq(1, 100)) end}. -msec(Native) -> - erlang:max(1, erlang:convert_time_unit(Native, native, millisecond)). - -endif. -- cgit v1.2.1 From 042347e47e1f8de0fc1f5de9a66bc9cc2296711d Mon Sep 17 00:00:00 2001 From: jiangph Date: Wed, 5 Aug 2020 15:37:45 +0800 Subject: fixup: Build couch_js for redhat linux When building couch_js in RHEL, there is one error occurring with "undefined reference to symbol '_ZTVN10__cxxabiv117__class_type_infoE@@CXXABI_1.3'". This commit is to adjust binding library to address this issue. --- src/couch/rebar.config.script | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/couch/rebar.config.script b/src/couch/rebar.config.script index 80e6bd12e..aa8939ab5 100644 --- a/src/couch/rebar.config.script +++ b/src/couch/rebar.config.script @@ -121,7 +121,7 @@ end, {unix, _} when SMVsn == "60" -> { "-DXP_UNIX -I/usr/include/mozjs-60 -I/usr/local/include/mozjs-60 -std=c++14", - "-L/usr/local/lib -std=c++14 -lmozjs-60 -lm" + "-L/usr/local/lib -std=c++14 -lmozjs-60 -lm -lstdc++" } end. -- cgit v1.2.1 From 51a131d59c2c62a827b2f646aced2f53bf789176 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Thu, 6 Aug 2020 12:58:46 +0100 Subject: Tighten expectation of members format by level --- src/ebtree/src/ebtree.erl | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/src/ebtree/src/ebtree.erl b/src/ebtree/src/ebtree.erl index 6a4f4a80e..d3ec7a988 100644 --- a/src/ebtree/src/ebtree.erl +++ b/src/ebtree/src/ebtree.erl @@ -542,8 +542,8 @@ split_child(Tx, #tree{} = Tree, #node{} = Parent0, #node{} = Child) -> Parent1 = Parent0#node{ members = - umerge_members(Tree, [{FirstLeftKey, LastLeftKey, LeftId, LeftReduction}], - umerge_members(Tree, [{FirstRightKey, LastRightKey, RightId, RightReduction}], + umerge_members(Tree, Parent0#node.level, [{FirstLeftKey, LastLeftKey, LeftId, LeftReduction}], + umerge_members(Tree, Parent0#node.level, [{FirstRightKey, LastRightKey, RightId, RightReduction}], lists:keydelete(Child#node.id, 3, Parent0#node.members))) }, clear_node(Tx, Tree, Child), @@ -569,7 +569,7 @@ update_next_neighbour(Tx, #tree{} = Tree, #node{} = Node) -> insert_nonfull(Tx, #tree{} = Tree, #node{level = 0} = Node0, Key, Value) -> Node1 = Node0#node{ - members = umerge_members(Tree, [{Key, Value}], Node0#node.members) + members = umerge_members(Tree, 0, [{Key, Value}], Node0#node.members) }, set_node(Tx, Tree, Node0, Node1), reduce_node(Tree, Node1); @@ -658,7 +658,8 @@ delete(Tx, #tree{} = Tree, #node{} = Parent0, Key) -> Members1 = lists:keydelete(ChildId0, 3, Members0), Members2 = lists:keydelete(Sibling#node.id, 3, Members1), Members3 = lists:foldl(fun(N, Acc) -> - umerge_members(Tree, [{first_key(N), last_key(N), N#node.id, reduce_node(Tree, N)}], Acc) + umerge_members(Tree, Parent0#node.level, + [{first_key(N), last_key(N), N#node.id, reduce_node(Tree, N)}], Acc) end, Members2, NewNodes), Parent1 = Parent0#node{ @@ -842,8 +843,8 @@ validate_node(#tree{} = Tree, #node{} = Node) -> NumKeys = length(Node#node.members), IsLeaf = Node#node.level =:= 0, IsRoot = ?NODE_ROOT_ID == Node#node.id, - OutOfOrder = Node#node.members /= sort_members(Tree, Node#node.members), - Duplicates = Node#node.members /= usort_members(Tree, Node#node.members), + OutOfOrder = Node#node.members /= sort_members(Tree, Node#node.level, Node#node.members), + Duplicates = Node#node.members /= usort_members(Tree, Node#node.level, Node#node.members), if Node#node.id == undefined -> erlang:error({node_without_id, Node}); @@ -940,11 +941,11 @@ collate(#tree{} = Tree, A, B, Allowed) -> lists:member(collate(Tree, A, B), Allowed). -umerge_members(#tree{} = Tree, List1, List2) -> +umerge_members(#tree{} = Tree, Level, List1, List2) -> CollateWrapper = fun - ({K1, _V1}, {K2, _V2}) -> + ({K1, _V1}, {K2, _V2}) when Level == 0 -> collate(Tree, K1, K2, [lt, eq]); - ({_F1, L1, _V1, _R1}, {_F2, L2, _V2, _R2}) -> + ({_F1, L1, _V1, _R1}, {_F2, L2, _V2, _R2}) when Level > 0 -> collate(Tree, L1, L2, [lt, eq]) end, lists:umerge(CollateWrapper, List1, List2). @@ -966,21 +967,21 @@ sort_nodes(#tree{} = Tree, List) -> lists:sort(CollateWrapper, List). -sort_members(#tree{} = Tree, List) -> +sort_members(#tree{} = Tree, Level, List) -> CollateWrapper = fun - ({K1, _V1}, {K2, _V2}) -> + ({K1, _V1}, {K2, _V2}) when Level == 0 -> collate(Tree, K1, K2, [lt, eq]); - ({_F1, L1, _V1, _R1}, {_F2, L2, _V2, _R2}) -> + ({_F1, L1, _V1, _R1}, {_F2, L2, _V2, _R2}) when Level > 0 -> collate(Tree, L1, L2, [lt, eq]) end, lists:sort(CollateWrapper, List). -usort_members(#tree{} = Tree, List) -> +usort_members(#tree{} = Tree, Level, List) -> CollateWrapper = fun - ({K1, _V1}, {K2, _V2}) -> + ({K1, _V1}, {K2, _V2}) when Level == 0 -> collate(Tree, K1, K2, [lt, eq]); - ({_F1, L1, _V1, _R1}, {_F2, L2, _V2, _R2}) -> + ({_F1, L1, _V1, _R1}, {_F2, L2, _V2, _R2}) when Level > 0 -> collate(Tree, L1, L2, [lt, eq]) end, lists:usort(CollateWrapper, List). -- cgit v1.2.1 From 887e835d21ef7689ab7f232e98b9c0cb2bfde605 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Thu, 6 Aug 2020 16:34:00 +0100 Subject: extra tests --- src/ebtree/src/ebtree.erl | 49 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/src/ebtree/src/ebtree.erl b/src/ebtree/src/ebtree.erl index d3ec7a988..c31f503d5 100644 --- a/src/ebtree/src/ebtree.erl +++ b/src/ebtree/src/ebtree.erl @@ -1111,6 +1111,25 @@ collate_validation_test() -> ?assertError(invalid_collation_result, collate(Tree, 1, 2)). +order_is_preserved_test() -> + Db = erlfdb_util:get_test_db([empty]), + open(Db, <<1,2,3>>, 4), + Tree = open(Db, <<1,2,3>>, 8), + ?assertEqual(4, Tree#tree.max). + + +min_not_allowed_test() -> + Db = erlfdb_util:get_test_db([empty]), + Tree = open(Db, <<1,2,3>>, 4), + ?assertError(min_not_allowed, ebtree:insert(Db, Tree, ebtree:min(), foo)). + + +max_not_allowed_test() -> + Db = erlfdb_util:get_test_db([empty]), + Tree = open(Db, <<1,2,3>>, 4), + ?assertError(max_not_allowed, ebtree:insert(Db, Tree, ebtree:max(), foo)). + + lookup_test() -> Db = erlfdb_util:get_test_db([empty]), Tree = open(Db, <<1,2,3>>, 4), @@ -1386,4 +1405,34 @@ custom_collation_reverse_range_test_() -> end}. +validate_tree_test() -> + Db = erlfdb_util:get_test_db([empty]), + Tree = open(Db, <<1,2,3>>, 4), + [ebtree:insert(Db, Tree, I, I) || I <- lists:seq(1, 16)], + validate_tree(Db, Tree). + + +validate_node_test_() -> + [ + ?_test(?assertError({node_without_id, _}, validate_node( + #tree{}, #node{id = undefined}))), + ?_test(?assertError({too_few_keys, _}, validate_node( + #tree{collate_fun = fun collate_raw/2, min = 2}, + #node{id = 1, members = [{1, 1}]}))), + ?_test(?assertError({too_many_keys, _}, validate_node( + #tree{collate_fun = fun collate_raw/2, min = 2, max = 2}, + #node{id = 1, members = [{1, 1}, {2, 2}, {3, 3}]}))), + ?_test(?assertError({non_leaf_with_prev, _}, validate_node( + #tree{min = 0}, #node{id = 1, level = 1, prev = 1}))), + ?_test(?assertError({non_leaf_with_next, _}, validate_node( + #tree{min = 0}, #node{id = 1, level = 1, next = 1}))), + ?_test(?assertError({out_of_order, _}, validate_node( + #tree{min = 0, collate_fun = fun collate_raw/2}, + #node{id = 1, members = [{2, 2}, {1, 1}]}))), + ?_test(?assertError({duplicates, _}, validate_node( + #tree{min = 0, collate_fun = fun collate_raw/2}, + #node{id = 1, members = [{1, 1}, {1, 1}]}))) + ]. + + -endif. -- cgit v1.2.1 From a74675fd03240ae610cb9daefc87b42c0e7def7b Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Thu, 6 Aug 2020 20:04:58 +0100 Subject: Pluggable persist_fun --- src/ebtree/src/ebtree.erl | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/src/ebtree/src/ebtree.erl b/src/ebtree/src/ebtree.erl index c31f503d5..536d3b139 100644 --- a/src/ebtree/src/ebtree.erl +++ b/src/ebtree/src/ebtree.erl @@ -46,7 +46,8 @@ max, collate_fun, reduce_fun, - encode_fun + encode_fun, + persist_fun }). -define(META, 0). @@ -83,12 +84,14 @@ open(Db, Prefix, Order, Options) when is_binary(Prefix), is_integer(Order), Orde ReduceFun = proplists:get_value(reduce_fun, Options, fun reduce_noop/2), CollateFun = proplists:get_value(collate_fun, Options, fun collate_raw/2), EncodeFun = proplists:get_value(encode_fun, Options, fun encode_erlang/3), + PersistFun = proplists:get_value(persist_fun, Options, fun simple_persist/3), Tree = #tree{ prefix = Prefix, reduce_fun = ReduceFun, collate_fun = CollateFun, - encode_fun = EncodeFun + encode_fun = EncodeFun, + persist_fun = PersistFun }, erlfdb:transactional(Db, fun(Tx) -> @@ -772,8 +775,7 @@ meta_key(Prefix, MetaKey) when is_binary(Prefix) -> get_node(Tx, #tree{} = Tree, Id) -> Key = node_key(Tree#tree.prefix, Id), - Future = erlfdb:get(Tx, Key), - Value = erlfdb:wait(Future), + Value = persist(Tree, Tx, get, Key), decode_node(Tree, Id, Key, Value). @@ -785,7 +787,7 @@ clear_nodes(Tx, #tree{} = Tree, Nodes) -> clear_node(Tx, #tree{} = Tree, #node{} = Node) -> Key = node_key(Tree#tree.prefix, Node#node.id), - erlfdb:clear(Tx, Key). + persist(Tree, Tx, clear, Key). set_nodes(Tx, #tree{} = Tree, Nodes) -> @@ -805,7 +807,7 @@ set_node(Tx, #tree{} = Tree, #node{} = Node) -> validate_node(Tree, Node), Key = node_key(Tree#tree.prefix, Node#node.id), Value = encode_node(Tree, Key, Node), - erlfdb:set(Tx, Key, Value). + persist(Tree, Tx, set, [Key, Value]). node_key(Prefix, Id) when is_binary(Prefix), is_integer(Id) -> @@ -1006,6 +1008,23 @@ encode_erlang(encode, _Key, Value) -> encode_erlang(decode, _Key, Value) -> binary_to_term(Value, [safe]). +%% persist function + +persist(#tree{} = Tree, Tx, Action, Args) -> + #tree{persist_fun = PersistFun} = Tree, + PersistFun(Tx, Action, Args). + + +simple_persist(Tx, set, [Key, Value]) -> + erlfdb:set(Tx, Key, Value); + +simple_persist(Tx, get, Key) -> + erlfdb:wait(erlfdb:get(Tx, Key)); + +simple_persist(Tx, clear, Key) -> + erlfdb:clear(Tx, Key). + + %% private functions init_order(#tree{} = Tree, Order) -- cgit v1.2.1 From e0cbe1cc26552e4b96051b61a674e59012768541 Mon Sep 17 00:00:00 2001 From: Joan Touzet Date: Mon, 10 Aug 2020 17:47:22 +0000 Subject: Remove wrongly commited file from #2955 (#3070) --- src/chttpd/src/chttpd_auth.erl.orig | 89 ------------------------------------- 1 file changed, 89 deletions(-) delete mode 100644 src/chttpd/src/chttpd_auth.erl.orig diff --git a/src/chttpd/src/chttpd_auth.erl.orig b/src/chttpd/src/chttpd_auth.erl.orig deleted file mode 100644 index 607f09a8a..000000000 --- a/src/chttpd/src/chttpd_auth.erl.orig +++ /dev/null @@ -1,89 +0,0 @@ -% Licensed under the Apache License, Version 2.0 (the "License"); you may not -% use this file except in compliance with the License. You may obtain a copy of -% the License at -% -% http://www.apache.org/licenses/LICENSE-2.0 -% -% Unless required by applicable law or agreed to in writing, software -% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -% License for the specific language governing permissions and limitations under -% the License. - --module(chttpd_auth). - --export([authenticate/2]). --export([authorize/2]). - --export([default_authentication_handler/1]). --export([cookie_authentication_handler/1]). --export([proxy_authentication_handler/1]). --export([party_mode_handler/1]). - --export([handle_session_req/1]). - --include_lib("couch/include/couch_db.hrl"). - --define(SERVICE_ID, chttpd_auth). - - -%% ------------------------------------------------------------------ -%% API Function Definitions -%% ------------------------------------------------------------------ - -authenticate(HttpReq, Default) -> - maybe_handle(authenticate, [HttpReq], Default). - -authorize(HttpReq, Default) -> - maybe_handle(authorize, [HttpReq], Default). - - -%% ------------------------------------------------------------------ -%% Default callbacks -%% ------------------------------------------------------------------ - -default_authentication_handler(Req) -> - couch_httpd_auth:default_authentication_handler(Req, chttpd_auth_cache). - -cookie_authentication_handler(Req) -> - couch_httpd_auth:cookie_authentication_handler(Req, chttpd_auth_cache). - -proxy_authentication_handler(Req) -> - couch_httpd_auth:proxy_authentication_handler(Req). - -party_mode_handler(#httpd{method='POST', path_parts=[<<"_session">>]} = Req) -> - % See #1947 - users should always be able to attempt a login - Req#httpd{user_ctx=#user_ctx{}}; -party_mode_handler(Req) -> - RequireValidUser = config:get_boolean("chttpd", "require_valid_user", false), - ExceptUp = config:get_boolean("chttpd", "require_valid_user_except_for_up", true), - case RequireValidUser andalso not ExceptUp of - true -> - throw({unauthorized, <<"Authentication required.">>}); - false -> - case config:get("admins") of - [] -> - Req#httpd{user_ctx = ?ADMIN_USER}; - _ -> - Req#httpd{user_ctx=#user_ctx{}} - end - end. - -handle_session_req(Req) -> - couch_httpd_auth:handle_session_req(Req, chttpd_auth_cache). - - -%% ------------------------------------------------------------------ -%% Internal Function Definitions -%% ------------------------------------------------------------------ - -maybe_handle(Func, Args, Default) -> - Handle = couch_epi:get_handle(?SERVICE_ID), - case couch_epi:decide(Handle, ?SERVICE_ID, Func, Args, []) of - no_decision when is_function(Default) -> - apply(Default, Args); - no_decision -> - Default; - {decided, Result} -> - Result - end. -- cgit v1.2.1 From 57e35019c71f1d864b4955055a1d07e09cbf6231 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Mon, 10 Aug 2020 16:22:54 +0100 Subject: Unlink index pid and swallow EXIT message if present This should prevent unexpected exit messages arriving which crash couch_index_server. Patch suggested by davisp. Closes #3061. --- src/couch_index/src/couch_index_server.erl | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/couch_index/src/couch_index_server.erl b/src/couch_index/src/couch_index_server.erl index 49d1e61b7..67f0f8c6f 100644 --- a/src/couch_index/src/couch_index_server.erl +++ b/src/couch_index/src/couch_index_server.erl @@ -243,9 +243,15 @@ reset_indexes(DbName, Root) -> end, dict:new(), ets:lookup(?BY_DB, DbName)), Fun = fun({Sig, DDocIds}) -> [{_, Pid}] = ets:lookup(?BY_SIG, {DbName, Sig}), - MRef = erlang:monitor(process, Pid), + unlink(Pid), gen_server:cast(Pid, delete), - receive {'DOWN', MRef, _, _, _} -> ok end, + receive + {'EXIT', Pid, _} -> + ok + after + 0 -> + ok + end, rem_from_ets(DbName, Sig, DDocIds, Pid) end, lists:foreach(Fun, dict:to_list(SigDDocIds)), -- cgit v1.2.1 From 7c9094cfdd9d5069b903b27105bb156c621a624b Mon Sep 17 00:00:00 2001 From: Eric Avdey Date: Thu, 13 Aug 2020 14:28:52 -0300 Subject: Validate shard specific query params on db create request --- src/chttpd/src/chttpd_db.erl | 162 ++++++++++++++++++++++++++++++++++++++++--- src/couch/src/couch_util.erl | 12 ++++ 2 files changed, 165 insertions(+), 9 deletions(-) diff --git a/src/chttpd/src/chttpd_db.erl b/src/chttpd/src/chttpd_db.erl index 6a3df6def..5af65937c 100644 --- a/src/chttpd/src/chttpd_db.erl +++ b/src/chttpd/src/chttpd_db.erl @@ -383,17 +383,10 @@ handle_design_info_req(Req, _Db, _DDoc) -> create_db_req(#httpd{}=Req, DbName) -> couch_httpd:verify_is_server_admin(Req), - N = chttpd:qs_value(Req, "n", config:get("cluster", "n", "3")), - Q = chttpd:qs_value(Req, "q", config:get("cluster", "q", "8")), - P = chttpd:qs_value(Req, "placement", config:get("cluster", "placement")), + ShardsOpt = parse_shards_opt(Req), EngineOpt = parse_engine_opt(Req), DbProps = parse_partitioned_opt(Req), - Options = [ - {n, N}, - {q, Q}, - {placement, P}, - {props, DbProps} - ] ++ EngineOpt, + Options = lists:append([ShardsOpt, [{props, DbProps}], EngineOpt]), DocUrl = absolute_uri(Req, "/" ++ couch_util:url_encode(DbName)), case fabric:create_db(DbName, Options) of ok -> @@ -1702,6 +1695,40 @@ get_md5_header(Req) -> parse_doc_query(Req) -> lists:foldl(fun parse_doc_query/2, #doc_query_args{}, chttpd:qs(Req)). +parse_shards_opt(Req) -> + [ + {n, parse_shards_opt("n", Req, config:get("cluster", "n", "3"))}, + {q, parse_shards_opt("q", Req, config:get("cluster", "q", "8"))}, + {placement, parse_shards_opt( + "placement", Req, config:get("cluster", "placement"))} + ]. + +parse_shards_opt("placement", Req, Default) -> + Err = <<"The `placement` value should be in a format `zone:n`.">>, + case chttpd:qs_value(Req, "placement", Default) of + Default -> Default; + [] -> throw({bad_request, Err}); + Val -> + try + true = lists:all(fun(Rule) -> + [_, N] = string:tokens(Rule, ":"), + couch_util:validate_positive_int(N) + end, string:tokens(Val, ",")), + Val + catch _:_ -> + throw({bad_request, Err}) + end + end; + +parse_shards_opt(Param, Req, Default) -> + Val = chttpd:qs_value(Req, Param, Default), + Err = ?l2b(["The `", Param, "` value should be a positive integer."]), + case couch_util:validate_positive_int(Val) of + true -> Val; + false -> throw({bad_request, Err}) + end. + + parse_engine_opt(Req) -> case chttpd:qs_value(Req, "engine") of undefined -> @@ -2118,8 +2145,26 @@ parse_partitioned_opt_test_() -> ] }. +parse_shards_opt_test_() -> + { + foreach, + fun setup/0, + fun teardown/1, + [ + t_should_allow_valid_q(), + t_should_default_on_missing_q(), + t_should_throw_on_invalid_q(), + t_should_allow_valid_n(), + t_should_default_on_missing_n(), + t_should_throw_on_invalid_n(), + t_should_allow_valid_placement(), + t_should_default_on_missing_placement(), + t_should_throw_on_invalid_placement() + ] + }. setup() -> + meck:expect(config, get, fun(_, _, Default) -> Default end), ok. teardown(_) -> @@ -2158,4 +2203,103 @@ t_returns_empty_array_for_no_partitioned_qs() -> ?assertEqual(parse_partitioned_opt(Req), []) end). +t_should_allow_valid_q() -> + ?_test(begin + Req = mock_request("/all-test21?q=1"), + Opts = parse_shards_opt(Req), + ?assertEqual("1", couch_util:get_value(q, Opts)) + end). + +t_should_default_on_missing_q() -> + ?_test(begin + Req = mock_request("/all-test21"), + Opts = parse_shards_opt(Req), + ?assertEqual("8", couch_util:get_value(q, Opts)) + end). + +t_should_throw_on_invalid_q() -> + ?_test(begin + Req = mock_request("/all-test21?q="), + Err = <<"The `q` value should be a positive integer.">>, + ?assertThrow({bad_request, Err}, parse_shards_opt(Req)) + end). + +t_should_allow_valid_n() -> + ?_test(begin + Req = mock_request("/all-test21?n=1"), + Opts = parse_shards_opt(Req), + ?assertEqual("1", couch_util:get_value(n, Opts)) + end). + +t_should_default_on_missing_n() -> + ?_test(begin + Req = mock_request("/all-test21"), + Opts = parse_shards_opt(Req), + ?assertEqual("3", couch_util:get_value(n, Opts)) + end). + +t_should_throw_on_invalid_n() -> + ?_test(begin + Req = mock_request("/all-test21?n="), + Err = <<"The `n` value should be a positive integer.">>, + ?assertThrow({bad_request, Err}, parse_shards_opt(Req)) + end). + +t_should_allow_valid_placement() -> + { + foreach, + fun() -> ok end, + [ + {"single zone", + ?_test(begin + Req = mock_request("/all-test21?placement=az:1"), + Opts = parse_shards_opt(Req), + ?assertEqual("az:1", couch_util:get_value(placement, Opts)) + end)}, + {"multi zone", + ?_test(begin + Req = mock_request("/all-test21?placement=az:1,co:3"), + Opts = parse_shards_opt(Req), + ?assertEqual("az:1,co:3", + couch_util:get_value(placement, Opts)) + end)} + ] + }. + +t_should_default_on_missing_placement() -> + ?_test(begin + Req = mock_request("/all-test21"), + Opts = parse_shards_opt(Req), + ?assertEqual(undefined, couch_util:get_value(placement, Opts)) + end). + +t_should_throw_on_invalid_placement() -> + Err = <<"The `placement` value should be in a format `zone:n`.">>, + { + foreach, + fun() -> ok end, + [ + {"empty placement", + ?_test(begin + Req = mock_request("/all-test21?placement="), + ?assertThrow({bad_request, Err}, parse_shards_opt(Req)) + end)}, + {"invalid format", + ?_test(begin + Req = mock_request("/all-test21?placement=moon"), + ?assertThrow({bad_request, Err}, parse_shards_opt(Req)) + end)}, + {"invalid n", + ?_test(begin + Req = mock_request("/all-test21?placement=moon:eagle"), + ?assertThrow({bad_request, Err}, parse_shards_opt(Req)) + end)}, + {"one invalid zone", + ?_test(begin + Req = mock_request("/all-test21?placement=az:1,co:moon"), + ?assertThrow({bad_request, Err}, parse_shards_opt(Req)) + end)} + ] + }. + -endif. diff --git a/src/couch/src/couch_util.erl b/src/couch/src/couch_util.erl index dffb68152..95780e8cc 100644 --- a/src/couch/src/couch_util.erl +++ b/src/couch/src/couch_util.erl @@ -31,6 +31,7 @@ -export([with_db/2]). -export([rfc1123_date/0, rfc1123_date/1]). -export([integer_to_boolean/1, boolean_to_integer/1]). +-export([validate_positive_int/1]). -export([find_in_binary/2]). -export([callback_exists/3, validate_callback_exists/3]). -export([with_proc/4]). @@ -624,6 +625,17 @@ boolean_to_integer(false) -> 0. +validate_positive_int(N) when is_list(N) -> + try + I = list_to_integer(N), + validate_positive_int(I) + catch error:badarg -> + false + end; +validate_positive_int(N) when is_integer(N), N > 0 -> true; +validate_positive_int(_) -> false. + + find_in_binary(_B, <<>>) -> not_found; -- cgit v1.2.1 From 5004f997a6a30831e65c864b96ca82c926f3dbac Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Thu, 13 Aug 2020 14:23:24 +0100 Subject: Don't crash couch_index_server if the db isn't known yet If a ddoc is added immediately after database creation (_users and _replicator when couchdb is used in a multi-tenant fashion), we can crash couch_index_server in handle_db_event, as mem3_shards:local throws an error. --- src/couch_index/src/couch_index_server.erl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/couch_index/src/couch_index_server.erl b/src/couch_index/src/couch_index_server.erl index 67f0f8c6f..6bebff2d8 100644 --- a/src/couch_index/src/couch_index_server.erl +++ b/src/couch_index/src/couch_index_server.erl @@ -284,7 +284,11 @@ handle_db_event(<<"shards/", _/binary>> = DbName, {ddoc_updated, DDocResult = couch_util:with_db(DbName, fun(Db) -> couch_db:open_doc(Db, DDocId, [ejson_body, ?ADMIN_CTX]) end), - DbShards = [mem3:name(Sh) || Sh <- mem3:local_shards(mem3:dbname(DbName))], + LocalShards = try mem3:local_shards(mem3:dbname(DbName)) + catch error:database_does_not_exist -> + [] + end, + DbShards = [mem3:name(Sh) || Sh <- LocalShards], lists:foreach(fun(DbShard) -> lists:foreach(fun({_DbShard, {_DDocId, Sig}}) -> % check if there are other ddocs with the same Sig for the same db -- cgit v1.2.1 From 1c0c9f40225668a67e73d11583c7bd22c042d1f2 Mon Sep 17 00:00:00 2001 From: garren smith Date: Wed, 19 Aug 2020 16:16:55 +0200 Subject: add has_failures to couch_rate_limiter (#3088) Fixes the case where no writes are done for an index, the rater limiter assumed it was a failure. --- src/couch_rate/src/couch_rate_limiter.erl | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/couch_rate/src/couch_rate_limiter.erl b/src/couch_rate/src/couch_rate_limiter.erl index 6f852b1d8..97a630206 100644 --- a/src/couch_rate/src/couch_rate_limiter.erl +++ b/src/couch_rate/src/couch_rate_limiter.erl @@ -80,7 +80,8 @@ regular_delay = 100 :: timeout(), congested_delay = 5000 :: timeout(), initial_budget = 100, - latency = 0 + latency = 0, + has_failures = false }). -type state() :: #?STATE{}. @@ -199,7 +200,8 @@ success(_Id, #?STATE{} = State, Writes) -> writes = Writes, mean_writes = average(MeanWrites, WinSize, Writes), mean_reads = average(MeanReads, WinSize, Reads), - latency = TimerFun() - TS + latency = TimerFun() - TS, + has_failures = false })}. @@ -215,7 +217,8 @@ failure(_Id, #?STATE{} = State) -> } = State, {ok, update_min(State#?STATE{ writes = 0, - latency = TimerFun() - TS + latency = TimerFun() - TS, + has_failures = true })}. @@ -266,18 +269,18 @@ pattern(Id, #?STATE{} = State) -> #?STATE{ underload_threshold = UnderloadThreshold, overload_threshold = OverloadThreshold, - writes = W, - mean_writes = MW + mean_writes = MW, + has_failures = HasFailures } = State, case min_latency(Id, State) of MinRollingLatency when MinRollingLatency > OverloadThreshold -> overloaded; MinRollingLatency when MinRollingLatency > UnderloadThreshold -> optimal; - MinRollingLatency when MinRollingLatency > 0 andalso W == 0 -> - failed; MinRollingLatency when MinRollingLatency == 0 andalso MW == 0.0 -> init; + _ when HasFailures -> + failed; _ -> underloaded end. -- cgit v1.2.1 From 11e8d0d62f9930df0d40a041452c866a5adf3320 Mon Sep 17 00:00:00 2001 From: jiangph Date: Wed, 5 Aug 2020 15:23:19 +0800 Subject: fixup: Build couch_js for redhat linux When building couch_js in RHEL, there is one error occurring with "undefined reference to symbol '_ZTVN10__cxxabiv117__class_type_infoE@@CXXABI_1.3'". This commit is to adjust binding library to address this issue. --- src/couch/rebar.config.script | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/couch/rebar.config.script b/src/couch/rebar.config.script index 4a5ef36e7..93608cea7 100644 --- a/src/couch/rebar.config.script +++ b/src/couch/rebar.config.script @@ -123,12 +123,12 @@ end. {unix, _} when SMVsn == "60" -> { "-DXP_UNIX -I/usr/include/mozjs-60 -I/usr/local/include/mozjs-60 -std=c++14 -Wno-invalid-offsetof", - "-L/usr/local/lib -std=c++14 -lmozjs-60 -lm" + "-L/usr/local/lib -std=c++14 -lmozjs-60 -lm -lstdc++" }; {unix, _} when SMVsn == "68" -> { "-DXP_UNIX -I/usr/include/mozjs-68 -I/usr/local/include/mozjs-68 -std=c++14 -Wno-invalid-offsetof", - "-L/usr/local/lib -std=c++14 -lmozjs-68 -lm" + "-L/usr/local/lib -std=c++14 -lmozjs-68 -lm -lstdc++" } end. -- cgit v1.2.1 From bdfb129c1242e26cf312d2bc5cf1fe3af2e1e56d Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Thu, 20 Aug 2020 11:57:56 -0400 Subject: Handle jiffy returning an iolist when encoding atts_since query string If we don't handle it, it throws an error when trying to encode the full URL string, for example: ``` badarg,[ {mochiweb_util,quote_plus,2,[{file,"src/mochiweb_util.erl"},{line,192}]}, {couch_replicator_httpc,query_args_to_string,2,[{file,"src/couch_replicator_httpc.erl"},{line,421}]}, {couch_replicator_httpc,full_url,2,[{file,"src/couch_replicator_httpc.erl"},{line,413}]}, {couch_replicator_api_wrap,open_doc_revs,6,[{file,"src/couch_replicator_api_wrap.erl"},{line,255}]} ] ``` This is also similar to what we did for open_revs encoding: https://github.com/apache/couchdb/commit/a2d0c4290dde2015e5fb6184696fec3f89c81a4b --- src/couch_replicator/src/couch_replicator_api_wrap.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/couch_replicator/src/couch_replicator_api_wrap.erl b/src/couch_replicator/src/couch_replicator_api_wrap.erl index a21de4242..8dc7f2f01 100644 --- a/src/couch_replicator/src/couch_replicator_api_wrap.erl +++ b/src/couch_replicator/src/couch_replicator_api_wrap.erl @@ -546,7 +546,7 @@ options_to_query_args(HttpDb, Path, Options0) -> length("GET " ++ FullUrl ++ " HTTP/1.1\r\n") + length("&atts_since=") + 6, % +6 = % encoded [ and ] PAs, MaxLen, []), - [{"atts_since", ?JSON_ENCODE(RevList)} | QueryArgs1] + [{"atts_since", ?b2l(iolist_to_binary(?JSON_ENCODE(RevList)))} | QueryArgs1] end. -- cgit v1.2.1 From 46222f4e8ce1d729342ea5abd470441eb6155345 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Thu, 20 Aug 2020 20:01:52 +0100 Subject: Don't log client disconnects --- src/chttpd/src/chttpd.erl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/chttpd/src/chttpd.erl b/src/chttpd/src/chttpd.erl index 5a3e3fa38..325369ecd 100644 --- a/src/chttpd/src/chttpd.erl +++ b/src/chttpd/src/chttpd.erl @@ -1000,6 +1000,8 @@ maybe_handle_error(Error) -> Result; {Err, Reason} -> {500, couch_util:to_binary(Err), couch_util:to_binary(Reason)}; + normal -> + exit(normal); Error -> {500, <<"unknown_error">>, couch_util:to_binary(Error)} end. -- cgit v1.2.1 From 452ce75350de652a462a9c633844cfb37672fda1 Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Thu, 20 Aug 2020 15:36:38 -0400 Subject: Do not use (catch ...) in couch_views_reader:load_docs/4 Any error there would just be generating a case clause. Remove the `{not_found, missing}` clause since it was accidentally matching on the Rev string and the case was included in the `_Else` clause anyway. --- src/couch_views/src/couch_views_reader.erl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/couch_views/src/couch_views_reader.erl b/src/couch_views/src/couch_views_reader.erl index ce7f16354..61a78d7f8 100644 --- a/src/couch_views/src/couch_views_reader.erl +++ b/src/couch_views/src/couch_views_reader.erl @@ -210,8 +210,7 @@ load_doc(TxDb, Id, null, DocOpts) -> load_doc(TxDb, Id, Rev, DocOpts) -> Rev1 = couch_doc:parse_rev(Rev), - case (catch fabric2_db:open_doc_revs(TxDb, Id, [Rev1], DocOpts)) of + case fabric2_db:open_doc_revs(TxDb, Id, [Rev1], DocOpts) of {ok, [{ok, Doc}]} -> couch_doc:to_json_obj(Doc, DocOpts); - {ok, [{{not_found, missing}, Rev}]} -> null; {ok, [_Else]} -> null end. -- cgit v1.2.1 From 6b45ef738aab540d9f742a17d96910f1098f2dfe Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Tue, 25 Aug 2020 18:18:45 +0100 Subject: Clear sensitive flag at end of public api functions --- src/aegis/src/aegis_server.erl | 59 ++++++++++++++++++++++++------------------ 1 file changed, 34 insertions(+), 25 deletions(-) diff --git a/src/aegis/src/aegis_server.erl b/src/aegis/src/aegis_server.erl index 21932626c..15fea4c63 100644 --- a/src/aegis/src/aegis_server.erl +++ b/src/aegis/src/aegis_server.erl @@ -59,27 +59,27 @@ start_link() -> -spec init_db(Db :: #{}, Options :: list()) -> boolean(). init_db(#{uuid := UUID} = Db, Options) -> - process_flag(sensitive, true), - - case ?AEGIS_KEY_MANAGER:init_db(Db, Options) of - {ok, DbKey} -> - gen_server:call(?MODULE, {insert_key, UUID, DbKey}), - true; - false -> - false - end. + sensitive(fun() -> + case ?AEGIS_KEY_MANAGER:init_db(Db, Options) of + {ok, DbKey} -> + gen_server:call(?MODULE, {insert_key, UUID, DbKey}), + true; + false -> + false + end + end). -spec open_db(Db :: #{}) -> boolean(). open_db(#{} = Db) -> - process_flag(sensitive, true), - - case do_open_db(Db) of - {ok, _DbKey} -> - true; - false -> - false - end. + sensitive(fun() -> + case do_open_db(Db) of + {ok, _DbKey} -> + true; + false -> + false + end + end). -spec encrypt(Db :: #{}, Key :: binary(), Value :: binary()) -> binary(). @@ -100,10 +100,10 @@ encrypt(#{} = Db, Key, Value) when is_binary(Key), is_binary(Value) -> erlang:error(Reason) end; false -> - process_flag(sensitive, true), - - {ok, DbKey} = do_open_db(Db), - do_encrypt(DbKey, Db, Key, Value) + sensitive(fun() -> + {ok, DbKey} = do_open_db(Db), + do_encrypt(DbKey, Db, Key, Value) + end) end. @@ -125,10 +125,10 @@ decrypt(#{} = Db, Key, Value) when is_binary(Key), is_binary(Value) -> erlang:error(Reason) end; false -> - process_flag(sensitive, true), - - {ok, DbKey} = do_open_db(Db), - do_decrypt(DbKey, Db, Key, Value) + sensitive(fun() -> + {ok, DbKey} = do_open_db(Db), + do_decrypt(DbKey, Db, Key, Value) + end) end. @@ -410,3 +410,12 @@ expiration_check_interval() -> cache_limit() -> config:get_integer("aegis", "cache_limit", ?CACHE_LIMIT). + + +sensitive(Fun) when is_function(Fun, 0) -> + OldValue = process_flag(sensitive, true), + try + Fun() + after + process_flag(sensitive, OldValue) + end. -- cgit v1.2.1 From 978f7dae6748e09830593f132e6977682e7d9453 Mon Sep 17 00:00:00 2001 From: Tony Sun Date: Tue, 25 Aug 2020 11:22:06 -0700 Subject: clear jobs data in active area during removal During job removal, it was not cleared from the active area so active_tasks would mistakenly believe the job still existed. When we try to actually open the data it is not there and not_found error would be issued.@nickva found this issue during replication work. --- src/couch_jobs/src/couch_jobs_fdb.erl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/couch_jobs/src/couch_jobs_fdb.erl b/src/couch_jobs/src/couch_jobs_fdb.erl index 891aedc79..3fcad554a 100644 --- a/src/couch_jobs/src/couch_jobs_fdb.erl +++ b/src/couch_jobs/src/couch_jobs_fdb.erl @@ -119,8 +119,9 @@ remove(#{jtx := true} = JTx0, #{job := true} = Job) -> #{type := Type, id := JobId} = Job, Key = job_key(JTx, Job), case get_job_val(Tx, Key) of - #jv{stime = STime} -> + #jv{stime = STime, seq = Seq} -> couch_jobs_pending:remove(JTx, Type, JobId, STime), + clear_activity(JTx, Type, Seq), erlfdb:clear(Tx, Key), update_watch(JTx, Type), ok; -- cgit v1.2.1 From 7dbd0adc3fd622a78ce56e0017d438dbdb824302 Mon Sep 17 00:00:00 2001 From: Tony Sun Date: Wed, 26 Aug 2020 13:56:14 -0700 Subject: bypass partition query limit for mango When partition_query_limit is set for couch_mrview, it limits how many docs can be scanned when executing partitioned queries. But this limits mango's doc scans internally. This leads to documents not being scanned to fulfill a query. This fixes: https://github.com/apache/couchdb/issues/2795 --- src/couch_mrview/src/couch_mrview_util.erl | 9 +++-- src/mango/src/mango_cursor_view.erl | 6 +++- test/elixir/test/partition_mango_test.exs | 53 ++++++++++++++++++++++++++++++ 3 files changed, 64 insertions(+), 4 deletions(-) diff --git a/src/couch_mrview/src/couch_mrview_util.erl b/src/couch_mrview/src/couch_mrview_util.erl index e971720c9..d318a3f4a 100644 --- a/src/couch_mrview/src/couch_mrview_util.erl +++ b/src/couch_mrview/src/couch_mrview_util.erl @@ -425,9 +425,12 @@ validate_args(#mrst{} = State, Args0) -> apply_limit(ViewPartitioned, Args) -> - LimitType = case ViewPartitioned of - true -> "partition_query_limit"; - false -> "query_limit" + Options = Args#mrargs.extra, + IgnorePQLimit = lists:keyfind(ignore_partition_query_limit, 1, Options), + LimitType = case {ViewPartitioned, IgnorePQLimit} of + {true, false} -> "partition_query_limit"; + {true, _} -> "query_limit"; + {false, _} -> "query_limit" end, MaxLimit = config:get_integer("query_server_config", diff --git a/src/mango/src/mango_cursor_view.erl b/src/mango/src/mango_cursor_view.erl index 240ef501d..68d7c3b62 100644 --- a/src/mango/src/mango_cursor_view.erl +++ b/src/mango/src/mango_cursor_view.erl @@ -116,7 +116,11 @@ base_args(#cursor{index = Idx, selector = Selector} = Cursor) -> start_key = StartKey, end_key = EndKey, include_docs = true, - extra = [{callback, {?MODULE, view_cb}}, {selector, Selector}] + extra = [ + {callback, {?MODULE, view_cb}}, + {selector, Selector}, + {ignore_partition_query_limit, true} + ] }. diff --git a/test/elixir/test/partition_mango_test.exs b/test/elixir/test/partition_mango_test.exs index 992999fb9..9e4f1e783 100644 --- a/test/elixir/test/partition_mango_test.exs +++ b/test/elixir/test/partition_mango_test.exs @@ -546,6 +546,59 @@ defmodule PartitionMangoTest do assert_correct_partition(partitions, "foo") end + @tag :with_partitioned_db + test "partitioned query with query server config set", context do + db_name = context[:db_name] + create_partition_docs(db_name) + create_index(db_name, ["value"]) + + # this is to test that we bypass partition_query_limit for mango + set_config({"query_server_config", "partition_query_limit", "1"}) + + url = "/#{db_name}/_partition/foo/_find" + + resp = + Couch.post( + url, + body: %{ + selector: %{ + value: %{ + "$gte": 6, + "$lt": 16 + } + }, + limit: 3 + } + ) + + assert resp.status_code == 200 + partitions = get_partitions(resp) + assert length(partitions) == 3 + assert_correct_partition(partitions, "foo") + + %{:body => %{"bookmark" => bookmark}} = resp + + resp = + Couch.post( + url, + body: %{ + selector: %{ + value: %{ + "$gte": 6, + "$lt": 16 + } + }, + limit: 3, + bookmark: bookmark + } + ) + + assert resp.status_code == 200 + partitions = get_partitions(resp) + assert length(partitions) == 2 + assert_correct_partition(partitions, "foo") + end + @tag :with_partitioned_db test "global query uses global index", context do db_name = context[:db_name] -- cgit v1.2.1 From 3004513c12592b19b5b77a6edcfdef5f4450716d Mon Sep 17 00:00:00 2001 From: Tony Sun Date: Thu, 27 Aug 2020 12:55:05 -0700 Subject: update dev/run formatting to adhere to python format checks --- dev/run | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/dev/run b/dev/run index 573c80c9b..6d8bc5201 100755 --- a/dev/run +++ b/dev/run @@ -427,7 +427,10 @@ def boot_haproxy(ctx): def hack_default_ini(ctx, node, contents): contents = re.sub( - "^\[httpd\]$", "[httpd]\nenable = true", contents, flags=re.MULTILINE, + "^\[httpd\]$", + "[httpd]\nenable = true", + contents, + flags=re.MULTILINE, ) if ctx["enable_erlang_views"]: -- cgit v1.2.1 From c14569c9f4d568e45229641e31e5d3790a813a9f Mon Sep 17 00:00:00 2001 From: Tony Sun Date: Sat, 29 Aug 2020 09:43:09 -0700 Subject: fix bookmark passing with text indexes Previously, we passed in the unpacked version of the bookmark with the cursor inside the options field. This worked fine for _find because we didn't need to return it to the user. But for _explain, we return the value back as unpacked tuple instead of a string and jiffy:encode/1 complains. Now we correctly extract the bookmark out of options, unpack it, and then pass it separately in it's own field. This way options retains it's original string form for the user so that invalid_ejson is not thrown. --- src/mango/src/mango_cursor_text.erl | 17 +++++++++-------- src/mango/test/08-text-limit-test.py | 10 ++++++++++ 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/src/mango/src/mango_cursor_text.erl b/src/mango/src/mango_cursor_text.erl index 43ef84e4c..5989f342e 100644 --- a/src/mango/src/mango_cursor_text.erl +++ b/src/mango/src/mango_cursor_text.erl @@ -43,7 +43,7 @@ }). -create(Db, Indexes, Selector, Opts0) -> +create(Db, Indexes, Selector, Opts) -> Index = case Indexes of [Index0] -> Index0; @@ -51,7 +51,7 @@ create(Db, Indexes, Selector, Opts0) -> ?MANGO_ERROR(multiple_text_indexes) end, - Opts = unpack_bookmark(couch_db:name(Db), Opts0), + Bookmark = unpack_bookmark(couch_db:name(Db), Opts), DreyfusLimit = get_dreyfus_limit(), Limit = erlang:min(DreyfusLimit, couch_util:get_value(limit, Opts, mango_opts:default_limit())), @@ -66,7 +66,8 @@ create(Db, Indexes, Selector, Opts0) -> opts = Opts, limit = Limit, skip = Skip, - fields = Fields + fields = Fields, + bookmark = Bookmark }}. @@ -90,7 +91,8 @@ execute(Cursor, UserFun, UserAcc) -> skip = Skip, selector = Selector, opts = Opts, - execution_stats = Stats + execution_stats = Stats, + bookmark = Bookmark } = Cursor, Query = mango_selector_text:convert(Selector), QueryArgs = #index_query_args{ @@ -104,7 +106,7 @@ execute(Cursor, UserFun, UserAcc) -> dbname = couch_db:name(Db), ddocid = ddocid(Idx), idx_name = mango_idx:name(Idx), - bookmark = get_bookmark(Opts), + bookmark = Bookmark, limit = Limit, skip = Skip, query_args = QueryArgs, @@ -282,7 +284,7 @@ pack_bookmark(Bookmark) -> unpack_bookmark(DbName, Opts) -> - NewBM = case lists:keyfind(bookmark, 1, Opts) of + case lists:keyfind(bookmark, 1, Opts) of {_, nil} -> []; {_, Bin} -> @@ -291,8 +293,7 @@ unpack_bookmark(DbName, Opts) -> catch _:_ -> ?MANGO_ERROR({invalid_bookmark, Bin}) end - end, - lists:keystore(bookmark, 1, Opts, {bookmark, NewBM}). + end. ddocid(Idx) -> diff --git a/src/mango/test/08-text-limit-test.py b/src/mango/test/08-text-limit-test.py index ae827813d..ef0509ff3 100644 --- a/src/mango/test/08-text-limit-test.py +++ b/src/mango/test/08-text-limit-test.py @@ -133,3 +133,13 @@ class LimitTests(mango.LimitDocsTextTests): assert json["bookmark"] != bm bm = json["bookmark"] assert len(seen_docs) == len(limit_docs.DOCS) + + def run_explain_check(self, size): + q = {"age": {"$gt": 0}} + seen_docs = set() + bm = None + results1 = self.db.find(q, limit=size, bookmark=bm, return_raw=True) + assert results1["bookmark"] != bm + bm = results1["bookmark"] + results2 = self.db.find(q, limit=size, bookmark=bm, explain=True) + assert results2["bookmark"] == bm -- cgit v1.2.1 From 6235f0f92b27f755cfea3cd6ab8464a71ca23ecb Mon Sep 17 00:00:00 2001 From: ILYA Khlopotov Date: Fri, 21 Aug 2020 08:32:03 -0700 Subject: Fix ordering of page_size based pagination for views The pagination relied on id of the document. However for views it should use combination of key and id. --- src/chttpd/src/chttpd_db.erl | 8 +- src/chttpd/src/chttpd_view.erl | 8 +- src/chttpd/test/exunit/pagination_test.exs | 242 +++++++++++++++++++++++++++-- src/couch_views/src/couch_views_http.erl | 31 ++-- 4 files changed, 263 insertions(+), 26 deletions(-) diff --git a/src/chttpd/src/chttpd_db.erl b/src/chttpd/src/chttpd_db.erl index 8acccb461..c458cba12 100644 --- a/src/chttpd/src/chttpd_db.erl +++ b/src/chttpd/src/chttpd_db.erl @@ -864,7 +864,9 @@ paginate_multi_all_docs_view(Req, Db, OP, Args0, Queries) -> ArgQueries = chttpd_view:parse_queries(Req, Args1, Queries, fun(QArgs) -> set_namespace(OP, QArgs) end), - KeyFun = fun({Props}) -> couch_util:get_value(id, Props) end, + KeyFun = fun({Props}) -> + {couch_util:get_value(id, Props), undefined} + end, #mrargs{page_size = PageSize} = Args0, #httpd{path_parts = Parts} = Req, UpdateSeq = fabric2_db:get_update_seq(Db), @@ -911,7 +913,9 @@ paginate_all_docs_view(Req, Db, Args0, OP) -> Args1 = Args0#mrargs{view_type=map}, Args2 = chttpd_view:validate_args(Req, Args1), Args3 = set_namespace(OP, Args2), - KeyFun = fun({Props}) -> couch_util:get_value(id, Props) end, + KeyFun = fun({Props}) -> + {couch_util:get_value(id, Props), undefined} + end, #httpd{path_parts = Parts} = Req, UpdateSeq = fabric2_db:get_update_seq(Db), EtagTerm = {Parts, UpdateSeq, Args3}, diff --git a/src/chttpd/src/chttpd_view.erl b/src/chttpd/src/chttpd_view.erl index 8e2a08e2b..8d401013c 100644 --- a/src/chttpd/src/chttpd_view.erl +++ b/src/chttpd/src/chttpd_view.erl @@ -58,7 +58,9 @@ paginate_multi_query_view(Req, Db, DDoc, ViewName, Args0, Queries) -> ArgQueries = parse_queries(Req, Args0, Queries, fun(QueryArg) -> couch_mrview_util:set_view_type(QueryArg, ViewName, Views) end), - KeyFun = fun({Props}) -> couch_util:get_value(id, Props) end, + KeyFun = fun({Props}) -> + {couch_util:get_value(id, Props), couch_util:get_value(key, Props)} + end, #mrargs{page_size = PageSize} = Args0, #httpd{path_parts = Parts} = Req, UpdateSeq = fabric2_db:get_update_seq(Db), @@ -100,7 +102,9 @@ stream_fabric_query_view(Db, Req, DDoc, ViewName, Args) -> paginate_fabric_query_view(Db, Req, DDoc, ViewName, Args0) -> - KeyFun = fun({Props}) -> couch_util:get_value(id, Props) end, + KeyFun = fun({Props}) -> + {couch_util:get_value(id, Props), couch_util:get_value(key, Props)} + end, #httpd{path_parts = Parts} = Req, UpdateSeq = fabric2_db:get_update_seq(Db), ETagTerm = {Parts, UpdateSeq, Args0}, diff --git a/src/chttpd/test/exunit/pagination_test.exs b/src/chttpd/test/exunit/pagination_test.exs index 7fd962381..6544017df 100644 --- a/src/chttpd/test/exunit/pagination_test.exs +++ b/src/chttpd/test/exunit/pagination_test.exs @@ -68,6 +68,25 @@ defmodule Couch.Test.Pagination do %{view_name: "all", ddoc_id: ddoc_id} end + defp with_same_key_docs(context) do + assert Map.has_key?(context, :n_docs), "Please define '@describetag n_docs: 10'" + + docs = + for id <- 1..context.n_docs do + str_id = docid(id) + %{"_id" => str_id, "integer" => id, "string" => docid(div(id, context.page_size))} + end + + docs = + docs + |> Enum.map(fn doc -> + created_doc = create_doc(context.session, context.db_name, doc) + Map.merge(doc, created_doc) + end) + + %{docs: docs} + end + defp all_docs(context) do assert Map.has_key?(context, :page_size), "Please define '@describetag page_size: 4'" @@ -86,6 +105,50 @@ defmodule Couch.Test.Pagination do } end + defp paginate_queries(context, opts) do + paginate_queries(context, [], opts) + end + + defp paginate_queries(context, acc, opts) do + {paginate_opts, client_opts} = Keyword.split(opts, [:url, :direction]) + + resp = + Couch.Session.post(context.session, Keyword.get(paginate_opts, :url), client_opts) + + results = resp.body["results"] + view_url = String.replace_suffix(Keyword.get(paginate_opts, :url), "/queries", "") + + opts = + opts + |> Keyword.replace!(:url, view_url) + |> Keyword.delete(:body) + + final = + Enum.map(results, fn result -> + paginate(context, result, [Map.get(result, "rows")], opts) + end) + + final + end + + defp paginate(context, current, acc, opts) do + {paginate_opts, client_opts} = Keyword.split(opts, [:url, :direction]) + direction_key = Keyword.get(paginate_opts, :direction, "next") + + if Map.has_key?(current, direction_key) do + bookmark = current[direction_key] + client_opts = Keyword.replace!(client_opts, :query, %{bookmark: bookmark}) + + resp = + Couch.Session.get(context.session, Keyword.get(paginate_opts, :url), client_opts) + + result = resp.body + paginate(context, result, [Map.get(result, "rows") | acc], opts) + else + Enum.reverse(acc) + end + end + defp paginate(context) do if Map.has_key?(context.response, "next") do bookmark = context.response["next"] @@ -148,7 +211,8 @@ defmodule Couch.Test.Pagination do docs |> Enum.map(fn doc -> - create_doc(session, db_name, doc) + created_doc = create_doc(session, db_name, doc) + Map.merge(doc, created_doc) end) end @@ -157,9 +221,11 @@ defmodule Couch.Test.Pagination do end defp make_docs(id_range) do + max = Enum.max(id_range) + for id <- id_range do str_id = docid(id) - %{"_id" => str_id, "integer" => id, "string" => str_id} + %{"_id" => str_id, "integer" => id, "string" => docid(max - id)} end end @@ -339,8 +405,8 @@ defmodule Couch.Test.Pagination do assert resp.status_code == 200, "got error #{inspect(resp.body)}" [q1, q2] = resp.body["results"] - q1 = Enum.map(q1["rows"], fn row -> row["id"] end) - q2 = Enum.map(q2["rows"], fn row -> row["id"] end) + q1 = Enum.map(q1["rows"], fn row -> row["key"] end) + q2 = Enum.map(q2["rows"], fn row -> row["key"] end) assert q1 == Enum.reverse(q2) assert q1 == Enum.sort(q1) end @@ -361,8 +427,8 @@ defmodule Couch.Test.Pagination do assert resp.status_code == 200, "got error #{inspect(resp.body)}" [q1, q2] = resp.body["results"] - q1 = Enum.map(q1["rows"], fn row -> row["id"] end) - q2 = Enum.map(q2["rows"], fn row -> row["id"] end) + q1 = Enum.map(q1["rows"], fn row -> row["key"] end) + q2 = Enum.map(q2["rows"], fn row -> row["key"] end) assert ctx.page_size == length(q1) assert q2 == [] end @@ -943,7 +1009,7 @@ defmodule Couch.Test.Pagination do assert resp.status_code == 200, "got error #{inspect(resp.body)}" next_bookmark = resp.body["next"] - first_page_ids = Enum.map(resp.body["rows"], fn row -> row["id"] end) + first_page_keys = Enum.map(resp.body["rows"], fn row -> row["key"] end) resp = Couch.Session.get( @@ -963,8 +1029,8 @@ defmodule Couch.Test.Pagination do ) assert resp.status_code == 200, "got error #{inspect(resp.body)}" - ids = Enum.map(resp.body["rows"], fn row -> row["id"] end) - assert first_page_ids == ids + keys = Enum.map(resp.body["rows"], fn row -> row["key"] end) + assert first_page_keys == keys end end end @@ -1163,7 +1229,165 @@ defmodule Couch.Test.Pagination do assert length(result["rows"]) > ctx.page_size end) end + + test "can retrieve all pages", ctx do + [descending_query, limit_query] = + paginate_queries( + ctx, + url: + "/#{ctx.db_name}/_design/#{ctx.ddoc_id}/_view/#{ctx.view_name}/queries", + query: %{page_size: ctx.page_size, descending: ctx.descending}, + body: :jiffy.encode(ctx.queries) + ) + + results = List.flatten(descending_query) + assert ctx.n_docs == length(results) + expected_key_order = :descending + expected_ids_order = :ascending + + assert expected_key_order == ordering?(results, "key"), + "expecting keys in #{expected_key_order} order, got: #{ + inspect(field(results, "key")) + }" + + assert expected_ids_order == ordering?(results, "id"), + "expecting ids in #{expected_ids_order} order, got: #{ + inspect(field(results, "id")) + }" + + results = List.flatten(limit_query) + [_descendiing_query, query] = ctx.queries[:queries] + + expected_length = + if ctx.n_docs - query.skip > query.limit do + query.limit + else + query.limit - query.skip + end + + assert expected_length == length(results) + + {expected_key_order, expected_ids_order} = + if ctx.descending do + {:descending, :ascending} + else + {:ascending, :descending} + end + + assert expected_key_order == ordering?(results, "key"), + ~s(expecting keys in #{expected_key_order} order, got: #{ + inspect(field(results, "key")) + }) + + assert expected_ids_order == ordering?(results, "id"), + ~s(expecting keys in #{expected_ids_order} order, got: #{ + inspect(field(results, "id")) + }) + + keys = Enum.map(results, &Map.get(&1, "key")) + end end end end + + for descending <- [false, true] do + for n <- [4, 9] do + describe "Pagination API (10 docs) : /{db}/_design/{ddoc}/_view/queries?page_size=#{ + n + }&descending=#{descending} : pages with same key" do + @describetag descending: descending + @describetag n_docs: 10 + @describetag page_size: n + + @describetag queries: %{ + queries: [ + %{ + descending: true + }, + %{ + limit: n + 1, + skip: 2 + } + ] + } + setup [:with_session, :random_db, :with_view, :with_same_key_docs] + + test "handle same key", ctx do + ''' + make sure the results are first sorted by key and then by id + ''' + + [descending_query, limit_query] = + paginate_queries( + ctx, + url: + "/#{ctx.db_name}/_design/#{ctx.ddoc_id}/_view/#{ctx.view_name}/queries", + query: %{page_size: ctx.page_size, descending: ctx.descending}, + body: :jiffy.encode(ctx.queries) + ) + + aggregate = fn pages -> + Enum.reduce(pages, {[], %{}}, fn page, acc -> + Enum.reduce(page, acc, fn row, {keys, in_acc} -> + id = Map.get(row, "id") + key = Map.get(row, "key") + {keys ++ [key], Map.update(in_acc, key, [id], &(&1 ++ [id]))} + end) + end) + end + + {keys, aggregated} = aggregate.(descending_query) + + # keys are sorted in reverse order + assert :descending == ordering?(keys), + ~s(expecting keys in descending order, got: #{inspect(keys)}) + + Enum.each(Map.values(aggregated), fn ids -> + # keys are sorted in reverse order by id + assert :descending == ordering?(ids), + ~s(expecting ids in descending order, got: #{inspect(ids)}) + end) + + {keys, aggregated} = aggregate.(limit_query) + + {expected_key_order, expected_ids_order} = + if ctx.descending do + {:descending, :descending} + else + {:ascending, :ascending} + end + + # keys are sorted + assert expected_key_order == ordering?(keys) or :equal == ordering?(keys), + ~s(expecting keys in #{expected_key_order} order, got: #{inspect(keys)}) + + Enum.each(Map.values(aggregated), fn ids -> + # Keys are sorted by id + assert expected_ids_order == ordering?(ids) or :equal == ordering?(ids), + ~s(expecting ids in #{expected_ids_order} order, got: #{inspect(ids)}) + end) + end + end + end + end + + defp ordering?(maps, key) do + ordering?(field(maps, key)) + end + + defp ordering?(elements) do + ascending = Enum.sort(elements) + descending = Enum.reverse(Enum.sort(elements)) + + case {ascending, descending} do + {^elements, ^elements} -> :equal + {^elements, _} -> :ascending + {_, ^descending} -> :descending + _ -> :unordered + end + end + + defp field(maps, key) do + Enum.map(maps, &Map.get(&1, key)) + end end diff --git a/src/couch_views/src/couch_views_http.erl b/src/couch_views/src/couch_views_http.erl index 2aa9e9e85..e21acfb9f 100644 --- a/src/couch_views/src/couch_views_http.erl +++ b/src/couch_views/src/couch_views_http.erl @@ -147,7 +147,7 @@ do_paginated(PageSize, QueriesArgs, KeyFun, Fun) when is_list(QueriesArgs) -> maybe_add_next_bookmark(OriginalLimit, PageSize, Args0, Response, Items, KeyFun) -> #mrargs{ page_size = RequestedLimit, - extra = Extra + extra = Extra0 } = Args0, case check_completion(OriginalLimit, RequestedLimit, Items) of {Rows, nil} -> @@ -156,15 +156,15 @@ maybe_add_next_bookmark(OriginalLimit, PageSize, Args0, Response, Items, KeyFun) total_rows => length(Rows) }); {Rows, Next} -> - FirstKey = first_key(KeyFun, Rows), - NextKey = KeyFun(Next), - if is_binary(NextKey) -> ok; true -> - throw("Provided KeyFun should return binary") - end, + {FirstId, FirstKey} = first_key(KeyFun, Rows), + {NextId, NextKey} = KeyFun(Next), + Extra1 = lists:keystore(fid, 1, Extra0, {fid, FirstId}), + Extra2 = lists:keystore(fk, 1, Extra1, {fk, FirstKey}), Args = Args0#mrargs{ page_size = PageSize, start_key = NextKey, - extra = lists:keystore(fk, 1, Extra, {fk, FirstKey}) + start_key_docid = NextId, + extra = Extra2 }, Bookmark = bookmark_encode(Args), maps:merge(Response, #{ @@ -177,18 +177,23 @@ maybe_add_next_bookmark(OriginalLimit, PageSize, Args0, Response, Items, KeyFun) maybe_add_previous_bookmark(#mrargs{extra = Extra} = Args, #{rows := Rows} = Result, KeyFun) -> StartKey = couch_util:get_value(fk, Extra), - case {StartKey, first_key(KeyFun, Rows)} of - {undefined, _} -> + StartId = couch_util:get_value(fid, Extra), + case {{StartId, StartKey}, first_key(KeyFun, Rows)} of + {{undefined, undefined}, {_, _}} -> Result; - {_, undefined} -> + {{_, _}, {undefined, undefined}} -> Result; - {StartKey, StartKey} -> + {{StartId, _}, {StartId, _}} -> Result; - {StartKey, EndKey} -> + {{undefined, StartKey}, {undefined, StartKey}} -> + Result; + {{StartId, StartKey}, {EndId, EndKey}} -> Bookmark = bookmark_encode( Args#mrargs{ start_key = StartKey, + start_key_docid = StartId, end_key = EndKey, + end_key_docid = EndId, inclusive_end = false } ), @@ -197,7 +202,7 @@ maybe_add_previous_bookmark(#mrargs{extra = Extra} = Args, #{rows := Rows} = Res first_key(_KeyFun, []) -> - undefined; + {undefined, undefined}; first_key(KeyFun, [First | _]) -> KeyFun(First). -- cgit v1.2.1 From bf61a0051f6a0ae88cc40b7e6cd7710033fef33f Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Mon, 31 Aug 2020 18:59:15 -0400 Subject: Handle empty maps in active_tasks data structure Empty maps maybe useful to initialize the data in some cases but we don't want to emit an entry in the output with just an empty map. While at it, add some tests to check the basics. --- src/fabric/src/fabric2_active_tasks.erl | 3 +- src/fabric/test/fabric2_active_tasks_tests.erl | 120 +++++++++++++++++++++++++ 2 files changed, 122 insertions(+), 1 deletion(-) create mode 100644 src/fabric/test/fabric2_active_tasks_tests.erl diff --git a/src/fabric/src/fabric2_active_tasks.erl b/src/fabric/src/fabric2_active_tasks.erl index 2c03ec3a9..e706ebaa4 100644 --- a/src/fabric/src/fabric2_active_tasks.erl +++ b/src/fabric/src/fabric2_active_tasks.erl @@ -34,7 +34,8 @@ get_active_tasks() -> {ok, Data} = couch_jobs:get_job_data(JTx, Type, JobId), case maps:get(?ACTIVE_TASK_INFO, Data, not_found) of not_found -> false; - Info -> {true, Info} + #{} = Map when map_size(Map) == 0 -> false; + #{} = Info -> {true, Info} end end, JobIds), TaskAcc ++ Tasks diff --git a/src/fabric/test/fabric2_active_tasks_tests.erl b/src/fabric/test/fabric2_active_tasks_tests.erl new file mode 100644 index 000000000..891450027 --- /dev/null +++ b/src/fabric/test/fabric2_active_tasks_tests.erl @@ -0,0 +1,120 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(fabric2_active_tasks_tests). + + +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("eunit/include/eunit.hrl"). +-include_lib("couch/include/couch_db.hrl"). +-include("fabric2_test.hrl"). + + +-define(JOB_TYPE, <<"fabric2_active_tasks_tests_type">>). +-define(JOB_ID, <<"job_id">>). + + +active_tasks_test_() -> + { + "Test cleanup of stale indices", + { + setup, + fun setup_all/0, + fun cleanup_all/1, + { + foreach, + fun setup/0, + fun cleanup/1, + [ + ?TDEF_FE(no_active_tasks_defined), + ?TDEF_FE(empty_map_info), + ?TDEF_FE(can_read_active_tasks), + ?TDEF_FE(only_running_tasks_appear) + ] + } + } + }. + + +setup_all() -> + Ctx = test_util:start_couch([fabric, couch_jobs]), + couch_jobs:set_type_timeout(?JOB_TYPE, 5000), + meck:new(couch_jobs, [passthrough]), + meck:expect(couch_jobs, get_types, 1, [?JOB_TYPE]), + Ctx. + + +cleanup_all(Ctx) -> + meck:unload(), + test_util:stop_couch(Ctx). + + +setup() -> + ok = couch_jobs:add(undefined, ?JOB_TYPE, ?JOB_ID, #{}), + ok. + + +cleanup(_) -> + meck:reset(couch_jobs), + couch_jobs:remove(undefined, ?JOB_TYPE, ?JOB_ID). + + +no_active_tasks_defined(_) -> + {ok, Job1, #{}} = couch_jobs:accept(?JOB_TYPE), + ?assertEqual([], fabric2_active_tasks:get_active_tasks()), + ok = couch_jobs:finish(undefined, Job1). + + +empty_map_info(_) -> + {ok, Job1, Data} = couch_jobs:accept(?JOB_TYPE), + + Data1 = fabric2_active_tasks:update_active_task_info(Data, #{}), + {ok, Job2} = couch_jobs:update(undefined, Job1, Data1), + ?assertEqual([], fabric2_active_tasks:get_active_tasks()), + ok = couch_jobs:finish(undefined, Job2). + + +can_read_active_tasks(_) -> + {ok, Job1, Data} = couch_jobs:accept(?JOB_TYPE), + + Info = #{<<"x">> => 1}, + Data1 = fabric2_active_tasks:update_active_task_info(Data, Info), + {ok, Job2} = couch_jobs:update(undefined, Job1, Data1), + ?assertEqual([#{<<"x">> => 1}], fabric2_active_tasks:get_active_tasks()), + + Info1 = fabric2_active_tasks:get_active_task_info(Data1), + Info2 = Info1#{<<"y">> => 2}, + Data2 = fabric2_active_tasks:update_active_task_info(Data1, Info2), + {ok, Job3} = couch_jobs:update(undefined, Job2, Data2), + ?assertEqual([#{<<"x">> => 1, <<"y">> => 2}], + fabric2_active_tasks:get_active_tasks()), + ok = couch_jobs:finish(undefined, Job3). + + +only_running_tasks_appear(_) -> + {ok, Job1, Data} = couch_jobs:accept(?JOB_TYPE), + + Info = #{<<"x">> => 1}, + Data1 = fabric2_active_tasks:update_active_task_info(Data, Info), + {ok, Job2} = couch_jobs:update(undefined, Job1, Data1), + + ?assertEqual([#{<<"x">> => 1}], fabric2_active_tasks:get_active_tasks()), + {ok, _} = couch_jobs:resubmit(undefined, Job2), + + ok = couch_jobs:finish(undefined, Job2), + + ?assertEqual([], fabric2_active_tasks:get_active_tasks()), + {ok, Job3, #{}} = couch_jobs:accept(?JOB_TYPE), + ?assertEqual([#{<<"x">> => 1}], fabric2_active_tasks:get_active_tasks()), + + ok = couch_jobs:finish(undefined, Job3), + ?assertEqual([], fabric2_active_tasks:get_active_tasks()). -- cgit v1.2.1 From 253d64adff235826e176f82d7cd081ad45bc0cab Mon Sep 17 00:00:00 2001 From: jiangph Date: Tue, 1 Sep 2020 13:16:15 +0800 Subject: Allow to continue to cleanup search index even if there is invalid ddoc In some situation where design document for search index created by customer is not valid, the _search_cleanup endpoint will stop to clean up. This will leave some search index orphan. The change is to allow to continue to clean up search index even if there is invalid design document for search. --- src/dreyfus/src/dreyfus_fabric_cleanup.erl | 16 ++++++++++------ src/dreyfus/test/elixir/test/search_test.exs | 25 +++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 6 deletions(-) diff --git a/src/dreyfus/src/dreyfus_fabric_cleanup.erl b/src/dreyfus/src/dreyfus_fabric_cleanup.erl index 2840a2f2d..681712748 100644 --- a/src/dreyfus/src/dreyfus_fabric_cleanup.erl +++ b/src/dreyfus/src/dreyfus_fabric_cleanup.erl @@ -30,12 +30,16 @@ go(DbName) -> ok. active_sigs(#doc{body={Fields}}=Doc) -> - {RawIndexes} = couch_util:get_value(<<"indexes">>, Fields, {[]}), - {IndexNames, _} = lists:unzip(RawIndexes), - [begin - {ok, Index} = dreyfus_index:design_doc_to_index(Doc, IndexName), - Index#index.sig - end || IndexName <- IndexNames]. + try + {RawIndexes} = couch_util:get_value(<<"indexes">>, Fields, {[]}), + {IndexNames, _} = lists:unzip(RawIndexes), + [begin + {ok, Index} = dreyfus_index:design_doc_to_index(Doc, IndexName), + Index#index.sig + end || IndexName <- IndexNames] + catch error:{badmatch, _Error} -> + [] + end. cleanup_local_purge_doc(DbName, ActiveSigs) -> {ok, BaseDir} = clouseau_rpc:get_root_dir(), diff --git a/src/dreyfus/test/elixir/test/search_test.exs b/src/dreyfus/test/elixir/test/search_test.exs index e524a5cf4..829b3395f 100644 --- a/src/dreyfus/test/elixir/test/search_test.exs +++ b/src/dreyfus/test/elixir/test/search_test.exs @@ -37,6 +37,20 @@ defmodule SearchTest do assert Map.has_key?(resp.body, "ok") == true end + def create_invalid_ddoc(db_name, opts \\ %{}) do + invalid_ddoc = %{ + :indexes => [ + %{"name" => "foo", "ddoc" => "bar", "type" => "text"}, + ] + } + + ddoc = Enum.into(opts, invalid_ddoc) + + resp = Couch.put("/#{db_name}/_design/search", body: ddoc) + assert resp.status_code in [201, 202] + assert Map.has_key?(resp.body, "ok") == true + end + def get_items (resp) do %{:body => %{"rows" => rows}} = resp Enum.map(rows, fn row -> row["doc"]["item"] end) @@ -198,4 +212,15 @@ defmodule SearchTest do ids = get_items(resp) assert Enum.sort(ids) == ["apple"] end + + @tag :with_db + test "clean up search index with invalid design document", context do + db_name = context[:db_name] + create_search_docs(db_name) + create_ddoc(db_name) + create_invalid_ddoc(db_name) + + resp = Couch.post("/#{db_name}/_search_cleanup") + assert resp.status_code in [201, 202] + end end -- cgit v1.2.1 From a57b7170099e1a6830cef53f3ffff6c874d7b75e Mon Sep 17 00:00:00 2001 From: Alessio Biancalana Date: Mon, 31 Aug 2020 19:01:49 +0200 Subject: Tag elixir tests into meaningful groups --- test/elixir/test/all_docs_test.exs | 1 + test/elixir/test/attachment_names_test.exs | 1 + test/elixir/test/attachment_paths_test.exs | 1 + test/elixir/test/attachment_ranges_test.exs | 1 + test/elixir/test/attachment_views_test.exs | 1 + test/elixir/test/attachments_multipart_test.exs | 1 + test/elixir/test/attachments_test.exs | 1 + test/elixir/test/auth_cache_test.exs | 1 + test/elixir/test/basics_test.exs | 1 + test/elixir/test/batch_save_test.exs | 1 + test/elixir/test/bulk_docs_test.exs | 1 + test/elixir/test/changes_async_test.exs | 1 + test/elixir/test/changes_test.exs | 1 + test/elixir/test/cluster_with_quorum_test.exs | 1 + test/elixir/test/cluster_without_quorum_test.exs | 1 + test/elixir/test/coffee_test.exs | 1 + test/elixir/test/compact_test.exs | 1 + test/elixir/test/config_test.exs | 1 + test/elixir/test/conflicts_test.exs | 1 + test/elixir/test/cookie_auth_test.exs | 1 + test/elixir/test/copy_doc_test.exs | 1 + test/elixir/test/design_docs_query_test.exs | 1 + test/elixir/test/design_docs_test.exs | 1 + test/elixir/test/design_options_test.exs | 1 + test/elixir/test/design_paths_test.exs | 1 + test/elixir/test/erlang_views_test.exs | 1 + test/elixir/test/etags_head_test.exs | 1 + test/elixir/test/form_submit_test.exs | 1 + test/elixir/test/helper_test.exs | 3 +++ test/elixir/test/http_test.exs | 1 + test/elixir/test/invalid_docids_test.exs | 1 + test/elixir/test/jsonp_test.exs | 1 + test/elixir/test/jwtauth_test.exs | 1 + test/elixir/test/large_docs_text.exs | 2 ++ test/elixir/test/local_docs_test.exs | 1 + test/elixir/test/lots_of_docs_test.exs | 2 ++ test/elixir/test/method_override_test.exs | 1 + test/elixir/test/multiple_rows_test.exs | 1 + test/elixir/test/partition_all_docs_test.exs | 3 +++ test/elixir/test/partition_crud_test.exs | 3 +++ test/elixir/test/partition_ddoc_test.exs | 3 +++ test/elixir/test/partition_design_docs_test.exs | 3 +++ test/elixir/test/partition_mango_test.exs | 4 ++++ test/elixir/test/partition_size_limit_test.exs | 3 +++ test/elixir/test/partition_size_test.exs | 3 +++ test/elixir/test/partition_view_test.exs | 3 +++ test/elixir/test/partition_view_update_test.exs | 4 ++++ test/elixir/test/proxyauth_test.exs | 1 + test/elixir/test/purge_test.exs | 1 + test/elixir/test/reader_acl_test.exs | 1 + test/elixir/test/recreate_doc_test.exs | 1 + test/elixir/test/reduce_builtin_test.exs | 1 + test/elixir/test/reduce_false_test.exs | 1 + test/elixir/test/reduce_test.exs | 1 + test/elixir/test/replication_test.exs | 3 +++ test/elixir/test/replicator_db_bad_rep_id_test.exs | 3 +++ test/elixir/test/replicator_db_by_doc_id_test.exs | 3 +++ test/elixir/test/reshard_all_docs_test.exs | 2 ++ test/elixir/test/reshard_basic_test.exs | 2 ++ test/elixir/test/reshard_changes_feed.exs | 2 ++ test/elixir/test/rev_stemming_test.exs | 1 + test/elixir/test/rewrite_test.exs | 1 + test/elixir/test/security_validation_test.exs | 1 + test/elixir/test/update_documents_test.exs | 2 ++ test/elixir/test/users_db_test.exs | 1 + test/elixir/test/utf8_test.exs | 1 + test/elixir/test/uuids_test.exs | 3 +++ test/elixir/test/view_collation_raw_test.exs | 2 ++ test/elixir/test/view_collation_test.exs | 2 ++ test/elixir/test/view_compaction_test.exs | 3 +++ test/elixir/test/view_multi_key_all_docs_test.exs | 2 ++ test/elixir/test/view_multi_key_design_test.exs | 2 ++ test/elixir/test/view_offsets_test.exs | 1 + test/elixir/test/view_pagination_test.exs | 1 + test/elixir/test/view_sandboxing_test.exs | 2 ++ test/elixir/test/view_test.exs | 1 + test/elixir/test/view_update_seq_test.exs | 1 + 77 files changed, 120 insertions(+) diff --git a/test/elixir/test/all_docs_test.exs b/test/elixir/test/all_docs_test.exs index a091dce55..b9fb6f241 100644 --- a/test/elixir/test/all_docs_test.exs +++ b/test/elixir/test/all_docs_test.exs @@ -2,6 +2,7 @@ defmodule AllDocsTest do use CouchTestCase @moduletag :all_docs + @moduletag kind: :single_node @moduledoc """ Test CouchDB _all_docs diff --git a/test/elixir/test/attachment_names_test.exs b/test/elixir/test/attachment_names_test.exs index 4593a8504..66596c865 100644 --- a/test/elixir/test/attachment_names_test.exs +++ b/test/elixir/test/attachment_names_test.exs @@ -2,6 +2,7 @@ defmodule AttachmentNamesTest do use CouchTestCase @moduletag :attachments + @moduletag kind: :single_node @good_doc """ { diff --git a/test/elixir/test/attachment_paths_test.exs b/test/elixir/test/attachment_paths_test.exs index b776feabf..4c79eca1c 100644 --- a/test/elixir/test/attachment_paths_test.exs +++ b/test/elixir/test/attachment_paths_test.exs @@ -2,6 +2,7 @@ defmodule AttachmentPathsTest do use CouchTestCase @moduletag :attachments + @moduletag kind: :single_node @bin_att_doc """ { diff --git a/test/elixir/test/attachment_ranges_test.exs b/test/elixir/test/attachment_ranges_test.exs index 01c1239bc..de39836b0 100644 --- a/test/elixir/test/attachment_ranges_test.exs +++ b/test/elixir/test/attachment_ranges_test.exs @@ -2,6 +2,7 @@ defmodule AttachmentRangesTest do use CouchTestCase @moduletag :attachments + @moduletag kind: :single_node @moduledoc """ Test CouchDB attachment range requests diff --git a/test/elixir/test/attachment_views_test.exs b/test/elixir/test/attachment_views_test.exs index 3da62f042..044008531 100644 --- a/test/elixir/test/attachment_views_test.exs +++ b/test/elixir/test/attachment_views_test.exs @@ -2,6 +2,7 @@ defmodule AttachmentViewTest do use CouchTestCase @moduletag :attachments + @moduletag kind: :single_node @moduledoc """ Test CouchDB attachment views requests diff --git a/test/elixir/test/attachments_multipart_test.exs b/test/elixir/test/attachments_multipart_test.exs index f7d5d9519..f635377a1 100644 --- a/test/elixir/test/attachments_multipart_test.exs +++ b/test/elixir/test/attachments_multipart_test.exs @@ -2,6 +2,7 @@ defmodule AttachmentMultipartTest do use CouchTestCase @moduletag :attachments + @moduletag kind: :single_node @moduledoc """ Test CouchDB attachment multipart requests diff --git a/test/elixir/test/attachments_test.exs b/test/elixir/test/attachments_test.exs index 020003377..8e7f7d352 100644 --- a/test/elixir/test/attachments_test.exs +++ b/test/elixir/test/attachments_test.exs @@ -2,6 +2,7 @@ defmodule AttachmentsTest do use CouchTestCase @moduletag :attachments + @moduletag kind: :single_node # MD5 Digests of compressible attachments and therefore Etags # will vary depending on platform gzip implementation. diff --git a/test/elixir/test/auth_cache_test.exs b/test/elixir/test/auth_cache_test.exs index 8b7c29c71..5c446f039 100644 --- a/test/elixir/test/auth_cache_test.exs +++ b/test/elixir/test/auth_cache_test.exs @@ -2,6 +2,7 @@ defmodule AuthCacheTest do use CouchTestCase @moduletag :authentication + @moduletag kind: :single_node @tag :pending @tag :with_db diff --git a/test/elixir/test/basics_test.exs b/test/elixir/test/basics_test.exs index a03fa2922..359ba6040 100644 --- a/test/elixir/test/basics_test.exs +++ b/test/elixir/test/basics_test.exs @@ -2,6 +2,7 @@ defmodule BasicsTest do use CouchTestCase @moduletag :basics + @moduletag kind: :single_node @moduledoc """ Test CouchDB basics. diff --git a/test/elixir/test/batch_save_test.exs b/test/elixir/test/batch_save_test.exs index 030fcdfba..f13fcdc9f 100644 --- a/test/elixir/test/batch_save_test.exs +++ b/test/elixir/test/batch_save_test.exs @@ -2,6 +2,7 @@ defmodule BatchSaveTest do use CouchTestCase @moduletag :batch_save + @moduletag kind: :performance @moduledoc """ Test CouchDB batch save diff --git a/test/elixir/test/bulk_docs_test.exs b/test/elixir/test/bulk_docs_test.exs index 1a7c11045..cbbc53340 100644 --- a/test/elixir/test/bulk_docs_test.exs +++ b/test/elixir/test/bulk_docs_test.exs @@ -2,6 +2,7 @@ defmodule BulkDocsTest do use CouchTestCase @moduletag :bulk_docs + @moduletag kind: :single_node @moduledoc """ Test CouchDB bulk docs diff --git a/test/elixir/test/changes_async_test.exs b/test/elixir/test/changes_async_test.exs index 07afcdc7c..36876aedf 100644 --- a/test/elixir/test/changes_async_test.exs +++ b/test/elixir/test/changes_async_test.exs @@ -2,6 +2,7 @@ defmodule ChangesAsyncTest do use CouchTestCase @moduletag :changes + @moduletag kind: :single_node @moduledoc """ Test CouchDB /{db}/_changes diff --git a/test/elixir/test/changes_test.exs b/test/elixir/test/changes_test.exs index 5bb376b9c..ad579a99a 100644 --- a/test/elixir/test/changes_test.exs +++ b/test/elixir/test/changes_test.exs @@ -2,6 +2,7 @@ defmodule ChangesTest do use CouchTestCase @moduletag :changes + @moduletag kind: :single_node @moduledoc """ Test CouchDB /{db}/_changes diff --git a/test/elixir/test/cluster_with_quorum_test.exs b/test/elixir/test/cluster_with_quorum_test.exs index fc3b28a0b..dc3d66be3 100644 --- a/test/elixir/test/cluster_with_quorum_test.exs +++ b/test/elixir/test/cluster_with_quorum_test.exs @@ -2,6 +2,7 @@ defmodule WithQuorumTest do use CouchTestCase @moduletag :with_quorum_test + @moduletag kind: :cluster @moduledoc """ Test CouchDB API in a cluster without quorum. diff --git a/test/elixir/test/cluster_without_quorum_test.exs b/test/elixir/test/cluster_without_quorum_test.exs index e0095c351..63371f1a6 100644 --- a/test/elixir/test/cluster_without_quorum_test.exs +++ b/test/elixir/test/cluster_without_quorum_test.exs @@ -2,6 +2,7 @@ defmodule WithoutQuorumTest do use CouchTestCase @moduletag :without_quorum_test + @moduletag kind: :degraded_cluster @moduledoc """ Test CouchDB API in a cluster without quorum. diff --git a/test/elixir/test/coffee_test.exs b/test/elixir/test/coffee_test.exs index 3b26f5e59..3c7a1052b 100644 --- a/test/elixir/test/coffee_test.exs +++ b/test/elixir/test/coffee_test.exs @@ -2,6 +2,7 @@ defmodule CoffeeTest do use CouchTestCase @moduletag :coffee + @moduletag kind: :single_node @moduledoc """ Test basic coffeescript functionality. diff --git a/test/elixir/test/compact_test.exs b/test/elixir/test/compact_test.exs index 461a1d347..18aeab2de 100644 --- a/test/elixir/test/compact_test.exs +++ b/test/elixir/test/compact_test.exs @@ -2,6 +2,7 @@ defmodule CompactTest do use CouchTestCase @moduletag :compact + @moduletag kind: :single_node @moduledoc """ Test CouchDB compaction diff --git a/test/elixir/test/config_test.exs b/test/elixir/test/config_test.exs index 53c5bc82e..bb89d8683 100644 --- a/test/elixir/test/config_test.exs +++ b/test/elixir/test/config_test.exs @@ -2,6 +2,7 @@ defmodule ConfigTest do use CouchTestCase @moduletag :config + @moduletag kind: :single_node @moduledoc """ Test CouchDB config API diff --git a/test/elixir/test/conflicts_test.exs b/test/elixir/test/conflicts_test.exs index a45f5c4ed..adf16bc4d 100644 --- a/test/elixir/test/conflicts_test.exs +++ b/test/elixir/test/conflicts_test.exs @@ -2,6 +2,7 @@ defmodule RevisionTest do use CouchTestCase @moduletag :conflicts + @moduletag kind: :single_node @moduledoc """ Test CouchDB conflicts diff --git a/test/elixir/test/cookie_auth_test.exs b/test/elixir/test/cookie_auth_test.exs index abc0fd767..87de1abd3 100644 --- a/test/elixir/test/cookie_auth_test.exs +++ b/test/elixir/test/cookie_auth_test.exs @@ -2,6 +2,7 @@ defmodule CookieAuthTest do use CouchTestCase @moduletag :authentication + @moduletag kind: :single_node @users_db "_users" diff --git a/test/elixir/test/copy_doc_test.exs b/test/elixir/test/copy_doc_test.exs index 4641ff6ea..f227fdac5 100644 --- a/test/elixir/test/copy_doc_test.exs +++ b/test/elixir/test/copy_doc_test.exs @@ -2,6 +2,7 @@ defmodule CopyDocTest do use CouchTestCase @moduletag :copy_doc + @moduletag kind: :single_node @moduledoc """ Test CouchDB Copy Doc diff --git a/test/elixir/test/design_docs_query_test.exs b/test/elixir/test/design_docs_query_test.exs index b439a2e02..9784f8deb 100644 --- a/test/elixir/test/design_docs_query_test.exs +++ b/test/elixir/test/design_docs_query_test.exs @@ -2,6 +2,7 @@ defmodule DesignDocsQueryTest do use CouchTestCase @moduletag :design_docs + @moduletag kind: :single_node @moduledoc """ Test CouchDB /{db}/_design_docs diff --git a/test/elixir/test/design_docs_test.exs b/test/elixir/test/design_docs_test.exs index 258f5f72f..86bdd5aa5 100644 --- a/test/elixir/test/design_docs_test.exs +++ b/test/elixir/test/design_docs_test.exs @@ -2,6 +2,7 @@ defmodule DesignDocsTest do use CouchTestCase @moduletag :design_docs + @moduletag kind: :single_node @design_doc %{ _id: "_design/test", diff --git a/test/elixir/test/design_options_test.exs b/test/elixir/test/design_options_test.exs index 95a938e38..feb47714d 100644 --- a/test/elixir/test/design_options_test.exs +++ b/test/elixir/test/design_options_test.exs @@ -2,6 +2,7 @@ defmodule DesignOptionsTest do use CouchTestCase @moduletag :design_docs + @moduletag kind: :single_node @moduledoc """ Test CouchDB design documents options include_design and local_seq diff --git a/test/elixir/test/design_paths_test.exs b/test/elixir/test/design_paths_test.exs index b3e10c165..f90172a08 100644 --- a/test/elixir/test/design_paths_test.exs +++ b/test/elixir/test/design_paths_test.exs @@ -2,6 +2,7 @@ defmodule DesignPathTest do use CouchTestCase @moduletag :design_docs + @moduletag kind: :single_node @moduledoc """ Test CouchDB design documents path diff --git a/test/elixir/test/erlang_views_test.exs b/test/elixir/test/erlang_views_test.exs index 3346c2274..afe9d6ccb 100644 --- a/test/elixir/test/erlang_views_test.exs +++ b/test/elixir/test/erlang_views_test.exs @@ -2,6 +2,7 @@ defmodule ErlangViewsTest do use CouchTestCase @moduletag :erlang_views + @moduletag kind: :single_node @moduledoc """ basic 'smoke tests' of erlang views. diff --git a/test/elixir/test/etags_head_test.exs b/test/elixir/test/etags_head_test.exs index 9b9ff8bb0..beb12bdd0 100644 --- a/test/elixir/test/etags_head_test.exs +++ b/test/elixir/test/etags_head_test.exs @@ -2,6 +2,7 @@ defmodule EtagsHeadTest do use CouchTestCase @moduletag :etags + @moduletag kind: :single_node @tag :with_db test "etag header on creation", context do diff --git a/test/elixir/test/form_submit_test.exs b/test/elixir/test/form_submit_test.exs index 1baf947ac..099f395fc 100644 --- a/test/elixir/test/form_submit_test.exs +++ b/test/elixir/test/form_submit_test.exs @@ -2,6 +2,7 @@ defmodule FormSubmitTest do use CouchTestCase @moduletag :form_submit + @moduletag kind: :single_node @moduledoc """ Test that form submission is invalid diff --git a/test/elixir/test/helper_test.exs b/test/elixir/test/helper_test.exs index 19d70eac8..1e498a15c 100644 --- a/test/elixir/test/helper_test.exs +++ b/test/elixir/test/helper_test.exs @@ -5,6 +5,9 @@ defmodule HelperTest do Test helper code """ + @moduletag :helper + @moduletag kind: :single_node + test "retry_until handles boolean conditions", _context do retry_until(fn -> true diff --git a/test/elixir/test/http_test.exs b/test/elixir/test/http_test.exs index 09d743060..14cecfe7b 100644 --- a/test/elixir/test/http_test.exs +++ b/test/elixir/test/http_test.exs @@ -2,6 +2,7 @@ defmodule HttpTest do use CouchTestCase @moduletag :http + @moduletag kind: :single_node @tag :with_db test "location header", context do diff --git a/test/elixir/test/invalid_docids_test.exs b/test/elixir/test/invalid_docids_test.exs index edce5cc65..c1d30c5fc 100644 --- a/test/elixir/test/invalid_docids_test.exs +++ b/test/elixir/test/invalid_docids_test.exs @@ -2,6 +2,7 @@ defmodule InvalidDocIDsTest do use CouchTestCase @moduletag :invalid_doc_ids + @moduletag kind: :single_node @moduledoc """ Test invalid document ids diff --git a/test/elixir/test/jsonp_test.exs b/test/elixir/test/jsonp_test.exs index 3fdc2ba5f..2e1934a22 100644 --- a/test/elixir/test/jsonp_test.exs +++ b/test/elixir/test/jsonp_test.exs @@ -2,6 +2,7 @@ defmodule JsonpTest do use CouchTestCase @moduletag :jsonp + @moduletag kind: :single_node @tag :with_db test "jsonp not configured callbacks", context do diff --git a/test/elixir/test/jwtauth_test.exs b/test/elixir/test/jwtauth_test.exs index 7281ed146..7b6fe4a33 100644 --- a/test/elixir/test/jwtauth_test.exs +++ b/test/elixir/test/jwtauth_test.exs @@ -2,6 +2,7 @@ defmodule JwtAuthTest do use CouchTestCase @moduletag :authentication + @moduletag kind: :single_node test "jwt auth with HMAC secret", _context do diff --git a/test/elixir/test/large_docs_text.exs b/test/elixir/test/large_docs_text.exs index 4d2c5dede..b80add60e 100644 --- a/test/elixir/test/large_docs_text.exs +++ b/test/elixir/test/large_docs_text.exs @@ -2,6 +2,8 @@ defmodule LargeDocsTest do use CouchTestCase @moduletag :large_docs + @moduletag kind: :single_node + @long_string "0123456789\n" @moduledoc """ diff --git a/test/elixir/test/local_docs_test.exs b/test/elixir/test/local_docs_test.exs index ff071f3e6..d7ed137c8 100644 --- a/test/elixir/test/local_docs_test.exs +++ b/test/elixir/test/local_docs_test.exs @@ -2,6 +2,7 @@ defmodule LocalDocsTest do use CouchTestCase @moduletag :local_docs + @moduletag kind: :single_node @moduledoc """ Test CouchDB _local_docs diff --git a/test/elixir/test/lots_of_docs_test.exs b/test/elixir/test/lots_of_docs_test.exs index c0cc99198..6f2e9f7c8 100644 --- a/test/elixir/test/lots_of_docs_test.exs +++ b/test/elixir/test/lots_of_docs_test.exs @@ -2,6 +2,8 @@ defmodule LotsOfDocsTest do use CouchTestCase @moduletag :lots_of_docs + @moduletag kind: :performance + @docs_range 0..499 @moduledoc """ diff --git a/test/elixir/test/method_override_test.exs b/test/elixir/test/method_override_test.exs index c67fe3966..e264a870a 100644 --- a/test/elixir/test/method_override_test.exs +++ b/test/elixir/test/method_override_test.exs @@ -2,6 +2,7 @@ defmodule MethodOverrideTest do use CouchTestCase @moduletag :http + @moduletag kind: :single_node @moduledoc """ Allow broken HTTP clients to fake a full method vocabulary with an diff --git a/test/elixir/test/multiple_rows_test.exs b/test/elixir/test/multiple_rows_test.exs index 646682823..422b254b0 100644 --- a/test/elixir/test/multiple_rows_test.exs +++ b/test/elixir/test/multiple_rows_test.exs @@ -2,6 +2,7 @@ defmodule MultipleRowsTest do use CouchTestCase @moduletag :multiple_rows + @moduletag kind: :single_node @north_carolina_cities ["Charlotte", "Raleigh"] @massachussets_cities ["Boston", "Lowell", "Worcester", "Cambridge", "Springfield"] diff --git a/test/elixir/test/partition_all_docs_test.exs b/test/elixir/test/partition_all_docs_test.exs index 816a8d6ed..8abc635f6 100644 --- a/test/elixir/test/partition_all_docs_test.exs +++ b/test/elixir/test/partition_all_docs_test.exs @@ -6,6 +6,9 @@ defmodule PartitionAllDocsTest do Test Partition functionality for for all_docs """ + @moduletag :partition + @moduletag kind: :cluster + setup_all do db_name = random_db_name() {:ok, _} = create_db(db_name, query: %{partitioned: true, q: 1}) diff --git a/test/elixir/test/partition_crud_test.exs b/test/elixir/test/partition_crud_test.exs index 7e32abbdc..f4a23ed28 100644 --- a/test/elixir/test/partition_crud_test.exs +++ b/test/elixir/test/partition_crud_test.exs @@ -1,6 +1,9 @@ defmodule PartitionCrudTest do use CouchTestCase + @moduletag :partition + @moduletag kind: :cluster + @tag :with_partitioned_db test "Sets partition in db info", context do db_name = context[:db_name] diff --git a/test/elixir/test/partition_ddoc_test.exs b/test/elixir/test/partition_ddoc_test.exs index 9fdfb9260..353d52af1 100644 --- a/test/elixir/test/partition_ddoc_test.exs +++ b/test/elixir/test/partition_ddoc_test.exs @@ -4,6 +4,9 @@ defmodule PartitionDDocTest do @moduledoc """ Test partition design doc interactions """ + + @moduletag :partition + @moduletag kind: :cluster setup do db_name = random_db_name() diff --git a/test/elixir/test/partition_design_docs_test.exs b/test/elixir/test/partition_design_docs_test.exs index 4ccd63fe0..0de95b1fc 100644 --- a/test/elixir/test/partition_design_docs_test.exs +++ b/test/elixir/test/partition_design_docs_test.exs @@ -5,6 +5,9 @@ defmodule PartitionDesignDocsTest do Test Partition functionality for partition design docs """ + @moduletag :partition + @moduletag kind: :cluster + @tag :with_partitioned_db test "/_partition/:pk/_design/doc 404", context do db_name = context[:db_name] diff --git a/test/elixir/test/partition_mango_test.exs b/test/elixir/test/partition_mango_test.exs index 9e4f1e783..e203bac5b 100644 --- a/test/elixir/test/partition_mango_test.exs +++ b/test/elixir/test/partition_mango_test.exs @@ -5,6 +5,10 @@ defmodule PartitionMangoTest do @moduledoc """ Test Partition functionality for mango """ + + @moduletag :partition + @moduletag kind: :cluster + def create_index(db_name, fields \\ ["some"], opts \\ %{}) do default_index = %{ index: %{ diff --git a/test/elixir/test/partition_size_limit_test.exs b/test/elixir/test/partition_size_limit_test.exs index 6ef686611..de857bd31 100644 --- a/test/elixir/test/partition_size_limit_test.exs +++ b/test/elixir/test/partition_size_limit_test.exs @@ -5,6 +5,9 @@ defmodule PartitionSizeLimitTest do Test Partition size limit functionality """ + @moduletag :partition + @moduletag kind: :cluster + @max_size 10_240 setup do diff --git a/test/elixir/test/partition_size_test.exs b/test/elixir/test/partition_size_test.exs index 2ba8139fc..5dfa5b5d8 100644 --- a/test/elixir/test/partition_size_test.exs +++ b/test/elixir/test/partition_size_test.exs @@ -4,6 +4,9 @@ defmodule PartitionSizeTest do @moduledoc """ Test Partition size functionality """ + + @moduletag :partition + @moduletag kind: :cluster setup do db_name = random_db_name() diff --git a/test/elixir/test/partition_view_test.exs b/test/elixir/test/partition_view_test.exs index 0a55c2443..bea5e1d8c 100644 --- a/test/elixir/test/partition_view_test.exs +++ b/test/elixir/test/partition_view_test.exs @@ -5,6 +5,9 @@ defmodule ViewPartitionTest do @moduledoc """ Test Partition functionality for views """ + + @moduletag :partition + @moduletag kind: :cluster setup_all do db_name = random_db_name() diff --git a/test/elixir/test/partition_view_update_test.exs b/test/elixir/test/partition_view_update_test.exs index 5c1cb09f0..390e14198 100644 --- a/test/elixir/test/partition_view_update_test.exs +++ b/test/elixir/test/partition_view_update_test.exs @@ -5,6 +5,10 @@ defmodule PartitionViewUpdateTest do @moduledoc """ Test Partition view update functionality """ + + @moduletag :partition + @moduletag kind: :cluster + @tag :with_partitioned_db test "view updates properly remove old keys", context do db_name = context[:db_name] diff --git a/test/elixir/test/proxyauth_test.exs b/test/elixir/test/proxyauth_test.exs index 6f2d49a53..b152e9bd5 100644 --- a/test/elixir/test/proxyauth_test.exs +++ b/test/elixir/test/proxyauth_test.exs @@ -2,6 +2,7 @@ defmodule ProxyAuthTest do use CouchTestCase @moduletag :authentication + @moduletag kind: :single_node @tag :with_db test "proxy auth with secret", context do diff --git a/test/elixir/test/purge_test.exs b/test/elixir/test/purge_test.exs index 5fc03f16b..1a069083b 100644 --- a/test/elixir/test/purge_test.exs +++ b/test/elixir/test/purge_test.exs @@ -2,6 +2,7 @@ defmodule PurgeTest do use CouchTestCase @moduletag :purge + @moduletag kind: :single_node @tag :with_db test "purge documents", context do diff --git a/test/elixir/test/reader_acl_test.exs b/test/elixir/test/reader_acl_test.exs index f65e7cbf6..3cbd5c886 100644 --- a/test/elixir/test/reader_acl_test.exs +++ b/test/elixir/test/reader_acl_test.exs @@ -2,6 +2,7 @@ defmodule ReaderACLTest do use CouchTestCase @moduletag :authentication + @moduletag kind: :single_node @users_db_name "custom-users" @password "funnybone" diff --git a/test/elixir/test/recreate_doc_test.exs b/test/elixir/test/recreate_doc_test.exs index 08f92293e..9ee914dc2 100644 --- a/test/elixir/test/recreate_doc_test.exs +++ b/test/elixir/test/recreate_doc_test.exs @@ -2,6 +2,7 @@ defmodule RecreateDocTest do use CouchTestCase @moduletag :recreate_doc + @moduletag kind: :single_node @moduledoc """ Test CouchDB document recreation diff --git a/test/elixir/test/reduce_builtin_test.exs b/test/elixir/test/reduce_builtin_test.exs index d13ada1b3..410ca5989 100644 --- a/test/elixir/test/reduce_builtin_test.exs +++ b/test/elixir/test/reduce_builtin_test.exs @@ -2,6 +2,7 @@ defmodule ReduceBuiltinTest do use CouchTestCase @moduletag :views + @moduletag kind: :single_node @moduledoc """ Test CouchDB view builtin reduce functions diff --git a/test/elixir/test/reduce_false_test.exs b/test/elixir/test/reduce_false_test.exs index 675c11dbd..3cf4ccb49 100644 --- a/test/elixir/test/reduce_false_test.exs +++ b/test/elixir/test/reduce_false_test.exs @@ -2,6 +2,7 @@ defmodule ReduceFalseTest do use CouchTestCase @moduletag :views + @moduletag kind: :single_node @moduledoc """ Test CouchDB view without reduces diff --git a/test/elixir/test/reduce_test.exs b/test/elixir/test/reduce_test.exs index 22f2fa6f2..7b5641ccf 100644 --- a/test/elixir/test/reduce_test.exs +++ b/test/elixir/test/reduce_test.exs @@ -2,6 +2,7 @@ defmodule ReduceTest do use CouchTestCase @moduletag :views + @moduletag kind: :single_node @moduledoc """ Test CouchDB view reduces diff --git a/test/elixir/test/replication_test.exs b/test/elixir/test/replication_test.exs index 075f65bfa..7b462bdfc 100644 --- a/test/elixir/test/replication_test.exs +++ b/test/elixir/test/replication_test.exs @@ -5,6 +5,9 @@ defmodule ReplicationTest do Test CouchDB Replication Behavior This is a port of the view_collation.js suite """ + + @moduletag kind: :cluster + @moduletag :replication # TODO: Parameterize these @db_pairs_prefixes [ diff --git a/test/elixir/test/replicator_db_bad_rep_id_test.exs b/test/elixir/test/replicator_db_bad_rep_id_test.exs index 693c9d85d..9477eb183 100644 --- a/test/elixir/test/replicator_db_bad_rep_id_test.exs +++ b/test/elixir/test/replicator_db_bad_rep_id_test.exs @@ -5,6 +5,9 @@ defmodule ReplicationBadIdTest do This is a port of the replicator_db_bad_rep_id.js suite """ + @moduletag :replication + @moduletag kind: :cluster + @docs [ %{ _id: "foo1", diff --git a/test/elixir/test/replicator_db_by_doc_id_test.exs b/test/elixir/test/replicator_db_by_doc_id_test.exs index 2e68f2ca9..681ed02df 100644 --- a/test/elixir/test/replicator_db_by_doc_id_test.exs +++ b/test/elixir/test/replicator_db_by_doc_id_test.exs @@ -5,6 +5,9 @@ defmodule ReplicatorDBByDocIdTest do This is a port of the replicator_db_by_doc_id.js suite """ + @moduletag :replication + @moduletag kind: :cluster + @docs [ %{ _id: "foo1", diff --git a/test/elixir/test/reshard_all_docs_test.exs b/test/elixir/test/reshard_all_docs_test.exs index ab8c6b75b..042437e6e 100644 --- a/test/elixir/test/reshard_all_docs_test.exs +++ b/test/elixir/test/reshard_all_docs_test.exs @@ -6,6 +6,8 @@ defmodule ReshardAllDocsTest do Test _all_docs interaction with resharding """ + @moduletag kind: :cluster + setup do db = random_db_name() {:ok, _} = create_db(db, query: %{q: 2}) diff --git a/test/elixir/test/reshard_basic_test.exs b/test/elixir/test/reshard_basic_test.exs index dcb198c46..92f8145bf 100644 --- a/test/elixir/test/reshard_basic_test.exs +++ b/test/elixir/test/reshard_basic_test.exs @@ -5,6 +5,8 @@ defmodule ReshardBasicTest do @moduledoc """ Test resharding basic functionality """ + + @moduletag kind: :cluster setup_all do db1 = random_db_name() diff --git a/test/elixir/test/reshard_changes_feed.exs b/test/elixir/test/reshard_changes_feed.exs index 5498ded7b..ad55e577d 100644 --- a/test/elixir/test/reshard_changes_feed.exs +++ b/test/elixir/test/reshard_changes_feed.exs @@ -6,6 +6,8 @@ defmodule ReshardChangesFeedTest do Test _changes interaction with resharding """ + @moduletag kind: :cluster + setup do db = random_db_name() {:ok, _} = create_db(db, query: %{q: 2}) diff --git a/test/elixir/test/rev_stemming_test.exs b/test/elixir/test/rev_stemming_test.exs index 9a16d481d..1fb745ead 100644 --- a/test/elixir/test/rev_stemming_test.exs +++ b/test/elixir/test/rev_stemming_test.exs @@ -2,6 +2,7 @@ defmodule RevStemmingTest do use CouchTestCase @moduletag :revs + @moduletag kind: :single_node @moduledoc """ This is a port of the rev_stemming.js suite diff --git a/test/elixir/test/rewrite_test.exs b/test/elixir/test/rewrite_test.exs index 1960ddfde..daa2a80a8 100644 --- a/test/elixir/test/rewrite_test.exs +++ b/test/elixir/test/rewrite_test.exs @@ -2,6 +2,7 @@ defmodule RewriteTest do use CouchTestCase @moduletag :js_engine + @moduletag kind: :single_node @moduledoc """ Test CouchDB rewrites diff --git a/test/elixir/test/security_validation_test.exs b/test/elixir/test/security_validation_test.exs index 0df3a780b..036d4a7a9 100644 --- a/test/elixir/test/security_validation_test.exs +++ b/test/elixir/test/security_validation_test.exs @@ -2,6 +2,7 @@ defmodule SecurityValidationTest do use CouchTestCase @moduletag :security + @moduletag kind: :single_node @moduledoc """ Test CouchDB Security Validations diff --git a/test/elixir/test/update_documents_test.exs b/test/elixir/test/update_documents_test.exs index c29b31a4d..fcbdbeaca 100644 --- a/test/elixir/test/update_documents_test.exs +++ b/test/elixir/test/update_documents_test.exs @@ -1,6 +1,8 @@ defmodule UpdateDocumentsTest do use CouchTestCase + @moduletag kind: :single_node + @ddoc %{ _id: "_design/update", language: "javascript", diff --git a/test/elixir/test/users_db_test.exs b/test/elixir/test/users_db_test.exs index 62877d542..db86b2739 100644 --- a/test/elixir/test/users_db_test.exs +++ b/test/elixir/test/users_db_test.exs @@ -2,6 +2,7 @@ defmodule UsersDbTest do use CouchTestCase @moduletag :authentication + @moduletag kind: :single_node @users_db_name "_users" diff --git a/test/elixir/test/utf8_test.exs b/test/elixir/test/utf8_test.exs index 0e4d8b875..6afaee606 100644 --- a/test/elixir/test/utf8_test.exs +++ b/test/elixir/test/utf8_test.exs @@ -2,6 +2,7 @@ defmodule UTF8Test do use CouchTestCase @moduletag :utf8 + @moduletag kind: :single_node @moduledoc """ Test CouchDB UTF8 support diff --git a/test/elixir/test/uuids_test.exs b/test/elixir/test/uuids_test.exs index bb9369b80..355bed824 100644 --- a/test/elixir/test/uuids_test.exs +++ b/test/elixir/test/uuids_test.exs @@ -6,6 +6,9 @@ defmodule UUIDsTest do This is a port of the uuids.js suite """ + @moduletag :docs + @moduletag kind: :single_node + test "cache busting headers are set" do resp = Couch.get("/_uuids") assert resp.status_code == 200 diff --git a/test/elixir/test/view_collation_raw_test.exs b/test/elixir/test/view_collation_raw_test.exs index ee272d72e..1bcd397d9 100644 --- a/test/elixir/test/view_collation_raw_test.exs +++ b/test/elixir/test/view_collation_raw_test.exs @@ -6,6 +6,8 @@ defmodule ViewCollationRawTest do This is a port of the view_collation_raw.js suite """ + @moduletag kind: :single_node + @values [ # Then numbers 1, diff --git a/test/elixir/test/view_collation_test.exs b/test/elixir/test/view_collation_test.exs index 7563ba416..1e3fec03f 100644 --- a/test/elixir/test/view_collation_test.exs +++ b/test/elixir/test/view_collation_test.exs @@ -6,6 +6,8 @@ defmodule ViewCollationTest do This is a port of the view_collation.js suite """ + @moduletag kind: :single_node + @values [ # Special values sort before all other types :null, diff --git a/test/elixir/test/view_compaction_test.exs b/test/elixir/test/view_compaction_test.exs index d2bf060ba..5bdd2eac1 100644 --- a/test/elixir/test/view_compaction_test.exs +++ b/test/elixir/test/view_compaction_test.exs @@ -5,6 +5,9 @@ defmodule ViewCompactionTest do Test CouchDB View Compaction Behavior This is a port of the view_compaction.js suite """ + + @moduletag kind: :single_node + @num_docs 1000 @ddoc %{ diff --git a/test/elixir/test/view_multi_key_all_docs_test.exs b/test/elixir/test/view_multi_key_all_docs_test.exs index d9fa41e23..6426eb2c2 100644 --- a/test/elixir/test/view_multi_key_all_docs_test.exs +++ b/test/elixir/test/view_multi_key_all_docs_test.exs @@ -1,6 +1,8 @@ defmodule ViewMultiKeyAllDocsTest do use CouchTestCase + @moduletag kind: :single_node + @keys ["10", "15", "30", "37", "50"] setup_all do diff --git a/test/elixir/test/view_multi_key_design_test.exs b/test/elixir/test/view_multi_key_design_test.exs index ab57e89eb..1ea3fe334 100644 --- a/test/elixir/test/view_multi_key_design_test.exs +++ b/test/elixir/test/view_multi_key_design_test.exs @@ -1,5 +1,7 @@ defmodule ViewMultiKeyDesignTest do use CouchTestCase + + @moduletag kind: :single_node @keys [10, 15, 30, 37, 50] diff --git a/test/elixir/test/view_offsets_test.exs b/test/elixir/test/view_offsets_test.exs index 20aa1ca9d..9361a54ae 100644 --- a/test/elixir/test/view_offsets_test.exs +++ b/test/elixir/test/view_offsets_test.exs @@ -2,6 +2,7 @@ defmodule ViewOffsetTest do use CouchTestCase @moduletag :view_offsets + @moduletag kind: :single_node @moduledoc """ Tests about view offsets. diff --git a/test/elixir/test/view_pagination_test.exs b/test/elixir/test/view_pagination_test.exs index 322b653cb..5aa1cd789 100644 --- a/test/elixir/test/view_pagination_test.exs +++ b/test/elixir/test/view_pagination_test.exs @@ -2,6 +2,7 @@ defmodule ViewPaginationTest do use CouchTestCase @moduletag :view_pagination + @moduletag kind: :single_node @moduledoc """ Integration tests for pagination. diff --git a/test/elixir/test/view_sandboxing_test.exs b/test/elixir/test/view_sandboxing_test.exs index af0928efa..99a9c0de4 100644 --- a/test/elixir/test/view_sandboxing_test.exs +++ b/test/elixir/test/view_sandboxing_test.exs @@ -1,6 +1,8 @@ defmodule ViewSandboxingTest do use CouchTestCase + @moduletag kind: :single_node + @document %{integer: 1, string: "1", array: [1, 2, 3]} @tag :with_db diff --git a/test/elixir/test/view_test.exs b/test/elixir/test/view_test.exs index 5fb8c009c..f768cef16 100644 --- a/test/elixir/test/view_test.exs +++ b/test/elixir/test/view_test.exs @@ -2,6 +2,7 @@ defmodule ViewTest do use CouchTestCase @moduletag :view + @moduletag kind: :single_node @moduledoc """ Test CouchDB /{db}/_design/{ddoc}/_view/{view} diff --git a/test/elixir/test/view_update_seq_test.exs b/test/elixir/test/view_update_seq_test.exs index 38b42c7a7..d1eca4038 100644 --- a/test/elixir/test/view_update_seq_test.exs +++ b/test/elixir/test/view_update_seq_test.exs @@ -2,6 +2,7 @@ defmodule ViewUpdateSeqTest do use CouchTestCase @moduletag :view_update_seq + @moduletag kind: :single_node @moduledoc """ This is a port of the view_update_seq.js test suite. -- cgit v1.2.1 From 947308d61d8016d9624d53a1a4dc91b7d92a0464 Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Tue, 1 Sep 2020 15:28:25 -0400 Subject: Allow creating new deleted documents This makes it compatible with CouchDB <= 3.x where we can create deleted documents. How to check: ``` $ http put $DB1/mydb $ http put $DB1/mydb/foo _deleted:='true' a=b { "id": "foo", "ok": true, "rev": "1-ad7eb689fcae75e7a7edb57dc1f30939" } ``` --- src/fabric/src/fabric2_db.erl | 11 +++++++---- src/fabric/test/fabric2_doc_crud_tests.erl | 12 ++++++++++-- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/src/fabric/src/fabric2_db.erl b/src/fabric/src/fabric2_db.erl index 4ac105589..b62f26ec8 100644 --- a/src/fabric/src/fabric2_db.erl +++ b/src/fabric/src/fabric2_db.erl @@ -1831,13 +1831,16 @@ update_doc_interactive(Db, Doc0, Future, _Options) -> % Check that a revision was specified if required Doc0RevId = doc_to_revid(Doc0), - if Doc0RevId /= {0, <<>>} orelse WinnerRevId == {0, <<>>} -> ok; true -> + HasRev = Doc0RevId =/= {0, <<>>}, + if HasRev orelse WinnerRevId == {0, <<>>} -> ok; true -> ?RETURN({Doc0, conflict}) end, - % Check that we're not trying to create a deleted doc - if Doc0RevId /= {0, <<>>} orelse not Doc0#doc.deleted -> ok; true -> - ?RETURN({Doc0, conflict}) + % Allow inserting new deleted documents. Only works when the document has + % never existed to match CouchDB 3.x + case not HasRev andalso Doc0#doc.deleted andalso is_map(Winner) of + true -> ?RETURN({Doc0, conflict}); + false -> ok end, % Get the target revision to update diff --git a/src/fabric/test/fabric2_doc_crud_tests.erl b/src/fabric/test/fabric2_doc_crud_tests.erl index ce3757d55..7a24b7d52 100644 --- a/src/fabric/test/fabric2_doc_crud_tests.erl +++ b/src/fabric/test/fabric2_doc_crud_tests.erl @@ -49,7 +49,7 @@ doc_crud_test_() -> ?TDEF(recreate_doc_basic), ?TDEF(conflict_on_create_new_with_rev), ?TDEF(conflict_on_update_with_no_rev), - ?TDEF(conflict_on_create_as_deleted), + ?TDEF(allow_create_new_as_deleted), ?TDEF(conflict_on_recreate_as_deleted), ?TDEF(conflict_on_extend_deleted), ?TDEF(open_doc_revs_basic), @@ -449,12 +449,20 @@ conflict_on_update_with_no_rev({Db, _}) -> ?assertThrow(conflict, fabric2_db:update_doc(Db, Doc2)). -conflict_on_create_as_deleted({Db, _}) -> +allow_create_new_as_deleted({Db, _}) -> Doc = #doc{ id = fabric2_util:uuid(), deleted = true, body = {[{<<"foo">>, <<"bar">>}]} }, + {ok, {1, Rev}} = fabric2_db:update_doc(Db, Doc), + ?assertEqual({not_found, deleted}, fabric2_db:open_doc(Db, Doc#doc.id)), + Doc1 = Doc#doc{ + revs = {1, [Rev]} + }, + ?assertEqual({ok, Doc1}, fabric2_db:open_doc(Db, Doc#doc.id, [deleted])), + % Only works when the document has never existed to match CouchDB 3.x + % behavior ?assertThrow(conflict, fabric2_db:update_doc(Db, Doc)). -- cgit v1.2.1 From 3acb20dcba3991fb6ea145041555b37c567fcbb3 Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Tue, 18 Aug 2020 14:58:47 -0500 Subject: Implement ebtree:insert_multi/3 This allows for batch insertion of keys in order to minimize node serialization and collation costs. --- src/ebtree/src/ebtree.erl | 164 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 164 insertions(+) diff --git a/src/ebtree/src/ebtree.erl b/src/ebtree/src/ebtree.erl index 536d3b139..84e3183cf 100644 --- a/src/ebtree/src/ebtree.erl +++ b/src/ebtree/src/ebtree.erl @@ -18,6 +18,7 @@ min/0, max/0, insert/4, + insert_multi/3, delete/3, lookup/3, range/6, @@ -607,6 +608,155 @@ insert_nonfull(Tx, #tree{} = Tree, #node{} = Node0, Key, Value) -> reduce_node(Tree, Node2). +%% @doc Inserts or updates multiple values in the ebtree +%% @param Db An erlfdb database or transaction. +%% @param Tree The ebtree. +%% @param KeyValues A list of two-tuples representing the key/values to insert +%% @returns the tree. +-spec insert_multi(Db :: term(), Tree :: #tree{}, KeyValues :: [{term(), term()}]) -> #tree{}. +insert_multi(_Db, #tree{} = Tree, []) -> + Tree; + +insert_multi(Db, #tree{} = Tree, KeyValues) when is_list(KeyValues) -> + % Sort our KeyValues so that we can insert in order + SortedKeyValues = usort_members(Tree, 0, KeyValues), + erlfdb:transactional(Db, fun(Tx) -> + Root0 = get_node(Tx, Tree, ?NODE_ROOT_ID), + Members = insert_multi(Tx, Tree, Root0, SortedKeyValues), + Root1 = grow_tree(Tx, Tree, Root0#node{members = Members}), + set_node(Tx, Tree, Root1) + end), + Tree. + + +insert_multi(Tx, #tree{} = Tree, #node{level = L} = Node, KeyValues) when L > 0 -> + ChildKVsPairs = assign_kvs(Tree, Node#node.members, KeyValues), + NewMembers = lists:flatmap(fun({{_F, _L, P, _R} = Child, KVs}) -> + case KVs of + [] -> + [Child]; + _ -> + ChildNode = get_node(Tx, Tree, P), + insert_multi(Tx, Tree, ChildNode, KVs) + end + end, ChildKVsPairs), + split_node_multi(Tx, Tree, Node#node{members = NewMembers}); + +insert_multi(Tx, #tree{} = Tree, #node{level = 0} = Node, KeyValues) -> + NewMembers = umerge_members(Tree, 0, KeyValues, Node#node.members), + split_node_multi(Tx, Tree, Node#node{members = NewMembers}). + + +assign_kvs(_Tree, [Child], KeyValues) -> + [{Child, KeyValues}]; + +assign_kvs(Tree, [{_F, L, _P, _R} = Child | RestChildren], KeyValues) -> + {KVsInChild, RestKVs} = lists:splitwith(fun({Key, _}) -> + collate(Tree, Key, L, [lt, eq]) + end, KeyValues), + [{Child, KVsInChild} | assign_kvs(Tree, RestChildren, RestKVs)]. + + +split_node_multi(Tx, Tree, Node) -> + NumMembers = length(Node#node.members), + % Not =< so that we don't leave full nodes + % in the tree after update. + case NumMembers < Tree#tree.max of + true when Node#node.id == ?NODE_ROOT_ID -> + Node#node.members; + true -> + set_node(Tx, Tree, Node), + [to_member(Tree, Node)]; + false -> + clear_node(Tx, Tree, Node), + Nodes0 = create_nodes(Tx, Tree, Node), + Nodes1 = if Node#node.level > 0 -> Nodes0; true -> + Nodes2 = update_next_ptrs(Nodes0), + Nodes3 = update_prev_ptrs(Nodes2), + Nodes4 = set_first_prev_ptr(Tx, Tree, Node#node.prev, Nodes3), + set_last_next_ptr(Tx, Tree, Node#node.next, Nodes4) + end, + set_nodes(Tx, Tree, Nodes1), + [to_member(Tree, N) || N <- Nodes1] + end. + + +grow_tree(_Tx, _Tree, #node{level = 0, members = [{_, _} | _]} = Root) -> + Root; + +grow_tree(Tx, Tree, #node{level = 0, members = [{_, _, _, _} | _]} = Root) -> + grow_tree(Tx, Tree, Root#node{level = 1}); + +grow_tree(Tx, Tree, Root) -> + case length(Root#node.members) < Tree#tree.max of + true -> + Root; + false -> + NewMembers = split_node_multi(Tx, Tree, Root), + NewRoot = Root#node{ + level = Root#node.level + 1, + members = NewMembers + }, + grow_tree(Tx, Tree, NewRoot) + end. + + +to_member(Tree, Node) -> + FirstKey = first_key(Node#node.members), + LastKey = last_key(Node#node.members), + Reds = reduce_node(Tree, Node), + {FirstKey, LastKey, Node#node.id, Reds}. + + +create_nodes(Tx, #tree{} = Tree, Node) -> + case length(Node#node.members) >= Tree#tree.max of + true -> + {Members, Rest} = lists:split(Tree#tree.min, Node#node.members), + NewNode = #node{ + id = new_node_id(Tx, Tree), + level = Node#node.level, + members = Members + }, + [NewNode | create_nodes(Tx, Tree, Node#node{members = Rest})]; + false -> + NewNode = #node{ + id = new_node_id(Tx, Tree), + level = Node#node.level, + members = Node#node.members + }, + [NewNode] + end. + + +update_next_ptrs([_] = Nodes) -> + Nodes; + +update_next_ptrs([N1, N2 | Rest]) -> + [N1#node{next = N2#node.id} | update_next_ptrs([N2 | Rest])]. + + +update_prev_ptrs([_] = Nodes) -> + Nodes; + +update_prev_ptrs([N1, N2 | Rest]) -> + [N1 | update_prev_ptrs([N2#node{prev = N1#node.id} | Rest])]. + + +set_first_prev_ptr(Tx, Tree, Prev, [Node | Rest]) -> + NewNode = Node#node{prev = Prev}, + update_prev_neighbour(Tx, Tree, NewNode), + [NewNode | Rest]. + + +set_last_next_ptr(Tx, Tree, Next, [Node0]) -> + Node1 = Node0#node{next = Next}, + update_next_neighbour(Tx, Tree, Node1), + [Node1]; + +set_last_next_ptr(Tx, Tree, Next, [N | Rest]) -> + [N | set_last_next_ptr(Tx, Tree, Next, Rest)]. + + %% @doc Deletes an entry from the ebtree %% @param Db An erlfdb database or transaction. %% @param Tree The ebtree. @@ -1158,6 +1308,20 @@ lookup_test() -> ?assertEqual(false, lookup(Db, Tree, 101)). +insert_multi_test() -> + Db = erlfdb_util:get_test_db([empty]), + Tree = open(Db, <<1, 2, 3>>, 4), + AllKVs = lists:foldl(fun(_Seq, Acc) -> + KVs = [{rand:uniform(), rand:uniform()} || _ <- lists:seq(1, 16)], + insert_multi(Db, Tree, KVs), + KVs ++ Acc + end, [], lists:seq(1, 16)), + lists:foreach(fun({K, V}) -> + ?assertEqual({K, V}, lookup(Db, Tree, K)) + end, AllKVs), + validate_tree(Db, Tree). + + delete_test() -> Db = erlfdb_util:get_test_db([empty]), Tree = open(Db, <<1,2,3>>, 4), -- cgit v1.2.1 From 3e969fcd4d58ba18ae1a852ea03fc3a2d0bceb90 Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Fri, 21 Aug 2020 13:29:42 -0500 Subject: Implement ebtree:lookup_multi/3 This allows looking up multiple keys simultaneously which reduces the amount of overhead due to node serialization and collation. --- src/ebtree/src/ebtree.erl | 61 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/src/ebtree/src/ebtree.erl b/src/ebtree/src/ebtree.erl index 84e3183cf..95e361550 100644 --- a/src/ebtree/src/ebtree.erl +++ b/src/ebtree/src/ebtree.erl @@ -21,6 +21,7 @@ insert_multi/3, delete/3, lookup/3, + lookup_multi/3, range/6, reverse_range/6, fold/4, @@ -149,6 +150,55 @@ lookup(Db, #tree{} = Tree, Key) -> fold(Db, Tree, Fun, false, []). +%% @doc Lookup a list of keys in the ebtree. +%% @param Db An erlfdb database or transaction. +%% @param Tree the ebtree. +%% @param Keys the list of keys to lookup +%% @returns A list containing key/value tuples for keys that were found +-spec lookup_multi(Db :: term(), Tree :: #tree{}, Key :: [term()]) -> + [{Key :: term(), Value :: term()}]. +lookup_multi(Db, #tree{} = Tree, Keys) -> + FoldFun = fun lookup_multi_fold/2, + Acc = {Tree, sort_keys(Tree, Keys), []}, + {_, _, FoundKeys} = fold(Db, Tree, FoldFun, Acc, []), + FoundKeys. + + +lookup_multi_fold(_, {_, [], _} = Acc) -> + % No more keys to find + {stop, Acc}; + +lookup_multi_fold({visit, Key1, Value}, {Tree, [Key2 | Rest], Acc}) -> + {NewKeys, NewAcc} = case collate(Tree, Key1, Key2) of + lt -> + % Still looking for the next user key + {[Key2 | Rest], Acc}; + eq -> + % Found a requested key + {Rest, [{Key2, Value} | Acc]}; + gt -> + % The user key wasn't found so we drop it + {Rest, Acc} + end, + {ok, {Tree, NewKeys, NewAcc}}; + +lookup_multi_fold({traverse, FKey, LKey, R}, {Tree, [UKey | Rest], Acc}) -> + case collate(Tree, FKey, UKey, [gt]) of + true -> + % We've passed by our first user key + lookup_multi_fold({traverse, FKey, LKey, R}, {Tree, Rest, Acc}); + false -> + case collate(Tree, UKey, LKey, [lt, eq]) of + true -> + % Key might be in this range + {ok, {Tree, [UKey | Rest], Acc}}; + false -> + % Next key is not in range + {skip, {Tree, [UKey | Rest], Acc}} + end + end. + + %% @equiv fold(Db, Tree, Fun, Acc, []) fold(Db, #tree{} = Tree, Fun, Acc) -> fold(Db, Tree, Fun, Acc, []). @@ -1308,6 +1358,17 @@ lookup_test() -> ?assertEqual(false, lookup(Db, Tree, 101)). +lookup_multi_test() -> + Db = erlfdb_util:get_test_db([empty]), + Tree = open(Db, <<1,2,3>>, 4), + Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, 16)])], + lists:foreach(fun(Key) -> insert(Db, Tree, Key, Key + 1) end, Keys), + validate_tree(Db, Tree), + ?assertEqual([{1, 2}], lookup_multi(Db, Tree, [1])), + ?assertEqual([{15, 16}, {2, 3}], lookup_multi(Db, Tree, [2, 15])), + ?assertEqual([{15, 16}, {4, 5}, {2, 3}], lookup_multi(Db, Tree, [2, 101, 15, 4, -3])). + + insert_multi_test() -> Db = erlfdb_util:get_test_db([empty]), Tree = open(Db, <<1, 2, 3>>, 4), -- cgit v1.2.1 From 02e59140b5158ca1105f09ec24ba799426b88e1e Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Fri, 28 Aug 2020 11:33:35 -0500 Subject: Implement caching of immutable ebtree nodes Inner nodes of the B+Tree are now immutable so that they can be cached. --- src/ebtree/src/ebtree.erl | 185 +++++++++++++++++++++++++++++++++++----------- 1 file changed, 142 insertions(+), 43 deletions(-) diff --git a/src/ebtree/src/ebtree.erl b/src/ebtree/src/ebtree.erl index 95e361550..680bf1d55 100644 --- a/src/ebtree/src/ebtree.erl +++ b/src/ebtree/src/ebtree.erl @@ -49,15 +49,15 @@ collate_fun, reduce_fun, encode_fun, - persist_fun + persist_fun, + cache_fun }). -define(META, 0). -define(META_ORDER, 0). --define(META_NEXT_ID, 1). -define(NODE, 1). --define(NODE_ROOT_ID, 0). +-define(NODE_ROOT_ID, <<0>>). -define(underflow(Tree, Node), Tree#tree.min > length(Node#node.members)). -define(at_min(Tree, Node), Tree#tree.min == length(Node#node.members)). @@ -87,13 +87,15 @@ open(Db, Prefix, Order, Options) when is_binary(Prefix), is_integer(Order), Orde CollateFun = proplists:get_value(collate_fun, Options, fun collate_raw/2), EncodeFun = proplists:get_value(encode_fun, Options, fun encode_erlang/3), PersistFun = proplists:get_value(persist_fun, Options, fun simple_persist/3), + CacheFun = proplists:get_value(cache_fun, Options, fun cache_noop/2), Tree = #tree{ prefix = Prefix, reduce_fun = ReduceFun, collate_fun = CollateFun, encode_fun = EncodeFun, - persist_fun = PersistFun + persist_fun = PersistFun, + cache_fun = CacheFun }, erlfdb:transactional(Db, fun(Tx) -> @@ -101,7 +103,6 @@ open(Db, Prefix, Order, Options) when is_binary(Prefix), is_integer(Order), Orde not_found -> erlfdb:clear_range_startswith(Tx, Prefix), set_meta(Tx, Tree, ?META_ORDER, Order), - set_meta(Tx, Tree, ?META_NEXT_ID, 1), set_node(Tx, Tree, #node{id = ?NODE_ROOT_ID}), init_order(Tree, Order); ActualOrder when is_integer(ActualOrder) -> @@ -543,7 +544,7 @@ insert(Db, #tree{} = Tree, Key, Value) -> Root0 = get_node(Tx, Tree, ?NODE_ROOT_ID), case ?is_full(Tree, Root0) of true -> - OldRoot = Root0#node{id = new_node_id(Tx, Tree)}, + OldRoot = Root0#node{id = new_node_id()}, FirstKey = first_key(OldRoot), LastKey = last_key(OldRoot), Root1 = #node{ @@ -562,8 +563,8 @@ insert(Db, #tree{} = Tree, Key, Value) -> split_child(Tx, #tree{} = Tree, #node{} = Parent0, #node{} = Child) -> {LeftMembers, RightMembers} = lists:split(Tree#tree.min, Child#node.members), - LeftId = new_node_id(Tx, Tree), - RightId = new_node_id(Tx, Tree), + LeftId = new_node_id(), + RightId = new_node_id(), LeftChild = remove_pointers_if_not_leaf(#node{ id = LeftId, @@ -600,9 +601,10 @@ split_child(Tx, #tree{} = Tree, #node{} = Parent0, #node{} = Child) -> umerge_members(Tree, Parent0#node.level, [{FirstRightKey, LastRightKey, RightId, RightReduction}], lists:keydelete(Child#node.id, 3, Parent0#node.members))) }, + Parent2 = new_node_id_if_cacheable(Tx, Tree, Parent0, Parent1), clear_node(Tx, Tree, Child), - set_nodes(Tx, Tree, [LeftChild, RightChild, Parent1]), - {Parent1, LeftChild, RightChild}. + set_nodes(Tx, Tree, [LeftChild, RightChild, Parent2]), + {Parent2, LeftChild, RightChild}. update_prev_neighbour(_Tx, #tree{} = _Tree, #node{prev = undefined} = _Node) -> @@ -626,7 +628,7 @@ insert_nonfull(Tx, #tree{} = Tree, #node{level = 0} = Node0, Key, Value) -> members = umerge_members(Tree, 0, [{Key, Value}], Node0#node.members) }, set_node(Tx, Tree, Node0, Node1), - reduce_node(Tree, Node1); + {Node1#node.id, reduce_node(Tree, Node1)}; insert_nonfull(Tx, #tree{} = Tree, #node{} = Node0, Key, Value) -> ChildId0 = find_child_id(Tree, Node0, Key), @@ -646,16 +648,17 @@ insert_nonfull(Tx, #tree{} = Tree, #node{} = Node0, Key, Value) -> {Node0, Child0} end, ChildId1 = Child1#node.id, - NewReduction = insert_nonfull(Tx, Tree, Child1, Key, Value), + {ChildId2, NewReduction} = insert_nonfull(Tx, Tree, Child1, Key, Value), {CurrentFirstKey, CurrentLastKey, ChildId1, _OldReduction} = lists:keyfind(ChildId1, 3, Node1#node.members), [NewFirstKey, _] = sort_keys(Tree, [Key, CurrentFirstKey]), [_, NewLastKey] = sort_keys(Tree, [Key, CurrentLastKey]), Node2 = Node1#node{ members = lists:keyreplace(ChildId1, 3, Node1#node.members, - {NewFirstKey, NewLastKey, ChildId1, NewReduction}) + {NewFirstKey, NewLastKey, ChildId2, NewReduction}) }, - set_node(Tx, Tree, Node0, Node2), - reduce_node(Tree, Node2). + Node3 = new_node_id_if_cacheable(Tx, Tree, Node0, Node2), + set_node(Tx, Tree, Node0, Node3), + {Node3#node.id, reduce_node(Tree, Node2)}. %% @doc Inserts or updates multiple values in the ebtree @@ -715,8 +718,14 @@ split_node_multi(Tx, Tree, Node) -> true when Node#node.id == ?NODE_ROOT_ID -> Node#node.members; true -> - set_node(Tx, Tree, Node), - [to_member(Tree, Node)]; + NewNode = case node_is_cacheable(Node) of + true -> + Node#node{id = new_node_id()}; + false -> + Node + end, + set_node(Tx, Tree, NewNode), + [to_member(Tree, NewNode)]; false -> clear_node(Tx, Tree, Node), Nodes0 = create_nodes(Tx, Tree, Node), @@ -763,14 +772,14 @@ create_nodes(Tx, #tree{} = Tree, Node) -> true -> {Members, Rest} = lists:split(Tree#tree.min, Node#node.members), NewNode = #node{ - id = new_node_id(Tx, Tree), + id = new_node_id(), level = Node#node.level, members = Members }, [NewNode | create_nodes(Tx, Tree, Node#node{members = Rest})]; false -> NewNode = #node{ - id = new_node_id(Tx, Tree), + id = new_node_id(), level = Node#node.level, members = Node#node.members }, @@ -845,12 +854,12 @@ delete(Tx, #tree{} = Tree, #node{} = Parent0, Key) -> Sibling = get_node(Tx, Tree, SiblingId), NewNodes = case ?at_min(Tree, Sibling) of true -> - Merged = merge(Tx, Tree, Child1, Sibling), + Merged = merge(Tree, Child1, Sibling), update_prev_neighbour(Tx, Tree, Merged), update_next_neighbour(Tx, Tree, Merged), [Merged]; false -> - {Left, Right} = rebalance(Tx, Tree, Child1, Sibling), + {Left, Right} = rebalance(Tree, Child1, Sibling), update_prev_neighbour(Tx, Tree, Left), update_next_neighbour(Tx, Tree, Right), [Left, Right] @@ -866,28 +875,28 @@ delete(Tx, #tree{} = Tree, #node{} = Parent0, Key) -> end, Members2, NewNodes), Parent1 = Parent0#node{ - %% TODO change id members = Members3 }, - + Parent2 = new_node_id_if_cacheable(Tx, Tree, Parent0, Parent1), clear_nodes(Tx, Tree, [Child0, Sibling]), set_nodes(Tx, Tree, NewNodes), - Parent1; + Parent2; false -> set_node(Tx, Tree, Child0, Child1), {_OldFirstKey, _OldLastKey, ChildId0, _OldReduction} = lists:keyfind(ChildId0, 3, Parent0#node.members), - Parent0#node{ + Parent1 = Parent0#node{ members = lists:keyreplace(ChildId0, 3, Parent0#node.members, {first_key(Child1), last_key(Child1), Child1#node.id, reduce_node(Tree, Child1)}) - } + }, + new_node_id_if_cacheable(Tx, Tree, Parent0, Parent1) end. -merge(Tx, #tree{} = Tree, #node{level = Level} = Node1, #node{level = Level} = Node2) -> +merge(#tree{} = Tree, #node{level = Level} = Node1, #node{level = Level} = Node2) -> [Left, Right] = sort_nodes(Tree, [Node1, Node2]), #node{ - id = new_node_id(Tx, Tree), + id = new_node_id(), level = Level, prev = Left#node.prev, next = Right#node.next, @@ -895,14 +904,14 @@ merge(Tx, #tree{} = Tree, #node{level = Level} = Node1, #node{level = Level} = N }. -rebalance(Tx, #tree{} = Tree, #node{level = Level} = Node1, #node{level = Level} = Node2) -> +rebalance(#tree{} = Tree, #node{level = Level} = Node1, #node{level = Level} = Node2) -> [Left0, Right0] = sort_nodes(Tree, [Node1, Node2]), Members = lists:append(Left0#node.members, Right0#node.members), {LeftMembers, RightMembers} = lists:split(length(Members) div 2, Members), - Left1Id = new_node_id(Tx, Tree), - Right1Id = new_node_id(Tx, Tree), + Left1Id = new_node_id(), + Right1Id = new_node_id(), Left1 = remove_pointers_if_not_leaf(Left0#node{ id = Left1Id, @@ -974,10 +983,16 @@ meta_key(Prefix, MetaKey) when is_binary(Prefix) -> %% node persistence functions get_node(Tx, #tree{} = Tree, Id) -> - Key = node_key(Tree#tree.prefix, Id), - Value = persist(Tree, Tx, get, Key), - decode_node(Tree, Id, Key, Value). - + case cache(Tree, get, Id) of + undefined -> + Key = node_key(Tree#tree.prefix, Id), + Value = persist(Tree, Tx, get, Key), + Node = decode_node(Tree, Id, Key, Value), + cache(Tree, set, [Id, Node]), + Node; + #node{} = Node -> + Node + end. clear_nodes(Tx, #tree{} = Tree, Nodes) -> lists:foreach(fun(Node) -> @@ -987,6 +1002,7 @@ clear_nodes(Tx, #tree{} = Tree, Nodes) -> clear_node(Tx, #tree{} = Tree, #node{} = Node) -> Key = node_key(Tree#tree.prefix, Node#node.id), + cache(Tree, clear, Node#node.id), persist(Tree, Tx, clear, Key). @@ -1007,10 +1023,11 @@ set_node(Tx, #tree{} = Tree, #node{} = Node) -> validate_node(Tree, Node), Key = node_key(Tree#tree.prefix, Node#node.id), Value = encode_node(Tree, Key, Node), + cache(Tree, set, [Node#node.id, Node]), persist(Tree, Tx, set, [Key, Value]). -node_key(Prefix, Id) when is_binary(Prefix), is_integer(Id) -> +node_key(Prefix, Id) when is_binary(Prefix), is_binary(Id) -> erlfdb_tuple:pack({?NODE, Id}, Prefix). @@ -1202,7 +1219,7 @@ collate_raw(A, A) -> %% encoding function encode_erlang(encode, _Key, Value) -> - term_to_binary(Value, [compressed, {minor_version, 2}]); + term_to_binary(Value, [{minor_version, 2}]); encode_erlang(decode, _Key, Value) -> @@ -1225,6 +1242,37 @@ simple_persist(Tx, clear, Key) -> erlfdb:clear(Tx, Key). +%% cache functions + +cache_noop(set, _) -> + ok; +cache_noop(clear, _) -> + ok; +cache_noop(get, _) -> + undefined. + + +cache(#tree{} = Tree, set, [Id, #node{} = Node]) -> + #tree{cache_fun = CacheFun} = Tree, + case node_is_cacheable(Node) of + true -> + CacheFun(set, [Id, Node]); + false -> + ok + end; + +cache(#tree{} = Tree, clear, Id) -> + #tree{cache_fun = CacheFun} = Tree, + CacheFun(clear, Id); + +cache(#tree{} = _Tree, get, ?NODE_ROOT_ID) -> + undefined; + +cache(#tree{} = Tree, get, Id) -> + #tree{cache_fun = CacheFun} = Tree, + CacheFun(get, Id). + + %% private functions init_order(#tree{} = Tree, Order) @@ -1254,10 +1302,30 @@ last_key(Members) when is_list(Members) -> end. -new_node_id(Tx, Tree) -> - NextId = get_meta(Tx, Tree, ?META_NEXT_ID), - set_meta(Tx, Tree, ?META_NEXT_ID, NextId + 1), - NextId. +new_node_id_if_cacheable(Tx, #tree{} = Tree, #node{} = Old, #node{} = New) -> + MembersChanged = Old#node.members /= New#node.members, + NodeIsCacheable = node_is_cacheable(New), + if + MembersChanged andalso NodeIsCacheable -> + clear_node(Tx, Tree, New), + New#node{id = new_node_id()}; + true -> + New + end. + + +node_is_cacheable(#node{id = ?NODE_ROOT_ID}) -> + false; + +node_is_cacheable(#node{level = 0}) -> + false; + +node_is_cacheable(#node{}) -> + true. + + +new_node_id() -> + crypto:strong_rand_bytes(16). %% remove prev/next pointers for nonleaf nodes @@ -1268,11 +1336,24 @@ remove_pointers_if_not_leaf(#node{} = Node) -> Node#node{prev = undefined, next = undefined}. + +print_node(#node{level = 0} = Node) -> + io:format("#node{id = ~s, level = ~w, prev = ~s, next = ~s, members = ~w}~n~n", + [b64(Node#node.id), Node#node.level, b64(Node#node.prev), b64(Node#node.next), + Node#node.members]); + print_node(#node{} = Node) -> - io:format("#node{id = ~w, level = ~w, prev = ~w, next = ~w, members = ~w}~n~n", - [Node#node.id, Node#node.level, Node#node.prev, Node#node.next, Node#node.members]). + io:format("#node{id = ~s, level = ~w, prev = ~s, next = ~s, members = ~s}~n~n", + [base64:encode(Node#node.id), Node#node.level, b64(Node#node.prev), b64(Node#node.next), + [io_lib:format("{~w, ~w, ~s, ~w}, ", [F, L, b64(P), R]) || {F, L, P, R} <- Node#node.members]]). +b64(undefined) -> + undefined; + +b64(Bin) -> + base64:encode(Bin). + %% tests -ifdef(TEST). @@ -1679,4 +1760,22 @@ validate_node_test_() -> ]. +cache_test_() -> + {spawn, [fun() -> + Db = erlfdb_util:get_test_db([empty]), + CacheFun = fun + (set, [Id, Node]) -> + erlang:put(Id, Node); + (clear, Id) -> + erlang:erase(Id); + (get, Id) -> + erlang:get(Id) + end, + Tree = open(Db, <<1,2,3>>, 4, [{cache_fun, CacheFun}]), + [ebtree:insert(Db, Tree, I, I) || I <- lists:seq(1, 16)], + ?assertEqual({1, 1}, ebtree:lookup(Db, Tree, 1)), + NodeCache = [V || {_K, V} <- erlang:get(), is_record(V, node)], + ?assertEqual(3, length(NodeCache)) + end]}. + -endif. -- cgit v1.2.1 From 2dd85afd59d409617b974ca07924c22ec2979c50 Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Thu, 27 Aug 2020 11:26:33 -0500 Subject: Optimize umerge_members Using lists:umerge/3 adds extra invocations of the collation algorithm because its using `=<` semantics when ebtree collations are capable of producing `lt, eq, gt` results. --- src/ebtree/src/ebtree.erl | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/src/ebtree/src/ebtree.erl b/src/ebtree/src/ebtree.erl index 680bf1d55..ea445eada 100644 --- a/src/ebtree/src/ebtree.erl +++ b/src/ebtree/src/ebtree.erl @@ -1161,13 +1161,29 @@ collate(#tree{} = Tree, A, B, Allowed) -> umerge_members(#tree{} = Tree, Level, List1, List2) -> - CollateWrapper = fun + Collate = fun ({K1, _V1}, {K2, _V2}) when Level == 0 -> - collate(Tree, K1, K2, [lt, eq]); + collate(Tree, K1, K2); ({_F1, L1, _V1, _R1}, {_F2, L2, _V2, _R2}) when Level > 0 -> - collate(Tree, L1, L2, [lt, eq]) + collate(Tree, L1, L2) end, - lists:umerge(CollateWrapper, List1, List2). + umerge_members_int(Collate, List1, List2, []). + + +umerge_members_int(Collate, [], [H2 | T2], [HAcc | _] = Acc) -> + case Collate(H2, HAcc) of + lt -> erlang:error(unsorted_members); + eq -> lists:reverse(Acc, T2); + gt -> lists:reverse(Acc, [H2 | T2]) + end; +umerge_members_int(_Collate, List1, [], Acc) -> + lists:reverse(Acc, List1); +umerge_members_int(Collate, [H1 | T1], [H2 | T2], Acc) -> + case Collate(H1, H2) of + lt -> umerge_members_int(Collate, T1, [H2 | T2], [H1 | Acc]); + eq -> umerge_members_int(Collate, T1, [H2 | T2], [H1 | Acc]); + gt -> umerge_members_int(Collate, [H1 | T1], T2, [H2 | Acc]) + end. sort_keys(#tree{} = Tree, List) -> -- cgit v1.2.1 From b55afbddc0013e3fff6f8ca0713d4f0f35b6eda0 Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Thu, 3 Sep 2020 12:44:24 -0500 Subject: Disable node validation in production This keeps validation during tests but disables the validation during production to avoid the overhead of collation. --- src/ebtree/src/ebtree.erl | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/ebtree/src/ebtree.erl b/src/ebtree/src/ebtree.erl index ea445eada..3cfb82030 100644 --- a/src/ebtree/src/ebtree.erl +++ b/src/ebtree/src/ebtree.erl @@ -63,6 +63,12 @@ -define(at_min(Tree, Node), Tree#tree.min == length(Node#node.members)). -define(is_full(Tree, Node), Tree#tree.max == length(Node#node.members)). +-ifdef(TEST). +-define(validate_node(Tree, Node), validate_node(Tree, Node)). +-else. +-define(validate_node(Tree, Node), ignore). +-endif. + %% two special 1-bit bitstrings that cannot appear in valid keys. -define(MIN, <<0:1>>). -define(MAX, <<1:1>>). @@ -1020,7 +1026,7 @@ set_node(Tx, #tree{} = Tree, #node{} = _From, #node{} = To) -> set_node(Tx, #tree{} = Tree, #node{} = Node) -> - validate_node(Tree, Node), + ?validate_node(Tree, Node), Key = node_key(Tree#tree.prefix, Node#node.id), Value = encode_node(Tree, Key, Node), cache(Tree, set, [Node#node.id, Node]), -- cgit v1.2.1 From d72a5f5eb2bc0a3e33c7f1916dde0be97ab208f8 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Thu, 3 Sep 2020 17:41:01 +0100 Subject: return a clean error if pem_decode fails --- src/jwtf/src/jwtf_keystore.erl | 17 +++++++++++------ src/jwtf/test/jwtf_keystore_tests.erl | 11 +++++++++-- 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/src/jwtf/src/jwtf_keystore.erl b/src/jwtf/src/jwtf_keystore.erl index c2d80b9cb..5c2b47985 100644 --- a/src/jwtf/src/jwtf_keystore.erl +++ b/src/jwtf/src/jwtf_keystore.erl @@ -141,12 +141,17 @@ get_from_config(Kty, KID) -> pem_decode(PEM) -> BinPEM = re:replace(PEM, "\\\\n", "\n", [global, {return, binary}]), - case public_key:pem_decode(BinPEM) of - [PEMEntry] -> - public_key:pem_entry_decode(PEMEntry); - [] -> - throw({bad_request, <<"Not a valid key">>}) - end. + try + case public_key:pem_decode(BinPEM) of + [PEMEntry] -> + public_key:pem_entry_decode(PEMEntry); + _ -> + throw({bad_request, <<"Not a valid key">>}) + end + catch + error:_ -> + throw({bad_request, <<"Not a valid key">>}) + end. kty(<<"HS", _/binary>>) -> "hmac"; diff --git a/src/jwtf/test/jwtf_keystore_tests.erl b/src/jwtf/test/jwtf_keystore_tests.erl index 9ec943653..acbc002b5 100644 --- a/src/jwtf/test/jwtf_keystore_tests.erl +++ b/src/jwtf/test/jwtf_keystore_tests.erl @@ -17,6 +17,8 @@ -define(HMAC_SECRET, "aGVsbG8="). -define(RSA_SECRET, "-----BEGIN PUBLIC KEY-----\\nMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAztanwQtIx0sms+x7m1SF\\nh7EHJHkM2biTJ41jR89FsDE2gd3MChpaqxemS5GpNvfFKRvuHa4PUZ3JtRCBG1KM\\n/7EWIVTy1JQDr2mb8couGlQNqz4uXN2vkNQ0XszgjU4Wn6ZpvYxmqPFbmkRe8QSn\\nAy2Wf8jQgjsbez8eaaX0G9S1hgFZUN3KFu7SVmUDQNvWpQdaJPP+ms5Z0CqF7JLa\\nvJmSdsU49nlYw9VH/XmwlUBMye6HgR4ZGCLQS85frqF0xLWvi7CsMdchcIjHudXH\\nQK1AumD/VVZVdi8Q5Qew7F6VXeXqnhbw9n6Px25cCuNuh6u5+E6GUzXRrMpqo9vO\\nqQIDAQAB\\n-----END PUBLIC KEY-----\\n"). +-define(BAD_RSA_SECRET,"-----BEGIN PUBLIC KEY-----\\nMIIDAzCCAeugAwIBAgIJAL5YnwkF5jT6MA0GCSqGSIb3DQEBBQUAMBgxFjAUBgNV\\nBAMMDWZvby5hdXRoMC5jb20wHhcNMTQwMzE4MjAwNzUwWhcNMjcxMTI1MjAwNzUw\\nWjAYMRYwFAYDVQQDDA1mb28uYXV0aDAuY29tMIIBIjANBgkqhkiG9w0BAQEFAAOC\\nAQ8AMIIBCgKCAQEAtP6w43ppU0nkqGNHASojFJl60+k3isNVzYTO06f2vm/5tc3l\\nRhEA6ykyIuO8tHY3Ziqowc4h8XGaeDKqHw/BSS/b54F2rUVb/wACWyJICkM3bGtC\\ntWmM7kU8XZRCqXV04qIgQte+9GFSOax/TFyotS+FGFyFPUY+b57H7/6wNQ8ywGLi\\nWCbrWEx4wOJbGhnVNV+STmZXJgToLgz0R2kwsiGURhHMkNkUjcRl34nSv+lMYSMK\\nyywwzu0k3KBgqkxWibU3pa3jibWVRxc20f8ltfByp/wU/ICQ0MNGJ3/KaCiOtGQa\\noZOa7bMzb4W1x2L3cfgrshLrp978+FEeNzY9KQIDAQABo1AwTjAdBgNVHQ4EFgQU\\nOyDe79RE2SYTcCNPbniw3p4uZykwHwYDVR0jBBgwFoAUOyDe79RE2SYTcCNPbniw\\n3p4uZykwDAYDVR0TBAUwAwEB/zANBgkqhkiG9w0BAQUFAAOCAQEAW0mB5wR1sSHC\\n7iSmQo1uioH80X7txJY6zXH8hVjoCQOGUCi79x43L9wUTtyJg44Z8RhNozWOsCZM\\nf5LDSkeNx48QITrinDqWv5C/NA0klJ1g0Y/jN9X01r5T6vGdge8inIbQcO7ZrJ6v\\nVYDH+9HLvfPKFYd0uhYRFnw2aa3mKIRsanlWSEYHQr5Aoa+nboFLRiDtVWBuiAoV\\nZ1NoYm7uheU42CNGJqkv6SXxKHTea2TjmOxKRmaxYMvkjk/CsiPrSEQHUxDXqSSd\\nrIWU8o+9q9Hpdb3UuNJzMjlTzg2/UeHpzMBJAWxUlzTuXMqrrDFF9V/d4zO77Ts/\\n4mRBKB+GsQ==\\n-----END PUBLIC KEY-----\\n"). + -define(EC_SECRET, "-----BEGIN PUBLIC KEY-----\\nMHYwEAYHKoZIzj0CAQYFK4EEACIDYgAEDsr0lz/Dg3luarb+Kua0Wcj9WrfR23os\\nwHzakglb8GhWRDn+oZT0Bt/26sX8uB4/ij9PEOLHPo+IHBtX4ELFFVr5GTzlqcJe\\nyctaTDd1OOAPXYuc67EWtGZ3pDAzztRs\\n-----END PUBLIC KEY-----\\n"). setup() -> @@ -31,7 +33,10 @@ setup() -> config:set("jwt_keys", "hmac:ec", ?EC_SECRET), config:set("jwt_keys", "rsa:ec", ?EC_SECRET), - config:set("jwt_keys", "ec:ec", ?EC_SECRET). + config:set("jwt_keys", "ec:ec", ?EC_SECRET), + + config:set("jwt_keys", "rsa:badrsa", ?BAD_RSA_SECRET). + teardown(_) -> test_util:stop_applications([config, jwtf]). @@ -52,6 +57,8 @@ jwtf_keystore_test_() -> ?_assertThrow({bad_request, _}, jwtf_keystore:get(<<"HS256">>, <<"ec">>)), ?_assertThrow({bad_request, _}, jwtf_keystore:get(<<"RS256">>, <<"ec">>)), - ?_assertMatch({#'ECPoint'{}, _}, jwtf_keystore:get(<<"ES256">>, <<"ec">>)) + ?_assertMatch({#'ECPoint'{}, _}, jwtf_keystore:get(<<"ES256">>, <<"ec">>)), + + ?_assertThrow({bad_request, <<"Not a valid key">>}, jwtf_keystore:get(<<"RS256">>, <<"badrsa">>)) ] }. -- cgit v1.2.1 From e7822a5390de398ae032a0f632ec3c9a89a10864 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bessenyei=20Bal=C3=A1zs=20Don=C3=A1t?= Date: Fri, 4 Sep 2020 14:00:32 +0200 Subject: Make COPY doc return only one "ok" --- src/chttpd/src/chttpd_db.erl | 2 +- src/chttpd/test/eunit/chttpd_db_test.erl | 12 ++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/src/chttpd/src/chttpd_db.erl b/src/chttpd/src/chttpd_db.erl index 5af65937c..b9954603a 100644 --- a/src/chttpd/src/chttpd_db.erl +++ b/src/chttpd/src/chttpd_db.erl @@ -1108,7 +1108,7 @@ db_doc_req(#httpd{method='COPY', user_ctx=Ctx}=Req, Db, SourceDocId) -> send_json(Req, HttpCode, [{"Location", Loc}, {"ETag", "\"" ++ ?b2l(couch_doc:rev_to_str(NewTargetRev)) ++ "\""}], - {[{ok, true}] ++ PartRes}); + {PartRes}); db_doc_req(Req, _Db, _DocId) -> send_method_not_allowed(Req, "DELETE,GET,HEAD,POST,PUT,COPY"). diff --git a/src/chttpd/test/eunit/chttpd_db_test.erl b/src/chttpd/test/eunit/chttpd_db_test.erl index 204332d7f..d844aa5b6 100644 --- a/src/chttpd/test/eunit/chttpd_db_test.erl +++ b/src/chttpd/test/eunit/chttpd_db_test.erl @@ -73,6 +73,7 @@ all_test_() -> fun should_return_update_seq_when_set_on_all_docs/1, fun should_not_return_update_seq_when_unset_on_all_docs/1, fun should_return_correct_id_on_doc_copy/1, + fun should_return_only_one_ok_on_doc_copy/1, fun should_return_400_for_bad_engine/1, fun should_not_change_db_proper_after_rewriting_shardmap/1, fun should_succeed_on_all_docs_with_queries_keys/1, @@ -269,6 +270,17 @@ should_return_correct_id_on_doc_copy(Url) -> ] end)}. +should_return_only_one_ok_on_doc_copy(Url) -> + {timeout, ?TIMEOUT, ?_test(begin + {ok, _, _, _} = create_doc(Url, "testdoc"), + {_, _, _, ResultBody} = test_request:copy(Url ++ "/testdoc", + [?CONTENT_JSON, ?AUTH, ?DESTHEADER1]), + {ResultJson} = jiffy:decode(ResultBody), + NumOks = length(lists:filter(fun({Key, Value}) -> Key == <<"ok">> end, ResultJson)), + [ + ?assertEqual(1, NumOks) + ] + end)}. attachment_doc() -> {ok, Data} = file:read_file(?FIXTURE_TXT), -- cgit v1.2.1 From 881f52f50e8d5020ffbdb52fbb73de162b7467ca Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Fri, 4 Sep 2020 12:38:11 +0100 Subject: Add option to delay responses until the end When set, every response is sent once fully generated on the server side. This increases memory usage on the nodes but simplifies error handling for the client as it eliminates the possibility that the response will be deliberately terminated midway through due to a timeout. The config value can be changed at runtime without impacting any in-flight responses. --- rel/overlay/etc/default.ini | 3 ++ src/chttpd/src/chttpd.erl | 58 +++++++++++++++++---- src/chttpd/test/eunit/chttpd_delayed_test.erl | 73 +++++++++++++++++++++++++++ 3 files changed, 125 insertions(+), 9 deletions(-) create mode 100644 src/chttpd/test/eunit/chttpd_delayed_test.erl diff --git a/rel/overlay/etc/default.ini b/rel/overlay/etc/default.ini index f3f12ca96..16d568fa9 100644 --- a/rel/overlay/etc/default.ini +++ b/rel/overlay/etc/default.ini @@ -130,6 +130,9 @@ prefer_minimal = Cache-Control, Content-Length, Content-Range, Content-Type, ETa ; _dbs_info in a request max_db_number_for_dbs_info_req = 100 +; set to true to delay the start of a response until the end has been calculated +;buffer_response = false + ; authentication handlers ; authentication_handlers = {chttpd_auth, cookie_authentication_handler}, {chttpd_auth, default_authentication_handler} ; uncomment the next line to enable proxy authentication diff --git a/src/chttpd/src/chttpd.erl b/src/chttpd/src/chttpd.erl index adde0730f..fb7d61a06 100644 --- a/src/chttpd/src/chttpd.erl +++ b/src/chttpd/src/chttpd.erl @@ -52,8 +52,9 @@ req, code, headers, - first_chunk, - resp=nil + chunks, + resp=nil, + buffer_response=false }). start_link() -> @@ -780,40 +781,54 @@ start_json_response(Req, Code, Headers0) -> end_json_response(Resp) -> couch_httpd:end_json_response(Resp). + start_delayed_json_response(Req, Code) -> start_delayed_json_response(Req, Code, []). + start_delayed_json_response(Req, Code, Headers) -> start_delayed_json_response(Req, Code, Headers, ""). + start_delayed_json_response(Req, Code, Headers, FirstChunk) -> {ok, #delayed_resp{ start_fun = fun start_json_response/3, req = Req, code = Code, headers = Headers, - first_chunk = FirstChunk}}. + chunks = [FirstChunk], + buffer_response = buffer_response(Req)}}. + start_delayed_chunked_response(Req, Code, Headers) -> start_delayed_chunked_response(Req, Code, Headers, ""). + start_delayed_chunked_response(Req, Code, Headers, FirstChunk) -> {ok, #delayed_resp{ start_fun = fun start_chunked_response/3, req = Req, code = Code, headers = Headers, - first_chunk = FirstChunk}}. + chunks = [FirstChunk], + buffer_response = buffer_response(Req)}}. + -send_delayed_chunk(#delayed_resp{}=DelayedResp, Chunk) -> +send_delayed_chunk(#delayed_resp{buffer_response=false}=DelayedResp, Chunk) -> {ok, #delayed_resp{resp=Resp}=DelayedResp1} = start_delayed_response(DelayedResp), {ok, Resp} = send_chunk(Resp, Chunk), - {ok, DelayedResp1}. + {ok, DelayedResp1}; + +send_delayed_chunk(#delayed_resp{buffer_response=true}=DelayedResp, Chunk) -> + #delayed_resp{chunks = Chunks} = DelayedResp, + {ok, DelayedResp#delayed_resp{chunks = [Chunk | Chunks]}}. + send_delayed_last_chunk(Req) -> send_delayed_chunk(Req, []). + send_delayed_error(#delayed_resp{req=Req,resp=nil}=DelayedResp, Reason) -> {Code, ErrorStr, ReasonStr} = error_info(Reason), {ok, Resp} = send_error(Req, Code, ErrorStr, ReasonStr), @@ -823,6 +838,7 @@ send_delayed_error(#delayed_resp{resp=Resp, req=Req}, Reason) -> log_error_with_stack_trace(Reason), throw({http_abort, Resp, Reason}). + close_delayed_json_object(Resp, Buffer, Terminator, 0) -> % Use a separate chunk to close the streamed array to maintain strict % compatibility with earlier versions. See COUCHDB-2724 @@ -831,10 +847,22 @@ close_delayed_json_object(Resp, Buffer, Terminator, 0) -> close_delayed_json_object(Resp, Buffer, Terminator, _Threshold) -> send_delayed_chunk(Resp, [Buffer | Terminator]). -end_delayed_json_response(#delayed_resp{}=DelayedResp) -> + +end_delayed_json_response(#delayed_resp{buffer_response=false}=DelayedResp) -> {ok, #delayed_resp{resp=Resp}} = start_delayed_response(DelayedResp), - end_json_response(Resp). + end_json_response(Resp); + +end_delayed_json_response(#delayed_resp{buffer_response=true}=DelayedResp) -> + #delayed_resp{ + req = Req, + code = Code, + headers = Headers, + chunks = Chunks + } = DelayedResp, + {ok, Resp} = start_response_length(Req, Code, Headers, iolist_size(Chunks)), + send(Resp, lists:reverse(Chunks)). + get_delayed_req(#delayed_resp{req=#httpd{mochi_req=MochiReq}}) -> MochiReq; @@ -847,7 +875,7 @@ start_delayed_response(#delayed_resp{resp=nil}=DelayedResp) -> req=Req, code=Code, headers=Headers, - first_chunk=FirstChunk + chunks=[FirstChunk] }=DelayedResp, {ok, Resp} = StartFun(Req, Code, Headers), case FirstChunk of @@ -858,6 +886,18 @@ start_delayed_response(#delayed_resp{resp=nil}=DelayedResp) -> start_delayed_response(#delayed_resp{}=DelayedResp) -> {ok, DelayedResp}. + +buffer_response(Req) -> + case chttpd:qs_value(Req, "buffer_response") of + "false" -> + false; + "true" -> + true; + _ -> + config:get_boolean("chttpd", "buffer_response", false) + end. + + error_info({Error, Reason}) when is_list(Reason) -> error_info({Error, couch_util:to_binary(Reason)}); error_info(bad_request) -> diff --git a/src/chttpd/test/eunit/chttpd_delayed_test.erl b/src/chttpd/test/eunit/chttpd_delayed_test.erl new file mode 100644 index 000000000..64232dcf8 --- /dev/null +++ b/src/chttpd/test/eunit/chttpd_delayed_test.erl @@ -0,0 +1,73 @@ +-module(chttpd_delayed_test). + +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("couch/include/couch_db.hrl"). + +-define(USER, "chttpd_view_test_admin"). +-define(PASS, "pass"). +-define(AUTH, {basic_auth, {?USER, ?PASS}}). +-define(CONTENT_JSON, {"Content-Type", "application/json"}). +-define(DDOC, "{\"_id\": \"_design/bar\", \"views\": {\"baz\": + {\"map\": \"function(doc) {emit(doc._id, doc._id);}\"}}}"). + +-define(FIXTURE_TXT, ?ABS_PATH(?FILE)). +-define(i2l(I), integer_to_list(I)). +-define(TIMEOUT, 60). % seconds + +setup() -> + Hashed = couch_passwords:hash_admin_password(?PASS), + ok = config:set("admins", ?USER, ?b2l(Hashed), _Persist=false), + ok = config:set("chttpd", "buffer_response", "true"), + TmpDb = ?tempdb(), + Addr = config:get("chttpd", "bind_address", "127.0.0.1"), + Port = mochiweb_socket_server:get(chttpd, port), + Url = lists:concat(["http://", Addr, ":", Port, "/", ?b2l(TmpDb)]), + create_db(Url), + Url. + +teardown(Url) -> + delete_db(Url), + ok = config:delete("admins", ?USER, _Persist=false). + +create_db(Url) -> + {ok, Status, _, _} = test_request:put(Url, [?CONTENT_JSON, ?AUTH], "{}"), + ?assert(Status =:= 201 orelse Status =:= 202). + + +delete_db(Url) -> + {ok, 200, _, _} = test_request:delete(Url, [?AUTH]). + + +all_test_() -> + { + "chttpd delay tests", + { + setup, + fun chttpd_test_util:start_couch/0, fun chttpd_test_util:stop_couch/1, + { + foreach, + fun setup/0, fun teardown/1, + [ + fun test_buffer_response_all_docs/1, + fun test_buffer_response_changes/1 + ] + } + } + }. + + +test_buffer_response_all_docs(Url) -> + assert_has_content_length(Url ++ "/_all_docs"). + + +test_buffer_response_changes(Url) -> + assert_has_content_length(Url ++ "/_changes"). + + +assert_has_content_length(Url) -> + {timeout, ?TIMEOUT, ?_test(begin + {ok, Code, Headers, _Body} = test_request:get(Url, [?AUTH]), + ?assertEqual(200, Code), + ?assert(lists:keymember("Content-Length", 1, Headers)) + end)}. + \ No newline at end of file -- cgit v1.2.1 From f677f1eeb02a8ccfd6f637d53c00993112aced30 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Fri, 4 Sep 2020 12:38:11 +0100 Subject: Add option to delay responses until the end When set, every response is sent once fully generated on the server side. This increases memory usage on the nodes but simplifies error handling for the client as it eliminates the possibility that the response will be deliberately terminated midway through due to a timeout. The config value can be changed at runtime without impacting any in-flight responses. --- rel/overlay/etc/default.ini | 3 ++ src/chttpd/src/chttpd.erl | 58 +++++++++++++++++---- src/chttpd/test/eunit/chttpd_delayed_test.erl | 73 +++++++++++++++++++++++++++ 3 files changed, 125 insertions(+), 9 deletions(-) create mode 100644 src/chttpd/test/eunit/chttpd_delayed_test.erl diff --git a/rel/overlay/etc/default.ini b/rel/overlay/etc/default.ini index 1c37765be..071359a2f 100644 --- a/rel/overlay/etc/default.ini +++ b/rel/overlay/etc/default.ini @@ -145,6 +145,9 @@ prefer_minimal = Cache-Control, Content-Length, Content-Range, Content-Type, ETa ; _dbs_info in a request max_db_number_for_dbs_info_req = 100 +; set to true to delay the start of a response until the end has been calculated +;buffer_response = false + ; authentication handlers ; authentication_handlers = {chttpd_auth, cookie_authentication_handler}, {chttpd_auth, default_authentication_handler} ; uncomment the next line to enable proxy authentication diff --git a/src/chttpd/src/chttpd.erl b/src/chttpd/src/chttpd.erl index 325369ecd..fdca5c810 100644 --- a/src/chttpd/src/chttpd.erl +++ b/src/chttpd/src/chttpd.erl @@ -52,8 +52,9 @@ req, code, headers, - first_chunk, - resp=nil + chunks, + resp=nil, + buffer_response=false }). start_link() -> @@ -800,40 +801,54 @@ start_json_response(Req, Code, Headers0) -> end_json_response(Resp) -> couch_httpd:end_json_response(Resp). + start_delayed_json_response(Req, Code) -> start_delayed_json_response(Req, Code, []). + start_delayed_json_response(Req, Code, Headers) -> start_delayed_json_response(Req, Code, Headers, ""). + start_delayed_json_response(Req, Code, Headers, FirstChunk) -> {ok, #delayed_resp{ start_fun = fun start_json_response/3, req = Req, code = Code, headers = Headers, - first_chunk = FirstChunk}}. + chunks = [FirstChunk], + buffer_response = buffer_response(Req)}}. + start_delayed_chunked_response(Req, Code, Headers) -> start_delayed_chunked_response(Req, Code, Headers, ""). + start_delayed_chunked_response(Req, Code, Headers, FirstChunk) -> {ok, #delayed_resp{ start_fun = fun start_chunked_response/3, req = Req, code = Code, headers = Headers, - first_chunk = FirstChunk}}. + chunks = [FirstChunk], + buffer_response = buffer_response(Req)}}. + -send_delayed_chunk(#delayed_resp{}=DelayedResp, Chunk) -> +send_delayed_chunk(#delayed_resp{buffer_response=false}=DelayedResp, Chunk) -> {ok, #delayed_resp{resp=Resp}=DelayedResp1} = start_delayed_response(DelayedResp), {ok, Resp} = send_chunk(Resp, Chunk), - {ok, DelayedResp1}. + {ok, DelayedResp1}; + +send_delayed_chunk(#delayed_resp{buffer_response=true}=DelayedResp, Chunk) -> + #delayed_resp{chunks = Chunks} = DelayedResp, + {ok, DelayedResp#delayed_resp{chunks = [Chunk | Chunks]}}. + send_delayed_last_chunk(Req) -> send_delayed_chunk(Req, []). + send_delayed_error(#delayed_resp{req=Req,resp=nil}=DelayedResp, Reason) -> {Code, ErrorStr, ReasonStr} = error_info(Reason), {ok, Resp} = send_error(Req, Code, ErrorStr, ReasonStr), @@ -843,6 +858,7 @@ send_delayed_error(#delayed_resp{resp=Resp, req=Req}, Reason) -> log_error_with_stack_trace(Reason), throw({http_abort, Resp, Reason}). + close_delayed_json_object(Resp, Buffer, Terminator, 0) -> % Use a separate chunk to close the streamed array to maintain strict % compatibility with earlier versions. See COUCHDB-2724 @@ -851,10 +867,22 @@ close_delayed_json_object(Resp, Buffer, Terminator, 0) -> close_delayed_json_object(Resp, Buffer, Terminator, _Threshold) -> send_delayed_chunk(Resp, [Buffer | Terminator]). -end_delayed_json_response(#delayed_resp{}=DelayedResp) -> + +end_delayed_json_response(#delayed_resp{buffer_response=false}=DelayedResp) -> {ok, #delayed_resp{resp=Resp}} = start_delayed_response(DelayedResp), - end_json_response(Resp). + end_json_response(Resp); + +end_delayed_json_response(#delayed_resp{buffer_response=true}=DelayedResp) -> + #delayed_resp{ + req = Req, + code = Code, + headers = Headers, + chunks = Chunks + } = DelayedResp, + {ok, Resp} = start_response_length(Req, Code, Headers, iolist_size(Chunks)), + send(Resp, lists:reverse(Chunks)). + get_delayed_req(#delayed_resp{req=#httpd{mochi_req=MochiReq}}) -> MochiReq; @@ -867,7 +895,7 @@ start_delayed_response(#delayed_resp{resp=nil}=DelayedResp) -> req=Req, code=Code, headers=Headers, - first_chunk=FirstChunk + chunks=[FirstChunk] }=DelayedResp, {ok, Resp} = StartFun(Req, Code, Headers), case FirstChunk of @@ -878,6 +906,18 @@ start_delayed_response(#delayed_resp{resp=nil}=DelayedResp) -> start_delayed_response(#delayed_resp{}=DelayedResp) -> {ok, DelayedResp}. + +buffer_response(Req) -> + case chttpd:qs_value(Req, "buffer_response") of + "false" -> + false; + "true" -> + true; + _ -> + config:get_boolean("chttpd", "buffer_response", false) + end. + + error_info({Error, Reason}) when is_list(Reason) -> error_info({Error, couch_util:to_binary(Reason)}); error_info(bad_request) -> diff --git a/src/chttpd/test/eunit/chttpd_delayed_test.erl b/src/chttpd/test/eunit/chttpd_delayed_test.erl new file mode 100644 index 000000000..64232dcf8 --- /dev/null +++ b/src/chttpd/test/eunit/chttpd_delayed_test.erl @@ -0,0 +1,73 @@ +-module(chttpd_delayed_test). + +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("couch/include/couch_db.hrl"). + +-define(USER, "chttpd_view_test_admin"). +-define(PASS, "pass"). +-define(AUTH, {basic_auth, {?USER, ?PASS}}). +-define(CONTENT_JSON, {"Content-Type", "application/json"}). +-define(DDOC, "{\"_id\": \"_design/bar\", \"views\": {\"baz\": + {\"map\": \"function(doc) {emit(doc._id, doc._id);}\"}}}"). + +-define(FIXTURE_TXT, ?ABS_PATH(?FILE)). +-define(i2l(I), integer_to_list(I)). +-define(TIMEOUT, 60). % seconds + +setup() -> + Hashed = couch_passwords:hash_admin_password(?PASS), + ok = config:set("admins", ?USER, ?b2l(Hashed), _Persist=false), + ok = config:set("chttpd", "buffer_response", "true"), + TmpDb = ?tempdb(), + Addr = config:get("chttpd", "bind_address", "127.0.0.1"), + Port = mochiweb_socket_server:get(chttpd, port), + Url = lists:concat(["http://", Addr, ":", Port, "/", ?b2l(TmpDb)]), + create_db(Url), + Url. + +teardown(Url) -> + delete_db(Url), + ok = config:delete("admins", ?USER, _Persist=false). + +create_db(Url) -> + {ok, Status, _, _} = test_request:put(Url, [?CONTENT_JSON, ?AUTH], "{}"), + ?assert(Status =:= 201 orelse Status =:= 202). + + +delete_db(Url) -> + {ok, 200, _, _} = test_request:delete(Url, [?AUTH]). + + +all_test_() -> + { + "chttpd delay tests", + { + setup, + fun chttpd_test_util:start_couch/0, fun chttpd_test_util:stop_couch/1, + { + foreach, + fun setup/0, fun teardown/1, + [ + fun test_buffer_response_all_docs/1, + fun test_buffer_response_changes/1 + ] + } + } + }. + + +test_buffer_response_all_docs(Url) -> + assert_has_content_length(Url ++ "/_all_docs"). + + +test_buffer_response_changes(Url) -> + assert_has_content_length(Url ++ "/_changes"). + + +assert_has_content_length(Url) -> + {timeout, ?TIMEOUT, ?_test(begin + {ok, Code, Headers, _Body} = test_request:get(Url, [?AUTH]), + ?assertEqual(200, Code), + ?assert(lists:keymember("Content-Length", 1, Headers)) + end)}. + \ No newline at end of file -- cgit v1.2.1 From b82a5757119083177e0c8af9919a5ff4265ddc20 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bessenyei=20Bal=C3=A1zs=20Don=C3=A1t?= Date: Fri, 4 Sep 2020 14:00:32 +0200 Subject: Make COPY doc return only one "ok" --- src/chttpd/src/chttpd_db.erl | 2 +- src/chttpd/test/eunit/chttpd_db_test.erl | 12 ++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/src/chttpd/src/chttpd_db.erl b/src/chttpd/src/chttpd_db.erl index c458cba12..ec4a1a40f 100644 --- a/src/chttpd/src/chttpd_db.erl +++ b/src/chttpd/src/chttpd_db.erl @@ -1271,7 +1271,7 @@ db_doc_req(#httpd{method='COPY'}=Req, Db, SourceDocId) -> send_json(Req, HttpCode, [{"Location", Loc}, {"ETag", "\"" ++ ?b2l(couch_doc:rev_to_str(NewTargetRev)) ++ "\""}], - {[{ok, true}] ++ PartRes}); + {PartRes}); db_doc_req(Req, _Db, _DocId) -> send_method_not_allowed(Req, "DELETE,GET,HEAD,POST,PUT,COPY"). diff --git a/src/chttpd/test/eunit/chttpd_db_test.erl b/src/chttpd/test/eunit/chttpd_db_test.erl index b1d514a4f..cebc3b6d6 100644 --- a/src/chttpd/test/eunit/chttpd_db_test.erl +++ b/src/chttpd/test/eunit/chttpd_db_test.erl @@ -74,6 +74,7 @@ all_test_() -> fun should_not_return_update_seq_when_unset_on_all_docs/1, fun should_return_correct_id_on_doc_copy/1, fun should_ignore_engine_parameter/1, + fun should_return_only_one_ok_on_doc_copy/1, fun should_succeed_on_all_docs_with_queries_keys/1, fun should_succeed_on_all_docs_with_queries_limit_skip/1, fun should_succeed_on_all_docs_with_multiple_queries/1, @@ -268,6 +269,17 @@ should_return_correct_id_on_doc_copy(Url) -> ] end)}. +should_return_only_one_ok_on_doc_copy(Url) -> + {timeout, ?TIMEOUT, ?_test(begin + {ok, _, _, _} = create_doc(Url, "testdoc"), + {_, _, _, ResultBody} = test_request:copy(Url ++ "/testdoc", + [?CONTENT_JSON, ?AUTH, ?DESTHEADER1]), + {ResultJson} = jiffy:decode(ResultBody), + NumOks = length(lists:filter(fun({Key, Value}) -> Key == <<"ok">> end, ResultJson)), + [ + ?assertEqual(1, NumOks) + ] + end)}. attachment_doc() -> {ok, Data} = file:read_file(?FIXTURE_TXT), -- cgit v1.2.1 From e4d577be01bfad2f8e4cf4047efd18391e2c8a31 Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Wed, 9 Sep 2020 13:21:17 -0400 Subject: Handle malformed URLs when stripping URL creds in couch_replicator Previously there was an error thrown which prevented emitting _scheduler/docs responses. Instead of throwing an error, return `null` if the URL cannot be parsed. --- src/couch_replicator/src/couch_replicator.erl | 28 +++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/src/couch_replicator/src/couch_replicator.erl b/src/couch_replicator/src/couch_replicator.erl index b38f31b59..b169dccb1 100644 --- a/src/couch_replicator/src/couch_replicator.erl +++ b/src/couch_replicator/src/couch_replicator.erl @@ -141,7 +141,11 @@ strip_url_creds(Endpoint) -> iolist_to_binary(couch_util:url_strip_password(Url)) catch throw:{error, local_endpoints_not_supported} -> - Endpoint + Endpoint; + error:_ -> + % Avoid exposing any part of the URL in case there is a password in + % the malformed endpoint URL + null end. @@ -356,7 +360,8 @@ strip_url_creds_test_() -> [ t_strip_http_basic_creds(), t_strip_http_props_creds(), - t_strip_local_db_creds() + t_strip_local_db_creds(), + t_strip_url_creds_errors() ] }. @@ -389,4 +394,23 @@ t_strip_http_props_creds() -> ?assertEqual(<<"http://host/db/">>, strip_url_creds(Props2)) end). + +t_strip_url_creds_errors() -> + ?_test(begin + Bad1 = {[{<<"url">>, <<"http://adm:pass/bad">>}]}, + ?assertEqual(null, strip_url_creds(Bad1)), + Bad2 = {[{<<"garbage">>, <<"more garbage">>}]}, + ?assertEqual(null, strip_url_creds(Bad2)), + Bad3 = <<"http://a:b:c">>, + ?assertEqual(null, strip_url_creds(Bad3)), + Bad4 = <<"http://adm:pass:pass/bad">>, + ?assertEqual(null, strip_url_creds(Bad4)), + ?assertEqual(null, strip_url_creds(null)), + ?assertEqual(null, strip_url_creds(42)), + ?assertEqual(null, strip_url_creds([<<"a">>, <<"b">>])), + Bad5 = {[{<<"source_proxy">>, <<"http://adm:pass/bad">>}]}, + ?assertEqual(null, strip_url_creds(Bad5)) + end). + + -endif. -- cgit v1.2.1 From a819e45f8a841f7fe0b5ef583a95e7de9c04b172 Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Wed, 9 Sep 2020 15:33:24 -0400 Subject: Add node and pid to indexer active tasks output --- src/couch_views/src/couch_views_util.erl | 4 +++- src/couch_views/test/couch_views_active_tasks_test.erl | 13 +++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/src/couch_views/src/couch_views_util.erl b/src/couch_views/src/couch_views_util.erl index 11bba75bd..6298acf33 100644 --- a/src/couch_views/src/couch_views_util.erl +++ b/src/couch_views/src/couch_views_util.erl @@ -286,7 +286,9 @@ active_tasks_info(ChangesDone, DbName, DDocId, LastSeq, DBSeq) -> <<"changes_done">> => ChangesDone, <<"design_document">> => DDocId, <<"current_version_stamp">> => convert_seq_to_stamp(LastSeq), - <<"db_version_stamp">> => convert_seq_to_stamp(DBSeq) + <<"db_version_stamp">> => convert_seq_to_stamp(DBSeq), + <<"node">> => erlang:atom_to_binary(node(), utf8), + <<"pid">> => list_to_binary(pid_to_list(self())) }. diff --git a/src/couch_views/test/couch_views_active_tasks_test.erl b/src/couch_views/test/couch_views_active_tasks_test.erl index f87e01055..c782ffcbd 100644 --- a/src/couch_views/test/couch_views_active_tasks_test.erl +++ b/src/couch_views/test/couch_views_active_tasks_test.erl @@ -81,6 +81,19 @@ verify_basic_active_tasks({Db, DDoc}) -> {IndexerPid, {changes_done, ChangesDone}} = wait_to_reach_changes(10000), [ActiveTask] = fabric2_active_tasks:get_active_tasks(), ChangesDone1 = maps:get(<<"changes_done">>, ActiveTask), + Type = maps:get(<<"type">>, ActiveTask), + DbName = maps:get(<<"database">>, ActiveTask), + DDocId = maps:get(<<"design_document">>, ActiveTask), + Node = maps:get(<<"node">>, ActiveTask), + PidBin = maps:get(<<"pid">>, ActiveTask), + Pid = erlang:list_to_pid(binary_to_list(PidBin)), + ?assertEqual(<<"indexer">>, Type), + ?assertEqual(fabric2_db:name(Db), DbName), + ?assertEqual(?INDEX_FOO, DDocId), + ?assertEqual(atom_to_binary(node(), utf8), Node), + ?assert(is_pid(Pid)), + ?assert(is_process_alive(Pid)), + ?assertEqual(IndexerPid, Pid), IndexerPid ! continue, % we assume the indexer has run for a bit so it has to > 0 ?assert(ChangesDone1 > 0), -- cgit v1.2.1 From 6cedf870b9e1580007f7c20a0359d4be3d5abc3b Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Tue, 8 Sep 2020 20:14:18 -0400 Subject: Update all the type monitors after setting any couch jobs type timeout This mostly helps with flaky tests where some jobs might complete before the type monitor discovers this particular type, so opt to always re-scan and start notification monitors when any type timeout is set. --- src/couch_jobs/src/couch_jobs.erl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/couch_jobs/src/couch_jobs.erl b/src/couch_jobs/src/couch_jobs.erl index f6fb62664..f5d6a7b96 100644 --- a/src/couch_jobs/src/couch_jobs.erl +++ b/src/couch_jobs/src/couch_jobs.erl @@ -293,7 +293,8 @@ wait(Subs, State, Timeout) when is_list(Subs), set_type_timeout(Type, Timeout) -> couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(), fun(JTx) -> couch_jobs_fdb:set_type_timeout(JTx, Type, Timeout) - end). + end), + ok = couch_jobs_server:force_check_types(). -spec clear_type_timeout(job_type()) -> ok. -- cgit v1.2.1 From 45ddc9350e34c609a0a0b0279d0a70f8a4cbc63b Mon Sep 17 00:00:00 2001 From: Joan Touzet Date: Thu, 10 Sep 2020 13:09:10 -0400 Subject: Introduce .asf.yaml file (#3020) --- .asf.yaml | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 .asf.yaml diff --git a/.asf.yaml b/.asf.yaml new file mode 100644 index 000000000..a3b51fffa --- /dev/null +++ b/.asf.yaml @@ -0,0 +1,32 @@ +# see https://cwiki.apache.org/confluence/display/INFRA/git+-+.asf.yaml+features#git.asf.yamlfeatures-GitHubsettings + +github: + description: "Seamless multi-master syncing database with an intuitive HTTP/JSON API, designed for reliability" + homepage: https://couchdb.apache.org/ + labels: + - database + - content + - network-server + - http + - cloud + - erlang + - javascript + - couchdb + - big-data + - network-client + features: + issues: true + projects: true + enabled_merge_buttons: + squash: true + rebase: true + merge: false + +notifications: + commits: commits@couchdb.apache.org + issues: notifications@couchdb.apache.org + pullrequests: notifications@couchdb.apache.org + # This would send new/closed PR notifications to dev@ + #pullrequests_status: dev@couchdb.apache.org + # This would send individual PR comments/reviews to notifications@ + #pullrequests_comment: notifications@couchdb.apache.org -- cgit v1.2.1 From 9412526730c39394282d2b18c6cb5d2e7dcbb461 Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Thu, 10 Sep 2020 14:56:35 -0500 Subject: Fix flaky active tasks test Depending on the computer and ordering of tests couch_rate will sometimes give a budget of 1000 or more. This leads the active tasks test to grab the initial _active_task blob which contains `"changes_done": 0` which fails the test. --- src/couch_views/test/couch_views_active_tasks_test.erl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/couch_views/test/couch_views_active_tasks_test.erl b/src/couch_views/test/couch_views_active_tasks_test.erl index c782ffcbd..6085d73a8 100644 --- a/src/couch_views/test/couch_views_active_tasks_test.erl +++ b/src/couch_views/test/couch_views_active_tasks_test.erl @@ -47,6 +47,9 @@ foreach_setup() -> Docs = make_docs(?TOTAL_DOCS), fabric2_db:update_docs(Db, [DDoc | Docs]), + meck:new(couch_rate, [passthrough]), + meck:expect(couch_rate, budget, fun(_) -> 100 end), + {Db, DDoc}. -- cgit v1.2.1 From a94e693f32672e4613bce0d80d0b9660f85275ea Mon Sep 17 00:00:00 2001 From: Tony Sun Date: Thu, 10 Sep 2020 13:35:17 -0700 Subject: add remonitor code to DOWN message (#3144) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Smoosh monitors the compactor pid to determine when the compaction jobs finishes, and uses this for its idea of concurrency. However, this isn't accurate in the case where the compaction job has to re-spawn to catch up on intervening changes since the same logical compaction job continues with another pid and smoosh is not aware. In such cases, a smoosh channel with concurrency one can start arbitrarily many additional database compaction jobs. To solve this problem, we added a check to see if a compaction PID exists for a db in `start_compact`. But wee need to add another check because this check is only for shard that comes off the queue. So the following can still occur: 1. Enqueue a bunch of stuff into channel with concurrency 1 2. Begin highest priority job, Shard1, in channel 3. Compaction finishes, discovers compaction file is behind main file 4. Smoosh-monitored PID for Shard1 exits, a new one starts to finish the job 5. Smoosh receives the 'DOWN' message, begins the next highest priority job, Shard2 6. Channel concurrency is now 2, not 1 This change adds another check into the 'DOWN' message so that it checks for that specific shard. If the compaction PID exists then it means a new process was spawned and we just monitor that one and add it back to the queue. The length of the queue does not change and therefore we won’t spawn new compaction jobs. --- src/smoosh/src/smoosh_channel.erl | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/src/smoosh/src/smoosh_channel.erl b/src/smoosh/src/smoosh_channel.erl index d8a8d14a9..2a45c17dc 100644 --- a/src/smoosh/src/smoosh_channel.erl +++ b/src/smoosh/src/smoosh_channel.erl @@ -122,10 +122,9 @@ handle_info({'DOWN', Ref, _, Job, Reason}, State0) -> #state{active=Active0, starting=Starting0} = State, case lists:keytake(Job, 2, Active0) of {value, {Key, _Pid}, Active1} -> - couch_log:warning("exit for compaction of ~p: ~p", [ - smoosh_utils:stringify(Key), Reason]), - {ok, _} = timer:apply_after(5000, smoosh_server, enqueue, [Key]), - {noreply, maybe_start_compaction(State#state{active=Active1})}; + State1 = maybe_remonitor_cpid(State#state{active=Active1}, Key, + Reason), + {noreply, maybe_start_compaction(State1)}; false -> case lists:keytake(Ref, 1, Starting0) of {value, {_, Key}, Starting1} -> @@ -281,8 +280,7 @@ start_compact(State, Db) -> Ref = erlang:monitor(process, DbPid), DbPid ! {'$gen_call', {self(), Ref}, start_compact}, State#state{starting=[{Ref, Key}|State#state.starting]}; - % database is still compacting so we can just monitor the existing - % compaction pid + % Compaction is already running, so monitor existing compaction pid. CPid -> couch_log:notice("Db ~s continuing compaction", [smoosh_utils:stringify(Key)]), @@ -293,6 +291,27 @@ start_compact(State, Db) -> false end. +maybe_remonitor_cpid(State, DbName, Reason) when is_binary(DbName) -> + {ok, Db} = couch_db:open_int(DbName, []), + case couch_db:get_compactor_pid(Db) of + nil -> + couch_log:warning("exit for compaction of ~p: ~p", + [smoosh_utils:stringify(DbName), Reason]), + {ok, _} = timer:apply_after(5000, smoosh_server, enqueue, [DbName]), + State; + CPid -> + couch_log:notice("~s compaction already running. Re-monitor Pid ~p", + [smoosh_utils:stringify(DbName), CPid]), + erlang:monitor(process, CPid), + State#state{active=[{DbName, CPid}|State#state.active]} + end; +% not a database compaction, so ignore the pid check +maybe_remonitor_cpid(State, Key, Reason) -> + couch_log:warning("exit for compaction of ~p: ~p", + [smoosh_utils:stringify(Key), Reason]), + {ok, _} = timer:apply_after(5000, smoosh_server, enqueue, [Key]), + State. + schedule_unpause() -> WaitSecs = list_to_integer(config:get("smoosh", "wait_secs", "30")), erlang:send_after(WaitSecs * 1000, self(), unpause). -- cgit v1.2.1 From 1c6a7386f1204d4af38816034ef9e971b5ff19af Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Thu, 10 Sep 2020 23:00:36 +0100 Subject: Fix buffer_response=true (#3145) We need to call StartFun as it might add headers, etc. --- src/chttpd/src/chttpd.erl | 9 +++++++-- src/chttpd/test/eunit/chttpd_delayed_test.erl | 15 +++++++-------- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/src/chttpd/src/chttpd.erl b/src/chttpd/src/chttpd.erl index fb7d61a06..3962c8601 100644 --- a/src/chttpd/src/chttpd.erl +++ b/src/chttpd/src/chttpd.erl @@ -855,13 +855,18 @@ end_delayed_json_response(#delayed_resp{buffer_response=false}=DelayedResp) -> end_delayed_json_response(#delayed_resp{buffer_response=true}=DelayedResp) -> #delayed_resp{ + start_fun = StartFun, req = Req, code = Code, headers = Headers, chunks = Chunks } = DelayedResp, - {ok, Resp} = start_response_length(Req, Code, Headers, iolist_size(Chunks)), - send(Resp, lists:reverse(Chunks)). + {ok, Resp} = StartFun(Req, Code, Headers), + lists:foreach(fun + ([]) -> ok; + (Chunk) -> send_chunk(Resp, Chunk) + end, lists:reverse(Chunks)), + end_json_response(Resp). get_delayed_req(#delayed_resp{req=#httpd{mochi_req=MochiReq}}) -> diff --git a/src/chttpd/test/eunit/chttpd_delayed_test.erl b/src/chttpd/test/eunit/chttpd_delayed_test.erl index 64232dcf8..63e6cb0e5 100644 --- a/src/chttpd/test/eunit/chttpd_delayed_test.erl +++ b/src/chttpd/test/eunit/chttpd_delayed_test.erl @@ -17,7 +17,7 @@ setup() -> Hashed = couch_passwords:hash_admin_password(?PASS), ok = config:set("admins", ?USER, ?b2l(Hashed), _Persist=false), - ok = config:set("chttpd", "buffer_response", "true"), + ok = config:set("chttpd", "buffer_response", "true", _Persist=false), TmpDb = ?tempdb(), Addr = config:get("chttpd", "bind_address", "127.0.0.1"), Port = mochiweb_socket_server:get(chttpd, port), @@ -57,17 +57,16 @@ all_test_() -> test_buffer_response_all_docs(Url) -> - assert_has_content_length(Url ++ "/_all_docs"). + assert_successful_response(Url ++ "/_all_docs"). test_buffer_response_changes(Url) -> - assert_has_content_length(Url ++ "/_changes"). + assert_successful_response(Url ++ "/_changes"). -assert_has_content_length(Url) -> +assert_successful_response(Url) -> {timeout, ?TIMEOUT, ?_test(begin - {ok, Code, Headers, _Body} = test_request:get(Url, [?AUTH]), - ?assertEqual(200, Code), - ?assert(lists:keymember("Content-Length", 1, Headers)) + {ok, Code, _Headers, _Body} = test_request:get(Url, [?AUTH]), + ?assertEqual(200, Code) end)}. - \ No newline at end of file + -- cgit v1.2.1 From ac33e853cef2a6a108aa64269eb196d32b235529 Mon Sep 17 00:00:00 2001 From: Juanjo Rodriguez Date: Wed, 15 Jul 2020 23:12:47 +0200 Subject: =?UTF-8?q?Port=20view=5Fconflicts.js,=20view=5Ferrors.js=20and=20?= =?UTF-8?q?view=5Finclude=5Fdocs.js=20into=20elixir=C2=B7?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/elixir/README.md | 6 +- test/elixir/test/view_conflicts_test.exs | 74 +++++++ test/elixir/test/view_errors_test.exs | 300 ++++++++++++++++++++++++++++ test/elixir/test/view_include_docs_test.exs | 263 ++++++++++++++++++++++++ test/javascript/tests/view_conflicts.js | 1 + test/javascript/tests/view_errors.js | 1 + test/javascript/tests/view_include_docs.js | 1 + 7 files changed, 643 insertions(+), 3 deletions(-) create mode 100644 test/elixir/test/view_conflicts_test.exs create mode 100644 test/elixir/test/view_errors_test.exs create mode 100644 test/elixir/test/view_include_docs_test.exs diff --git a/test/elixir/README.md b/test/elixir/README.md index 38c85a5e8..52ce45a75 100644 --- a/test/elixir/README.md +++ b/test/elixir/README.md @@ -101,9 +101,9 @@ X means done, - means partially - [X] Port view_collation.js - [X] Port view_collation_raw.js - [X] Port view_compaction.js - - [ ] Port view_conflicts.js - - [ ] Port view_errors.js - - [ ] Port view_include_docs.js + - [X] Port view_conflicts.js + - [X] Port view_errors.js + - [X] Port view_include_docs.js - [X] Port view_multi_key_all_docs.js - [X] Port view_multi_key_design.js - [ ] ~~Port view_multi_key_temp.js~~ diff --git a/test/elixir/test/view_conflicts_test.exs b/test/elixir/test/view_conflicts_test.exs new file mode 100644 index 000000000..9261b1ef0 --- /dev/null +++ b/test/elixir/test/view_conflicts_test.exs @@ -0,0 +1,74 @@ +defmodule ViewConflictsTest do + use CouchTestCase + + @moduletag kind: :single_node + + setup_all do + db_name_a = random_db_name() + db_name_b = random_db_name() + + {:ok, _} = create_db(db_name_a) + {:ok, _} = create_db(db_name_b) + + on_exit(fn -> delete_db(db_name_a) end) + on_exit(fn -> delete_db(db_name_b) end) + {:ok, [db_name_a: db_name_a, db_name_b: db_name_b]} + end + + test "view conflict", context do + db_name_a = context[:db_name_a] + db_name_b = context[:db_name_b] + + create_doc(db_name_a, %{_id: "foo", bar: 42}) + replicate(db_name_a, db_name_b) + + resp = Couch.get("/#{db_name_b}/foo") + + docb = + resp.body + |> Map.put("bar", 43) + + docb = save(db_name_b, docb) + + resp = Couch.get("/#{db_name_a}/foo") + + doca = + resp.body + |> Map.put("bar", 41) + + doca = save(db_name_a, doca) + + replicate(db_name_a, db_name_b) + + resp = Couch.get("/#{db_name_b}/foo", query: [conflicts: true]) + doc = resp.body + assert length(resp.body["_conflicts"]) == 1 + + conflict_rev = Enum.at(resp.body["_conflicts"], 0) + + case doc["bar"] do + 41 -> assert conflict_rev == docb["_rev"] + 43 -> assert conflict_rev == doca["_rev"] + _ -> assert false + end + + map_fun = """ + function(doc) { + if (doc._conflicts) { + emit(doc._id, doc._conflicts); + } + } + """ + + results = query(db_name_b, map_fun) + + rev = + results + |> Map.get("rows") + |> Enum.at(0) + |> Map.get("value") + |> Enum.at(0) + + assert conflict_rev == rev + end +end diff --git a/test/elixir/test/view_errors_test.exs b/test/elixir/test/view_errors_test.exs new file mode 100644 index 000000000..80067ec6c --- /dev/null +++ b/test/elixir/test/view_errors_test.exs @@ -0,0 +1,300 @@ +defmodule ViewErrorsTest do + use CouchTestCase + + @moduletag kind: :single_node + + @document %{integer: 1, string: "1", array: [1, 2, 3]} + + @tag :with_db + test "emit undefined key results as null", context do + db_name = context[:db_name] + {:ok, _} = create_doc(db_name, @document) + + map_fun = """ + function(doc) { + emit(doc.undef, null); + } + """ + + # emitting a key value that is undefined should result in that row + # being included in the view results as null + results = query(db_name, map_fun) + assert results["total_rows"] == 1 + assert Enum.at(results["rows"], 0)["key"] == :null + end + + @tag :with_db + test "exception in map function", context do + db_name = context[:db_name] + {:ok, _} = create_doc(db_name, @document) + + map_fun = """ + function(doc) { + doc.undef(); // throws an error + } + """ + + # if a view function throws an exception, its results are not included in + # the view index, but the view does not itself raise an error + results = query(db_name, map_fun) + assert results["total_rows"] == 0 + end + + @tag :with_db + test "emit undefined value results as null", context do + db_name = context[:db_name] + {:ok, _} = create_doc(db_name, @document) + + map_fun = """ + function(doc) { + emit([doc._id, doc.undef], null); + } + """ + + # if a view function includes an undefined value in the emitted key or + # value, it is treated as null + results = query(db_name, map_fun) + assert results["total_rows"] == 1 + + key = + results["rows"] + |> Enum.at(0) + |> Map.get("key") + |> Enum.at(1) + + assert key == :null + end + + @tag :with_db + test "query view with invalid params", context do + db_name = context[:db_name] + {:ok, _} = create_doc(db_name, @document) + + body = %{ + language: "javascript", + map: "function(doc){emit(doc.integer)}" + } + + # querying a view with invalid params should give a resonable error message + resp = + Couch.post("/#{db_name}/_all_docs?startkey=foo", + headers: ["Content-Type": "application/json"], + body: body + ) + + assert resp.body["error"] == "bad_request" + + resp = + Couch.post("/#{db_name}/_all_docs", + headers: ["Content-Type": "application/x-www-form-urlencoded"], + body: body + ) + + assert resp.status_code == 415 + end + + @tag :with_db + test "query parse error", context do + db_name = context[:db_name] + + map_fun = """ + function(doc) { + emit(doc.integer, doc.integer); + } + """ + + ddoc_name = create_view(db_name, map_fun) + + resp = Couch.get("/#{db_name}/#{ddoc_name}/_view/view", query: [group: true]) + assert resp.status_code == 400 + assert resp.body["error"] == "query_parse_error" + + map_fun = "function() {emit(null, null)}" + ddoc_name = create_view(db_name, map_fun) + + resp = + Couch.get("/#{db_name}/#{ddoc_name}/_view/view", query: [startkey: 2, endkey: 1]) + + assert resp.status_code == 400 + assert resp.body["error"] == "query_parse_error" + assert String.contains?(resp.body["reason"], "No rows can match") + + design_doc = %{ + _id: "_design/test", + language: "javascript", + views: %{ + no_reduce: %{map: "function(doc) {emit(doc._id, null);}"}, + with_reduce: %{ + map: "function (doc) {emit(doc.integer, doc.integer)};", + reduce: "function (keys, values) { return sum(values); };" + } + } + } + + {:ok, _} = create_doc(db_name, design_doc) + + resp = Couch.get("/#{db_name}/_design/test/_view/no_reduce", query: [group: true]) + assert resp.status_code == 400 + assert resp.body["error"] == "query_parse_error" + + resp = Couch.get("/#{db_name}/_design/test/_view/no_reduce", query: [group_level: 1]) + assert resp.status_code == 400 + assert resp.body["error"] == "query_parse_error" + + resp = Couch.get("/#{db_name}/_design/test/_view/no_reduce", query: [reduce: true]) + assert resp.status_code == 400 + assert resp.body["error"] == "query_parse_error" + + resp = Couch.get("/#{db_name}/_design/test/_view/no_reduce", query: [reduce: false]) + assert resp.status_code == 200 + + resp = + Couch.get("/#{db_name}/_design/test/_view/with_reduce", + query: [group: true, reduce: false] + ) + + assert resp.status_code == 400 + assert resp.body["error"] == "query_parse_error" + + resp = + Couch.get("/#{db_name}/_design/test/_view/with_reduce", + query: [group_level: 1, reduce: false] + ) + + assert resp.status_code == 400 + assert resp.body["error"] == "query_parse_error" + end + + @tag :with_db + test "infinite loop", context do + db_name = context[:db_name] + {:ok, _} = create_doc(db_name, @document) + + design_doc3 = %{ + _id: "_design/infinite", + language: "javascript", + views: %{ + infinite_loop: %{ + map: "function(doc) {while(true){emit(doc,doc);}};" + } + } + } + + {:ok, _} = create_doc(db_name, design_doc3) + + resp = Couch.get("/#{db_name}/_design/infinite/_view/infinite_loop") + assert resp.status_code == 500 + # This test has two different races. The first is whether + # the while loop exhausts the JavaScript RAM limits before + # timing. The second is a race between which of two timeouts + # fires first. The first timeout is the couch_os_process + # waiting for data back from couchjs. The second is the + # gen_server call to couch_os_process. + assert resp.body["error"] == "os_process_error" or resp.body["error"] == "timeout" + end + + @tag :with_db + test "error responses for invalid multi-get bodies", context do + db_name = context[:db_name] + + design_doc = %{ + _id: "_design/test", + language: "javascript", + views: %{ + no_reduce: %{map: "function(doc) {emit(doc._id, null);}"}, + with_reduce: %{ + map: "function (doc) {emit(doc.integer, doc.integer)};", + reduce: "function (keys, values) { return sum(values); };" + } + } + } + + {:ok, _} = create_doc(db_name, design_doc) + + resp = + Couch.post("/#{db_name}/_design/test/_view/no_reduce", + body: "[]" + ) + + assert resp.status_code == 400 + assert resp.body["error"] == "bad_request" + assert resp.body["reason"] == "Request body must be a JSON object" + + resp = + Couch.post("/#{db_name}/_design/test/_view/no_reduce", + body: %{keys: 1} + ) + + assert resp.status_code == 400 + assert resp.body["error"] == "bad_request" + assert resp.body["reason"] == "`keys` member must be an array." + end + + @tag :with_db + test "reduce overflow error", context do + db_name = context[:db_name] + {:ok, _} = create_doc(db_name, @document) + + design_doc2 = %{ + _id: "_design/testbig", + language: "javascript", + views: %{ + reduce_too_big: %{ + map: "function (doc) {emit(doc.integer, doc.integer)};", + reduce: + "function (keys, values) { var chars = []; for (var i=0; i < 1000; i++) {chars.push('wazzap');};return chars; };" + } + } + } + + {:ok, _} = create_doc(db_name, design_doc2) + + resp = Couch.get("/#{db_name}/_design/testbig/_view/reduce_too_big") + assert resp.status_code == 200 + # if the reduce grows to fast, throw an overflow error + assert Enum.at(resp.body["rows"], 0)["error"] == "reduce_overflow_error" + end + + @tag :with_db + test "temporary view should give error message", context do + db_name = context[:db_name] + + resp = + Couch.post("/#{db_name}/_temp_view", + headers: ["Content-Type": "application/json"], + body: %{ + language: "javascript", + map: "function(doc){emit(doc.integer)}" + } + ) + + assert resp.status_code == 410 + assert resp.body["error"] == "gone" + assert resp.body["reason"] == "Temporary views are not supported in CouchDB" + end + + defp create_view(db_name, map_fun) do + ddoc_name = "_design/temp_#{now(:ms)}" + + ddoc = %{ + _id: ddoc_name, + language: "javascript", + views: %{ + view: %{map: map_fun} + } + } + + {:ok, _} = create_doc(db_name, ddoc) + ddoc_name + end + + defp now(:ms) do + case elem(:os.type(), 0) do + :win32 -> + div(:erlang.system_time(), 1_000) + + _ -> + div(:erlang.system_time(), 1_000_000) + end + end +end diff --git a/test/elixir/test/view_include_docs_test.exs b/test/elixir/test/view_include_docs_test.exs new file mode 100644 index 000000000..a77753058 --- /dev/null +++ b/test/elixir/test/view_include_docs_test.exs @@ -0,0 +1,263 @@ +defmodule ViewIncludeDocsTest do + use CouchTestCase + + @moduletag kind: :single_node + + @ddoc %{ + _id: "_design/test", + language: "javascript", + views: %{ + all_docs: %{ + map: "function(doc) { emit(doc.integer, doc.string) }" + }, + with_prev: %{ + map: + "function(doc){if(doc.prev) emit(doc._id,{'_rev':doc.prev}); else emit(doc._id,{'_rev':doc._rev});}" + }, + with_id: %{ + map: + "function(doc) {if(doc.link_id) { var value = {'_id':doc.link_id}; if (doc.link_rev) {value._rev = doc.link_rev}; emit(doc._id, value);}};" + }, + summate: %{ + map: + "function (doc) { if (typeof doc.integer === 'number') {emit(doc.integer, doc.integer)};}", + reduce: "function (keys, values) { return sum(values); };" + } + } + } + + setup_all do + db_name = random_db_name() + {:ok, _} = create_db(db_name) + on_exit(fn -> delete_db(db_name) end) + + bulk_save(db_name, make_docs(0..99)) + + create_doc(db_name, @ddoc) + + {:ok, [db_name: db_name]} + end + + test "include docs in view", context do + db_name = context[:db_name] + resp = view(db_name, "test/all_docs", %{include_docs: true, limit: 2}) + assert length(resp.body["rows"]) == 2 + row0 = Enum.at(resp.body["rows"], 0) + assert row0["id"] == "0" + assert row0["doc"]["_id"] == "0" + row1 = Enum.at(resp.body["rows"], 1) + assert row1["id"] == "1" + assert row1["doc"]["_id"] == "1" + + resp = view(db_name, "test/all_docs", %{include_docs: true}, [29, 74]) + assert length(resp.body["rows"]) == 2 + row0 = Enum.at(resp.body["rows"], 0) + assert row0["doc"]["_id"] == "29" + row1 = Enum.at(resp.body["rows"], 1) + assert row1["doc"]["integer"] == 74 + end + + test "include docs in all_docs", context do + db_name = context[:db_name] + + resp = + Couch.get("/#{db_name}/_all_docs", + query: [limit: 2, skip: 1, include_docs: true] + ) + + assert length(resp.body["rows"]) == 2 + row0 = Enum.at(resp.body["rows"], 0) + row1 = Enum.at(resp.body["rows"], 1) + assert row0["doc"]["integer"] == 1 + assert row1["doc"]["integer"] == 10 + + resp = + Couch.post("/#{db_name}/_all_docs", + query: [include_docs: true], + headers: ["Content-Type": "application/json"], + body: %{"keys" => ["not_a_doc"]} + ) + + assert length(resp.body["rows"]) == 1 + row0 = Enum.at(resp.body["rows"], 0) + assert not Map.has_key?(row0, "doc") + + resp = + Couch.post("/#{db_name}/_all_docs", + query: [include_docs: true], + headers: ["Content-Type": "application/json"], + body: %{"keys" => ["1", "foo"]} + ) + + assert length(resp.body["rows"]) == 2 + row0 = Enum.at(resp.body["rows"], 0) + row1 = Enum.at(resp.body["rows"], 1) + assert row0["doc"]["integer"] == 1 + assert not Map.has_key?(row1, "doc") + + resp = + Couch.get("/#{db_name}/_all_docs", + query: [limit: 0, include_docs: true] + ) + + assert Enum.empty?(resp.body["rows"]) + end + + test "no reduce support", context do + db_name = context[:db_name] + + resp = + Couch.get("/#{db_name}/_design/test/_view/summate", query: [include_docs: true]) + + assert resp.status_code == 400 + assert resp.body["error"] == "query_parse_error" + end + + test "Reduce support when reduce=false", context do + db_name = context[:db_name] + + resp = + Couch.get("/#{db_name}/_design/test/_view/summate", + query: [reduce: false, include_docs: true] + ) + + assert length(resp.body["rows"]) == 100 + end + + test "Not an error with include_docs=false&reduce=true", context do + db_name = context[:db_name] + + resp = + Couch.get("/#{db_name}/_design/test/_view/summate", + query: [reduce: true, include_docs: false] + ) + + assert length(resp.body["rows"]) == 1 + row0 = Enum.at(resp.body["rows"], 0) + assert row0["value"] == 4950 + end + + @tag :with_db + test "link to another doc from a value", context do + db_name = context[:db_name] + + bulk_save(db_name, make_docs(0..99)) + create_doc(db_name, @ddoc) + + doc_link = %{ + _id: "link-to-10", + link_id: "10" + } + + {:ok, _} = create_doc(db_name, doc_link) + resp = view(db_name, "test/with_id", %{key: ~s("link-to-10")}) + assert length(resp.body["rows"]) == 1 + row0 = Enum.at(resp.body["rows"], 0) + assert row0["key"] == "link-to-10" + assert row0["value"]["_id"] == "10" + + resp = view(db_name, "test/with_id", %{key: ~s("link-to-10"), include_docs: true}) + assert length(resp.body["rows"]) == 1 + row0 = Enum.at(resp.body["rows"], 0) + assert row0["value"]["_id"] == "10" + assert row0["doc"]["_id"] == "10" + end + + @tag :with_db + test "emitted _rev controls things", context do + db_name = context[:db_name] + + bulk_save(db_name, make_docs(0..99)) + create_doc(db_name, @ddoc) + + resp = + Couch.post("/#{db_name}/_all_docs", + query: [include_docs: true], + headers: ["Content-Type": "application/json"], + body: %{"keys" => ["0"]} + ) + + doc_before = Enum.at(resp.body["rows"], 0)["doc"] + + resp = Couch.get("/#{db_name}/0") + assert resp.status_code == 200 + prev = resp.body["_rev"] + + doc_after = + resp.body + |> Map.put("integer", 100) + |> Map.put("prev", prev) + + saved_doc = save(db_name, doc_after) + + resp = Couch.get("/#{db_name}/0") + assert resp.status_code == 200 + doc_after = resp.body + assert doc_after["_rev"] == saved_doc["_rev"] + assert doc_after["_rev"] != doc_after["prev"] + assert doc_after["integer"] == 100 + + resp = view(db_name, "test/with_prev", %{include_docs: true}, ["0"]) + row0 = Enum.at(resp.body["rows"], 0)["doc"] + assert row0["_id"] == "0" + assert row0["_rev"] == doc_before["_rev"] + assert not Map.has_key?(row0, "prev") + assert assert row0["integer"] == 0 + end + + test "COUCHDB-549 - include_docs=true with conflicts=true" do + db_name_a = random_db_name() + db_name_b = random_db_name() + create_db(db_name_a) + create_db(db_name_b) + on_exit(fn -> delete_db(db_name_a) end) + on_exit(fn -> delete_db(db_name_b) end) + + ddoc = %{ + _id: "_design/mydesign", + language: "javascript", + views: %{ + myview: %{ + map: """ + function(doc) { + emit(doc.value, 1); + } + """ + } + } + } + + {:ok, _} = create_doc(db_name_a, ddoc) + + doc1a = %{_id: "foo", value: 1, str: "1"} + {:ok, _} = create_doc(db_name_a, doc1a) + + doc1b = %{_id: "foo", value: 1, str: "666"} + {:ok, _} = create_doc(db_name_b, doc1b) + + doc2 = %{_id: "bar", value: 2, str: "2"} + {:ok, _} = create_doc(db_name_a, doc2) + + replicate(db_name_a, db_name_b) + + resp = Couch.get("/#{db_name_b}/foo", query: [conflicts: true]) + assert resp.status_code == 200 + doc1b = resp.body + assert Map.has_key?(doc1b, "_conflicts") + assert length(doc1b["_conflicts"]) == 1 + conflict_rev = Enum.at(doc1b["_conflicts"], 0) + + resp = Couch.get("/#{db_name_b}/bar", query: [conflicts: true]) + assert resp.status_code == 200 + doc2 = resp.body + assert not Map.has_key?(doc2, "_conflicts") + + resp = view(db_name_b, "mydesign/myview", %{include_docs: true, conflicts: true}) + assert length(resp.body["rows"]) == 2 + row0 = Enum.at(resp.body["rows"], 0)["doc"] + assert length(row0["_conflicts"]) == 1 + assert Enum.at(row0["_conflicts"], 0) == conflict_rev + row1 = Enum.at(resp.body["rows"], 1)["doc"] + assert not Map.has_key?(row1, "_conflicts") + end +end diff --git a/test/javascript/tests/view_conflicts.js b/test/javascript/tests/view_conflicts.js index b1c938c61..b1efa234f 100644 --- a/test/javascript/tests/view_conflicts.js +++ b/test/javascript/tests/view_conflicts.js @@ -10,6 +10,7 @@ // License for the specific language governing permissions and limitations under // the License. +couchTests.elixir = true couchTests.view_conflicts = function(debug) { var db_name_a = get_random_db_name(); diff --git a/test/javascript/tests/view_errors.js b/test/javascript/tests/view_errors.js index 6b9c75466..477422030 100644 --- a/test/javascript/tests/view_errors.js +++ b/test/javascript/tests/view_errors.js @@ -10,6 +10,7 @@ // License for the specific language governing permissions and limitations under // the License. +couchTests.elixir = true; couchTests.view_errors = function(debug) { var db_name = get_random_db_name(); var db = new CouchDB(db_name, {"X-Couch-Full-Commit":"false"}); diff --git a/test/javascript/tests/view_include_docs.js b/test/javascript/tests/view_include_docs.js index cefc2cf90..b96227d75 100644 --- a/test/javascript/tests/view_include_docs.js +++ b/test/javascript/tests/view_include_docs.js @@ -10,6 +10,7 @@ // License for the specific language governing permissions and limitations under // the License. +couchTests.elixir = true; couchTests.view_include_docs = function(debug) { var db_name = get_random_db_name(); var db = new CouchDB(db_name, {"X-Couch-Full-Commit":"false"}); -- cgit v1.2.1 From 56e0f9c936af75d4206346214cd836206f4b629a Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Thu, 10 Sep 2020 23:00:36 +0100 Subject: Fix buffer_response=true (#3145) We need to call StartFun as it might add headers, etc. --- src/chttpd/src/chttpd.erl | 9 +++++++-- src/chttpd/test/eunit/chttpd_delayed_test.erl | 15 +++++++-------- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/src/chttpd/src/chttpd.erl b/src/chttpd/src/chttpd.erl index fdca5c810..1a9b19bb1 100644 --- a/src/chttpd/src/chttpd.erl +++ b/src/chttpd/src/chttpd.erl @@ -875,13 +875,18 @@ end_delayed_json_response(#delayed_resp{buffer_response=false}=DelayedResp) -> end_delayed_json_response(#delayed_resp{buffer_response=true}=DelayedResp) -> #delayed_resp{ + start_fun = StartFun, req = Req, code = Code, headers = Headers, chunks = Chunks } = DelayedResp, - {ok, Resp} = start_response_length(Req, Code, Headers, iolist_size(Chunks)), - send(Resp, lists:reverse(Chunks)). + {ok, Resp} = StartFun(Req, Code, Headers), + lists:foreach(fun + ([]) -> ok; + (Chunk) -> send_chunk(Resp, Chunk) + end, lists:reverse(Chunks)), + end_json_response(Resp). get_delayed_req(#delayed_resp{req=#httpd{mochi_req=MochiReq}}) -> diff --git a/src/chttpd/test/eunit/chttpd_delayed_test.erl b/src/chttpd/test/eunit/chttpd_delayed_test.erl index 64232dcf8..63e6cb0e5 100644 --- a/src/chttpd/test/eunit/chttpd_delayed_test.erl +++ b/src/chttpd/test/eunit/chttpd_delayed_test.erl @@ -17,7 +17,7 @@ setup() -> Hashed = couch_passwords:hash_admin_password(?PASS), ok = config:set("admins", ?USER, ?b2l(Hashed), _Persist=false), - ok = config:set("chttpd", "buffer_response", "true"), + ok = config:set("chttpd", "buffer_response", "true", _Persist=false), TmpDb = ?tempdb(), Addr = config:get("chttpd", "bind_address", "127.0.0.1"), Port = mochiweb_socket_server:get(chttpd, port), @@ -57,17 +57,16 @@ all_test_() -> test_buffer_response_all_docs(Url) -> - assert_has_content_length(Url ++ "/_all_docs"). + assert_successful_response(Url ++ "/_all_docs"). test_buffer_response_changes(Url) -> - assert_has_content_length(Url ++ "/_changes"). + assert_successful_response(Url ++ "/_changes"). -assert_has_content_length(Url) -> +assert_successful_response(Url) -> {timeout, ?TIMEOUT, ?_test(begin - {ok, Code, Headers, _Body} = test_request:get(Url, [?AUTH]), - ?assertEqual(200, Code), - ?assert(lists:keymember("Content-Length", 1, Headers)) + {ok, Code, _Headers, _Body} = test_request:get(Url, [?AUTH]), + ?assertEqual(200, Code) end)}. - \ No newline at end of file + -- cgit v1.2.1 From 168d635fc8516a1fe655cd7088703bafe3110b33 Mon Sep 17 00:00:00 2001 From: Tony Sun Date: Mon, 14 Sep 2020 11:12:47 -0700 Subject: fix race condition (#3150) This fixes a94e693f32672e4613bce0d80d0b9660f85275ea because a race condition exisited where the 'DOWN' message could be received before the compactor pid is spawned. Adding a synchronous call to get the compactor pid guarantees that the couch_db_updater process handling of finish_compaction has occurred. --- src/couch/src/couch_db.erl | 9 +++++++++ src/smoosh/src/smoosh_channel.erl | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/src/couch/src/couch_db.erl b/src/couch/src/couch_db.erl index e1d726dc9..390a198df 100644 --- a/src/couch/src/couch_db.erl +++ b/src/couch/src/couch_db.erl @@ -37,6 +37,7 @@ get_committed_update_seq/1, get_compacted_seq/1, get_compactor_pid/1, + get_compactor_pid_sync/1, get_db_info/1, get_partition_info/2, get_del_doc_count/1, @@ -572,6 +573,14 @@ get_compacted_seq(#db{}=Db) -> get_compactor_pid(#db{compactor_pid = Pid}) -> Pid. +get_compactor_pid_sync(#db{main_pid=Pid}=Db) -> + case gen_server:call(Pid, compactor_pid, infinity) of + CPid when is_pid(CPid) -> + CPid; + _ -> + nil + end. + get_db_info(Db) -> #db{ name = Name, diff --git a/src/smoosh/src/smoosh_channel.erl b/src/smoosh/src/smoosh_channel.erl index 2a45c17dc..2bc98be9d 100644 --- a/src/smoosh/src/smoosh_channel.erl +++ b/src/smoosh/src/smoosh_channel.erl @@ -293,7 +293,7 @@ start_compact(State, Db) -> maybe_remonitor_cpid(State, DbName, Reason) when is_binary(DbName) -> {ok, Db} = couch_db:open_int(DbName, []), - case couch_db:get_compactor_pid(Db) of + case couch_db:get_compactor_pid_sync(Db) of nil -> couch_log:warning("exit for compaction of ~p: ~p", [smoosh_utils:stringify(DbName), Reason]), -- cgit v1.2.1 From 15ff5802612bfd39f0cc6f4d7f0c3fd993552f7d Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Thu, 3 Sep 2020 13:28:19 -0500 Subject: Remove couch_rate This implementation was difficult to understand and had behavior that was too difficult to predict. It would break if view behavior changed in significant ways from what was originally expected. --- .credo.exs | 1 - .gitignore | 1 - Makefile | 2 +- mix.exs | 8 +- mix.lock | 1 - rebar.config.script | 1 - rel/files/eunit.ini | 3 - rel/overlay/etc/default.ini | 5 - src/couch_rate/README.md | 155 -------- src/couch_rate/src/couch_rate.app.src | 24 -- src/couch_rate/src/couch_rate.erl | 318 ----------------- src/couch_rate/src/couch_rate.hrl | 19 - src/couch_rate/src/couch_rate_app.erl | 28 -- src/couch_rate/src/couch_rate_config.erl | 66 ---- src/couch_rate/src/couch_rate_ets.erl | 119 ------- src/couch_rate/src/couch_rate_limiter.erl | 392 --------------------- src/couch_rate/src/couch_rate_pd.erl | 90 ----- src/couch_rate/src/couch_rate_sup.erl | 36 -- .../test/exunit/couch_rate_config_test.exs | 88 ----- .../test/exunit/couch_rate_limiter_test.exs | 350 ------------------ src/couch_rate/test/exunit/test_helper.exs | 14 - src/couch_views/README.md | 42 +-- src/couch_views/src/couch_views.app.src | 3 +- src/couch_views/src/couch_views_indexer.erl | 61 ++-- src/couch_views/test/couch_views_indexer_test.erl | 53 +-- src/couch_views/test/couch_views_server_test.erl | 1 - .../test/couch_views_trace_index_test.erl | 2 +- 27 files changed, 35 insertions(+), 1848 deletions(-) delete mode 100644 src/couch_rate/README.md delete mode 100644 src/couch_rate/src/couch_rate.app.src delete mode 100644 src/couch_rate/src/couch_rate.erl delete mode 100644 src/couch_rate/src/couch_rate.hrl delete mode 100644 src/couch_rate/src/couch_rate_app.erl delete mode 100644 src/couch_rate/src/couch_rate_config.erl delete mode 100644 src/couch_rate/src/couch_rate_ets.erl delete mode 100644 src/couch_rate/src/couch_rate_limiter.erl delete mode 100644 src/couch_rate/src/couch_rate_pd.erl delete mode 100644 src/couch_rate/src/couch_rate_sup.erl delete mode 100644 src/couch_rate/test/exunit/couch_rate_config_test.exs delete mode 100644 src/couch_rate/test/exunit/couch_rate_limiter_test.exs delete mode 100644 src/couch_rate/test/exunit/test_helper.exs diff --git a/.credo.exs b/.credo.exs index 112561b95..bd26f407c 100644 --- a/.credo.exs +++ b/.credo.exs @@ -37,7 +37,6 @@ ~r"/src/metrics", ~r"/src/minerl", ~r"/src/parse_trans", - ~r"/src/stream_data", ~r"/src/ssl_verify_fun", ~r"/test/elixir/deps/" ] diff --git a/.gitignore b/.gitignore index 5c4255245..c84d39e5d 100644 --- a/.gitignore +++ b/.gitignore @@ -78,7 +78,6 @@ src/rebar/ src/recon/ src/smoosh/ src/snappy/ -src/stream_data/ src/ssl_verify_fun/ src/thrift_protocol/ src/triq/ diff --git a/Makefile b/Makefile index 2e3cc8acb..e8d366296 100644 --- a/Makefile +++ b/Makefile @@ -165,7 +165,7 @@ check: all @$(MAKE) emilio make eunit apps=couch_eval,couch_expiring_cache,ctrace,couch_jobs,couch_views,fabric,mango,chttpd make elixir tests=test/elixir/test/basics_test.exs,test/elixir/test/replication_test.exs,test/elixir/test/map_test.exs,test/elixir/test/all_docs_test.exs,test/elixir/test/bulk_docs_test.exs - make exunit apps=couch_rate,chttpd + make exunit apps=chttpd make mango-test .PHONY: eunit diff --git a/mix.exs b/mix.exs index 480d426b1..29c81fa49 100644 --- a/mix.exs +++ b/mix.exs @@ -49,14 +49,11 @@ defmodule CouchDBTest.Mixfile do # Run "mix help compile.app" to learn about applications. def application do [ - extra_applications: extra_applications(Mix.env()), + extra_applications: [:logger], applications: [:httpotion] ] end - defp extra_applications(:test), do: [:logger, :stream_data] - defp extra_applications(_), do: [:logger] - # Specifies which paths to compile per environment. defp elixirc_paths(:test), do: ["test/elixir/lib", "test/elixir/test/support"] defp elixirc_paths(:integration), do: ["test/elixir/lib", "test/elixir/test/support"] @@ -71,8 +68,7 @@ defmodule CouchDBTest.Mixfile do {:jiffy, path: Path.expand("src/jiffy", __DIR__)}, {:ibrowse, path: Path.expand("src/ibrowse", __DIR__), override: true, compile: false}, - {:credo, "~> 1.2.0", only: [:dev, :test, :integration], runtime: false}, - {:stream_data, "~> 0.4.3", only: [:dev, :test, :integration], runtime: false} + {:credo, "~> 1.2.0", only: [:dev, :test, :integration], runtime: false} ] end diff --git a/mix.lock b/mix.lock index 7a155c6bb..c03e11f64 100644 --- a/mix.lock +++ b/mix.lock @@ -14,6 +14,5 @@ "mimerl": {:hex, :mimerl, "1.2.0", "67e2d3f571088d5cfd3e550c383094b47159f3eee8ffa08e64106cdf5e981be3", [:rebar3], [], "hexpm", "f278585650aa581986264638ebf698f8bb19df297f66ad91b18910dfc6e19323"}, "parse_trans": {:hex, :parse_trans, "3.3.0", "09765507a3c7590a784615cfd421d101aec25098d50b89d7aa1d66646bc571c1", [:rebar3], [], "hexpm", "17ef63abde837ad30680ea7f857dd9e7ced9476cdd7b0394432af4bfc241b960"}, "ssl_verify_fun": {:hex, :ssl_verify_fun, "1.1.5", "6eaf7ad16cb568bb01753dbbd7a95ff8b91c7979482b95f38443fe2c8852a79b", [:make, :mix, :rebar3], [], "hexpm", "13104d7897e38ed7f044c4de953a6c28597d1c952075eb2e328bc6d6f2bfc496"}, - "stream_data": {:hex, :stream_data, "0.4.3", "62aafd870caff0849a5057a7ec270fad0eb86889f4d433b937d996de99e3db25", [:mix], [], "hexpm", "7dafd5a801f0bc897f74fcd414651632b77ca367a7ae4568778191fc3bf3a19a"}, "unicode_util_compat": {:hex, :unicode_util_compat, "0.4.1", "d869e4c68901dd9531385bb0c8c40444ebf624e60b6962d95952775cac5e90cd", [:rebar3], [], "hexpm", "1d1848c40487cdb0b30e8ed975e34e025860c02e419cb615d255849f3427439d"}, } diff --git a/rebar.config.script b/rebar.config.script index 963d97fb1..f3a975032 100644 --- a/rebar.config.script +++ b/rebar.config.script @@ -123,7 +123,6 @@ SubDirs = [ "src/couch_index", "src/couch_mrview", "src/couch_js", - "src/couch_rate", "src/couch_replicator", "src/couch_plugins", "src/couch_pse_tests", diff --git a/rel/files/eunit.ini b/rel/files/eunit.ini index 20277f288..2b73ab307 100644 --- a/rel/files/eunit.ini +++ b/rel/files/eunit.ini @@ -40,6 +40,3 @@ startup_jitter = 0 [fabric] ; disable index auto-updater to avoid interfering with some of the tests index_updater_enabled = false - -[couch_rate.views] -opts = #{budget => 100, target => 500, window => 6000, sensitivity => 200, congested_delay => 1} \ No newline at end of file diff --git a/rel/overlay/etc/default.ini b/rel/overlay/etc/default.ini index 071359a2f..8f2b25e22 100644 --- a/rel/overlay/etc/default.ini +++ b/rel/overlay/etc/default.ini @@ -727,11 +727,6 @@ compaction = false ; ; all = (#{}) -> true -[couch_rate.views] -limiter = couch_rate_limiter -opts = #{budget => 100, target => 2500, window => 60000, sensitivity => 1000} - - ; Some low-level FDB transaction options. These options will be applied to the ; database handle and inherited by each transaction started with that handle. ; The description of these can be found in fdb_c_option.g.h include file from diff --git a/src/couch_rate/README.md b/src/couch_rate/README.md deleted file mode 100644 index 530da1a99..000000000 --- a/src/couch_rate/README.md +++ /dev/null @@ -1,155 +0,0 @@ -# Description - -The `couch_rate` application implements a generic rate limiter which can -be used to control batch size and delay between batches. It was initially -designed for background index build to find an optimal batch size to utilize -the FDB transaction up to configured `target` parameter. The application -provides an API to plug custom rate limiting logic when need to. - -# Default rate limit logic - -The `couch_rate_limiter` is the rate limit module used by default. -The module tracks average number of reads and writes over specified -time period. It uses average read/write numbers to calculate an -approximate value for read/write ratio. Then the read/write ratio is -used to convert estimated amount of writes into batch size. - -# Configuration - -## API based usage - -In the simplest use case the only mandatory keys `new/3` expects are: -* `budget` - the initial value for estimated batch size -* `target` - the amount in msec which we try to maintain for batch processing time -* `window` - time interval for contention detector -* `sensitivity` - minimal interval within the `window` - -We choose sane default values for the rest of the parameters. - -* `window_size = window div sensitivity + 1` -* `underload_threshold = round(target * 0.95)` -* `overload_threshold = round(target * 1.05)` -* `delay_threshold = round(target * 1.07)` - -Due to the use of `round` in defaults calculation the `target` cannot be less -than `36` msec. Otherwise some of the thresholds become equal which breaks the -algorithm. - -In the case when you need to specify custom parameters, the following keys -are supported: - -* `window_size` - how many batches to consider in contention detector -* `timer` - this is used for testing to fast forward time `fun() -> current_time_in_ms() end` -* `target` - the amount in msec which we try to maintain for batch processing time -* `underload_threshold` - a threshold bellow which we would try to increase the budget -* `overload_threshold` - a threshold above which we would start decreasing the budget -* `delay_threshold` - a threshold above which we would start introducing delays between batches -* `multiplicative_factor` - determines how fast we are going to decrease budget (must be in (0..1) range) -* `regular_delay` - delay between batches when there is no overload -* `congested_delay` - delay between batches when there is an overload -* `initial_budget` - initial value for budget to start with - -## default.ini based usage - -The users of the `couch_rate` application pass the `ConfigId` parameter. -When calling `couch_rate:new` and `couch_rate:create_if_missing`. -The `couch_rate` application uses this information to construct name of the -configuration section to use to get configuration parameters. The configration -section is constructed using `"couch_rate." ++ ConfigId`. -The parameters are encoded using erlang map syntax. -Limitation of the map parser: - -* Keys must be atoms -* Values are either integers or floats -* We only support positive values in the map -* Configuration object cannot use erlang reserved words in keys: - `after`, `and`, `andalso`, `band`, `begin`, `bnot`, `bor`, - `bsl`, `bsr`, `bxor`, `case`, `catch`, `cond`, `div`, `end` - `fun`, `if`, `let`, `not`, `of`, `or`, `orelse`, `receive` - `rem`, `try`, `when`, `xor` - -The auxilary `couch_rate_config` module implements the following API: - -* `couch_rate_config:from_str/1` - parses a string representation of parameters -* `couch_rate_config:to_str/1` - converts parameters to string (used in testing) - -Here is the example of configuration used in `couch_view` application: - -``` -[couch_rate.views] -limiter = couch_rate_limiter -opts = #{budget => 100, target => 2500, window => 60000, sensitivity => 1000} -``` - -In the `couch_view` application it is used as follows: - -``` -Limiter = couch_rate:create_if_missing({DbName, DDocId}, "views"), -``` - -# API - -The application implements two APIs. Both APIs are supported by `couch_rate` -module. The API variants are: - -* explicit state passing -* state store based approach - -The API is chosen baed on the `StoreModule` argument passed to `new/4`. -Currently we support following values for `StoreModule`: - -* `nil` - this value indicates that explicit state passing would be used -* `couch_rate_ets` - ets based global state store (ets tables are owned by app supervisor) -* `couch_rate_pd` - process dicionary based local state store - -The "explicit state passing" style returns a tuple `{Result :: term(), state()}`. -The result is the same as for state store based API. - - -## State store based APIs of `couch_rate` module. - -All functions can return `{error, Reason :: term()}` in case of errors. -This detail is ommited bellow. - -* `create_if_missing(Id :: id(), Module :: module(), Store :: module(), Options :: map()) -> limiter()` - create new rate limiter instance -* `new(Id :: id(), Module :: module(), Store :: module(), Options :: map()) -> limiter()` - create new rate limiter instance -* `budget(limiter()) -> Budget :: integer().` - get batch size -* `delay(limiter()) -> Delay :: timeout().` - return delay in msec between batches -* `wait(limiter()) -> ok` - block the caller for amount of time returned by `delay/1` -* `in(limiter(), Reads :: integer()) -> limiter()` - notify rate limiter on the amount of reads were actually done (could be less than `budget`) -* `success(limiter(), Writes :: integer()) -> limiter()` - how many writes happen -* `failure(limiter()) -> limiter()` - called instead of `success/2` when failure happen -* `is_congestion(limiter()) -> boolean()` - returns `false` when congestion is detected -* `format(limiter()) -> [{Key :: atom(), Value :: term()}]` - return key value list representing important aspects of the limiter state -* `id(limitter()) -> id()` - returns `id()` of the rate limiter -* `module(limiter()) -> module()` - returns callback module implementing rate limiting logic. -* `state(limiter()) -> state()` - returns internal state of rate limiter. -* `store(limiter()) -> module() | nil` - returns store state backend. - -# Testing - -The test suite is written in Elixir. - -## Running all tests - -``` -make couch && ERL_LIBS=`pwd`/src mix test --trace src/couch_rate/test/exunit/ -``` - -## Running specific test suite - -``` -make couch && ERL_LIBS=`pwd`/src mix test --trace src/couch_rate/test/exunit/couch_rate_limiter_test.exs -``` - -## Running specific test using line number - -``` -make couch && ERL_LIBS=`pwd`/src mix test --trace src/couch_rate/test/exunit/couch_rate_limiter_test.exs:10 -``` - -## Running traces with stats output - -``` -make couch && ERL_LIBS=`pwd`/src EXUNIT_DEBUG=true mix test --trace src/couch_rate/test/exunit/couch_rate_limiter_test.exs -``` \ No newline at end of file diff --git a/src/couch_rate/src/couch_rate.app.src b/src/couch_rate/src/couch_rate.app.src deleted file mode 100644 index ed6de81d6..000000000 --- a/src/couch_rate/src/couch_rate.app.src +++ /dev/null @@ -1,24 +0,0 @@ -% Licensed under the Apache License, Version 2.0 (the "License"); you may not -% use this file except in compliance with the License. You may obtain a copy of -% the License at -% -% http://www.apache.org/licenses/LICENSE-2.0 -% -% Unless required by applicable law or agreed to in writing, software -% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -% License for the specific language governing permissions and limitations under -% the License. - - {application, couch_rate, [ - {description, "Simple rate limiter"}, - {vsn, git}, - {registered, [ - ]}, - {applications, [ - kernel, - stdlib, - syntax_tools - ]}, - {mod, {couch_rate_app, []}} -]}. diff --git a/src/couch_rate/src/couch_rate.erl b/src/couch_rate/src/couch_rate.erl deleted file mode 100644 index 24bbcc2a5..000000000 --- a/src/couch_rate/src/couch_rate.erl +++ /dev/null @@ -1,318 +0,0 @@ -% Licensed under the Apache License, Version 2.0 (the "License"); you may not -% use this file except in compliance with the License. You may obtain a copy of -% the License at -% -% http://www.apache.org/licenses/LICENSE-2.0 -% -% Unless required by applicable law or agreed to in writing, software -% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -% License for the specific language governing permissions and limitations under -% the License. - --module(couch_rate). - --include("couch_rate.hrl"). - --export([ - create_if_missing/2, - create_if_missing/3, - create_if_missing/4, - new/2, - new/3, - new/4, - from_map/4, - budget/1, - delay/1, - wait/1, - in/2, - success/2, - failure/1, - is_congestion/1, - min_latency/1, - format/1, - to_map/1, - id/1, - module/1, - state/1, - store/1 -]). - --define(LIMITER, ?MODULE). - --type id() :: term(). --type state() :: term(). --type store() :: module(). - --opaque limiter() :: #?LIMITER{}. - --export_type([ - id/0, - state/0, - limiter/0 -]). - --spec create_if_missing(id(), string()) -> - couch_rate:limiter() | {error, Reason :: term()}. - -create_if_missing(Id, ConfigId) -> - ?MODULE:create_if_missing(Id, ConfigId, couch_rate_ets). - --spec create_if_missing(id(), string(), nil | module()) -> - couch_rate:limiter() | {error, Reason :: term()}. - -create_if_missing(Id, ConfigId, StateStore) -> - {Module, Options} = get_config(ConfigId), - ?MODULE:create_if_missing(Id, Module, StateStore, Options). - --spec create_if_missing(id(), module(), nil | module(), map()) -> - couch_rate:limiter() | {error, Reason :: term()}. - -create_if_missing(Id, Module, nil, Options) -> - #?LIMITER{ - id = Id, - module = Module, - store = nil, - state = Module:new(Id, Options) - }; - -create_if_missing(Id, Module, Store, Options) -> - case Store:create_if_missing(Id, Module:new(Id, Options)) of - {error, _} = Error -> - Error; - State -> - #?LIMITER{ - id = Id, - module = Module, - store = Store, - state = State - } - end. - - --spec new(id(), string()) -> - couch_rate:limiter() | {error, Reason :: term()}. - -new(Id, ConfigId) -> - ?MODULE:new(Id, ConfigId, couch_rate_ets). - --spec new(id(), string(), module()) -> - couch_rate:limiter() | {error, Reason :: term()}. - -new(Id, ConfigId, StateStore) -> - {Module, Options} = get_config(ConfigId), - ?MODULE:new(Id, Module, StateStore, Options). - - --spec new(id(), module(), nil | module(), map()) -> - couch_rate:limiter() | {error, Reason :: term()}. - -new(Id, Module, nil, Options) -> - #?LIMITER{ - id = Id, - module = Module, - store = nil, - state = Module:new(Id, Options) - }; - -new(Id, Module, Store, Options) -> - case Store:new(Id, Module:new(Id, Options)) of - {error, _} = Error -> - Error; - State -> - #?LIMITER{ - id = Id, - module = Module, - store = Store, - state = State - } - end. - - --spec from_map(id(), module(), store(), map()) -> - couch_rate:limiter() - | {error, Reason :: term()}. - -from_map(Id, Module, nil, Map) -> - #?LIMITER{ - id = Id, - module = Module, - store = nil, - state = Module:from_map(Map) - }; - -from_map(Id, Module, Store, Map) -> - case Store:new(Id, Module:from_map(Map)) of - {error, _} = Error -> - Error; - State -> - #?LIMITER{ - id = Id, - module = Module, - store = Store, - state = State - } - end. - - --spec update(limiter(), ( - fun( - (id(), state()) -> - {Result :: term(), state()} - | {error, Reason :: term()} - ) - )) -> - Result :: term() - | {Result :: term(), state()} - | {error, Reason :: term()}. - -update(#?LIMITER{store = nil, id = Id, state = State0} = Limiter, Fun) -> - case Fun(Id, State0) of - {error, _Reason} = Error -> - Error; - {Result, State1} -> - {Result, Limiter#?LIMITER{state = State1}} - end; - -update(#?LIMITER{id = Id, store = Store, state = State}, Fun) -> - Store:update(Id, State, Fun). - - --spec budget(limiter()) -> - Budget :: integer() - | {Budget :: integer(), limiter()} - | {error, term()}. - -budget(#?LIMITER{module = Module} = Limiter) -> - update(Limiter, fun(Id, StateIn) -> - Module:budget(Id, StateIn) - end). - - --spec delay(limiter()) -> - DelayTime :: integer() - | {DelayTime :: integer(), limiter()} - | {error, term()}. - -delay(#?LIMITER{module = Module} = Limiter) -> - update(Limiter, fun(Id, State) -> - Module:delay(Id, State) - end). - - --spec wait(limiter()) -> - ok - | {ok, limiter()} - | {error, term()}. - -wait(#?LIMITER{module = Module} = Limiter) -> - update(Limiter, fun(Id, State) -> - Module:wait(Id, State) - end). - - --spec in(limiter(), integer()) -> - ok - | {ok, limiter()} - | {error, term()}. - -in(#?LIMITER{module = Module} = Limiter, Reads) -> - update(Limiter, fun(Id, State) -> - Module:in(Id, State, Reads) - end). - - --spec success(limiter(), integer()) -> - ok - | limiter() - | {error, term()}. - -success(#?LIMITER{module = Module} = Limiter, Writes) -> - update(Limiter, fun(Id, State) -> - Module:success(Id, State, Writes) - end). - - --spec failure(limiter()) -> - ok - | limiter() - | {error, term()}. - -failure(#?LIMITER{module = Module} = Limiter) -> - update(Limiter, fun(Id, State) -> - Module:failure(Id, State) - end). - - --spec is_congestion(limiter()) -> boolean(). - -is_congestion(#?LIMITER{store = nil, module = Module, id = Id, state = State}) -> - Module:is_congestion(Id, State); - -is_congestion(#?LIMITER{store = Store, module = Module, id = Id, state = State}) -> - Module:is_congestion(Id, Store:lookup(Id, State)). - - --spec format(limiter()) -> [{Key :: atom(), Value :: term()}]. - -format(#?LIMITER{store = nil, module = Module, id = Id, state = State}) -> - Module:format(Id, State); - -format(#?LIMITER{store = Store, module = Module, id = Id, state = State}) -> - Module:format(Id, Store:lookup(Id, State)). - - --spec to_map(limiter()) -> map(). - -to_map(#?LIMITER{store = nil, module = Module, id = Id, state = State}) -> - Module:to_map(Id, State); - -to_map(#?LIMITER{store = Store, module = Module, id = Id, state = State}) -> - Module:to_map(Id, Store:lookup(Id, State)). - --spec min_latency(limiter()) -> pos_integer(). - -min_latency(#?LIMITER{store = nil, module = Module, id = Id, state = State}) -> - Module:min_latency(Id, State); - -min_latency(#?LIMITER{store = Store, module = Module, id = Id, state = State}) -> - Module:to_map(Id, Store:lookup(Id, State)). - - --spec id(limiter()) -> module(). - -id(Limiter) -> - Limiter#?LIMITER.id. - - --spec module(limiter()) -> module(). - -module(Limiter) -> - Limiter#?LIMITER.module. - - --spec state(limiter()) -> state(). - -state(Limiter) -> - Limiter#?LIMITER.state. - --spec store(limiter()) -> module() | nil. - -store(Limiter) -> - Limiter#?LIMITER.store. - - -get_config(ConfigId) -> - ConfigSection = "couch_rate." ++ ConfigId, - ModuleStr = config:get(ConfigSection, "limiter", "couch_rate_limiter"), - Module = list_to_existing_atom(ModuleStr), - case config:get(ConfigSection, "opts", undefined) of - undefined -> - {error, #{missing_key => "opts", in => ConfigSection}}; - OptionsStr -> - Options = couch_rate_config:from_str(OptionsStr), - lists:map(fun(Key) -> - maps:is_key(Key, Options) orelse error(#{missing_key => Key, in => Options}) - end, [budget, target, window, sensitivity]), - {Module, Options} - end. diff --git a/src/couch_rate/src/couch_rate.hrl b/src/couch_rate/src/couch_rate.hrl deleted file mode 100644 index d19f7d8e4..000000000 --- a/src/couch_rate/src/couch_rate.hrl +++ /dev/null @@ -1,19 +0,0 @@ -% Licensed under the Apache License, Version 2.0 (the "License"); you may not -% use this file except in compliance with the License. You may obtain a copy of -% the License at -% -% http://www.apache.org/licenses/LICENSE-2.0 -% -% Unless required by applicable law or agreed to in writing, software -% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -% License for the specific language governing permissions and limitations under -% the License. - --record(couch_rate, - { - id :: couch_rate:id(), - module = couch_rate_limiter :: module(), - store = couch_rate_ets :: module() | nil, - state :: couch_rate:state() - }). diff --git a/src/couch_rate/src/couch_rate_app.erl b/src/couch_rate/src/couch_rate_app.erl deleted file mode 100644 index 2bb1621c3..000000000 --- a/src/couch_rate/src/couch_rate_app.erl +++ /dev/null @@ -1,28 +0,0 @@ -% Licensed under the Apache License, Version 2.0 (the "License"); you may not -% use this file except in compliance with the License. You may obtain a copy of -% the License at -% -% http://www.apache.org/licenses/LICENSE-2.0 -% -% Unless required by applicable law or agreed to in writing, software -% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -% License for the specific language governing permissions and limitations under -% the License. - --module(couch_rate_app). - --behaviour(application). - --export([ - start/2, - stop/1 -]). - - -start(_StartType, _StartArgs) -> - couch_rate_sup:start_link(). - - -stop(_State) -> - ok. diff --git a/src/couch_rate/src/couch_rate_config.erl b/src/couch_rate/src/couch_rate_config.erl deleted file mode 100644 index 709fbc3d3..000000000 --- a/src/couch_rate/src/couch_rate_config.erl +++ /dev/null @@ -1,66 +0,0 @@ -% Licensed under the Apache License, Version 2.0 (the "License"); you may not -% use this file except in compliance with the License. You may obtain a copy of -% the License at -% -% http://www.apache.org/licenses/LICENSE-2.0 -% -% Unless required by applicable law or agreed to in writing, software -% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -% License for the specific language governing permissions and limitations under -% the License. - --module(couch_rate_config). - -% This parser supports only maps where key is atom and value -% is positive float or positive integer. - --include_lib("syntax_tools/include/merl.hrl"). - --export([ - from_str/1, - to_str/1 -]). - -from_str(String) -> - parse_map(merl:quote(String)). - - -to_str(Map) when is_map(Map) -> - StringArgs = maps:fold(fun(Key, Val, Acc) -> - Acc ++ [atom_to_list(Key) ++ " => " ++ number_to_list(Val)] - end, [], Map), - "#{" ++ string:join(StringArgs, ", ") ++ "}". - - -number_to_list(Int) when is_integer(Int) -> - integer_to_list(Int); - -number_to_list(Float) when is_float(Float) -> - float_to_list(Float). - - -parse_map(MapAST) -> - erl_syntax:type(MapAST) == map_expr - orelse fail("Only #{field => pos_integer() | float()} syntax is supported"), - %% Parsing map manually, since merl does not support maps - lists:foldl(fun(AST, Bindings) -> - NameAST = erl_syntax:map_field_assoc_name(AST), - erl_syntax:type(NameAST) == atom - orelse fail("Only atoms are supported as field names"), - Name = erl_syntax:atom_value(NameAST), - ValueAST = erl_syntax:map_field_assoc_value(AST), - Value = case erl_syntax:type(ValueAST) of - integer -> - erl_syntax:integer_value(ValueAST); - float -> - erl_syntax:float_value(ValueAST); - _ -> - fail("Only pos_integer() or float() alowed as values") - end, - Bindings#{Name => Value} - end, #{}, erl_syntax:map_expr_fields(MapAST)). - - -fail(Msg) -> - throw({error, Msg}). \ No newline at end of file diff --git a/src/couch_rate/src/couch_rate_ets.erl b/src/couch_rate/src/couch_rate_ets.erl deleted file mode 100644 index edd9d965c..000000000 --- a/src/couch_rate/src/couch_rate_ets.erl +++ /dev/null @@ -1,119 +0,0 @@ -% Licensed under the Apache License, Version 2.0 (the "License"); you may not -% use this file except in compliance with the License. You may obtain a copy of -% the License at -% -% http://www.apache.org/licenses/LICENSE-2.0 -% -% Unless required by applicable law or agreed to in writing, software -% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -% License for the specific language governing permissions and limitations under -% the License. - --module(couch_rate_ets). - --include("couch_rate.hrl"). - --export([ - create_tables/0, - delete_tables/0, - create_if_missing/2, - new/2, - lookup/2, - update/3 -]). - - --define(SHARDS_N, 16). - --type id() :: term(). --type state() :: term(). --type result() :: term(). --type store_state() :: term(). - - --spec create_if_missing(couch_rate:id(), state()) -> - store_state(). - -create_if_missing(Id, State) -> - Tid = term_to_table(Id), - case ets:lookup(Tid, Id) of - [_ | _] -> ok; - _ -> ets:insert(Tid, {Id, State}) - end, - ok. - - --spec new(couch_rate:id(), state()) -> - store_state() - | {error, term()}. - -new(Id, State) -> - Tid = term_to_table(Id), - case ets:insert_new(Tid, {Id, State}) of - true -> ok; - false -> {error, #{reason => already_exists, id => Id}} - end. - - --spec update(id(), store_state(), fun( - (id(), state()) -> {state(), result()} - )) -> - result() - | {error, term()}. - -update(Id, _StoreState, Fun) -> - Tid = term_to_table(Id), - case ets:lookup(Tid, Id) of - [{Id, State0}] -> - case Fun(Id, State0) of - {Result, State1} -> - ets:insert(Tid, {Id, State1}), - Result; - Error -> - Error - end; - _ -> - {error, #{reason => cannot_find, id => Id}} - end. - - --spec lookup(id(), store_state()) -> - state() - | {error, term()}. - -lookup(Id, _StoreState) -> - Tid = term_to_table(Id), - case ets:lookup(Tid, Id) of - [{Id, State}] -> - State; - _ -> - {error, #{reason => cannot_find, id => Id}} - end. - - -create_tables() -> - Opts = [named_table, public, {read_concurrency, true}], - [ets:new(TableName, Opts) || TableName <- table_names()], - ok. - -delete_tables() -> - [ets:delete(TableName) || TableName <- table_names()], - ok. - - --spec term_to_table(any()) -> atom(). -term_to_table(Term) -> - PHash = erlang:phash2(Term), - table_name(PHash rem ?SHARDS_N). - - --dialyzer({no_return, table_names/0}). - --spec table_names() -> [atom()]. -table_names() -> - [table_name(N) || N <- lists:seq(0, ?SHARDS_N - 1)]. - --spec table_name(non_neg_integer()) -> atom(). -table_name(Id) when is_integer(Id), Id >= 0 andalso Id < ?SHARDS_N -> - list_to_atom(atom_to_list(?MODULE) ++ "_" ++ integer_to_list(Id)). \ No newline at end of file diff --git a/src/couch_rate/src/couch_rate_limiter.erl b/src/couch_rate/src/couch_rate_limiter.erl deleted file mode 100644 index 97a630206..000000000 --- a/src/couch_rate/src/couch_rate_limiter.erl +++ /dev/null @@ -1,392 +0,0 @@ -% Licensed under the Apache License, Version 2.0 (the "License"); you may not -% use this file except in compliance with the License. You may obtain a copy of -% the License at -% -% http://www.apache.org/licenses/LICENSE-2.0 -% -% Unless required by applicable law or agreed to in writing, software -% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -% License for the specific language governing permissions and limitations under -% the License. - --module(couch_rate_limiter). - -%% This module implements an algorithm to control the consumption rate -%% parameters such as: -%% - batch size -%% - delay between batches -%% The components of the algorithm use -%% - [ascending minima algorithm](http://web.archive.org/web/20120805114719/http://home.tiac.net/~cri/2001/slidingmin.html) -%% - "Welford's method" of calculating average - --export([ - new/2, - from_map/2, - budget/2, - delay/2, - wait/2, - in/3, - success/3, - failure/2, - is_congestion/2, - min_latency/2, - format/2, - to_map/2 -]). - --type msec() :: non_neg_integer(). - --define(STATE, ?MODULE). - -%% This is the number below which the math would not work due to round errors -%% In particular the default values for thresholds would be equal --define(MIN_TARGET, 36). - --define(record_to_keyval(Name, Record), - lists:zip(record_info(fields, Name), - tl(tuple_to_list(Record)))). - --define(map_to_record(RecordName, Map), - element(1, lists:foldl(fun(Field, {Record, Idx}) -> - {setelement(Idx, Record, maps:get(Field, Map, element(Idx, Record))), Idx + 1} - end, {#RecordName{}, 2}, record_info(fields, RecordName)))). - - --define(record_to_map(RecordName, Record), - element(1, lists:foldl(fun(Field, {Map, Idx}) -> - { - maps:put(Field, element(Idx, Record), Map), - Idx + 1 - } - end, {#{}, 2}, record_info(fields, RecordName)))). - --record(?STATE, { - window_size = 0 :: 0 | pos_integer(), - timer = fun now_msec/0, - size = 1 :: pos_integer(), - epoch = 1 :: pos_integer(), - minimums :: queue:queue() | undefined, - start_ts = undefined, - mean_reads = 0.0, - mean_writes = 0.0, - reads = 0, - writes = 0, - target = 4500, - underload_threshold = 4275, %% target * 0.95 - overload_threshold = 4725, %% target * 1.05 - delay_threshold = 4950, %% target * 1.10 - multiplicative_factor = 0.7, - regular_delay = 100 :: timeout(), - congested_delay = 5000 :: timeout(), - initial_budget = 100, - latency = 0, - has_failures = false -}). - --type state() :: #?STATE{}. - - --spec new(couch_rate:id(), Opts :: map()) -> state(). - -new(_Id, #{sensitivity := S}) when S =< 0 -> - error("expected SensitivityTimeWindow > 0"); - -new(_Id, #{target := T}) when T < ?MIN_TARGET -> - error("the target is too small"); - -new(_Id, #{budget := B, target := T, window := W, sensitivity := S} = Opts) -> - WinSize = W div S + 1, - validate_arguments(?map_to_record(?STATE, maps:merge(#{ - minimums => queue:new(), - window_size => WinSize, - initial_budget => B, - underload_threshold => round(T * 0.95), - overload_threshold => round(T * 1.05), - delay_threshold => round(T * 1.07) - }, maps:without([budget, window, sensitivity], Opts)))). - - --spec from_map(couch_rate:id(), map()) -> state(). - -from_map(_Id, Map) -> - ?map_to_record(?STATE, Map). - - --spec budget(couch_rate:id(), state()) -> - {pos_integer(), state()}. - -budget(Id, #?STATE{} = State) -> - #?STATE{ - reads = R, - writes = W, - mean_writes = MW, - mean_reads = MR, - multiplicative_factor = MultiplicativeFactor, - target = Target, - initial_budget = InitialBudget, - latency = Latency - } = State, - case pattern(Id, State) of - optimal -> - {max(1, round(MR)), State}; - failed -> - %% decrease budget - {max(1, round(R * MultiplicativeFactor)), State}; - overloaded -> - %% decrease budget - {max(1, round(R * MultiplicativeFactor)), State}; - underloaded when W == 0 orelse Latency == 0 -> - {max(1, round(MR)), State}; - underloaded -> - ReadWriteRatio = min(1, MR / max(1, MW)), - SingleWrite = Latency / W, - EstimatedWrites = floor(Target / SingleWrite), - {max(1, round(ReadWriteRatio * EstimatedWrites)), State}; - init -> - {InitialBudget, State} - end. - --spec delay(couch_rate:id(), state()) -> - {pos_integer(), state()}. - -delay(Id, #?STATE{} = State) -> - #?STATE{ - regular_delay = RD, - congested_delay = CD - } = State, - case pattern(Id, State) of - failed -> - {CD, State}; - _ -> - {RD, State} - end. - - --spec wait(couch_rate:id(), state()) -> - ok. - -wait(Id, State) -> - {Delay, _} = delay(Id, State), - timer:sleep(Delay). - - --spec in(couch_rate:id(), state(), Reads :: pos_integer()) -> - {ok, state()}. - -in(_Id, #?STATE{timer = TimerFun} = State, Reads) -> - {ok, State#?STATE{ - reads = Reads, - start_ts = TimerFun() - }}. - - --spec success(couch_rate:id(), state(), Writes :: pos_integer()) -> - {ok, state()}. - -success(_Id, #?STATE{start_ts = undefined} = State, _Writes) -> - {ok, State}; - -success(_Id, #?STATE{} = State, Writes) -> - #?STATE{ - start_ts = TS, - timer = TimerFun, - reads = Reads, - mean_reads = MeanReads, - mean_writes = MeanWrites, - window_size = WinSize - } = State, - {ok, update_min(State#?STATE{ - writes = Writes, - mean_writes = average(MeanWrites, WinSize, Writes), - mean_reads = average(MeanReads, WinSize, Reads), - latency = TimerFun() - TS, - has_failures = false - })}. - - --spec failure(couch_rate:id(), state()) -> {ok, state()}. - -failure(_Id, #?STATE{start_ts = undefined} = State) -> - {ok, State}; - -failure(_Id, #?STATE{} = State) -> - #?STATE{ - timer = TimerFun, - start_ts = TS - } = State, - {ok, update_min(State#?STATE{ - writes = 0, - latency = TimerFun() - TS, - has_failures = true - })}. - - --spec is_congestion(couch_rate:id(), state()) -> boolean(). - -is_congestion(Id, #?STATE{} = State) -> - case pattern(Id, State) of - overloaded -> true; - failed -> true; - _ -> false - end. - - --spec format(couch_rate:id(), state()) -> [{Key :: atom(), Value :: term()}]. - -format(_Id, #?STATE{minimums = M} = State) -> - Map = ?record_to_map(?STATE, State), - Minimums = lists:map(fun({D, V}) -> - [{value, V}, {death, D}] - end, queue:to_list(M)), - maps:to_list(maps:merge(Map, #{ - minimums => Minimums - })). - - --spec to_map(couch_rate:id(), state()) -> map(). - -to_map(_Id, #?STATE{} = State) -> - ?record_to_map(?STATE, State). - - --spec update_min(state()) -> state(). - -update_min(#?STATE{latency = ProcessingDelay} = Q0) -> - Q1 = remove_greater_than(Q0, ProcessingDelay), - Q2 = append(Q1, ProcessingDelay), - maybe_remove_first(Q2). - - --spec pattern(couch_rate:id(), state()) -> - init - | underloaded - | overloaded - | optimal - | failed. - -pattern(Id, #?STATE{} = State) -> - #?STATE{ - underload_threshold = UnderloadThreshold, - overload_threshold = OverloadThreshold, - mean_writes = MW, - has_failures = HasFailures - } = State, - case min_latency(Id, State) of - MinRollingLatency when MinRollingLatency > OverloadThreshold -> - overloaded; - MinRollingLatency when MinRollingLatency > UnderloadThreshold -> - optimal; - MinRollingLatency when MinRollingLatency == 0 andalso MW == 0.0 -> - init; - _ when HasFailures -> - failed; - _ -> - underloaded - end. - - --spec min_latency(couch_rate:id(), state()) -> pos_integer() | 0. - -min_latency(_Id, #?STATE{size = 1}) -> - 0; - -min_latency(_Id, #?STATE{minimums = Minimums}) -> - {value, {_, Min}} = head(Minimums), - Min. - - -validate_arguments(#?STATE{timer = TimerFun}) - when not is_function(TimerFun, 0) -> - error("expected `timer` to be an arity 0 function"); - -validate_arguments(#?STATE{window_size = WinSize}) - when WinSize < 1 -> - error("expected `window_size` to be greater than 1"); - -validate_arguments(#?STATE{initial_budget = Budget}) - when Budget < 1 -> - error("expected `initial_budget` to be greater than 1"); - -validate_arguments(#?STATE{overload_threshold = OT, target = T}) - when OT =< T -> - error("expected `overload_threshold` to be greater than `target`"); - -validate_arguments(#?STATE{underload_threshold = UT, target = T}) - when UT >= T -> - error("expected `underload_threshold` to be less than `target`"); - -validate_arguments(#?STATE{delay_threshold = DT, overload_threshold = OT}) - when DT =< OT -> - error("expected `delay_threshold` to be greater than `overload_threshold`"); - -validate_arguments(#?STATE{multiplicative_factor = MF}) - when MF < 0 orelse MF > 1 -> - error("expected `multiplicative_factor` to be in the (0, 1) range"); - -validate_arguments(#?STATE{} = State) -> - State. - - --spec remove_greater_than(state(), pos_integer()) -> state(). - -remove_greater_than(#?STATE{minimums = Minimums, size = S} = State, Value) -> - case tail(Minimums) of - {value, {_, T}} when Value =< T -> - NewState = State#?STATE{minimums = tail_drop(Minimums), size = S - 1}, - remove_greater_than(NewState, Value); - {value, _} -> - State; - empty -> - State#?STATE{epoch = 1} - end. - - --spec append(state(), pos_integer()) -> state(). - -append(#?STATE{minimums = Minimums, epoch = E, window_size = S} = State, Value) -> - Death = E + S, - State#?STATE{ - minimums = tail_put(Minimums, {Death, Value}), - epoch = E + 1, - size = S + 1 - }. - - --spec maybe_remove_first(state()) -> state(). - -maybe_remove_first(#?STATE{minimums = Minimums, epoch = E, size = S} = State) -> - case head(Minimums) of - {value, {E, _V}} -> - State#?STATE{minimums = head_drop(Minimums), size = S - 1}; - _ -> - State - end. - - -% Donald Knuth’s Art of Computer Programming, Vol 2, page 232, 3rd -% Welford method -average(Avg, WindowSize, Value) -> - Delta = Value - Avg, - Avg + Delta / WindowSize. - -%% The helper functions are added because queue module -%% naming conventions are weird -head(Q) -> queue:peek_r(Q). - - -head_drop(Q) -> queue:drop_r(Q). - -tail(Q) -> queue:peek(Q). - - -tail_put(Q, V) -> queue:in_r(V, Q). - - -tail_drop(Q) -> queue:drop(Q). - - --spec now_msec() -> msec(). -now_msec() -> - {Mega, Sec, Micro} = os:timestamp(), - ((Mega * 1000000) + Sec) * 1000 + Micro div 1000. \ No newline at end of file diff --git a/src/couch_rate/src/couch_rate_pd.erl b/src/couch_rate/src/couch_rate_pd.erl deleted file mode 100644 index 5d79f7890..000000000 --- a/src/couch_rate/src/couch_rate_pd.erl +++ /dev/null @@ -1,90 +0,0 @@ -% Licensed under the Apache License, Version 2.0 (the "License"); you may not -% use this file except in compliance with the License. You may obtain a copy of -% the License at -% -% http://www.apache.org/licenses/LICENSE-2.0 -% -% Unless required by applicable law or agreed to in writing, software -% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -% License for the specific language governing permissions and limitations under -% the License. - --module(couch_rate_pd). - --include("couch_rate.hrl"). - - --export([ - new/2, - create_if_missing/2, - lookup/2, - update/3 -]). - --type id() :: term(). --type state() :: term(). --type result() :: term(). --type store_state() :: term(). - --define(STATE_KEY, couch_rate_state). - - --spec create_if_missing(couch_rate:id(), state()) -> store_state(). - -create_if_missing(Id, State) -> - case get({?STATE_KEY, Id}) of - undefined -> - put({?STATE_KEY, Id}, State), - ok; - _ -> - ok - end. - - --spec new(couch_rate:id(), state()) -> - store_state() - | {error, term()}. - -new(Id, State) -> - case get({?STATE_KEY, Id}) of - undefined -> - put({?STATE_KEY, Id}, State), - ok; - _ -> - {error, #{reason => already_exists, id => Id}} - end. - - --spec lookup(id(), store_state()) -> - state() - | {error, term()}. - -lookup(Id, _StoreState) -> - case get({?STATE_KEY, Id}) of - undefined -> - {error, #{reason => cannot_find, id => Id}}; - State -> - State - end. - - --spec update(id(), store_state(), fun( - (id(), state()) -> {state(), result()} - )) -> - result() - | {error, term()}. - -update(Id, _StoreState, Fun) -> - case get({?STATE_KEY, Id}) of - undefined -> - {error, #{reason => cannot_find, id => Id}}; - State -> - case Fun(Id, State) of - {Result, State} -> - put({?STATE_KEY, Id}, State), - Result; - Error -> - Error - end - end. diff --git a/src/couch_rate/src/couch_rate_sup.erl b/src/couch_rate/src/couch_rate_sup.erl deleted file mode 100644 index 1ce01b644..000000000 --- a/src/couch_rate/src/couch_rate_sup.erl +++ /dev/null @@ -1,36 +0,0 @@ -% Licensed under the Apache License, Version 2.0 (the "License"); you may not -% use this file except in compliance with the License. You may obtain a copy of -% the License at -% -% http://www.apache.org/licenses/LICENSE-2.0 -% -% Unless required by applicable law or agreed to in writing, software -% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -% License for the specific language governing permissions and limitations under -% the License. - --module(couch_rate_sup). --behaviour(supervisor). --vsn(1). - --export([ - start_link/0, - init/1 -]). - - -start_link() -> - supervisor:start_link({local, ?MODULE}, ?MODULE, []). - - -init([]) -> - couch_rate_ets:create_tables(), - Flags = #{ - strategy => one_for_one, - intensity => 5, - period => 10 - }, - Children = [ - ], - {ok, {Flags, Children}}. \ No newline at end of file diff --git a/src/couch_rate/test/exunit/couch_rate_config_test.exs b/src/couch_rate/test/exunit/couch_rate_config_test.exs deleted file mode 100644 index 7db30d272..000000000 --- a/src/couch_rate/test/exunit/couch_rate_config_test.exs +++ /dev/null @@ -1,88 +0,0 @@ -# Licensed under the Apache License, Version 2.0 (the "License"); you may not -# use this file except in compliance with the License. You may obtain a copy of -# the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations under -# the License. - -defmodule Couch.Rate.Config.Test do - use ExUnit.Case, async: true - use ExUnitProperties - import StreamData - - @erlang_reserved_words MapSet.new([ - "after", - "and", - "andalso", - "band", - "begin", - "bnot", - "bor", - "bsl", - "bsr", - "bxor", - "case", - "catch", - "cond", - "div", - "end", - "fun", - "if", - "let", - "not", - "of", - "or", - "orelse", - "receive", - "rem", - "try", - "when", - "xor" - ]) - - alias :couch_rate_config, as: RLC - - test "parse valid configuration" do - parsed = RLC.from_str(~S(#{foo => 1, bar => 2.0})) - assert %{foo: 1, bar: 2} == parsed - end - - property "roundtrip" do - check all(options <- valid_config()) do - parsed = RLC.from_str(RLC.to_str(options)) - assert options == parsed - end - end - - defp valid_config() do - map_of( - erlang_atom(), - one_of([ - positive_integer(), - # we only support positive float - float(min: 0.0) - ]) - ) - end - - defp erlang_atom() do - bind(string(:alphanumeric), fn str -> - bind(integer(?a..?z), fn char -> - erlang_atom(str, char) - end) - end) - end - - defp erlang_atom(str, char) do - if MapSet.member?(@erlang_reserved_words, <>) do - String.to_atom(<>) - else - String.to_atom(<>) - end - end -end diff --git a/src/couch_rate/test/exunit/couch_rate_limiter_test.exs b/src/couch_rate/test/exunit/couch_rate_limiter_test.exs deleted file mode 100644 index ff70f793a..000000000 --- a/src/couch_rate/test/exunit/couch_rate_limiter_test.exs +++ /dev/null @@ -1,350 +0,0 @@ -# Licensed under the Apache License, Version 2.0 (the "License"); you may not -# use this file except in compliance with the License. You may obtain a copy of -# the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations under -# the License. - -defmodule Couch.Rate.Limiter.Test do - use ExUnit.Case, async: true - - @transaction_timeout 5_000 - - alias :couch_rate, as: RL - - describe "Stats :" do - @scenario %{rw_ratio: 1 / 1, target: 400, write_time: 100} - test "#{__ENV__.line} : #{inspect(@scenario)} (underloaded)" do - {rate_limiter, measurments} = simulate(@scenario, 1000) - stats = statistics(measurments) - maybe_debug(rate_limiter, measurments, stats) - - assert stats.wait_time.p90 == 100, - "expected no artificial delays for more than 90% of batches" - - budget = stats.budget - - assert floor(budget.p95) in 1..7, - "expected budget to converge into the 1..7 range (got #{budget.p95})" - - reads = stats.mean_reads - - assert floor(reads.p95) in 1..7, - "expected mean_read to converge into the 1..7 range (got #{reads.p95})" - - writes = stats.mean_writes - assert round(writes.p99) in 2..6 - "expected mean_writes to converge into the 2..6 range (got #{writes.p95})" - - assert stats.latency.p95 < @transaction_timeout, - "expected latency for 95% batches under @transaction_timout" - - found_after = initial_search_speed(measurments) - - assert found_after < 5, - "expected to find acceptable budget in less than 5 iterations (got: #{ - found_after - })" - - measurments - |> initial_search() - |> Enum.reduce(101, fn row, prev_budget -> - assert row.budget < prev_budget, - "expected to reduce budget while we fail" - - row.budget - end) - end - - @scenario %{rw_ratio: 1 / 8, target: 3900, write_time: 100} - test "#{__ENV__.line} : #{inspect(@scenario)} (optimal)" do - {rate_limiter, measurments} = simulate(@scenario, 1000) - stats = statistics(measurments) - maybe_debug(rate_limiter, measurments, stats) - - assert stats.wait_time.p90 == 100, - "expected no artificial delays for more than 90% of batches" - - budget = stats.budget - - assert floor(budget.p95) in 4..7, - "expected budget to converge into the 4..7 range (got #{budget.p95})" - - reads = stats.mean_reads - - assert floor(reads.p95) in 4..7, - "expected mean_read to converge into the 4..7 range (got #{reads.p95})" - - writes = stats.mean_writes - assert round(writes.p99) in 39..41 - "expected mean_writes to converge into the 39..41 range (got #{writes.p95})" - - assert stats.latency.p95 < @transaction_timeout, - "expected latency for 95% of batches under @transaction_timout" - - found_after = initial_search_speed(measurments) - - assert found_after < 10, - "expected to find acceptable budget in less than 10 iterations (got: #{ - found_after - })" - - measurments - |> initial_search() - |> Enum.reduce(101, fn row, prev_budget -> - assert row.budget < prev_budget, - "expected to reduce budget while we fail" - - row.budget - end) - end - - @scenario %{rw_ratio: 1 / 20, target: 3900, write_time: 100} - test "#{__ENV__.line} : #{inspect(@scenario)} (overloaded)" do - # This is a worst case scenario due to big variability of wait_time and - # big value read/write ratio - {rate_limiter, measurments} = simulate(@scenario, 1000) - stats = statistics(measurments) - maybe_debug(rate_limiter, measurments, stats) - - assert stats.wait_time.p90 == 100, - "expected no artificial delays for more than 90% of batches" - - budget = stats.budget - assert floor(budget.p95) in 1..4 - "expected budget to converge into the 1..4 range (got #{budget.p95})" - reads = stats.mean_reads - assert floor(reads.p95) in 1..4 - "expected mean_read to converge into the 1..4 range (got #{reads.p95})" - writes = stats.mean_writes - assert round(writes.p99) in 39..41 - "expected mean_writes to converge into the 39..41 range (got #{writes.p95})" - - assert stats.latency.p90 < @transaction_timeout, - "expected latency for 90% of batches under @transaction_timout" - - found_after = initial_search_speed(measurments) - - assert found_after < 16, - "expected to find acceptable budget in less than 16 iterations (got: #{ - found_after - })" - - measurments - |> initial_search() - |> Enum.reduce(101, fn row, prev_budget -> - assert row.budget < prev_budget, - "expected to reduce budget while we fail" - - row.budget - end) - end - end - - defp simulate(scenario, iterations) do - :couch_rate_ets.create_tables() - - limiter = - RL.new(:limiter_id, :couch_rate_limiter, nil, %{ - budget: 100, - target: scenario.target, - # average over 20 last measurments - window: scenario.write_time * 20, - sensitivity: scenario.write_time, - timer: &timer/0 - }) - - result = - Enum.reduce(0..iterations, {limiter, []}, fn _idx, {limiter, stats} -> - {budget, limiter} = step(limiter, scenario.rw_ratio, scenario.write_time) - {limiter, update_measurments(limiter, stats, budget)} - end) - - :couch_rate_ets.delete_tables() - result - end - - defp step(limiter, read_write_ratio, write_time) do - {reads, limiter} = RL.budget(limiter) - writes = round(reads / read_write_ratio) - {delay, limiter} = RL.delay(limiter) - sleep(delay) - data_before = RL.to_map(limiter) - {:ok, limiter} = RL.in(limiter, reads) - data_after = RL.to_map(limiter) - - assert data_after.size <= data_after.window_size + 1, - "The number of elements in minimums container shouldn't grow (got: #{ - data_after.size - })" - - if data_before.writes == 0 and - data_after.writes == 0 and - data_before.reads != 0 do - assert data_before.reads > data_after.reads, - "expected to reduce number of reads while transaction fails" - end - - total_write_time = - 0..writes - |> Enum.reduce_while(0, fn _, acc -> - write_time = :rand.normal(write_time, write_time * 0.25) - - if acc < @transaction_timeout do - {:cont, acc + write_time} - else - {:halt, acc} - end - end) - - sleep(total_write_time) - - if total_write_time < @transaction_timeout do - {:ok, limiter} = RL.success(limiter, writes) - {reads, limiter} - else - {:ok, limiter} = RL.failure(limiter) - {reads, limiter} - end - end - - defp update_measurments(limiter, stats, budget) do - data = RL.to_map(limiter) - {wait_time, _} = RL.delay(limiter) - - stats ++ - [ - %{ - budget: budget, - slack: data.target - data.latency, - rw_ratio: data.mean_reads / max(1, data.mean_writes), - latency: data.latency, - new_budget: budget, - minimum_latency: RL.min_latency(limiter), - wait_time: wait_time, - elements_in_min_queue: data.size, - mean_reads: data.mean_reads, - mean_writes: data.mean_writes, - total_reads: data.reads, - total_writes: data.writes - } - ] - end - - defp timer() do - now = Process.get(:time, 1) - Process.put(:time, now + 1) - now - end - - defp sleep(sleep_time_in_ms) do - now = timer() - Process.put(:time, now + sleep_time_in_ms - 1) - end - - defp format_table([first | _] = rows) do - spec = - first - |> Map.keys() - |> Enum.map(fn h -> {h, String.length(to_str(h))} end) - - header = first |> Map.keys() |> Enum.map(&to_str/1) |> Enum.join(" , ") - - lines = - Enum.map(rows, fn row -> - fields = - Enum.map(spec, fn {field, size} -> - String.pad_trailing("#{to_str(Map.get(row, field))}", size) - end) - - Enum.join(fields, " , ") - end) - - Enum.join([header | lines], "\n") - end - - defp initial_search_speed(measurments) do - length(initial_search(measurments)) - end - - defp initial_search(measurments) do - Enum.reduce_while(measurments, [], fn row, acc -> - if row.total_writes == 0 do - {:cont, acc ++ [row]} - else - {:halt, acc} - end - end) - end - - defp statistics(measurments) do - data = - Enum.reduce(measurments, %{}, fn row, acc -> - Enum.reduce(row, acc, fn {key, value}, acc -> - Map.update(acc, key, [], fn metric -> - metric ++ [value] - end) - end) - end) - - Enum.reduce(data, %{}, fn {key, values}, acc -> - stats = Enum.into(:bear.get_statistics(values), %{}) - {percentile, stats} = Map.pop(stats, :percentile) - - stats = - Enum.reduce(percentile, stats, fn {key, value}, acc -> - Map.put(acc, String.to_atom("p#{to_str(key)}"), value) - end) - - Map.put(acc, key, stats) - end) - end - - defp format_stats(stats) do - rows = - Enum.map(stats, fn {key, values} -> - values - |> Enum.into(%{}) - |> Map.put(:metric, key) - |> Map.delete(:histogram) - end) - - format_table(rows) - end - - defp to_str(int) when is_integer(int) do - "#{int}" - end - - defp to_str(float) when is_float(float) do - "#{Float.to_string(Float.round(float, 2))}" - end - - defp to_str(atom) when is_atom(atom) do - Atom.to_string(atom) - end - - defp to_str(string) when is_binary(string) do - string - end - - defp to_map(rate_limiter) do - RL.to_map(rate_limiter) - end - - defp maybe_debug(rate_limiter, measurments, stats) do - if System.fetch_env("EXUNIT_DEBUG") != :error do - IO.puts("") - IO.puts("rate_limiter: #{inspect(to_map(rate_limiter))}") - IO.puts("measurments: #{inspect(measurments)}") - IO.puts("stats: #{inspect(stats)}") - - IO.puts("\n" <> format_table(measurments) <> "\n" <> format_stats(stats)) - end - end -end diff --git a/src/couch_rate/test/exunit/test_helper.exs b/src/couch_rate/test/exunit/test_helper.exs deleted file mode 100644 index 9b9d6ef94..000000000 --- a/src/couch_rate/test/exunit/test_helper.exs +++ /dev/null @@ -1,14 +0,0 @@ -# Licensed under the Apache License, Version 2.0 (the "License"); you may not -# use this file except in compliance with the License. You may obtain a copy of -# the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations under -# the License. - -ExUnit.configure(formatters: [JUnitFormatter, ExUnit.CLIFormatter]) -ExUnit.start() diff --git a/src/couch_views/README.md b/src/couch_views/README.md index 5647913f0..09696da82 100644 --- a/src/couch_views/README.md +++ b/src/couch_views/README.md @@ -7,42 +7,12 @@ Currently only map indexes are supported and it will always return the full inde Code layout: * `couch_views` - Main entry point to query a view -* `couch_views_reader` - Reads from the index for queries +* `couch_views_encoding` - Encodes view keys that are byte comparable following CouchDB view sort order. +* `couch_views_fdb` - Maps view operations to FoundationDB logic. +* `couch_views_http` - View specific helpers for chttpd * `couch_views_indexer` - `couch_jobs` worker that builds an index from the changes feed. +* `couch_views_reader` - Reads from the index for queries * `couch_vews_jobs` - `couch_views` interactions with `couch_jobs`. It handles adding index jobs and subscribes to jobs. -* `couch_views_fdb` - Maps view operations to FoundationDB logic. -* `couch_views_encoding` - Encodes view keys that are byte comparable following CouchDB view sort order. * `couch_views_server` - Spawns `couch_views_indexer` workers to handle index update jobs. - -# Configuration - -## Configuring rate limiter - -Here is the example of configuration used in `couch_view` application: - -``` -[couch_rate.views] -limiter = couch_rate_limiter -opts = #{budget => 100, target => 2500, window => 60000, sensitivity => 1000} -``` - -Supported fields in `opts`: - -* `budget` - the initial value for estimated batch size -* `target` - the amount in msec which we try to maintain for batch processing time -* `window` - time interval for contention detector -* `sensitivity` - minimal interval within the `window` - -Unsupported fields in `opts` (if you really know what you are doing): - -* `window_size` - how many batches to consider in contention detector -* `timer` - this is used for testing to fast forward time `fun() -> current_time_in_ms() end` -* `target` - the amount in msec which we try to maintain for batch processing time -* `underload_threshold` - a threshold below which we would try to increase the budget -* `overload_threshold` - a threshold above which we would start decreasing the budget -* `delay_threshold` - a threshold above which we would start introducing delays between batches -* `multiplicative_factor` - determines how fast we are going to decrease budget (must be in (0..1) range) -* `regular_delay` - delay between batches when there is no overload -* `congested_delay` - delay between batches when there is an overload -* `initial_budget` - initial value for budget to start with - +* `couch_views_updater` - Update interactive indexes during doc update transactions +* `couch_views_util` - Various utility functions diff --git a/src/couch_views/src/couch_views.app.src b/src/couch_views/src/couch_views.app.src index cb8285ac2..985c503cd 100644 --- a/src/couch_views/src/couch_views.app.src +++ b/src/couch_views/src/couch_views.app.src @@ -28,7 +28,6 @@ couch_stats, fabric, couch_jobs, - couch_eval, - couch_rate + couch_eval ]} ]}. diff --git a/src/couch_views/src/couch_views_indexer.erl b/src/couch_views/src/couch_views_indexer.erl index 9c8be6fca..737b6f880 100644 --- a/src/couch_views/src/couch_views_indexer.erl +++ b/src/couch_views/src/couch_views_indexer.erl @@ -78,8 +78,6 @@ init() -> fail_job(Job, Data, sig_changed, "Design document was modified") end, - Limiter = couch_rate:create_if_missing({DbName, DDocId}, "views"), - State = #{ tx_db => undefined, db_uuid => DbUUID, @@ -90,8 +88,8 @@ init() -> job => Job, job_data => Data, count => 0, + limit => num_changes(), changes_done => 0, - limiter => Limiter, doc_acc => [], design_opts => Mrst#mrst.design_opts }, @@ -104,7 +102,6 @@ init() -> error:database_does_not_exist -> fail_job(Job, Data, db_deleted, "Database was deleted"); Error:Reason -> - couch_rate:failure(Limiter), NewRetry = Retries + 1, RetryLimit = retry_limit(), @@ -165,48 +162,27 @@ add_error(Error, Reason, Data) -> update(#{} = Db, Mrst0, State0) -> - Limiter = maps:get(limiter, State0), - case couch_rate:budget(Limiter) of - 0 -> - couch_rate:wait(Limiter), - update(Db, Mrst0, State0); - Limit -> - {Mrst1, State1} = do_update(Db, Mrst0, State0#{limit => Limit, limiter => Limiter}), - case State1 of - finished -> - couch_eval:release_map_context(Mrst1#mrst.qserver); - _ -> - couch_rate:wait(Limiter), - update(Db, Mrst1, State1) - end - end. - - -do_update(Db, Mrst0, State0) -> - fabric2_fdb:transactional(Db, fun(TxDb) -> + {Mrst2, State4} = fabric2_fdb:transactional(Db, fun(TxDb) -> State1 = get_update_start_state(TxDb, Mrst0, State0), {ok, State2} = fold_changes(State1), #{ count := Count, + limit := Limit, doc_acc := DocAcc, last_seq := LastSeq, - limit := Limit, - limiter := Limiter, view_vs := ViewVS, changes_done := ChangesDone0, design_opts := DesignOpts } = State2, + DocAcc1 = fetch_docs(TxDb, DesignOpts, DocAcc), - couch_rate:in(Limiter, Count), {Mrst1, MappedDocs} = map_docs(Mrst0, DocAcc1), - WrittenDocs = write_docs(TxDb, Mrst1, MappedDocs, State2), - - ChangesDone = ChangesDone0 + WrittenDocs, + write_docs(TxDb, Mrst1, MappedDocs, State2), - couch_rate:success(Limiter, WrittenDocs), + ChangesDone = ChangesDone0 + length(DocAcc), case Count < Limit of true -> @@ -225,7 +201,14 @@ do_update(Db, Mrst0, State0) -> view_seq := LastSeq }} end - end). + end), + + case State4 of + finished -> + couch_eval:release_map_context(Mrst2#mrst.qserver); + _ -> + update(Db, Mrst2, State4) + end. maybe_set_build_status(_TxDb, _Mrst1, not_found, _State) -> @@ -368,16 +351,14 @@ write_docs(TxDb, Mrst, Docs, State) -> KeyLimit = key_size_limit(), ValLimit = value_size_limit(), - DocsNumber = lists:foldl(fun(Doc0, N) -> + lists:foreach(fun(Doc0) -> Doc1 = calculate_kv_sizes(Mrst, Doc0, KeyLimit, ValLimit), - couch_views_fdb:write_doc(TxDb, Sig, ViewIds, Doc1), - N + 1 - end, 0, Docs), + couch_views_fdb:write_doc(TxDb, Sig, ViewIds, Doc1) + end, Docs), if LastSeq == false -> ok; true -> couch_views_fdb:set_update_seq(TxDb, Sig, LastSeq) - end, - DocsNumber. + end. fetch_docs(Db, DesignOpts, Changes) -> @@ -563,6 +544,10 @@ fail_job(Job, Data, Error, Reason) -> exit(normal). +num_changes() -> + config:get_integer("couch_views", "change_limit", 100). + + retry_limit() -> config:get_integer("couch_views", "retry_limit", 3). @@ -572,4 +557,4 @@ key_size_limit() -> value_size_limit() -> - config:get_integer("couch_views", "value_size_limit", ?VALUE_SIZE_LIMIT). \ No newline at end of file + config:get_integer("couch_views", "value_size_limit", ?VALUE_SIZE_LIMIT). diff --git a/src/couch_views/test/couch_views_indexer_test.erl b/src/couch_views/test/couch_views_indexer_test.erl index cb8378f01..cff3a2e54 100644 --- a/src/couch_views/test/couch_views_indexer_test.erl +++ b/src/couch_views/test/couch_views_indexer_test.erl @@ -52,7 +52,6 @@ indexer_test_() -> ?TDEF_FE(index_autoupdater_callback), ?TDEF_FE(handle_db_recreated_when_running), ?TDEF_FE(handle_db_recreated_after_finished), - ?TDEF_FE(index_budget_is_changing), ?TDEF_FE(index_can_recover_from_crash, 60) ] } @@ -378,55 +377,6 @@ index_autoupdater_callback(Db) -> ?assertEqual(ok, couch_views_jobs:wait_for_job(JobId, DDoc#doc.id, DbSeq)). -index_budget_is_changing(Db) -> - ok = meck:new(couch_rate, [passthrough]), - ok = meck:expect(couch_rate, budget, fun(State) -> - meck:passthrough([State]) - end), - - LimiterOpts = #{ - budget => 100, - sensitivity => 500, - target => 500, - timer => fun timer/0, - window => 2000 - }, - - ok = meck:expect(couch_rate, create_if_missing, fun(Id, Module, Store, _Options) -> - meck:passthrough([Id, Module, Store, LimiterOpts]) - end), - - ok = meck:expect(couch_rate, wait, fun(State) -> - Delay = couch_rate:delay(State), - put(time, timer() + Delay - 1) - end), - - DDoc = create_ddoc(), - Docs = lists:map(fun doc/1, lists:seq(1, 200)), - - {ok, _} = fabric2_db:update_docs(Db, [DDoc | Docs], []), - - {ok, _Out} = couch_views:query( - Db, - DDoc, - <<"map_fun2">>, - fun fold_fun/2, - [], - #mrargs{} - ), - ?assert(length(lists:usort(budget_history())) > 1). - - -timer() -> - get(time) == undefined andalso put(time, 1), - Now = get(time), - put(time, Now + 1), - Now. - - -budget_history() -> - [Result || {_Pid, {couch_rate, budget, _}, Result} <- meck:history(couch_rate)]. - multiple_design_docs(Db) -> Cleanup = fun() -> @@ -487,8 +437,7 @@ handle_db_recreated_when_running(Db) -> % To intercept job building while it is running ensure updates happen one % row at a time. - ok = meck:new(couch_rate, [passthrough]), - ok = meck:expect(couch_rate, budget, ['_'], meck:val(1)), + config:set("couch_view", "change_limit", "1", false), meck_intercept_job_update(self()), diff --git a/src/couch_views/test/couch_views_server_test.erl b/src/couch_views/test/couch_views_server_test.erl index 23c807cc2..3c0c0a86a 100644 --- a/src/couch_views/test/couch_views_server_test.erl +++ b/src/couch_views/test/couch_views_server_test.erl @@ -45,7 +45,6 @@ setup() -> Ctx = test_util:start_couch([ fabric, couch_jobs, - couch_rate, couch_js, couch_eval ]), diff --git a/src/couch_views/test/couch_views_trace_index_test.erl b/src/couch_views/test/couch_views_trace_index_test.erl index f5ea37982..03c21a34a 100644 --- a/src/couch_views/test/couch_views_trace_index_test.erl +++ b/src/couch_views/test/couch_views_trace_index_test.erl @@ -51,7 +51,7 @@ indexer_test_() -> setup() -> - test_util:start_couch([fabric, couch_js, couch_rate]). + test_util:start_couch([fabric, couch_js]). cleanup(Ctx) -> -- cgit v1.2.1 From a626125ad536f0a8003a50136bcbd7841264aa15 Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Thu, 3 Sep 2020 12:04:56 -0500 Subject: Optimize view indexer batch sizes The couch_views_batch module is responsible for sensing the largest batch sizes that can be successfully processed for a given indexer process. It works by initially searching for the maximum number of documents that can be included in a batch. Once this threshold is found it then works by slowly increasing the batch size and decreasing when its found again. This approach works to maximise batch sizes while being reactive to when a larger batch would cross over the FoundationDB transaction limits which causes the entire batch to be aborted and retried which wastes time during view builds. --- rel/overlay/etc/default.ini | 8 + src/couch_views/README.md | 17 ++ src/couch_views/src/couch_views_batch.erl | 86 +++++++ src/couch_views/src/couch_views_batch_impl.erl | 248 +++++++++++++++++++++ src/couch_views/src/couch_views_indexer.erl | 87 ++++++-- .../test/couch_views_active_tasks_test.erl | 4 +- src/couch_views/test/couch_views_batch_test.erl | 90 ++++++++ src/couch_views/test/couch_views_indexer_test.erl | 2 +- 8 files changed, 516 insertions(+), 26 deletions(-) create mode 100644 src/couch_views/src/couch_views_batch.erl create mode 100644 src/couch_views/src/couch_views_batch_impl.erl create mode 100644 src/couch_views/test/couch_views_batch_test.erl diff --git a/rel/overlay/etc/default.ini b/rel/overlay/etc/default.ini index 8f2b25e22..b837082f6 100644 --- a/rel/overlay/etc/default.ini +++ b/rel/overlay/etc/default.ini @@ -312,6 +312,14 @@ iterations = 10 ; iterations for password hashing ; ; The maximum allowed value size emitted from a view for a document (in bytes) ;value_size_limit = 64000 +; +; Batch size sensing parameters +; batch_initial_size = 100 ; Initial batch size in number of documents +; batch_search_increment = 500 ; Size change when searching for the threshold +; batch_sense_increment = 100 ; Size change increment after hitting a threshold +; batch_max_tx_size_bytes = 9000000 ; Maximum transaction size in bytes +; batch_max_tx_time_msec = 4500 ; Maximum transaction time in milliseconds +; batch_thresold_penalty = 0.2 ; Amount to reduce batch size when crossing a threshold ; CSP (Content Security Policy) Support for _utils [csp] diff --git a/src/couch_views/README.md b/src/couch_views/README.md index 09696da82..181f48ed2 100644 --- a/src/couch_views/README.md +++ b/src/couch_views/README.md @@ -7,6 +7,8 @@ Currently only map indexes are supported and it will always return the full inde Code layout: * `couch_views` - Main entry point to query a view +* `couch_views_batch` - Dynamically determine optimal batch sizes for view indexers. +* `couch_views_batch_impl` - Default implementation for optimizing batch sizes. * `couch_views_encoding` - Encodes view keys that are byte comparable following CouchDB view sort order. * `couch_views_fdb` - Maps view operations to FoundationDB logic. * `couch_views_http` - View specific helpers for chttpd @@ -16,3 +18,18 @@ Code layout: * `couch_views_server` - Spawns `couch_views_indexer` workers to handle index update jobs. * `couch_views_updater` - Update interactive indexes during doc update transactions * `couch_views_util` - Various utility functions + +# Configuration + +; Batch size sensing parameters +; batch_initial_size = 100 ; Initial batch size in number of documents +; batch_search_increment = 500 ; Size change when searching for the threshold +; batch_sense_increment = 100 ; Size change increment after hitting a threshold +; batch_max_tx_size_bytes = 9000000 ; Maximum transaction size in bytes +; batch_max_tx_time_msec = 4500 ; Maximum transaction time in milliseconds +; batch_thresold_penalty = 0.2 ; Amount to reduce batch size when crossing a threshold + +The default batch size sensing parameters are fairly straight forward. These +values can be tweaked in the config if desired. If you find that you need to +tweak these values for any reason please open an issue on GitHub reporting your +experience in case we need to adjust them for common cases. diff --git a/src/couch_views/src/couch_views_batch.erl b/src/couch_views/src/couch_views_batch.erl new file mode 100644 index 000000000..ba2a22782 --- /dev/null +++ b/src/couch_views/src/couch_views_batch.erl @@ -0,0 +1,86 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_views_batch). + + +-export([ + start/1, + success/2, + failure/1 +]). + + +-include_lib("couch_mrview/include/couch_mrview.hrl"). + +-type update_stats() :: #{ + docs_read => non_neg_integer(), + tx_size => non_neg_integer(), + total_kvs => non_neg_integer() +}. + +-export_type([update_stats/0]). + +-callback start( + Mrst::#mrst{}, + State::term() + ) -> {NewState::term(), BatchSize::pos_integer()}. + +-callback success( + Mrst::#mrst{}, + UpdateStats::update_stats(), + State::term() + ) -> NewState::term(). + +-callback failure(Mrst::#mrst{}, State::term()) -> NewState::term(). + + +-define(DEFAULT_MOD, "couch_views_batch_impl"). + + +-spec start(#mrst{}) -> pos_integer(). +start(#mrst{} = Mrst) -> + {Mod, State} = case load_state() of + {M, S} -> + {M, S}; + undefined -> + ModStr = config:get("couch_views", "batch_module", ?DEFAULT_MOD), + ModAtom = list_to_existing_atom(ModStr), + {ModAtom, undefined} + end, + {NewState, BatchSize} = Mod:start(Mrst, State), + save_state(Mod, NewState), + BatchSize. + + +-spec success(#mrst{}, UpdateStats::update_stats()) -> ok. +success(#mrst{} = Mrst, UpdateStats) -> + {Mod, State} = load_state(), + NewState = Mod:success(Mrst, UpdateStats, State), + save_state(Mod, NewState), + ok. + + +-spec failure(#mrst{}) -> ok. +failure(#mrst{} = Mrst) -> + {Mod, State} = load_state(), + NewState = Mod:failure(Mrst, State), + save_state(Mod, NewState), + ok. + + +load_state() -> + get(?MODULE). + + +save_state(Mod, State) -> + put(?MODULE, {Mod, State}). diff --git a/src/couch_views/src/couch_views_batch_impl.erl b/src/couch_views/src/couch_views_batch_impl.erl new file mode 100644 index 000000000..cacd53b1b --- /dev/null +++ b/src/couch_views/src/couch_views_batch_impl.erl @@ -0,0 +1,248 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_views_batch_impl). + +-behavior(couch_views_batch). + + +-export([ + start/2, + success/3, + failure/2 +]). + + +-include("couch_mrview/include/couch_mrview.hrl"). + + +-record(batch_st, { + start_time, + state, + size, + search_incr, + sense_incr, + max_tx_size_bytes, + max_tx_time_msec, + threshold_penalty +}). + + +-spec start( + Mrst::#mrst{}, + State::term() + ) -> {NewState::term(), BatchSize::pos_integer()}. +start(Mrst, undefined) -> + St = #batch_st{ + state = search, + size = get_config(batch_initial_size, "100"), + search_incr = get_config(batch_search_increment, "500"), + sense_incr = get_config(batch_sense_increment, "100"), + max_tx_size_bytes = get_config(batch_max_tx_size_bytes, "9000000"), + max_tx_time_msec = get_config(batch_max_tx_time_msec, "4500"), + threshold_penalty = get_config( + batch_threshold_penalty, + "0.2", + fun float_0_to_1/2 + ) + }, + start(Mrst, St); + +start(_Mrst, #batch_st{size = Size} = St) -> + NewSt = St#batch_st{ + start_time = erlang:monotonic_time() + }, + {NewSt, Size}. + + +-spec success( + Mrst::#mrst{}, + UpdateStats::couch_views_batch:update_stats(), + State::term() + ) -> NewState::term(). +success(_Mrst, #{tx_size := TxSize}, #batch_st{} = St) -> + #batch_st{ + start_time = StartTime, + size = Size, + state = State, + search_incr = SearchIncr, + sense_incr = SenseIncr, + max_tx_size_bytes = MaxTxSize, + max_tx_time_msec = MaxTxTime, + threshold_penalty = ThresholdPenalty + } = St, + + TxTimeNative = erlang:monotonic_time() - StartTime, + TxTime = erlang:convert_time_unit(TxTimeNative, native, millisecond), + + {NewSize, NewState} = case TxSize > MaxTxSize orelse TxTime > MaxTxTime of + true -> + {round(Size * (1.0 - ThresholdPenalty)), sense}; + false when State == search -> + {Size + SearchIncr, State}; + false when State == sense -> + {Size + SenseIncr, State} + end, + + St#batch_st{ + size = erlang:max(1, NewSize), + state = NewState + }. + + +-spec failure(Mrst::#mrst{}, State::term()) -> NewState::term(). +failure(_Mrst, #batch_st{} = St) -> + St#batch_st{ + size = erlang:max(1, St#batch_st.size div 2), + state = sense + }. + + +get_config(Key, Default) -> + get_config(Key, Default, fun non_neg_integer/2). + + +get_config(Key, Default, Validator) -> + StrVal = config:get("couch_views", atom_to_list(Key), Default), + Validator(Key, StrVal). + + +non_neg_integer(Name, Str) -> + try + Val = list_to_integer(Str), + true = Val > 0, + Val + catch _:_ -> + erlang:error({invalid_non_neg_integer, {couch_views, Name, Str}}) + end. + + +float_0_to_1(Name, Str) -> + Val = try + list_to_float(Str) + catch error:badarg -> + erlang:error({invalid_float, {couch_views, Name, Str}}) + end, + if Val >= 0.0 andalso Val =< 1.0 -> Val; true -> + erlang:error({float_out_of_range, {couch_views, Name, Str}}) + end. + + +-ifdef(TEST). +-include_lib("eunit/include/eunit.hrl"). + + +good_config_test() -> + with_good_config(fun() -> + {St, 1} = start(#mrst{}, undefined), + ?assertMatch( + #batch_st{ + state = search, + size = 1, + search_incr = 2, + sense_incr = 3, + max_tx_size_bytes = 4, + max_tx_time_msec = 5, + threshold_penalty = 0.6 + }, + St + ) + end). + + +bad_config_test() -> + Fields = [ + {batch_initial_size, invalid_non_neg_integer}, + {batch_search_increment, invalid_non_neg_integer}, + {batch_sense_increment, invalid_non_neg_integer}, + {batch_max_tx_size_bytes, invalid_non_neg_integer}, + {batch_max_tx_time_msec, invalid_non_neg_integer}, + {batch_threshold_penalty, invalid_float} + ], + lists:foreach(fun({Field, Error}) -> + with_bad_config(atom_to_list(Field), fun() -> + ?assertError( + {Error, {couch_views, Field, _}}, + start(#mrst{}, undefined) + ) + end) + end, Fields). + + +float_range_test() -> + with_bad_float_config("batch_threshold_penalty", fun() -> + lists:foreach(fun(_) -> + ?assertError( + {float_out_of_range, {couch_views, batch_threshold_penalty, _}}, + start(#mrst{}, undefined) + ) + end, lists:seq(1, 10)) + end). + + +with_good_config(Fun) -> + meck:new(config), + meck:expect(config, get, fun + ("couch_views", "batch_initial_size", _) -> "1"; + ("couch_views", "batch_search_increment", _) -> "2"; + ("couch_views", "batch_sense_increment", _) -> "3"; + ("couch_views", "batch_max_tx_size_bytes", _) -> "4"; + ("couch_views", "batch_max_tx_time_msec", _) -> "5"; + ("couch_views", "batch_threshold_penalty", _) -> "0.6" + end), + try + Fun() + after + meck:unload() + end. + + +with_bad_config(FieldName, Fun) -> + meck:new(config), + meck:expect(config, get, fun("couch_views", Field, Default) -> + case Field == FieldName of + true -> + case rand:uniform() < 0.5 of + true -> "foo"; + false -> -10 + end; + false -> + Default + end + end), + try + Fun() + after + meck:unload() + end. + + +with_bad_float_config(FieldName, Fun) -> + meck:new(config), + meck:expect(config, get, fun("couch_views", Field, Default) -> + case Field == FieldName of + true -> + case rand:uniform() < 0.5 of + true -> "100.0"; + false -> "-0.5" + end; + false -> + Default + end + end), + try + Fun() + after + meck:unload() + end. + +-endif. diff --git a/src/couch_views/src/couch_views_indexer.erl b/src/couch_views/src/couch_views_indexer.erl index 737b6f880..17b0daab7 100644 --- a/src/couch_views/src/couch_views_indexer.erl +++ b/src/couch_views/src/couch_views_indexer.erl @@ -33,12 +33,20 @@ -include_lib("couch_mrview/include/couch_mrview.hrl"). -include_lib("fabric/include/fabric2.hrl"). -% TODO: -% * Handle timeouts of transaction and other errors -define(KEY_SIZE_LIMIT, 8000). -define(VALUE_SIZE_LIMIT, 64000). +% These are all of the errors that we can fix by using +% a smaller batch size. +-define(IS_RECOVERABLE_ERROR(Code), ( + (Code == 1004) % timed_out + orelse (Code == 1007) % transaction_too_old + orelse (Code == 1031) % transaction_timed_out + orelse (Code == 2101) % transaction_too_large +)). + + spawn_link() -> proc_lib:spawn_link(?MODULE, init, []). @@ -88,10 +96,10 @@ init() -> job => Job, job_data => Data, count => 0, - limit => num_changes(), changes_done => 0, doc_acc => [], - design_opts => Mrst#mrst.design_opts + design_opts => Mrst#mrst.design_opts, + update_stats => #{} }, try @@ -162,7 +170,32 @@ add_error(Error, Reason, Data) -> update(#{} = Db, Mrst0, State0) -> - {Mrst2, State4} = fabric2_fdb:transactional(Db, fun(TxDb) -> + Limit = couch_views_batch:start(Mrst0), + {Mrst1, State1} = try + do_update(Db, Mrst0, State0#{limit => Limit}) + catch + error:{erlfdb_error, Error} when ?IS_RECOVERABLE_ERROR(Error) -> + couch_views_batch:failure(Mrst0), + update(Db, Mrst0, State0) + end, + case State1 of + finished -> + couch_eval:release_map_context(Mrst1#mrst.qserver); + _ -> + #{ + update_stats := UpdateStats + } = State1, + couch_views_batch:success(Mrst1, UpdateStats), + update(Db, Mrst1, State1) + end. + + +do_update(Db, Mrst0, State0) -> + fabric2_fdb:transactional(Db, fun(TxDb) -> + #{ + tx := Tx + } = TxDb, + State1 = get_update_start_state(TxDb, Mrst0, State0), {ok, State2} = fold_changes(State1), @@ -180,10 +213,16 @@ update(#{} = Db, Mrst0, State0) -> DocAcc1 = fetch_docs(TxDb, DesignOpts, DocAcc), {Mrst1, MappedDocs} = map_docs(Mrst0, DocAcc1), - write_docs(TxDb, Mrst1, MappedDocs, State2), + TotalKVs = write_docs(TxDb, Mrst1, MappedDocs, State2), ChangesDone = ChangesDone0 + length(DocAcc), + UpdateStats = #{ + docs_read => length(DocAcc), + tx_size => erlfdb:wait(erlfdb:get_approximate_size(Tx)), + total_kvs => TotalKVs + }, + case Count < Limit of true -> maybe_set_build_status(TxDb, Mrst1, ViewVS, @@ -198,17 +237,11 @@ update(#{} = Db, Mrst0, State0) -> count := 0, doc_acc := [], changes_done := ChangesDone, - view_seq := LastSeq + view_seq := LastSeq, + update_stats := UpdateStats }} end - end), - - case State4 of - finished -> - couch_eval:release_map_context(Mrst2#mrst.qserver); - _ -> - update(Db, Mrst2, State4) - end. + end). maybe_set_build_status(_TxDb, _Mrst1, not_found, _State) -> @@ -351,14 +384,17 @@ write_docs(TxDb, Mrst, Docs, State) -> KeyLimit = key_size_limit(), ValLimit = value_size_limit(), - lists:foreach(fun(Doc0) -> + TotalKVCount = lists:foldl(fun(Doc0, KVCount) -> Doc1 = calculate_kv_sizes(Mrst, Doc0, KeyLimit, ValLimit), - couch_views_fdb:write_doc(TxDb, Sig, ViewIds, Doc1) - end, Docs), + couch_views_fdb:write_doc(TxDb, Sig, ViewIds, Doc1), + KVCount + count_kvs(Doc1) + end, 0, Docs), if LastSeq == false -> ok; true -> couch_views_fdb:set_update_seq(TxDb, Sig, LastSeq) - end. + end, + + TotalKVCount. fetch_docs(Db, DesignOpts, Changes) -> @@ -478,6 +514,15 @@ calculate_kv_sizes(Mrst, Doc, KeyLimit, ValLimit) -> end. +count_kvs(Doc) -> + #{ + results := Results + } = Doc, + lists:foldl(fun(ViewRows, Count) -> + Count + length(ViewRows) + end, 0, Results). + + report_progress(State, UpdateType) -> #{ tx_db := TxDb, @@ -544,10 +589,6 @@ fail_job(Job, Data, Error, Reason) -> exit(normal). -num_changes() -> - config:get_integer("couch_views", "change_limit", 100). - - retry_limit() -> config:get_integer("couch_views", "retry_limit", 3). diff --git a/src/couch_views/test/couch_views_active_tasks_test.erl b/src/couch_views/test/couch_views_active_tasks_test.erl index 6085d73a8..b7f36a343 100644 --- a/src/couch_views/test/couch_views_active_tasks_test.erl +++ b/src/couch_views/test/couch_views_active_tasks_test.erl @@ -47,8 +47,8 @@ foreach_setup() -> Docs = make_docs(?TOTAL_DOCS), fabric2_db:update_docs(Db, [DDoc | Docs]), - meck:new(couch_rate, [passthrough]), - meck:expect(couch_rate, budget, fun(_) -> 100 end), + meck:new(couch_views_batch, [passthrough]), + meck:expect(couch_views_batch, success, fun(_, _) -> 100 end), {Db, DDoc}. diff --git a/src/couch_views/test/couch_views_batch_test.erl b/src/couch_views/test/couch_views_batch_test.erl new file mode 100644 index 000000000..78e68925e --- /dev/null +++ b/src/couch_views/test/couch_views_batch_test.erl @@ -0,0 +1,90 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_views_batch_test). + + +-include_lib("eunit/include/eunit.hrl"). +-include_lib("fabric/test/fabric2_test.hrl"). +-include_lib("couch_mrview/include/couch_mrview.hrl"). + + +batch_test_() -> + { + "Test view batch sizing", + { + setup, + fun setup/0, + fun cleanup/1, + with([ + ?TDEF(basic), + ?TDEF(search_success), + ?TDEF(sense_success), + ?TDEF(failure), + ?TDEF(failure_switches_to_sense) + ]) + } + }. + + +setup() -> + test_util:start_couch(). + + +cleanup(Ctx) -> + test_util:stop_couch(Ctx). + + +basic(_) -> + erase(couch_views_batch), + ?assertEqual(100, couch_views_batch:start(#mrst{})). + + +search_success(_) -> + erase(couch_views_batch), + couch_views_batch:start(#mrst{}), + couch_views_batch:success(#mrst{}, ustats(0, 0, 0)), + ?assertEqual(600, couch_views_batch:start(#mrst{})). + + +sense_success(_) -> + erase(couch_views_batch), + couch_views_batch:start(#mrst{}), + % Exceeding our threshold switches from search to sense + couch_views_batch:success(#mrst{}, ustats(5000, 10000000, 10000)), + ?assertEqual(80, couch_views_batch:start(#mrst{})), + couch_views_batch:success(#mrst{}, ustats(0, 0, 0)), + ?assertEqual(180, couch_views_batch:start(#mrst{})). + + +failure(_) -> + erase(couch_views_batch), + couch_views_batch:start(#mrst{}), + couch_views_batch:failure(#mrst{}), + ?assertEqual(50, couch_views_batch:start(#mrst{})). + + +failure_switches_to_sense(_) -> + erase(couch_views_batch), + couch_views_batch:start(#mrst{}), + couch_views_batch:failure(#mrst{}), + couch_views_batch:start(#mrst{}), + couch_views_batch:success(#mrst{}, ustats(0, 0, 0)), + ?assertEqual(150, couch_views_batch:start(#mrst{})). + + +ustats(DocsRead, TxSize, TotalKVs) -> + #{ + docs_read => DocsRead, + tx_size => TxSize, + total_kvs => TotalKVs + }. diff --git a/src/couch_views/test/couch_views_indexer_test.erl b/src/couch_views/test/couch_views_indexer_test.erl index cff3a2e54..86c0a8195 100644 --- a/src/couch_views/test/couch_views_indexer_test.erl +++ b/src/couch_views/test/couch_views_indexer_test.erl @@ -437,7 +437,7 @@ handle_db_recreated_when_running(Db) -> % To intercept job building while it is running ensure updates happen one % row at a time. - config:set("couch_view", "change_limit", "1", false), + config:set("couch_views", "batch_initial_size", "1", false), meck_intercept_job_update(self()), -- cgit v1.2.1 From 8cd1792235b724db47d25e322178e06a978ed690 Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Fri, 4 Sep 2020 09:29:21 -0500 Subject: Move error reporting test to EUnit This test doesn't fail correctly any longer. Rather than attempting to create a new pathological view case I've just moved it to eunit where we can use meck to throw errors directly. --- src/couch_views/test/couch_views_error_test.erl | 102 ++++++++++++++++++++++++ test/elixir/test/map_test.exs | 32 -------- 2 files changed, 102 insertions(+), 32 deletions(-) create mode 100644 src/couch_views/test/couch_views_error_test.erl diff --git a/src/couch_views/test/couch_views_error_test.erl b/src/couch_views/test/couch_views_error_test.erl new file mode 100644 index 000000000..8b6399e0e --- /dev/null +++ b/src/couch_views/test/couch_views_error_test.erl @@ -0,0 +1,102 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_views_error_test). + +-include_lib("eunit/include/eunit.hrl"). +-include_lib("couch/include/couch_db.hrl"). +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("fabric/test/fabric2_test.hrl"). + +-define(USER, "chttpd_db_test_admin"). +-define(PASS, "pass"). +-define(AUTH, {basic_auth, {?USER, ?PASS}}). +-define(CONTENT_JSON, {"Content-Type", "application/json"}). + + +error_test_() -> + { + "Test views report errors", + { + setup, + fun setup/0, + fun teardown/1, + { + foreach, + fun foreach_setup/0, + fun foreach_teardown/1, + [ + ?TDEF_FE(view_reports_error) + ] + } + } + }. + + +setup() -> + Ctx = test_util:start_couch([ + fabric, + chttpd, + couch_jobs, + couch_js, + couch_views + ]), + Hashed = couch_passwords:hash_admin_password(?PASS), + ok = config:set("admins", ?USER, ?b2l(Hashed), _Persist=false), + Ctx. + + +teardown(Ctx) -> + test_util:stop_couch(Ctx). + + +foreach_setup() -> + Addr = config:get("chttpd", "bind_address", "127.0.0.1"), + Port = mochiweb_socket_server:get(chttpd, port), + {ok, Db} = fabric2_db:create(?tempdb(), [{user_ctx, ?ADMIN_USER}]), + DbName = fabric2_db:name(Db), + Url = lists:concat(["http://", Addr, ":", Port, "/", ?b2l(DbName)]), + {Db, Url}. + + +foreach_teardown({Db, _}) -> + meck:unload(), + ok = fabric2_db:delete(fabric2_db:name(Db), []). + + +view_reports_error({Db, Url}) -> + meck:new(couch_views_batch, [passthrough]), + meck:expect(couch_views_batch, start, fun(_) -> + erlang:error({erlfdb_error, 2101}) + end), + + {ok, _} = fabric2_db:update_doc(Db, ddoc(), []), + + ViewUrl = lists:concat([Url, "/_design/foo/_view/bar"]), + {ok, Status, _Headers, Body} = test_request:get(ViewUrl, [?AUTH]), + + ?assertEqual(500, Status), + {Props} = couch_util:json_decode(Body), + {<<"error">>, Error} = lists:keyfind(<<"error">>, 1, Props), + ?assertEqual(<<"foundationdb_error">>, Error). + + +ddoc() -> + couch_doc:from_json_obj({[ + {<<"_id">>, <<"_design/foo">>}, + {<<"language">>, <<"javascript">>}, + {<<"views">>, {[ + {<<"bar">>, {[ + {<<"map">>, <<"function(doc) {emit(doc.value, doc.value);}">>} + ]}} + ]}} + ]}). diff --git a/test/elixir/test/map_test.exs b/test/elixir/test/map_test.exs index 9254cc4c3..3e2765fbd 100644 --- a/test/elixir/test/map_test.exs +++ b/test/elixir/test/map_test.exs @@ -503,38 +503,6 @@ defmodule ViewMapTest do assert keys == ["bar"] end - test "send error for failed indexing", context do - db_name = context[:db_name] - - docs = [ - %{_id: "doc1", foo: "foo", bar: "bar"}, - %{ - _id: "_design/view1", - views: %{ - view: %{ - map: """ - function (doc) { - for (var i=0; i<10000; i++) { - emit({doc: doc._id + 1}, doc._id); - } - } - """ - } - } - } - ] - - resp = Couch.post("/#{db_name}/_bulk_docs", body: %{:docs => docs}) - assert resp.status_code == 201 - - url = "/#{db_name}/_design/view1/_view/view" - - resp = Couch.get(url, timeout: 500_000) - assert resp.status_code == 500 - %{:body => %{"error" => error}} = resp - assert error == "foundationdb_error" - end - test "descending=true query with startkey_docid", context do db_name = context[:db_name] -- cgit v1.2.1 From 6169104a24d8c587c9866904a884bcef33c6913b Mon Sep 17 00:00:00 2001 From: Joan Touzet Date: Tue, 15 Sep 2020 18:27:44 +0000 Subject: Drop Jenkins ppc64le builds (for now) (#3151) --- build-aux/Jenkinsfile.full | 90 ++++++++++++++++++++++++---------------------- 1 file changed, 47 insertions(+), 43 deletions(-) diff --git a/build-aux/Jenkinsfile.full b/build-aux/Jenkinsfile.full index cc13f9d16..d7a7657b4 100644 --- a/build-aux/Jenkinsfile.full +++ b/build-aux/Jenkinsfile.full @@ -593,49 +593,53 @@ pipeline { } // post } // stage - stage('Debian Buster ppc64le') { - agent { - docker { - image 'couchdbdev/ppc64le-debian-buster-erlang-20.3.8.25-1:latest' - label 'ppc64le' - alwaysPull true - args "${DOCKER_ARGS}" - } - } - environment { - platform = 'buster' - sm_ver = '60' - } - stages { - stage('Build from tarball & test') { - steps { - unstash 'tarball' - sh( script: build_and_test ) - } - post { - always { - junit '**/.eunit/*.xml, **/_build/*/lib/couchdbtest/*.xml, **/src/mango/nosetests.xml, **/test/javascript/junit.xml' - } - } - } - stage('Build CouchDB packages') { - steps { - sh( script: make_packages ) - sh( script: cleanup_and_save ) - } - post { - success { - archiveArtifacts artifacts: 'pkgs/**', fingerprint: true - } - } - } - } // stages - post { - cleanup { - sh 'rm -rf ${WORKSPACE}/*' - } - } // post - } // stage +/* + - Removed 2020.09.15 - VMs are offline +*/ + +// stage('Debian Buster ppc64le') { +// agent { +// docker { +// image 'couchdbdev/ppc64le-debian-buster-erlang-20.3.8.25-1:latest' +// label 'ppc64le' +// alwaysPull true +// args "${DOCKER_ARGS}" +// } +// } +// environment { +// platform = 'buster' +// sm_ver = '60' +// } +// stages { +// stage('Build from tarball & test') { +// steps { +// unstash 'tarball' +// sh( script: build_and_test ) +// } +// post { +// always { +// junit '**/.eunit/*.xml, **/_build/*/lib/couchdbtest/*.xml, **/src/mango/nosetests.xml, **/test/javascript/junit.xml' +// } +// } +// } +// stage('Build CouchDB packages') { +// steps { +// sh( script: make_packages ) +// sh( script: cleanup_and_save ) +// } +// post { +// success { +// archiveArtifacts artifacts: 'pkgs/**', fingerprint: true +// } +// } +// } +// } // stages +// post { +// cleanup { +// sh 'rm -rf ${WORKSPACE}/*' +// } +// } // post +// } // stage /* * Example of how to do a qemu-based run, please leave here -- cgit v1.2.1 From 36a6b390f4f5e236401224ed30dea7282be965a5 Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Fri, 28 Aug 2020 04:30:09 -0400 Subject: Add after_db_create/2 and after_db_delete/2 callbacks to fabric `after_db_create/2` and `after_db_delete/2` are when databases are created and deleted respectively. The callbacks are called with both the database name and the database instance UUID values. --- rel/apps/couch_epi.config | 1 + src/fabric/src/fabric2_db.erl | 15 ++++++++++++--- src/fabric/src/fabric2_db_plugin.erl | 10 ++++++++++ 3 files changed, 23 insertions(+), 3 deletions(-) diff --git a/rel/apps/couch_epi.config b/rel/apps/couch_epi.config index d3711636f..f9f49e1c3 100644 --- a/rel/apps/couch_epi.config +++ b/rel/apps/couch_epi.config @@ -16,6 +16,7 @@ chttpd_epi, couch_index_epi, couch_views_epi, + couch_replicator_epi, dreyfus_epi, global_changes_epi, mango_epi, diff --git a/src/fabric/src/fabric2_db.erl b/src/fabric/src/fabric2_db.erl index b62f26ec8..b3e510b2e 100644 --- a/src/fabric/src/fabric2_db.erl +++ b/src/fabric/src/fabric2_db.erl @@ -187,6 +187,7 @@ create(DbName, Options) -> #{} = Db0 -> Db1 = maybe_add_sys_db_callbacks(Db0), ok = fabric2_server:store(Db1), + fabric2_db_plugin:after_db_create(DbName, get_uuid(Db1)), {ok, Db1#{tx := undefined}}; Error -> Error @@ -235,6 +236,7 @@ delete(DbName, Options) -> fabric2_fdb:delete(TxDb) end), if Resp /= ok -> Resp; true -> + fabric2_db_plugin:after_db_delete(DbName, get_uuid(Db)), fabric2_server:remove(DbName) end end. @@ -243,9 +245,16 @@ delete(DbName, Options) -> undelete(DbName, TgtDbName, TimeStamp, Options) -> case validate_dbname(TgtDbName) of ok -> - fabric2_fdb:transactional(DbName, Options, fun(TxDb) -> - fabric2_fdb:undelete(TxDb, TgtDbName, TimeStamp) - end); + Resp = fabric2_fdb:transactional(DbName, Options, + fun(TxDb) -> + fabric2_fdb:undelete(TxDb, TgtDbName, TimeStamp) + end + ), + if Resp /= ok -> ok; true -> + {ok, Db} = open(TgtDbName, Options), + fabric2_db_plugin:after_db_create(TgtDbName, get_uuid(Db)) + end, + Resp; Error -> Error end. diff --git a/src/fabric/src/fabric2_db_plugin.erl b/src/fabric/src/fabric2_db_plugin.erl index 1d923dd96..095b94cf4 100644 --- a/src/fabric/src/fabric2_db_plugin.erl +++ b/src/fabric/src/fabric2_db_plugin.erl @@ -14,6 +14,8 @@ -export([ validate_dbname/3, + after_db_create/2, + after_db_delete/2, before_doc_update/3, after_doc_write/6, after_doc_read/2, @@ -37,6 +39,14 @@ validate_dbname(DbName, Normalized, Default) -> maybe_handle(validate_dbname, [DbName, Normalized], Default). +after_db_create(DbName, DbUUID) when is_binary(DbName), is_binary(DbUUID) -> + with_pipe(after_db_create, [DbName, DbUUID]). + + +after_db_delete(DbName, DbUUID) when is_binary(DbName), is_binary(DbUUID) -> + with_pipe(after_db_delete, [DbName, DbUUID]). + + before_doc_update(_, #doc{id = <>} = Doc, _) -> Doc; -- cgit v1.2.1 From e3b1c418ebc9735a46ef8e1f36d09d7023939372 Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Fri, 28 Aug 2020 04:30:27 -0400 Subject: Read attachment data outside the transaction Previously the data was read from the parser in the transaction. If the transaction had to retry, for example, because of a conflict, the parser would have been drained and exited resulting the request failing with a 500 "mp_parser noproc" error. Since FDB cannot handle transactions larger than 10MB opt to read the attachment data into memory first, before the transaction starts. --- src/chttpd/src/chttpd_db.erl | 26 ++++++++++++++++++-------- src/couch/src/couch_att.erl | 20 ++++++++++++++++++-- 2 files changed, 36 insertions(+), 10 deletions(-) diff --git a/src/chttpd/src/chttpd_db.erl b/src/chttpd/src/chttpd_db.erl index ec4a1a40f..b57010d4f 100644 --- a/src/chttpd/src/chttpd_db.erl +++ b/src/chttpd/src/chttpd_db.erl @@ -418,12 +418,13 @@ db_req(#httpd{method='POST', path_parts=[DbName]}=Req, Db) -> _ -> Doc1 end, - DocId = Doc2#doc.id, + Doc3 = read_att_data(Doc2), + DocId = Doc3#doc.id, case chttpd:qs_value(Req, "batch") of "ok" -> % async_batching spawn(fun() -> - case catch(fabric2_db:update_doc(Db, Doc2, [])) of + case catch(fabric2_db:update_doc(Db, Doc3, [])) of {ok, _} -> chttpd_stats:incr_writes(), ok; @@ -443,7 +444,7 @@ db_req(#httpd{method='POST', path_parts=[DbName]}=Req, Db) -> % normal DocUrl = absolute_uri(Req, [$/, couch_util:url_encode(DbName), $/, couch_util:url_encode(DocId)]), - case fabric2_db:update_doc(Db, Doc2, []) of + case fabric2_db:update_doc(Db, Doc3, []) of {ok, NewRev} -> chttpd_stats:incr_writes(), HttpCode = 201; @@ -1174,7 +1175,8 @@ db_doc_req(#httpd{method='POST'}=Req, Db, DocId) -> NewDoc = Doc#doc{ atts = UpdatedAtts ++ OldAtts2 }, - case fabric2_db:update_doc(Db, NewDoc, []) of + NewDoc1 = read_att_data(NewDoc), + case fabric2_db:update_doc(Db, NewDoc1, []) of {ok, NewRev} -> chttpd_stats:incr_writes(), HttpCode = 201; @@ -1218,8 +1220,8 @@ db_doc_req(#httpd{method='PUT'}=Req, Db, DocId) -> case chttpd:qs_value(Req, "batch") of "ok" -> % batch - Doc = couch_doc_from_req(Req, Db, DocId, chttpd:json_body(Req)), - + Doc0 = couch_doc_from_req(Req, Db, DocId, chttpd:json_body(Req)), + Doc = read_att_data(Doc0), spawn(fun() -> case catch(fabric2_db:update_doc(Db, Doc, [])) of {ok, _} -> @@ -1479,7 +1481,8 @@ http_code_from_status(Status) -> 200 end. -update_doc(Db, DocId, #doc{deleted=Deleted, body=DocBody}=Doc, Options) -> +update_doc(Db, DocId, #doc{deleted=Deleted, body=DocBody}=Doc0, Options) -> + Doc = read_att_data(Doc0), case fabric2_db:update_doc(Db, Doc, Options) of {ok, NewRev} -> Accepted = false; @@ -1766,9 +1769,10 @@ db_attachment_req(#httpd{method=Method}=Req, Db, DocId, FileNameParts) end, #doc{atts=Atts} = Doc, - DocEdited = Doc#doc{ + DocEdited0 = Doc#doc{ atts = NewAtt ++ [A || A <- Atts, couch_att:fetch(name, A) /= FileName] }, + DocEdited = read_att_data(DocEdited0), case fabric2_db:update_doc(Db, DocEdited, []) of {ok, UpdatedRev} -> chttpd_stats:incr_writes(), @@ -2240,3 +2244,9 @@ bulk_get_json_error(DocId, Rev, Error, Reason) -> {<<"rev">>, Rev}, {<<"error">>, Error}, {<<"reason">>, Reason}]}}]}). + + +read_att_data(#doc{} = Doc) -> + #doc{atts = Atts} = Doc, + Atts1 = lists:map(fun couch_att:read_data/1, Atts), + Doc#doc{atts = Atts1}. diff --git a/src/couch/src/couch_att.erl b/src/couch/src/couch_att.erl index d41ab5bf2..b4c95e933 100644 --- a/src/couch/src/couch_att.erl +++ b/src/couch/src/couch_att.erl @@ -40,6 +40,7 @@ -export([ flush/3, + read_data/1, foldl/3, range_foldl/5, foldl_decode/3, @@ -374,7 +375,14 @@ to_json(Att, OutputData, DataToFollow, ShowEncoding) -> flush(Db, DocId, Att1) -> - Att2 = read_data(fetch(data, Att1), Att1), + Data0 = fetch(data, Att1), + case {Data0, Db} of + {{follows, _, _}, #{tx := Tx}} when Tx =/= undefined -> + error(follows_cannot_be_used_in_a_transaction); + {_, #{}} -> + ok + end, + Att2 = read_data(Data0, Att1), [ Data, AttLen, @@ -419,6 +427,11 @@ flush(Db, DocId, Att1) -> end. +read_data(Att) -> + Data = fetch(data, Att), + read_data(Data, Att). + + read_data({loc, #{}, _DocId, _AttId}, Att) -> % Attachment already written to fdb Att; @@ -443,7 +456,10 @@ read_data({follows, Parser, Ref}, Att) -> end; read_data(Data, Att) when is_binary(Data) -> - Att; + case fetch(att_len, Att) of + undefined -> store(att_len, size(Data), Att); + Int when is_integer(Int) -> Att + end; read_data(Fun, Att) when is_function(Fun) -> [AttName, AttLen, InMd5] = fetch([name, att_len, md5], Att), -- cgit v1.2.1 From 9897cd84a37fa0062dfba15f2e0eb6cc611be74a Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Fri, 28 Aug 2020 04:30:39 -0400 Subject: Handle possible iodata from jiffy:encode in couch_jobs Also, ensure to use the same options as other couch apps: force_utf8 and dedupe_keys. --- src/couch_jobs/src/couch_jobs_fdb.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/couch_jobs/src/couch_jobs_fdb.erl b/src/couch_jobs/src/couch_jobs_fdb.erl index 3fcad554a..27131ec86 100644 --- a/src/couch_jobs/src/couch_jobs_fdb.erl +++ b/src/couch_jobs/src/couch_jobs_fdb.erl @@ -414,7 +414,7 @@ init_cache() -> % encode_data(#{} = JobData) -> try - jiffy:encode(JobData) + iolist_to_binary(jiffy:encode(JobData, [force_utf8])) catch throw:{error, Error} -> % legacy clause since new versions of jiffy raise error instead @@ -431,7 +431,7 @@ decode_data(#{} = JobData) -> JobData; decode_data(<<_/binary>> = JobData) -> - jiffy:decode(JobData, [return_maps]). + jiffy:decode(JobData, [dedupe_keys, return_maps]). % Cached job transaction object. This object wraps a transaction, caches the -- cgit v1.2.1 From d2c9dffa3aca3b3e2faed49526b0065ebb845fad Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Fri, 28 Aug 2020 04:30:53 -0400 Subject: Add fold_jobs/4 and pending_count/2,3 to couch_jobs API fold_jobs/4 is a generalized folding API which can be used to process all the jobs of a particular type. get_pending_count/2,3 can be used to get the count of pending jobs. It takes the same options as accept, including a limit and `max_sched_time`. Just like accept it reads the range of pending jobs as a snapshot to avoid generating read conflicts. --- src/couch_jobs/src/couch_jobs.erl | 37 +++++++++++++++++++++++++++---- src/couch_jobs/src/couch_jobs_pending.erl | 32 +++++++++++++++++++++----- 2 files changed, 59 insertions(+), 10 deletions(-) diff --git a/src/couch_jobs/src/couch_jobs.erl b/src/couch_jobs/src/couch_jobs.erl index f5d6a7b96..6c40f5dff 100644 --- a/src/couch_jobs/src/couch_jobs.erl +++ b/src/couch_jobs/src/couch_jobs.erl @@ -17,10 +17,15 @@ add/4, add/5, remove/3, + + % Job monitoring + get_types/1, get_job_data/3, get_job_state/3, get_active_jobs_ids/2, - get_types/1, + fold_jobs/4, + pending_count/2, + pending_count/3, % Job processing accept/1, @@ -80,6 +85,13 @@ remove(Tx, Type, JobId) when is_binary(JobId) -> end). +-spec get_types(jtx()) -> [job_type()] | {error, any()}. +get_types(Tx) -> + couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(Tx), fun(JTx) -> + couch_jobs_fdb:get_types(JTx) + end). + + -spec get_job_data(jtx(), job_type(), job_id()) -> {ok, job_data()} | {error, any()}. get_job_data(Tx, Type, JobId) when is_binary(JobId) -> @@ -116,10 +128,27 @@ get_active_jobs_ids(Tx, Type) -> end). --spec get_types(jtx()) -> [job_type()] | {error, any()}. -get_types(Tx) -> +-spec fold_jobs(jtx(), job_type(), fun(), any()) -> any(). +fold_jobs(Tx, Type, Fun, UserAcc) when is_function(Fun, 5) -> couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(Tx), fun(JTx) -> - couch_jobs_fdb:get_types(JTx) + maps:fold(fun(JobId, {_Seq, JobState, DataEnc}, Acc) -> + Data = couch_jobs_fdb:decode_data(DataEnc), + Fun(JTx, JobId, JobState, Data, Acc) + end, UserAcc, couch_jobs_fdb:get_jobs(JTx, Type)) + end). + + +-spec pending_count(jtx(), job_type()) -> integer(). +pending_count(Tx, Type) -> + pending_count(Tx, Type, #{}). + + +-spec pending_count(jtx(), job_type(), #{}) -> integer(). +pending_count(Tx, Type, Opts) -> + MaxSTime = maps:get(max_sched_time, Opts, ?UNDEFINED_MAX_SCHEDULED_TIME), + Limit = maps:get(limit, Opts, 1024), + couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(Tx), fun(JTx) -> + couch_jobs_pending:pending_count(JTx, Type, MaxSTime, Limit) end). diff --git a/src/couch_jobs/src/couch_jobs_pending.erl b/src/couch_jobs/src/couch_jobs_pending.erl index ab53c59d1..a85f2fc5c 100644 --- a/src/couch_jobs/src/couch_jobs_pending.erl +++ b/src/couch_jobs/src/couch_jobs_pending.erl @@ -16,7 +16,8 @@ -export([ enqueue/4, dequeue/4, - remove/4 + remove/4, + pending_count/4 ]). @@ -47,16 +48,14 @@ dequeue(#{jtx := true} = JTx, Type, _, true) -> {ok, JobId} end; -dequeue(#{jtx := true} = JTx, Type, MaxPriority, _) -> +dequeue(#{jtx := true} = JTx, Type, MaxSTime, _) -> #{tx := Tx, jobs_path := Jobs} = JTx, - Prefix = erlfdb_tuple:pack({?PENDING, Type}, Jobs), - StartKeySel = erlfdb_key:first_greater_than(Prefix), - End = erlfdb_tuple:pack({MaxPriority, <<16#FF>>}, Prefix), - EndKeySel = erlfdb_key:first_greater_or_equal(End), + {StartKeySel, EndKeySel} = get_range_selectors(JTx, Type, MaxSTime), case clear_random_key_from_range(Tx, StartKeySel, EndKeySel) of {error, not_found} -> {not_found, get_pending_watch(JTx, Type)}; {ok, PendingKey} -> + Prefix = erlfdb_tuple:pack({?PENDING, Type}, Jobs), {_, JobId} = erlfdb_tuple:unpack(PendingKey, Prefix), {ok, JobId} end. @@ -68,8 +67,29 @@ remove(#{jtx := true} = JTx, Type, JobId, STime) -> erlfdb:clear(Tx, Key). +pending_count(#{jtx := true} = JTx, Type, MaxSTime, Limit) -> + #{tx := Tx} = JTx, + Opts = [ + {limit, Limit}, + {snapshot, true}, + {streaming_mode, want_all} + ], + {StartSel, EndSel} = get_range_selectors(JTx, Type, MaxSTime), + FoldFun = fun(_Row, Cnt) -> Cnt + 1 end, + erlfdb:fold_range(Tx, StartSel, EndSel, FoldFun, 0, Opts). + + %% Private functions +% Get pending key selectors, taking into account max scheduled time value. +get_range_selectors(#{jtx := true} = JTx, Type, MaxSTime) -> + #{jobs_path := Jobs} = JTx, + Prefix = erlfdb_tuple:pack({?PENDING, Type}, Jobs), + StartKeySel = erlfdb_key:first_greater_than(Prefix), + End = erlfdb_tuple:pack({MaxSTime, <<16#FF>>}, Prefix), + EndKeySel = erlfdb_key:first_greater_or_equal(End), + {StartKeySel, EndKeySel}. + % Pick a random item from the range without reading the keys in first. However % the constraint it that IDs should looks like random UUIDs -- cgit v1.2.1 From 4fc9a536ec85456ab60085f020548a08dd19ca36 Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Fri, 28 Aug 2020 04:31:04 -0400 Subject: Delete old 2.x-3.x replicator modules These modules are not used by the new replicator. --- .../src/couch_replicator_clustering.erl | 279 ---- .../src/couch_replicator_db_changes.erl | 108 -- .../src/couch_replicator_doc_processor.erl | 962 ----------- .../src/couch_replicator_doc_processor_worker.erl | 284 ---- .../src/couch_replicator_fabric.erl | 155 -- .../src/couch_replicator_fabric_rpc.erl | 97 -- .../src/couch_replicator_httpd_util.erl | 201 --- .../src/couch_replicator_job_sup.erl | 34 - .../src/couch_replicator_js_functions.hrl | 177 -- .../src/couch_replicator_notifier.erl | 58 - .../src/couch_replicator_scheduler.erl | 1688 -------------------- .../src/couch_replicator_scheduler.hrl | 15 - .../src/couch_replicator_scheduler_job.erl | 1090 ------------- .../src/couch_replicator_scheduler_sup.erl | 62 - .../test/eunit/couch_replicator_compact_tests.erl | 455 ------ .../couch_replicator_error_reporting_tests.erl | 271 ---- 16 files changed, 5936 deletions(-) delete mode 100644 src/couch_replicator/src/couch_replicator_clustering.erl delete mode 100644 src/couch_replicator/src/couch_replicator_db_changes.erl delete mode 100644 src/couch_replicator/src/couch_replicator_doc_processor.erl delete mode 100644 src/couch_replicator/src/couch_replicator_doc_processor_worker.erl delete mode 100644 src/couch_replicator/src/couch_replicator_fabric.erl delete mode 100644 src/couch_replicator/src/couch_replicator_fabric_rpc.erl delete mode 100644 src/couch_replicator/src/couch_replicator_httpd_util.erl delete mode 100644 src/couch_replicator/src/couch_replicator_job_sup.erl delete mode 100644 src/couch_replicator/src/couch_replicator_js_functions.hrl delete mode 100644 src/couch_replicator/src/couch_replicator_notifier.erl delete mode 100644 src/couch_replicator/src/couch_replicator_scheduler.erl delete mode 100644 src/couch_replicator/src/couch_replicator_scheduler.hrl delete mode 100644 src/couch_replicator/src/couch_replicator_scheduler_job.erl delete mode 100644 src/couch_replicator/src/couch_replicator_scheduler_sup.erl delete mode 100644 src/couch_replicator/test/eunit/couch_replicator_compact_tests.erl delete mode 100644 src/couch_replicator/test/eunit/couch_replicator_error_reporting_tests.erl diff --git a/src/couch_replicator/src/couch_replicator_clustering.erl b/src/couch_replicator/src/couch_replicator_clustering.erl deleted file mode 100644 index 18de1e825..000000000 --- a/src/couch_replicator/src/couch_replicator_clustering.erl +++ /dev/null @@ -1,279 +0,0 @@ -% Licensed under the Apache License, Version 2.0 (the "License"); you may not -% use this file except in compliance with the License. You may obtain a copy of -% the License at -% -% http://www.apache.org/licenses/LICENSE-2.0 -% -% Unless required by applicable law or agreed to in writing, software -% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -% License for the specific language governing permissions and limitations under -% the License. - - -% Maintain cluster membership and stability notifications for replications. -% On changes to cluster membership, broadcast events to `replication` gen_event. -% Listeners will get `{cluster, stable}` or `{cluster, unstable}` events. -% -% Cluster stability is defined as "there have been no nodes added or removed in -% last `QuietPeriod` seconds". QuietPeriod value is configurable. To ensure a -% speedier startup, during initialization there is a shorter StartupPeriod -% in effect (also configurable). -% -% This module is also in charge of calculating ownership of replications based -% on where their _replicator db documents shards live. - - --module(couch_replicator_clustering). - --behaviour(gen_server). --behaviour(config_listener). --behaviour(mem3_cluster). - --export([ - start_link/0 -]). - --export([ - init/1, - terminate/2, - handle_call/3, - handle_info/2, - handle_cast/2, - code_change/3 -]). - --export([ - owner/2, - is_stable/0, - link_cluster_event_listener/3 -]). - -% config_listener callbacks --export([ - handle_config_change/5, - handle_config_terminate/3 -]). - -% mem3_cluster callbacks --export([ - cluster_stable/1, - cluster_unstable/1 -]). - --include_lib("couch/include/couch_db.hrl"). --include_lib("mem3/include/mem3.hrl"). - --define(DEFAULT_QUIET_PERIOD, 60). % seconds --define(DEFAULT_START_PERIOD, 5). % seconds --define(RELISTEN_DELAY, 5000). - --record(state, { - mem3_cluster_pid :: pid(), - cluster_stable :: boolean() -}). - - --spec start_link() -> {ok, pid()} | ignore | {error, term()}. -start_link() -> - gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). - - -% owner/2 function computes ownership for a {DbName, DocId} tuple -% `unstable` if cluster is considered to be unstable i.e. it has changed -% recently, or returns node() which of the owner. -% --spec owner(Dbname :: binary(), DocId :: binary()) -> node() | unstable. -owner(<<"shards/", _/binary>> = DbName, DocId) -> - case is_stable() of - false -> - unstable; - true -> - owner_int(DbName, DocId) - end; -owner(_DbName, _DocId) -> - node(). - - --spec is_stable() -> true | false. -is_stable() -> - gen_server:call(?MODULE, is_stable). - - --spec link_cluster_event_listener(atom(), atom(), list()) -> pid(). -link_cluster_event_listener(Mod, Fun, Args) - when is_atom(Mod), is_atom(Fun), is_list(Args) -> - CallbackFun = - fun(Event = {cluster, _}) -> erlang:apply(Mod, Fun, Args ++ [Event]); - (_) -> ok - end, - {ok, Pid} = couch_replicator_notifier:start_link(CallbackFun), - Pid. - - -% Mem3 cluster callbacks - -cluster_unstable(Server) -> - ok = gen_server:call(Server, set_unstable), - couch_replicator_notifier:notify({cluster, unstable}), - couch_stats:update_gauge([couch_replicator, cluster_is_stable], 0), - couch_log:notice("~s : cluster unstable", [?MODULE]), - Server. - -cluster_stable(Server) -> - ok = gen_server:call(Server, set_stable), - couch_replicator_notifier:notify({cluster, stable}), - couch_stats:update_gauge([couch_replicator, cluster_is_stable], 1), - couch_log:notice("~s : cluster stable", [?MODULE]), - Server. - - -% gen_server callbacks - -init([]) -> - ok = config:listen_for_changes(?MODULE, nil), - Period = abs(config:get_integer("replicator", "cluster_quiet_period", - ?DEFAULT_QUIET_PERIOD)), - StartPeriod = abs(config:get_integer("replicator", "cluster_start_period", - ?DEFAULT_START_PERIOD)), - couch_stats:update_gauge([couch_replicator, cluster_is_stable], 0), - {ok, Mem3Cluster} = mem3_cluster:start_link(?MODULE, self(), StartPeriod, - Period), - {ok, #state{mem3_cluster_pid = Mem3Cluster, cluster_stable = false}}. - - -terminate(_Reason, _State) -> - ok. - - -handle_call(is_stable, _From, #state{cluster_stable = IsStable} = State) -> - {reply, IsStable, State}; - -handle_call(set_stable, _From, State) -> - {reply, ok, State#state{cluster_stable = true}}; - -handle_call(set_unstable, _From, State) -> - {reply, ok, State#state{cluster_stable = false}}. - - -handle_cast({set_period, Period}, #state{mem3_cluster_pid = Pid} = State) -> - ok = mem3_cluster:set_period(Pid, Period), - {noreply, State}. - - -handle_info(restart_config_listener, State) -> - ok = config:listen_for_changes(?MODULE, nil), - {noreply, State}. - - -code_change(_OldVsn, State, _Extra) -> - {ok, State}. - - -%% Internal functions - - -handle_config_change("replicator", "cluster_quiet_period", V, _, S) -> - ok = gen_server:cast(?MODULE, {set_period, list_to_integer(V)}), - {ok, S}; -handle_config_change(_, _, _, _, S) -> - {ok, S}. - - -handle_config_terminate(_, stop, _) -> ok; -handle_config_terminate(_S, _R, _St) -> - Pid = whereis(?MODULE), - erlang:send_after(?RELISTEN_DELAY, Pid, restart_config_listener). - - --spec owner_int(binary(), binary()) -> node(). -owner_int(ShardName, DocId) -> - DbName = mem3:dbname(ShardName), - Live = [node() | nodes()], - Shards = mem3:shards(DbName, DocId), - Nodes = [N || #shard{node=N} <- Shards, lists:member(N, Live)], - mem3:owner(DbName, DocId, Nodes). - - - --ifdef(TEST). - --include_lib("eunit/include/eunit.hrl"). - - -replicator_clustering_test_() -> - { - setup, - fun setup_all/0, - fun teardown_all/1, - { - foreach, - fun setup/0, - fun teardown/1, - [ - t_stable_callback(), - t_unstable_callback() - ] - } - }. - - -t_stable_callback() -> - ?_test(begin - ?assertEqual(false, is_stable()), - cluster_stable(whereis(?MODULE)), - ?assertEqual(true, is_stable()) - end). - - -t_unstable_callback() -> - ?_test(begin - cluster_stable(whereis(?MODULE)), - ?assertEqual(true, is_stable()), - cluster_unstable(whereis(?MODULE)), - ?assertEqual(false, is_stable()) - end). - - -setup_all() -> - meck:expect(couch_log, notice, 2, ok), - meck:expect(config, get, fun(_, _, Default) -> Default end), - meck:expect(config, listen_for_changes, 2, ok), - meck:expect(couch_stats, update_gauge, 2, ok), - meck:expect(couch_replicator_notifier, notify, 1, ok). - - -teardown_all(_) -> - meck:unload(). - - -setup() -> - meck:reset([ - config, - couch_log, - couch_stats, - couch_replicator_notifier - ]), - stop_clustering_process(), - {ok, Pid} = start_link(), - Pid. - - -teardown(Pid) -> - stop_clustering_process(Pid). - - -stop_clustering_process() -> - stop_clustering_process(whereis(?MODULE)). - - -stop_clustering_process(undefined) -> - ok; - -stop_clustering_process(Pid) when is_pid(Pid) -> - Ref = erlang:monitor(process, Pid), - unlink(Pid), - exit(Pid, kill), - receive {'DOWN', Ref, _, _, _} -> ok end. - --endif. diff --git a/src/couch_replicator/src/couch_replicator_db_changes.erl b/src/couch_replicator/src/couch_replicator_db_changes.erl deleted file mode 100644 index 92b0222c4..000000000 --- a/src/couch_replicator/src/couch_replicator_db_changes.erl +++ /dev/null @@ -1,108 +0,0 @@ -% Licensed under the Apache License, Version 2.0 (the "License"); you may not -% use this file except in compliance with the License. You may obtain a copy of -% the License at -% -% http://www.apache.org/licenses/LICENSE-2.0 -% -% Unless required by applicable law or agreed to in writing, software -% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -% License for the specific language governing permissions and limitations under -% the License. - --module(couch_replicator_db_changes). - --behaviour(gen_server). - --export([ - start_link/0 -]). - --export([ - init/1, - terminate/2, - handle_call/3, - handle_info/2, - handle_cast/2, - code_change/3 -]). - --export([ - notify_cluster_event/2 -]). - --record(state, { - event_listener :: pid(), - mdb_changes :: pid() | nil -}). - - --spec notify_cluster_event(pid(), {cluster, any()}) -> ok. -notify_cluster_event(Server, {cluster, _} = Event) -> - gen_server:cast(Server, Event). - - --spec start_link() -> - {ok, pid()} | ignore | {error, any()}. -start_link() -> - gen_server:start_link(?MODULE, [], []). - - -init([]) -> - EvtPid = couch_replicator_clustering:link_cluster_event_listener(?MODULE, - notify_cluster_event, [self()]), - State = #state{event_listener = EvtPid, mdb_changes = nil}, - case couch_replicator_clustering:is_stable() of - true -> - {ok, restart_mdb_changes(State)}; - false -> - {ok, State} - end. - - -terminate(_Reason, _State) -> - ok. - - -handle_call(_Msg, _From, State) -> - {reply, {error, invalid_call}, State}. - - -handle_cast({cluster, unstable}, State) -> - {noreply, stop_mdb_changes(State)}; - -handle_cast({cluster, stable}, State) -> - {noreply, restart_mdb_changes(State)}. - - -handle_info(_Msg, State) -> - {noreply, State}. - - -code_change(_OldVsn, State, _Extra) -> - {ok, State}. - - --spec restart_mdb_changes(#state{}) -> #state{}. -restart_mdb_changes(#state{mdb_changes = nil} = State) -> - Suffix = <<"_replicator">>, - CallbackMod = couch_replicator_doc_processor, - Options = [skip_ddocs], - {ok, Pid} = couch_multidb_changes:start_link(Suffix, CallbackMod, nil, - Options), - couch_stats:increment_counter([couch_replicator, db_scans]), - couch_log:notice("Started replicator db changes listener ~p", [Pid]), - State#state{mdb_changes = Pid}; - -restart_mdb_changes(#state{mdb_changes = _Pid} = State) -> - restart_mdb_changes(stop_mdb_changes(State)). - - --spec stop_mdb_changes(#state{}) -> #state{}. -stop_mdb_changes(#state{mdb_changes = nil} = State) -> - State; -stop_mdb_changes(#state{mdb_changes = Pid} = State) -> - couch_log:notice("Stopping replicator db changes listener ~p", [Pid]), - unlink(Pid), - exit(Pid, kill), - State#state{mdb_changes = nil}. diff --git a/src/couch_replicator/src/couch_replicator_doc_processor.erl b/src/couch_replicator/src/couch_replicator_doc_processor.erl deleted file mode 100644 index 6778d537d..000000000 --- a/src/couch_replicator/src/couch_replicator_doc_processor.erl +++ /dev/null @@ -1,962 +0,0 @@ -% Licensed under the Apache License, Version 2.0 (the "License"); you may not -% use this file except in compliance with the License. You may obtain a copy of -% the License at -% -% http://www.apache.org/licenses/LICENSE-2.0 -% -% Unless required by applicable law or agreed to in writing, software -% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -% License for the specific language governing permissions and limitations under -% the License. - --module(couch_replicator_doc_processor). - --behaviour(gen_server). --behaviour(couch_multidb_changes). - --export([ - start_link/0 -]). - --export([ - init/1, - terminate/2, - handle_call/3, - handle_info/2, - handle_cast/2, - code_change/3 -]). - --export([ - db_created/2, - db_deleted/2, - db_found/2, - db_change/3 -]). - --export([ - docs/1, - doc/2, - doc_lookup/3, - update_docs/0, - get_worker_ref/1, - notify_cluster_event/2 -]). - --include_lib("couch/include/couch_db.hrl"). --include("couch_replicator.hrl"). --include_lib("mem3/include/mem3.hrl"). - --import(couch_replicator_utils, [ - get_json_value/2, - get_json_value/3 -]). - --define(DEFAULT_UPDATE_DOCS, false). --define(ERROR_MAX_BACKOFF_EXPONENT, 12). % ~ 1 day on average --define(TS_DAY_SEC, 86400). --define(INITIAL_BACKOFF_EXPONENT, 64). --define(MIN_FILTER_DELAY_SEC, 60). - --type filter_type() :: nil | view | user | docids | mango. --type repstate() :: initializing | error | scheduled. - - --record(rdoc, { - id :: db_doc_id() | '_' | {any(), '_'}, - state :: repstate() | '_', - rep :: #rep{} | nil | '_', - rid :: rep_id() | nil | '_', - filter :: filter_type() | '_', - info :: binary() | nil | '_', - errcnt :: non_neg_integer() | '_', - worker :: reference() | nil | '_', - last_updated :: erlang:timestamp() | '_' -}). - - -% couch_multidb_changes API callbacks - -db_created(DbName, Server) -> - couch_stats:increment_counter([couch_replicator, docs, dbs_created]), - couch_replicator_docs:ensure_rep_ddoc_exists(DbName), - Server. - - -db_deleted(DbName, Server) -> - couch_stats:increment_counter([couch_replicator, docs, dbs_deleted]), - ok = gen_server:call(?MODULE, {clean_up_replications, DbName}, infinity), - Server. - - -db_found(DbName, Server) -> - couch_stats:increment_counter([couch_replicator, docs, dbs_found]), - couch_replicator_docs:ensure_rep_ddoc_exists(DbName), - Server. - - -db_change(DbName, {ChangeProps} = Change, Server) -> - couch_stats:increment_counter([couch_replicator, docs, db_changes]), - try - ok = process_change(DbName, Change) - catch - exit:{Error, {gen_server, call, [?MODULE, _, _]}} -> - ErrMsg = "~p exited ~p while processing change from db ~p", - couch_log:error(ErrMsg, [?MODULE, Error, DbName]); - _Tag:Error -> - {RepProps} = get_json_value(doc, ChangeProps), - DocId = get_json_value(<<"_id">>, RepProps), - couch_replicator_docs:update_failed(DbName, DocId, Error) - end, - Server. - - --spec get_worker_ref(db_doc_id()) -> reference() | nil. -get_worker_ref({DbName, DocId}) when is_binary(DbName), is_binary(DocId) -> - case ets:lookup(?MODULE, {DbName, DocId}) of - [#rdoc{worker = WRef}] when is_reference(WRef) -> - WRef; - [#rdoc{worker = nil}] -> - nil; - [] -> - nil - end. - - -% Cluster membership change notification callback --spec notify_cluster_event(pid(), {cluster, any()}) -> ok. -notify_cluster_event(Server, {cluster, _} = Event) -> - gen_server:cast(Server, Event). - - -process_change(DbName, {Change}) -> - {RepProps} = JsonRepDoc = get_json_value(doc, Change), - DocId = get_json_value(<<"_id">>, RepProps), - Owner = couch_replicator_clustering:owner(DbName, DocId), - Id = {DbName, DocId}, - case {Owner, get_json_value(deleted, Change, false)} of - {_, true} -> - ok = gen_server:call(?MODULE, {removed, Id}, infinity); - {unstable, false} -> - couch_log:notice("Not starting '~s' as cluster is unstable", [DocId]); - {ThisNode, false} when ThisNode =:= node() -> - case get_json_value(<<"_replication_state">>, RepProps) of - undefined -> - ok = process_updated(Id, JsonRepDoc); - <<"triggered">> -> - maybe_remove_state_fields(DbName, DocId), - ok = process_updated(Id, JsonRepDoc); - <<"completed">> -> - ok = gen_server:call(?MODULE, {completed, Id}, infinity); - <<"error">> -> - % Handle replications started from older versions of replicator - % which wrote transient errors to replication docs - maybe_remove_state_fields(DbName, DocId), - ok = process_updated(Id, JsonRepDoc); - <<"failed">> -> - ok - end; - {Owner, false} -> - ok - end, - ok. - - -maybe_remove_state_fields(DbName, DocId) -> - case update_docs() of - true -> - ok; - false -> - couch_replicator_docs:remove_state_fields(DbName, DocId) - end. - - -process_updated({DbName, _DocId} = Id, JsonRepDoc) -> - % Parsing replication doc (but not calculating the id) could throw an - % exception which would indicate this document is malformed. This exception - % should propagate to db_change function and will be recorded as permanent - % failure in the document. User will have to update the documet to fix the - % problem. - Rep0 = couch_replicator_docs:parse_rep_doc_without_id(JsonRepDoc), - Rep = Rep0#rep{db_name = DbName, start_time = os:timestamp()}, - Filter = case couch_replicator_filters:parse(Rep#rep.options) of - {ok, nil} -> - nil; - {ok, {user, _FName, _QP}} -> - user; - {ok, {view, _FName, _QP}} -> - view; - {ok, {docids, _DocIds}} -> - docids; - {ok, {mango, _Selector}} -> - mango; - {error, FilterError} -> - throw(FilterError) - end, - gen_server:call(?MODULE, {updated, Id, Rep, Filter}, infinity). - - -% Doc processor gen_server API and callbacks - -start_link() -> - gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). - - -init([]) -> - ?MODULE = ets:new(?MODULE, [named_table, {keypos, #rdoc.id}, - {read_concurrency, true}, {write_concurrency, true}]), - couch_replicator_clustering:link_cluster_event_listener(?MODULE, - notify_cluster_event, [self()]), - {ok, nil}. - - -terminate(_Reason, _State) -> - ok. - - -handle_call({updated, Id, Rep, Filter}, _From, State) -> - ok = updated_doc(Id, Rep, Filter), - {reply, ok, State}; - -handle_call({removed, Id}, _From, State) -> - ok = removed_doc(Id), - {reply, ok, State}; - -handle_call({completed, Id}, _From, State) -> - true = ets:delete(?MODULE, Id), - {reply, ok, State}; - -handle_call({clean_up_replications, DbName}, _From, State) -> - ok = removed_db(DbName), - {reply, ok, State}. - -handle_cast({cluster, unstable}, State) -> - % Ignoring unstable state transition - {noreply, State}; - -handle_cast({cluster, stable}, State) -> - % Membership changed recheck all the replication document ownership - nil = ets:foldl(fun cluster_membership_foldl/2, nil, ?MODULE), - {noreply, State}; - -handle_cast(Msg, State) -> - {stop, {error, unexpected_message, Msg}, State}. - - -handle_info({'DOWN', _, _, _, #doc_worker_result{id = Id, wref = Ref, - result = Res}}, State) -> - ok = worker_returned(Ref, Id, Res), - {noreply, State}; - -handle_info(_Msg, State) -> - {noreply, State}. - - -code_change(_OldVsn, State, _Extra) -> - {ok, State}. - - -% Doc processor gen_server private helper functions - -% Handle doc update -- add to ets, then start a worker to try to turn it into -% a replication job. In most cases it will succeed quickly but for filtered -% replications or if there are duplicates, it could take longer -% (theoretically indefinitely) until a replication could be started. Before -% adding replication job, make sure to delete all old jobs associated with -% same document. --spec updated_doc(db_doc_id(), #rep{}, filter_type()) -> ok. -updated_doc(Id, Rep, Filter) -> - NormCurRep = couch_replicator_utils:normalize_rep(current_rep(Id)), - NormNewRep = couch_replicator_utils:normalize_rep(Rep), - case NormCurRep == NormNewRep of - false -> - removed_doc(Id), - Row = #rdoc{ - id = Id, - state = initializing, - rep = Rep, - rid = nil, - filter = Filter, - info = nil, - errcnt = 0, - worker = nil, - last_updated = os:timestamp() - }, - true = ets:insert(?MODULE, Row), - ok = maybe_start_worker(Id); - true -> - ok - end. - - -% Return current #rep{} record if any. If replication hasn't been submitted -% to the scheduler yet, #rep{} record will be in the document processor's -% ETS table, otherwise query scheduler for the #rep{} record. --spec current_rep({binary(), binary()}) -> #rep{} | nil. -current_rep({DbName, DocId}) when is_binary(DbName), is_binary(DocId) -> - case ets:lookup(?MODULE, {DbName, DocId}) of - [] -> - nil; - [#rdoc{state = scheduled, rep = nil, rid = JobId}] -> - % When replication is scheduled, #rep{} record which can be quite - % large compared to other bits in #rdoc is removed in order to avoid - % having to keep 2 copies of it. So have to fetch it from the - % scheduler. - couch_replicator_scheduler:rep_state(JobId); - [#rdoc{rep = Rep}] -> - Rep - end. - - --spec worker_returned(reference(), db_doc_id(), rep_start_result()) -> ok. -worker_returned(Ref, Id, {ok, RepId}) -> - case ets:lookup(?MODULE, Id) of - [#rdoc{worker = Ref} = Row] -> - Row0 = Row#rdoc{ - state = scheduled, - errcnt = 0, - worker = nil, - last_updated = os:timestamp() - }, - NewRow = case Row0 of - #rdoc{rid = RepId, filter = user} -> - % Filtered replication id didn't change. - Row0; - #rdoc{rid = nil, filter = user} -> - % Calculated new replication id for a filtered replication. Make - % sure to schedule another check as filter code could change. - % Replication starts could have been failing, so also clear - % error count. - Row0#rdoc{rid = RepId}; - #rdoc{rid = OldRepId, filter = user} -> - % Replication id of existing replication job with filter has - % changed. Remove old replication job from scheduler and - % schedule check to check for future changes. - ok = couch_replicator_scheduler:remove_job(OldRepId), - Msg = io_lib:format("Replication id changed: ~p -> ~p", [ - OldRepId, RepId]), - Row0#rdoc{rid = RepId, info = couch_util:to_binary(Msg)}; - #rdoc{rid = nil} -> - % Calculated new replication id for non-filtered replication. - % Remove replication doc body, after this we won't need it - % anymore. - Row0#rdoc{rep=nil, rid=RepId, info=nil} - end, - true = ets:insert(?MODULE, NewRow), - ok = maybe_update_doc_triggered(Row#rdoc.rep, RepId), - ok = maybe_start_worker(Id); - _ -> - ok % doc could have been deleted, ignore - end, - ok; - -worker_returned(_Ref, _Id, ignore) -> - ok; - -worker_returned(Ref, Id, {temporary_error, Reason}) -> - case ets:lookup(?MODULE, Id) of - [#rdoc{worker = Ref, errcnt = ErrCnt} = Row] -> - NewRow = Row#rdoc{ - rid = nil, - state = error, - info = Reason, - errcnt = ErrCnt + 1, - worker = nil, - last_updated = os:timestamp() - }, - true = ets:insert(?MODULE, NewRow), - ok = maybe_update_doc_error(NewRow#rdoc.rep, Reason), - ok = maybe_start_worker(Id); - _ -> - ok % doc could have been deleted, ignore - end, - ok; - -worker_returned(Ref, Id, {permanent_failure, _Reason}) -> - case ets:lookup(?MODULE, Id) of - [#rdoc{worker = Ref}] -> - true = ets:delete(?MODULE, Id); - _ -> - ok % doc could have been deleted, ignore - end, - ok. - - --spec maybe_update_doc_error(#rep{}, any()) -> ok. -maybe_update_doc_error(Rep, Reason) -> - case update_docs() of - true -> - couch_replicator_docs:update_error(Rep, Reason); - false -> - ok - end. - - --spec maybe_update_doc_triggered(#rep{}, rep_id()) -> ok. -maybe_update_doc_triggered(Rep, RepId) -> - case update_docs() of - true -> - couch_replicator_docs:update_triggered(Rep, RepId); - false -> - ok - end. - - --spec error_backoff(non_neg_integer()) -> seconds(). -error_backoff(ErrCnt) -> - Exp = min(ErrCnt, ?ERROR_MAX_BACKOFF_EXPONENT), - % ErrCnt is the exponent here. The reason 64 is used is to start at - % 64 (about a minute) max range. Then first backoff would be 30 sec - % on average. Then 1 minute and so on. - couch_rand:uniform(?INITIAL_BACKOFF_EXPONENT bsl Exp). - - --spec filter_backoff() -> seconds(). -filter_backoff() -> - Total = ets:info(?MODULE, size), - % This value scaled by the number of replications. If the are a lot of them - % wait is longer, but not more than a day (?TS_DAY_SEC). If there are just - % few, wait is shorter, starting at about 30 seconds. `2 *` is used since - % the expected wait would then be 0.5 * Range so it is easier to see the - % average wait. `1 +` is used because couch_rand:uniform only - % accepts >= 1 values and crashes otherwise. - Range = 1 + min(2 * (Total / 10), ?TS_DAY_SEC), - ?MIN_FILTER_DELAY_SEC + couch_rand:uniform(round(Range)). - - -% Document removed from db -- clear ets table and remove all scheduled jobs --spec removed_doc(db_doc_id()) -> ok. -removed_doc({DbName, DocId} = Id) -> - ets:delete(?MODULE, Id), - RepIds = couch_replicator_scheduler:find_jobs_by_doc(DbName, DocId), - lists:foreach(fun couch_replicator_scheduler:remove_job/1, RepIds). - - -% Whole db shard is gone -- remove all its ets rows and stop jobs --spec removed_db(binary()) -> ok. -removed_db(DbName) -> - EtsPat = #rdoc{id = {DbName, '_'}, _ = '_'}, - ets:match_delete(?MODULE, EtsPat), - RepIds = couch_replicator_scheduler:find_jobs_by_dbname(DbName), - lists:foreach(fun couch_replicator_scheduler:remove_job/1, RepIds). - - -% Spawn a worker process which will attempt to calculate a replication id, then -% start a replication. Returns a process monitor reference. The worker is -% guaranteed to exit with rep_start_result() type only. --spec maybe_start_worker(db_doc_id()) -> ok. -maybe_start_worker(Id) -> - case ets:lookup(?MODULE, Id) of - [] -> - ok; - [#rdoc{state = scheduled, filter = Filter}] when Filter =/= user -> - ok; - [#rdoc{rep = Rep} = Doc] -> - % For any replication with a user created filter function, periodically - % (every `filter_backoff/0` seconds) to try to see if the user filter - % has changed by using a worker to check for changes. When the worker - % returns check if replication ID has changed. If it hasn't keep - % checking (spawn another worker and so on). If it has stop the job - % with the old ID and continue checking. - Wait = get_worker_wait(Doc), - Ref = make_ref(), - true = ets:insert(?MODULE, Doc#rdoc{worker = Ref}), - couch_replicator_doc_processor_worker:spawn_worker(Id, Rep, Wait, Ref), - ok - end. - - --spec get_worker_wait(#rdoc{}) -> seconds(). -get_worker_wait(#rdoc{state = scheduled, filter = user}) -> - filter_backoff(); -get_worker_wait(#rdoc{state = error, errcnt = ErrCnt}) -> - error_backoff(ErrCnt); -get_worker_wait(#rdoc{state = initializing}) -> - 0. - - --spec update_docs() -> boolean(). -update_docs() -> - config:get_boolean("replicator", "update_docs", ?DEFAULT_UPDATE_DOCS). - - -% _scheduler/docs HTTP endpoint helpers - --spec docs([atom()]) -> [{[_]}] | []. -docs(States) -> - HealthThreshold = couch_replicator_scheduler:health_threshold(), - ets:foldl(fun(RDoc, Acc) -> - case ejson_doc(RDoc, HealthThreshold) of - nil -> - Acc; % Could have been deleted if job just completed - {Props} = EJson -> - {state, DocState} = lists:keyfind(state, 1, Props), - case ejson_doc_state_filter(DocState, States) of - true -> - [EJson | Acc]; - false -> - Acc - end - end - end, [], ?MODULE). - - --spec doc(binary(), binary()) -> {ok, {[_]}} | {error, not_found}. -doc(Db, DocId) -> - HealthThreshold = couch_replicator_scheduler:health_threshold(), - Res = (catch ets:foldl(fun(RDoc, nil) -> - {Shard, RDocId} = RDoc#rdoc.id, - case {mem3:dbname(Shard), RDocId} of - {Db, DocId} -> - throw({found, ejson_doc(RDoc, HealthThreshold)}); - {_OtherDb, _OtherDocId} -> - nil - end - end, nil, ?MODULE)), - case Res of - {found, DocInfo} -> - {ok, DocInfo}; - nil -> - {error, not_found} - end. - - --spec doc_lookup(binary(), binary(), integer()) -> - {ok, {[_]}} | {error, not_found}. -doc_lookup(Db, DocId, HealthThreshold) -> - case ets:lookup(?MODULE, {Db, DocId}) of - [#rdoc{} = RDoc] -> - {ok, ejson_doc(RDoc, HealthThreshold)}; - [] -> - {error, not_found} - end. - - --spec ejson_rep_id(rep_id() | nil) -> binary() | null. -ejson_rep_id(nil) -> - null; -ejson_rep_id({BaseId, Ext}) -> - iolist_to_binary([BaseId, Ext]). - - --spec ejson_doc(#rdoc{}, non_neg_integer()) -> {[_]} | nil. -ejson_doc(#rdoc{state = scheduled} = RDoc, HealthThreshold) -> - #rdoc{id = {DbName, DocId}, rid = RepId} = RDoc, - JobProps = couch_replicator_scheduler:job_summary(RepId, HealthThreshold), - case JobProps of - nil -> - nil; - [{_, _} | _] -> - {[ - {doc_id, DocId}, - {database, DbName}, - {id, ejson_rep_id(RepId)}, - {node, node()} | JobProps - ]} - end; - -ejson_doc(#rdoc{state = RepState} = RDoc, _HealthThreshold) -> - #rdoc{ - id = {DbName, DocId}, - info = StateInfo, - rid = RepId, - errcnt = ErrorCount, - last_updated = StateTime, - rep = Rep - } = RDoc, - {[ - {doc_id, DocId}, - {database, DbName}, - {id, ejson_rep_id(RepId)}, - {state, RepState}, - {info, couch_replicator_utils:ejson_state_info(StateInfo)}, - {error_count, ErrorCount}, - {node, node()}, - {last_updated, couch_replicator_utils:iso8601(StateTime)}, - {start_time, couch_replicator_utils:iso8601(Rep#rep.start_time)} - ]}. - - --spec ejson_doc_state_filter(atom(), [atom()]) -> boolean(). -ejson_doc_state_filter(_DocState, []) -> - true; -ejson_doc_state_filter(State, States) when is_list(States), is_atom(State) -> - lists:member(State, States). - - --spec cluster_membership_foldl(#rdoc{}, nil) -> nil. -cluster_membership_foldl(#rdoc{id = {DbName, DocId} = Id, rid = RepId}, nil) -> - case couch_replicator_clustering:owner(DbName, DocId) of - unstable -> - nil; - ThisNode when ThisNode =:= node() -> - nil; - OtherNode -> - Msg = "Replication doc ~p:~p with id ~p usurped by node ~p", - couch_log:notice(Msg, [DbName, DocId, RepId, OtherNode]), - removed_doc(Id), - nil - end. - - --ifdef(TEST). - --include_lib("eunit/include/eunit.hrl"). - --define(DB, <<"db">>). --define(EXIT_DB, <<"exit_db">>). --define(DOC1, <<"doc1">>). --define(DOC2, <<"doc2">>). --define(R1, {"1", ""}). --define(R2, {"2", ""}). - - -doc_processor_test_() -> - { - setup, - fun setup_all/0, - fun teardown_all/1, - { - foreach, - fun setup/0, - fun teardown/1, - [ - t_bad_change(), - t_regular_change(), - t_change_with_doc_processor_crash(), - t_change_with_existing_job(), - t_deleted_change(), - t_triggered_change(), - t_completed_change(), - t_active_replication_completed(), - t_error_change(), - t_failed_change(), - t_change_for_different_node(), - t_change_when_cluster_unstable(), - t_ejson_docs(), - t_cluster_membership_foldl() - ] - } - }. - - -% Can't parse replication doc, so should write failure state to document. -t_bad_change() -> - ?_test(begin - ?assertEqual(acc, db_change(?DB, bad_change(), acc)), - ?assert(updated_doc_with_failed_state()) - end). - - -% Regular change, parse to a #rep{} and then add job. -t_regular_change() -> - ?_test(begin - mock_existing_jobs_lookup([]), - ?assertEqual(ok, process_change(?DB, change())), - ?assert(ets:member(?MODULE, {?DB, ?DOC1})), - ?assert(started_worker({?DB, ?DOC1})) - end). - - -% Handle cases where doc processor exits or crashes while processing a change -t_change_with_doc_processor_crash() -> - ?_test(begin - mock_existing_jobs_lookup([]), - ?assertEqual(acc, db_change(?EXIT_DB, change(), acc)), - ?assert(failed_state_not_updated()) - end). - - -% Regular change, parse to a #rep{} and then add job but there is already -% a running job with same Id found. -t_change_with_existing_job() -> - ?_test(begin - mock_existing_jobs_lookup([test_rep(?R2)]), - ?assertEqual(ok, process_change(?DB, change())), - ?assert(ets:member(?MODULE, {?DB, ?DOC1})), - ?assert(started_worker({?DB, ?DOC1})) - end). - - -% Change is a deletion, and job is running, so remove job. -t_deleted_change() -> - ?_test(begin - mock_existing_jobs_lookup([test_rep(?R2)]), - ?assertEqual(ok, process_change(?DB, deleted_change())), - ?assert(removed_job(?R2)) - end). - - -% Change is in `triggered` state. Remove legacy state and add job. -t_triggered_change() -> - ?_test(begin - mock_existing_jobs_lookup([]), - ?assertEqual(ok, process_change(?DB, change(<<"triggered">>))), - ?assert(removed_state_fields()), - ?assert(ets:member(?MODULE, {?DB, ?DOC1})), - ?assert(started_worker({?DB, ?DOC1})) - end). - - -% Change is in `completed` state, so skip over it. -t_completed_change() -> - ?_test(begin - ?assertEqual(ok, process_change(?DB, change(<<"completed">>))), - ?assert(did_not_remove_state_fields()), - ?assertNot(ets:member(?MODULE, {?DB, ?DOC1})), - ?assert(did_not_spawn_worker()) - end). - - -% Completed change comes for what used to be an active job. In this case -% remove entry from doc_processor's ets (because there is no linkage or -% callback mechanism for scheduler to tell doc_processsor a replication just -% completed). -t_active_replication_completed() -> - ?_test(begin - mock_existing_jobs_lookup([]), - ?assertEqual(ok, process_change(?DB, change())), - ?assert(ets:member(?MODULE, {?DB, ?DOC1})), - ?assertEqual(ok, process_change(?DB, change(<<"completed">>))), - ?assert(did_not_remove_state_fields()), - ?assertNot(ets:member(?MODULE, {?DB, ?DOC1})) - end). - - -% Change is in `error` state. Remove legacy state and retry -% running the job. This state was used for transient erorrs which are not -% written to the document anymore. -t_error_change() -> - ?_test(begin - mock_existing_jobs_lookup([]), - ?assertEqual(ok, process_change(?DB, change(<<"error">>))), - ?assert(removed_state_fields()), - ?assert(ets:member(?MODULE, {?DB, ?DOC1})), - ?assert(started_worker({?DB, ?DOC1})) - end). - - -% Change is in `failed` state. This is a terminal state and it will not -% be tried again, so skip over it. -t_failed_change() -> - ?_test(begin - ?assertEqual(ok, process_change(?DB, change(<<"failed">>))), - ?assert(did_not_remove_state_fields()), - ?assertNot(ets:member(?MODULE, {?DB, ?DOC1})), - ?assert(did_not_spawn_worker()) - end). - - -% Normal change, but according to cluster ownership algorithm, replication -% belongs to a different node, so this node should skip it. -t_change_for_different_node() -> - ?_test(begin - meck:expect(couch_replicator_clustering, owner, 2, different_node), - ?assertEqual(ok, process_change(?DB, change())), - ?assert(did_not_spawn_worker()) - end). - - -% Change handled when cluster is unstable (nodes are added or removed), so -% job is not added. A rescan will be triggered soon and change will be -% evaluated again. -t_change_when_cluster_unstable() -> - ?_test(begin - meck:expect(couch_replicator_clustering, owner, 2, unstable), - ?assertEqual(ok, process_change(?DB, change())), - ?assert(did_not_spawn_worker()) - end). - - -% Check if docs/0 function produces expected ejson after adding a job -t_ejson_docs() -> - ?_test(begin - mock_existing_jobs_lookup([]), - ?assertEqual(ok, process_change(?DB, change())), - ?assert(ets:member(?MODULE, {?DB, ?DOC1})), - EJsonDocs = docs([]), - ?assertMatch([{[_|_]}], EJsonDocs), - [{DocProps}] = EJsonDocs, - {value, StateTime, DocProps1} = lists:keytake(last_updated, 1, - DocProps), - ?assertMatch({last_updated, BinVal1} when is_binary(BinVal1), - StateTime), - {value, StartTime, DocProps2} = lists:keytake(start_time, 1, DocProps1), - ?assertMatch({start_time, BinVal2} when is_binary(BinVal2), StartTime), - ExpectedProps = [ - {database, ?DB}, - {doc_id, ?DOC1}, - {error_count, 0}, - {id, null}, - {info, null}, - {node, node()}, - {state, initializing} - ], - ?assertEqual(ExpectedProps, lists:usort(DocProps2)) - end). - - -% Check that when cluster membership changes records from doc processor and job -% scheduler get removed -t_cluster_membership_foldl() -> - ?_test(begin - mock_existing_jobs_lookup([test_rep(?R1)]), - ?assertEqual(ok, process_change(?DB, change())), - meck:expect(couch_replicator_clustering, owner, 2, different_node), - ?assert(ets:member(?MODULE, {?DB, ?DOC1})), - gen_server:cast(?MODULE, {cluster, stable}), - meck:wait(2, couch_replicator_scheduler, find_jobs_by_doc, 2, 5000), - ?assertNot(ets:member(?MODULE, {?DB, ?DOC1})), - ?assert(removed_job(?R1)) - end). - - -get_worker_ref_test_() -> - { - setup, - fun() -> - ets:new(?MODULE, [named_table, public, {keypos, #rdoc.id}]) - end, - fun(_) -> ets:delete(?MODULE) end, - ?_test(begin - Id = {<<"db">>, <<"doc">>}, - ?assertEqual(nil, get_worker_ref(Id)), - ets:insert(?MODULE, #rdoc{id = Id, worker = nil}), - ?assertEqual(nil, get_worker_ref(Id)), - Ref = make_ref(), - ets:insert(?MODULE, #rdoc{id = Id, worker = Ref}), - ?assertEqual(Ref, get_worker_ref(Id)) - end) - }. - - -% Test helper functions - - -setup_all() -> - meck:expect(couch_log, info, 2, ok), - meck:expect(couch_log, notice, 2, ok), - meck:expect(couch_log, warning, 2, ok), - meck:expect(couch_log, error, 2, ok), - meck:expect(config, get, fun(_, _, Default) -> Default end), - meck:expect(config, listen_for_changes, 2, ok), - meck:expect(couch_replicator_clustering, owner, 2, node()), - meck:expect(couch_replicator_clustering, link_cluster_event_listener, 3, - ok), - meck:expect(couch_replicator_doc_processor_worker, spawn_worker, fun - ({?EXIT_DB, _}, _, _, _) -> exit(kapow); - (_, _, _, _) -> pid - end), - meck:expect(couch_replicator_scheduler, remove_job, 1, ok), - meck:expect(couch_replicator_docs, remove_state_fields, 2, ok), - meck:expect(couch_replicator_docs, update_failed, 3, ok). - - -teardown_all(_) -> - meck:unload(). - - -setup() -> - meck:reset([ - config, - couch_log, - couch_replicator_clustering, - couch_replicator_doc_processor_worker, - couch_replicator_docs, - couch_replicator_scheduler - ]), - % Set this expectation back to the default for - % each test since some tests change it - meck:expect(couch_replicator_clustering, owner, 2, node()), - {ok, Pid} = start_link(), - unlink(Pid), - Pid. - - -teardown(Pid) -> - exit(Pid, kill). - - -removed_state_fields() -> - meck:called(couch_replicator_docs, remove_state_fields, [?DB, ?DOC1]). - - -started_worker(_Id) -> - 1 == meck:num_calls(couch_replicator_doc_processor_worker, spawn_worker, 4). - - -removed_job(Id) -> - meck:called(couch_replicator_scheduler, remove_job, [test_rep(Id)]). - - -did_not_remove_state_fields() -> - 0 == meck:num_calls(couch_replicator_docs, remove_state_fields, '_'). - - -did_not_spawn_worker() -> - 0 == meck:num_calls(couch_replicator_doc_processor_worker, spawn_worker, - '_'). - -updated_doc_with_failed_state() -> - 1 == meck:num_calls(couch_replicator_docs, update_failed, '_'). - -failed_state_not_updated() -> - 0 == meck:num_calls(couch_replicator_docs, update_failed, '_'). - -mock_existing_jobs_lookup(ExistingJobs) -> - meck:expect(couch_replicator_scheduler, find_jobs_by_doc, fun - (?EXIT_DB, ?DOC1) -> []; - (?DB, ?DOC1) -> ExistingJobs - end). - - -test_rep(Id) -> - #rep{id = Id, start_time = {0, 0, 0}}. - - -change() -> - {[ - {<<"id">>, ?DOC1}, - {doc, {[ - {<<"_id">>, ?DOC1}, - {<<"source">>, <<"http://srchost.local/src">>}, - {<<"target">>, <<"http://tgthost.local/tgt">>} - ]}} - ]}. - - -change(State) -> - {[ - {<<"id">>, ?DOC1}, - {doc, {[ - {<<"_id">>, ?DOC1}, - {<<"source">>, <<"http://srchost.local/src">>}, - {<<"target">>, <<"http://tgthost.local/tgt">>}, - {<<"_replication_state">>, State} - ]}} - ]}. - - -deleted_change() -> - {[ - {<<"id">>, ?DOC1}, - {<<"deleted">>, true}, - {doc, {[ - {<<"_id">>, ?DOC1}, - {<<"source">>, <<"http://srchost.local/src">>}, - {<<"target">>, <<"http://tgthost.local/tgt">>} - ]}} - ]}. - - -bad_change() -> - {[ - {<<"id">>, ?DOC2}, - {doc, {[ - {<<"_id">>, ?DOC2}, - {<<"source">>, <<"src">>} - ]}} - ]}. - --endif. diff --git a/src/couch_replicator/src/couch_replicator_doc_processor_worker.erl b/src/couch_replicator/src/couch_replicator_doc_processor_worker.erl deleted file mode 100644 index a4c829323..000000000 --- a/src/couch_replicator/src/couch_replicator_doc_processor_worker.erl +++ /dev/null @@ -1,284 +0,0 @@ -% Licensed under the Apache License, Version 2.0 (the "License"); you may not -% use this file except in compliance with the License. You may obtain a copy of -% the License at -% -% http://www.apache.org/licenses/LICENSE-2.0 -% -% Unless required by applicable law or agreed to in writing, software -% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -% License for the specific language governing permissions and limitations under -% the License. - --module(couch_replicator_doc_processor_worker). - --export([ - spawn_worker/4 -]). - --include("couch_replicator.hrl"). - --import(couch_replicator_utils, [ - pp_rep_id/1 -]). - -% 61 seconds here because request usually have 10, 15, 30 second -% timeouts set. We'd want the worker to get a chance to make a few -% requests (maybe one failing one and a retry) and then fail with its -% own error (timeout, network error), which would be more specific and -% informative, before it simply gets killed because of the timeout -% here. That is, if all fails and the worker is actually blocked then -% 61 sec is a safety net to brutally kill the worker so doesn't end up -% hung forever. --define(WORKER_TIMEOUT_MSEC, 61000). - - -% Spawn a worker which attempts to calculate replication id then add a -% replication job to scheduler. This function create a monitor to the worker -% a worker will then exit with the #doc_worker_result{} record within -% ?WORKER_TIMEOUT_MSEC timeout period.A timeout is considered a -%`temporary_error`. Result will be sent as the `Reason` in the {'DOWN',...} -% message. --spec spawn_worker(db_doc_id(), #rep{}, seconds(), reference()) -> pid(). -spawn_worker(Id, Rep, WaitSec, WRef) -> - {Pid, _Ref} = spawn_monitor(fun() -> - worker_fun(Id, Rep, WaitSec, WRef) - end), - Pid. - - -% Private functions - --spec worker_fun(db_doc_id(), #rep{}, seconds(), reference()) -> no_return(). -worker_fun(Id, Rep, WaitSec, WRef) -> - timer:sleep(WaitSec * 1000), - Fun = fun() -> - try maybe_start_replication(Id, Rep, WRef) of - Res -> - exit(Res) - catch - throw:{filter_fetch_error, Reason} -> - exit({temporary_error, Reason}); - _Tag:Reason -> - exit({temporary_error, Reason}) - end - end, - {Pid, Ref} = spawn_monitor(Fun), - receive - {'DOWN', Ref, _, Pid, Result} -> - exit(#doc_worker_result{id = Id, wref = WRef, result = Result}) - after ?WORKER_TIMEOUT_MSEC -> - erlang:demonitor(Ref, [flush]), - exit(Pid, kill), - {DbName, DocId} = Id, - TimeoutSec = round(?WORKER_TIMEOUT_MSEC / 1000), - Msg = io_lib:format("Replication for db ~p doc ~p failed to start due " - "to timeout after ~B seconds", [DbName, DocId, TimeoutSec]), - Result = {temporary_error, couch_util:to_binary(Msg)}, - exit(#doc_worker_result{id = Id, wref = WRef, result = Result}) - end. - - -% Try to start a replication. Used by a worker. This function should return -% rep_start_result(), also throws {filter_fetch_error, Reason} if cannot fetch -% filter.It can also block for an indeterminate amount of time while fetching -% filter. -maybe_start_replication(Id, RepWithoutId, WRef) -> - Rep = couch_replicator_docs:update_rep_id(RepWithoutId), - case maybe_add_job_to_scheduler(Id, Rep, WRef) of - ignore -> - ignore; - {ok, RepId} -> - {ok, RepId}; - {temporary_error, Reason} -> - {temporary_error, Reason}; - {permanent_failure, Reason} -> - {DbName, DocId} = Id, - couch_replicator_docs:update_failed(DbName, DocId, Reason), - {permanent_failure, Reason} - end. - - --spec maybe_add_job_to_scheduler(db_doc_id(), #rep{}, reference()) -> - rep_start_result(). -maybe_add_job_to_scheduler({DbName, DocId}, Rep, WRef) -> - RepId = Rep#rep.id, - case couch_replicator_scheduler:rep_state(RepId) of - nil -> - % Before adding a job check that this worker is still the current - % worker. This is to handle a race condition where a worker which was - % sleeping and then checking a replication filter may inadvertently - % re-add a replication which was already deleted. - case couch_replicator_doc_processor:get_worker_ref({DbName, DocId}) of - WRef -> - ok = couch_replicator_scheduler:add_job(Rep), - {ok, RepId}; - _NilOrOtherWRef -> - ignore - end; - #rep{doc_id = DocId} -> - {ok, RepId}; - #rep{doc_id = null} -> - Msg = io_lib:format("Replication `~s` specified by document `~s`" - " already running as a transient replication, started via" - " `_replicate` API endpoint", [pp_rep_id(RepId), DocId]), - {temporary_error, couch_util:to_binary(Msg)}; - #rep{db_name = OtherDb, doc_id = OtherDocId} -> - Msg = io_lib:format("Replication `~s` specified by document `~s`" - " already started, triggered by document `~s` from db `~s`", - [pp_rep_id(RepId), DocId, OtherDocId, mem3:dbname(OtherDb)]), - {permanent_failure, couch_util:to_binary(Msg)} - end. - - --ifdef(TEST). - --include_lib("eunit/include/eunit.hrl"). - --define(DB, <<"db">>). --define(DOC1, <<"doc1">>). --define(R1, {"ad08e05057046eabe898a2572bbfb573", ""}). - - -doc_processor_worker_test_() -> - { - foreach, - fun setup/0, - fun teardown/1, - [ - t_should_add_job(), - t_already_running_same_docid(), - t_already_running_transient(), - t_already_running_other_db_other_doc(), - t_spawn_worker(), - t_ignore_if_doc_deleted(), - t_ignore_if_worker_ref_does_not_match() - ] - }. - - -% Replication is already running, with same doc id. Ignore change. -t_should_add_job() -> - ?_test(begin - Id = {?DB, ?DOC1}, - Rep = couch_replicator_docs:parse_rep_doc_without_id(change()), - ?assertEqual({ok, ?R1}, maybe_start_replication(Id, Rep, nil)), - ?assert(added_job()) - end). - - -% Replication is already running, with same doc id. Ignore change. -t_already_running_same_docid() -> - ?_test(begin - Id = {?DB, ?DOC1}, - mock_already_running(?DB, ?DOC1), - Rep = couch_replicator_docs:parse_rep_doc_without_id(change()), - ?assertEqual({ok, ?R1}, maybe_start_replication(Id, Rep, nil)), - ?assert(did_not_add_job()) - end). - - -% There is a transient replication with same replication id running. Ignore. -t_already_running_transient() -> - ?_test(begin - Id = {?DB, ?DOC1}, - mock_already_running(null, null), - Rep = couch_replicator_docs:parse_rep_doc_without_id(change()), - ?assertMatch({temporary_error, _}, maybe_start_replication(Id, Rep, - nil)), - ?assert(did_not_add_job()) - end). - - -% There is a duplicate replication potentially from a different db and doc. -% Write permanent failure to doc. -t_already_running_other_db_other_doc() -> - ?_test(begin - Id = {?DB, ?DOC1}, - mock_already_running(<<"otherdb">>, <<"otherdoc">>), - Rep = couch_replicator_docs:parse_rep_doc_without_id(change()), - ?assertMatch({permanent_failure, _}, maybe_start_replication(Id, Rep, - nil)), - ?assert(did_not_add_job()), - 1 == meck:num_calls(couch_replicator_docs, update_failed, '_') - end). - - -% Should spawn worker -t_spawn_worker() -> - ?_test(begin - Id = {?DB, ?DOC1}, - Rep = couch_replicator_docs:parse_rep_doc_without_id(change()), - WRef = make_ref(), - meck:expect(couch_replicator_doc_processor, get_worker_ref, 1, WRef), - Pid = spawn_worker(Id, Rep, 0, WRef), - Res = receive {'DOWN', _Ref, process, Pid, Reason} -> Reason - after 1000 -> timeout end, - Expect = #doc_worker_result{id = Id, wref = WRef, result = {ok, ?R1}}, - ?assertEqual(Expect, Res), - ?assert(added_job()) - end). - - -% Should not add job if by the time worker got to fetching the filter -% and getting a replication id, replication doc was deleted -t_ignore_if_doc_deleted() -> - ?_test(begin - Id = {?DB, ?DOC1}, - Rep = couch_replicator_docs:parse_rep_doc_without_id(change()), - meck:expect(couch_replicator_doc_processor, get_worker_ref, 1, nil), - ?assertEqual(ignore, maybe_start_replication(Id, Rep, make_ref())), - ?assertNot(added_job()) - end). - - -% Should not add job if by the time worker got to fetchign the filter -% and building a replication id, another worker was spawned. -t_ignore_if_worker_ref_does_not_match() -> - ?_test(begin - Id = {?DB, ?DOC1}, - Rep = couch_replicator_docs:parse_rep_doc_without_id(change()), - meck:expect(couch_replicator_doc_processor, get_worker_ref, 1, - make_ref()), - ?assertEqual(ignore, maybe_start_replication(Id, Rep, make_ref())), - ?assertNot(added_job()) - end). - - -% Test helper functions - -setup() -> - meck:expect(couch_replicator_scheduler, add_job, 1, ok), - meck:expect(config, get, fun(_, _, Default) -> Default end), - meck:expect(couch_server, get_uuid, 0, this_is_snek), - meck:expect(couch_replicator_docs, update_failed, 3, ok), - meck:expect(couch_replicator_scheduler, rep_state, 1, nil), - meck:expect(couch_replicator_doc_processor, get_worker_ref, 1, nil), - ok. - - -teardown(_) -> - meck:unload(). - - -mock_already_running(DbName, DocId) -> - meck:expect(couch_replicator_scheduler, rep_state, - fun(RepId) -> #rep{id = RepId, doc_id = DocId, db_name = DbName} end). - - -added_job() -> - 1 == meck:num_calls(couch_replicator_scheduler, add_job, '_'). - - -did_not_add_job() -> - 0 == meck:num_calls(couch_replicator_scheduler, add_job, '_'). - - -change() -> - {[ - {<<"_id">>, ?DOC1}, - {<<"source">>, <<"http://srchost.local/src">>}, - {<<"target">>, <<"http://tgthost.local/tgt">>} - ]}. - --endif. diff --git a/src/couch_replicator/src/couch_replicator_fabric.erl b/src/couch_replicator/src/couch_replicator_fabric.erl deleted file mode 100644 index 1650105b5..000000000 --- a/src/couch_replicator/src/couch_replicator_fabric.erl +++ /dev/null @@ -1,155 +0,0 @@ -% Licensed under the Apache License, Version 2.0 (the "License"); you may not -% use this file except in compliance with the License. You may obtain a copy of -% the License at -% -% http://www.apache.org/licenses/LICENSE-2.0 -% -% Unless required by applicable law or agreed to in writing, software -% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -% License for the specific language governing permissions and limitations under -% the License. - --module(couch_replicator_fabric). - --export([ - docs/5 -]). - --include_lib("fabric/include/fabric.hrl"). --include_lib("mem3/include/mem3.hrl"). --include_lib("couch/include/couch_db.hrl"). --include_lib("couch_mrview/include/couch_mrview.hrl"). - -docs(DbName, Options, QueryArgs, Callback, Acc) -> - Shards = mem3:shards(DbName), - Workers0 = fabric_util:submit_jobs( - Shards, couch_replicator_fabric_rpc, docs, [Options, QueryArgs]), - RexiMon = fabric_util:create_monitors(Workers0), - try - case fabric_streams:start(Workers0, #shard.ref) of - {ok, Workers} -> - try - docs_int(DbName, Workers, QueryArgs, Callback, Acc) - after - fabric_streams:cleanup(Workers) - end; - {timeout, NewState} -> - DefunctWorkers = fabric_util:remove_done_workers( - NewState#stream_acc.workers, waiting - ), - fabric_util:log_timeout( - DefunctWorkers, - "replicator docs" - ), - Callback({error, timeout}, Acc); - {error, Error} -> - Callback({error, Error}, Acc) - end - after - rexi_monitor:stop(RexiMon) - end. - - -docs_int(DbName, Workers, QueryArgs, Callback, Acc0) -> - #mrargs{limit = Limit, skip = Skip} = QueryArgs, - State = #collector{ - db_name = DbName, - query_args = QueryArgs, - callback = Callback, - counters = fabric_dict:init(Workers, 0), - skip = Skip, - limit = Limit, - user_acc = Acc0, - update_seq = nil - }, - case rexi_utils:recv(Workers, #shard.ref, fun handle_message/3, - State, infinity, 5000) of - {ok, NewState} -> - {ok, NewState#collector.user_acc}; - {timeout, NewState} -> - Callback({error, timeout}, NewState#collector.user_acc); - {error, Resp} -> - {ok, Resp} - end. - -handle_message({rexi_DOWN, _, {_, NodeRef}, _}, _, State) -> - fabric_view:check_down_shards(State, NodeRef); - -handle_message({rexi_EXIT, Reason}, Worker, State) -> - fabric_view:handle_worker_exit(State, Worker, Reason); - -handle_message({meta, Meta0}, {Worker, From}, State) -> - Tot = couch_util:get_value(total, Meta0, 0), - Off = couch_util:get_value(offset, Meta0, 0), - #collector{ - callback = Callback, - counters = Counters0, - total_rows = Total0, - offset = Offset0, - user_acc = AccIn - } = State, - % Assert that we don't have other messages from this - % worker when the total_and_offset message arrives. - 0 = fabric_dict:lookup_element(Worker, Counters0), - rexi:stream_ack(From), - Counters1 = fabric_dict:update_counter(Worker, 1, Counters0), - Total = Total0 + Tot, - Offset = Offset0 + Off, - case fabric_dict:any(0, Counters1) of - true -> - {ok, State#collector{ - counters = Counters1, - total_rows = Total, - offset = Offset - }}; - false -> - FinalOffset = erlang:min(Total, Offset+State#collector.skip), - Meta = [{total, Total}, {offset, FinalOffset}], - {Go, Acc} = Callback({meta, Meta}, AccIn), - {Go, State#collector{ - counters = fabric_dict:decrement_all(Counters1), - total_rows = Total, - offset = FinalOffset, - user_acc = Acc - }} - end; - -handle_message(#view_row{id = Id, doc = Doc} = Row0, {Worker, From}, State) -> - #collector{query_args = Args, counters = Counters0, rows = Rows0} = State, - case maybe_fetch_and_filter_doc(Id, Doc, State) of - {[_ | _]} = NewDoc -> - Row = Row0#view_row{doc = NewDoc}, - Dir = Args#mrargs.direction, - Rows = merge_row(Dir, Row#view_row{worker={Worker, From}}, Rows0), - Counters1 = fabric_dict:update_counter(Worker, 1, Counters0), - State1 = State#collector{rows=Rows, counters=Counters1}, - fabric_view:maybe_send_row(State1); - skip -> - rexi:stream_ack(From), - {ok, State} - end; - -handle_message(complete, Worker, State) -> - Counters = fabric_dict:update_counter(Worker, 1, State#collector.counters), - fabric_view:maybe_send_row(State#collector{counters = Counters}). - - -merge_row(fwd, Row, Rows) -> - lists:keymerge(#view_row.id, [Row], Rows); -merge_row(rev, Row, Rows) -> - lists:rkeymerge(#view_row.id, [Row], Rows). - - -maybe_fetch_and_filter_doc(Id, undecided, State) -> - #collector{db_name = DbName, query_args = #mrargs{extra = Extra}} = State, - FilterStates = proplists:get_value(filter_states, Extra), - case couch_replicator:active_doc(DbName, Id) of - {ok, {Props} = DocInfo} -> - DocState = couch_util:get_value(state, Props), - couch_replicator_utils:filter_state(DocState, FilterStates, DocInfo); - {error, not_found} -> - skip % could have been deleted - end; -maybe_fetch_and_filter_doc(_Id, Doc, _State) -> - Doc. diff --git a/src/couch_replicator/src/couch_replicator_fabric_rpc.erl b/src/couch_replicator/src/couch_replicator_fabric_rpc.erl deleted file mode 100644 index d67f87548..000000000 --- a/src/couch_replicator/src/couch_replicator_fabric_rpc.erl +++ /dev/null @@ -1,97 +0,0 @@ -% Licensed under the Apache License, Version 2.0 (the "License"); you may not -% use this file except in compliance with the License. You may obtain a copy of -% the License at -% -% http://www.apache.org/licenses/LICENSE-2.0 -% -% Unless required by applicable law or agreed to in writing, software -% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -% License for the specific language governing permissions and limitations under -% the License. - --module(couch_replicator_fabric_rpc). - --export([ - docs/3 -]). - --include_lib("fabric/include/fabric.hrl"). --include_lib("couch/include/couch_db.hrl"). --include_lib("couch_mrview/include/couch_mrview.hrl"). - - -docs(DbName, Options, Args0) -> - set_io_priority(DbName, Options), - #mrargs{skip = Skip, limit = Limit, extra = Extra} = Args0, - FilterStates = proplists:get_value(filter_states, Extra), - Args = Args0#mrargs{skip = 0, limit = Skip + Limit}, - HealthThreshold = couch_replicator_scheduler:health_threshold(), - {ok, Db} = couch_db:open_int(DbName, Options), - Acc = {DbName, FilterStates, HealthThreshold}, - couch_mrview:query_all_docs(Db, Args, fun docs_cb/2, Acc). - - -docs_cb({meta, Meta}, Acc) -> - ok = rexi:stream2({meta, Meta}), - {ok, Acc}; -docs_cb({row, Row}, {DbName, States, HealthThreshold} = Acc) -> - Id = couch_util:get_value(id, Row), - Doc = couch_util:get_value(doc, Row), - ViewRow = #view_row{ - id = Id, - key = couch_util:get_value(key, Row), - value = couch_util:get_value(value, Row) - }, - case rep_doc_state(DbName, Id, Doc, States, HealthThreshold) of - skip -> - ok; - Other -> - ok = rexi:stream2(ViewRow#view_row{doc = Other}) - end, - {ok, Acc}; -docs_cb(complete, Acc) -> - ok = rexi:stream_last(complete), - {ok, Acc}. - - -set_io_priority(DbName, Options) -> - case lists:keyfind(io_priority, 1, Options) of - {io_priority, Pri} -> - erlang:put(io_priority, Pri); - false -> - erlang:put(io_priority, {interactive, DbName}) - end. - - -%% Get the state of the replication document. If it is found and has a terminal -%% state then it can be filtered and either included in the results or skipped. -%% If it is not in a terminal state, look it up in the local doc processor ETS -%% table. If it is there then filter by state. If it is not found there either -%% then mark it as `undecided` and let the coordinator try to fetch it. The -%% The idea is to do as much work as possible locally and leave the minimum -%% amount of work for the coordinator. -rep_doc_state(_Shard, <<"_design/", _/binary>>, _, _, _) -> - skip; -rep_doc_state(Shard, Id, {[_ | _]} = Doc, States, HealthThreshold) -> - DbName = mem3:dbname(Shard), - DocInfo = couch_replicator:info_from_doc(DbName, Doc), - case get_doc_state(DocInfo) of - null -> - % Fetch from local doc processor. If there, filter by state. - % If not there, mark as undecided. Let coordinator figure it out. - case couch_replicator_doc_processor:doc_lookup(Shard, Id, - HealthThreshold) of - {ok, EtsInfo} -> - State = get_doc_state(EtsInfo), - couch_replicator_utils:filter_state(State, States, EtsInfo); - {error, not_found} -> - undecided - end; - OtherState when is_atom(OtherState) -> - couch_replicator_utils:filter_state(OtherState, States, DocInfo) - end. - - -get_doc_state({Props})-> - couch_util:get_value(state, Props). diff --git a/src/couch_replicator/src/couch_replicator_httpd_util.erl b/src/couch_replicator/src/couch_replicator_httpd_util.erl deleted file mode 100644 index 624eddd2f..000000000 --- a/src/couch_replicator/src/couch_replicator_httpd_util.erl +++ /dev/null @@ -1,201 +0,0 @@ -% Licensed under the Apache License, Version 2.0 (the "License"); you may not -% use this file except in compliance with the License. You may obtain a copy of -% the License at -% -% http://www.apache.org/licenses/LICENSE-2.0 -% -% Unless required by applicable law or agreed to in writing, software -% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -% License for the specific language governing permissions and limitations under -% the License. - --module(couch_replicator_httpd_util). - --include_lib("couch/include/couch_db.hrl"). --include_lib("couch_mrview/include/couch_mrview.hrl"). - --export([ - validate_rep_props/1, - parse_int_param/5, - parse_replication_state_filter/1, - update_db_name/1, - docs_acc_new/3, - docs_acc_response/1, - docs_cb/2 -]). - --import(couch_httpd, [ - send_json/2, - send_json/3, - send_method_not_allowed/2 -]). - --import(couch_util, [ - to_binary/1 -]). - - -parse_replication_state_filter(undefined) -> - []; % This is the default (wildcard) filter -parse_replication_state_filter(States) when is_list(States) -> - AllStates = couch_replicator:replication_states(), - StrStates = [string:to_lower(S) || S <- string:tokens(States, ",")], - AtomStates = try - [list_to_existing_atom(S) || S <- StrStates] - catch error:badarg -> - Msg1 = io_lib:format("States must be one or more of ~w", [AllStates]), - throw({query_parse_error, ?l2b(Msg1)}) - end, - AllSet = sets:from_list(AllStates), - StatesSet = sets:from_list(AtomStates), - Diff = sets:to_list(sets:subtract(StatesSet, AllSet)), - case Diff of - [] -> - AtomStates; - _ -> - Args = [Diff, AllStates], - Msg2 = io_lib:format("Unknown states ~w. Choose from: ~w", Args), - throw({query_parse_error, ?l2b(Msg2)}) - end. - - -parse_int_param(Req, Param, Default, Min, Max) -> - IntVal = try - list_to_integer(chttpd:qs_value(Req, Param, integer_to_list(Default))) - catch error:badarg -> - Msg1 = io_lib:format("~s must be an integer", [Param]), - throw({query_parse_error, ?l2b(Msg1)}) - end, - case IntVal >= Min andalso IntVal =< Max of - true -> - IntVal; - false -> - Msg2 = io_lib:format("~s not in range of [~w,~w]", [Param, Min, Max]), - throw({query_parse_error, ?l2b(Msg2)}) - end. - - -validate_rep_props([]) -> - ok; -validate_rep_props([{<<"query_params">>, {Params}}|Rest]) -> - lists:foreach(fun - ({_,V}) when is_binary(V) -> ok; - ({K,_}) -> throw({bad_request, - <>}) - end, Params), - validate_rep_props(Rest); -validate_rep_props([_|Rest]) -> - validate_rep_props(Rest). - - -prepend_val(#vacc{prepend=Prepend}) -> - case Prepend of - undefined -> - ""; - _ -> - Prepend - end. - - -maybe_flush_response(#vacc{bufsize=Size, threshold=Max} = Acc, Data, Len) - when Size > 0 andalso (Size + Len) > Max -> - #vacc{buffer = Buffer, resp = Resp} = Acc, - {ok, R1} = chttpd:send_delayed_chunk(Resp, Buffer), - {ok, Acc#vacc{prepend = ",\r\n", buffer = Data, bufsize = Len, resp = R1}}; -maybe_flush_response(Acc0, Data, Len) -> - #vacc{buffer = Buf, bufsize = Size} = Acc0, - Acc = Acc0#vacc{ - prepend = ",\r\n", - buffer = [Buf | Data], - bufsize = Size + Len - }, - {ok, Acc}. - -docs_acc_new(Req, Db, Threshold) -> - #vacc{db=Db, req=Req, threshold=Threshold}. - -docs_acc_response(#vacc{resp = Resp}) -> - Resp. - -docs_cb({error, Reason}, #vacc{resp=undefined}=Acc) -> - {ok, Resp} = chttpd:send_error(Acc#vacc.req, Reason), - {ok, Acc#vacc{resp=Resp}}; - -docs_cb(complete, #vacc{resp=undefined}=Acc) -> - % Nothing in view - {ok, Resp} = chttpd:send_json(Acc#vacc.req, 200, {[{rows, []}]}), - {ok, Acc#vacc{resp=Resp}}; - -docs_cb(Msg, #vacc{resp=undefined}=Acc) -> - %% Start response - Headers = [], - {ok, Resp} = chttpd:start_delayed_json_response(Acc#vacc.req, 200, Headers), - docs_cb(Msg, Acc#vacc{resp=Resp, should_close=true}); - -docs_cb({error, Reason}, #vacc{resp=Resp}=Acc) -> - {ok, Resp1} = chttpd:send_delayed_error(Resp, Reason), - {ok, Acc#vacc{resp=Resp1}}; - -docs_cb(complete, #vacc{resp=Resp, buffer=Buf, threshold=Max}=Acc) -> - % Finish view output and possibly end the response - {ok, Resp1} = chttpd:close_delayed_json_object(Resp, Buf, "\r\n]}", Max), - case Acc#vacc.should_close of - true -> - {ok, Resp2} = chttpd:end_delayed_json_response(Resp1), - {ok, Acc#vacc{resp=Resp2}}; - _ -> - {ok, Acc#vacc{resp=Resp1, meta_sent=false, row_sent=false, - prepend=",\r\n", buffer=[], bufsize=0}} - end; - -docs_cb({meta, Meta}, #vacc{meta_sent=false, row_sent=false}=Acc) -> - % Sending metadata as we've not sent it or any row yet - Parts = case couch_util:get_value(total, Meta) of - undefined -> []; - Total -> [io_lib:format("\"total_rows\":~p", [adjust_total(Total)])] - end ++ case couch_util:get_value(offset, Meta) of - undefined -> []; - Offset -> [io_lib:format("\"offset\":~p", [Offset])] - end ++ ["\"docs\":["], - Chunk = [prepend_val(Acc), "{", string:join(Parts, ","), "\r\n"], - {ok, AccOut} = maybe_flush_response(Acc, Chunk, iolist_size(Chunk)), - {ok, AccOut#vacc{prepend="", meta_sent=true}}; - - -docs_cb({meta, _Meta}, #vacc{}=Acc) -> - %% ignore metadata - {ok, Acc}; - -docs_cb({row, Row}, #vacc{meta_sent=false}=Acc) -> - %% sorted=false and row arrived before meta - % Adding another row - Chunk = [prepend_val(Acc), "{\"docs\":[\r\n", row_to_json(Row)], - maybe_flush_response(Acc#vacc{meta_sent=true, row_sent=true}, Chunk, iolist_size(Chunk)); - -docs_cb({row, Row}, #vacc{meta_sent=true}=Acc) -> - % Adding another row - Chunk = [prepend_val(Acc), row_to_json(Row)], - maybe_flush_response(Acc#vacc{row_sent=true}, Chunk, iolist_size(Chunk)). - - -update_db_name({Props}) -> - {value, {database, DbName}, Props1} = lists:keytake(database, 1, Props), - {[{database, normalize_db_name(DbName)} | Props1]}. - -normalize_db_name(<<"shards/", _/binary>> = DbName) -> - mem3:dbname(DbName); -normalize_db_name(DbName) -> - DbName. - -row_to_json(Row) -> - Doc0 = couch_util:get_value(doc, Row), - Doc1 = update_db_name(Doc0), - ?JSON_ENCODE(Doc1). - - -%% Adjust Total as there is an automatically created validation design doc -adjust_total(Total) when is_integer(Total), Total > 0 -> - Total - 1; -adjust_total(Total) when is_integer(Total) -> - 0. diff --git a/src/couch_replicator/src/couch_replicator_job_sup.erl b/src/couch_replicator/src/couch_replicator_job_sup.erl deleted file mode 100644 index 9ea65e85f..000000000 --- a/src/couch_replicator/src/couch_replicator_job_sup.erl +++ /dev/null @@ -1,34 +0,0 @@ -% Licensed under the Apache License, Version 2.0 (the "License"); you may not -% use this file except in compliance with the License. You may obtain a copy of -% the License at -% -% http://www.apache.org/licenses/LICENSE-2.0 -% -% Unless required by applicable law or agreed to in writing, software -% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -% License for the specific language governing permissions and limitations under -% the License. - --module(couch_replicator_job_sup). - --behaviour(supervisor). - --export([ - init/1, - start_link/0 -]). - -start_link() -> - supervisor:start_link({local,?MODULE}, ?MODULE, []). - -%%============================================================================= -%% supervisor callbacks -%%============================================================================= - -init([]) -> - {ok, {{one_for_one, 3, 10}, []}}. - -%%============================================================================= -%% internal functions -%%============================================================================= diff --git a/src/couch_replicator/src/couch_replicator_js_functions.hrl b/src/couch_replicator/src/couch_replicator_js_functions.hrl deleted file mode 100644 index d41043309..000000000 --- a/src/couch_replicator/src/couch_replicator_js_functions.hrl +++ /dev/null @@ -1,177 +0,0 @@ -% Licensed under the Apache License, Version 2.0 (the "License"); you may not -% use this file except in compliance with the License. You may obtain a copy of -% the License at -% -% http://www.apache.org/licenses/LICENSE-2.0 -% -% Unless required by applicable law or agreed to in writing, software -% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -% License for the specific language governing permissions and limitations under -% the License. - --define(REP_DB_DOC_VALIDATE_FUN, <<" - function(newDoc, oldDoc, userCtx) { - function reportError(error_msg) { - log('Error writing document `' + newDoc._id + - '\\' to the replicator database: ' + error_msg); - throw({forbidden: error_msg}); - } - - function validateEndpoint(endpoint, fieldName) { - if ((typeof endpoint !== 'string') && - ((typeof endpoint !== 'object') || (endpoint === null))) { - - reportError('The `' + fieldName + '\\' property must exist' + - ' and be either a string or an object.'); - } - - if (typeof endpoint === 'object') { - if ((typeof endpoint.url !== 'string') || !endpoint.url) { - reportError('The url property must exist in the `' + - fieldName + '\\' field and must be a non-empty string.'); - } - - if ((typeof endpoint.auth !== 'undefined') && - ((typeof endpoint.auth !== 'object') || - endpoint.auth === null)) { - - reportError('`' + fieldName + - '.auth\\' must be a non-null object.'); - } - - if ((typeof endpoint.headers !== 'undefined') && - ((typeof endpoint.headers !== 'object') || - endpoint.headers === null)) { - - reportError('`' + fieldName + - '.headers\\' must be a non-null object.'); - } - } - } - - var isReplicator = (userCtx.roles.indexOf('_replicator') >= 0); - var isAdmin = (userCtx.roles.indexOf('_admin') >= 0); - - if (isReplicator) { - // Always let replicator update the replication document - return; - } - - if (newDoc._replication_state === 'failed') { - // Skip validation in case when we update the document with the - // failed state. In this case it might be malformed. However, - // replicator will not pay attention to failed documents so this - // is safe. - return; - } - - if (!newDoc._deleted) { - validateEndpoint(newDoc.source, 'source'); - validateEndpoint(newDoc.target, 'target'); - - if ((typeof newDoc.create_target !== 'undefined') && - (typeof newDoc.create_target !== 'boolean')) { - - reportError('The `create_target\\' field must be a boolean.'); - } - - if ((typeof newDoc.continuous !== 'undefined') && - (typeof newDoc.continuous !== 'boolean')) { - - reportError('The `continuous\\' field must be a boolean.'); - } - - if ((typeof newDoc.doc_ids !== 'undefined') && - !isArray(newDoc.doc_ids)) { - - reportError('The `doc_ids\\' field must be an array of strings.'); - } - - if ((typeof newDoc.selector !== 'undefined') && - (typeof newDoc.selector !== 'object')) { - - reportError('The `selector\\' field must be an object.'); - } - - if ((typeof newDoc.filter !== 'undefined') && - ((typeof newDoc.filter !== 'string') || !newDoc.filter)) { - - reportError('The `filter\\' field must be a non-empty string.'); - } - - if ((typeof newDoc.doc_ids !== 'undefined') && - (typeof newDoc.selector !== 'undefined')) { - - reportError('`doc_ids\\' field is incompatible with `selector\\'.'); - } - - if ( ((typeof newDoc.doc_ids !== 'undefined') || - (typeof newDoc.selector !== 'undefined')) && - (typeof newDoc.filter !== 'undefined') ) { - - reportError('`filter\\' field is incompatible with `selector\\' and `doc_ids\\'.'); - } - - if ((typeof newDoc.query_params !== 'undefined') && - ((typeof newDoc.query_params !== 'object') || - newDoc.query_params === null)) { - - reportError('The `query_params\\' field must be an object.'); - } - - if (newDoc.user_ctx) { - var user_ctx = newDoc.user_ctx; - - if ((typeof user_ctx !== 'object') || (user_ctx === null)) { - reportError('The `user_ctx\\' property must be a ' + - 'non-null object.'); - } - - if (!(user_ctx.name === null || - (typeof user_ctx.name === 'undefined') || - ((typeof user_ctx.name === 'string') && - user_ctx.name.length > 0))) { - - reportError('The `user_ctx.name\\' property must be a ' + - 'non-empty string or null.'); - } - - if (!isAdmin && (user_ctx.name !== userCtx.name)) { - reportError('The given `user_ctx.name\\' is not valid'); - } - - if (user_ctx.roles && !isArray(user_ctx.roles)) { - reportError('The `user_ctx.roles\\' property must be ' + - 'an array of strings.'); - } - - if (!isAdmin && user_ctx.roles) { - for (var i = 0; i < user_ctx.roles.length; i++) { - var role = user_ctx.roles[i]; - - if (typeof role !== 'string' || role.length === 0) { - reportError('Roles must be non-empty strings.'); - } - if (userCtx.roles.indexOf(role) === -1) { - reportError('Invalid role (`' + role + - '\\') in the `user_ctx\\''); - } - } - } - } else { - if (!isAdmin) { - reportError('The `user_ctx\\' property is missing (it is ' + - 'optional for admins only).'); - } - } - } else { - if (!isAdmin) { - if (!oldDoc.user_ctx || (oldDoc.user_ctx.name !== userCtx.name)) { - reportError('Replication documents can only be deleted by ' + - 'admins or by the users who created them.'); - } - } - } - } -">>). diff --git a/src/couch_replicator/src/couch_replicator_notifier.erl b/src/couch_replicator/src/couch_replicator_notifier.erl deleted file mode 100644 index f7640a349..000000000 --- a/src/couch_replicator/src/couch_replicator_notifier.erl +++ /dev/null @@ -1,58 +0,0 @@ -% Licensed under the Apache License, Version 2.0 (the "License"); you may not -% use this file except in compliance with the License. You may obtain a copy of -% the License at -% -% http://www.apache.org/licenses/LICENSE-2.0 -% -% Unless required by applicable law or agreed to in writing, software -% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -% License for the specific language governing permissions and limitations under -% the License. - --module(couch_replicator_notifier). - --behaviour(gen_event). --vsn(1). - -% public API --export([start_link/1, stop/1, notify/1]). - -% gen_event callbacks --export([init/1, terminate/2, code_change/3]). --export([handle_event/2, handle_call/2, handle_info/2]). - --include_lib("couch/include/couch_db.hrl"). - -start_link(FunAcc) -> - couch_event_sup:start_link(couch_replication, - {couch_replicator_notifier, make_ref()}, FunAcc). - -notify(Event) -> - gen_event:notify(couch_replication, Event). - -stop(Pid) -> - couch_event_sup:stop(Pid). - - -init(FunAcc) -> - {ok, FunAcc}. - -terminate(_Reason, _State) -> - ok. - -handle_event(Event, Fun) when is_function(Fun, 1) -> - Fun(Event), - {ok, Fun}; -handle_event(Event, {Fun, Acc}) when is_function(Fun, 2) -> - Acc2 = Fun(Event, Acc), - {ok, {Fun, Acc2}}. - -handle_call(_Msg, State) -> - {ok, ok, State}. - -handle_info(_Msg, State) -> - {ok, State}. - -code_change(_OldVsn, State, _Extra) -> - {ok, State}. diff --git a/src/couch_replicator/src/couch_replicator_scheduler.erl b/src/couch_replicator/src/couch_replicator_scheduler.erl deleted file mode 100644 index 00a352bee..000000000 --- a/src/couch_replicator/src/couch_replicator_scheduler.erl +++ /dev/null @@ -1,1688 +0,0 @@ -% Licensed under the Apache License, Version 2.0 (the "License"); you may not -% use this file except in compliance with the License. You may obtain a copy of -% the License at -% -% http://www.apache.org/licenses/LICENSE-2.0 -% -% Unless required by applicable law or agreed to in writing, software -% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -% License for the specific language governing permissions and limitations under -% the License. - --module(couch_replicator_scheduler). - --behaviour(gen_server). --behaviour(config_listener). - --export([ - start_link/0 -]). - --export([ - init/1, - terminate/2, - handle_call/3, - handle_info/2, - handle_cast/2, - code_change/3, - format_status/2 -]). - --export([ - add_job/1, - remove_job/1, - reschedule/0, - rep_state/1, - find_jobs_by_dbname/1, - find_jobs_by_doc/2, - job_summary/2, - health_threshold/0, - jobs/0, - job/1, - restart_job/1, - update_job_stats/2 -]). - -%% config_listener callbacks --export([ - handle_config_change/5, - handle_config_terminate/3 -]). - -%% for status updater process to allow hot code loading --export([ - stats_updater_loop/1 -]). - --include("couch_replicator_scheduler.hrl"). --include("couch_replicator.hrl"). --include_lib("couch_replicator/include/couch_replicator_api_wrap.hrl"). --include_lib("couch/include/couch_db.hrl"). - -%% types --type event_type() :: added | started | stopped | {crashed, any()}. --type event() :: {Type:: event_type(), When :: erlang:timestamp()}. --type history() :: nonempty_list(event()). - -%% definitions --define(MAX_BACKOFF_EXPONENT, 10). --define(BACKOFF_INTERVAL_MICROS, 30 * 1000 * 1000). --define(DEFAULT_HEALTH_THRESHOLD_SEC, 2 * 60). --define(RELISTEN_DELAY, 5000). --define(STATS_UPDATE_WAIT, 5000). - --define(DEFAULT_MAX_JOBS, 500). --define(DEFAULT_MAX_CHURN, 20). --define(DEFAULT_MAX_HISTORY, 20). --define(DEFAULT_SCHEDULER_INTERVAL, 60000). - - --record(state, {interval, timer, max_jobs, max_churn, max_history, stats_pid}). --record(job, { - id :: job_id() | '$1' | '_', - rep :: #rep{} | '_', - pid :: undefined | pid() | '$1' | '_', - monitor :: undefined | reference() | '_', - history :: history() | '_' -}). - --record(stats_acc, { - pending_n = 0 :: non_neg_integer(), - running_n = 0 :: non_neg_integer(), - crashed_n = 0 :: non_neg_integer() -}). - - -%% public functions - --spec start_link() -> {ok, pid()} | ignore | {error, term()}. -start_link() -> - gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). - - --spec add_job(#rep{}) -> ok. -add_job(#rep{} = Rep) when Rep#rep.id /= undefined -> - case existing_replication(Rep) of - false -> - Job = #job{ - id = Rep#rep.id, - rep = Rep, - history = [{added, os:timestamp()}] - }, - gen_server:call(?MODULE, {add_job, Job}, infinity); - true -> - ok - end. - - --spec remove_job(job_id()) -> ok. -remove_job(Id) -> - gen_server:call(?MODULE, {remove_job, Id}, infinity). - - --spec reschedule() -> ok. -% Trigger a manual reschedule. Used for testing and/or ops. -reschedule() -> - gen_server:call(?MODULE, reschedule, infinity). - - --spec rep_state(rep_id()) -> #rep{} | nil. -rep_state(RepId) -> - case (catch ets:lookup_element(?MODULE, RepId, #job.rep)) of - {'EXIT',{badarg, _}} -> - nil; - Rep -> - Rep - end. - - --spec job_summary(job_id(), non_neg_integer()) -> [_] | nil. -job_summary(JobId, HealthThreshold) -> - case job_by_id(JobId) of - {ok, #job{pid = Pid, history = History, rep = Rep}} -> - ErrorCount = consecutive_crashes(History, HealthThreshold), - {State, Info} = case {Pid, ErrorCount} of - {undefined, 0} -> - case History of - [{{crashed, Error}, _When} | _] -> - {crashing, crash_reason_json(Error)}; - [_ | _] -> - {pending, Rep#rep.stats} - end; - {undefined, ErrorCount} when ErrorCount > 0 -> - [{{crashed, Error}, _When} | _] = History, - {crashing, crash_reason_json(Error)}; - {Pid, ErrorCount} when is_pid(Pid) -> - {running, Rep#rep.stats} - end, - [ - {source, iolist_to_binary(ejson_url(Rep#rep.source))}, - {target, iolist_to_binary(ejson_url(Rep#rep.target))}, - {state, State}, - {info, couch_replicator_utils:ejson_state_info(Info)}, - {error_count, ErrorCount}, - {last_updated, last_updated(History)}, - {start_time, - couch_replicator_utils:iso8601(Rep#rep.start_time)}, - {source_proxy, job_proxy_url(Rep#rep.source)}, - {target_proxy, job_proxy_url(Rep#rep.target)} - ]; - {error, not_found} -> - nil % Job might have just completed - end. - - -job_proxy_url(#httpdb{proxy_url = ProxyUrl}) when is_list(ProxyUrl) -> - list_to_binary(couch_util:url_strip_password(ProxyUrl)); -job_proxy_url(_Endpoint) -> - null. - - -% Health threshold is the minimum amount of time an unhealthy job should run -% crashing before it is considered to be healthy again. HealtThreashold should -% not be 0 as jobs could start and immediately crash, and it shouldn't be -% infinity, since then consecutive crashes would accumulate forever even if -% job is back to normal. --spec health_threshold() -> non_neg_integer(). -health_threshold() -> - config:get_integer("replicator", "health_threshold", - ?DEFAULT_HEALTH_THRESHOLD_SEC). - - --spec find_jobs_by_dbname(binary()) -> list(#rep{}). -find_jobs_by_dbname(DbName) -> - Rep = #rep{db_name = DbName, _ = '_'}, - MatchSpec = #job{id = '$1', rep = Rep, _ = '_'}, - [RepId || [RepId] <- ets:match(?MODULE, MatchSpec)]. - - --spec find_jobs_by_doc(binary(), binary()) -> list(#rep{}). -find_jobs_by_doc(DbName, DocId) -> - Rep = #rep{db_name = DbName, doc_id = DocId, _ = '_'}, - MatchSpec = #job{id = '$1', rep = Rep, _ = '_'}, - [RepId || [RepId] <- ets:match(?MODULE, MatchSpec)]. - - --spec restart_job(binary() | list() | rep_id()) -> - {ok, {[_]}} | {error, not_found}. -restart_job(JobId) -> - case rep_state(JobId) of - nil -> - {error, not_found}; - #rep{} = Rep -> - ok = remove_job(JobId), - ok = add_job(Rep), - job(JobId) - end. - - --spec update_job_stats(job_id(), term()) -> ok. -update_job_stats(JobId, Stats) -> - gen_server:cast(?MODULE, {update_job_stats, JobId, Stats}). - - -%% gen_server functions - -init(_) -> - % Temporarily disable on FDB, as it's not fully implemented yet - % config:enable_feature('scheduler'), - EtsOpts = [named_table, {keypos, #job.id}, {read_concurrency, true}, - {write_concurrency, true}], - ?MODULE = ets:new(?MODULE, EtsOpts), - ok = config:listen_for_changes(?MODULE, nil), - Interval = config:get_integer("replicator", "interval", - ?DEFAULT_SCHEDULER_INTERVAL), - MaxJobs = config:get_integer("replicator", "max_jobs", ?DEFAULT_MAX_JOBS), - MaxChurn = config:get_integer("replicator", "max_churn", - ?DEFAULT_MAX_CHURN), - MaxHistory = config:get_integer("replicator", "max_history", - ?DEFAULT_MAX_HISTORY), - Timer = erlang:send_after(Interval, self(), reschedule), - State = #state{ - interval = Interval, - max_jobs = MaxJobs, - max_churn = MaxChurn, - max_history = MaxHistory, - timer = Timer, - stats_pid = start_stats_updater() - }, - {ok, State}. - - -handle_call({add_job, Job}, _From, State) -> - ok = maybe_remove_job_int(Job#job.id, State), - true = add_job_int(Job), - ok = maybe_start_newly_added_job(Job, State), - couch_stats:increment_counter([couch_replicator, jobs, adds]), - TotalJobs = ets:info(?MODULE, size), - couch_stats:update_gauge([couch_replicator, jobs, total], TotalJobs), - {reply, ok, State}; - -handle_call({remove_job, Id}, _From, State) -> - ok = maybe_remove_job_int(Id, State), - {reply, ok, State}; - -handle_call(reschedule, _From, State) -> - ok = reschedule(State), - {reply, ok, State}; - -handle_call(_, _From, State) -> - {noreply, State}. - - -handle_cast({set_max_jobs, MaxJobs}, State) when is_integer(MaxJobs), - MaxJobs >= 0 -> - couch_log:notice("~p: max_jobs set to ~B", [?MODULE, MaxJobs]), - {noreply, State#state{max_jobs = MaxJobs}}; - -handle_cast({set_max_churn, MaxChurn}, State) when is_integer(MaxChurn), - MaxChurn > 0 -> - couch_log:notice("~p: max_churn set to ~B", [?MODULE, MaxChurn]), - {noreply, State#state{max_churn = MaxChurn}}; - -handle_cast({set_max_history, MaxHistory}, State) when is_integer(MaxHistory), - MaxHistory > 0 -> - couch_log:notice("~p: max_history set to ~B", [?MODULE, MaxHistory]), - {noreply, State#state{max_history = MaxHistory}}; - -handle_cast({set_interval, Interval}, State) when is_integer(Interval), - Interval > 0 -> - couch_log:notice("~p: interval set to ~B", [?MODULE, Interval]), - {noreply, State#state{interval = Interval}}; - -handle_cast({update_job_stats, JobId, Stats}, State) -> - case rep_state(JobId) of - nil -> - ok; - #rep{} = Rep -> - NewRep = Rep#rep{stats = Stats}, - true = ets:update_element(?MODULE, JobId, {#job.rep, NewRep}) - end, - {noreply, State}; - -handle_cast(UnexpectedMsg, State) -> - couch_log:error("~p: received un-expected cast ~p", [?MODULE, UnexpectedMsg]), - {noreply, State}. - - -handle_info(reschedule, State) -> - ok = reschedule(State), - erlang:cancel_timer(State#state.timer), - Timer = erlang:send_after(State#state.interval, self(), reschedule), - {noreply, State#state{timer = Timer}}; - -handle_info({'DOWN', _Ref, process, Pid, normal}, State) -> - {ok, Job} = job_by_pid(Pid), - couch_log:notice("~p: Job ~p completed normally", [?MODULE, Job#job.id]), - remove_job_int(Job), - update_running_jobs_stats(State#state.stats_pid), - {noreply, State}; - -handle_info({'DOWN', _Ref, process, Pid, Reason0}, State) -> - {ok, Job} = job_by_pid(Pid), - Reason = case Reason0 of - {shutdown, ShutdownReason} -> ShutdownReason; - Other -> Other - end, - ok = handle_crashed_job(Job, Reason, State), - {noreply, State}; - -handle_info(restart_config_listener, State) -> - ok = config:listen_for_changes(?MODULE, nil), - {noreply, State}; - -handle_info(_, State) -> - {noreply, State}. - - -code_change(_OldVsn, State, _Extra) -> - {ok, State}. - - -terminate(_Reason, _State) -> - ok. - - -format_status(_Opt, [_PDict, State]) -> - [ - {max_jobs, State#state.max_jobs}, - {running_jobs, running_job_count()}, - {pending_jobs, pending_job_count()} - ]. - - -%% config listener functions - -handle_config_change("replicator", "max_jobs", V, _, S) -> - ok = gen_server:cast(?MODULE, {set_max_jobs, list_to_integer(V)}), - {ok, S}; - -handle_config_change("replicator", "max_churn", V, _, S) -> - ok = gen_server:cast(?MODULE, {set_max_churn, list_to_integer(V)}), - {ok, S}; - -handle_config_change("replicator", "interval", V, _, S) -> - ok = gen_server:cast(?MODULE, {set_interval, list_to_integer(V)}), - {ok, S}; - -handle_config_change("replicator", "max_history", V, _, S) -> - ok = gen_server:cast(?MODULE, {set_max_history, list_to_integer(V)}), - {ok, S}; - -handle_config_change(_, _, _, _, S) -> - {ok, S}. - - -handle_config_terminate(_, stop, _) -> - ok; - -handle_config_terminate(_, _, _) -> - Pid = whereis(?MODULE), - erlang:send_after(?RELISTEN_DELAY, Pid, restart_config_listener). - - -%% Private functions - -% Handle crashed jobs. Handling differs between transient and permanent jobs. -% Transient jobs are those posted to the _replicate endpoint. They don't have a -% db associated with them. When those jobs crash, they are not restarted. That -% is also consistent with behavior when the node they run on, crashed and they -% do not migrate to other nodes. Permanent jobs are those created from -% replicator documents. Those jobs, once they pass basic validation and end up -% in the scheduler will be retried indefinitely (with appropriate exponential -% backoffs). --spec handle_crashed_job(#job{}, any(), #state{}) -> ok. -handle_crashed_job(#job{rep = #rep{db_name = null}} = Job, Reason, State) -> - Msg = "~p : Transient job ~p failed, removing. Error: ~p", - ErrorBinary = couch_replicator_utils:rep_error_to_binary(Reason), - couch_log:error(Msg, [?MODULE, Job#job.id, ErrorBinary]), - remove_job_int(Job), - update_running_jobs_stats(State#state.stats_pid), - ok; - -handle_crashed_job(Job, Reason, State) -> - ok = update_state_crashed(Job, Reason, State), - case couch_replicator_doc_processor:update_docs() of - true -> - couch_replicator_docs:update_error(Job#job.rep, Reason); - false -> - ok - end, - case ets:info(?MODULE, size) < State#state.max_jobs of - true -> - % Starting pending jobs is an O(TotalJobsCount) operation. Only do - % it if there is a relatively small number of jobs. Otherwise - % scheduler could be blocked if there is a cascade of lots failing - % jobs in a row. - start_pending_jobs(State), - update_running_jobs_stats(State#state.stats_pid), - ok; - false -> - ok - end. - - -% Attempt to start a newly added job. First quickly check if total jobs -% already exceed max jobs, then do a more expensive check which runs a -% select (an O(n) operation) to check pending jobs specifically. --spec maybe_start_newly_added_job(#job{}, #state{}) -> ok. -maybe_start_newly_added_job(Job, State) -> - MaxJobs = State#state.max_jobs, - TotalJobs = ets:info(?MODULE, size), - case TotalJobs < MaxJobs andalso running_job_count() < MaxJobs of - true -> - start_job_int(Job, State), - update_running_jobs_stats(State#state.stats_pid), - ok; - false -> - ok - end. - - -% Return up to a given number of oldest, not recently crashed jobs. Try to be -% memory efficient and use ets:foldl to accumulate jobs. --spec pending_jobs(non_neg_integer()) -> [#job{}]. -pending_jobs(0) -> - % Handle this case as user could set max_churn to 0. If this is passed to - % other function clause it will crash as gb_sets:largest assumes set is not - % empty. - []; - -pending_jobs(Count) when is_integer(Count), Count > 0 -> - Set0 = gb_sets:new(), % [{LastStart, Job},...] - Now = os:timestamp(), - Acc0 = {Set0, Now, Count, health_threshold()}, - {Set1, _, _, _} = ets:foldl(fun pending_fold/2, Acc0, ?MODULE), - [Job || {_Started, Job} <- gb_sets:to_list(Set1)]. - - -pending_fold(Job, {Set, Now, Count, HealthThreshold}) -> - Set1 = case {not_recently_crashed(Job, Now, HealthThreshold), - gb_sets:size(Set) >= Count} of - {true, true} -> - % Job is healthy but already reached accumulated limit, so might - % have to replace one of the accumulated jobs - pending_maybe_replace(Job, Set); - {true, false} -> - % Job is healthy and we haven't reached the limit, so add job - % to accumulator - gb_sets:add_element({last_started(Job), Job}, Set); - {false, _} -> - % This job is not healthy (has crashed too recently), so skip it. - Set - end, - {Set1, Now, Count, HealthThreshold}. - - -% Replace Job in the accumulator if it is older than youngest job there. -% "oldest" here means one which has been waiting to run the longest. "youngest" -% means the one with most recent activity. The goal is to keep up to Count -% oldest jobs during iteration. For example if there are jobs with these times -% accumulated so far [5, 7, 11], and start time of current job is 6. Then -% 6 < 11 is true, so 11 (youngest) is dropped and 6 inserted resulting in -% [5, 6, 7]. In the end the result might look like [1, 2, 5], for example. -pending_maybe_replace(Job, Set) -> - Started = last_started(Job), - {Youngest, YoungestJob} = gb_sets:largest(Set), - case Started < Youngest of - true -> - Set1 = gb_sets:delete({Youngest, YoungestJob}, Set), - gb_sets:add_element({Started, Job}, Set1); - false -> - Set - end. - - -start_jobs(Count, State) -> - [start_job_int(Job, State) || Job <- pending_jobs(Count)], - ok. - - --spec stop_jobs(non_neg_integer(), boolean(), #state{}) -> non_neg_integer(). -stop_jobs(Count, _, _) when is_integer(Count), Count =< 0 -> - 0; - -stop_jobs(Count, IsContinuous, State) when is_integer(Count) -> - Running0 = running_jobs(), - ContinuousPred = fun(Job) -> is_continuous(Job) =:= IsContinuous end, - Running1 = lists:filter(ContinuousPred, Running0), - Running2 = lists:sort(fun longest_running/2, Running1), - Running3 = lists:sublist(Running2, Count), - length([stop_job_int(Job, State) || Job <- Running3]). - - -longest_running(#job{} = A, #job{} = B) -> - last_started(A) =< last_started(B). - - -not_recently_crashed(#job{history = History}, Now, HealthThreshold) -> - case History of - [{added, _When}] -> - true; - [{stopped, _When} | _] -> - true; - _ -> - LatestCrashT = latest_crash_timestamp(History), - CrashCount = consecutive_crashes(History, HealthThreshold), - timer:now_diff(Now, LatestCrashT) >= backoff_micros(CrashCount) - end. - - -% Count consecutive crashes. A crash happens when there is a `crashed` event -% within a short period of time (configurable) after any other event. It could -% be `crashed, started` for jobs crashing quickly after starting, `crashed, -% crashed`, `crashed, stopped` if job repeatedly failed to start -% being stopped. Or it could be `crashed, added` if it crashed immediately after -% being added during start. -% -% A streak of "consecutive crashes" ends when a crashed event is seen starting -% and running successfully without crashing for a period of time. That period -% of time is the HealthThreshold. -% - --spec consecutive_crashes(history(), non_neg_integer()) -> non_neg_integer(). -consecutive_crashes(History, HealthThreshold) when is_list(History) -> - consecutive_crashes(History, HealthThreshold, 0). - - --spec consecutive_crashes(history(), non_neg_integer(), non_neg_integer()) -> - non_neg_integer(). -consecutive_crashes([], _HealthThreashold, Count) -> - Count; - -consecutive_crashes([{{crashed, _}, CrashT}, {_, PrevT} = PrevEvent | Rest], - HealthThreshold, Count) -> - case timer:now_diff(CrashT, PrevT) > HealthThreshold * 1000000 of - true -> - Count; - false -> - consecutive_crashes([PrevEvent | Rest], HealthThreshold, Count + 1) - end; - -consecutive_crashes([{stopped, _}, {started, _} | _], _HealthThreshold, - Count) -> - Count; - -consecutive_crashes([_ | Rest], HealthThreshold, Count) -> - consecutive_crashes(Rest, HealthThreshold, Count). - - --spec latest_crash_timestamp(history()) -> erlang:timestamp(). -latest_crash_timestamp([]) -> - {0, 0, 0}; % Used to avoid special-casing "no crash" when doing now_diff - -latest_crash_timestamp([{{crashed, _Reason}, When} | _]) -> - When; - -latest_crash_timestamp([_Event | Rest]) -> - latest_crash_timestamp(Rest). - - --spec backoff_micros(non_neg_integer()) -> non_neg_integer(). -backoff_micros(CrashCount) -> - % When calculating the backoff interval treat consecutive crash count as the - % exponent in Base * 2 ^ CrashCount to achieve an exponential backoff - % doubling every consecutive failure, starting with the base value of - % ?BACKOFF_INTERVAL_MICROS. - BackoffExp = erlang:min(CrashCount - 1, ?MAX_BACKOFF_EXPONENT), - (1 bsl BackoffExp) * ?BACKOFF_INTERVAL_MICROS. - - --spec add_job_int(#job{}) -> boolean(). -add_job_int(#job{} = Job) -> - ets:insert_new(?MODULE, Job). - - --spec maybe_remove_job_int(job_id(), #state{}) -> ok. -maybe_remove_job_int(JobId, State) -> - case job_by_id(JobId) of - {ok, Job} -> - ok = stop_job_int(Job, State), - true = remove_job_int(Job), - couch_stats:increment_counter([couch_replicator, jobs, removes]), - TotalJobs = ets:info(?MODULE, size), - couch_stats:update_gauge([couch_replicator, jobs, total], - TotalJobs), - update_running_jobs_stats(State#state.stats_pid), - ok; - {error, not_found} -> - ok - end. - - -start_job_int(#job{pid = Pid}, _State) when Pid /= undefined -> - ok; - -start_job_int(#job{} = Job0, State) -> - Job = maybe_optimize_job_for_rate_limiting(Job0), - case couch_replicator_scheduler_sup:start_child(Job#job.rep) of - {ok, Child} -> - Ref = monitor(process, Child), - ok = update_state_started(Job, Child, Ref, State), - couch_log:notice("~p: Job ~p started as ~p", - [?MODULE, Job#job.id, Child]); - {error, {already_started, OtherPid}} when node(OtherPid) =:= node() -> - Ref = monitor(process, OtherPid), - ok = update_state_started(Job, OtherPid, Ref, State), - couch_log:notice("~p: Job ~p already running as ~p. Most likely" - " because replicator scheduler was restarted", - [?MODULE, Job#job.id, OtherPid]); - {error, {already_started, OtherPid}} when node(OtherPid) =/= node() -> - CrashMsg = "Duplicate replication running on another node", - couch_log:notice("~p: Job ~p already running as ~p. Most likely" - " because a duplicate replication is running on another node", - [?MODULE, Job#job.id, OtherPid]), - ok = update_state_crashed(Job, CrashMsg, State); - {error, Reason} -> - couch_log:notice("~p: Job ~p failed to start for reason ~p", - [?MODULE, Job, Reason]), - ok = update_state_crashed(Job, Reason, State) - end. - - --spec stop_job_int(#job{}, #state{}) -> ok | {error, term()}. -stop_job_int(#job{pid = undefined}, _State) -> - ok; - -stop_job_int(#job{} = Job, State) -> - ok = couch_replicator_scheduler_sup:terminate_child(Job#job.pid), - demonitor(Job#job.monitor, [flush]), - ok = update_state_stopped(Job, State), - couch_log:notice("~p: Job ~p stopped as ~p", - [?MODULE, Job#job.id, Job#job.pid]). - - --spec remove_job_int(#job{}) -> true. -remove_job_int(#job{} = Job) -> - ets:delete(?MODULE, Job#job.id). - - --spec running_job_count() -> non_neg_integer(). -running_job_count() -> - ets:info(?MODULE, size) - pending_job_count(). - - --spec running_jobs() -> [#job{}]. -running_jobs() -> - ets:select(?MODULE, [{#job{pid = '$1', _='_'}, [{is_pid, '$1'}], ['$_']}]). - - --spec pending_job_count() -> non_neg_integer(). -pending_job_count() -> - ets:select_count(?MODULE, [{#job{pid=undefined, _='_'}, [], [true]}]). - - --spec job_by_pid(pid()) -> {ok, #job{}} | {error, not_found}. -job_by_pid(Pid) when is_pid(Pid) -> - case ets:match_object(?MODULE, #job{pid=Pid, _='_'}) of - [] -> - {error, not_found}; - [#job{}=Job] -> - {ok, Job} - end. - - --spec job_by_id(job_id()) -> {ok, #job{}} | {error, not_found}. -job_by_id(Id) -> - case ets:lookup(?MODULE, Id) of - [] -> - {error, not_found}; - [#job{}=Job] -> - {ok, Job} - end. - - --spec update_state_stopped(#job{}, #state{}) -> ok. -update_state_stopped(Job, State) -> - Job1 = reset_job_process(Job), - Job2 = update_history(Job1, stopped, os:timestamp(), State), - true = ets:insert(?MODULE, Job2), - couch_stats:increment_counter([couch_replicator, jobs, stops]), - ok. - - --spec update_state_started(#job{}, pid(), reference(), #state{}) -> ok. -update_state_started(Job, Pid, Ref, State) -> - Job1 = set_job_process(Job, Pid, Ref), - Job2 = update_history(Job1, started, os:timestamp(), State), - true = ets:insert(?MODULE, Job2), - couch_stats:increment_counter([couch_replicator, jobs, starts]), - ok. - - --spec update_state_crashed(#job{}, any(), #state{}) -> ok. -update_state_crashed(Job, Reason, State) -> - Job1 = reset_job_process(Job), - Job2 = update_history(Job1, {crashed, Reason}, os:timestamp(), State), - true = ets:insert(?MODULE, Job2), - couch_stats:increment_counter([couch_replicator, jobs, crashes]), - ok. - - --spec set_job_process(#job{}, pid(), reference()) -> #job{}. -set_job_process(#job{} = Job, Pid, Ref) when is_pid(Pid), is_reference(Ref) -> - Job#job{pid = Pid, monitor = Ref}. - - --spec reset_job_process(#job{}) -> #job{}. -reset_job_process(#job{} = Job) -> - Job#job{pid = undefined, monitor = undefined}. - - --spec reschedule(#state{}) -> ok. -reschedule(State) -> - StopCount = stop_excess_jobs(State, running_job_count()), - rotate_jobs(State, StopCount), - update_running_jobs_stats(State#state.stats_pid). - - --spec stop_excess_jobs(#state{}, non_neg_integer()) -> non_neg_integer(). -stop_excess_jobs(State, Running) -> - #state{max_jobs=MaxJobs} = State, - StopCount = max(0, Running - MaxJobs), - Stopped = stop_jobs(StopCount, true, State), - OneshotLeft = StopCount - Stopped, - stop_jobs(OneshotLeft, false, State), - StopCount. - - -start_pending_jobs(State) -> - #state{max_jobs=MaxJobs} = State, - Running = running_job_count(), - Pending = pending_job_count(), - if Running < MaxJobs, Pending > 0 -> - start_jobs(MaxJobs - Running, State); - true -> - ok - end. - - --spec rotate_jobs(#state{}, non_neg_integer()) -> ok. -rotate_jobs(State, ChurnSoFar) -> - #state{max_jobs=MaxJobs, max_churn=MaxChurn} = State, - Running = running_job_count(), - Pending = pending_job_count(), - % Reduce MaxChurn by the number of already stopped jobs in the - % current rescheduling cycle. - Churn = max(0, MaxChurn - ChurnSoFar), - SlotsAvailable = MaxJobs - Running, - if SlotsAvailable >= 0 -> - % If there is are enough SlotsAvailable reduce StopCount to avoid - % unnesessarily stopping jobs. `stop_jobs/3` ignores 0 or negative - % values so we don't worry about that here. - StopCount = lists:min([Pending - SlotsAvailable, Running, Churn]), - stop_jobs(StopCount, true, State), - StartCount = max(0, MaxJobs - running_job_count()), - start_jobs(StartCount, State); - true -> - ok - end. - - --spec last_started(#job{}) -> erlang:timestamp(). -last_started(#job{} = Job) -> - case lists:keyfind(started, 1, Job#job.history) of - false -> - {0, 0, 0}; - {started, When} -> - When - end. - - --spec update_history(#job{}, event_type(), erlang:timestamp(), #state{}) -> - #job{}. -update_history(Job, Type, When, State) -> - History0 = [{Type, When} | Job#job.history], - History1 = lists:sublist(History0, State#state.max_history), - Job#job{history = History1}. - - --spec ejson_url(#httpdb{} | binary()) -> binary(). -ejson_url(#httpdb{}=Httpdb) -> - couch_util:url_strip_password(Httpdb#httpdb.url); -ejson_url(DbName) when is_binary(DbName) -> - DbName. - - --spec job_ejson(#job{}) -> {[_ | _]}. -job_ejson(Job) -> - Rep = Job#job.rep, - Source = ejson_url(Rep#rep.source), - Target = ejson_url(Rep#rep.target), - History = lists:map(fun({Type, When}) -> - EventProps = case Type of - {crashed, Reason} -> - [{type, crashed}, {reason, crash_reason_json(Reason)}]; - Type -> - [{type, Type}] - end, - {[{timestamp, couch_replicator_utils:iso8601(When)} | EventProps]} - end, Job#job.history), - {BaseID, Ext} = Job#job.id, - Pid = case Job#job.pid of - undefined -> - null; - P when is_pid(P) -> - ?l2b(pid_to_list(P)) - end, - {[ - {id, iolist_to_binary([BaseID, Ext])}, - {pid, Pid}, - {source, iolist_to_binary(Source)}, - {target, iolist_to_binary(Target)}, - {database, Rep#rep.db_name}, - {user, (Rep#rep.user_ctx)#user_ctx.name}, - {doc_id, Rep#rep.doc_id}, - {info, couch_replicator_utils:ejson_state_info(Rep#rep.stats)}, - {history, History}, - {node, node()}, - {start_time, couch_replicator_utils:iso8601(Rep#rep.start_time)} - ]}. - - --spec jobs() -> [[tuple()]]. -jobs() -> - ets:foldl(fun(Job, Acc) -> [job_ejson(Job) | Acc] end, [], ?MODULE). - - --spec job(job_id()) -> {ok, {[_ | _]}} | {error, not_found}. -job(JobId) -> - case job_by_id(JobId) of - {ok, Job} -> - {ok, job_ejson(Job)}; - Error -> - Error - end. - - -crash_reason_json({_CrashType, Info}) when is_binary(Info) -> - Info; -crash_reason_json(Reason) when is_binary(Reason) -> - Reason; -crash_reason_json(Error) -> - couch_replicator_utils:rep_error_to_binary(Error). - - --spec last_updated([_]) -> binary(). -last_updated([{_Type, When} | _]) -> - couch_replicator_utils:iso8601(When). - - --spec is_continuous(#job{}) -> boolean(). -is_continuous(#job{rep = Rep}) -> - couch_util:get_value(continuous, Rep#rep.options, false). - - -% If job crashed last time because it was rate limited, try to -% optimize some options to help the job make progress. --spec maybe_optimize_job_for_rate_limiting(#job{}) -> #job{}. -maybe_optimize_job_for_rate_limiting(Job = #job{history = - [{{crashed, max_backoff}, _} | _]}) -> - Opts = [ - {checkpoint_interval, 5000}, - {worker_processes, 2}, - {worker_batch_size, 100}, - {http_connections, 5} - ], - Rep = lists:foldl(fun optimize_int_option/2, Job#job.rep, Opts), - Job#job{rep = Rep}; -maybe_optimize_job_for_rate_limiting(Job) -> - Job. - - --spec optimize_int_option({atom(), any()}, #rep{}) -> #rep{}. -optimize_int_option({Key, Val}, #rep{options = Options} = Rep) -> - case couch_util:get_value(Key, Options) of - CurVal when is_integer(CurVal), CurVal > Val -> - Msg = "~p replication ~p : setting ~p = ~p due to rate limiting", - couch_log:warning(Msg, [?MODULE, Rep#rep.id, Key, Val]), - Options1 = lists:keyreplace(Key, 1, Options, {Key, Val}), - Rep#rep{options = Options1}; - _ -> - Rep - end. - - -% Updater is a separate process. It receives `update_stats` messages and -% updates scheduler stats from the scheduler jobs table. Updates are -% performed no more frequently than once per ?STATS_UPDATE_WAIT milliseconds. - -update_running_jobs_stats(StatsPid) when is_pid(StatsPid) -> - StatsPid ! update_stats, - ok. - - -start_stats_updater() -> - erlang:spawn_link(?MODULE, stats_updater_loop, [undefined]). - - -stats_updater_loop(Timer) -> - receive - update_stats when Timer == undefined -> - TRef = erlang:send_after(?STATS_UPDATE_WAIT, self(), refresh_stats), - ?MODULE:stats_updater_loop(TRef); - update_stats when is_reference(Timer) -> - ?MODULE:stats_updater_loop(Timer); - refresh_stats -> - ok = stats_updater_refresh(), - ?MODULE:stats_updater_loop(undefined); - Else -> - erlang:exit({stats_updater_bad_msg, Else}) - end. - - --spec stats_updater_refresh() -> ok. -stats_updater_refresh() -> - #stats_acc{ - pending_n = PendingN, - running_n = RunningN, - crashed_n = CrashedN - } = ets:foldl(fun stats_fold/2, #stats_acc{}, ?MODULE), - couch_stats:update_gauge([couch_replicator, jobs, pending], PendingN), - couch_stats:update_gauge([couch_replicator, jobs, running], RunningN), - couch_stats:update_gauge([couch_replicator, jobs, crashed], CrashedN), - ok. - - --spec stats_fold(#job{}, #stats_acc{}) -> #stats_acc{}. -stats_fold(#job{pid = undefined, history = [{added, _}]}, Acc) -> - Acc#stats_acc{pending_n = Acc#stats_acc.pending_n + 1}; -stats_fold(#job{pid = undefined, history = [{stopped, _} | _]}, Acc) -> - Acc#stats_acc{pending_n = Acc#stats_acc.pending_n + 1}; -stats_fold(#job{pid = undefined, history = [{{crashed, _}, _} | _]}, Acc) -> - Acc#stats_acc{crashed_n =Acc#stats_acc.crashed_n + 1}; -stats_fold(#job{pid = P, history = [{started, _} | _]}, Acc) when is_pid(P) -> - Acc#stats_acc{running_n = Acc#stats_acc.running_n + 1}. - - --spec existing_replication(#rep{}) -> boolean(). -existing_replication(#rep{} = NewRep) -> - case job_by_id(NewRep#rep.id) of - {ok, #job{rep = CurRep}} -> - NormCurRep = couch_replicator_utils:normalize_rep(CurRep), - NormNewRep = couch_replicator_utils:normalize_rep(NewRep), - NormCurRep == NormNewRep; - {error, not_found} -> - false - end. - - --ifdef(TEST). - --include_lib("eunit/include/eunit.hrl"). - - -backoff_micros_test_() -> - BaseInterval = ?BACKOFF_INTERVAL_MICROS, - [?_assertEqual(R * BaseInterval, backoff_micros(N)) || {R, N} <- [ - {1, 1}, {2, 2}, {4, 3}, {8, 4}, {16, 5}, {32, 6}, {64, 7}, {128, 8}, - {256, 9}, {512, 10}, {1024, 11}, {1024, 12} - ]]. - - -consecutive_crashes_test_() -> - Threshold = ?DEFAULT_HEALTH_THRESHOLD_SEC, - [?_assertEqual(R, consecutive_crashes(H, Threshold)) || {R, H} <- [ - {0, []}, - {0, [added()]}, - {0, [stopped()]}, - {0, [crashed()]}, - {1, [crashed(), added()]}, - {1, [crashed(), crashed()]}, - {1, [crashed(), stopped()]}, - {3, [crashed(), crashed(), crashed(), added()]}, - {2, [crashed(), crashed(), stopped()]}, - {1, [crashed(), started(), added()]}, - {2, [crashed(3), started(2), crashed(1), started(0)]}, - {0, [stopped(3), started(2), crashed(1), started(0)]}, - {1, [crashed(3), started(2), stopped(1), started(0)]}, - {0, [crashed(999), started(0)]}, - {1, [crashed(999), started(998), crashed(997), started(0)]} - ]]. - - -consecutive_crashes_non_default_threshold_test_() -> - [?_assertEqual(R, consecutive_crashes(H, T)) || {R, H, T} <- [ - {0, [crashed(11), started(0)], 10}, - {1, [crashed(10), started(0)], 10} - ]]. - - -latest_crash_timestamp_test_() -> - [?_assertEqual({0, R, 0}, latest_crash_timestamp(H)) || {R, H} <- [ - {0, [added()]}, - {1, [crashed(1)]}, - {3, [crashed(3), started(2), crashed(1), started(0)]}, - {1, [started(3), stopped(2), crashed(1), started(0)]} - ]]. - - -last_started_test_() -> - [?_assertEqual({0, R, 0}, last_started(testjob(H))) || {R, H} <- [ - {0, [added()]}, - {0, [crashed(1)]}, - {1, [started(1)]}, - {1, [added(), started(1)]}, - {2, [started(2), started(1)]}, - {2, [crashed(3), started(2), started(1)]} - ]]. - - -longest_running_test() -> - J0 = testjob([crashed()]), - J1 = testjob([started(1)]), - J2 = testjob([started(2)]), - Sort = fun(Jobs) -> lists:sort(fun longest_running/2, Jobs) end, - ?assertEqual([], Sort([])), - ?assertEqual([J1], Sort([J1])), - ?assertEqual([J1, J2], Sort([J2, J1])), - ?assertEqual([J0, J1, J2], Sort([J2, J1, J0])). - - -scheduler_test_() -> - { - setup, - fun setup_all/0, - fun teardown_all/1, - { - foreach, - fun setup/0, - fun teardown/1, - [ - t_pending_jobs_simple(), - t_pending_jobs_skip_crashed(), - t_one_job_starts(), - t_no_jobs_start_if_max_is_0(), - t_one_job_starts_if_max_is_1(), - t_max_churn_does_not_throttle_initial_start(), - t_excess_oneshot_only_jobs(), - t_excess_continuous_only_jobs(), - t_excess_prefer_continuous_first(), - t_stop_oldest_first(), - t_start_oldest_first(), - t_jobs_churn_even_if_not_all_max_jobs_are_running(), - t_jobs_dont_churn_if_there_are_available_running_slots(), - t_start_only_pending_jobs_do_not_churn_existing_ones(), - t_dont_stop_if_nothing_pending(), - t_max_churn_limits_number_of_rotated_jobs(), - t_existing_jobs(), - t_if_pending_less_than_running_start_all_pending(), - t_running_less_than_pending_swap_all_running(), - t_oneshot_dont_get_rotated(), - t_rotate_continuous_only_if_mixed(), - t_oneshot_dont_get_starting_priority(), - t_oneshot_will_hog_the_scheduler(), - t_if_excess_is_trimmed_rotation_still_happens(), - t_if_transient_job_crashes_it_gets_removed(), - t_if_permanent_job_crashes_it_stays_in_ets(), - t_job_summary_running(), - t_job_summary_pending(), - t_job_summary_crashing_once(), - t_job_summary_crashing_many_times(), - t_job_summary_proxy_fields() - ] - } - }. - - -t_pending_jobs_simple() -> - ?_test(begin - Job1 = oneshot(1), - Job2 = oneshot(2), - setup_jobs([Job2, Job1]), - ?assertEqual([], pending_jobs(0)), - ?assertEqual([Job1], pending_jobs(1)), - ?assertEqual([Job1, Job2], pending_jobs(2)), - ?assertEqual([Job1, Job2], pending_jobs(3)) - end). - - -t_pending_jobs_skip_crashed() -> - ?_test(begin - Job = oneshot(1), - Ts = os:timestamp(), - History = [crashed(Ts), started(Ts) | Job#job.history], - Job1 = Job#job{history = History}, - Job2 = oneshot(2), - Job3 = oneshot(3), - setup_jobs([Job2, Job1, Job3]), - ?assertEqual([Job2], pending_jobs(1)), - ?assertEqual([Job2, Job3], pending_jobs(2)), - ?assertEqual([Job2, Job3], pending_jobs(3)) - end). - - -t_one_job_starts() -> - ?_test(begin - setup_jobs([oneshot(1)]), - ?assertEqual({0, 1}, run_stop_count()), - reschedule(mock_state(?DEFAULT_MAX_JOBS)), - ?assertEqual({1, 0}, run_stop_count()) - end). - - -t_no_jobs_start_if_max_is_0() -> - ?_test(begin - setup_jobs([oneshot(1)]), - reschedule(mock_state(0)), - ?assertEqual({0, 1}, run_stop_count()) - end). - - -t_one_job_starts_if_max_is_1() -> - ?_test(begin - setup_jobs([oneshot(1), oneshot(2)]), - reschedule(mock_state(1)), - ?assertEqual({1, 1}, run_stop_count()) - end). - - -t_max_churn_does_not_throttle_initial_start() -> - ?_test(begin - setup_jobs([oneshot(1), oneshot(2)]), - reschedule(mock_state(?DEFAULT_MAX_JOBS, 0)), - ?assertEqual({2, 0}, run_stop_count()) - end). - - -t_excess_oneshot_only_jobs() -> - ?_test(begin - setup_jobs([oneshot_running(1), oneshot_running(2)]), - ?assertEqual({2, 0}, run_stop_count()), - reschedule(mock_state(1)), - ?assertEqual({1, 1}, run_stop_count()), - reschedule(mock_state(0)), - ?assertEqual({0, 2}, run_stop_count()) - end). - - -t_excess_continuous_only_jobs() -> - ?_test(begin - setup_jobs([continuous_running(1), continuous_running(2)]), - ?assertEqual({2, 0}, run_stop_count()), - reschedule(mock_state(1)), - ?assertEqual({1, 1}, run_stop_count()), - reschedule(mock_state(0)), - ?assertEqual({0, 2}, run_stop_count()) - end). - - -t_excess_prefer_continuous_first() -> - ?_test(begin - Jobs = [ - continuous_running(1), - oneshot_running(2), - continuous_running(3) - ], - setup_jobs(Jobs), - ?assertEqual({3, 0}, run_stop_count()), - ?assertEqual({1, 0}, oneshot_run_stop_count()), - reschedule(mock_state(2)), - ?assertEqual({2, 1}, run_stop_count()), - ?assertEqual({1, 0}, oneshot_run_stop_count()), - reschedule(mock_state(1)), - ?assertEqual({1, 0}, oneshot_run_stop_count()), - reschedule(mock_state(0)), - ?assertEqual({0, 1}, oneshot_run_stop_count()) - end). - - -t_stop_oldest_first() -> - ?_test(begin - Jobs = [ - continuous_running(7), - continuous_running(4), - continuous_running(5) - ], - setup_jobs(Jobs), - reschedule(mock_state(2, 1)), - ?assertEqual({2, 1}, run_stop_count()), - ?assertEqual([4], jobs_stopped()), - reschedule(mock_state(1, 1)), - ?assertEqual([7], jobs_running()) - end). - - -t_start_oldest_first() -> - ?_test(begin - setup_jobs([continuous(7), continuous(2), continuous(5)]), - reschedule(mock_state(1)), - ?assertEqual({1, 2}, run_stop_count()), - ?assertEqual([2], jobs_running()), - reschedule(mock_state(2)), - ?assertEqual({2, 1}, run_stop_count()), - % After rescheduling with max_jobs = 2, 2 was stopped and 5, 7 should - % be running. - ?assertEqual([2], jobs_stopped()) - end). - - -t_jobs_churn_even_if_not_all_max_jobs_are_running() -> - ?_test(begin - setup_jobs([ - continuous_running(7), - continuous(2), - continuous(5) - ]), - reschedule(mock_state(2, 2)), - ?assertEqual({2, 1}, run_stop_count()), - ?assertEqual([7], jobs_stopped()) - end). - - -t_jobs_dont_churn_if_there_are_available_running_slots() -> - ?_test(begin - setup_jobs([ - continuous_running(1), - continuous_running(2) - ]), - reschedule(mock_state(2, 2)), - ?assertEqual({2, 0}, run_stop_count()), - ?assertEqual([], jobs_stopped()), - ?assertEqual(0, meck:num_calls(couch_replicator_scheduler_sup, start_child, 1)) - end). - - -t_start_only_pending_jobs_do_not_churn_existing_ones() -> - ?_test(begin - setup_jobs([ - continuous(1), - continuous_running(2) - ]), - reschedule(mock_state(2, 2)), - ?assertEqual(1, meck:num_calls(couch_replicator_scheduler_sup, start_child, 1)), - ?assertEqual([], jobs_stopped()), - ?assertEqual({2, 0}, run_stop_count()) - end). - - -t_dont_stop_if_nothing_pending() -> - ?_test(begin - setup_jobs([continuous_running(1), continuous_running(2)]), - reschedule(mock_state(2)), - ?assertEqual({2, 0}, run_stop_count()) - end). - - -t_max_churn_limits_number_of_rotated_jobs() -> - ?_test(begin - Jobs = [ - continuous(1), - continuous_running(2), - continuous(3), - continuous_running(4) - ], - setup_jobs(Jobs), - reschedule(mock_state(2, 1)), - ?assertEqual([2, 3], jobs_stopped()) - end). - - -t_if_pending_less_than_running_start_all_pending() -> - ?_test(begin - Jobs = [ - continuous(1), - continuous_running(2), - continuous(3), - continuous_running(4), - continuous_running(5) - ], - setup_jobs(Jobs), - reschedule(mock_state(3)), - ?assertEqual([1, 2, 5], jobs_running()) - end). - - -t_running_less_than_pending_swap_all_running() -> - ?_test(begin - Jobs = [ - continuous(1), - continuous(2), - continuous(3), - continuous_running(4), - continuous_running(5) - ], - setup_jobs(Jobs), - reschedule(mock_state(2)), - ?assertEqual([3, 4, 5], jobs_stopped()) - end). - - -t_oneshot_dont_get_rotated() -> - ?_test(begin - setup_jobs([oneshot_running(1), continuous(2)]), - reschedule(mock_state(1)), - ?assertEqual([1], jobs_running()) - end). - - -t_rotate_continuous_only_if_mixed() -> - ?_test(begin - setup_jobs([continuous(1), oneshot_running(2), continuous_running(3)]), - reschedule(mock_state(2)), - ?assertEqual([1, 2], jobs_running()) - end). - - -t_oneshot_dont_get_starting_priority() -> - ?_test(begin - setup_jobs([continuous(1), oneshot(2), continuous_running(3)]), - reschedule(mock_state(1)), - ?assertEqual([1], jobs_running()) - end). - - -% This tested in other test cases, it is here to mainly make explicit a property -% of one-shot replications -- they can starve other jobs if they "take control" -% of all the available scheduler slots. -t_oneshot_will_hog_the_scheduler() -> - ?_test(begin - Jobs = [ - oneshot_running(1), - oneshot_running(2), - oneshot(3), - continuous(4) - ], - setup_jobs(Jobs), - reschedule(mock_state(2)), - ?assertEqual([1, 2], jobs_running()) - end). - - -t_if_excess_is_trimmed_rotation_still_happens() -> - ?_test(begin - Jobs = [ - continuous(1), - continuous_running(2), - continuous_running(3) - ], - setup_jobs(Jobs), - reschedule(mock_state(1)), - ?assertEqual([1], jobs_running()) - end). - - -t_if_transient_job_crashes_it_gets_removed() -> - ?_test(begin - Pid = mock_pid(), - Job = #job{ - id = job1, - pid = Pid, - history = [added()], - rep = #rep{db_name = null, options = [{continuous, true}]} - }, - setup_jobs([Job]), - ?assertEqual(1, ets:info(?MODULE, size)), - State = #state{max_history = 3, stats_pid = self()}, - {noreply, State} = handle_info({'DOWN', r1, process, Pid, failed}, - State), - ?assertEqual(0, ets:info(?MODULE, size)) - end). - - -t_if_permanent_job_crashes_it_stays_in_ets() -> - ?_test(begin - Pid = mock_pid(), - Job = #job{ - id = job1, - pid = Pid, - history = [added()], - rep = #rep{db_name = <<"db1">>, options = [{continuous, true}]} - }, - setup_jobs([Job]), - ?assertEqual(1, ets:info(?MODULE, size)), - State = #state{max_jobs =1, max_history = 3, stats_pid = self()}, - {noreply, State} = handle_info({'DOWN', r1, process, Pid, failed}, - State), - ?assertEqual(1, ets:info(?MODULE, size)), - [Job1] = ets:lookup(?MODULE, job1), - [Latest | _] = Job1#job.history, - ?assertMatch({{crashed, failed}, _}, Latest) - end). - - -t_existing_jobs() -> - ?_test(begin - Rep = #rep{ - id = job1, - db_name = <<"db">>, - source = <<"s">>, - target = <<"t">>, - options = [{continuous, true}] - }, - setup_jobs([#job{id = Rep#rep.id, rep = Rep}]), - NewRep = #rep{ - id = Rep#rep.id, - db_name = <<"db">>, - source = <<"s">>, - target = <<"t">>, - options = [{continuous, true}] - }, - ?assert(existing_replication(NewRep)), - ?assertNot(existing_replication(NewRep#rep{source = <<"s1">>})), - ?assertNot(existing_replication(NewRep#rep{target = <<"t1">>})), - ?assertNot(existing_replication(NewRep#rep{options = []})) - end). - - -t_job_summary_running() -> - ?_test(begin - Job = #job{ - id = job1, - pid = mock_pid(), - history = [added()], - rep = #rep{ - db_name = <<"db1">>, - source = <<"s">>, - target = <<"t">> - } - }, - setup_jobs([Job]), - Summary = job_summary(job1, ?DEFAULT_HEALTH_THRESHOLD_SEC), - ?assertEqual(running, proplists:get_value(state, Summary)), - ?assertEqual(null, proplists:get_value(info, Summary)), - ?assertEqual(0, proplists:get_value(error_count, Summary)), - - Stats = [{source_seq, <<"1-abc">>}], - handle_cast({update_job_stats, job1, Stats}, mock_state(1)), - Summary1 = job_summary(job1, ?DEFAULT_HEALTH_THRESHOLD_SEC), - ?assertEqual({Stats}, proplists:get_value(info, Summary1)) - end). - - -t_job_summary_pending() -> - ?_test(begin - Job = #job{ - id = job1, - pid = undefined, - history = [stopped(20), started(10), added()], - rep = #rep{source = <<"s">>, target = <<"t">>} - }, - setup_jobs([Job]), - Summary = job_summary(job1, ?DEFAULT_HEALTH_THRESHOLD_SEC), - ?assertEqual(pending, proplists:get_value(state, Summary)), - ?assertEqual(null, proplists:get_value(info, Summary)), - ?assertEqual(0, proplists:get_value(error_count, Summary)), - - Stats = [{doc_write_failures, 1}], - handle_cast({update_job_stats, job1, Stats}, mock_state(1)), - Summary1 = job_summary(job1, ?DEFAULT_HEALTH_THRESHOLD_SEC), - ?assertEqual({Stats}, proplists:get_value(info, Summary1)) - end). - - -t_job_summary_crashing_once() -> - ?_test(begin - Job = #job{ - id = job1, - history = [crashed(?DEFAULT_HEALTH_THRESHOLD_SEC + 1), started(0)], - rep = #rep{source = <<"s">>, target = <<"t">>} - }, - setup_jobs([Job]), - Summary = job_summary(job1, ?DEFAULT_HEALTH_THRESHOLD_SEC), - ?assertEqual(crashing, proplists:get_value(state, Summary)), - Info = proplists:get_value(info, Summary), - ?assertEqual({[{<<"error">>, <<"some_reason">>}]}, Info), - ?assertEqual(0, proplists:get_value(error_count, Summary)) - end). - - -t_job_summary_crashing_many_times() -> - ?_test(begin - Job = #job{ - id = job1, - history = [crashed(4), started(3), crashed(2), started(1)], - rep = #rep{source = <<"s">>, target = <<"t">>} - }, - setup_jobs([Job]), - Summary = job_summary(job1, ?DEFAULT_HEALTH_THRESHOLD_SEC), - ?assertEqual(crashing, proplists:get_value(state, Summary)), - Info = proplists:get_value(info, Summary), - ?assertEqual({[{<<"error">>, <<"some_reason">>}]}, Info), - ?assertEqual(2, proplists:get_value(error_count, Summary)) - end). - - -t_job_summary_proxy_fields() -> - ?_test(begin - Job = #job{ - id = job1, - history = [started(10), added()], - rep = #rep{ - source = #httpdb{ - url = "https://s", - proxy_url = "http://u:p@sproxy:12" - }, - target = #httpdb{ - url = "http://t", - proxy_url = "socks5://u:p@tproxy:34" - } - } - }, - setup_jobs([Job]), - Summary = job_summary(job1, ?DEFAULT_HEALTH_THRESHOLD_SEC), - ?assertEqual(<<"http://u:*****@sproxy:12">>, - proplists:get_value(source_proxy, Summary)), - ?assertEqual(<<"socks5://u:*****@tproxy:34">>, - proplists:get_value(target_proxy, Summary)) - end). - - -% Test helper functions - -setup_all() -> - catch ets:delete(?MODULE), - meck:expect(couch_log, notice, 2, ok), - meck:expect(couch_log, warning, 2, ok), - meck:expect(couch_log, error, 2, ok), - meck:expect(couch_replicator_scheduler_sup, terminate_child, 1, ok), - meck:expect(couch_stats, increment_counter, 1, ok), - meck:expect(couch_stats, update_gauge, 2, ok), - Pid = mock_pid(), - meck:expect(couch_replicator_scheduler_sup, start_child, 1, {ok, Pid}). - - -teardown_all(_) -> - catch ets:delete(?MODULE), - meck:unload(). - - -setup() -> - meck:reset([ - couch_log, - couch_replicator_scheduler_sup, - couch_stats - ]). - - -teardown(_) -> - ok. - - -setup_jobs(Jobs) when is_list(Jobs) -> - ?MODULE = ets:new(?MODULE, [named_table, {keypos, #job.id}]), - ets:insert(?MODULE, Jobs). - - -all_jobs() -> - lists:usort(ets:tab2list(?MODULE)). - - -jobs_stopped() -> - [Job#job.id || Job <- all_jobs(), Job#job.pid =:= undefined]. - - -jobs_running() -> - [Job#job.id || Job <- all_jobs(), Job#job.pid =/= undefined]. - - -run_stop_count() -> - {length(jobs_running()), length(jobs_stopped())}. - - -oneshot_run_stop_count() -> - Running = [Job#job.id || Job <- all_jobs(), Job#job.pid =/= undefined, - not is_continuous(Job)], - Stopped = [Job#job.id || Job <- all_jobs(), Job#job.pid =:= undefined, - not is_continuous(Job)], - {length(Running), length(Stopped)}. - - -mock_state(MaxJobs) -> - #state{ - max_jobs = MaxJobs, - max_churn = ?DEFAULT_MAX_CHURN, - max_history = ?DEFAULT_MAX_HISTORY, - stats_pid = self() - }. - -mock_state(MaxJobs, MaxChurn) -> - #state{ - max_jobs = MaxJobs, - max_churn = MaxChurn, - max_history = ?DEFAULT_MAX_HISTORY, - stats_pid = self() - }. - - -continuous(Id) when is_integer(Id) -> - Started = Id, - Hist = [stopped(Started+1), started(Started), added()], - #job{ - id = Id, - history = Hist, - rep = #rep{options = [{continuous, true}]} - }. - - -continuous_running(Id) when is_integer(Id) -> - Started = Id, - Pid = mock_pid(), - #job{ - id = Id, - history = [started(Started), added()], - rep = #rep{options = [{continuous, true}]}, - pid = Pid, - monitor = monitor(process, Pid) - }. - - -oneshot(Id) when is_integer(Id) -> - Started = Id, - Hist = [stopped(Started + 1), started(Started), added()], - #job{id = Id, history = Hist, rep = #rep{options = []}}. - - -oneshot_running(Id) when is_integer(Id) -> - Started = Id, - Pid = mock_pid(), - #job{ - id = Id, - history = [started(Started), added()], - rep = #rep{options = []}, - pid = Pid, - monitor = monitor(process, Pid) - }. - - -testjob(Hist) when is_list(Hist) -> - #job{history = Hist}. - - -mock_pid() -> - list_to_pid("<0.999.999>"). - -crashed() -> - crashed(0). - - -crashed(WhenSec) when is_integer(WhenSec)-> - {{crashed, some_reason}, {0, WhenSec, 0}}; -crashed({MSec, Sec, USec}) -> - {{crashed, some_reason}, {MSec, Sec, USec}}. - - -started() -> - started(0). - - -started(WhenSec) when is_integer(WhenSec)-> - {started, {0, WhenSec, 0}}; - -started({MSec, Sec, USec}) -> - {started, {MSec, Sec, USec}}. - - -stopped() -> - stopped(0). - - -stopped(WhenSec) -> - {stopped, {0, WhenSec, 0}}. - - -added() -> - {added, {0, 0, 0}}. - --endif. diff --git a/src/couch_replicator/src/couch_replicator_scheduler.hrl b/src/couch_replicator/src/couch_replicator_scheduler.hrl deleted file mode 100644 index 5203b0caa..000000000 --- a/src/couch_replicator/src/couch_replicator_scheduler.hrl +++ /dev/null @@ -1,15 +0,0 @@ -% Licensed under the Apache License, Version 2.0 (the "License"); you may not -% use this file except in compliance with the License. You may obtain a copy of -% the License at -% -% http://www.apache.org/licenses/LICENSE-2.0 -% -% Unless required by applicable law or agreed to in writing, software -% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -% License for the specific language governing permissions and limitations under -% the License. - - --type job_id() :: term(). --type job_args() :: term(). diff --git a/src/couch_replicator/src/couch_replicator_scheduler_job.erl b/src/couch_replicator/src/couch_replicator_scheduler_job.erl deleted file mode 100644 index 0b33419e1..000000000 --- a/src/couch_replicator/src/couch_replicator_scheduler_job.erl +++ /dev/null @@ -1,1090 +0,0 @@ -% Licensed under the Apache License, Version 2.0 (the "License"); you may not -% use this file except in compliance with the License. You may obtain a copy of -% the License at -% -% http://www.apache.org/licenses/LICENSE-2.0 -% -% Unless required by applicable law or agreed to in writing, software -% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -% License for the specific language governing permissions and limitations under -% the License. - --module(couch_replicator_scheduler_job). - --behaviour(gen_server). - --export([ - start_link/1 -]). - --export([ - init/1, - terminate/2, - handle_call/3, - handle_info/2, - handle_cast/2, - code_change/3, - format_status/2 -]). - --include_lib("couch/include/couch_db.hrl"). --include_lib("couch_replicator/include/couch_replicator_api_wrap.hrl"). --include("couch_replicator_scheduler.hrl"). --include("couch_replicator.hrl"). - --import(couch_util, [ - get_value/2, - get_value/3, - to_binary/1 -]). - --import(couch_replicator_utils, [ - pp_rep_id/1 -]). - - --define(LOWEST_SEQ, 0). --define(DEFAULT_CHECKPOINT_INTERVAL, 30000). --define(STARTUP_JITTER_DEFAULT, 5000). - --record(rep_state, { - rep_details, - source_name, - target_name, - source, - target, - history, - checkpoint_history, - start_seq, - committed_seq, - current_through_seq, - seqs_in_progress = [], - highest_seq_done = {0, ?LOWEST_SEQ}, - source_log, - target_log, - rep_starttime, - src_starttime, - tgt_starttime, - timer, % checkpoint timer - changes_queue, - changes_manager, - changes_reader, - workers, - stats = couch_replicator_stats:new(), - session_id, - source_seq = nil, - use_checkpoints = true, - checkpoint_interval = ?DEFAULT_CHECKPOINT_INTERVAL, - type = db, - view = nil -}). - - -start_link(#rep{id = {BaseId, Ext}, source = Src, target = Tgt} = Rep) -> - RepChildId = BaseId ++ Ext, - Source = couch_replicator_api_wrap:db_uri(Src), - Target = couch_replicator_api_wrap:db_uri(Tgt), - ServerName = {global, {?MODULE, Rep#rep.id}}, - - case gen_server:start_link(ServerName, ?MODULE, Rep, []) of - {ok, Pid} -> - {ok, Pid}; - {error, Reason} -> - couch_log:warning("failed to start replication `~s` (`~s` -> `~s`)", - [RepChildId, Source, Target]), - {error, Reason} - end. - - -init(InitArgs) -> - {ok, InitArgs, 0}. - - -do_init(#rep{options = Options, id = {BaseId, Ext}, user_ctx=UserCtx} = Rep) -> - process_flag(trap_exit, true), - - timer:sleep(startup_jitter()), - - #rep_state{ - source = Source, - target = Target, - source_name = SourceName, - target_name = TargetName, - start_seq = {_Ts, StartSeq}, - highest_seq_done = {_, HighestSeq}, - checkpoint_interval = CheckpointInterval - } = State = init_state(Rep), - - NumWorkers = get_value(worker_processes, Options), - BatchSize = get_value(worker_batch_size, Options), - {ok, ChangesQueue} = couch_work_queue:new([ - {max_items, BatchSize * NumWorkers * 2}, - {max_size, 100 * 1024 * NumWorkers} - ]), - % This starts the _changes reader process. It adds the changes from - % the source db to the ChangesQueue. - {ok, ChangesReader} = couch_replicator_changes_reader:start_link( - StartSeq, Source, ChangesQueue, Options - ), - % Changes manager - responsible for dequeing batches from the changes queue - % and deliver them to the worker processes. - ChangesManager = spawn_changes_manager(self(), ChangesQueue, BatchSize), - % This starts the worker processes. They ask the changes queue manager for a - % a batch of _changes rows to process -> check which revs are missing in the - % target, and for the missing ones, it copies them from the source to the target. - MaxConns = get_value(http_connections, Options), - Workers = lists:map( - fun(_) -> - couch_stats:increment_counter([couch_replicator, workers_started]), - {ok, Pid} = couch_replicator_worker:start_link( - self(), Source, Target, ChangesManager, MaxConns), - Pid - end, - lists:seq(1, NumWorkers)), - - couch_task_status:add_task([ - {type, replication}, - {user, UserCtx#user_ctx.name}, - {replication_id, ?l2b(BaseId ++ Ext)}, - {database, Rep#rep.db_name}, - {doc_id, Rep#rep.doc_id}, - {source, ?l2b(SourceName)}, - {target, ?l2b(TargetName)}, - {continuous, get_value(continuous, Options, false)}, - {source_seq, HighestSeq}, - {checkpoint_interval, CheckpointInterval} - ] ++ rep_stats(State)), - couch_task_status:set_update_frequency(1000), - - % Until OTP R14B03: - % - % Restarting a temporary supervised child implies that the original arguments - % (#rep{} record) specified in the MFA component of the supervisor - % child spec will always be used whenever the child is restarted. - % This implies the same replication performance tunning parameters will - % always be used. The solution is to delete the child spec (see - % cancel_replication/1) and then start the replication again, but this is - % unfortunately not immune to race conditions. - - log_replication_start(State), - couch_log:debug("Worker pids are: ~p", [Workers]), - - doc_update_triggered(Rep), - - {ok, State#rep_state{ - changes_queue = ChangesQueue, - changes_manager = ChangesManager, - changes_reader = ChangesReader, - workers = Workers - } - }. - - -handle_call({add_stats, Stats}, From, State) -> - gen_server:reply(From, ok), - NewStats = couch_replicator_utils:sum_stats(State#rep_state.stats, Stats), - {noreply, State#rep_state{stats = NewStats}}; - -handle_call({report_seq_done, Seq, StatsInc}, From, - #rep_state{seqs_in_progress = SeqsInProgress, highest_seq_done = HighestDone, - current_through_seq = ThroughSeq, stats = Stats} = State) -> - gen_server:reply(From, ok), - {NewThroughSeq0, NewSeqsInProgress} = case SeqsInProgress of - [] -> - {Seq, []}; - [Seq | Rest] -> - {Seq, Rest}; - [_ | _] -> - {ThroughSeq, ordsets:del_element(Seq, SeqsInProgress)} - end, - NewHighestDone = lists:max([HighestDone, Seq]), - NewThroughSeq = case NewSeqsInProgress of - [] -> - lists:max([NewThroughSeq0, NewHighestDone]); - _ -> - NewThroughSeq0 - end, - couch_log:debug("Worker reported seq ~p, through seq was ~p, " - "new through seq is ~p, highest seq done was ~p, " - "new highest seq done is ~p~n" - "Seqs in progress were: ~p~nSeqs in progress are now: ~p", - [Seq, ThroughSeq, NewThroughSeq, HighestDone, - NewHighestDone, SeqsInProgress, NewSeqsInProgress]), - NewState = State#rep_state{ - stats = couch_replicator_utils:sum_stats(Stats, StatsInc), - current_through_seq = NewThroughSeq, - seqs_in_progress = NewSeqsInProgress, - highest_seq_done = NewHighestDone - }, - update_task(NewState), - {noreply, NewState}. - - -handle_cast(checkpoint, State) -> - case do_checkpoint(State) of - {ok, NewState} -> - couch_stats:increment_counter([couch_replicator, checkpoints, success]), - {noreply, NewState#rep_state{timer = start_timer(State)}}; - Error -> - couch_stats:increment_counter([couch_replicator, checkpoints, failure]), - {stop, Error, State} - end; - -handle_cast({report_seq, Seq}, - #rep_state{seqs_in_progress = SeqsInProgress} = State) -> - NewSeqsInProgress = ordsets:add_element(Seq, SeqsInProgress), - {noreply, State#rep_state{seqs_in_progress = NewSeqsInProgress}}. - - -handle_info(shutdown, St) -> - {stop, shutdown, St}; - -handle_info({'EXIT', Pid, max_backoff}, State) -> - couch_log:error("Max backoff reached child process ~p", [Pid]), - {stop, {shutdown, max_backoff}, State}; - -handle_info({'EXIT', Pid, {shutdown, max_backoff}}, State) -> - couch_log:error("Max backoff reached child process ~p", [Pid]), - {stop, {shutdown, max_backoff}, State}; - -handle_info({'EXIT', Pid, normal}, #rep_state{changes_reader=Pid} = State) -> - {noreply, State}; - -handle_info({'EXIT', Pid, Reason0}, #rep_state{changes_reader=Pid} = State) -> - couch_stats:increment_counter([couch_replicator, changes_reader_deaths]), - Reason = case Reason0 of - {changes_req_failed, _, _} = HttpFail -> - HttpFail; - {http_request_failed, _, _, {error, {code, Code}}} -> - {changes_req_failed, Code}; - {http_request_failed, _, _, {error, Err}} -> - {changes_req_failed, Err}; - Other -> - {changes_reader_died, Other} - end, - couch_log:error("ChangesReader process died with reason: ~p", [Reason]), - {stop, {shutdown, Reason}, cancel_timer(State)}; - -handle_info({'EXIT', Pid, normal}, #rep_state{changes_manager = Pid} = State) -> - {noreply, State}; - -handle_info({'EXIT', Pid, Reason}, #rep_state{changes_manager = Pid} = State) -> - couch_stats:increment_counter([couch_replicator, changes_manager_deaths]), - couch_log:error("ChangesManager process died with reason: ~p", [Reason]), - {stop, {shutdown, {changes_manager_died, Reason}}, cancel_timer(State)}; - -handle_info({'EXIT', Pid, normal}, #rep_state{changes_queue=Pid} = State) -> - {noreply, State}; - -handle_info({'EXIT', Pid, Reason}, #rep_state{changes_queue=Pid} = State) -> - couch_stats:increment_counter([couch_replicator, changes_queue_deaths]), - couch_log:error("ChangesQueue process died with reason: ~p", [Reason]), - {stop, {shutdown, {changes_queue_died, Reason}}, cancel_timer(State)}; - -handle_info({'EXIT', Pid, normal}, #rep_state{workers = Workers} = State) -> - case Workers -- [Pid] of - Workers -> - couch_log:error("unknown pid bit the dust ~p ~n",[Pid]), - {noreply, State#rep_state{workers = Workers}}; - %% not clear why a stop was here before - %%{stop, {unknown_process_died, Pid, normal}, State}; - [] -> - catch unlink(State#rep_state.changes_manager), - catch exit(State#rep_state.changes_manager, kill), - do_last_checkpoint(State); - Workers2 -> - {noreply, State#rep_state{workers = Workers2}} - end; - -handle_info({'EXIT', Pid, Reason}, #rep_state{workers = Workers} = State) -> - State2 = cancel_timer(State), - case lists:member(Pid, Workers) of - false -> - {stop, {unknown_process_died, Pid, Reason}, State2}; - true -> - couch_stats:increment_counter([couch_replicator, worker_deaths]), - StopReason = case Reason of - {shutdown, _} = Err -> - Err; - Other -> - couch_log:error("Worker ~p died with reason: ~p", [Pid, Reason]), - {worker_died, Pid, Other} - end, - {stop, StopReason, State2} - end; - -handle_info(timeout, InitArgs) -> - try do_init(InitArgs) of {ok, State} -> - {noreply, State} - catch - exit:{http_request_failed, _, _, max_backoff} -> - {stop, {shutdown, max_backoff}, {error, InitArgs}}; - Class:Error -> - ShutdownReason = {error, replication_start_error(Error)}, - StackTop2 = lists:sublist(erlang:get_stacktrace(), 2), - % Shutdown state is a hack as it is not really the state of the - % gen_server (it failed to initialize, so it doesn't have one). - % Shutdown state is used to pass extra info about why start failed. - ShutdownState = {error, Class, StackTop2, InitArgs}, - {stop, {shutdown, ShutdownReason}, ShutdownState} - end. - - -terminate(normal, #rep_state{rep_details = #rep{id = RepId} = Rep, - checkpoint_history = CheckpointHistory} = State) -> - terminate_cleanup(State), - couch_replicator_notifier:notify({finished, RepId, CheckpointHistory}), - doc_update_completed(Rep, rep_stats(State)); - -terminate(shutdown, #rep_state{rep_details = #rep{id = RepId}} = State) -> - % Replication stopped via _scheduler_sup:terminate_child/1, which can be - % occur during regular scheduler operation or when job is removed from - % the scheduler. - State1 = case do_checkpoint(State) of - {ok, NewState} -> - NewState; - Error -> - LogMsg = "~p : Failed last checkpoint. Job: ~p Error: ~p", - couch_log:error(LogMsg, [?MODULE, RepId, Error]), - State - end, - couch_replicator_notifier:notify({stopped, RepId, <<"stopped">>}), - terminate_cleanup(State1); - -terminate({shutdown, max_backoff}, {error, InitArgs}) -> - #rep{id = {BaseId, Ext} = RepId} = InitArgs, - couch_stats:increment_counter([couch_replicator, failed_starts]), - couch_log:warning("Replication `~s` reached max backoff ", [BaseId ++ Ext]), - couch_replicator_notifier:notify({error, RepId, max_backoff}); - -terminate({shutdown, {error, Error}}, {error, Class, Stack, InitArgs}) -> - #rep{ - id = {BaseId, Ext} = RepId, - source = Source0, - target = Target0, - doc_id = DocId, - db_name = DbName - } = InitArgs, - Source = couch_replicator_api_wrap:db_uri(Source0), - Target = couch_replicator_api_wrap:db_uri(Target0), - RepIdStr = BaseId ++ Ext, - Msg = "~p:~p: Replication ~s failed to start ~p -> ~p doc ~p:~p stack:~p", - couch_log:error(Msg, [Class, Error, RepIdStr, Source, Target, DbName, - DocId, Stack]), - couch_stats:increment_counter([couch_replicator, failed_starts]), - couch_replicator_notifier:notify({error, RepId, Error}); - -terminate({shutdown, max_backoff}, State) -> - #rep_state{ - source_name = Source, - target_name = Target, - rep_details = #rep{id = {BaseId, Ext} = RepId} - } = State, - couch_log:error("Replication `~s` (`~s` -> `~s`) reached max backoff", - [BaseId ++ Ext, Source, Target]), - terminate_cleanup(State), - couch_replicator_notifier:notify({error, RepId, max_backoff}); - -terminate({shutdown, Reason}, State) -> - % Unwrap so when reporting we don't have an extra {shutdown, ...} tuple - % wrapped around the message - terminate(Reason, State); - -terminate(Reason, State) -> -#rep_state{ - source_name = Source, - target_name = Target, - rep_details = #rep{id = {BaseId, Ext} = RepId} - } = State, - couch_log:error("Replication `~s` (`~s` -> `~s`) failed: ~s", - [BaseId ++ Ext, Source, Target, to_binary(Reason)]), - terminate_cleanup(State), - couch_replicator_notifier:notify({error, RepId, Reason}). - -terminate_cleanup(State) -> - update_task(State), - couch_replicator_api_wrap:db_close(State#rep_state.source), - couch_replicator_api_wrap:db_close(State#rep_state.target). - - -code_change(_OldVsn, #rep_state{}=State, _Extra) -> - {ok, State}. - - -format_status(_Opt, [_PDict, State]) -> - #rep_state{ - source = Source, - target = Target, - rep_details = RepDetails, - start_seq = StartSeq, - source_seq = SourceSeq, - committed_seq = CommitedSeq, - current_through_seq = ThroughSeq, - highest_seq_done = HighestSeqDone, - session_id = SessionId - } = state_strip_creds(State), - #rep{ - id = RepId, - options = Options, - doc_id = DocId, - db_name = DbName - } = RepDetails, - [ - {rep_id, RepId}, - {source, couch_replicator_api_wrap:db_uri(Source)}, - {target, couch_replicator_api_wrap:db_uri(Target)}, - {db_name, DbName}, - {doc_id, DocId}, - {options, Options}, - {session_id, SessionId}, - {start_seq, StartSeq}, - {source_seq, SourceSeq}, - {committed_seq, CommitedSeq}, - {current_through_seq, ThroughSeq}, - {highest_seq_done, HighestSeqDone} - ]. - - -startup_jitter() -> - Jitter = config:get_integer("replicator", "startup_jitter", - ?STARTUP_JITTER_DEFAULT), - couch_rand:uniform(erlang:max(1, Jitter)). - - -headers_strip_creds([], Acc) -> - lists:reverse(Acc); -headers_strip_creds([{Key, Value0} | Rest], Acc) -> - Value = case string:to_lower(Key) of - "authorization" -> - "****"; - _ -> - Value0 - end, - headers_strip_creds(Rest, [{Key, Value} | Acc]). - - -httpdb_strip_creds(#httpdb{url = Url, headers = Headers} = HttpDb) -> - HttpDb#httpdb{ - url = couch_util:url_strip_password(Url), - headers = headers_strip_creds(Headers, []) - }; -httpdb_strip_creds(LocalDb) -> - LocalDb. - - -rep_strip_creds(#rep{source = Source, target = Target} = Rep) -> - Rep#rep{ - source = httpdb_strip_creds(Source), - target = httpdb_strip_creds(Target) - }. - - -state_strip_creds(#rep_state{rep_details = Rep, source = Source, target = Target} = State) -> - % #rep_state contains the source and target at the top level and also - % in the nested #rep_details record - State#rep_state{ - rep_details = rep_strip_creds(Rep), - source = httpdb_strip_creds(Source), - target = httpdb_strip_creds(Target) - }. - - -adjust_maxconn(Src = #httpdb{http_connections = 1}, RepId) -> - Msg = "Adjusting minimum number of HTTP source connections to 2 for ~p", - couch_log:notice(Msg, [RepId]), - Src#httpdb{http_connections = 2}; -adjust_maxconn(Src, _RepId) -> - Src. - - --spec doc_update_triggered(#rep{}) -> ok. -doc_update_triggered(#rep{db_name = null}) -> - ok; -doc_update_triggered(#rep{id = RepId, doc_id = DocId} = Rep) -> - case couch_replicator_doc_processor:update_docs() of - true -> - couch_replicator_docs:update_triggered(Rep, RepId); - false -> - ok - end, - couch_log:notice("Document `~s` triggered replication `~s`", - [DocId, pp_rep_id(RepId)]), - ok. - - --spec doc_update_completed(#rep{}, list()) -> ok. -doc_update_completed(#rep{db_name = null}, _Stats) -> - ok; -doc_update_completed(#rep{id = RepId, doc_id = DocId, db_name = DbName, - start_time = StartTime}, Stats0) -> - Stats = Stats0 ++ [{start_time, couch_replicator_utils:iso8601(StartTime)}], - couch_replicator_docs:update_doc_completed(DbName, DocId, Stats), - couch_log:notice("Replication `~s` completed (triggered by `~s`)", - [pp_rep_id(RepId), DocId]), - ok. - - -do_last_checkpoint(#rep_state{seqs_in_progress = [], - highest_seq_done = {_Ts, ?LOWEST_SEQ}} = State) -> - {stop, normal, cancel_timer(State)}; -do_last_checkpoint(#rep_state{seqs_in_progress = [], - highest_seq_done = Seq} = State) -> - case do_checkpoint(State#rep_state{current_through_seq = Seq}) of - {ok, NewState} -> - couch_stats:increment_counter([couch_replicator, checkpoints, success]), - {stop, normal, cancel_timer(NewState)}; - Error -> - couch_stats:increment_counter([couch_replicator, checkpoints, failure]), - {stop, Error, State} - end. - - -start_timer(State) -> - After = State#rep_state.checkpoint_interval, - case timer:apply_after(After, gen_server, cast, [self(), checkpoint]) of - {ok, Ref} -> - Ref; - Error -> - couch_log:error("Replicator, error scheduling checkpoint: ~p", [Error]), - nil - end. - - -cancel_timer(#rep_state{timer = nil} = State) -> - State; -cancel_timer(#rep_state{timer = Timer} = State) -> - {ok, cancel} = timer:cancel(Timer), - State#rep_state{timer = nil}. - - -init_state(Rep) -> - #rep{ - id = {BaseId, _Ext}, - source = Src0, target = Tgt, - options = Options, - type = Type, view = View, - start_time = StartTime, - stats = ArgStats0 - } = Rep, - % Adjust minimum number of http source connections to 2 to avoid deadlock - Src = adjust_maxconn(Src0, BaseId), - {ok, Source} = couch_replicator_api_wrap:db_open(Src), - {CreateTargetParams} = get_value(create_target_params, Options, {[]}), - {ok, Target} = couch_replicator_api_wrap:db_open(Tgt, - get_value(create_target, Options, false), CreateTargetParams), - - {ok, SourceInfo} = couch_replicator_api_wrap:get_db_info(Source), - {ok, TargetInfo} = couch_replicator_api_wrap:get_db_info(Target), - - [SourceLog, TargetLog] = find_and_migrate_logs([Source, Target], Rep), - - {StartSeq0, History} = compare_replication_logs(SourceLog, TargetLog), - - ArgStats1 = couch_replicator_stats:new(ArgStats0), - HistoryStats = case History of - [{[_ | _] = HProps} | _] -> couch_replicator_stats:new(HProps); - _ -> couch_replicator_stats:new() - end, - Stats = couch_replicator_stats:max_stats(ArgStats1, HistoryStats), - - StartSeq1 = get_value(since_seq, Options, StartSeq0), - StartSeq = {0, StartSeq1}, - - SourceSeq = get_value(<<"update_seq">>, SourceInfo, ?LOWEST_SEQ), - - #doc{body={CheckpointHistory}} = SourceLog, - State = #rep_state{ - rep_details = Rep, - source_name = couch_replicator_api_wrap:db_uri(Source), - target_name = couch_replicator_api_wrap:db_uri(Target), - source = Source, - target = Target, - history = History, - checkpoint_history = {[{<<"no_changes">>, true}| CheckpointHistory]}, - start_seq = StartSeq, - current_through_seq = StartSeq, - committed_seq = StartSeq, - source_log = SourceLog, - target_log = TargetLog, - rep_starttime = StartTime, - src_starttime = get_value(<<"instance_start_time">>, SourceInfo), - tgt_starttime = get_value(<<"instance_start_time">>, TargetInfo), - session_id = couch_uuids:random(), - source_seq = SourceSeq, - use_checkpoints = get_value(use_checkpoints, Options, true), - checkpoint_interval = get_value(checkpoint_interval, Options, - ?DEFAULT_CHECKPOINT_INTERVAL), - type = Type, - view = View, - stats = Stats - }, - State#rep_state{timer = start_timer(State)}. - - -find_and_migrate_logs(DbList, #rep{id = {BaseId, _}} = Rep) -> - LogId = ?l2b(?LOCAL_DOC_PREFIX ++ BaseId), - fold_replication_logs(DbList, ?REP_ID_VERSION, LogId, LogId, Rep, []). - - -fold_replication_logs([], _Vsn, _LogId, _NewId, _Rep, Acc) -> - lists:reverse(Acc); - -fold_replication_logs([Db | Rest] = Dbs, Vsn, LogId, NewId, Rep, Acc) -> - case couch_replicator_api_wrap:open_doc(Db, LogId, [ejson_body]) of - {error, <<"not_found">>} when Vsn > 1 -> - OldRepId = couch_replicator_utils:replication_id(Rep, Vsn - 1), - fold_replication_logs(Dbs, Vsn - 1, - ?l2b(?LOCAL_DOC_PREFIX ++ OldRepId), NewId, Rep, Acc); - {error, <<"not_found">>} -> - fold_replication_logs( - Rest, ?REP_ID_VERSION, NewId, NewId, Rep, [#doc{id = NewId} | Acc]); - {ok, Doc} when LogId =:= NewId -> - fold_replication_logs( - Rest, ?REP_ID_VERSION, NewId, NewId, Rep, [Doc | Acc]); - {ok, Doc} -> - MigratedLog = #doc{id = NewId, body = Doc#doc.body}, - maybe_save_migrated_log(Rep, Db, MigratedLog, Doc#doc.id), - fold_replication_logs( - Rest, ?REP_ID_VERSION, NewId, NewId, Rep, [MigratedLog | Acc]) - end. - - -maybe_save_migrated_log(Rep, Db, #doc{} = Doc, OldId) -> - case get_value(use_checkpoints, Rep#rep.options, true) of - true -> - update_checkpoint(Db, Doc), - Msg = "Migrated replication checkpoint. Db:~p ~p -> ~p", - couch_log:notice(Msg, [httpdb_strip_creds(Db), OldId, Doc#doc.id]); - false -> - ok - end. - - -spawn_changes_manager(Parent, ChangesQueue, BatchSize) -> - spawn_link(fun() -> - changes_manager_loop_open(Parent, ChangesQueue, BatchSize, 1) - end). - - -changes_manager_loop_open(Parent, ChangesQueue, BatchSize, Ts) -> - receive - {get_changes, From} -> - case couch_work_queue:dequeue(ChangesQueue, BatchSize) of - closed -> - From ! {closed, self()}; - {ok, ChangesOrLastSeqs} -> - ReportSeq = case lists:last(ChangesOrLastSeqs) of - {last_seq, Seq} -> - {Ts, Seq}; - #doc_info{high_seq = Seq} -> - {Ts, Seq} - end, - Changes = lists:filter( - fun(#doc_info{}) -> - true; - ({last_seq, _Seq}) -> - false - end, ChangesOrLastSeqs), - ok = gen_server:cast(Parent, {report_seq, ReportSeq}), - From ! {changes, self(), Changes, ReportSeq} - end, - changes_manager_loop_open(Parent, ChangesQueue, BatchSize, Ts + 1) - end. - - -do_checkpoint(#rep_state{use_checkpoints=false} = State) -> - NewState = State#rep_state{checkpoint_history = {[{<<"use_checkpoints">>, false}]} }, - {ok, NewState}; -do_checkpoint(#rep_state{current_through_seq=Seq, committed_seq=Seq} = State) -> - update_task(State), - {ok, State}; -do_checkpoint(State) -> - #rep_state{ - source_name=SourceName, - target_name=TargetName, - source = Source, - target = Target, - history = OldHistory, - start_seq = {_, StartSeq}, - current_through_seq = {_Ts, NewSeq} = NewTsSeq, - source_log = SourceLog, - target_log = TargetLog, - rep_starttime = ReplicationStartTime, - src_starttime = SrcInstanceStartTime, - tgt_starttime = TgtInstanceStartTime, - stats = Stats, - rep_details = #rep{options = Options}, - session_id = SessionId - } = State, - case commit_to_both(Source, Target) of - {source_error, Reason} -> - {checkpoint_commit_failure, - <<"Failure on source commit: ", (to_binary(Reason))/binary>>}; - {target_error, Reason} -> - {checkpoint_commit_failure, - <<"Failure on target commit: ", (to_binary(Reason))/binary>>}; - {SrcInstanceStartTime, TgtInstanceStartTime} -> - couch_log:notice("recording a checkpoint for `~s` -> `~s` at source update_seq ~p", - [SourceName, TargetName, NewSeq]), - LocalStartTime = calendar:now_to_local_time(ReplicationStartTime), - StartTime = ?l2b(httpd_util:rfc1123_date(LocalStartTime)), - EndTime = ?l2b(httpd_util:rfc1123_date()), - NewHistoryEntry = {[ - {<<"session_id">>, SessionId}, - {<<"start_time">>, StartTime}, - {<<"end_time">>, EndTime}, - {<<"start_last_seq">>, StartSeq}, - {<<"end_last_seq">>, NewSeq}, - {<<"recorded_seq">>, NewSeq}, - {<<"missing_checked">>, couch_replicator_stats:missing_checked(Stats)}, - {<<"missing_found">>, couch_replicator_stats:missing_found(Stats)}, - {<<"docs_read">>, couch_replicator_stats:docs_read(Stats)}, - {<<"docs_written">>, couch_replicator_stats:docs_written(Stats)}, - {<<"doc_write_failures">>, couch_replicator_stats:doc_write_failures(Stats)} - ]}, - BaseHistory = [ - {<<"session_id">>, SessionId}, - {<<"source_last_seq">>, NewSeq}, - {<<"replication_id_version">>, ?REP_ID_VERSION} - ] ++ case get_value(doc_ids, Options) of - undefined -> - []; - _DocIds -> - % backwards compatibility with the result of a replication by - % doc IDs in versions 0.11.x and 1.0.x - % TODO: deprecate (use same history format, simplify code) - [ - {<<"start_time">>, StartTime}, - {<<"end_time">>, EndTime}, - {<<"docs_read">>, couch_replicator_stats:docs_read(Stats)}, - {<<"docs_written">>, couch_replicator_stats:docs_written(Stats)}, - {<<"doc_write_failures">>, couch_replicator_stats:doc_write_failures(Stats)} - ] - end, - % limit history to 50 entries - NewRepHistory = { - BaseHistory ++ - [{<<"history">>, lists:sublist([NewHistoryEntry | OldHistory], 50)}] - }, - - try - {SrcRevPos, SrcRevId} = update_checkpoint( - Source, SourceLog#doc{body = NewRepHistory}, source), - {TgtRevPos, TgtRevId} = update_checkpoint( - Target, TargetLog#doc{body = NewRepHistory}, target), - NewState = State#rep_state{ - checkpoint_history = NewRepHistory, - committed_seq = NewTsSeq, - source_log = SourceLog#doc{revs={SrcRevPos, [SrcRevId]}}, - target_log = TargetLog#doc{revs={TgtRevPos, [TgtRevId]}} - }, - update_task(NewState), - {ok, NewState} - catch throw:{checkpoint_commit_failure, _} = Failure -> - Failure - end; - {SrcInstanceStartTime, _NewTgtInstanceStartTime} -> - {checkpoint_commit_failure, <<"Target database out of sync. " - "Try to increase max_dbs_open at the target's server.">>}; - {_NewSrcInstanceStartTime, TgtInstanceStartTime} -> - {checkpoint_commit_failure, <<"Source database out of sync. " - "Try to increase max_dbs_open at the source's server.">>}; - {_NewSrcInstanceStartTime, _NewTgtInstanceStartTime} -> - {checkpoint_commit_failure, <<"Source and target databases out of " - "sync. Try to increase max_dbs_open at both servers.">>} - end. - - -update_checkpoint(Db, Doc, DbType) -> - try - update_checkpoint(Db, Doc) - catch throw:{checkpoint_commit_failure, Reason} -> - throw({checkpoint_commit_failure, - <<"Error updating the ", (to_binary(DbType))/binary, - " checkpoint document: ", (to_binary(Reason))/binary>>}) - end. - - -update_checkpoint(Db, #doc{id = LogId, body = LogBody} = Doc) -> - try - case couch_replicator_api_wrap:update_doc(Db, Doc, [delay_commit]) of - {ok, PosRevId} -> - PosRevId; - {error, Reason} -> - throw({checkpoint_commit_failure, Reason}) - end - catch throw:conflict -> - case (catch couch_replicator_api_wrap:open_doc(Db, LogId, [ejson_body])) of - {ok, #doc{body = LogBody, revs = {Pos, [RevId | _]}}} -> - % This means that we were able to update successfully the - % checkpoint doc in a previous attempt but we got a connection - % error (timeout for e.g.) before receiving the success response. - % Therefore the request was retried and we got a conflict, as the - % revision we sent is not the current one. - % We confirm this by verifying the doc body we just got is the same - % that we have just sent. - {Pos, RevId}; - _ -> - throw({checkpoint_commit_failure, conflict}) - end - end. - - -commit_to_both(Source, Target) -> - % commit the src async - ParentPid = self(), - SrcCommitPid = spawn_link( - fun() -> - Result = (catch couch_replicator_api_wrap:ensure_full_commit(Source)), - ParentPid ! {self(), Result} - end), - - % commit tgt sync - TargetResult = (catch couch_replicator_api_wrap:ensure_full_commit(Target)), - - SourceResult = receive - {SrcCommitPid, Result} -> - unlink(SrcCommitPid), - receive {'EXIT', SrcCommitPid, _} -> ok after 0 -> ok end, - Result; - {'EXIT', SrcCommitPid, Reason} -> - {error, Reason} - end, - case TargetResult of - {ok, TargetStartTime} -> - case SourceResult of - {ok, SourceStartTime} -> - {SourceStartTime, TargetStartTime}; - SourceError -> - {source_error, SourceError} - end; - TargetError -> - {target_error, TargetError} - end. - - -compare_replication_logs(SrcDoc, TgtDoc) -> - #doc{body={RepRecProps}} = SrcDoc, - #doc{body={RepRecPropsTgt}} = TgtDoc, - case get_value(<<"session_id">>, RepRecProps) == - get_value(<<"session_id">>, RepRecPropsTgt) of - true -> - % if the records have the same session id, - % then we have a valid replication history - OldSeqNum = get_value(<<"source_last_seq">>, RepRecProps, ?LOWEST_SEQ), - OldHistory = get_value(<<"history">>, RepRecProps, []), - {OldSeqNum, OldHistory}; - false -> - SourceHistory = get_value(<<"history">>, RepRecProps, []), - TargetHistory = get_value(<<"history">>, RepRecPropsTgt, []), - couch_log:notice("Replication records differ. " - "Scanning histories to find a common ancestor.", []), - couch_log:debug("Record on source:~p~nRecord on target:~p~n", - [RepRecProps, RepRecPropsTgt]), - compare_rep_history(SourceHistory, TargetHistory) - end. - - -compare_rep_history(S, T) when S =:= [] orelse T =:= [] -> - couch_log:notice("no common ancestry -- performing full replication", []), - {?LOWEST_SEQ, []}; -compare_rep_history([{S} | SourceRest], [{T} | TargetRest] = Target) -> - SourceId = get_value(<<"session_id">>, S), - case has_session_id(SourceId, Target) of - true -> - RecordSeqNum = get_value(<<"recorded_seq">>, S, ?LOWEST_SEQ), - couch_log:notice("found a common replication record with source_seq ~p", - [RecordSeqNum]), - {RecordSeqNum, SourceRest}; - false -> - TargetId = get_value(<<"session_id">>, T), - case has_session_id(TargetId, SourceRest) of - true -> - RecordSeqNum = get_value(<<"recorded_seq">>, T, ?LOWEST_SEQ), - couch_log:notice("found a common replication record with source_seq ~p", - [RecordSeqNum]), - {RecordSeqNum, TargetRest}; - false -> - compare_rep_history(SourceRest, TargetRest) - end - end. - - -has_session_id(_SessionId, []) -> - false; -has_session_id(SessionId, [{Props} | Rest]) -> - case get_value(<<"session_id">>, Props, nil) of - SessionId -> - true; - _Else -> - has_session_id(SessionId, Rest) - end. - - -get_pending_count(St) -> - Rep = St#rep_state.rep_details, - Timeout = get_value(connection_timeout, Rep#rep.options), - TimeoutMicro = Timeout * 1000, - case get(pending_count_state) of - {LastUpdate, PendingCount} -> - case timer:now_diff(os:timestamp(), LastUpdate) > TimeoutMicro of - true -> - NewPendingCount = get_pending_count_int(St), - put(pending_count_state, {os:timestamp(), NewPendingCount}), - NewPendingCount; - false -> - PendingCount - end; - undefined -> - NewPendingCount = get_pending_count_int(St), - put(pending_count_state, {os:timestamp(), NewPendingCount}), - NewPendingCount - end. - - -get_pending_count_int(#rep_state{source = #httpdb{} = Db0}=St) -> - {_, Seq} = St#rep_state.highest_seq_done, - Db = Db0#httpdb{retries = 3}, - case (catch couch_replicator_api_wrap:get_pending_count(Db, Seq)) of - {ok, Pending} -> - Pending; - _ -> - null - end; -get_pending_count_int(#rep_state{source = Db}=St) -> - {_, Seq} = St#rep_state.highest_seq_done, - {ok, Pending} = couch_replicator_api_wrap:get_pending_count(Db, Seq), - Pending. - - -update_task(State) -> - #rep_state{ - rep_details = #rep{id = JobId}, - current_through_seq = {_, ThroughSeq}, - highest_seq_done = {_, HighestSeq} - } = State, - Status = rep_stats(State) ++ [ - {source_seq, HighestSeq}, - {through_seq, ThroughSeq} - ], - couch_replicator_scheduler:update_job_stats(JobId, Status), - couch_task_status:update(Status). - - -rep_stats(State) -> - #rep_state{ - committed_seq = {_, CommittedSeq}, - stats = Stats - } = State, - [ - {revisions_checked, couch_replicator_stats:missing_checked(Stats)}, - {missing_revisions_found, couch_replicator_stats:missing_found(Stats)}, - {docs_read, couch_replicator_stats:docs_read(Stats)}, - {docs_written, couch_replicator_stats:docs_written(Stats)}, - {changes_pending, get_pending_count(State)}, - {doc_write_failures, couch_replicator_stats:doc_write_failures(Stats)}, - {checkpointed_source_seq, CommittedSeq} - ]. - - -replication_start_error({unauthorized, DbUri}) -> - {unauthorized, <<"unauthorized to access or create database ", DbUri/binary>>}; -replication_start_error({db_not_found, DbUri}) -> - {db_not_found, <<"could not open ", DbUri/binary>>}; -replication_start_error({http_request_failed, _Method, Url0, - {error, {error, {conn_failed, {error, nxdomain}}}}}) -> - Url = ?l2b(couch_util:url_strip_password(Url0)), - {nxdomain, <<"could not resolve ", Url/binary>>}; -replication_start_error({http_request_failed, Method0, Url0, - {error, {code, Code}}}) when is_integer(Code) -> - Url = ?l2b(couch_util:url_strip_password(Url0)), - Method = ?l2b(Method0), - {http_error_code, Code, <>}; -replication_start_error(Error) -> - Error. - - -log_replication_start(#rep_state{rep_details = Rep} = RepState) -> - #rep{ - id = {BaseId, Ext}, - doc_id = DocId, - db_name = DbName, - options = Options - } = Rep, - Id = BaseId ++ Ext, - Workers = get_value(worker_processes, Options), - BatchSize = get_value(worker_batch_size, Options), - #rep_state{ - source_name = Source, % credentials already stripped - target_name = Target, % credentials already stripped - session_id = Sid - } = RepState, - From = case DbName of - ShardName when is_binary(ShardName) -> - io_lib:format("from doc ~s:~s", [mem3:dbname(ShardName), DocId]); - _ -> - "from _replicate endpoint" - end, - Msg = "Starting replication ~s (~s -> ~s) ~s worker_procesess:~p" - " worker_batch_size:~p session_id:~s", - couch_log:notice(Msg, [Id, Source, Target, From, Workers, BatchSize, Sid]). - - --ifdef(TEST). - --include_lib("eunit/include/eunit.hrl"). - - -replication_start_error_test() -> - ?assertEqual({unauthorized, <<"unauthorized to access or create database" - " http://x/y">>}, replication_start_error({unauthorized, - <<"http://x/y">>})), - ?assertEqual({db_not_found, <<"could not open http://x/y">>}, - replication_start_error({db_not_found, <<"http://x/y">>})), - ?assertEqual({nxdomain,<<"could not resolve http://x/y">>}, - replication_start_error({http_request_failed, "GET", "http://x/y", - {error, {error, {conn_failed, {error, nxdomain}}}}})), - ?assertEqual({http_error_code,503,<<"GET http://x/y">>}, - replication_start_error({http_request_failed, "GET", "http://x/y", - {error, {code, 503}}})). - - -scheduler_job_format_status_test() -> - Source = <<"http://u:p@h1/d1">>, - Target = <<"http://u:p@h2/d2">>, - Rep = #rep{ - id = {"base", "+ext"}, - source = couch_replicator_docs:parse_rep_db(Source, [], []), - target = couch_replicator_docs:parse_rep_db(Target, [], []), - options = [{create_target, true}], - doc_id = <<"mydoc">>, - db_name = <<"mydb">> - }, - State = #rep_state{ - rep_details = Rep, - source = Rep#rep.source, - target = Rep#rep.target, - session_id = <<"a">>, - start_seq = <<"1">>, - source_seq = <<"2">>, - committed_seq = <<"3">>, - current_through_seq = <<"4">>, - highest_seq_done = <<"5">> - }, - Format = format_status(opts_ignored, [pdict, State]), - ?assertEqual("http://u:*****@h1/d1/", proplists:get_value(source, Format)), - ?assertEqual("http://u:*****@h2/d2/", proplists:get_value(target, Format)), - ?assertEqual({"base", "+ext"}, proplists:get_value(rep_id, Format)), - ?assertEqual([{create_target, true}], proplists:get_value(options, Format)), - ?assertEqual(<<"mydoc">>, proplists:get_value(doc_id, Format)), - ?assertEqual(<<"mydb">>, proplists:get_value(db_name, Format)), - ?assertEqual(<<"a">>, proplists:get_value(session_id, Format)), - ?assertEqual(<<"1">>, proplists:get_value(start_seq, Format)), - ?assertEqual(<<"2">>, proplists:get_value(source_seq, Format)), - ?assertEqual(<<"3">>, proplists:get_value(committed_seq, Format)), - ?assertEqual(<<"4">>, proplists:get_value(current_through_seq, Format)), - ?assertEqual(<<"5">>, proplists:get_value(highest_seq_done, Format)). - - --endif. diff --git a/src/couch_replicator/src/couch_replicator_scheduler_sup.erl b/src/couch_replicator/src/couch_replicator_scheduler_sup.erl deleted file mode 100644 index 8ab55f838..000000000 --- a/src/couch_replicator/src/couch_replicator_scheduler_sup.erl +++ /dev/null @@ -1,62 +0,0 @@ -% Licensed under the Apache License, Version 2.0 (the "License"); you may not -% use this file except in compliance with the License. You may obtain a copy of -% the License at -% -% http://www.apache.org/licenses/LICENSE-2.0 -% -% Unless required by applicable law or agreed to in writing, software -% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -% License for the specific language governing permissions and limitations under -% the License. - --module(couch_replicator_scheduler_sup). - --behaviour(supervisor). - -%% public api --export([ - start_link/0, - start_child/1, - terminate_child/1 -]). - -%% supervisor api --export([ - init/1 -]). - - -%% includes --include("couch_replicator.hrl"). - - -%% public functions - -start_link() -> - supervisor:start_link({local, ?MODULE}, ?MODULE, []). - - -start_child(#rep{} = Rep) -> - supervisor:start_child(?MODULE, [Rep]). - - -terminate_child(Pid) -> - supervisor:terminate_child(?MODULE, Pid). - -%% supervisor functions - -init(_Args) -> - Start = {couch_replicator_scheduler_job, start_link, []}, - Restart = temporary, % A crashed job is not entitled to immediate restart. - Shutdown = 5000, - Type = worker, - Modules = [couch_replicator_scheduler_job], - - RestartStrategy = simple_one_for_one, - MaxR = 10, - MaxT = 3, - - ChildSpec = - {undefined, Start, Restart, Shutdown, Type, Modules}, - {ok, {{RestartStrategy, MaxR, MaxT}, [ChildSpec]}}. diff --git a/src/couch_replicator/test/eunit/couch_replicator_compact_tests.erl b/src/couch_replicator/test/eunit/couch_replicator_compact_tests.erl deleted file mode 100644 index 997c84863..000000000 --- a/src/couch_replicator/test/eunit/couch_replicator_compact_tests.erl +++ /dev/null @@ -1,455 +0,0 @@ -% Licensed under the Apache License, Version 2.0 (the "License"); you may not -% use this file except in compliance with the License. You may obtain a copy of -% the License at -% -% http://www.apache.org/licenses/LICENSE-2.0 -% -% Unless required by applicable law or agreed to in writing, software -% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -% License for the specific language governing permissions and limitations under -% the License. - --module(couch_replicator_compact_tests). - --include_lib("couch/include/couch_eunit.hrl"). --include_lib("couch/include/couch_db.hrl"). --include_lib("couch_replicator/src/couch_replicator.hrl"). - --import(couch_replicator_test_helper, [ - db_url/1, - get_pid/1 -]). - --define(ATTFILE, filename:join([?FIXTURESDIR, "logo.png"])). --define(DELAY, 500). --define(TIMEOUT, 360000). --define(TIMEOUT_WRITER, 100000). --define(TIMEOUT_EUNIT, ?TIMEOUT div 1000 + 70). --define(WRITE_BATCH_SIZE, 25). - -setup() -> - DbName = ?tempdb(), - {ok, Db} = couch_db:create(DbName, [?ADMIN_CTX]), - ok = couch_db:close(Db), - DbName. - -setup(remote) -> - {remote, setup()}; -setup({A, B}) -> - Ctx = test_util:start_couch([couch_replicator]), - Source = setup(A), - Target = setup(B), - {Ctx, {Source, Target}}. - -teardown({remote, DbName}) -> - teardown(DbName); -teardown(DbName) -> - ok = couch_server:delete(DbName, [?ADMIN_CTX]), - ok. - -teardown(_, {Ctx, {Source, Target}}) -> - teardown(Source), - teardown(Target), - ok = application:stop(couch_replicator), - ok = test_util:stop_couch(Ctx). - -compact_test_() -> - Pairs = [{remote, remote}], - { - "Compaction during replication tests", - { - foreachx, - fun setup/1, fun teardown/2, - [{Pair, fun should_populate_replicate_compact/2} - || Pair <- Pairs] - } - }. - - -should_populate_replicate_compact({From, To}, {_Ctx, {Source, Target}}) -> - {ok, RepPid, RepId} = replicate(Source, Target), - {lists:flatten(io_lib:format("~p -> ~p", [From, To])), - {inorder, [ - should_run_replication(RepPid, RepId, Source, Target), - should_all_processes_be_alive(RepPid, Source, Target), - should_populate_and_compact(RepPid, Source, Target, 50, 3), - should_wait_target_in_sync(Source, Target), - should_ensure_replication_still_running(RepPid, RepId, Source, Target), - should_cancel_replication(RepId, RepPid), - should_compare_databases(Source, Target) - ]}}. - -should_all_processes_be_alive(RepPid, Source, Target) -> - ?_test(begin - {ok, SourceDb} = reopen_db(Source), - {ok, TargetDb} = reopen_db(Target), - ?assert(is_process_alive(RepPid)), - ?assert(is_process_alive(couch_db:get_pid(SourceDb))), - ?assert(is_process_alive(couch_db:get_pid(TargetDb))) - end). - -should_run_replication(RepPid, RepId, Source, Target) -> - ?_test(check_active_tasks(RepPid, RepId, Source, Target)). - -should_ensure_replication_still_running(RepPid, RepId, Source, Target) -> - ?_test(check_active_tasks(RepPid, RepId, Source, Target)). - -check_active_tasks(RepPid, {BaseId, Ext} = _RepId, Src, Tgt) -> - Source = case Src of - {remote, NameSrc} -> - <<(db_url(NameSrc))/binary, $/>>; - _ -> - Src - end, - Target = case Tgt of - {remote, NameTgt} -> - <<(db_url(NameTgt))/binary, $/>>; - _ -> - Tgt - end, - FullRepId = ?l2b(BaseId ++ Ext), - Pid = ?l2b(pid_to_list(RepPid)), - RepTasks = wait_for_task_status(), - ?assertNotEqual(timeout, RepTasks), - [RepTask] = RepTasks, - ?assertEqual(Pid, couch_util:get_value(pid, RepTask)), - ?assertEqual(FullRepId, couch_util:get_value(replication_id, RepTask)), - ?assertEqual(true, couch_util:get_value(continuous, RepTask)), - ?assertEqual(Source, couch_util:get_value(source, RepTask)), - ?assertEqual(Target, couch_util:get_value(target, RepTask)), - ?assert(is_integer(couch_util:get_value(docs_read, RepTask))), - ?assert(is_integer(couch_util:get_value(docs_written, RepTask))), - ?assert(is_integer(couch_util:get_value(doc_write_failures, RepTask))), - ?assert(is_integer(couch_util:get_value(revisions_checked, RepTask))), - ?assert(is_integer(couch_util:get_value(missing_revisions_found, RepTask))), - ?assert(is_integer(couch_util:get_value(checkpointed_source_seq, RepTask))), - ?assert(is_integer(couch_util:get_value(source_seq, RepTask))), - Pending = couch_util:get_value(changes_pending, RepTask), - ?assert(is_integer(Pending)). - -replication_tasks() -> - lists:filter(fun(P) -> - couch_util:get_value(type, P) =:= replication - end, couch_task_status:all()). - - -wait_for_task_status() -> - test_util:wait(fun() -> - case replication_tasks() of - [] -> - wait; - Tasks -> - Tasks - end - end). - -should_cancel_replication(RepId, RepPid) -> - ?_assertNot(begin - ok = couch_replicator_scheduler:remove_job(RepId), - is_process_alive(RepPid) - end). - -should_populate_and_compact(RepPid, Source, Target, BatchSize, Rounds) -> - {timeout, ?TIMEOUT_EUNIT, ?_test(begin - {ok, SourceDb0} = reopen_db(Source), - Writer = spawn_writer(SourceDb0), - lists:foreach( - fun(N) -> - {ok, SourceDb} = reopen_db(Source), - {ok, TargetDb} = reopen_db(Target), - pause_writer(Writer), - - compact_db("source", SourceDb), - ?assert(is_process_alive(RepPid)), - ?assert(is_process_alive(couch_db:get_pid(SourceDb))), - wait_for_compaction("source", SourceDb), - - compact_db("target", TargetDb), - ?assert(is_process_alive(RepPid)), - ?assert(is_process_alive(couch_db:get_pid(TargetDb))), - wait_for_compaction("target", TargetDb), - - {ok, SourceDb2} = reopen_db(SourceDb), - {ok, TargetDb2} = reopen_db(TargetDb), - - resume_writer(Writer), - wait_writer(Writer, BatchSize * N), - - compact_db("source", SourceDb2), - ?assert(is_process_alive(RepPid)), - ?assert(is_process_alive(couch_db:get_pid(SourceDb2))), - pause_writer(Writer), - wait_for_compaction("source", SourceDb2), - resume_writer(Writer), - - compact_db("target", TargetDb2), - ?assert(is_process_alive(RepPid)), - ?assert(is_process_alive(couch_db:get_pid(TargetDb2))), - pause_writer(Writer), - wait_for_compaction("target", TargetDb2), - resume_writer(Writer) - end, lists:seq(1, Rounds)), - stop_writer(Writer) - end)}. - -should_wait_target_in_sync({remote, Source}, Target) -> - should_wait_target_in_sync(Source, Target); -should_wait_target_in_sync(Source, {remote, Target}) -> - should_wait_target_in_sync(Source, Target); -should_wait_target_in_sync(Source, Target) -> - {timeout, ?TIMEOUT_EUNIT, ?_assert(begin - {ok, SourceDb} = couch_db:open_int(Source, []), - {ok, SourceInfo} = couch_db:get_db_info(SourceDb), - ok = couch_db:close(SourceDb), - SourceDocCount = couch_util:get_value(doc_count, SourceInfo), - wait_target_in_sync_loop(SourceDocCount, Target, 300) - end)}. - -wait_target_in_sync_loop(_DocCount, _TargetName, 0) -> - erlang:error( - {assertion_failed, - [{module, ?MODULE}, {line, ?LINE}, - {reason, "Could not get source and target databases in sync"}]}); -wait_target_in_sync_loop(DocCount, {remote, TargetName}, RetriesLeft) -> - wait_target_in_sync_loop(DocCount, TargetName, RetriesLeft); -wait_target_in_sync_loop(DocCount, TargetName, RetriesLeft) -> - {ok, Target} = couch_db:open_int(TargetName, []), - {ok, TargetInfo} = couch_db:get_db_info(Target), - ok = couch_db:close(Target), - TargetDocCount = couch_util:get_value(doc_count, TargetInfo), - case TargetDocCount == DocCount of - true -> - true; - false -> - ok = timer:sleep(?DELAY), - wait_target_in_sync_loop(DocCount, TargetName, RetriesLeft - 1) - end. - -should_compare_databases({remote, Source}, Target) -> - should_compare_databases(Source, Target); -should_compare_databases(Source, {remote, Target}) -> - should_compare_databases(Source, Target); -should_compare_databases(Source, Target) -> - {timeout, 35, ?_test(begin - {ok, SourceDb} = couch_db:open_int(Source, []), - {ok, TargetDb} = couch_db:open_int(Target, []), - Fun = fun(FullDocInfo, Acc) -> - {ok, Doc} = couch_db:open_doc(SourceDb, FullDocInfo), - {Props} = DocJson = couch_doc:to_json_obj(Doc, [attachments]), - DocId = couch_util:get_value(<<"_id">>, Props), - DocTarget = case couch_db:open_doc(TargetDb, DocId) of - {ok, DocT} -> - DocT; - Error -> - erlang:error( - {assertion_failed, - [{module, ?MODULE}, {line, ?LINE}, - {reason, lists:concat(["Error opening document '", - ?b2l(DocId), "' from target: ", - couch_util:to_list(Error)])}]}) - end, - DocTargetJson = couch_doc:to_json_obj(DocTarget, [attachments]), - ?assertEqual(DocJson, DocTargetJson), - {ok, Acc} - end, - {ok, _} = couch_db:fold_docs(SourceDb, Fun, [], []), - ok = couch_db:close(SourceDb), - ok = couch_db:close(TargetDb) - end)}. - - -reopen_db({remote, Db}) -> - reopen_db(Db); -reopen_db(DbName) when is_binary(DbName) -> - {ok, Db} = couch_db:open_int(DbName, []), - ok = couch_db:close(Db), - {ok, Db}; -reopen_db(Db) -> - reopen_db(couch_db:name(Db)). - - -compact_db(Type, Db0) -> - Name = couch_db:name(Db0), - {ok, Db} = couch_db:open_int(Name, []), - {ok, CompactPid} = couch_db:start_compact(Db), - MonRef = erlang:monitor(process, CompactPid), - receive - {'DOWN', MonRef, process, CompactPid, normal} -> - ok; - {'DOWN', MonRef, process, CompactPid, noproc} -> - ok; - {'DOWN', MonRef, process, CompactPid, Reason} -> - erlang:error( - {assertion_failed, - [{module, ?MODULE}, {line, ?LINE}, - {reason, - lists:concat(["Error compacting ", Type, " database ", - ?b2l(Name), ": ", - couch_util:to_list(Reason)])}]}) - after ?TIMEOUT -> - erlang:error( - {assertion_failed, - [{module, ?MODULE}, {line, ?LINE}, - {reason, lists:concat(["Compaction for ", Type, " database ", - ?b2l(Name), " didn't finish"])}]}) - end, - ok = couch_db:close(Db). - -wait_for_compaction(Type, Db) -> - case couch_db:wait_for_compaction(Db) of - ok -> - ok; - {error, noproc} -> - ok; - {error, Reason} -> - erlang:error( - {assertion_failed, - [{module, ?MODULE}, {line, ?LINE}, - {reason, lists:concat(["Compaction of ", Type, - " database failed with: ", Reason])}]}) - end. - -replicate({remote, Db}, Target) -> - replicate(db_url(Db), Target); - -replicate(Source, {remote, Db}) -> - replicate(Source, db_url(Db)); - -replicate(Source, Target) -> - RepObject = {[ - {<<"source">>, Source}, - {<<"target">>, Target}, - {<<"continuous">>, true} - ]}, - {ok, Rep} = couch_replicator_utils:parse_rep_doc(RepObject, ?ADMIN_USER), - ok = couch_replicator_scheduler:add_job(Rep), - couch_replicator_scheduler:reschedule(), - Pid = get_pid(Rep#rep.id), - {ok, Pid, Rep#rep.id}. - - -wait_writer(Pid, NumDocs) -> - case get_writer_num_docs_written(Pid) of - N when N >= NumDocs -> - ok; - _ -> - wait_writer(Pid, NumDocs) - end. - -spawn_writer(Db) -> - Parent = self(), - Pid = spawn(fun() -> writer_loop(Db, Parent, 0) end), - Pid. - - -pause_writer(Pid) -> - Ref = make_ref(), - Pid ! {pause, Ref}, - receive - {paused, Ref} -> - ok - after ?TIMEOUT_WRITER -> - erlang:error({assertion_failed, - [{module, ?MODULE}, - {line, ?LINE}, - {reason, "Failed to pause source database writer"}]}) - end. - -resume_writer(Pid) -> - Ref = make_ref(), - Pid ! {continue, Ref}, - receive - {ok, Ref} -> - ok - after ?TIMEOUT_WRITER -> - erlang:error({assertion_failed, - [{module, ?MODULE}, - {line, ?LINE}, - {reason, "Failed to pause source database writer"}]}) - end. - -get_writer_num_docs_written(Pid) -> - Ref = make_ref(), - Pid ! {get_count, Ref}, - receive - {count, Ref, Count} -> - Count - after ?TIMEOUT_WRITER -> - erlang:error({assertion_failed, - [{module, ?MODULE}, - {line, ?LINE}, - {reason, "Timeout getting number of documents written" - " from source database writer"}]}) - end. - -stop_writer(Pid) -> - Ref = make_ref(), - Pid ! {stop, Ref}, - receive - {stopped, Ref, DocsWritten} -> - MonRef = erlang:monitor(process, Pid), - receive - {'DOWN', MonRef, process, Pid, _Reason} -> - DocsWritten - after ?TIMEOUT -> - erlang:error({assertion_failed, - [{module, ?MODULE}, - {line, ?LINE}, - {reason, "Timeout stopping source database writer"}]}) - end - after ?TIMEOUT_WRITER -> - erlang:error({assertion_failed, - [{module, ?MODULE}, - {line, ?LINE}, - {reason, "Timeout stopping source database writer"}]}) - end. - -writer_loop(Db0, Parent, Counter) -> - DbName = couch_db:name(Db0), - {ok, Data} = file:read_file(?ATTFILE), - maybe_pause(Parent, Counter), - Docs = lists:map(fun(I) -> - couch_doc:from_json_obj({[ - {<<"_id">>, ?l2b(integer_to_list(Counter + I))}, - {<<"value">>, Counter + I}, - {<<"_attachments">>, {[ - {<<"icon1.png">>, {[ - {<<"data">>, base64:encode(Data)}, - {<<"content_type">>, <<"image/png">>} - ]}}, - {<<"icon2.png">>, {[ - {<<"data">>, base64:encode(iolist_to_binary([Data, Data]))}, - {<<"content_type">>, <<"image/png">>} - ]}} - ]}} - ]}) - end, lists:seq(1, ?WRITE_BATCH_SIZE)), - maybe_pause(Parent, Counter), - {ok, Db} = couch_db:open_int(DbName, []), - {ok, _} = couch_db:update_docs(Db, Docs, []), - ok = couch_db:close(Db), - receive - {get_count, Ref} -> - Parent ! {count, Ref, Counter + ?WRITE_BATCH_SIZE}, - writer_loop(Db, Parent, Counter + ?WRITE_BATCH_SIZE); - {stop, Ref} -> - Parent ! {stopped, Ref, Counter + ?WRITE_BATCH_SIZE} - after 0 -> - timer:sleep(?DELAY), - writer_loop(Db, Parent, Counter + ?WRITE_BATCH_SIZE) - end. - -maybe_pause(Parent, Counter) -> - receive - {get_count, Ref} -> - Parent ! {count, Ref, Counter}; - {pause, Ref} -> - Parent ! {paused, Ref}, - receive - {continue, Ref2} -> - Parent ! {ok, Ref2} - end - after 0 -> - ok - end. diff --git a/src/couch_replicator/test/eunit/couch_replicator_error_reporting_tests.erl b/src/couch_replicator/test/eunit/couch_replicator_error_reporting_tests.erl deleted file mode 100644 index 6b4f95c25..000000000 --- a/src/couch_replicator/test/eunit/couch_replicator_error_reporting_tests.erl +++ /dev/null @@ -1,271 +0,0 @@ -% Licensed under the Apache License, Version 2.0 (the "License"); you may not -% use this file except in compliance with the License. You may obtain a copy of -% the License at -% -% http://www.apache.org/licenses/LICENSE-2.0 -% -% Unless required by applicable law or agreed to in writing, software -% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -% License for the specific language governing permissions and limitations under -% the License. - --module(couch_replicator_error_reporting_tests). - --include_lib("couch/include/couch_eunit.hrl"). --include_lib("couch/include/couch_db.hrl"). --include_lib("couch_replicator/src/couch_replicator.hrl"). - - -setup_all() -> - test_util:start_couch([couch_replicator, chttpd, mem3, fabric]). - - -teardown_all(Ctx) -> - ok = test_util:stop_couch(Ctx). - - -setup() -> - meck:unload(), - Source = setup_db(), - Target = setup_db(), - {Source, Target}. - - -teardown({Source, Target}) -> - meck:unload(), - teardown_db(Source), - teardown_db(Target), - ok. - - -error_reporting_test_() -> - { - setup, - fun setup_all/0, - fun teardown_all/1, - { - foreach, - fun setup/0, - fun teardown/1, - [ - fun t_fail_bulk_docs/1, - fun t_fail_changes_reader/1, - fun t_fail_revs_diff/1, - fun t_fail_changes_queue/1, - fun t_fail_changes_manager/1, - fun t_fail_changes_reader_proc/1 - ] - } - }. - - -t_fail_bulk_docs({Source, Target}) -> - ?_test(begin - populate_db(Source, 1, 5), - {ok, RepId} = replicate(Source, Target), - wait_target_in_sync(Source, Target), - - {ok, Listener} = rep_result_listener(RepId), - mock_fail_req("/_bulk_docs", {ok, "403", [], [<<"{\"x\":\"y\"}">>]}), - populate_db(Source, 6, 6), - - {error, Result} = wait_rep_result(RepId), - ?assertEqual({bulk_docs_failed, 403, {[{<<"x">>, <<"y">>}]}}, Result), - - couch_replicator_notifier:stop(Listener) - end). - - -t_fail_changes_reader({Source, Target}) -> - ?_test(begin - populate_db(Source, 1, 5), - {ok, RepId} = replicate(Source, Target), - wait_target_in_sync(Source, Target), - - {ok, Listener} = rep_result_listener(RepId), - mock_fail_req("/_changes", {ok, "418", [], [<<"{\"x\":\"y\"}">>]}), - populate_db(Source, 6, 6), - - {error, Result} = wait_rep_result(RepId), - ?assertEqual({changes_req_failed, 418, {[{<<"x">>, <<"y">>}]}}, Result), - - couch_replicator_notifier:stop(Listener) - end). - - -t_fail_revs_diff({Source, Target}) -> - ?_test(begin - populate_db(Source, 1, 5), - {ok, RepId} = replicate(Source, Target), - wait_target_in_sync(Source, Target), - - {ok, Listener} = rep_result_listener(RepId), - mock_fail_req("/_revs_diff", {ok, "407", [], [<<"{\"x\":\"y\"}">>]}), - populate_db(Source, 6, 6), - - {error, Result} = wait_rep_result(RepId), - ?assertEqual({revs_diff_failed, 407, {[{<<"x">>, <<"y">>}]}}, Result), - - couch_replicator_notifier:stop(Listener) - end). - - -t_fail_changes_queue({Source, Target}) -> - ?_test(begin - populate_db(Source, 1, 5), - {ok, RepId} = replicate(Source, Target), - wait_target_in_sync(Source, Target), - - RepPid = couch_replicator_test_helper:get_pid(RepId), - State = sys:get_state(RepPid), - ChangesQueue = element(20, State), - ?assert(is_process_alive(ChangesQueue)), - - {ok, Listener} = rep_result_listener(RepId), - exit(ChangesQueue, boom), - - {error, Result} = wait_rep_result(RepId), - ?assertEqual({changes_queue_died, boom}, Result), - couch_replicator_notifier:stop(Listener) - end). - - -t_fail_changes_manager({Source, Target}) -> - ?_test(begin - populate_db(Source, 1, 5), - {ok, RepId} = replicate(Source, Target), - wait_target_in_sync(Source, Target), - - RepPid = couch_replicator_test_helper:get_pid(RepId), - State = sys:get_state(RepPid), - ChangesManager = element(21, State), - ?assert(is_process_alive(ChangesManager)), - - {ok, Listener} = rep_result_listener(RepId), - exit(ChangesManager, bam), - - {error, Result} = wait_rep_result(RepId), - ?assertEqual({changes_manager_died, bam}, Result), - couch_replicator_notifier:stop(Listener) - end). - - -t_fail_changes_reader_proc({Source, Target}) -> - ?_test(begin - populate_db(Source, 1, 5), - {ok, RepId} = replicate(Source, Target), - wait_target_in_sync(Source, Target), - - RepPid = couch_replicator_test_helper:get_pid(RepId), - State = sys:get_state(RepPid), - ChangesReader = element(22, State), - ?assert(is_process_alive(ChangesReader)), - - {ok, Listener} = rep_result_listener(RepId), - exit(ChangesReader, kapow), - - {error, Result} = wait_rep_result(RepId), - ?assertEqual({changes_reader_died, kapow}, Result), - couch_replicator_notifier:stop(Listener) - end). - - -mock_fail_req(Path, Return) -> - meck:expect(ibrowse, send_req_direct, - fun(W, Url, Headers, Meth, Body, Opts, TOut) -> - Args = [W, Url, Headers, Meth, Body, Opts, TOut], - {ok, {_, _, _, _, UPath, _}} = http_uri:parse(Url), - case lists:suffix(Path, UPath) of - true -> Return; - false -> meck:passthrough(Args) - end - end). - - -rep_result_listener(RepId) -> - ReplyTo = self(), - {ok, _Listener} = couch_replicator_notifier:start_link( - fun({_, RepId2, _} = Ev) when RepId2 =:= RepId -> - ReplyTo ! Ev; - (_) -> - ok - end). - - -wait_rep_result(RepId) -> - receive - {finished, RepId, RepResult} -> {ok, RepResult}; - {error, RepId, Reason} -> {error, Reason} - end. - - - -setup_db() -> - DbName = ?tempdb(), - {ok, Db} = couch_db:create(DbName, [?ADMIN_CTX]), - ok = couch_db:close(Db), - DbName. - - -teardown_db(DbName) -> - ok = couch_server:delete(DbName, [?ADMIN_CTX]). - - -populate_db(DbName, Start, End) -> - {ok, Db} = couch_db:open_int(DbName, []), - Docs = lists:foldl( - fun(DocIdCounter, Acc) -> - Id = integer_to_binary(DocIdCounter), - Doc = #doc{id = Id, body = {[]}}, - [Doc | Acc] - end, - [], lists:seq(Start, End)), - {ok, _} = couch_db:update_docs(Db, Docs, []), - ok = couch_db:close(Db). - - -wait_target_in_sync(Source, Target) -> - {ok, SourceDb} = couch_db:open_int(Source, []), - {ok, SourceInfo} = couch_db:get_db_info(SourceDb), - ok = couch_db:close(SourceDb), - SourceDocCount = couch_util:get_value(doc_count, SourceInfo), - wait_target_in_sync_loop(SourceDocCount, Target, 300). - - -wait_target_in_sync_loop(_DocCount, _TargetName, 0) -> - erlang:error({assertion_failed, [ - {module, ?MODULE}, {line, ?LINE}, - {reason, "Could not get source and target databases in sync"} - ]}); - -wait_target_in_sync_loop(DocCount, TargetName, RetriesLeft) -> - {ok, Target} = couch_db:open_int(TargetName, []), - {ok, TargetInfo} = couch_db:get_db_info(Target), - ok = couch_db:close(Target), - TargetDocCount = couch_util:get_value(doc_count, TargetInfo), - case TargetDocCount == DocCount of - true -> - true; - false -> - ok = timer:sleep(500), - wait_target_in_sync_loop(DocCount, TargetName, RetriesLeft - 1) - end. - - -replicate(Source, Target) -> - SrcUrl = couch_replicator_test_helper:db_url(Source), - TgtUrl = couch_replicator_test_helper:db_url(Target), - RepObject = {[ - {<<"source">>, SrcUrl}, - {<<"target">>, TgtUrl}, - {<<"continuous">>, true}, - {<<"worker_processes">>, 1}, - {<<"retries_per_request">>, 1}, - % Low connection timeout so _changes feed gets restarted quicker - {<<"connection_timeout">>, 3000} - ]}, - {ok, Rep} = couch_replicator_utils:parse_rep_doc(RepObject, ?ADMIN_USER), - ok = couch_replicator_scheduler:add_job(Rep), - couch_replicator_scheduler:reschedule(), - {ok, Rep#rep.id}. -- cgit v1.2.1 From b6e87f8a43eebb4d02dfa52227ba5b77cd4ebc68 Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Fri, 28 Aug 2020 04:31:38 -0400 Subject: Cleanup couch_replicator_utils module * Remove unused functions and some function used only from one place like `sum_stats/2`. * Update time functions to use the more modern `erlang:system_time/1` API. * `parse_int_param/5` and `parse_replication_states/1` was moved from the old _httpd_util module as they were they only ones need from there. * `default_headers_map/0` Used to the default httpd record headers as a map since part of the replication data will be kept as map object. * `proplist_options/1` Some parts of the replicator, like _httpc and _api_wrap still use proplist options, so this function can be used to translate options as maps to a proplist version. --- .../src/couch_replicator_stats.erl | 2 + .../src/couch_replicator_utils.erl | 241 ++++++++++++--------- .../src/couch_replicator_worker.erl | 2 +- 3 files changed, 137 insertions(+), 108 deletions(-) diff --git a/src/couch_replicator/src/couch_replicator_stats.erl b/src/couch_replicator/src/couch_replicator_stats.erl index 37848b3ee..69e60a05c 100644 --- a/src/couch_replicator/src/couch_replicator_stats.erl +++ b/src/couch_replicator/src/couch_replicator_stats.erl @@ -32,6 +32,8 @@ new() -> orddict:new(). +new(#{} = Map) -> + new(maps:to_list(Map)); new(Initializers0) when is_list(Initializers0) -> Initializers1 = lists:filtermap(fun fmap/1, Initializers0), orddict:from_list(Initializers1). diff --git a/src/couch_replicator/src/couch_replicator_utils.erl b/src/couch_replicator/src/couch_replicator_utils.erl index 5f608dee7..cbed78ead 100644 --- a/src/couch_replicator/src/couch_replicator_utils.erl +++ b/src/couch_replicator/src/couch_replicator_utils.erl @@ -13,19 +13,18 @@ -module(couch_replicator_utils). -export([ - parse_rep_doc/2, - replication_id/2, - sum_stats/2, - is_deleted/1, rep_error_to_binary/1, - get_json_value/2, - get_json_value/3, - pp_rep_id/1, + iso8601/0, iso8601/1, - filter_state/3, + rfc1123_local/0, + rfc1123_local/1, remove_basic_auth_from_headers/1, normalize_rep/1, - ejson_state_info/1 + compare_reps/2, + default_headers_map/0, + parse_replication_states/1, + parse_int_param/5, + proplist_options/1 ]). @@ -33,11 +32,6 @@ -include("couch_replicator.hrl"). -include_lib("couch_replicator/include/couch_replicator_api_wrap.hrl"). --import(couch_util, [ - get_value/2, - get_value/3 -]). - rep_error_to_binary(Error) -> couch_util:to_binary(error_reason(Error)). @@ -54,77 +48,27 @@ error_reason(Reason) -> Reason. -get_json_value(Key, Props) -> - get_json_value(Key, Props, undefined). - -get_json_value(Key, Props, Default) when is_atom(Key) -> - Ref = make_ref(), - case get_value(Key, Props, Ref) of - Ref -> - get_value(?l2b(atom_to_list(Key)), Props, Default); - Else -> - Else - end; -get_json_value(Key, Props, Default) when is_binary(Key) -> - Ref = make_ref(), - case get_value(Key, Props, Ref) of - Ref -> - get_value(list_to_atom(?b2l(Key)), Props, Default); - Else -> - Else - end. - - -% pretty-print replication id --spec pp_rep_id(#rep{} | rep_id()) -> string(). -pp_rep_id(#rep{id = RepId}) -> - pp_rep_id(RepId); -pp_rep_id({Base, Extension}) -> - Base ++ Extension. - - -% NV: TODO: this function is not used outside api wrap module -% consider moving it there during final cleanup -is_deleted(Change) -> - get_json_value(<<"deleted">>, Change, false). - - -% NV: TODO: proxy some functions which used to be here, later remove -% these and replace calls to their respective modules -replication_id(Rep, Version) -> - couch_replicator_ids:replication_id(Rep, Version). +-spec iso8601() -> binary(). +iso8601() -> + iso8601(erlang:system_time(second)). -sum_stats(S1, S2) -> - couch_replicator_stats:sum_stats(S1, S2). - - -parse_rep_doc(Props, UserCtx) -> - couch_replicator_docs:parse_rep_doc(Props, UserCtx). - - --spec iso8601(erlang:timestamp()) -> binary(). -iso8601({_Mega, _Sec, _Micro} = Timestamp) -> - {{Y, Mon, D}, {H, Min, S}} = calendar:now_to_universal_time(Timestamp), +-spec iso8601(integer()) -> binary(). +iso8601(Sec) when is_integer(Sec) -> + Time = unix_sec_to_timestamp(Sec), + {{Y, Mon, D}, {H, Min, S}} = calendar:now_to_universal_time(Time), Format = "~B-~2..0B-~2..0BT~2..0B:~2..0B:~2..0BZ", iolist_to_binary(io_lib:format(Format, [Y, Mon, D, H, Min, S])). -%% Filter replication info ejson by state provided. If it matches return -%% the input value, if it doesn't return 'skip'. This is used from replicator -%% fabric coordinator and worker. --spec filter_state(atom(), [atom()], {[_ | _]}) -> {[_ | _]} | skip. -filter_state(null = _State, _States, _Info) -> - skip; -filter_state(_ = _State, [] = _States, Info) -> - Info; -filter_state(State, States, Info) -> - case lists:member(State, States) of - true -> - Info; - false -> - skip - end. +rfc1123_local() -> + list_to_binary(httpd_util:rfc1123_date()). + + +rfc1123_local(Sec) -> + Time = unix_sec_to_timestamp(Sec), + Local = calendar:now_to_local_time(Time), + list_to_binary(httpd_util:rfc1123_date(Local)). remove_basic_auth_from_headers(Headers) -> @@ -158,37 +102,101 @@ decode_basic_creds(Base64) -> end. -% Normalize a #rep{} record such that it doesn't contain time dependent fields +-spec compare_reps(#{} | null, #{} | null) -> boolean(). +compare_reps(Rep1, Rep2) -> + NormRep1 = normalize_rep(Rep1), + NormRep2 = normalize_rep(Rep2), + NormRep1 =:= NormRep2. + + +% Normalize a rep map such that it doesn't contain time dependent fields % pids (like httpc pools), and options / props are sorted. This function would % used during comparisons. --spec normalize_rep(#rep{} | nil) -> #rep{} | nil. -normalize_rep(nil) -> - nil; - -normalize_rep(#rep{} = Rep)-> - #rep{ - source = couch_replicator_api_wrap:normalize_db(Rep#rep.source), - target = couch_replicator_api_wrap:normalize_db(Rep#rep.target), - options = Rep#rep.options, % already sorted in make_options/1 - type = Rep#rep.type, - view = Rep#rep.view, - doc_id = Rep#rep.doc_id, - db_name = Rep#rep.db_name +-spec normalize_rep(#{} | null) -> #{} | null. +normalize_rep(null) -> + null; + +normalize_rep(#{} = Rep)-> + #{ + ?SOURCE := Source, + ?TARGET := Target, + ?OPTIONS := Options + } = Rep, + #{ + ?SOURCE => normalize_endpoint(Source), + ?TARGET => normalize_endpoint(Target), + ?OPTIONS => Options }. --spec ejson_state_info(binary() | nil) -> binary() | null. -ejson_state_info(nil) -> - null; -ejson_state_info(Info) when is_binary(Info) -> - {[{<<"error">>, Info}]}; -ejson_state_info([]) -> - null; % Status not set yet => null for compatibility reasons -ejson_state_info([{_, _} | _] = Info) -> - {Info}; -ejson_state_info(Info) -> - ErrMsg = couch_replicator_utils:rep_error_to_binary(Info), - {[{<<"error">>, ErrMsg}]}. +normalize_endpoint(<>) -> + DbName; + +normalize_endpoint(#{} = Endpoint) -> + Ks = [ + <<"url">>, + <<"auth_props">>, + <<"headers">>, + <<"timeout">>, + <<"ibrowse_options">>, + <<"retries">>, + <<"http_connections">>, + <<"proxy_url">> + ], + maps:with(Ks, Endpoint). + + +default_headers_map() -> + lists:foldl(fun({K, V}, Acc) -> + Acc#{list_to_binary(K) => list_to_binary(V)} + end, #{}, (#httpdb{})#httpdb.headers). + + +parse_replication_states(undefined) -> + []; % This is the default (wildcard) filter + +parse_replication_states(States) when is_list(States) -> + All = [?ST_RUNNING, ?ST_FAILED, ?ST_COMPLETED, ?ST_PENDING, ?ST_CRASHING], + AllSet = sets:from_list(All), + BinStates = [?l2b(string:to_lower(S)) || S <- string:tokens(States, ",")], + StatesSet = sets:from_list(BinStates), + Diff = sets:to_list(sets:subtract(StatesSet, AllSet)), + case Diff of + [] -> + BinStates; + _ -> + Args = [Diff, All], + Msg2 = io_lib:format("Unknown states ~p. Choose from: ~p", Args), + throw({query_parse_error, ?l2b(Msg2)}) + end. + + +parse_int_param(Req, Param, Default, Min, Max) -> + IntVal = try + list_to_integer(chttpd:qs_value(Req, Param, integer_to_list(Default))) + catch error:badarg -> + Msg1 = io_lib:format("~s must be an integer", [Param]), + throw({query_parse_error, ?l2b(Msg1)}) + end, + case IntVal >= Min andalso IntVal =< Max of + true -> + IntVal; + false -> + Msg2 = io_lib:format("~s not in range of [~w,~w]", [Param, Min, Max]), + throw({query_parse_error, ?l2b(Msg2)}) + end. + + +proplist_options(#{} = OptionsMap) -> + maps:fold(fun(K, V, Acc) -> + [{binary_to_atom(K, utf8), V} | Acc] + end, [], OptionsMap). + + +unix_sec_to_timestamp(Sec) when is_integer(Sec) -> + MegaSecPart = Sec div 1000000, + SecPart = Sec - MegaSecPart * 1000000, + {MegaSecPart, SecPart, 0}. -ifdef(TEST). @@ -256,7 +264,7 @@ normalize_rep_test_() -> {<<"doc_ids">>, [<<"a">>, <<"c">>, <<"b">>]}, {<<"other_field">>, <<"some_value">>} ]}, - Rep1 = couch_replicator_docs:parse_rep_doc_without_id(EJson1), + Rep1 = couch_replicator_parse:parse_rep_doc(EJson1), EJson2 = {[ {<<"other_field">>, <<"unrelated">>}, {<<"target">>, <<"http://target.local/db">>}, @@ -264,9 +272,28 @@ normalize_rep_test_() -> {<<"doc_ids">>, [<<"c">>, <<"a">>, <<"b">>]}, {<<"other_field2">>, <<"unrelated2">>} ]}, - Rep2 = couch_replicator_docs:parse_rep_doc_without_id(EJson2), + Rep2 = couch_replicator_parse:parse_rep_doc(EJson2), ?assertEqual(normalize_rep(Rep1), normalize_rep(Rep2)) end) }. + +normalize_endpoint() -> + HttpDb = #httpdb{ + url = "http://host/db", + auth_props = [{"key", "val"}], + headers = [{"k2","v2"}, {"k1","v1"}], + timeout = 30000, + ibrowse_options = [{k2, v2}, {k1, v1}], + retries = 10, + http_connections = 20 + }, + Expected = HttpDb#httpdb{ + headers = [{"k1","v1"}, {"k2","v2"}], + ibrowse_options = [{k1, v1}, {k2, v2}] + }, + ?assertEqual(Expected, normalize_endpoint(HttpDb)), + ?assertEqual(<<"local">>, normalize_endpoint(<<"local">>)). + + -endif. diff --git a/src/couch_replicator/src/couch_replicator_worker.erl b/src/couch_replicator/src/couch_replicator_worker.erl index eb8beaaa9..4cd984c1a 100644 --- a/src/couch_replicator/src/couch_replicator_worker.erl +++ b/src/couch_replicator/src/couch_replicator_worker.erl @@ -103,7 +103,7 @@ handle_call({batch_doc, Doc}, From, State) -> handle_call({add_stats, IncStats}, From, #state{stats = Stats} = State) -> gen_server:reply(From, ok), - NewStats = couch_replicator_utils:sum_stats(Stats, IncStats), + NewStats = couch_replicator_stats:sum_stats(Stats, IncStats), NewStats2 = maybe_report_stats(State#state.cp, NewStats), {noreply, State#state{stats = NewStats2}}; -- cgit v1.2.1 From b38d77fbada7cce7de288d2cdcca8839b09888f4 Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Fri, 28 Aug 2020 04:32:32 -0400 Subject: Move parsing and validation to couch_replicator_parse module This module is in responsible for parsing either an HTTP `_replicate` request body, or a _replicator doc into an internal `Rep` object (an Erlang map). `parse_transient_rep/2` parses _replicate requests. It also handles cancelations, where requests bodies look like ```{"id": ..., "cancel": true}``` instead of having all the expected parameters. `parse_rep_doc/1` parses _replicator docs. Parsing consists of 3 main parts: - Parsing the endpoint definitions: source and target url, headers, TLS bits and proxies - Parsing options into an options map, possibly using defaults from config parameters - Parsing socket parameters. These now have a hard-coded allow-list as opposed accepting all possible Erlang socket options. The parsing function also double as validation function which gets called from the _replicator's before_doc_update callback when users update replication documents. They would get an immediate feedback if their replicationd document is malformed. Everything is turned into a map object. This object should be able to be serialized and de-serialized to (from) JSON. Since maps are used, add the definitions of some common fields couch_replicator.hrl. Mistyping them should raise a compiler error. couch_replicator_docs lost all of its parsing function and also functions which update intermediate replication doc states (triggered and error). It still handles function which relate to interacting with _replicator docs. --- src/couch_replicator/src/couch_replicator.hrl | 102 ++- src/couch_replicator/src/couch_replicator_docs.erl | 870 ++++----------------- .../src/couch_replicator_parse.erl | 545 +++++++++++++ 3 files changed, 751 insertions(+), 766 deletions(-) create mode 100644 src/couch_replicator/src/couch_replicator_parse.erl diff --git a/src/couch_replicator/src/couch_replicator.hrl b/src/couch_replicator/src/couch_replicator.hrl index 2a5b7c8c8..28a86d91b 100644 --- a/src/couch_replicator/src/couch_replicator.hrl +++ b/src/couch_replicator/src/couch_replicator.hrl @@ -12,32 +12,80 @@ -define(REP_ID_VERSION, 4). --record(rep, { - id :: rep_id() | '_' | 'undefined', - source :: any() | '_', - target :: any() | '_', - options :: [_] | '_', - user_ctx :: any() | '_', - type = db :: atom() | '_', - view = nil :: any() | '_', - doc_id :: any() | '_', - db_name = null :: null | binary() | '_', - start_time = {0, 0, 0} :: erlang:timestamp() | '_', - stats = couch_replicator_stats:new() :: orddict:orddict() | '_' -}). - --type rep_id() :: {string(), string()}. +% Some fields from the replication doc +-define(SOURCE, <<"source">>). +-define(TARGET, <<"target">>). +-define(CREATE_TARGET, <<"create_target">>). +-define(DOC_IDS, <<"doc_ids">>). +-define(SELECTOR, <<"selector">>). +-define(FILTER, <<"filter">>). +-define(QUERY_PARAMS, <<"query_params">>). +-define(URL, <<"url">>). +-define(AUTH, <<"auth">>). +-define(HEADERS, <<"headers">>). +-define(PROXY, <<"proxy">>). +-define(SOURCE_PROXY, <<"source_proxy">>). +-define(TARGET_PROXY, <<"target_proxy">>). + +-define(REPLICATION_STATE, <<"_replication_state">>). +-define(REPLICATION_STATS, <<"_replication_stats">>). +-define(REPLICATION_ID, <<"_replication_id">>). +-define(REPLICATION_STATE_TIME, <<"_replication_state_time">>). +-define(REPLICATION_STATE_REASON, <<"_replication_state_reason">>). + +% Replication states +-define(ST_ERROR, <<"error">>). +-define(ST_COMPLETED, <<"completed">>). +-define(ST_RUNNING, <<"running">>). +-define(ST_FAILED, <<"failed">>). +-define(ST_PENDING, <<"pending">>). +-define(ST_CRASHING, <<"crashing">>). + +% Some fields from a rep object +-define(REP_ID, <<"rep_id">>). +-define(BASE_ID, <<"base_id">>). +-define(DB_NAME, <<"db_name">>). +-define(DB_UUID, <<"db_uuid">>). +-define(DOC_ID, <<"doc_id">>). +-define(REP_USER, <<"rep_user">>). +-define(START_TIME, <<"start_time">>). +-define(OPTIONS, <<"options">>). + +% Fields for couch job data objects +-define(REP, <<"rep">>). +-define(REP_PARSE_ERROR, <<"rep_parse_error">>). +-define(REP_STATS, <<"rep_stats">>). +-define(STATE, <<"state">>). +-define(STATE_INFO, <<"state_info">>). +-define(DOC_STATE, <<"doc_state">>). +-define(ERROR_COUNT, <<"error_count">>). +-define(LAST_UPDATED, <<"last_updated">>). +-define(LAST_START, <<"last_start">>). +-define(LAST_ERROR, <<"last_error">>). +-define(JOB_HISTORY, <<"job_history">>). +-define(CHECKPOINT_HISTORY, <<"checkpoint_history">>). +-define(REP_NODE, <<"node">>). +-define(REP_PID, <<"pid">>). + +% Job history tags +-define(HIST_TYPE, <<"type">>). +-define(HIST_TIMESTAMP, <<"timestamp">>). +-define(HIST_REASON, <<"reason">>). +-define(HIST_ADDED, <<"added">>). +-define(HIST_STARTED, <<"started">>). +-define(HIST_STOPPED, <<"stopped">>). +-define(HIST_PENDING, <<"pending">>). +-define(HIST_CRASHED, <<"crashed">>). + +-define(REP_DB_NAME, <<"_replicator">>). + +% Can be used as a guard +-define(IS_REP_DB(X), (X =:= ?REP_DB_NAME orelse + binary_part(X, {byte_size(X), -12}) =:= <<"/_replicator">>)). + + +-type rep_id() :: binary(). +-type job_id() :: binary(). +-type user_name() :: binary() | null. -type db_doc_id() :: {binary(), binary() | '_'}. -type seconds() :: non_neg_integer(). --type rep_start_result() :: - {ok, rep_id()} | - ignore | - {temporary_error, binary()} | - {permanent_failure, binary()}. - - --record(doc_worker_result, { - id :: db_doc_id(), - wref :: reference(), - result :: rep_start_result() -}). diff --git a/src/couch_replicator/src/couch_replicator_docs.erl b/src/couch_replicator/src/couch_replicator_docs.erl index 619063222..f84d1299a 100644 --- a/src/couch_replicator/src/couch_replicator_docs.erl +++ b/src/couch_replicator/src/couch_replicator_docs.erl @@ -13,306 +13,142 @@ -module(couch_replicator_docs). -export([ - parse_rep_doc/1, - parse_rep_doc/2, - parse_rep_db/3, - parse_rep_doc_without_id/1, - parse_rep_doc_without_id/2, + remove_state_fields/3, + update_completed/4, + update_failed/4, before_doc_update/3, - after_doc_read/2, - ensure_rep_ddoc_exists/1, - ensure_cluster_rep_ddoc_exists/1, - remove_state_fields/2, - update_doc_completed/3, - update_failed/3, - update_rep_id/1, - update_triggered/2, - update_error/2 + after_doc_read/2 ]). -include_lib("couch/include/couch_db.hrl"). --include_lib("ibrowse/include/ibrowse.hrl"). --include_lib("mem3/include/mem3.hrl"). -include_lib("couch_replicator/include/couch_replicator_api_wrap.hrl"). -include("couch_replicator.hrl"). --include("couch_replicator_js_functions.hrl"). - --import(couch_util, [ - get_value/2, - get_value/3, - to_binary/1 -]). - --import(couch_replicator_utils, [ - get_json_value/2, - get_json_value/3 -]). --define(REP_DB_NAME, <<"_replicator">>). --define(REP_DESIGN_DOC, <<"_design/_replicator">>). -define(OWNER, <<"owner">>). -define(CTX, {user_ctx, #user_ctx{roles=[<<"_admin">>, <<"_replicator">>]}}). -define(replace(L, K, V), lists:keystore(K, 1, L, {K, V})). -remove_state_fields(DbName, DocId) -> - update_rep_doc(DbName, DocId, [ - {<<"_replication_state">>, undefined}, - {<<"_replication_state_time">>, undefined}, - {<<"_replication_state_reason">>, undefined}, - {<<"_replication_id">>, undefined}, - {<<"_replication_stats">>, undefined}]). +remove_state_fields(null, null, null) -> + ok; +remove_state_fields(DbName, DbUUID, DocId) -> + update_rep_doc(DbName, DbUUID, DocId, [ + {?REPLICATION_STATE, undefined}, + {?REPLICATION_STATE_TIME, undefined}, + {?REPLICATION_STATE_REASON, undefined}, + {?REPLICATION_ID, undefined}, + {?REPLICATION_STATS, undefined} + ]), + ok. --spec update_doc_completed(binary(), binary(), [_]) -> any(). -update_doc_completed(DbName, DocId, Stats) -> - update_rep_doc(DbName, DocId, [ - {<<"_replication_state">>, <<"completed">>}, - {<<"_replication_state_reason">>, undefined}, - {<<"_replication_stats">>, {Stats}}]), - couch_stats:increment_counter([couch_replicator, docs, - completed_state_updates]). +-spec update_completed(binary(), binary(), binary(), [_]) -> ok. +update_completed(null, null, _, _) -> + ok; --spec update_failed(binary(), binary(), any()) -> any(). -update_failed(DbName, DocId, Error) -> - Reason = error_reason(Error), - couch_log:error("Error processing replication doc `~s` from `~s`: ~s", - [DocId, DbName, Reason]), - update_rep_doc(DbName, DocId, [ - {<<"_replication_state">>, <<"failed">>}, - {<<"_replication_stats">>, undefined}, - {<<"_replication_state_reason">>, Reason}]), +update_completed(DbName, DbUUID, DocId, #{} = Stats0) -> + Stats = {maps:to_list(Stats0)}, + update_rep_doc(DbName, DbUUID, DocId, [ + {?REPLICATION_STATE, ?ST_COMPLETED}, + {?REPLICATION_STATE_REASON, undefined}, + {?REPLICATION_STATS, Stats}]), couch_stats:increment_counter([couch_replicator, docs, - failed_state_updates]). - - --spec update_triggered(#rep{}, rep_id()) -> ok. -update_triggered(Rep, {Base, Ext}) -> - #rep{ - db_name = DbName, - doc_id = DocId - } = Rep, - update_rep_doc(DbName, DocId, [ - {<<"_replication_state">>, <<"triggered">>}, - {<<"_replication_state_reason">>, undefined}, - {<<"_replication_id">>, iolist_to_binary([Base, Ext])}, - {<<"_replication_stats">>, undefined}]), + completed_state_updates + ]), ok. --spec update_error(#rep{}, any()) -> ok. -update_error(#rep{db_name = DbName, doc_id = DocId, id = RepId}, Error) -> - Reason = error_reason(Error), - BinRepId = case RepId of - {Base, Ext} -> - iolist_to_binary([Base, Ext]); - _Other -> - null - end, - update_rep_doc(DbName, DocId, [ - {<<"_replication_state">>, <<"error">>}, - {<<"_replication_state_reason">>, Reason}, - {<<"_replication_stats">>, undefined}, - {<<"_replication_id">>, BinRepId}]), - ok. - +-spec update_failed(binary(), binary(), binary(), any()) -> ok. +update_failed(null, null, null, _) -> + ok; --spec ensure_rep_ddoc_exists(binary()) -> ok. -ensure_rep_ddoc_exists(RepDb) -> - case mem3:belongs(RepDb, ?REP_DESIGN_DOC) of - true -> - ensure_rep_ddoc_exists(RepDb, ?REP_DESIGN_DOC); - false -> - ok - end. - - --spec ensure_rep_ddoc_exists(binary(), binary()) -> ok. -ensure_rep_ddoc_exists(RepDb, DDocId) -> - case open_rep_doc(RepDb, DDocId) of - {not_found, no_db_file} -> - %% database was deleted. - ok; - {not_found, _Reason} -> - DocProps = replication_design_doc_props(DDocId), - DDoc = couch_doc:from_json_obj({DocProps}), - couch_log:notice("creating replicator ddoc ~p", [RepDb]), - {ok, _Rev} = save_rep_doc(RepDb, DDoc); - {ok, Doc} -> - Latest = replication_design_doc_props(DDocId), - {Props0} = couch_doc:to_json_obj(Doc, []), - {value, {_, Rev}, Props} = lists:keytake(<<"_rev">>, 1, Props0), - case compare_ejson({Props}, {Latest}) of - true -> - ok; - false -> - LatestWithRev = [{<<"_rev">>, Rev} | Latest], - DDoc = couch_doc:from_json_obj({LatestWithRev}), - couch_log:notice("updating replicator ddoc ~p", [RepDb]), - try - {ok, _} = save_rep_doc(RepDb, DDoc) - catch - throw:conflict -> - %% ignore, we'll retry next time - ok - end - end - end, +update_failed(DbName, DbUUID, DocId, Error) -> + Reason = error_reason(Error), + couch_log:error("Error processing replication doc `~s` from `~s`: ~s", + [DocId, DbName, Reason]), + update_rep_doc(DbName, DbUUID, DocId, [ + {?REPLICATION_STATE, ?ST_FAILED}, + {?REPLICATION_STATS, undefined}, + {?REPLICATION_STATE_REASON, Reason} + ]), + couch_stats:increment_counter([couch_replicator, docs, + failed_state_updates]), ok. --spec ensure_cluster_rep_ddoc_exists(binary()) -> ok. -ensure_cluster_rep_ddoc_exists(RepDb) -> - DDocId = ?REP_DESIGN_DOC, - [#shard{name = DbShard} | _] = mem3:shards(RepDb, DDocId), - ensure_rep_ddoc_exists(DbShard, DDocId). - - --spec compare_ejson({[_]}, {[_]}) -> boolean(). -compare_ejson(EJson1, EJson2) -> - EjsonSorted1 = couch_replicator_filters:ejsort(EJson1), - EjsonSorted2 = couch_replicator_filters:ejsort(EJson2), - EjsonSorted1 == EjsonSorted2. - - --spec replication_design_doc_props(binary()) -> [_]. -replication_design_doc_props(DDocId) -> - [ - {<<"_id">>, DDocId}, - {<<"language">>, <<"javascript">>}, - {<<"validate_doc_update">>, ?REP_DB_DOC_VALIDATE_FUN} - ]. - +-spec before_doc_update(#doc{}, Db::any(), couch_db:update_type()) -> #doc{}. +before_doc_update(#doc{id = <>} = Doc, _, _) -> + Doc; +before_doc_update(#doc{body = {Body}} = Doc, Db, _UpdateType) -> + #user_ctx{roles = Roles, name = Name} = fabric2_db:get_user_ctx(Db), + IsReplicator = lists:member(<<"_replicator">>, Roles), -% Note: parse_rep_doc can handle filtered replications. During parsing of the -% replication doc it will make possibly remote http requests to the source -% database. If failure or parsing of filter docs fails, parse_doc throws a -% {filter_fetch_error, Error} excation. This exception should be considered -% transient in respect to the contents of the document itself, since it depends -% on netowrk availability of the source db and other factors. --spec parse_rep_doc({[_]}) -> #rep{}. -parse_rep_doc(RepDoc) -> - {ok, Rep} = try - parse_rep_doc(RepDoc, rep_user_ctx(RepDoc)) - catch - throw:{error, Reason} -> - throw({bad_rep_doc, Reason}); - throw:{filter_fetch_error, Reason} -> - throw({filter_fetch_error, Reason}); - Tag:Err -> - throw({bad_rep_doc, to_binary({Tag, Err})}) + Doc1 = case IsReplicator of true -> Doc; false -> + case couch_util:get_value(?OWNER, Body) of + undefined -> + Doc#doc{body = {?replace(Body, ?OWNER, Name)}}; + Name -> + Doc; + Other -> + case (catch fabric2_db:check_is_admin(Db)) of + ok when Other =:= null -> + Doc#doc{body = {?replace(Body, ?OWNER, Name)}}; + ok -> + Doc; + _ -> + throw({forbidden, <<"Can't update replication", + "documents from other users.">>}) + end + end end, - Rep. - --spec parse_rep_doc_without_id({[_]}) -> #rep{}. -parse_rep_doc_without_id(RepDoc) -> - {ok, Rep} = try - parse_rep_doc_without_id(RepDoc, rep_user_ctx(RepDoc)) - catch - throw:{error, Reason} -> - throw({bad_rep_doc, Reason}); - Tag:Err -> - throw({bad_rep_doc, to_binary({Tag, Err})}) + Deleted = Doc1#doc.deleted, + IsFailed = couch_util:get_value(?REPLICATION_STATE, Body) == ?ST_FAILED, + case IsReplicator orelse Deleted orelse IsFailed of true -> ok; false -> + try + couch_replicator_parse:parse_rep_doc(Doc1#doc.body) + catch + throw:{bad_rep_doc, Error} -> + throw({forbidden, Error}) + end end, - Rep. - - --spec parse_rep_doc({[_]}, #user_ctx{}) -> {ok, #rep{}}. -parse_rep_doc(Doc, UserCtx) -> - {ok, Rep} = parse_rep_doc_without_id(Doc, UserCtx), - Cancel = get_value(cancel, Rep#rep.options, false), - Id = get_value(id, Rep#rep.options, nil), - case {Cancel, Id} of - {true, nil} -> - % Cancel request with no id, must parse id out of body contents - {ok, update_rep_id(Rep)}; - {true, Id} -> - % Cancel request with an id specified, so do not parse id from body - {ok, Rep}; - {false, _Id} -> - % Not a cancel request, regular replication doc - {ok, update_rep_id(Rep)} - end. - - --spec parse_rep_doc_without_id({[_]}, #user_ctx{}) -> {ok, #rep{}}. -parse_rep_doc_without_id({Props}, UserCtx) -> - {SrcProxy, TgtProxy} = parse_proxy_settings(Props), - Opts = make_options(Props), - case get_value(cancel, Opts, false) andalso - (get_value(id, Opts, nil) =/= nil) of - true -> - {ok, #rep{options = Opts, user_ctx = UserCtx}}; - false -> - Source = parse_rep_db(get_value(<<"source">>, Props), SrcProxy, Opts), - Target = parse_rep_db(get_value(<<"target">>, Props), TgtProxy, Opts), - {Type, View} = case couch_replicator_filters:view_type(Props, Opts) of - {error, Error} -> - throw({bad_request, Error}); - Result -> - Result - end, - Rep = #rep{ - source = Source, - target = Target, - options = Opts, - user_ctx = UserCtx, - type = Type, - view = View, - doc_id = get_value(<<"_id">>, Props, null) - }, - % Check if can parse filter code, if not throw exception - case couch_replicator_filters:parse(Opts) of - {error, FilterError} -> - throw({error, FilterError}); - {ok, _Filter} -> - ok - end, - {ok, Rep} - end. + Doc1. -parse_proxy_settings(Props) when is_list(Props) -> - Proxy = get_value(<<"proxy">>, Props, <<>>), - SrcProxy = get_value(<<"source_proxy">>, Props, <<>>), - TgtProxy = get_value(<<"target_proxy">>, Props, <<>>), - - case Proxy =/= <<>> of - true when SrcProxy =/= <<>> -> - Error = "`proxy` is mutually exclusive with `source_proxy`", - throw({bad_request, Error}); - true when TgtProxy =/= <<>> -> - Error = "`proxy` is mutually exclusive with `target_proxy`", - throw({bad_request, Error}); - true -> - {Proxy, Proxy}; - false -> - {SrcProxy, TgtProxy} +-spec after_doc_read(#doc{}, Db::any()) -> #doc{}. +after_doc_read(#doc{id = <>} = Doc, _Db) -> + Doc; +after_doc_read(#doc{body = {Body}} = Doc, Db) -> + #user_ctx{name = Name} = fabric2_db:get_user_ctx(Db), + case (catch fabric2_db:check_is_admin(Db)) of ok -> Doc; _ -> + case couch_util:get_value(?OWNER, Body) of Name -> Doc; _ -> + Source0 = couch_util:get_value(<<"source">>, Body), + Target0 = couch_util:get_value(<<"target">>, Body), + Source = strip_credentials(Source0), + Target = strip_credentials(Target0), + NewBody0 = ?replace(Body, <<"source">>, Source), + NewBody = ?replace(NewBody0, <<"target">>, Target), + #doc{revs = {Pos, [_ | Revs]}} = Doc, + NewDoc = Doc#doc{body = {NewBody}, revs = {Pos - 1, Revs}}, + fabric2_db:new_revid(Db, NewDoc) + end end. -% Update a #rep{} record with a replication_id. Calculating the id might involve -% fetching a filter from the source db, and so it could fail intermetently. -% In case of a failure to fetch the filter this function will throw a -% `{filter_fetch_error, Reason} exception. -update_rep_id(Rep) -> - RepId = couch_replicator_ids:replication_id(Rep), - Rep#rep{id = RepId}. +update_rep_doc(RepDbName, RepDbUUID, RepDocId, KVs) -> + update_rep_doc(RepDbName, RepDbUUID, RepDocId, KVs, 1). -update_rep_doc(RepDbName, RepDocId, KVs) -> - update_rep_doc(RepDbName, RepDocId, KVs, 1). - - -update_rep_doc(RepDbName, RepDocId, KVs, Wait) when is_binary(RepDocId) -> +update_rep_doc(RepDbName, RepDbUUID, RepDocId, KVs, Wait) + when is_binary(RepDocId) -> try - case open_rep_doc(RepDbName, RepDocId) of + case open_rep_doc(RepDbName, RepDbUUID, RepDocId) of {ok, LastRepDoc} -> - update_rep_doc(RepDbName, LastRepDoc, KVs, Wait * 2); + update_rep_doc(RepDbName, RepDbUUID, LastRepDoc, KVs, + Wait * 2); _ -> ok end @@ -321,25 +157,25 @@ update_rep_doc(RepDbName, RepDocId, KVs, Wait) when is_binary(RepDocId) -> Msg = "Conflict when updating replication doc `~s`. Retrying.", couch_log:error(Msg, [RepDocId]), ok = timer:sleep(couch_rand:uniform(erlang:min(128, Wait)) * 100), - update_rep_doc(RepDbName, RepDocId, KVs, Wait * 2) + update_rep_doc(RepDbName, RepDbUUID, RepDocId, KVs, Wait * 2) end; -update_rep_doc(RepDbName, #doc{body = {RepDocBody}} = RepDoc, KVs, _Try) -> +update_rep_doc(RepDbName, RepDbUUID, #doc{body = {RepDocBody}} = RepDoc, KVs, _Try) -> NewRepDocBody = lists:foldl( - fun({K, undefined}, Body) -> + fun({K, undefined}, Body) when is_binary(K) -> lists:keydelete(K, 1, Body); - ({<<"_replication_state">> = K, State} = KV, Body) -> - case get_json_value(K, Body) of + ({?REPLICATION_STATE = K, State} = KV, Body) when is_binary(K) -> + case couch_util:get_value(K, Body) of State -> Body; _ -> Body1 = lists:keystore(K, 1, Body, KV), - Timestamp = couch_replicator_utils:iso8601(os:timestamp()), + Timestamp = couch_replicator_utils:iso8601(), lists:keystore( - <<"_replication_state_time">>, 1, Body1, - {<<"_replication_state_time">>, Timestamp}) + ?REPLICATION_STATE_TIME, 1, Body1, + {?REPLICATION_STATE_TIME, Timestamp}) end; - ({K, _V} = KV, Body) -> + ({K, _V} = KV, Body) when is_binary(K) -> lists:keystore(K, 1, Body, KV) end, RepDocBody, KVs), @@ -349,331 +185,37 @@ update_rep_doc(RepDbName, #doc{body = {RepDocBody}} = RepDoc, KVs, _Try) -> _ -> % Might not succeed - when the replication doc is deleted right % before this update (not an error, ignore). - save_rep_doc(RepDbName, RepDoc#doc{body = {NewRepDocBody}}) + save_rep_doc(RepDbName, RepDbUUID, RepDoc#doc{body = {NewRepDocBody}}) end. -open_rep_doc(DbName, DocId) -> - case couch_db:open_int(DbName, [?CTX, sys_db]) of - {ok, Db} -> - try - couch_db:open_doc(Db, DocId, [ejson_body]) - after - couch_db:close(Db) - end; - Else -> - Else +open_rep_doc(DbName, DbUUID, DocId) when is_binary(DbName), is_binary(DbUUID), + is_binary(DocId) -> + try + case fabric2_db:open(DbName, [?CTX, sys_db, {uuid, DbUUID}]) of + {ok, Db} -> fabric2_db:open_doc(Db, DocId, [ejson_body]); + Else -> Else + end + catch + error:database_does_not_exist -> + {not_found, database_does_not_exist} end. -save_rep_doc(DbName, Doc) -> - {ok, Db} = couch_db:open_int(DbName, [?CTX, sys_db]), +save_rep_doc(DbName, DbUUID, Doc) when is_binary(DbName), is_binary(DbUUID) -> try - couch_db:update_doc(Db, Doc, []) + {ok, Db} = fabric2_db:open(DbName, [?CTX, sys_db, {uuid, DbUUID}]), + fabric2_db:update_doc(Db, Doc, []) catch + error:database_does_not_exist -> + {not_found, database_does_not_exist}; % User can accidently write a VDU which prevents _replicator from % updating replication documents. Avoid crashing replicator and thus % preventing all other replication jobs on the node from running. throw:{forbidden, Reason} -> - Msg = "~p VDU function preventing doc update to ~s ~s ~p", + Msg = "~p VDU or BDU function preventing doc update to ~s ~s ~p", couch_log:error(Msg, [?MODULE, DbName, Doc#doc.id, Reason]), {ok, forbidden} - after - couch_db:close(Db) - end. - - --spec rep_user_ctx({[_]}) -> #user_ctx{}. -rep_user_ctx({RepDoc}) -> - case get_json_value(<<"user_ctx">>, RepDoc) of - undefined -> - #user_ctx{}; - {UserCtx} -> - #user_ctx{ - name = get_json_value(<<"name">>, UserCtx, null), - roles = get_json_value(<<"roles">>, UserCtx, []) - } - end. - - --spec parse_rep_db({[_]} | binary(), binary(), [_]) -> #httpd{} | binary(). -parse_rep_db({Props}, Proxy, Options) -> - ProxyParams = parse_proxy_params(Proxy), - ProxyURL = case ProxyParams of - [] -> undefined; - _ -> binary_to_list(Proxy) - end, - Url = maybe_add_trailing_slash(get_value(<<"url">>, Props)), - {AuthProps} = get_value(<<"auth">>, Props, {[]}), - {BinHeaders} = get_value(<<"headers">>, Props, {[]}), - Headers = lists:ukeysort(1, [{?b2l(K), ?b2l(V)} || {K, V} <- BinHeaders]), - DefaultHeaders = (#httpdb{})#httpdb.headers, - #httpdb{ - url = Url, - auth_props = AuthProps, - headers = lists:ukeymerge(1, Headers, DefaultHeaders), - ibrowse_options = lists:keysort(1, - [{socket_options, get_value(socket_options, Options)} | - ProxyParams ++ ssl_params(Url)]), - timeout = get_value(connection_timeout, Options), - http_connections = get_value(http_connections, Options), - retries = get_value(retries, Options), - proxy_url = ProxyURL - }; - -parse_rep_db(<<"http://", _/binary>> = Url, Proxy, Options) -> - parse_rep_db({[{<<"url">>, Url}]}, Proxy, Options); - -parse_rep_db(<<"https://", _/binary>> = Url, Proxy, Options) -> - parse_rep_db({[{<<"url">>, Url}]}, Proxy, Options); - -parse_rep_db(<<_/binary>>, _Proxy, _Options) -> - throw({error, local_endpoints_not_supported}); - -parse_rep_db(undefined, _Proxy, _Options) -> - throw({error, <<"Missing replicator database">>}). - - --spec maybe_add_trailing_slash(binary() | list()) -> list(). -maybe_add_trailing_slash(Url) when is_binary(Url) -> - maybe_add_trailing_slash(?b2l(Url)); -maybe_add_trailing_slash(Url) -> - case lists:member($?, Url) of - true -> - Url; % skip if there are query params - false -> - case lists:last(Url) of - $/ -> - Url; - _ -> - Url ++ "/" - end - end. - - --spec make_options([_]) -> [_]. -make_options(Props) -> - Options0 = lists:ukeysort(1, convert_options(Props)), - Options = check_options(Options0), - DefWorkers = config:get("replicator", "worker_processes", "4"), - DefBatchSize = config:get("replicator", "worker_batch_size", "500"), - DefConns = config:get("replicator", "http_connections", "20"), - DefTimeout = config:get("replicator", "connection_timeout", "30000"), - DefRetries = config:get("replicator", "retries_per_request", "5"), - UseCheckpoints = config:get("replicator", "use_checkpoints", "true"), - DefCheckpointInterval = config:get("replicator", "checkpoint_interval", - "30000"), - {ok, DefSocketOptions} = couch_util:parse_term( - config:get("replicator", "socket_options", - "[{keepalive, true}, {nodelay, false}]")), - lists:ukeymerge(1, Options, lists:keysort(1, [ - {connection_timeout, list_to_integer(DefTimeout)}, - {retries, list_to_integer(DefRetries)}, - {http_connections, list_to_integer(DefConns)}, - {socket_options, DefSocketOptions}, - {worker_batch_size, list_to_integer(DefBatchSize)}, - {worker_processes, list_to_integer(DefWorkers)}, - {use_checkpoints, list_to_existing_atom(UseCheckpoints)}, - {checkpoint_interval, list_to_integer(DefCheckpointInterval)} - ])). - - --spec convert_options([_]) -> [_]. -convert_options([])-> - []; -convert_options([{<<"cancel">>, V} | _R]) when not is_boolean(V)-> - throw({bad_request, <<"parameter `cancel` must be a boolean">>}); -convert_options([{<<"cancel">>, V} | R]) -> - [{cancel, V} | convert_options(R)]; -convert_options([{IdOpt, V} | R]) when IdOpt =:= <<"_local_id">>; - IdOpt =:= <<"replication_id">>; IdOpt =:= <<"id">> -> - [{id, couch_replicator_ids:convert(V)} | convert_options(R)]; -convert_options([{<<"create_target">>, V} | _R]) when not is_boolean(V)-> - throw({bad_request, <<"parameter `create_target` must be a boolean">>}); -convert_options([{<<"create_target">>, V} | R]) -> - [{create_target, V} | convert_options(R)]; -convert_options([{<<"create_target_params">>, V} | _R]) when not is_tuple(V) -> - throw({bad_request, - <<"parameter `create_target_params` must be a JSON object">>}); -convert_options([{<<"create_target_params">>, V} | R]) -> - [{create_target_params, V} | convert_options(R)]; -convert_options([{<<"continuous">>, V} | _R]) when not is_boolean(V)-> - throw({bad_request, <<"parameter `continuous` must be a boolean">>}); -convert_options([{<<"continuous">>, V} | R]) -> - [{continuous, V} | convert_options(R)]; -convert_options([{<<"filter">>, V} | R]) -> - [{filter, V} | convert_options(R)]; -convert_options([{<<"query_params">>, V} | R]) -> - [{query_params, V} | convert_options(R)]; -convert_options([{<<"doc_ids">>, null} | R]) -> - convert_options(R); -convert_options([{<<"doc_ids">>, V} | _R]) when not is_list(V) -> - throw({bad_request, <<"parameter `doc_ids` must be an array">>}); -convert_options([{<<"doc_ids">>, V} | R]) -> - % Ensure same behaviour as old replicator: accept a list of percent - % encoded doc IDs. - DocIds = lists:usort([?l2b(couch_httpd:unquote(Id)) || Id <- V]), - [{doc_ids, DocIds} | convert_options(R)]; -convert_options([{<<"selector">>, V} | _R]) when not is_tuple(V) -> - throw({bad_request, <<"parameter `selector` must be a JSON object">>}); -convert_options([{<<"selector">>, V} | R]) -> - [{selector, V} | convert_options(R)]; -convert_options([{<<"worker_processes">>, V} | R]) -> - [{worker_processes, couch_util:to_integer(V)} | convert_options(R)]; -convert_options([{<<"worker_batch_size">>, V} | R]) -> - [{worker_batch_size, couch_util:to_integer(V)} | convert_options(R)]; -convert_options([{<<"http_connections">>, V} | R]) -> - [{http_connections, couch_util:to_integer(V)} | convert_options(R)]; -convert_options([{<<"connection_timeout">>, V} | R]) -> - [{connection_timeout, couch_util:to_integer(V)} | convert_options(R)]; -convert_options([{<<"retries_per_request">>, V} | R]) -> - [{retries, couch_util:to_integer(V)} | convert_options(R)]; -convert_options([{<<"socket_options">>, V} | R]) -> - {ok, SocketOptions} = couch_util:parse_term(V), - [{socket_options, SocketOptions} | convert_options(R)]; -convert_options([{<<"since_seq">>, V} | R]) -> - [{since_seq, V} | convert_options(R)]; -convert_options([{<<"use_checkpoints">>, V} | R]) -> - [{use_checkpoints, V} | convert_options(R)]; -convert_options([{<<"checkpoint_interval">>, V} | R]) -> - [{checkpoint_interval, couch_util:to_integer(V)} | convert_options(R)]; -convert_options([_ | R]) -> % skip unknown option - convert_options(R). - - --spec check_options([_]) -> [_]. -check_options(Options) -> - DocIds = lists:keyfind(doc_ids, 1, Options), - Filter = lists:keyfind(filter, 1, Options), - Selector = lists:keyfind(selector, 1, Options), - case {DocIds, Filter, Selector} of - {false, false, false} -> Options; - {false, false, _} -> Options; - {false, _, false} -> Options; - {_, false, false} -> Options; - _ -> - throw({bad_request, - "`doc_ids`,`filter`,`selector` are mutually exclusive"}) - end. - - --spec parse_proxy_params(binary() | [_]) -> [_]. -parse_proxy_params(ProxyUrl) when is_binary(ProxyUrl) -> - parse_proxy_params(?b2l(ProxyUrl)); -parse_proxy_params([]) -> - []; -parse_proxy_params(ProxyUrl) -> - #url{ - host = Host, - port = Port, - username = User, - password = Passwd, - protocol = Protocol - } = ibrowse_lib:parse_url(ProxyUrl), - [ - {proxy_protocol, Protocol}, - {proxy_host, Host}, - {proxy_port, Port} - ] ++ case is_list(User) andalso is_list(Passwd) of - false -> - []; - true -> - [{proxy_user, User}, {proxy_password, Passwd}] - end. - - --spec ssl_params([_]) -> [_]. -ssl_params(Url) -> - case ibrowse_lib:parse_url(Url) of - #url{protocol = https} -> - Depth = list_to_integer( - config:get("replicator", "ssl_certificate_max_depth", "3") - ), - VerifyCerts = config:get("replicator", "verify_ssl_certificates"), - CertFile = config:get("replicator", "cert_file", undefined), - KeyFile = config:get("replicator", "key_file", undefined), - Password = config:get("replicator", "password", undefined), - SslOpts = [{depth, Depth} | ssl_verify_options(VerifyCerts =:= "true")], - SslOpts1 = case CertFile /= undefined andalso KeyFile /= undefined of - true -> - case Password of - undefined -> - [{certfile, CertFile}, {keyfile, KeyFile}] ++ SslOpts; - _ -> - [{certfile, CertFile}, {keyfile, KeyFile}, - {password, Password}] ++ SslOpts - end; - false -> SslOpts - end, - [{is_ssl, true}, {ssl_options, SslOpts1}]; - #url{protocol = http} -> - [] - end. - - --spec ssl_verify_options(true | false) -> [_]. -ssl_verify_options(true) -> - CAFile = config:get("replicator", "ssl_trusted_certificates_file"), - [{verify, verify_peer}, {cacertfile, CAFile}]; -ssl_verify_options(false) -> - [{verify, verify_none}]. - - --spec before_doc_update(#doc{}, Db::any(), couch_db:update_type()) -> #doc{}. -before_doc_update(#doc{id = <>} = Doc, _Db, _UpdateType) -> - Doc; -before_doc_update(#doc{body = {Body}} = Doc, Db, _UpdateType) -> - #user_ctx{ - roles = Roles, - name = Name - } = couch_db:get_user_ctx(Db), - case lists:member(<<"_replicator">>, Roles) of - true -> - Doc; - false -> - case couch_util:get_value(?OWNER, Body) of - undefined -> - Doc#doc{body = {?replace(Body, ?OWNER, Name)}}; - Name -> - Doc; - Other -> - case (catch couch_db:check_is_admin(Db)) of - ok when Other =:= null -> - Doc#doc{body = {?replace(Body, ?OWNER, Name)}}; - ok -> - Doc; - _ -> - throw({forbidden, <<"Can't update replication documents", - " from other users.">>}) - end - end - end. - - --spec after_doc_read(#doc{}, Db::any()) -> #doc{}. -after_doc_read(#doc{id = <>} = Doc, _Db) -> - Doc; -after_doc_read(#doc{body = {Body}} = Doc, Db) -> - #user_ctx{name = Name} = couch_db:get_user_ctx(Db), - case (catch couch_db:check_is_admin(Db)) of - ok -> - Doc; - _ -> - case couch_util:get_value(?OWNER, Body) of - Name -> - Doc; - _Other -> - Source = strip_credentials(couch_util:get_value(<<"source">>, -Body)), - Target = strip_credentials(couch_util:get_value(<<"target">>, -Body)), - NewBody0 = ?replace(Body, <<"source">>, Source), - NewBody = ?replace(NewBody0, <<"target">>, Target), - #doc{revs = {Pos, [_ | Revs]}} = Doc, - NewDoc = Doc#doc{body = {NewBody}, revs = {Pos - 1, Revs}}, - NewRevId = couch_db:new_revid(NewDoc), - NewDoc#doc{revs = {Pos, [NewRevId | Revs]}} - end end. @@ -698,164 +240,14 @@ strip_credentials({Props0}) -> error_reason({shutdown, Error}) -> error_reason(Error); error_reason({bad_rep_doc, Reason}) -> - to_binary(Reason); + couch_util:to_binary(Reason); +error_reason(#{<<"error">> := Error, <<"reason">> := Reason}) + when is_binary(Error), is_binary(Reason) -> + couch_util:to_binary(io_list:format("~s: ~s", [Error, Reason])); error_reason({error, {Error, Reason}}) - when is_atom(Error), is_binary(Reason) -> - to_binary(io_lib:format("~s: ~s", [Error, Reason])); + when is_atom(Error), is_binary(Reason) -> + couch_util:to_binary(io_lib:format("~s: ~s", [Error, Reason])); error_reason({error, Reason}) -> - to_binary(Reason); + couch_util:to_binary(Reason); error_reason(Reason) -> - to_binary(Reason). - - --ifdef(TEST). - - --include_lib("couch/include/couch_eunit.hrl"). - - -check_options_pass_values_test() -> - ?assertEqual(check_options([]), []), - ?assertEqual(check_options([baz, {other, fiz}]), [baz, {other, fiz}]), - ?assertEqual(check_options([{doc_ids, x}]), [{doc_ids, x}]), - ?assertEqual(check_options([{filter, x}]), [{filter, x}]), - ?assertEqual(check_options([{selector, x}]), [{selector, x}]). - - -check_options_fail_values_test() -> - ?assertThrow({bad_request, _}, - check_options([{doc_ids, x}, {filter, y}])), - ?assertThrow({bad_request, _}, - check_options([{doc_ids, x}, {selector, y}])), - ?assertThrow({bad_request, _}, - check_options([{filter, x}, {selector, y}])), - ?assertThrow({bad_request, _}, - check_options([{doc_ids, x}, {selector, y}, {filter, z}])). - - -check_convert_options_pass_test() -> - ?assertEqual([], convert_options([])), - ?assertEqual([], convert_options([{<<"random">>, 42}])), - ?assertEqual([{cancel, true}], - convert_options([{<<"cancel">>, true}])), - ?assertEqual([{create_target, true}], - convert_options([{<<"create_target">>, true}])), - ?assertEqual([{continuous, true}], - convert_options([{<<"continuous">>, true}])), - ?assertEqual([{doc_ids, [<<"id">>]}], - convert_options([{<<"doc_ids">>, [<<"id">>]}])), - ?assertEqual([{selector, {key, value}}], - convert_options([{<<"selector">>, {key, value}}])). - - -check_convert_options_fail_test() -> - ?assertThrow({bad_request, _}, - convert_options([{<<"cancel">>, <<"true">>}])), - ?assertThrow({bad_request, _}, - convert_options([{<<"create_target">>, <<"true">>}])), - ?assertThrow({bad_request, _}, - convert_options([{<<"continuous">>, <<"true">>}])), - ?assertThrow({bad_request, _}, - convert_options([{<<"doc_ids">>, not_a_list}])), - ?assertThrow({bad_request, _}, - convert_options([{<<"selector">>, [{key, value}]}])). - -check_strip_credentials_test() -> - [?assertEqual(Expected, strip_credentials(Body)) || {Expected, Body} <- [ - { - undefined, - undefined - }, - { - <<"https://remote_server/database">>, - <<"https://foo:bar@remote_server/database">> - }, - { - {[{<<"_id">>, <<"foo">>}]}, - {[{<<"_id">>, <<"foo">>}, {<<"headers">>, <<"bar">>}]} - }, - { - {[{<<"_id">>, <<"foo">>}, {<<"other">>, <<"bar">>}]}, - {[{<<"_id">>, <<"foo">>}, {<<"other">>, <<"bar">>}]} - }, - { - {[{<<"_id">>, <<"foo">>}]}, - {[{<<"_id">>, <<"foo">>}, {<<"headers">>, <<"baz">>}]} - }, - { - {[{<<"_id">>, <<"foo">>}]}, - {[{<<"_id">>, <<"foo">>}, {<<"auth">>, <<"pluginsecret">>}]} - } - ]]. - - -setup() -> - DbName = ?tempdb(), - {ok, Db} = couch_db:create(DbName, [?ADMIN_CTX]), - ok = couch_db:close(Db), - create_vdu(DbName), - DbName. - - -teardown(DbName) when is_binary(DbName) -> - couch_server:delete(DbName, [?ADMIN_CTX]), - ok. - - -create_vdu(DbName) -> - couch_util:with_db(DbName, fun(Db) -> - VduFun = <<"function(newdoc, olddoc, userctx) {throw({'forbidden':'fail'})}">>, - Doc = #doc{ - id = <<"_design/vdu">>, - body = {[{<<"validate_doc_update">>, VduFun}]} - }, - {ok, _} = couch_db:update_docs(Db, [Doc]) - end). - - -update_replicator_doc_with_bad_vdu_test_() -> - { - setup, - fun test_util:start_couch/0, - fun test_util:stop_couch/1, - { - foreach, fun setup/0, fun teardown/1, - [ - fun t_vdu_does_not_crash_on_save/1 - ] - } - }. - - -t_vdu_does_not_crash_on_save(DbName) -> - ?_test(begin - Doc = #doc{id = <<"some_id">>, body = {[{<<"foo">>, 42}]}}, - ?assertEqual({ok, forbidden}, save_rep_doc(DbName, Doc)) - end). - - -local_replication_endpoint_error_test_() -> - { - foreach, - fun () -> meck:expect(config, get, - fun(_, _, Default) -> Default end) - end, - fun (_) -> meck:unload() end, - [ - t_error_on_local_endpoint() - ] - }. - - -t_error_on_local_endpoint() -> - ?_test(begin - RepDoc = {[ - {<<"_id">>, <<"someid">>}, - {<<"source">>, <<"localdb">>}, - {<<"target">>, <<"http://somehost.local/tgt">>} - ]}, - Expect = local_endpoints_not_supported, - ?assertThrow({bad_rep_doc, Expect}, parse_rep_doc_without_id(RepDoc)) - end). - --endif. + couch_util:to_binary(Reason). diff --git a/src/couch_replicator/src/couch_replicator_parse.erl b/src/couch_replicator/src/couch_replicator_parse.erl new file mode 100644 index 000000000..5996ec507 --- /dev/null +++ b/src/couch_replicator/src/couch_replicator_parse.erl @@ -0,0 +1,545 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_replicator_parse). + + +-export([ + parse_rep_doc/1, + parse_transient_rep/2, + parse_rep/2, + parse_rep_db/3 +]). + + +-include_lib("ibrowse/include/ibrowse.hrl"). +-include("couch_replicator.hrl"). + + +-define(DEFAULT_SOCK_OPTS, "[{keepalive, true}, {nodelay, false}]"). +-define(VALID_SOCK_OPTS, [ + buffer, + delay_send, + exit_on_close, + ipv6_v6only, + keepalive, + nodelay, + recbuf, + send_timeout, + send_timout_close, + sndbuf, + priority, + tos, + tclass +]). +-define(VALID_PROXY_PROTOCOLS, [http, https, socks5]). +-define(CONFIG_DEFAULTS, [ + {"worker_processes", "4", fun list_to_integer/1}, + {"worker_batch_size", "500", fun list_to_integer/1}, + {"http_connections", "20", fun list_to_integer/1}, + {"connection_timeout", "30000", fun list_to_integer/1}, + {"retries_per_request", "5", fun list_to_integer/1}, + {"use_checkpoints", "true", fun list_to_existing_atom/1}, + {"checkpoint_interval", "30000", fun list_to_integer/1}, + {"socket_options", ?DEFAULT_SOCK_OPTS, fun parse_sock_opts/1} +]). + + +-spec parse_rep_doc({[_]}) -> #{}. +parse_rep_doc(RepDoc) -> + {ok, Rep} = try + parse_rep(RepDoc, null) + catch + throw:{error, Reason} -> + Stack = erlang:get_stacktrace(), + LogErr1 = "~p parse_rep_doc fail ~p ~p", + couch_log:error(LogErr1, [?MODULE, Reason, Stack]), + throw({bad_rep_doc, Reason}); + Tag:Err -> + Stack = erlang:get_stacktrace(), + LogErr2 = "~p parse_rep_doc fail ~p:~p ~p", + couch_log:error(LogErr2, [?MODULE, Tag, Err, Stack]), + throw({bad_rep_doc, couch_util:to_binary({Tag, Err})}) + end, + Rep. + + +-spec parse_transient_rep({[_]} | #{}, user_name()) -> {ok, #{}}. +parse_transient_rep({Props} = EJson, UserName) when is_list(Props) -> + Str = couch_util:json_encode(EJson), + Map = couch_util:json_decode(Str, [return_maps]), + parse_transient_rep(Map, UserName); + +parse_transient_rep(#{} = Body, UserName) -> + {ok, Rep} = try + parse_rep(Body, UserName) + catch + throw:{error, Reason} -> + Stack = erlang:get_stacktrace(), + LogErr1 = "~p parse_transient_rep fail ~p ~p", + couch_log:error(LogErr1, [?MODULE, Reason, Stack]), + throw({bad_request, Reason}); + Tag:Err -> + Stack = erlang:get_stacktrace(), + LogErr2 = "~p parse_transient_rep fail ~p ~p", + couch_log:error(LogErr2, [?MODULE, Tag, Err, Stack]), + throw({bad_request, couch_util:to_binary({Tag, Err})}) + end, + #{?OPTIONS := Options} = Rep, + Cancel = maps:get(<<"cancel">>, Options, false), + Id = maps:get(<<"id">>, Options, nil), + case {Cancel, Id} of + {true, nil} -> + % Cancel request with no id, must parse id out of body contents + JobId = couch_replicator_ids:job_id(Rep, null, null), + {ok, JobId, Rep}; + {true, Id} -> + % Cancel request with an id specified, so do not parse id from body + {ok, Id, Rep}; + {false, _Id} -> + JobId = couch_replicator_ids:job_id(Rep, null, null), + % Not a cancel request, regular replication doc + {ok, JobId, Rep} + end. + + +-spec parse_rep({[_]} | #{}, user_name()) -> {ok, #{}}. +parse_rep({Props} = EJson, UserName) when is_list(Props) -> + Str = couch_util:json_encode(EJson), + Map = couch_util:json_decode(Str, [return_maps]), + parse_rep(Map, UserName); + +parse_rep(#{} = Doc, UserName) -> + {SrcProxy, TgtProxy} = parse_proxy_settings(Doc), + Opts = make_options(Doc), + Cancel = maps:get(<<"cancel">>, Opts, false), + Id = maps:get(<<"id">>, Opts, nil), + case Cancel andalso Id =/= nil of + true -> + {ok, #{?OPTIONS => Opts, ?REP_USER => UserName}}; + false -> + case {maps:is_key(?SOURCE, Doc), maps:is_key(?TARGET, Doc)} of + {false, _} -> throw({error, <<"Missing `source` field">>}); + {_, false} -> throw({error, <<"Missing `target` field">>}); + {true, true} -> ok + end, + #{?SOURCE := Source0, ?TARGET := Target0} = Doc, + Source = parse_rep_db(Source0, SrcProxy, Opts), + Target = parse_rep_db(Target0, TgtProxy, Opts), + case couch_replicator_filters:view_type(Doc, Opts) of + {error, Error} -> throw({error, Error}); + _ -> ok + end, + case couch_replicator_filters:parse(Opts) of + {ok, _} -> ok; + {error, FilterError} -> throw({error, FilterError}) + end, + Rep = #{ + ?SOURCE => Source, + ?TARGET => Target, + ?OPTIONS => Opts, + ?REP_USER => UserName, + ?START_TIME => erlang:system_time(second) + }, + {ok, Rep} + end. + + +-spec parse_rep_db(#{}, #{}, #{}) -> #{}. +parse_rep_db(#{} = Endpoint, #{} = ProxyParams, #{} = Options) -> + ProxyUrl = case ProxyParams of + #{<<"proxy_url">> := PUrl} -> PUrl; + _ -> null + end, + + Url0 = maps:get(<<"url">>, Endpoint), + Url = maybe_add_trailing_slash(Url0), + + AuthProps = maps:get(<<"auth">>, Endpoint, #{}), + if is_map(AuthProps) -> ok; true -> + throw({error, "if defined, `auth` must be an object"}) + end, + + Headers0 = maps:get(<<"headers">>, Endpoint, #{}), + if is_map(Headers0) -> ok; true -> + throw({error, "if defined `headers` must be an object"}) + end, + DefaultHeaders = couch_replicator_utils:default_headers_map(), + Headers = maps:merge(DefaultHeaders, Headers0), + + SockOpts = maps:get(<<"socket_options">>, Options, #{}), + SockAndProxy = maps:merge(#{ + <<"socket_options">> => SockOpts + }, ProxyParams), + SslParams = ssl_params(Url), + + #{ + <<"url">> => Url, + <<"auth_props">> => AuthProps, + <<"headers">> => Headers, + <<"ibrowse_options">> => maps:merge(SslParams, SockAndProxy), + <<"timeout">> => maps:get(<<"connection_timeout">>, Options), + <<"http_connections">> => maps:get(<<"http_connections">>, Options), + <<"retries">> => maps:get(<<"retries_per_request">>, Options), + <<"proxy_url">> => ProxyUrl + }; + +parse_rep_db(<<"http://", _/binary>> = Url, Proxy, Options) -> + parse_rep_db(#{<<"url">> => Url}, Proxy, Options); + +parse_rep_db(<<"https://", _/binary>> = Url, Proxy, Options) -> + parse_rep_db(#{<<"url">> => Url}, Proxy, Options); + +parse_rep_db(<<_/binary>>, _Proxy, _Options) -> + throw({error, local_endpoints_not_supported}); + +parse_rep_db(undefined, _Proxy, _Options) -> + throw({error, <<"Missing replication endpoint">>}). + + +parse_proxy_settings(#{} = Doc) -> + Proxy = maps:get(?PROXY, Doc, <<>>), + SrcProxy = maps:get(?SOURCE_PROXY, Doc, <<>>), + TgtProxy = maps:get(?TARGET_PROXY, Doc, <<>>), + + case Proxy =/= <<>> of + true when SrcProxy =/= <<>> -> + Error = "`proxy` is mutually exclusive with `source_proxy`", + throw({error, Error}); + true when TgtProxy =/= <<>> -> + Error = "`proxy` is mutually exclusive with `target_proxy`", + throw({error, Error}); + true -> + {parse_proxy_params(Proxy), parse_proxy_params(Proxy)}; + false -> + {parse_proxy_params(SrcProxy), parse_proxy_params(TgtProxy)} + end. + + +-spec maybe_add_trailing_slash(binary()) -> binary(). +maybe_add_trailing_slash(<<>>) -> + <<>>; + +maybe_add_trailing_slash(Url) when is_binary(Url) -> + case binary:match(Url, <<"?">>) of + nomatch -> + case binary:last(Url) of + $/ -> Url; + _ -> <> + end; + _ -> + Url % skip if there are query params + end. + + +-spec make_options(#{}) -> #{}. +make_options(#{} = RepDoc) -> + Options0 = convert_options(RepDoc), + Options = check_options(Options0), + ConfigOptions = lists:foldl(fun({K, Default, ConversionFun}, Acc) -> + V = ConversionFun(config:get("replicator", K, Default)), + Acc#{list_to_binary(K) => V} + end, #{}, ?CONFIG_DEFAULTS), + maps:merge(ConfigOptions, Options). + + +-spec convert_options(#{}) -> #{} | no_return(). +convert_options(#{} = Doc) -> + maps:fold(fun convert_fold/3, #{}, Doc). + + +-spec convert_fold(binary(), any(), #{}) -> #{}. +convert_fold(<<"cancel">>, V, Acc) when is_boolean(V) -> + Acc#{<<"cancel">> => V}; +convert_fold(<<"cancel">>, _, _) -> + throw({error, <<"`cancel` must be a boolean">>}); +convert_fold(IdOpt, V, Acc) when IdOpt =:= <<"_local_id">>; + IdOpt =:= <<"replication_id">>; IdOpt =:= <<"id">> -> + Acc#{<<"id">> => couch_replicator_ids:convert(V)}; +convert_fold(<<"create_target">>, V, Acc) when is_boolean(V) -> + Acc#{<<"create_target">> => V}; +convert_fold(<<"create_target">>, _, _) -> + throw({error, <<"`create_target` must be a boolean">>}); +convert_fold(<<"create_target_params">>, #{} = V, Acc) -> + Acc#{<<"create_target_params">> => V}; +convert_fold(<<"create_target_params">>, _, _) -> + throw({error, <<"`create_target_params` must be an object">>}); +convert_fold(<<"continuous">>, V, Acc) when is_boolean(V) -> + Acc#{<<"continuous">> => V}; +convert_fold(<<"continuous">>, _, _) -> + throw({error, <<"`continuous` must be a boolean">>}); +convert_fold(<<"filter">>, V, Acc) when is_binary(V), byte_size(V) > 1 -> + Acc#{<<"filter">> => V}; +convert_fold(<<"filter">>, _, _) -> + throw({error, <<"`filter` must be a string">>}); +convert_fold(<<"query_params">>, V, Acc) when is_map(V) orelse V =:= null -> + Acc#{<<"query_params">> => V}; +convert_fold(<<"query_params">>, _, _Acc) -> + throw({error, <<"`query_params` is not `null` or object">>}); +convert_fold(<<"doc_ids">>, null, Acc) -> + Acc; +convert_fold(<<"doc_ids">>, V, Acc) when is_list(V) -> + % Compatibility behaviour as: accept a list of percent encoded doc IDs + Ids = lists:map(fun(Id) -> + case is_binary(Id) andalso byte_size(Id) > 0 of + true -> list_to_binary(couch_httpd:unquote(Id)); + false -> throw({error, <<"`doc_ids` array must contain strings">>}) + end + end, V), + Acc#{<<"doc_ids">> => lists:usort(Ids)}; +convert_fold(<<"doc_ids">>, _, _) -> + throw({error, <<"`doc_ids` must be an array">>}); +convert_fold(<<"selector">>, #{} = V, Acc) -> + Acc#{<<"selector">> => V}; +convert_fold(<<"selector">>, _, _Acc) -> + throw({error, <<"`selector` must be a JSON object">>}); +convert_fold(<<"worker_processes">>, V, Acc) -> + Acc#{<<"worker_processes">> => bin2int(V, <<"worker_processes">>)}; +convert_fold(<<"worker_batch_size">>, V, Acc) -> + Acc#{<<"worker_batch_size">> => bin2int(V, <<"worker_batch_size">>)}; +convert_fold(<<"http_connections">>, V, Acc) -> + Acc#{<<"http_connections">> => bin2int(V, <<"http_connections">>)}; +convert_fold(<<"connection_timeout">>, V, Acc) -> + Acc#{<<"connection_timeout">> => bin2int(V, <<"connection_timeout">>)}; +convert_fold(<<"retries_per_request">>, V, Acc) -> + Acc#{<<"retries_per_request">> => bin2int(V, <<"retries_per_request">>)}; +convert_fold(<<"socket_options">>, V, Acc) -> + Acc#{<<"socket_options">> => parse_sock_opts(V)}; +convert_fold(<<"since_seq">>, V, Acc) -> + Acc#{<<"since_seq">> => V}; +convert_fold(<<"use_checkpoints">>, V, Acc) when is_boolean(V) -> + Acc#{<<"use_checkpoints">> => V}; +convert_fold(<<"use_checkpoints">>, _, _) -> + throw({error, <<"`use_checkpoints` must be a boolean">>}); +convert_fold(<<"checkpoint_interval">>, V, Acc) -> + Acc#{<<"checkpoint_interval">> => bin2int(V, <<"checkpoint_interval">>)}; +convert_fold(_K, _V, Acc) -> % skip unknown option + Acc. + + +bin2int(V, _Field) when is_integer(V) -> + V; + +bin2int(V, Field) when is_binary(V) -> + try + erlang:binary_to_integer(V) + catch + error:badarg -> + throw({error, <<"`", Field/binary, "` must be an integer">>}) + end; + +bin2int(_V, Field) -> + throw({error, <<"`", Field/binary, "` must be an integer">>}). + + +-spec check_options(#{}) -> #{}. +check_options(Options) -> + DocIds = maps:is_key(<<"doc_ids">>, Options), + Filter = maps:is_key(<<"filter">>, Options), + Selector = maps:is_key(<<"selector">>, Options), + case {DocIds, Filter, Selector} of + {false, false, false} -> Options; + {false, false, _} -> Options; + {false, _, false} -> Options; + {_, false, false} -> Options; + _ -> throw({error, <<"`doc_ids`,`filter`,`selector` are mutually " + " exclusive">>}) + end. + + +parse_sock_opts(Term) -> + {ok, SocketOptions} = couch_util:parse_term(Term), + lists:foldl(fun + ({K, V}, Acc) when is_atom(K) -> + case lists:member(K, ?VALID_SOCK_OPTS) of + true -> Acc#{atom_to_binary(K, utf8) => V}; + false -> Acc + end; + (_, Acc) -> + Acc + end, #{}, SocketOptions). + + +-spec parse_proxy_params(binary() | #{}) -> #{}. +parse_proxy_params(<<>>) -> + #{}; +parse_proxy_params(ProxyUrl) when is_binary(ProxyUrl)-> + #url{ + host = Host, + port = Port, + username = User, + password = Passwd, + protocol = Prot0 + } = ibrowse_lib:parse_url(binary_to_list(ProxyUrl)), + Prot = case lists:member(Prot0, ?VALID_PROXY_PROTOCOLS) of + true -> atom_to_binary(Prot0, utf8); + false -> throw({error, <<"Unsupported proxy protocol">>}) + end, + ProxyParams = #{ + <<"proxy_url">> => ProxyUrl, + <<"proxy_protocol">> => Prot, + <<"proxy_host">> => list_to_binary(Host), + <<"proxy_port">> => Port + }, + case is_list(User) andalso is_list(Passwd) of + true -> + ProxyParams#{ + <<"proxy_user">> => list_to_binary(User), + <<"proxy_password">> => list_to_binary(Passwd) + }; + false -> + ProxyParams + end. + + +-spec ssl_params(binary()) -> #{}. +ssl_params(Url) -> + case ibrowse_lib:parse_url(binary_to_list(Url)) of + #url{protocol = https} -> + Depth = list_to_integer( + config:get("replicator", "ssl_certificate_max_depth", "3") + ), + VerifyCerts = config:get("replicator", "verify_ssl_certificates"), + CertFile = config:get("replicator", "cert_file", null), + KeyFile = config:get("replicator", "key_file", null), + Password = config:get("replicator", "password", null), + VerifySslOptions = ssl_verify_options(VerifyCerts =:= "true"), + SslOpts = maps:merge(VerifySslOptions, #{<<"depth">> => Depth}), + HaveCertAndKey = CertFile /= null andalso KeyFile /= null, + SslOpts1 = case HaveCertAndKey of false -> SslOpts; true -> + CertOpts0 = #{ + <<"certfile">> => list_to_binary(CertFile), + <<"keyfile">> => list_to_binary(KeyFile) + }, + CertOpts = case Password of null -> CertOpts0; _ -> + CertOpts0#{<<"password">> => list_to_binary(Password)} + end, + maps:merge(SslOpts, CertOpts) + end, + #{<<"is_ssl">> => true, <<"ssl_options">> => SslOpts1}; + #url{protocol = http} -> + #{} + end. + + +-spec ssl_verify_options(true | false) -> [_]. +ssl_verify_options(true) -> + case config:get("replicator", "ssl_trusted_certificates_file") of + undefined -> + #{ + <<"verify">> => <<"verify_peer">>, + <<"cacertfile">> => null + }; + CAFile when is_list(CAFile) -> + #{ + <<"verify">> => <<"verify_peer">>, + <<"cacertfile">> => list_to_binary(CAFile) + } + end; + +ssl_verify_options(false) -> + #{ + <<"verify">> => <<"verify_none">> + }. + + +-ifdef(TEST). + +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("fabric/test/fabric2_test.hrl"). + + +check_options_pass_values_test() -> + ?assertEqual(check_options(#{}), #{}), + ?assertEqual(check_options(#{<<"baz">> => <<"foo">>}), + #{<<"baz">> => <<"foo">>}), + ?assertEqual(check_options(#{<<"doc_ids">> => [<<"x">>]}), + #{<<"doc_ids">> => [<<"x">>]}), + ?assertEqual(check_options(#{<<"filter">> => <<"f">>}), + #{<<"filter">> => <<"f">>}), + ?assertEqual(check_options(#{<<"selector">> => <<"s">>}), + #{<<"selector">> => <<"s">>}). + + +check_options_fail_values_test() -> + ?assertThrow({error, _}, + check_options(#{<<"doc_ids">> => [], <<"filter">> => <<"f">>})), + ?assertThrow({error, _}, + check_options(#{<<"doc_ids">> => [], <<"selector">> => <<"s">>})), + ?assertThrow({error, _}, + check_options(#{<<"filter">> => <<"f">>, <<"selector">> => <<"s">>})), + ?assertThrow({error, _}, + check_options(#{ + <<"doc_ids">> => [], + <<"filter">> => <<"f">>, + <<"selector">> => <<"s">>} + )). + + +check_convert_options_pass_test() -> + ?assertEqual(#{}, convert_options(#{})), + ?assertEqual(#{}, convert_options(#{<<"random">> => 42})), + ?assertEqual(#{<<"cancel">> => true}, + convert_options(#{<<"cancel">> => true})), + ?assertEqual(#{<<"create_target">> => true}, + convert_options(#{<<"create_target">> => true})), + ?assertEqual(#{<<"continuous">> => true}, + convert_options(#{<<"continuous">> => true})), + ?assertEqual(#{<<"doc_ids">> => [<<"id">>]}, + convert_options(#{<<"doc_ids">> => [<<"id">>]})), + ?assertEqual(#{<<"selector">> => #{<<"key">> => <<"value">>}}, + convert_options(#{<<"selector">> => #{<<"key">> => <<"value">>}})). + + +check_convert_options_fail_test() -> + ?assertThrow({error, _}, + convert_options(#{<<"cancel">> => <<"true">>})), + ?assertThrow({error, _}, + convert_options(#{<<"create_target">> => <<"true">>})), + ?assertThrow({error, _}, + convert_options(#{<<"continuous">> => <<"true">>})), + ?assertThrow({error, _}, + convert_options(#{<<"doc_ids">> => <<"not_a_list">>})), + ?assertThrow({error, _}, + convert_options(#{<<"selector">> => <<"bad">>})). + + +local_replication_endpoint_error_test_() -> + { + foreach, + fun setup/0, + fun teardown/1, + [ + ?TDEF_FE(t_error_on_local_endpoint) + ] + }. + + +setup() -> + meck:expect(config, get, fun(_, _, Default) -> Default end). + + +teardown(_) -> + meck:unload(). + + +t_error_on_local_endpoint(_) -> + RepDoc = {[ + {<<"_id">>, <<"someid">>}, + {<<"source">>, <<"localdb">>}, + {<<"target">>, <<"http://somehost.local/tgt">>} + ]}, + Expect = local_endpoints_not_supported, + ?assertThrow({bad_rep_doc, Expect}, parse_rep_doc(RepDoc)). + + +-endif. -- cgit v1.2.1 From 941cfc3d7b33cbfbf7e95eb7db388515d0595399 Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Fri, 28 Aug 2020 04:33:05 -0400 Subject: Handle option maps in lower level modules The goal is to keep everything below the _api_wrap module level relatively intact. To achieve that handle option maps in some places, or translate back to a proplist or `#httpd{}` records in others. The `couch_replicator_api:db_from_json/1` function is where endpoint map object from a `Rep` object are translated into `#httpdb{}` records. Headers are translated back to lists and ibrowse options into proplist with atom keys. --- .../src/couch_replicator_api_wrap.erl | 164 ++++++++++++--------- .../src/couch_replicator_changes_reader.erl | 9 +- .../src/couch_replicator_filters.erl | 54 ++++--- 3 files changed, 127 insertions(+), 100 deletions(-) diff --git a/src/couch_replicator/src/couch_replicator_api_wrap.erl b/src/couch_replicator/src/couch_replicator_api_wrap.erl index a21de4242..da6f28800 100644 --- a/src/couch_replicator/src/couch_replicator_api_wrap.erl +++ b/src/couch_replicator/src/couch_replicator_api_wrap.erl @@ -28,7 +28,6 @@ db_close/1, get_db_info/1, get_pending_count/2, - get_view_info/3, update_doc/3, update_doc/4, update_docs/3, @@ -39,39 +38,28 @@ open_doc_revs/6, changes_since/5, db_uri/1, - normalize_db/1 + db_from_json/1 ]). --import(couch_replicator_httpc, [ - send_req/3 - ]). - --import(couch_util, [ - encode_doc_id/1, - get_value/2, - get_value/3 - ]). -define(MAX_WAIT, 5 * 60 * 1000). -define(MAX_URL_LEN, 7000). -define(MIN_URL_LEN, 200). -db_uri(#httpdb{url = Url}) -> +db_uri(#{<<"url">> := Url}) -> couch_util:url_strip_password(Url); -db_uri(DbName) when is_binary(DbName) -> - ?b2l(DbName); +db_uri(#httpdb{url = Url}) -> + couch_util:url_strip_password(Url). -db_uri(Db) -> - db_uri(couch_db:name(Db)). +db_open(#{} = Db) -> + db_open(Db, false, #{}). -db_open(Db) -> - db_open(Db, false, []). -db_open(#httpdb{} = Db1, Create, CreateParams) -> - {ok, Db} = couch_replicator_httpc:setup(Db1), +db_open(#{} = Db0, Create, #{} = CreateParams) when is_boolean(Create) -> + {ok, Db} = couch_replicator_httpc:setup(db_from_json(Db0)), try case Create of false -> @@ -149,14 +137,6 @@ get_pending_count(#httpdb{} = Db, Seq) -> {ok, couch_util:get_value(<<"pending">>, Props, null)} end). -get_view_info(#httpdb{} = Db, DDocId, ViewName) -> - Path = io_lib:format("~s/_view/~s/_info", [DDocId, ViewName]), - send_req(Db, [{path, Path}], - fun(200, _, {Props}) -> - {VInfo} = couch_util:get_value(<<"view_index">>, Props, {[]}), - {ok, VInfo} - end). - ensure_full_commit(#httpdb{} = Db) -> send_req( @@ -434,9 +414,9 @@ changes_since(#httpdb{headers = Headers1, timeout = InactiveTimeout} = HttpDb, {undefined, undefined} -> QArgs1 = maybe_add_changes_filter_q_args(BaseQArgs, Options), {QArgs1, get, [], Headers1}; - {undefined, _} when is_tuple(Selector) -> + {undefined, #{}} -> Headers2 = [{"Content-Type", "application/json"} | Headers1], - JsonSelector = ?JSON_ENCODE({[{<<"selector">>, Selector}]}), + JsonSelector = ?JSON_ENCODE(#{<<"selector">> => Selector}), {[{"filter", "_selector"} | BaseQArgs], post, JsonSelector, Headers2}; {_, undefined} when is_list(DocIds) -> Headers2 = [{"Content-Type", "application/json"} | Headers1], @@ -496,7 +476,8 @@ maybe_add_changes_filter_q_args(BaseQS, Options) -> ViewFields0 = [atom_to_list(F) || F <- record_info(fields, mrargs)], ViewFields = ["key" | ViewFields0], - {Params} = get_value(query_params, Options, {[]}), + ParamsMap = #{} = get_value(query_params, Options, #{}), + Params = maps:to_list(ParamsMap), [{"filter", ?b2l(FilterName)} | lists:foldl( fun({K, V}, QSAcc) -> Ks = couch_util:to_list(K), @@ -546,7 +527,7 @@ options_to_query_args(HttpDb, Path, Options0) -> length("GET " ++ FullUrl ++ " HTTP/1.1\r\n") + length("&atts_since=") + 6, % +6 = % encoded [ and ] PAs, MaxLen, []), - [{"atts_since", ?JSON_ENCODE(RevList)} | QueryArgs1] + [{"atts_since", ?b2l(iolist_to_binary(?JSON_ENCODE(RevList)))} | QueryArgs1] end. @@ -787,7 +768,7 @@ json_to_doc_info({Props}) -> RevsInfo0 = lists:map( fun({Change}) -> Rev = couch_doc:parse_rev(get_value(<<"rev">>, Change)), - Del = couch_replicator_utils:is_deleted(Change), + Del = get_value(<<"deleted">>, Change, false), #rev_info{rev=Rev, deleted=Del} end, Changes), @@ -895,52 +876,95 @@ header_value(Key, Headers, Default) -> end. -% Normalize an #httpdb{} or #db{} record such that it can be used for -% comparisons. This means remove things like pids and also sort options / props. -normalize_db(#httpdb{} = HttpDb) -> +maybe_append_create_query_params(Db, Params) when map_size(Params) == 0 -> + Db; + +maybe_append_create_query_params(Db, #{} = Params) -> + ParamList = maps:to_list(Params), + NewUrl = Db#httpdb.url ++ "?" ++ mochiweb_util:urlencode(ParamList), + Db#httpdb{url = NewUrl}. + + +db_from_json(#{} = DbMap) -> + #{ + <<"url">> := Url, + <<"auth_props">> := Auth, + <<"headers">> := Headers0, + <<"ibrowse_options">> := IBrowseOptions0, + <<"timeout">> := Timeout, + <<"http_connections">> := HttpConnections, + <<"retries">> := Retries, + <<"proxy_url">> := ProxyUrl0 + } = DbMap, + Headers = maps:fold(fun(K, V, Acc) -> + [{binary_to_list(K), binary_to_list(V)} | Acc] + end, [], Headers0), + IBrowseOptions = maps:fold(fun + (<<"socket_options">>, #{} = SockOpts, Acc) -> + SockOptsKVs = maps:fold(fun sock_opts_fold/3, [], SockOpts), + [{socket_options, SockOptsKVs} | Acc]; + (<<"ssl_options">>, #{} = SslOpts, Acc) -> + SslOptsKVs = maps:fold(fun ssl_opts_fold/3, [], SslOpts), + [{ssl_options, SslOptsKVs} | Acc]; + (K, V, Acc) when is_binary(V) -> + [{binary_to_atom(K, utf8), binary_to_list(V)} | Acc]; + (K, V, Acc) -> + [{binary_to_atom(K, utf8), V} | Acc] + end, [], IBrowseOptions0), + ProxyUrl = case ProxyUrl0 of + null -> undefined; + V when is_binary(V) -> binary_to_list(V) + end, #httpdb{ - url = HttpDb#httpdb.url, - auth_props = lists:sort(HttpDb#httpdb.auth_props), - headers = lists:keysort(1, HttpDb#httpdb.headers), - timeout = HttpDb#httpdb.timeout, - ibrowse_options = lists:keysort(1, HttpDb#httpdb.ibrowse_options), - retries = HttpDb#httpdb.retries, - http_connections = HttpDb#httpdb.http_connections - }; + url = binary_to_list(Url), + auth_props = maps:to_list(Auth), + headers = Headers, + ibrowse_options = IBrowseOptions, + timeout = Timeout, + http_connections = HttpConnections, + retries = Retries, + proxy_url = ProxyUrl + }. -normalize_db(<>) -> - DbName. +send_req(#httpdb{} = HttpDb, Opts, Callback) when is_function(Callback) -> + couch_replicator_httpc:send_req(HttpDb, Opts, Callback). -maybe_append_create_query_params(Db, []) -> - Db; -maybe_append_create_query_params(Db, CreateParams) -> - NewUrl = Db#httpdb.url ++ "?" ++ mochiweb_util:urlencode(CreateParams), - Db#httpdb{url = NewUrl}. +get_value(K, Props) -> + couch_util:get_value(K, Props). + + +get_value(K, Props, Default) -> + couch_util:get_value(K, Props, Default). --ifdef(TEST). +encode_doc_id(DocId) -> + couch_util:encode_doc_id(DocId). --include_lib("eunit/include/eunit.hrl"). -normalize_http_db_test() -> - HttpDb = #httpdb{ - url = "http://host/db", - auth_props = [{"key", "val"}], - headers = [{"k2","v2"}, {"k1","v1"}], - timeout = 30000, - ibrowse_options = [{k2, v2}, {k1, v1}], - retries = 10, - http_connections = 20 - }, - Expected = HttpDb#httpdb{ - headers = [{"k1","v1"}, {"k2","v2"}], - ibrowse_options = [{k1, v1}, {k2, v2}] - }, - ?assertEqual(Expected, normalize_db(HttpDb)), - ?assertEqual(<<"local">>, normalize_db(<<"local">>)). +% See couch_replicator_docs:ssl_params/1 for ssl parsed options +% and http://erlang.org/doc/man/ssl.html#type-server_option +% all latest SSL server options +% +ssl_opts_fold(K, V, Acc) when is_boolean(V); is_integer(V) -> + [{binary_to_atom(K, utf8), V} | Acc]; + +ssl_opts_fold(K, null, Acc) -> + [{binary_to_atom(K, utf8), undefined} | Acc]; + +ssl_opts_fold(<<"verify">>, V, Acc) -> + [{verify, binary_to_atom(V, utf8)} | Acc]; +ssl_opts_fold(K, V, Acc) when is_list(V) -> + [{binary_to_atom(K, utf8), binary_to_list(V)} | Acc]. + + +% See ?VALID_SOCK_OPTS in couch_replicator_docs for accepted socket options +% +sock_opts_fold(K, V, Acc) when is_binary(V) -> + [{binary_to_atom(K, utf8), binary_to_atom(V, utf8)} | Acc]; --endif. +sock_opts_fold(K, V, Acc) when is_boolean(V); is_integer(V) -> + [{binary_to_atom(K, utf8), V} | Acc]. diff --git a/src/couch_replicator/src/couch_replicator_changes_reader.erl b/src/couch_replicator/src/couch_replicator_changes_reader.erl index 2e4df5365..6adf1af5e 100644 --- a/src/couch_replicator/src/couch_replicator_changes_reader.erl +++ b/src/couch_replicator/src/couch_replicator_changes_reader.erl @@ -22,11 +22,8 @@ -include_lib("couch_replicator/include/couch_replicator_api_wrap.hrl"). -include("couch_replicator.hrl"). --import(couch_util, [ - get_value/2 -]). -start_link(StartSeq, #httpdb{} = Db, ChangesQueue, Options) -> +start_link(StartSeq, #httpdb{} = Db, ChangesQueue, #{} = Options) -> Parent = self(), {ok, spawn_link(fun() -> put(last_seq, StartSeq), @@ -41,12 +38,12 @@ start_link(StartSeq, Db, ChangesQueue, Options) -> end)}. read_changes(Parent, StartSeq, Db, ChangesQueue, Options) -> - Continuous = couch_util:get_value(continuous, Options), + Continuous = maps:get(<<"continuous">>, Options, false), try couch_replicator_api_wrap:changes_since(Db, all_docs, StartSeq, fun(Item) -> process_change(Item, {Parent, Db, ChangesQueue, Continuous}) - end, Options), + end, couch_replicator_utils:proplist_options(Options)), couch_work_queue:close(ChangesQueue) catch throw:recurse -> diff --git a/src/couch_replicator/src/couch_replicator_filters.erl b/src/couch_replicator/src/couch_replicator_filters.erl index c8980001a..50c37335d 100644 --- a/src/couch_replicator/src/couch_replicator_filters.erl +++ b/src/couch_replicator/src/couch_replicator_filters.erl @@ -20,6 +20,7 @@ ]). -include_lib("couch/include/couch_db.hrl"). +-include("couch_replicator.hrl"). % Parse the filter from replication options proplist. @@ -27,17 +28,17 @@ % For `user` filter, i.e. filters specified as user code % in source database, this code doesn't fetch the filter % code, but only returns the name of the filter. --spec parse([_]) -> +-spec parse(#{}) -> {ok, nil} | {ok, {view, binary(), {[_]}}} | {ok, {user, {binary(), binary()}, {[_]}}} | {ok, {docids, [_]}} | {ok, {mango, {[_]}}} | {error, binary()}. -parse(Options) -> - Filter = couch_util:get_value(filter, Options), - DocIds = couch_util:get_value(doc_ids, Options), - Selector = couch_util:get_value(selector, Options), +parse(#{} = Options) -> + Filter = maps:get(<<"filter">>, Options, undefined), + DocIds = maps:get(<<"doc_ids">>, Options, undefined), + Selector = maps:get(<<"selector">>, Options, undefined), case {Filter, DocIds, Selector} of {undefined, undefined, undefined} -> {ok, nil}; @@ -53,7 +54,10 @@ parse(Options) -> {undefined, _, undefined} -> {ok, {docids, DocIds}}; {undefined, undefined, _} -> - {ok, {mango, ejsort(mango_selector:normalize(Selector))}}; + % Translate it to proplist as normalize doesn't know how + % to handle maps + Selector1 = ?JSON_DECODE(?JSON_ENCODE(Selector)), + {ok, {mango, ejsort(mango_selector:normalize(Selector1))}}; _ -> Err = "`selector`, `filter` and `doc_ids` are mutually exclusive", {error, list_to_binary(Err)} @@ -88,22 +92,24 @@ fetch(DDocName, FilterName, Source) -> % Get replication type and view (if any) from replication document props --spec view_type([_], [_]) -> - {view, {binary(), binary()}} | {db, nil} | {error, binary()}. -view_type(Props, Options) -> - case couch_util:get_value(<<"filter">>, Props) of - <<"_view">> -> - {QP} = couch_util:get_value(query_params, Options, {[]}), - ViewParam = couch_util:get_value(<<"view">>, QP), - case re:split(ViewParam, <<"/">>) of - [DName, ViewName] -> - {view, {<< "_design/", DName/binary >>, ViewName}}; - _ -> - {error, <<"Invalid `view` parameter.">>} - end; +-spec view_type(#{}, #{}) -> + {binary(), #{}} | {error, binary()}. +view_type(#{?FILTER := <<"_view">>}, #{} = Options) -> + QP = maps:get(<<"query_params">>, Options, #{}), + ViewParam = maps:get(<<"view">>, QP, <<>>), + case re:split(ViewParam, <<"/">>) of + [DName, ViewName] -> + DDocMap = #{ + <<"ddoc">> => <<"_design/",DName/binary>>, + <<"view">> => ViewName + }, + {<<"view">>, DDocMap}; _ -> - {db, nil} - end. + {error, <<"Invalid `view` parameter.">>} + end; + +view_type(#{}, #{}) -> + {<<"db">>, #{}}. % Private functions @@ -151,9 +157,9 @@ fetch_internal(DDocName, FilterName, Source) -> end. --spec query_params([_]) -> {[_]}. -query_params(Options)-> - couch_util:get_value(query_params, Options, {[]}). +-spec query_params(#{}) -> #{}. +query_params(#{} = Options)-> + maps:get(<<"query_params">>, Options, #{}). parse_user_filter(Filter) -> -- cgit v1.2.1 From 276d19731bc5df73838f40efc126f1f709e04fbe Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Fri, 28 Aug 2020 04:33:11 -0400 Subject: Update couch_replicator_ids This module is responsible for calculating replication IDs. It inspects all the replication options which may affect the replication results and hashes them into a single ID. CouchDB replicator tries to maintain compatibility with older versions of itself so it keep tracks of how to calculate replication IDs used by previous version of CouchDB. Replication ID calculation algorithms have their own version, the latest one is at version 4. One of the goals of this update is to not alter the replication ID algorithm and keep it at version 4, such that for all the same parameters the replication IDs should stay the same as they would be on CouchDB <= 3.x. That is why in some internal function, options maps and binares are turned back into proplist and tuples before hashing is performed. There is a unit tests which asserts that the replication ID calcuated with this update matches what was calcuated in CouchDB 3.x. Internal representation of the replication ID has changed slighly. Previously it was represented by a tuple of `{BaseId, ExtId}`, where `BaseId` was the ID without any options such as `continuous` or `create_target`, and `ExtId` was the concatenated list of those options. In most cases it was useful to operate on the full ID and in only a few place the `BaseId` was needed. So the calculation function was updated to return `{RepId, BaseId}` instead. `RepId` is a binary that is the full relication ID (base + extensions) and `BaseId` is just the base. The function which calculated the base ID was updated to actually be called `base_id/2` as opposed to `replication_id/2`. Another update to the module is a function which calculates replication job IDs. A `JobId` is used to identify replication jobs in the `couch_jobs` API. A `JobId`, unlike a `RepId` never requires making a network round-trip to calculate. For replications created from `_replicator` docs, `JobId` is defined as the concatenation of the database instance UUID and document ID. For a transient jobs it is calculated by hashing the source, target endpoint parameters, replication options. In fact, it is almost the same as a replication ID, with one important difference that the filter design doc name and function name are used instead of the contents of the filter from the source, so no network round-trip is necessary to calculate it. --- src/couch_replicator/src/couch_replicator_ids.erl | 202 +++++++++++++++------- 1 file changed, 141 insertions(+), 61 deletions(-) diff --git a/src/couch_replicator/src/couch_replicator_ids.erl b/src/couch_replicator/src/couch_replicator_ids.erl index 04e71c3ef..d1cbe571c 100644 --- a/src/couch_replicator/src/couch_replicator_ids.erl +++ b/src/couch_replicator/src/couch_replicator_ids.erl @@ -14,7 +14,9 @@ -export([ replication_id/1, - replication_id/2, + base_id/2, + job_id/3, + job_id/2, convert/1 ]). @@ -30,28 +32,31 @@ % {filter_fetch_error, Error} exception. % -replication_id(#rep{options = Options} = Rep) -> - BaseId = replication_id(Rep, ?REP_ID_VERSION), - {BaseId, maybe_append_options([continuous, create_target], Options)}. +replication_id(#{?OPTIONS := Options} = Rep) -> + BaseId = base_id(Rep, ?REP_ID_VERSION), + UseOpts = [<<"continuous">>, <<"create_target">>], + ExtId = maybe_append_options(UseOpts, Options), + RepId = iolist_to_binary([BaseId, ExtId]), + {RepId, BaseId}. % Versioned clauses for generating replication IDs. % If a change is made to how replications are identified, % please add a new clause and increase ?REP_ID_VERSION. -replication_id(#rep{} = Rep, 4) -> +base_id(#{?SOURCE := Src, ?TARGET := Tgt} = Rep, 4) -> UUID = couch_server:get_uuid(), - SrcInfo = get_v4_endpoint(Rep#rep.source), - TgtInfo = get_v4_endpoint(Rep#rep.target), + SrcInfo = get_v4_endpoint(Src), + TgtInfo = get_v4_endpoint(Tgt), maybe_append_filters([UUID, SrcInfo, TgtInfo], Rep); -replication_id(#rep{} = Rep, 3) -> +base_id(#{?SOURCE := Src0, ?TARGET := Tgt0} = Rep, 3) -> UUID = couch_server:get_uuid(), - Src = get_rep_endpoint(Rep#rep.source), - Tgt = get_rep_endpoint(Rep#rep.target), + Src = get_rep_endpoint(Src0), + Tgt = get_rep_endpoint(Tgt0), maybe_append_filters([UUID, Src, Tgt], Rep); -replication_id(#rep{} = Rep, 2) -> +base_id(#{?SOURCE := Src0, ?TARGET := Tgt0} = Rep, 2) -> {ok, HostName} = inet:gethostname(), Port = case (catch mochiweb_socket_server:get(couch_httpd, port)) of P when is_number(P) -> @@ -64,47 +69,76 @@ replication_id(#rep{} = Rep, 2) -> % ... mochiweb_socket_server:get(https, port) list_to_integer(config:get("httpd", "port", "5984")) end, - Src = get_rep_endpoint(Rep#rep.source), - Tgt = get_rep_endpoint(Rep#rep.target), + Src = get_rep_endpoint(Src0), + Tgt = get_rep_endpoint(Tgt0), maybe_append_filters([HostName, Port, Src, Tgt], Rep); -replication_id(#rep{} = Rep, 1) -> +base_id(#{?SOURCE := Src0, ?TARGET := Tgt0} = Rep, 1) -> {ok, HostName} = inet:gethostname(), - Src = get_rep_endpoint(Rep#rep.source), - Tgt = get_rep_endpoint(Rep#rep.target), + Src = get_rep_endpoint(Src0), + Tgt = get_rep_endpoint(Tgt0), maybe_append_filters([HostName, Src, Tgt], Rep). --spec convert([_] | binary() | {string(), string()}) -> {string(), string()}. -convert(Id) when is_list(Id) -> - convert(?l2b(Id)); +-spec job_id(#{}, binary() | null, binary() | null) -> binary(). +job_id(#{} = Rep, null = _DbUUID, null = _DocId) -> + #{ + ?SOURCE := Src, + ?TARGET := Tgt, + ?REP_USER := UserName, + ?OPTIONS := Options + } = Rep, + UUID = couch_server:get_uuid(), + SrcInfo = get_v4_endpoint(Src), + TgtInfo = get_v4_endpoint(Tgt), + UseOpts = [<<"continuous">>, <<"create_target">>], + Opts = maybe_append_options(UseOpts, Options), + IdParts = [UUID, SrcInfo, TgtInfo, UserName, Opts], + maybe_append_filters(IdParts, Rep, false); + +job_id(#{} = _Rep, DbUUID, DocId) when is_binary(DbUUID), is_binary(DocId) -> + job_id(DbUUID, DocId). + + +-spec job_id(binary(), binary()) -> binary(). +job_id(DbUUID, DocId) when is_binary(DbUUID), is_binary(DocId) -> + <>. + + +-spec convert(binary()) -> binary(). convert(Id0) when is_binary(Id0) -> % Spaces can result from mochiweb incorrectly unquoting + characters from % the URL path. So undo the incorrect parsing here to avoid forcing % users to url encode + characters. - Id = binary:replace(Id0, <<" ">>, <<"+">>, [global]), - lists:splitwith(fun(Char) -> Char =/= $+ end, ?b2l(Id)); -convert({BaseId, Ext} = Id) when is_list(BaseId), is_list(Ext) -> - Id. + binary:replace(Id0, <<" ">>, <<"+">>, [global]). % Private functions -maybe_append_filters(Base, - #rep{source = Source, options = Options}) -> +maybe_append_filters(Base, #{} = Rep) -> + maybe_append_filters(Base, Rep, true). + + +maybe_append_filters(Base, #{} = Rep, FetchFilter) -> + #{ + ?SOURCE := Source, + ?OPTIONS := Options + } = Rep, Base2 = Base ++ case couch_replicator_filters:parse(Options) of {ok, nil} -> []; {ok, {view, Filter, QueryParams}} -> [Filter, QueryParams]; - {ok, {user, {Doc, Filter}, QueryParams}} -> + {ok, {user, {Doc, Filter}, QueryParams}} when FetchFilter =:= true -> case couch_replicator_filters:fetch(Doc, Filter, Source) of {ok, Code} -> [Code, QueryParams]; {error, Error} -> throw({filter_fetch_error, Error}) end; + {ok, {user, {Doc, Filter}, QueryParams}} when FetchFilter =:= false -> + [Doc, Filter, QueryParams]; {ok, {docids, DocIds}} -> [DocIds]; {ok, {mango, Selector}} -> @@ -112,27 +146,33 @@ maybe_append_filters(Base, {error, FilterParseError} -> throw({error, FilterParseError}) end, - couch_util:to_hex(couch_hash:md5_hash(term_to_binary(Base2))). + Res = couch_util:to_hex(couch_hash:md5_hash(term_to_binary(Base2))), + list_to_binary(Res). -maybe_append_options(Options, RepOptions) -> +maybe_append_options(Options, #{} = RepOptions) -> lists:foldl(fun(Option, Acc) -> Acc ++ - case couch_util:get_value(Option, RepOptions, false) of - true -> - "+" ++ atom_to_list(Option); - false -> - "" + case maps:get(Option, RepOptions, false) of + true -> "+" ++ binary_to_list(Option); + false -> "" end end, [], Options). -get_rep_endpoint(#httpdb{url=Url, headers=Headers}) -> +get_rep_endpoint(#{<<"url">> := Url0, <<"headers">> := Headers0}) -> + % We turn everything to lists and proplists to calculate the same + % replication ID as CouchDB <= 3.x + Url = binary_to_list(Url0), + Headers1 = maps:fold(fun(K, V, Acc) -> + [{binary_to_list(K), binary_to_list(V)} | Acc] + end, [], Headers0), + Headers2 = lists:keysort(1, Headers1), DefaultHeaders = (#httpdb{})#httpdb.headers, - {remote, Url, Headers -- DefaultHeaders}. + {remote, Url, Headers2 -- DefaultHeaders}. -get_v4_endpoint(#httpdb{} = HttpDb) -> +get_v4_endpoint(#{} = HttpDb) -> {remote, Url, Headers} = get_rep_endpoint(HttpDb), {{UserFromHeaders, _}, HeadersWithoutBasicAuth} = couch_replicator_utils:remove_basic_auth_from_headers(Headers), @@ -184,92 +224,132 @@ get_non_default_port(_Schema, Port) -> -ifdef(TEST). -include_lib("eunit/include/eunit.hrl"). +-include_lib("fabric/test/fabric2_test.hrl"). replication_id_convert_test_() -> [?_assertEqual(Expected, convert(Id)) || {Expected, Id} <- [ - {{"abc", ""}, "abc"}, - {{"abc", ""}, <<"abc">>}, - {{"abc", "+x+y"}, <<"abc+x+y">>}, - {{"abc", "+x+y"}, {"abc", "+x+y"}}, - {{"abc", "+x+y"}, <<"abc x y">>} + {<<"abc">>, <<"abc">>}, + {<<"abc+x">>, <<"abc+x">>}, + {<<"abc+x">>, <<"abc x">>}, + {<<"abc+x+y">>, <<"abc+x+y">>}, + {<<"abc+x+y">>, <<"abc x y">>} ]]. + http_v4_endpoint_test_() -> [?_assertMatch({remote, User, Host, Port, Path, HeadersNoAuth, undefined}, - get_v4_endpoint(#httpdb{url = Url, headers = Headers})) || + get_v4_endpoint(#{<<"url">> => Url, <<"headers">> => Headers})) || {{User, Host, Port, Path, HeadersNoAuth}, {Url, Headers}} <- [ { {undefined, "host", default, "/", []}, - {"http://host", []} + {<<"http://host">>, #{}} }, { {undefined, "host", default, "/", []}, - {"https://host", []} + {<<"https://host">>, #{}} }, { {undefined, "host", default, "/", []}, - {"http://host:5984", []} + {<<"http://host:5984">>, #{}} }, { {undefined, "host", 1, "/", []}, - {"http://host:1", []} + {<<"http://host:1">>, #{}} }, { {undefined, "host", 2, "/", []}, - {"https://host:2", []} + {<<"https://host:2">>, #{}} }, { - {undefined, "host", default, "/", [{"h","v"}]}, - {"http://host", [{"h","v"}]} + {undefined, "host", default, "/", [{"h", "v"}]}, + {<<"http://host">>, #{<<"h">> => <<"v">>}} }, { {undefined, "host", default, "/a/b", []}, - {"http://host/a/b", []} + {<<"http://host/a/b">>, #{}} }, { {"user", "host", default, "/", []}, - {"http://user:pass@host", []} + {<<"http://user:pass@host">>, #{}} }, { {"user", "host", 3, "/", []}, - {"http://user:pass@host:3", []} + {<<"http://user:pass@host:3">>, #{}} }, { {"user", "host", default, "/", []}, - {"http://user:newpass@host", []} + {<<"http://user:newpass@host">>, #{}} }, { {"user", "host", default, "/", []}, - {"http://host", [basic_auth("user","pass")]} + {<<"http://host">>, basic_auth(<<"user">>, <<"pass">>)} }, { {"user", "host", default, "/", []}, - {"http://host", [basic_auth("user","newpass")]} + {<<"http://host">>, basic_auth(<<"user">>, <<"newpass">>)} }, { {"user1", "host", default, "/", []}, - {"http://user1:pass1@host", [basic_auth("user2","pass2")]} + {<<"http://user1:pass1@host">>, basic_auth(<<"user2">>, + <<"pass2">>)} }, { {"user", "host", default, "/", [{"h", "v"}]}, - {"http://host", [{"h", "v"}, basic_auth("user","pass")]} + {<<"http://host">>, maps:merge(#{<<"h">> => <<"v">>}, + basic_auth(<<"user">>, <<"pass">>))} }, { {undefined, "random_junk", undefined, undefined}, - {"random_junk", []} + {<<"random_junk">>, #{}} }, { {undefined, "host", default, "/", []}, - {"http://host", [{"Authorization", "Basic bad"}]} + {<<"http://host">>, #{<<"Authorization">> => + <<"Basic bad">>}} } ] ]. basic_auth(User, Pass) -> - B64Auth = base64:encode_to_string(User ++ ":" ++ Pass), - {"Authorization", "Basic " ++ B64Auth}. + B64Auth = base64:encode(<>), + #{<<"Authorization">> => <<"Basic ", B64Auth/binary>>}. + + +version4_matches_couchdb3_test_() -> + { + foreach, + fun setup/0, + fun teardown/1, + [ + ?TDEF_FE(id_matches_couchdb3) + ] + }. + + +setup() -> + meck:expect(config, get, fun(_, _, Default) -> Default end). + + +teardown(_) -> + meck:unload(). + + +id_matches_couchdb3(_) -> + {ok, Rep} = couch_replicator_parse:parse_rep(#{ + <<"source">> => <<"http://adm:pass@127.0.0.1/abc">>, + <<"target">> => <<"http://adm:pass@127.0.0.1/xyz">>, + <<"create_target">> => true, + <<"continuous">> => true + }, null), + meck:expect(couch_server, get_uuid, 0, "somefixedid"), + {RepId, BaseId} = replication_id(Rep), + % Calculated on CouchDB 3.x + RepId3x = <<"ff71e1208f93ba054eb60e7ca8683fe4+continuous+create_target">>, + BaseId3x = <<"ff71e1208f93ba054eb60e7ca8683fe4">>, + ?assertEqual(RepId3x, RepId), + ?assertEqual(BaseId3x, BaseId). -endif. -- cgit v1.2.1 From 3c9b7540cbb41225b35c89b741e0c5b83cdbf4e1 Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Fri, 28 Aug 2020 04:33:18 -0400 Subject: Introduce couch_replicator_jobs abstraction module This is the `couch_jobs` abstraction module. All replicator calls to `couch_jobs` should go through it. This module takes care of adding types to some of the API calls, handles maintencence of the RepId -> JobId mappings when jobs are added and removed, and some subscription logic. `fabric2.hrl` include file is updated with the definition of the `?REPLICATION_IDS` prefix where the RepId -> JobId keyspace lives. --- src/couch_replicator/src/couch_replicator_jobs.erl | 312 +++++++++++++++++++++ src/fabric/include/fabric2.hrl | 1 + 2 files changed, 313 insertions(+) create mode 100644 src/couch_replicator/src/couch_replicator_jobs.erl diff --git a/src/couch_replicator/src/couch_replicator_jobs.erl b/src/couch_replicator/src/couch_replicator_jobs.erl new file mode 100644 index 000000000..a602b0c62 --- /dev/null +++ b/src/couch_replicator/src/couch_replicator_jobs.erl @@ -0,0 +1,312 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_replicator_jobs). + + +-export([ + % couch_jobs type timeouts + set_timeout/0, + get_timeout/0, + + % Job creation and querying + new_job/7, + add_job/3, + remove_job/2, + get_job_data/2, + fold_jobs/3, + pending_count/2, + + % Job subscription + wait_running/1, + wait_result/1, + + % Job execution + accept_job/1, + update_job_data/3, + finish_job/3, + reschedule_job/4, + + % (..., ?REPLICATION_IDS) -> JobId handling + try_update_rep_id/3, + update_rep_id/3, + clear_old_rep_id/3, + get_job_id/2, + + % Debug functions + remove_jobs/2, + get_job_ids/1 +]). + + +-include("couch_replicator.hrl"). +-include_lib("fabric/include/fabric2.hrl"). + + +-define(REP_JOBS, <<"rep_jobs">>). +-define(REP_JOBS_TIMEOUT_SEC, 61). + + +% Data model +% ---------- +% +% State kept in couch_jobs under the ?REP_JOBS type +% +% Job IDs are defined as: +% * Replicator DB instance UUID + doc ID for persistent replications +% * Hash(username|source|target|options) for transient replications +% +% To map replication IDs to couch_job jobs, there is a separate index that +% looks like: +% (?REPLICATION_IDS, RepId) -> JobId +% + +set_timeout() -> + couch_jobs:set_type_timeout(?REP_JOBS, ?REP_JOBS_TIMEOUT_SEC). + + +get_timeout() -> + ?REP_JOBS_TIMEOUT_SEC. + + +new_job(#{} = Rep, DbName, DbUUID, DocId, State, StateInfo, DocState) -> + NowSec = erlang:system_time(second), + AddedEvent = #{?HIST_TYPE => ?HIST_ADDED, ?HIST_TIMESTAMP => NowSec}, + #{ + ?REP => Rep, + ?REP_ID => null, + ?BASE_ID => null, + ?DB_NAME => DbName, + ?DB_UUID => DbUUID, + ?DOC_ID => DocId, + ?ERROR_COUNT => 0, + ?REP_STATS => #{}, + ?STATE => State, + ?STATE_INFO => StateInfo, + ?DOC_STATE => DocState, + ?LAST_UPDATED => NowSec, + ?LAST_START => 0, + ?LAST_ERROR => null, + ?REP_NODE => null, + ?REP_PID => null, + ?JOB_HISTORY => [AddedEvent], + ?CHECKPOINT_HISTORY => [] + }. + + +add_job(Tx, JobId, JobData) -> + couch_stats:increment_counter([couch_replicator, jobs, adds]), + couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(Tx), fun(JTx) -> + case couch_jobs:get_job_data(JTx, ?REP_JOBS, JobId) of + {ok, #{} = OldData} -> + ok = remove_job(JTx, JobId, OldData); + {error, not_found} -> + ok + end, + ok = couch_jobs:add(JTx, ?REP_JOBS, JobId, JobData) + end). + + +remove_job(Tx, JobId) -> + couch_stats:increment_counter([couch_replicator, jobs, removes]), + couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(Tx), fun(JTx) -> + case couch_jobs:get_job_data(JTx, ?REP_JOBS, JobId) of + {ok, #{} = JobData} -> + ok = remove_job(JTx, JobId, JobData); + {error, not_found} -> + ok + end + end). + + +get_job_data(Tx, JobId) -> + couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(Tx), fun(JTx) -> + couch_jobs:get_job_data(JTx, ?REP_JOBS, JobId) + end). + + +% UserFun = fun(JTx, JobId, JobState, JobData, UserAcc) +% +fold_jobs(Tx, UserFun, Acc) when is_function(UserFun, 5) -> + couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(Tx), fun(JTx) -> + couch_jobs:fold_jobs(JTx, ?REP_JOBS, UserFun, Acc) + end). + + +pending_count(_Tx, Limit) when is_integer(Limit), Limit =< 0 -> + 0; + +pending_count(Tx, Limit) when is_integer(Limit), Limit > 0 -> + Opts = #{ + max_sched_time => erlang:system_time(second), + limit => Limit + }, + couch_jobs:pending_count(Tx, ?REP_JOBS, Opts). + + +wait_running(JobId) -> + case couch_jobs:subscribe(?REP_JOBS, JobId) of + {ok, finished, JobData} -> + {ok, JobData}; + {ok, SubId, running, #{?STATE := ?ST_PENDING}} -> + wait_running(JobId, SubId); + {ok, SubId, running, JobData} -> + ok = couch_jobs:unsubscribe(SubId), + {ok, JobData}; + {ok, SubId, pending, _} -> + wait_running(JobId, SubId); + {error, Error} -> + {error, Error} + end. + + +wait_running(JobId, SubId) -> + case couch_jobs:wait(SubId, running, infinity) of + {?REP_JOBS, _, running, #{?STATE := ?ST_PENDING}} -> + wait_running(JobId, SubId); + {?REP_JOBS, _, running, JobData} -> + ok = couch_jobs:unsubscribe(SubId), + {ok, JobData}; + {?REP_JOBS, _, finished, JobData} -> + ok = couch_jobs:unsubscribe(SubId), + {ok, JobData} + end. + + +wait_result(JobId) -> + case couch_jobs:subscribe(?REP_JOBS, JobId) of + {ok, finished, JobData} -> + {ok, JobData}; + {ok, SubId, _, _} -> + {?REP_JOBS, _, finished, JobData} = couch_jobs:wait(SubId, + finished, infinity), + {ok, JobData}; + {error, Error} -> + {error, Error} + end. + + +accept_job(MaxSchedTime) when is_integer(MaxSchedTime) -> + Opts = #{max_sched_time => MaxSchedTime}, + couch_jobs:accept(?REP_JOBS, Opts). + + +update_job_data(Tx, #{} = Job, #{} = JobData) -> + couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(Tx), fun(JTx) -> + couch_jobs:update(JTx, Job, JobData) + end). + + +finish_job(Tx, #{} = Job, #{} = JobData) -> + couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(Tx), fun(JTx) -> + couch_jobs:finish(JTx, Job, JobData) + end). + + +reschedule_job(Tx, #{} = Job, #{} = JobData, Time) -> + couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(Tx), fun(JTx) -> + {ok, Job1} = couch_jobs:resubmit(JTx, Job, Time), + ok = couch_jobs:finish(JTx, Job1, JobData) + end). + + +try_update_rep_id(Tx, JobId, RepId) -> + couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(Tx), fun(JTx) -> + #{tx := ErlFdbTx, layer_prefix := LayerPrefix} = JTx, + Key = erlfdb_tuple:pack({?REPLICATION_IDS, RepId}, LayerPrefix), + case get_job_id(JTx, RepId) of + {error, not_found} -> + ok = erlfdb:set(ErlFdbTx, Key, JobId); + {ok, JobId} -> + ok; + {ok, OtherJobId} when is_binary(OtherJobId) -> + {error, {replication_job_conflict, OtherJobId}} + end + end). + + +update_rep_id(Tx, JobId, RepId) -> + couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(Tx), fun(JTx) -> + #{tx := ErlFdbTx, layer_prefix := LayerPrefix} = JTx, + Key = erlfdb_tuple:pack({?REPLICATION_IDS, RepId}, LayerPrefix), + ok = erlfdb:set(ErlFdbTx, Key, JobId) + end). + + +clear_old_rep_id(_, _, null) -> + ok; + +clear_old_rep_id(Tx, JobId, RepId) -> + couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(Tx), fun(JTx) -> + #{tx := ErlFdbTx, layer_prefix := LayerPrefix} = JTx, + Key = erlfdb_tuple:pack({?REPLICATION_IDS, RepId}, LayerPrefix), + case get_job_id(JTx, RepId) of + {error, not_found} -> + ok; + {ok, JobId} -> + ok = erlfdb:clear(ErlFdbTx, Key); + {ok, OtherJobId} when is_binary(OtherJobId) -> + ok + end + end). + + +get_job_id(Tx, RepId) -> + couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(Tx), fun(JTx) -> + #{tx := ErlFdbTx, layer_prefix := LayerPrefix} = JTx, + Key = erlfdb_tuple:pack({?REPLICATION_IDS, RepId}, LayerPrefix), + case erlfdb:wait(erlfdb:get(ErlFdbTx, Key)) of + not_found -> + {error, not_found}; + <<_/binary>> = JobId -> + {ok, JobId} + end + end). + + +% Debug functions + +remove_jobs(Tx, JobIds) when is_list(JobIds) -> + couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(Tx), fun(JTx) -> + lists:foreach(fun(JobId) -> remove_job(JTx, JobId) end, JobIds) + end), + []. + + +get_job_ids(Tx) -> + couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(Tx), fun(JTx) -> + #{tx := ErlFdbTx, layer_prefix := LayerPrefix} = JTx, + Prefix = erlfdb_tuple:pack({?REPLICATION_IDS}, LayerPrefix), + KVs = erlfdb:wait(erlfdb:get_range_startswith(ErlFdbTx, Prefix)), + lists:map(fun({K, JobId}) -> + {RepId} = erlfdb_tuple:unpack(K, Prefix), + {RepId, JobId} + end, KVs) + end). + + +% Private functions + +remove_job(#{jtx := true} = JTx, JobId, OldJobData) -> + #{tx := Tx, layer_prefix := LayerPrefix} = JTx, + case OldJobData of + #{?REP_ID := null} -> + couch_jobs:remove(JTx, ?REP_JOBS, JobId); + #{?REP_ID := RepId} when is_binary(RepId) -> + Key = erlfdb_tuple:pack({?REPLICATION_IDS, RepId}, LayerPrefix), + case erlfdb:wait(erlfdb:get(Tx, Key)) of + not_found -> ok; + JobId -> erlfdb:clear(Tx, Key); + <<_/binary>> -> ok + end, + couch_jobs:remove(JTx, ?REP_JOBS, JobId) + end. diff --git a/src/fabric/include/fabric2.hrl b/src/fabric/include/fabric2.hrl index 2eba4d5eb..ebbb7c7c5 100644 --- a/src/fabric/include/fabric2.hrl +++ b/src/fabric/include/fabric2.hrl @@ -26,6 +26,7 @@ -define(DELETED_DBS, 3). -define(DBS, 15). -define(EXPIRING_CACHE, 53). +-define(REPLICATION_IDS, 54). -define(TX_IDS, 255). % Cluster Level: (LayerPrefix, ?CLUSTER_CONFIG, X, ...) -- cgit v1.2.1 From 5b98e8a6c169449d1a3e362e52e86822ef350ed5 Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Fri, 28 Aug 2020 04:34:21 -0400 Subject: Update frontend replicator modules The frontend is the part responsible for parsing replication parameters and creating replication jobs. Most of that happens in the `couch_replicator` module. - `couch_replicator:replicate/2` is the main API for creating transient replication jobs. - Replication jobs from `_replicator` documents are updated from `couch_replicator:after_*` callbacks. `after_db_create/2` besides being called on db creation also gets called when a database is undeleted and `add_jobs_from_db/1` function will attempt to parse them all. `couch_replicator` exports monitoring functions `docs/2,3 and jobs/0,1`. Those get called from HTTP handlers for `_scheduler/docs` and `_scheduler/jobs` respectively. For hands-on remsh access there some debuging functions such as: - rescan_jobs/0,1 : Simulates a db being re-created so all the jobs are added - reenqueue_jobs/0,1 : Deletes all the jobs from a db then re-adds them - remove_jobs/0 : Removes all the replication jobs - get_job_ids/0 : Read the RepId -> JobId mapping area --- src/couch_replicator/src/couch_replicator.erl | 716 +++++++++++++-------- src/couch_replicator/src/couch_replicator_epi.erl | 58 ++ .../src/couch_replicator_fabric2_plugin.erl | 36 ++ 3 files changed, 530 insertions(+), 280 deletions(-) create mode 100644 src/couch_replicator/src/couch_replicator_epi.erl create mode 100644 src/couch_replicator/src/couch_replicator_fabric2_plugin.erl diff --git a/src/couch_replicator/src/couch_replicator.erl b/src/couch_replicator/src/couch_replicator.erl index b38f31b59..f34ac7d7f 100644 --- a/src/couch_replicator/src/couch_replicator.erl +++ b/src/couch_replicator/src/couch_replicator.erl @@ -14,279 +14,484 @@ -export([ replicate/2, - replication_states/0, + + jobs/0, job/1, - doc/3, - active_doc/2, - info_from_doc/2, - restart_job/1 + docs/2, + doc/2, + + after_db_create/2, + after_db_delete/2, + after_doc_write/6, + + ensure_rep_db_exists/0, + + rescan_jobs/0, + rescan_jobs/1, + reenqueue_jobs/0, + reenqueue_jobs/1, + remove_jobs/0, + get_job_ids/0 ]). + -include_lib("couch/include/couch_db.hrl"). -include("couch_replicator.hrl"). --include_lib("couch_replicator/include/couch_replicator_api_wrap.hrl"). --include_lib("couch_mrview/include/couch_mrview.hrl"). --include_lib("mem3/include/mem3.hrl"). - --define(DESIGN_DOC_CREATION_DELAY_MSEC, 1000). --define(REPLICATION_STATES, [ - initializing, % Just added to scheduler - error, % Could not be turned into a replication job - running, % Scheduled and running - pending, % Scheduled and waiting to run - crashing, % Scheduled but crashing, backed off by the scheduler - completed, % Non-continuous (normal) completed replication - failed % Terminal failure, will not be retried anymore -]). - --import(couch_util, [ - get_value/2, - get_value/3 -]). -spec replicate({[_]}, any()) -> {ok, {continuous, binary()}} | - {ok, {[_]}} | + {ok, #{}} | {ok, {cancelled, binary()}} | {error, any()} | no_return(). -replicate(PostBody, Ctx) -> - {ok, Rep0} = couch_replicator_utils:parse_rep_doc(PostBody, Ctx), - Rep = Rep0#rep{start_time = os:timestamp()}, - #rep{id = RepId, options = Options, user_ctx = UserCtx} = Rep, - case get_value(cancel, Options, false) of - true -> - CancelRepId = case get_value(id, Options, nil) of - nil -> - RepId; - RepId2 -> - RepId2 - end, - case check_authorization(CancelRepId, UserCtx) of - ok -> - cancel_replication(CancelRepId); - not_found -> - {error, not_found} - end; - false -> - check_authorization(RepId, UserCtx), - {ok, Listener} = rep_result_listener(RepId), - Result = do_replication_loop(Rep), - couch_replicator_notifier:stop(Listener), - Result +replicate(Body, #user_ctx{name = User} = UserCtx) -> + {ok, Id, Rep} = couch_replicator_parse:parse_transient_rep(Body, User), + #{?OPTIONS := Options} = Rep, + JobId = case couch_replicator_jobs:get_job_id(undefined, Id) of + {ok, JobId0} -> JobId0; + {error, not_found} -> Id + end, + case maps:get(<<"cancel">>, Options, false) of + true -> + case check_authorization(JobId, UserCtx) of + ok -> cancel_replication(JobId); + not_found -> {error, not_found} + end; + false -> + check_authorization(JobId, UserCtx), + ok = start_transient_job(JobId, Rep), + case maps:get(<<"continuous">>, Options, false) of + true -> + case couch_replicator_jobs:wait_running(JobId) of + {ok, #{?STATE := ?ST_RUNNING} = JobData} -> + {ok, {continuous, maps:get(?REP_ID, JobData)}}; + {ok, #{?STATE := ?ST_FAILED} = JobData} -> + {error, maps:get(?STATE_INFO, JobData)}; + {error, Error} -> + {error, Error} + end; + false -> + case couch_replicator_jobs:wait_result(JobId) of + {ok, #{?STATE := ?ST_COMPLETED} = JobData} -> + {ok, maps:get(?CHECKPOINT_HISTORY, JobData)}; + {ok, #{?STATE := ?ST_FAILED} = JobData} -> + {error, maps:get(?STATE_INFO, JobData)}; + {error, Error} -> + {error, Error} + end + end end. --spec do_replication_loop(#rep{}) -> - {ok, {continuous, binary()}} | {ok, tuple()} | {error, any()}. -do_replication_loop(#rep{id = {BaseId, Ext} = Id, options = Options} = Rep) -> - ok = couch_replicator_scheduler:add_job(Rep), - case get_value(continuous, Options, false) of - true -> - {ok, {continuous, ?l2b(BaseId ++ Ext)}}; - false -> - wait_for_result(Id) +jobs() -> + FoldFun = fun(_JTx, _JobId, CouchJobsState, JobData, Acc) -> + case CouchJobsState of + pending -> [job_ejson(JobData) | Acc]; + running -> [job_ejson(JobData) | Acc]; + finished -> Acc + end + end, + couch_replicator_jobs:fold_jobs(undefined, FoldFun, []). + + +job(Id0) when is_binary(Id0) -> + Id1 = couch_replicator_ids:convert(Id0), + JobId = case couch_replicator_jobs:get_job_id(undefined, Id1) of + {ok, JobId0} -> JobId0; + {error, not_found} -> Id1 + end, + case couch_replicator_jobs:get_job_data(undefined, JobId) of + {ok, #{} = JobData} -> {ok, job_ejson(JobData)}; + {error, not_found} -> {error, not_found} end. --spec rep_result_listener(rep_id()) -> {ok, pid()}. -rep_result_listener(RepId) -> - ReplyTo = self(), - {ok, _Listener} = couch_replicator_notifier:start_link( - fun({_, RepId2, _} = Ev) when RepId2 =:= RepId -> - ReplyTo ! Ev; - (_) -> - ok - end). +docs(#{} = Db, States) when is_list(States) -> + DbName = fabric2_db:name(Db), + FoldFun = fun(_JTx, _JobId, _, JobData, Acc) -> + case JobData of + #{?DB_NAME := DbName, ?STATE := State} -> + case {States, lists:member(State, States)} of + {[], _} -> [doc_ejson(JobData) | Acc]; + {[_ | _], true} -> [doc_ejson(JobData) | Acc]; + {[_ | _], false} -> Acc + end; + #{} -> + Acc + end + end, + couch_replicator_jobs:fold_jobs(undefined, FoldFun, []). --spec wait_for_result(rep_id()) -> - {ok, {[_]}} | {error, any()}. -wait_for_result(RepId) -> - receive - {finished, RepId, RepResult} -> - {ok, RepResult}; - {error, RepId, Reason} -> - {error, Reason} +doc(#{} = Db, DocId) when is_binary(DocId) -> + DbUUID = fabric2_db:get_uuid(Db), + JobId = couch_replicator_ids:job_id(DbUUID, DocId), + case couch_replicator_jobs:get_job_data(undefined, JobId) of + {ok, #{} = JobData} -> {ok, doc_ejson(JobData)}; + {error, not_found} -> {error, not_found} end. --spec cancel_replication(rep_id()) -> - {ok, {cancelled, binary()}} | {error, not_found}. -cancel_replication({BasedId, Extension} = RepId) -> - FullRepId = BasedId ++ Extension, - couch_log:notice("Canceling replication '~s' ...", [FullRepId]), - case couch_replicator_scheduler:rep_state(RepId) of - #rep{} -> - ok = couch_replicator_scheduler:remove_job(RepId), - couch_log:notice("Replication '~s' cancelled", [FullRepId]), - {ok, {cancelled, ?l2b(FullRepId)}}; - nil -> - couch_log:notice("Replication '~s' not found", [FullRepId]), - {error, not_found} - end. +after_db_create(DbName, DbUUID) when ?IS_REP_DB(DbName)-> + couch_stats:increment_counter([couch_replicator, docs, dbs_created]), + try fabric2_db:open(DbName, [{uuid, DbUUID}, ?ADMIN_CTX]) of + {ok, Db} -> + fabric2_fdb:transactional(Db, fun(TxDb) -> + ok = add_jobs_from_db(TxDb) + end) + catch + error:database_does_not_exist -> + ok + end; + +after_db_create(_DbName, _DbUUID) -> + ok. + + +after_db_delete(DbName, DbUUID) when ?IS_REP_DB(DbName) -> + couch_stats:increment_counter([couch_replicator, docs, dbs_deleted]), + FoldFun = fun(JTx, JobId, _, JobData, ok) -> + case JobData of + #{?DB_UUID := DbUUID} -> + ok = couch_replicator_jobs:remove_job(JTx, JobId); + #{} -> + ok + end + end, + couch_replicator_jobs:fold_jobs(undefined, FoldFun, ok); + +after_db_delete(_DbName, _DbUUID) -> + ok. + + +after_doc_write(#{name := DbName} = Db, #doc{} = Doc, _NewWinner, _OldWinner, + _NewRevId, _Seq) when ?IS_REP_DB(DbName) -> + couch_stats:increment_counter([couch_replicator, docs, db_changes]), + {Props} = Doc#doc.body, + case couch_util:get_value(?REPLICATION_STATE, Props) of + ?ST_COMPLETED -> ok; + ?ST_FAILED -> ok; + _ -> process_change(Db, Doc) + end; + +after_doc_write(_Db, _Doc, _NewWinner, _OldWinner, _NewRevId, _Seq) -> + ok. + + +% This is called from supervisor, must return ignore. +-spec ensure_rep_db_exists() -> ignore. +ensure_rep_db_exists() -> + couch_replicator_jobs:set_timeout(), + case config:get_boolean("replicator", "create_replicator_db", false) of + true -> + UserCtx = #user_ctx{roles=[<<"_admin">>, <<"_replicator">>]}, + Opts = [{user_ctx, UserCtx}, sys_db], + case fabric2_db:create(?REP_DB_NAME, Opts) of + {error, file_exists} -> ok; + {ok, _Db} -> ok + end; + false -> + ok + end, + ignore. --spec replication_states() -> [atom()]. -replication_states() -> - ?REPLICATION_STATES. +% Testing and debug functions +rescan_jobs() -> + rescan_jobs(?REP_DB_NAME). --spec strip_url_creds(binary() | {[_]}) -> binary(). -strip_url_creds(Endpoint) -> - try - couch_replicator_docs:parse_rep_db(Endpoint, [], []) of - #httpdb{url = Url} -> - iolist_to_binary(couch_util:url_strip_password(Url)) + +rescan_jobs(DbName) when is_binary(DbName), ?IS_REP_DB(DbName) -> + try fabric2_db:open(DbName, [?ADMIN_CTX]) of + {ok, Db} -> + after_db_create(DbName, fabric2_db:get_uuid(Db)) catch - throw:{error, local_endpoints_not_supported} -> - Endpoint + error:database_does_not_exist -> + database_does_not_exist end. --spec job(binary()) -> {ok, {[_]}} | {error, not_found}. -job(JobId0) when is_binary(JobId0) -> - JobId = couch_replicator_ids:convert(JobId0), - {Res, _Bad} = rpc:multicall(couch_replicator_scheduler, job, [JobId]), - case [JobInfo || {ok, JobInfo} <- Res] of - [JobInfo| _] -> - {ok, JobInfo}; - [] -> - {error, not_found} - end. +reenqueue_jobs() -> + reenqueue_jobs(?REP_DB_NAME). --spec restart_job(binary() | list() | rep_id()) -> - {ok, {[_]}} | {error, not_found}. -restart_job(JobId0) -> - JobId = couch_replicator_ids:convert(JobId0), - {Res, _} = rpc:multicall(couch_replicator_scheduler, restart_job, [JobId]), - case [JobInfo || {ok, JobInfo} <- Res] of - [JobInfo| _] -> - {ok, JobInfo}; - [] -> - {error, not_found} +reenqueue_jobs(DbName) when is_binary(DbName), ?IS_REP_DB(DbName) -> + try fabric2_db:open(DbName, [?ADMIN_CTX]) of + {ok, Db} -> + DbUUID = fabric2_db:get_uuid(Db), + ok = after_db_delete(DbName, DbUUID), + ok = after_db_create(DbName, DbUUID) + catch + error:database_does_not_exist -> + database_does_not_exist end. --spec active_doc(binary(), binary()) -> {ok, {[_]}} | {error, not_found}. -active_doc(DbName, DocId) -> - try - Shards = mem3:shards(DbName), - Live = [node() | nodes()], - Nodes = lists:usort([N || #shard{node=N} <- Shards, - lists:member(N, Live)]), - Owner = mem3:owner(DbName, DocId, Nodes), - case active_doc_rpc(DbName, DocId, [Owner]) of - {ok, DocInfo} -> - {ok, DocInfo}; +remove_jobs() -> + % If we clear a large number of jobs make sure to use batching so we don't + % take too long, if use individual transactions, and also don't timeout if + % use a single transaction + FoldFun = fun + (_, JobId, _, _, Acc) when length(Acc) > 250 -> + couch_replicator_jobs:remove_jobs(undefined, [JobId | Acc]); + (_, JobId, _, _, Acc) -> + [JobId | Acc] + end, + Acc = couch_replicator_jobs:fold_jobs(undefined, FoldFun, []), + [] = couch_replicator_jobs:remove_jobs(undefined, Acc), + ok. + + +get_job_ids() -> + couch_replicator_jobs:get_job_ids(undefined). + + +% Private functions + +-spec start_transient_job(binary(), #{}) -> ok. +start_transient_job(JobId, #{} = Rep) -> + JobData = couch_replicator_jobs:new_job(Rep, null, null, null, + ?ST_PENDING, null, null), + couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(), fun(JTx) -> + case couch_replicator_jobs:get_job_data(JTx, JobId) of + {ok, #{?REP := OldRep, ?STATE := State}} -> + SameRep = couch_replicator_utils:compare_reps(Rep, OldRep), + Active = State =:= ?ST_PENDING orelse State =:= ?ST_RUNNING, + case SameRep andalso Active of + true -> + % If a job with the same paremeters is running we don't + % stop and just ignore the request. This is mainly for + % compatibility where users are able to idempotently + % POST the same job without it being stopped and + % restarted. + ok; + false -> + couch_replicator_jobs:add_job(JTx, JobId, JobData) + end; {error, not_found} -> - active_doc_rpc(DbName, DocId, Nodes -- [Owner]) + ok = couch_replicator_jobs:add_job(JTx, JobId, JobData) end - catch - % Might be a local database - error:database_does_not_exist -> - active_doc_rpc(DbName, DocId, [node()]) - end. + end). --spec active_doc_rpc(binary(), binary(), [node()]) -> - {ok, {[_]}} | {error, not_found}. -active_doc_rpc(_DbName, _DocId, []) -> - {error, not_found}; -active_doc_rpc(DbName, DocId, [Node]) when Node =:= node() -> - couch_replicator_doc_processor:doc(DbName, DocId); -active_doc_rpc(DbName, DocId, Nodes) -> - {Res, _Bad} = rpc:multicall(Nodes, couch_replicator_doc_processor, doc, - [DbName, DocId]), - case [DocInfo || {ok, DocInfo} <- Res] of - [DocInfo | _] -> - {ok, DocInfo}; - [] -> - {error, not_found} - end. +-spec cancel_replication(job_id()) -> + {ok, {cancelled, binary()}} | {error, not_found}. +cancel_replication(JobId) when is_binary(JobId) -> + couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(), fun(JTx) -> + Id = case couch_replicator_jobs:get_job_data(JTx, JobId) of + {ok, #{?REP_ID := RepId}} when is_binary(RepId) -> + RepId; + _ -> + JobId + end, + couch_log:notice("Canceling replication '~s'", [Id]), + case couch_replicator_jobs:remove_job(JTx, JobId) of + {error, not_found} -> + {error, not_found}; + ok -> + {ok, {cancelled, Id}} + end + end). --spec doc(binary(), binary(), any()) -> {ok, {[_]}} | {error, not_found}. -doc(RepDb, DocId, UserCtx) -> - case active_doc(RepDb, DocId) of - {ok, DocInfo} -> - {ok, DocInfo}; - {error, not_found} -> - doc_from_db(RepDb, DocId, UserCtx) - end. +process_change(_Db, #doc{id = <>}) -> + ok; +process_change(#{} = Db, #doc{deleted = true} = Doc) -> + DbUUID = fabric2_db:get_uuid(Db), + JobId = couch_replicator_ids:job_id(DbUUID, Doc#doc.id), + couch_replicator_jobs:remove_job(undefined, JobId); --spec doc_from_db(binary(), binary(), any()) -> {ok, {[_]}} | {error, not_found}. -doc_from_db(RepDb, DocId, UserCtx) -> - case fabric:open_doc(RepDb, DocId, [UserCtx, ejson_body]) of - {ok, Doc} -> - {ok, info_from_doc(RepDb, couch_doc:to_json_obj(Doc, []))}; - {not_found, _Reason} -> - {error, not_found} - end. +process_change(#{} = Db, #doc{deleted = false} = Doc) -> + #doc{id = DocId, body = {Props} = Body} = Doc, + DbName = fabric2_db:name(Db), + DbUUID = fabric2_db:get_uuid(Db), + {Rep, DocState, Error} = try + Rep0 = couch_replicator_parse:parse_rep_doc(Body), + DocState0 = couch_util:get_value(?REPLICATION_STATE, Props, null), + {Rep0, DocState0, null} + catch + throw:{bad_rep_doc, Reason} -> + {null, null, couch_replicator_utils:rep_error_to_binary(Reason)} + end, + JobId = couch_replicator_ids:job_id(DbUUID, DocId), + JobData = case Rep of + null -> + couch_relicator_jobs:new_job(Rep, DbName, DbUUID, DocId, + ?ST_FAILED, Error, null); + #{} -> + couch_replicator_jobs:new_job(Rep, DbName, DbUUID, DocId, + ?ST_PENDING, null, DocState) + end, + LogMsg = "~p : replication doc update db:~s doc:~s job_id:~s doc_state:~s", + couch_log:notice(LogMsg, [?MODULE, DbName, DocId, JobId, DocState]), + + couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(Db), fun(JTx) -> + case couch_replicator_jobs:get_job_data(JTx, JobId) of + {ok, #{?REP := null, ?STATE_INFO := Error}} when Rep =:= null -> + % Same error as before occurred, don't bother updating the job + ok; + {ok, #{?REP := null}} when Rep =:= null -> + % New error so the job is updated + couch_replicator_jobs:add_job(JTx, JobId, JobData); + {ok, #{?REP := OldRep, ?STATE := State}} when is_map(Rep) -> + SameRep = couch_replicator_utils:compare_reps(Rep, OldRep), + Active = State =:= ?ST_PENDING orelse State =:= ?ST_RUNNING, + case SameRep andalso Active of + true -> + % Document was changed but none of the parameters + % relevent for the replication job have changed, so + % make it a no-op + ok; + false -> + couch_replicator_jobs:add_job(JTx, JobId, JobData) + end; + {error, not_found} -> + couch_replicator_jobs:add_job(JTx, JobId, JobData) + end --spec info_from_doc(binary(), {[_]}) -> {[_]}. -info_from_doc(RepDb, {Props}) -> - DocId = get_value(<<"_id">>, Props), - Source = get_value(<<"source">>, Props), - Target = get_value(<<"target">>, Props), - State0 = state_atom(get_value(<<"_replication_state">>, Props, null)), - StateTime = get_value(<<"_replication_state_time">>, Props, null), - {State1, StateInfo, ErrorCount, StartTime} = case State0 of - completed -> - {InfoP} = get_value(<<"_replication_stats">>, Props, {[]}), - case lists:keytake(<<"start_time">>, 1, InfoP) of - {value, {_, Time}, InfoP1} -> - {State0, {InfoP1}, 0, Time}; - false -> - case lists:keytake(start_time, 1, InfoP) of - {value, {_, Time}, InfoP1} -> - {State0, {InfoP1}, 0, Time}; - false -> - {State0, {InfoP}, 0, null} - end - end; - failed -> - Info = get_value(<<"_replication_state_reason">>, Props, nil), - EJsonInfo = couch_replicator_utils:ejson_state_info(Info), - {State0, EJsonInfo, 1, StateTime}; - _OtherState -> - {null, null, 0, null} + end). + + +-spec add_jobs_from_db(#{}) -> ok. +add_jobs_from_db(#{} = TxDb) -> + FoldFun = fun + ({meta, _Meta}, ok) -> + {ok, ok}; + (complete, ok) -> + {ok, ok}; + ({row, Row}, ok) -> + Db = TxDb#{tx := undefined}, + ok = process_change(Db, get_doc(TxDb, Row)), + {ok, ok} end, - {[ - {doc_id, DocId}, - {database, RepDb}, - {id, null}, - {source, strip_url_creds(Source)}, - {target, strip_url_creds(Target)}, - {state, State1}, - {error_count, ErrorCount}, - {info, StateInfo}, - {start_time, StartTime}, - {last_updated, StateTime} - ]}. - - -state_atom(<<"triggered">>) -> - triggered; % This handles a legacy case were document wasn't converted yet -state_atom(State) when is_binary(State) -> - erlang:binary_to_existing_atom(State, utf8); -state_atom(State) when is_atom(State) -> - State. + Opts = [{restart_tx, true}], + {ok, ok} = fabric2_db:fold_docs(TxDb, FoldFun, ok, Opts), + ok. + + +-spec get_doc(#{}, list()) -> #doc{}. +get_doc(TxDb, Row) -> + {_, DocId} = lists:keyfind(id, 1, Row), + {ok, #doc{deleted = false} = Doc} = fabric2_db:open_doc(TxDb, DocId, []), + Doc. + + +doc_ejson(#{} = JobData) -> + #{ + ?REP := Rep, + ?REP_ID := RepId, + ?DB_NAME := DbName, + ?DOC_ID := DocId, + ?STATE := State, + ?STATE_INFO := Info0, + ?ERROR_COUNT := ErrorCount, + ?LAST_UPDATED := LastUpdatedSec, + ?REP_NODE := Node, + ?REP_PID := Pid, + ?REP_STATS := Stats + } = JobData, + + #{ + ?SOURCE := #{<<"url">> := Source, <<"proxy_url">> := SourceProxy}, + ?TARGET := #{<<"url">> := Target, <<"proxy_url">> := TargetProxy}, + ?START_TIME := StartSec + } = Rep, + + LastUpdatedISO8601 = couch_replicator_utils:iso8601(LastUpdatedSec), + StartISO8601 = couch_replicator_utils:iso8601(StartSec), + + Info = case State of + ?ST_RUNNING -> Stats; + ?ST_PENDING -> Stats; + _Other -> Info0 + end, + + #{ + <<"id">> => RepId, + <<"database">> => DbName, + <<"doc_id">> => DocId, + <<"source">> => ejson_url(Source), + <<"target">> => ejson_url(Target), + <<"source_proxy">> => ejson_url(SourceProxy), + <<"target_proxy">> => ejson_url(TargetProxy), + <<"state">> => State, + <<"info">> => Info, + <<"error_count">> => ErrorCount, + <<"last_updated">> => LastUpdatedISO8601, + <<"start_time">> => StartISO8601, + <<"node">> => Node, + <<"pid">> => Pid + }. + + +job_ejson(#{} = JobData) -> + #{ + ?REP := Rep, + ?REP_ID := RepId, + ?DB_NAME := DbName, + ?DOC_ID := DocId, + ?STATE := State, + ?STATE_INFO := Info0, + ?JOB_HISTORY := History, + ?REP_STATS := Stats + } = JobData, + + #{ + ?SOURCE := #{<<"url">> := Source}, + ?TARGET := #{<<"url">> := Target}, + ?REP_USER := User, + ?START_TIME := StartSec + } = Rep, + + StartISO8601 = couch_replicator_utils:iso8601(StartSec), + + History1 = lists:map(fun(#{?HIST_TIMESTAMP := Ts} = Evt) -> + Evt#{?HIST_TIMESTAMP := couch_replicator_utils:iso8601(Ts)} + end, History), + + Info = case State of + ?ST_RUNNING -> Stats; + ?ST_PENDING -> Stats; + _Other -> Info0 + end, + + #{ + <<"id">> => RepId, + <<"database">> => DbName, + <<"doc_id">> => DocId, + <<"source">> => ejson_url(Source), + <<"target">> => ejson_url(Target), + <<"state">> => State, + <<"info">> => Info, + <<"user">> => User, + <<"history">> => History1, + <<"start_time">> => StartISO8601 + }. + + +ejson_url(Url) when is_binary(Url) -> + list_to_binary(couch_util:url_strip_password(Url)); + +ejson_url(null) -> + null. -spec check_authorization(rep_id(), #user_ctx{}) -> ok | not_found. -check_authorization(RepId, #user_ctx{name = Name} = Ctx) -> - case couch_replicator_scheduler:rep_state(RepId) of - #rep{user_ctx = #user_ctx{name = Name}} -> - ok; - #rep{} -> - couch_httpd:verify_is_server_admin(Ctx); - nil -> - not_found +check_authorization(JobId, #user_ctx{} = Ctx) when is_binary(JobId) -> + #user_ctx{name = Name} = Ctx, + case couch_replicator_jobs:get_job_data(undefined, JobId) of + {error, not_found} -> + not_found; + {ok, #{?DB_NAME := DbName}} when is_binary(DbName) -> + throw({unauthorized, <<"Persistent replication collision">>}); + {ok, #{?REP := #{?REP_USER := Name}}} -> + ok; + {ok, #{}} -> + couch_httpd:verify_is_server_admin(Ctx) end. @@ -309,16 +514,16 @@ authorization_test_() -> t_admin_is_always_authorized() -> ?_test(begin - expect_rep_user_ctx(<<"someuser">>, <<"_admin">>), + expect_job_data({ok, #{?REP => #{?REP_USER => <<"someuser">>}}}), UserCtx = #user_ctx{name = <<"adm">>, roles = [<<"_admin">>]}, ?assertEqual(ok, check_authorization(<<"RepId">>, UserCtx)) end). t_username_must_match() -> - ?_test(begin - expect_rep_user_ctx(<<"user">>, <<"somerole">>), - UserCtx1 = #user_ctx{name = <<"user">>, roles = [<<"somerole">>]}, + ?_test(begin + expect_job_data({ok, #{?REP => #{?REP_USER => <<"user1">>}}}), + UserCtx1 = #user_ctx{name = <<"user1">>, roles = [<<"somerole">>]}, ?assertEqual(ok, check_authorization(<<"RepId">>, UserCtx1)), UserCtx2 = #user_ctx{name = <<"other">>, roles = [<<"somerole">>]}, ?assertThrow({unauthorized, _}, check_authorization(<<"RepId">>, @@ -327,8 +532,8 @@ t_username_must_match() -> t_replication_not_found() -> - ?_test(begin - meck:expect(couch_replicator_scheduler, rep_state, 1, nil), + ?_test(begin + expect_job_data({error, not_found}), UserCtx1 = #user_ctx{name = <<"user">>, roles = [<<"somerole">>]}, ?assertEqual(not_found, check_authorization(<<"RepId">>, UserCtx1)), UserCtx2 = #user_ctx{name = <<"adm">>, roles = [<<"_admin">>]}, @@ -336,57 +541,8 @@ t_replication_not_found() -> end). -expect_rep_user_ctx(Name, Role) -> - meck:expect(couch_replicator_scheduler, rep_state, - fun(_Id) -> - UserCtx = #user_ctx{name = Name, roles = [Role]}, - #rep{user_ctx = UserCtx} - end). - +expect_job_data(JobDataRes) -> + meck:expect(couch_replicator_jobs, get_job_data, 2, JobDataRes). -strip_url_creds_test_() -> - { - setup, - fun() -> - meck:expect(config, get, fun(_, _, Default) -> Default end) - end, - fun(_) -> - meck:unload() - end, - [ - t_strip_http_basic_creds(), - t_strip_http_props_creds(), - t_strip_local_db_creds() - ] - }. - - -t_strip_local_db_creds() -> - ?_test(?assertEqual(<<"localdb">>, strip_url_creds(<<"localdb">>))). - - -t_strip_http_basic_creds() -> - ?_test(begin - Url1 = <<"http://adm:pass@host/db">>, - ?assertEqual(<<"http://adm:*****@host/db/">>, strip_url_creds(Url1)), - Url2 = <<"https://adm:pass@host/db">>, - ?assertEqual(<<"https://adm:*****@host/db/">>, strip_url_creds(Url2)), - Url3 = <<"http://adm:pass@host:80/db">>, - ?assertEqual(<<"http://adm:*****@host:80/db/">>, strip_url_creds(Url3)), - Url4 = <<"http://adm:pass@host/db?a=b&c=d">>, - ?assertEqual(<<"http://adm:*****@host/db?a=b&c=d">>, - strip_url_creds(Url4)) - end). - - -t_strip_http_props_creds() -> - ?_test(begin - Props1 = {[{<<"url">>, <<"http://adm:pass@host/db">>}]}, - ?assertEqual(<<"http://adm:*****@host/db/">>, strip_url_creds(Props1)), - Props2 = {[ {<<"url">>, <<"http://host/db">>}, - {<<"headers">>, {[{<<"Authorization">>, <<"Basic pa55">>}]}} - ]}, - ?assertEqual(<<"http://host/db/">>, strip_url_creds(Props2)) - end). -endif. diff --git a/src/couch_replicator/src/couch_replicator_epi.erl b/src/couch_replicator/src/couch_replicator_epi.erl new file mode 100644 index 000000000..9fb1790b5 --- /dev/null +++ b/src/couch_replicator/src/couch_replicator_epi.erl @@ -0,0 +1,58 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + + +-module(couch_replicator_epi). + + +-behaviour(couch_epi_plugin). + + +-export([ + app/0, + providers/0, + services/0, + data_subscriptions/0, + data_providers/0, + processes/0, + notify/3 +]). + + +app() -> + couch_replicator. + + +providers() -> + [ + {fabric2_db, couch_replicator_fabric2_plugin} + ]. + + +services() -> + []. + + +data_subscriptions() -> + []. + + +data_providers() -> + []. + + +processes() -> + []. + + +notify(_Key, _Old, _New) -> + ok. diff --git a/src/couch_replicator/src/couch_replicator_fabric2_plugin.erl b/src/couch_replicator/src/couch_replicator_fabric2_plugin.erl new file mode 100644 index 000000000..7bf614512 --- /dev/null +++ b/src/couch_replicator/src/couch_replicator_fabric2_plugin.erl @@ -0,0 +1,36 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + + +-module(couch_replicator_fabric2_plugin). + + +-export([ + after_db_create/2, + after_db_delete/2, + after_doc_write/6 +]). + + +after_db_create(DbName, DbUUID) -> + couch_replicator:after_db_create(DbName, DbUUID), + [DbName, DbUUID]. + + +after_db_delete(DbName, DbUUID) -> + couch_replicator:after_db_delete(DbName, DbUUID), + [DbName, DbUUID]. + + +after_doc_write(Db, Doc, Winner, OldWinner, RevId, Seq)-> + couch_replicator:after_doc_write(Db, Doc, Winner, OldWinner, RevId, Seq), + [Db, Doc, Winner, OldWinner, RevId, Seq]. -- cgit v1.2.1 From 7dd18993f25b107db05a9e99e01770c7f84bd33b Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Fri, 28 Aug 2020 04:35:07 -0400 Subject: Update backend replicator modules Backend replicator modules execute replication jobs. The two main modules reponsible for job management and execution are `couch_replicator_job` and `couch_replicator_job_server`. `couch_replicator_job` - Is the main process of each replication job. When this process starts, it waits in the `couch_jobs:accept/2` call. This may take an indefinite amount of time. The spawned `couch_replicator_job` waiting in accept like that is called internally an "acceptor". The main pattern of execution is multiple acceptors are started, and after some of them accept jobs, they become "workers". - After it accepts a job, it parses the `couch_jobs` job data, which contains the `Rep` object and calculates the replication ID from it. Replication ID calculation may involve making a call to the source endpoint in order to fetch the contents of the javascript filter. Then, the `Rep` object and the replication ID is used to construct the internal `#rep_state{}` state record of the `gen_server`. - Multiple replication jobs may end up trying to run the same replication (with the same replication ID) concurrently. To manage these types of colisions, `check_ownership/3` function is called to determine if the current replication is the correct `owner` of that replication. If it is not, then the job maybe fail and exit. - There is a periodic checkpoint timer which sends a `checkpoint` message. The checkpoint frequency is calculated as the minimum of the `couch_jobs` activity timeout and the configured checkpoint interval. During each checkpoint attempt, there is a call to `couch_jobs:update/3` which updates the job's data with latest state and ensure the job doesn't get re-enqueued due to inactivity. - If the job completes, then `couch_jobs:finish/3` is called and the replication process exits `normal`. If the job crashes, there is a consecutive error count field (`?ERROR_COUNT`) which, is used to calculate the backoff penalty. There is an exponential backoff schedule, that starts with the base value, then doubles, but only up to a maximum value. Both the base and the maximum values are configurable with the `min_backoff_penalty_sec` and `max_backoff_penalty_sec` settings respecively. This is an improvement from before where the users could only influence the maximum backoff penalty by reducing the number of failed events kept by each job. `couch_replicator_server` - This is a module which spawns and keeps track of `couch_replicator_job` processes. - Periodically, every `interval_sec` seconds, it runs the `reschedule` function which checks for pending jobs. If they are some, it will start acceptors to run them. If those acceptors become workers, and if the total number of workers goes above the `max_jobs` setting, the oldest `continuous` workers will be stopped until the total number of jobs falls below `max_jobs` value. - In addition to `max_jobs` limit, there is a `max_churn` limit which determines up to how many job starts to allow during each scheduling interval. As jobs are started, they reduce the available churn "budget" for that cycle and after it goes below 0 no more jobs can start until the next cycle. - This module also performs transient job cleanup. After transient jobs stop running previously they simply vanished but with this update they maybe linger for at least `transient_job_max_age_sec` seconds. --- src/couch_replicator/priv/stats_descriptions.cfg | 44 +- .../src/couch_replicator_httpc.erl | 2 +- src/couch_replicator/src/couch_replicator_job.erl | 1612 ++++++++++++++++++++ .../src/couch_replicator_job_server.erl | 370 +++++ 4 files changed, 1999 insertions(+), 29 deletions(-) create mode 100644 src/couch_replicator/src/couch_replicator_job.erl create mode 100644 src/couch_replicator/src/couch_replicator_job_server.erl diff --git a/src/couch_replicator/priv/stats_descriptions.cfg b/src/couch_replicator/priv/stats_descriptions.cfg index d9efb91dc..1bb151c1c 100644 --- a/src/couch_replicator/priv/stats_descriptions.cfg +++ b/src/couch_replicator/priv/stats_descriptions.cfg @@ -54,14 +54,6 @@ {type, counter}, {desc, <<"number of replicator workers started">>} ]}. -{[couch_replicator, cluster_is_stable], [ - {type, gauge}, - {desc, <<"1 if cluster is stable, 0 if unstable">>} -]}. -{[couch_replicator, db_scans], [ - {type, counter}, - {desc, <<"number of times replicator db scans have been started">>} -]}. {[couch_replicator, docs, dbs_created], [ {type, counter}, {desc, <<"number of db shard creations seen by replicator doc processor">>} @@ -70,10 +62,6 @@ {type, counter}, {desc, <<"number of db shard deletions seen by replicator doc processor">>} ]}. -{[couch_replicator, docs, dbs_found], [ - {type, counter}, - {desc, <<"number of db shard found by replicator doc processor">>} -]}. {[couch_replicator, docs, db_changes], [ {type, counter}, {desc, <<"number of db changes processed by replicator doc processor">>} @@ -88,43 +76,43 @@ ]}. {[couch_replicator, jobs, adds], [ {type, counter}, - {desc, <<"number of jobs added to replicator scheduler">>} + {desc, <<"number of jobs added to replicator">>} ]}. -{[couch_replicator, jobs, duplicate_adds], [ +{[couch_replicator, jobs, removes], [ {type, counter}, - {desc, <<"number of duplicate jobs added to replicator scheduler">>} + {desc, <<"number of jobs removed from replicator">>} ]}. -{[couch_replicator, jobs, removes], [ +{[couch_replicator, jobs, accepts], [ {type, counter}, - {desc, <<"number of jobs removed from replicator scheduler">>} + {desc, <<"number of job acceptors started">>} ]}. {[couch_replicator, jobs, starts], [ {type, counter}, - {desc, <<"number of jobs started by replicator scheduler">>} + {desc, <<"number of jobs started by replicator">>} ]}. {[couch_replicator, jobs, stops], [ {type, counter}, - {desc, <<"number of jobs stopped by replicator scheduler">>} + {desc, <<"number of jobs stopped by replicator">>} ]}. {[couch_replicator, jobs, crashes], [ {type, counter}, - {desc, <<"number of job crashed noticed by replicator scheduler">>} + {desc, <<"number of job crashed noticed by replicator">>} ]}. {[couch_replicator, jobs, running], [ {type, gauge}, - {desc, <<"replicator scheduler running jobs">>} + {desc, <<"replicator running jobs">>} ]}. -{[couch_replicator, jobs, pending], [ +{[couch_replicator, jobs, accepting], [ {type, gauge}, - {desc, <<"replicator scheduler pending jobs">>} + {desc, <<"replicator acceptors count">>} ]}. -{[couch_replicator, jobs, crashed], [ - {type, gauge}, - {desc, <<"replicator scheduler crashed jobs">>} +{[couch_replicator, jobs, reschedules], [ + {type, counter}, + {desc, <<"replicator reschedule cycles counter">>} ]}. -{[couch_replicator, jobs, total], [ +{[couch_replicator, jobs, pending], [ {type, gauge}, - {desc, <<"total number of replicator scheduler jobs">>} + {desc, <<"replicator pending count">>} ]}. {[couch_replicator, connection, acquires], [ {type, counter}, diff --git a/src/couch_replicator/src/couch_replicator_httpc.erl b/src/couch_replicator/src/couch_replicator_httpc.erl index 4dce319dc..f11d1895d 100644 --- a/src/couch_replicator/src/couch_replicator_httpc.erl +++ b/src/couch_replicator/src/couch_replicator_httpc.erl @@ -327,7 +327,7 @@ total_error_time_exceeded(#httpdb{first_error_timestamp = nil}) -> false; total_error_time_exceeded(#httpdb{first_error_timestamp = ErrorTimestamp}) -> - HealthThresholdSec = couch_replicator_scheduler:health_threshold(), + HealthThresholdSec = couch_replicator_job:health_threshold(), % Theshold value is halved because in the calling code the next step % is a doubling. Not halving here could mean sleeping too long and % exceeding the health threshold. diff --git a/src/couch_replicator/src/couch_replicator_job.erl b/src/couch_replicator/src/couch_replicator_job.erl new file mode 100644 index 000000000..ed3d00d7b --- /dev/null +++ b/src/couch_replicator/src/couch_replicator_job.erl @@ -0,0 +1,1612 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_replicator_job). + + +-behaviour(gen_server). + + +-export([ + start_link/0 +]). + +-export([ + init/1, + terminate/2, + handle_call/3, + handle_cast/2, + handle_info/2, + format_status/2, + code_change/3 +]). + +-export([ + accept/0, + health_threshold/0 +]). + + +-include_lib("couch/include/couch_db.hrl"). +-include_lib("couch_replicator/include/couch_replicator_api_wrap.hrl"). +-include("couch_replicator.hrl"). + + +-define(LOWEST_SEQ, 0). +-define(DEFAULT_CHECKPOINT_INTERVAL, 30000). +-define(STARTUP_JITTER_DEFAULT, 5000). +-define(DEFAULT_MIN_BACKOFF_PENALTY_SEC, 32). +-define(DEFAULT_MAX_BACKOFF_PENALTY_SEC, 2 * 24 * 3600). +-define(DEFAULT_HEALTH_THRESHOLD_SEC, 2 * 60). +-define(DEFAULT_MAX_HISTORY, 10). +-define(DEFAULT_STATS_UPDATE_INTERVAL_SEC, 10). + + +-record(rep_state, { + job, + job_data, + id, + base_id, + doc_id, + db_name, + db_uuid, + source_name, + target_name, + source, + target, + history, + checkpoint_history, + start_seq, + committed_seq, + current_through_seq, + seqs_in_progress = [], + highest_seq_done = {0, ?LOWEST_SEQ}, + source_log, + target_log, + rep_starttime, + src_starttime, + tgt_starttime, + checkpoint_timer, + stats_timer, + changes_queue, + changes_manager, + changes_reader, + workers, + stats = couch_replicator_stats:new(), + session_id, + source_seq = nil, + use_checkpoints = true, + checkpoint_interval = ?DEFAULT_CHECKPOINT_INTERVAL, + user = null, + options = #{} +}). + + +start_link() -> + gen_server:start_link(?MODULE, [], []). + + +init(_) -> + process_flag(trap_exit, true), + {ok, delayed_init, 0}. + + +terminate(normal, #rep_state{} = State) -> + #rep_state{ + job = Job, + job_data = JobData, + checkpoint_history = History + } = State, + ok = complete_job(undefined, Job, JobData, History), + close_endpoints(State); + +terminate(shutdown, #rep_state{} = State0) -> + % Replication stopped by the job server + State1 = cancel_timers(State0), + State3 = case do_checkpoint(State1) of + {ok, State2} -> + State2; + Error -> + Msg = "~p : Failed last checkpoint. Job: ~p Error: ~p", + couch_log:error(Msg, [?MODULE, State1#rep_state.id, Error]), + State1 + end, + #rep_state{job = Job, job_data = JobData} = State3, + ok = reschedule(undefined, Job, JobData), + ok = close_endpoints(State3); + +terminate({shutdown, Error}, {init_error, Stack}) -> + % Termination in init, before the job had initialized + case Error of + max_backoff -> couch_log:warning("~p job backed off", [?MODULE]); + finished -> couch_log:notice("~p job finished in init", [?MODULE]); + _ -> couch_log:error("~p job failed ~p ~p", [?MODULE, Error, Stack]) + end, + ok; + +terminate({shutdown, finished}, #rep_state{} = State) -> + % Job state was already updated and job is marked as finished + ok = close_endpoints(State); + +terminate({shutdown, halt}, #rep_state{} = State) -> + % Job is re-enqueued and possibly already running somewhere else + couch_log:error("~p job ~p halted", [?MODULE, State#rep_state.id]), + ok = close_endpoints(State); + +terminate(Reason0, #rep_state{} = State0) -> + State = update_job_state(State0), + Reason = case Reason0 of + {shutdown, Err} -> Err; + _ -> Reason0 + end, + #rep_state{ + id = RepId, + job = Job, + job_data = JobData, + source_name = Source, + target_name = Target + } = State, + couch_log:error("Replication `~s` (`~s` -> `~s`) failed: ~p", + [RepId, Source, Target, Reason]), + ok = reschedule_on_error(undefined, Job, JobData, Reason), + ok = close_endpoints(State). + + +handle_call({add_stats, Stats}, From, State) -> + gen_server:reply(From, ok), + NewStats = couch_replicator_stats:sum_stats(State#rep_state.stats, Stats), + {noreply, State#rep_state{stats = NewStats}}; + +handle_call({report_seq_done, Seq, StatsInc}, From, #rep_state{} = State) -> + #rep_state{ + seqs_in_progress = SeqsInProgress, + highest_seq_done = HighestDone, + current_through_seq = ThroughSeq, + stats = Stats + } = State, + gen_server:reply(From, ok), + {NewThroughSeq0, NewSeqsInProgress} = case SeqsInProgress of + [] -> + {Seq, []}; + [Seq | Rest] -> + {Seq, Rest}; + [_ | _] -> + {ThroughSeq, ordsets:del_element(Seq, SeqsInProgress)} + end, + NewHighestDone = lists:max([HighestDone, Seq]), + NewThroughSeq = case NewSeqsInProgress of + [] -> + lists:max([NewThroughSeq0, NewHighestDone]); + _ -> + NewThroughSeq0 + end, + couch_log:debug("Worker reported seq ~p, through seq was ~p, " + "new through seq is ~p, highest seq done was ~p, " + "new highest seq done is ~p~n" + "Seqs in progress were: ~p~nSeqs in progress are now: ~p", + [Seq, ThroughSeq, NewThroughSeq, HighestDone, + NewHighestDone, SeqsInProgress, NewSeqsInProgress]), + NewState = State#rep_state{ + stats = couch_replicator_stats:sum_stats(Stats, StatsInc), + current_through_seq = NewThroughSeq, + seqs_in_progress = NewSeqsInProgress, + highest_seq_done = NewHighestDone + }, + {noreply, maybe_update_job_state(NewState)}; + +handle_call(Msg, _From, St) -> + {stop, {bad_call, Msg}, {bad_call, Msg}, St}. + + +handle_cast({report_seq, Seq}, + #rep_state{seqs_in_progress = SeqsInProgress} = State) -> + NewSeqsInProgress = ordsets:add_element(Seq, SeqsInProgress), + {noreply, State#rep_state{seqs_in_progress = NewSeqsInProgress}}; + +handle_cast(Msg, St) -> + {stop, {bad_cast, Msg}, St}. + + +handle_info(timeout, delayed_init) -> + try delayed_init() of + {ok, State} -> {noreply, State}; + {stop, Reason, State} -> {stop, Reason, State} + catch + exit:{shutdown, Exit} when Exit =:= finished orelse Exit =:= halt -> + Stack = erlang:get_stacktrace(), + {stop, {shutdown, Exit}, {init_error, Stack}}; + _Tag:Error -> + ShutdownReason = {error, replication_start_error(Error)}, + Stack = erlang:get_stacktrace(), + {stop, {shutdown, ShutdownReason}, {init_error, Stack}} + end; + +handle_info(stats_update, #rep_state{} = State) -> + State1 = cancel_stats_timer(State), + State2 = update_job_state(State1), + {noreply, State2}; + +handle_info(checkpoint, State0) -> + State = cancel_checkpoint_timer(State0), + ok = check_user_filter(State), + case do_checkpoint(State) of + {ok, State1} -> + couch_stats:increment_counter([couch_replicator, checkpoints, + success]), + {noreply, start_checkpoint_timer(State1)}; + Error -> + couch_stats:increment_counter([couch_replicator, checkpoints, + failure]), + {stop, Error, State} + end; + +handle_info(shutdown, St) -> + {stop, shutdown, St}; + +handle_info({'EXIT', Pid, max_backoff}, State) -> + couch_log:error("Max backoff reached child process ~p", [Pid]), + {stop, {shutdown, max_backoff}, State}; + +handle_info({'EXIT', Pid, {shutdown, max_backoff}}, State) -> + couch_log:error("Max backoff reached child process ~p", [Pid]), + {stop, {shutdown, max_backoff}, State}; + +handle_info({'EXIT', Pid, normal}, #rep_state{changes_reader=Pid} = State) -> + {noreply, State}; + +handle_info({'EXIT', Pid, Reason0}, #rep_state{changes_reader=Pid} = State) -> + couch_stats:increment_counter([couch_replicator, changes_reader_deaths]), + Reason = case Reason0 of + {changes_req_failed, _, _} = HttpFail -> + HttpFail; + {http_request_failed, _, _, {error, {code, Code}}} -> + {changes_req_failed, Code}; + {http_request_failed, _, _, {error, Err}} -> + {changes_req_failed, Err}; + Other -> + {changes_reader_died, Other} + end, + couch_log:error("ChangesReader process died with reason: ~p", [Reason]), + {stop, {shutdown, Reason}, cancel_timers(State)}; + +handle_info({'EXIT', Pid, normal}, #rep_state{changes_manager=Pid} = State) -> + {noreply, State}; + +handle_info({'EXIT', Pid, Reason}, #rep_state{changes_manager=Pid} = State) -> + couch_stats:increment_counter([couch_replicator, changes_manager_deaths]), + couch_log:error("ChangesManager process died with reason: ~p", [Reason]), + {stop, {shutdown, {changes_manager_died, Reason}}, cancel_timers(State)}; + +handle_info({'EXIT', Pid, normal}, #rep_state{changes_queue=Pid} = State) -> + {noreply, State}; + +handle_info({'EXIT', Pid, Reason}, #rep_state{changes_queue=Pid} = State) -> + couch_stats:increment_counter([couch_replicator, changes_queue_deaths]), + couch_log:error("ChangesQueue process died with reason: ~p", [Reason]), + {stop, {shutdown, {changes_queue_died, Reason}}, cancel_timers(State)}; + +handle_info({'EXIT', Pid, normal}, #rep_state{workers = Workers} = State) -> + case Workers -- [Pid] of + Workers -> + %% Processes might be linked by replicator's auth plugins so + %% we tolerate them exiting `normal` here and don't crash + LogMsg = "~p: unknown pid exited `normal` ~p", + couch_log:error(LogMsg, [?MODULE, Pid]), + {noreply, State#rep_state{workers = Workers}}; + [] -> + catch unlink(State#rep_state.changes_manager), + catch exit(State#rep_state.changes_manager, kill), + do_last_checkpoint(State); + Workers2 -> + {noreply, State#rep_state{workers = Workers2}} + end; + +handle_info({'EXIT', Pid, Reason}, #rep_state{workers = Workers} = State) -> + State2 = cancel_timers(State), + case lists:member(Pid, Workers) of + false -> + {stop, {unknown_process_died, Pid, Reason}, State2}; + true -> + couch_stats:increment_counter([couch_replicator, worker_deaths]), + StopReason = case Reason of + {shutdown, _} = Err -> + Err; + Other -> + ErrLog = "Worker ~p died with reason: ~p", + couch_log:error(ErrLog, [Pid, Reason]), + {worker_died, Pid, Other} + end, + {stop, StopReason, State2} + end; + +handle_info({Ref, ready}, St) when is_reference(Ref) -> + LogMsg = "~p : spurious erlfdb future ready message ~p", + couch_log:notice(LogMsg, [?MODULE, Ref]), + {noreply, St}; + +handle_info(Msg, St) -> + {stop, {bad_info, Msg}, St}. + + +format_status(_Opt, [_PDict, State]) -> + #rep_state{ + id = Id, + source = Source, + target = Target, + start_seq = StartSeq, + source_seq = SourceSeq, + committed_seq = CommitedSeq, + current_through_seq = ThroughSeq, + highest_seq_done = HighestSeqDone, + session_id = SessionId, + doc_id = DocId, + db_name = DbName, + options = Options + } = state_strip_creds(State), + [ + {rep_id, Id}, + {source, couch_replicator_api_wrap:db_uri(Source)}, + {target, couch_replicator_api_wrap:db_uri(Target)}, + {db_name, DbName}, + {doc_id, DocId}, + {options, Options}, + {session_id, SessionId}, + {start_seq, StartSeq}, + {source_seq, SourceSeq}, + {committed_seq, CommitedSeq}, + {current_through_seq, ThroughSeq}, + {highest_seq_done, HighestSeqDone} + ]. + + +code_change(_OldVsn, #rep_state{}=State, _Extra) -> + {ok, State}. + + +accept() -> + couch_stats:increment_counter([couch_replicator, jobs, accepts]), + Now = erlang:system_time(second), + case couch_replicator_jobs:accept_job(Now + 5) of + {ok, Job, #{?REP := Rep} = JobData} -> + Normal = case Rep of + #{?OPTIONS := #{} = Options} -> + not maps:get(<<"continuous">>, Options, false); + _ -> + true + end, + couch_replicator_job_server:accepted(self(), Normal), + {ok, Job, JobData}; + {error, not_found} -> + timer:sleep(accept_jitter_msec()), + ?MODULE:accept() + end. + + +% Health threshold is the minimum amount of time an unhealthy job should run +% crashing before it is considered to be healthy again. HealtThreashold should +% not be 0 as jobs could start and immediately crash, and it shouldn't be +% infinity, since then consecutive crashes would accumulate forever even if +% job is back to normal. +health_threshold() -> + config:get_integer("replicator", "health_threshold_sec", + ?DEFAULT_HEALTH_THRESHOLD_SEC). + + +delayed_init() -> + {ok, Job, JobData} = accept(), + try do_init(Job, JobData) of + State = #rep_state{} -> {ok, State} + catch + exit:{http_request_failed, _, _, max_backoff} -> + Stack = erlang:get_stacktrace(), + reschedule_on_error(undefined, Job, JobData, max_backoff), + {stop, {shutdown, max_backoff}, {init_error, Stack}}; + exit:{shutdown, Exit} when Exit =:= finished orelse Exit =:= halt -> + Stack = erlang:get_stacktrace(), + {stop, {shutdown, Exit}, {init_error, Stack}}; + _Tag:Error -> + Reason = {error, replication_start_error(Error)}, + Stack = erlang:get_stacktrace(), + ErrMsg = "~p : job ~p failed during startup ~p stack:~p", + couch_log:error(ErrMsg, [?MODULE, Job, Reason, Stack]), + reschedule_on_error(undefined, Job, JobData, Reason), + {stop, {shutdown, Reason}, {init_error, Stack}} + end. + + +do_init(Job, #{} = JobData) -> + couch_stats:increment_counter([couch_replicator, jobs, starts]), + % This may make a network request, then may fail and reschedule the job + {RepId, BaseId} = get_rep_id(undefined, Job, JobData), + #{ + ?DB_NAME := DbName, + ?DB_UUID := DbUUID, + ?DOC_ID := DocId + } = JobData, + + ok = couch_replicator_docs:remove_state_fields(DbName, DbUUID, DocId), + + % Finish if job is in a failed state already + case JobData of + #{?STATE := ?ST_FAILED, ?STATE_INFO := Error} -> + ok = fail_job(undefined, Job, JobData, Error), + exit({shutdown, finished}); + #{?STATE := St} when is_binary(St), St =/= ?ST_FAILED -> + ok + end, + + JobsTx = couch_jobs_fdb:get_jtx(), + {Job1, JobData1, Owner} = couch_jobs_fdb:tx(JobsTx, fun(JTx) -> + init_job_data(JTx, Job, JobData, RepId, BaseId) + end), + + % Handle ownership decision here to be outside of the transaction + case Owner of + owner -> ok; + not_owner -> exit({shutdown, finished}) + end, + + #rep_state{ + source = Source, + target = Target, + start_seq = {_Ts, StartSeq}, + options = Options, + doc_id = DocId, + db_name = DbName + } = State = init_state(Job1, JobData1), + + NumWorkers = maps:get(<<"worker_processes">>, Options), + BatchSize = maps:get(<<"worker_batch_size">>, Options), + {ok, ChangesQueue} = couch_work_queue:new([ + {max_items, BatchSize * NumWorkers * 2}, + {max_size, 100 * 1024 * NumWorkers} + ]), + + % This starts the _changes reader process. It adds the changes from the + % source db to the ChangesQueue. + {ok, ChangesReader} = couch_replicator_changes_reader:start_link( + StartSeq, Source, ChangesQueue, Options + ), + + % Changes manager - responsible for dequeing batches from the changes queue + % and deliver them to the worker processes. + ChangesManager = spawn_changes_manager(self(), ChangesQueue, BatchSize), + + % This starts the worker processes. They ask the changes queue manager for + % a a batch of _changes rows to process -> check which revs are missing in + % the target, and for the missing ones, it copies them from the source to + % the target. + MaxConns = maps:get(<<"http_connections">>, Options), + Workers = lists:map(fun(_) -> + couch_stats:increment_counter([couch_replicator, workers_started]), + {ok, Pid} = couch_replicator_worker:start_link(self(), Source, Target, + ChangesManager, MaxConns), + Pid + end, lists:seq(1, NumWorkers)), + + log_replication_start(State), + + State1 = State#rep_state{ + changes_queue = ChangesQueue, + changes_manager = ChangesManager, + changes_reader = ChangesReader, + workers = Workers + }, + + update_job_state(State1). + + +init_job_data(#{jtx := true} = JTx, Job, #{?REP_ID := RepId} = JobData, RepId, + _BaseId) -> + {Job, JobData, check_ownership(JTx, Job, JobData)}; + +init_job_data(#{jtx := true} = JTx, Job, #{} = JobData, RepId, BaseId) -> + #{ + ?REP := Rep, + ?REP_ID := OldRepId, + ?DB_UUID := DbUUID, + ?DOC_ID := DocId + } = JobData, + JobId = couch_replicator_ids:job_id(Rep, DbUUID, DocId), + Now = erlang:system_time(second), + JobData1 = JobData#{ + ?REP_ID := RepId, + ?BASE_ID := BaseId, + ?STATE := ?ST_RUNNING, + ?STATE_INFO := null, + ?LAST_START := Now, + ?REP_NODE := erlang:atom_to_binary(node(), utf8), + ?REP_PID := list_to_binary(pid_to_list(self())), + ?LAST_UPDATED := Now + }, + JobData2 = case is_binary(OldRepId) andalso OldRepId =/= RepId of + true -> + % Handle Replication ID change + ok = couch_replicator_jobs:clear_old_rep_id(JTx, JobId, OldRepId), + JobData1#{ + ?REP_STATS := #{}, + ?JOB_HISTORY := [] + }; + false -> + JobData1 + end, + JobData3 = hist_append(?HIST_STARTED, Now, JobData2, undefined), + case check_ownership(JTx, Job, JobData3) of + owner -> + couch_stats:increment_counter([couch_replicator, jobs, starts]), + {Job1, JobData4} = update_job_data(JTx, Job, JobData3), + {Job1, JobData4, owner}; + not_owner -> + {Job, JobData3, not_owner} + end. + + +check_ownership(#{jtx := true} = JTx, Job, JobData) -> + #{ + ?REP_ID := RepId, + ?REP := Rep, + ?DB_UUID := DbUUID, + ?DOC_ID := DocId + } = JobData, + JobId = couch_replicator_ids:job_id(Rep, DbUUID, DocId), + case couch_replicator_jobs:try_update_rep_id(JTx, JobId, RepId) of + ok -> + owner; + {error, {replication_job_conflict, OtherJobId}} -> + case couch_replicator_jobs:get_job_data(JTx, OtherJobId) of + {ok, #{?STATE := S, ?DB_NAME := null}} when + S == ?ST_RUNNING; S == ?ST_PENDING -> + % Conflicting job is a transient job, not associated with a + % _replicator doc, so we let this job retry. This is also + % partly done for compatibility with pervious replicator + % behavior. + Error = <<"Duplicate job running: ", OtherJobId/binary>>, + reschedule_on_error(JTx, Job, JobData, Error), + not_owner; + {ok, #{?STATE := S, ?DB_NAME := <<_/binary>>}} when + S == ?ST_RUNNING; S == ?ST_PENDING -> + % Conflicting job is a permanent replication job, so this + % job is marked as failed. + Error = <<"Duplicate job running: ", OtherJobId/binary>>, + fail_job(JTx, Job, JobData, Error), + not_owner; + {ok, #{}} -> + LogMsg = "~p : Job ~p usurping job ~p for replication ~p", + couch_log:warning(LogMsg, [?MODULE, JobId, OtherJobId, + RepId]), + couch_replicator_jobs:update_rep_id(JTx, JobId, RepId), + owner; + {error, not_found} -> + LogMsg = "~p : Orphan replication job reference ~p -> ~p", + couch_log:error(LogMsg, [?MODULE, RepId, OtherJobId]), + couch_replicator_jobs:update_rep_id(JTx, JobId, RepId), + owner + end + end. + + +update_job_data(Tx, #rep_state{} = State) -> + #rep_state{job = Job, job_data = JobData} = State, + {Job1, JobData1} = update_job_data(Tx, Job, JobData), + State#rep_state{job = Job1, job_data = JobData1}. + + +update_job_data(Tx, Job, #{} = JobData) -> + case couch_replicator_jobs:update_job_data(Tx, Job, JobData) of + {ok, Job1} -> + {Job1, JobData}; + {error, halt} -> + exit({shutdown, halt}) + end. + + +update_active_task_info(#rep_state{} = State) -> + #rep_state{ + job_data = JobData, + user = User, + id = RepId, + db_name = DbName, + doc_id = DocId, + source_name = Source, + target_name = Target, + options = Options, + highest_seq_done = {_, SourceSeq}, + checkpoint_interval = CheckpointInterval + } = State, + + #{ + ?REP := #{?START_TIME := StartTime}, + ?REP_STATS := Stats, + ?REP_NODE := Node, + ?REP_PID := Pid, + ?LAST_UPDATED := LastUpdated + } = JobData, + + Info = maps:merge(Stats, #{ + <<"type">> => <<"replication">>, + <<"user">> => User, + <<"replication_id">> => RepId, + <<"database">> => DbName, + <<"doc_id">> => DocId, + <<"source">> => ?l2b(Source), + <<"target">> => ?l2b(Target), + <<"continuous">> => maps:get(<<"continuous">>, Options, false), + <<"source_seq">> => SourceSeq, + <<"checkpoint_interval">> => CheckpointInterval, + <<"node">> => Node, + <<"pid">> => Pid, + <<"updated_on">> => LastUpdated, + <<"started_on">> => StartTime + }), + + JobData1 = fabric2_active_tasks:update_active_task_info(JobData, Info), + State#rep_state{job_data = JobData1}. + + +% Transient jobs don't get rescheduled on error with the exception of +% max_backoff errors. +% +reschedule_on_error(JTx, Job, #{?DB_NAME := null} = JobData, Error) when + Error =/= max_backoff -> + fail_job(JTx, Job, JobData, Error); + +reschedule_on_error(JTx, Job, #{} = JobData0, Error0) -> + Error = error_info(Error0), + + Now = erlang:system_time(second), + + JobData = maybe_heal(JobData0, Now), + #{?ERROR_COUNT := ErrorCount} = JobData, + JobData1 = JobData#{ + ?STATE := ?ST_CRASHING, + ?STATE_INFO := Error, + ?ERROR_COUNT := ErrorCount + 1, + ?LAST_ERROR := Error, + ?REP_NODE := null, + ?REP_PID := null + }, + JobData2 = hist_append(?HIST_CRASHED, Now, JobData1, Error), + JobData3 = hist_append(?HIST_PENDING, Now, JobData2, undefined), + JobData4 = fabric2_active_tasks:update_active_task_info(JobData3, #{}), + + couch_stats:increment_counter([couch_replicator, jobs, crashes]), + + Time = get_backoff_time(ErrorCount + 1), + case couch_replicator_jobs:reschedule_job(JTx, Job, JobData4, Time) of + ok -> ok; + {error, halt} -> exit({shutdown, halt}) + end. + + +reschedule(JTx, Job, #{} = JobData) -> + Now = erlang:system_time(second), + + JobData1 = JobData#{ + ?STATE := ?ST_PENDING, + ?STATE_INFO := null, + ?LAST_ERROR := null, + ?REP_NODE := null, + ?REP_PID := null + }, + JobData2 = hist_append(?HIST_STOPPED, Now, JobData1, undefined), + JobData3 = hist_append(?HIST_PENDING, Now, JobData2, undefined), + JobData4 = fabric2_active_tasks:update_active_task_info(JobData3, #{}), + + couch_stats:increment_counter([couch_replicator, jobs, stops]), + + Time = Now + couch_replicator_job_server:scheduling_interval_sec(), + case couch_replicator_jobs:reschedule_job(JTx, Job, JobData4, Time) of + ok -> ok; + {error, halt} -> exit({shutdown, halt}) + end. + + +fail_job(JTx, Job, #{} = JobData, Error0) -> + Error = error_info(Error0), + + Now = erlang:system_time(second), + + #{ + ?ERROR_COUNT := ErrorCount, + ?DB_NAME := DbName, + ?DB_UUID := DbUUID, + ?DOC_ID := DocId + } = JobData, + + JobData1 = JobData#{ + ?STATE := ?ST_FAILED, + ?STATE_INFO := Error, + ?ERROR_COUNT := ErrorCount + 1, + ?REP_NODE := null, + ?REP_PID := null + }, + JobData2 = hist_append(?HIST_CRASHED, Now, JobData1, Error), + JobData3 = fabric2_active_tasks:update_active_task_info(JobData2, #{}), + + couch_stats:increment_counter([couch_replicator, jobs, crashes]), + + case couch_replicator_jobs:finish_job(JTx, Job, JobData3) of + ok -> + couch_replicator_docs:update_failed(DbName, DbUUID, DocId, Error), + ok; + {error, halt} -> + exit({shutdown, halt}) + end. + + +complete_job(JTx, Job, #{} = JobData, CheckpointHistory) -> + #{ + ?DB_NAME := Db, + ?DB_UUID := DbUUID, + ?DOC_ID := DocId, + ?REP_STATS := RepStats, + ?REP := Rep + } = JobData, + + Now = erlang:system_time(second), + + #{?START_TIME := StartTime} = Rep, + JobData1 = JobData#{ + ?STATE := ?ST_COMPLETED, + ?CHECKPOINT_HISTORY := CheckpointHistory, + ?STATE_INFO := RepStats, + ?REP_NODE := null, + ?REP_PID := null + }, + JobData2 = hist_append(?HIST_STOPPED, Now, JobData1, undefined), + JobData3 = fabric2_active_tasks:update_active_task_info(JobData2, #{}), + + couch_stats:increment_counter([couch_replicator, jobs, stops]), + + case couch_replicator_jobs:finish_job(JTx, Job, JobData3) of + ok -> + StartISO8601 = couch_replicator_utils:iso8601(StartTime), + Stats = maps:merge(RepStats, #{<<"start_time">> => StartISO8601}), + couch_replicator_docs:update_completed(Db, DbUUID, DocId, Stats), + ok; + {error, halt} -> + exit({shutdown, halt}) + end. + + +error_info(Error0) -> + case Error0 of + <<_/binary>> -> + Error0; + undefined -> + undefined; + null -> + null; + Atom when is_atom(Atom) -> + atom_to_binary(Atom, utf8); + {shutdown, Atom} when is_atom(Atom) -> + atom_to_binary(Atom, utf8); + {shutdown, Err} -> + couch_replicator_utils:rep_error_to_binary(Err); + {error, Atom} when is_atom(Atom) -> + atom_to_binary(Atom, utf8); + {error, {Err, Reason}} when is_atom(Err) -> + ReasonBin = couch_replicator_utils:rep_error_to_binary(Reason), + #{ + <<"error">> => atom_to_binary(Err, utf8), + <<"reason">> => ReasonBin + }; + _Other -> + couch_replicator_utils:rep_error_to_binary(Error0) + end. + + +get_rep_id(JTx, Job, #{} = JobData) -> + #{?REP := Rep} = JobData, + try + couch_replicator_ids:replication_id(Rep) + catch + throw:{filter_fetch_error, Error} -> + Error1 = io_lib:format("Filter fetch error ~p", [Error]), + reschedule_on_error(JTx, Job, JobData, Error1), + exit({shutdown, finished}) + end. + + +% After job run continuously for some time we consider it "healed" and reset +% its consecutive error count. +maybe_heal(#{} = JobData, Now) -> + #{?LAST_START := LastStart} = JobData, + case Now - LastStart > health_threshold() of + true -> JobData#{?ERROR_COUNT := 0, ?LAST_ERROR := null}; + false -> JobData + end. + + +get_backoff_time(ErrCnt) -> + Max = min(max_backoff_penalty_sec(), 3600 * 24 * 30), + Min = max(min_backoff_penalty_sec(), 2), + + % Calculate the max exponent so exponentiation doesn't blow up + MaxExp = math:log2(Max) - math:log2(Min), + + % This is the recommended backoff amount + Wait = Min * math:pow(2, min(ErrCnt, MaxExp)), + + % Apply a 25% jitter to avoid a thundering herd effect + WaitJittered = Wait * 0.75 + rand:uniform(trunc(Wait * 0.25) + 1), + erlang:system_time(second) + trunc(WaitJittered). + + +headers_strip_creds([], Acc) -> + lists:reverse(Acc); + +headers_strip_creds([{Key, Value0} | Rest], Acc) -> + Value = case string:to_lower(Key) of + "authorization" -> "****"; + _ -> Value0 + end, + headers_strip_creds(Rest, [{Key, Value} | Acc]). + + +httpdb_strip_creds(#httpdb{url = Url, headers = Headers} = HttpDb) -> + HttpDb#httpdb{ + url = couch_util:url_strip_password(Url), + headers = headers_strip_creds(Headers, []) + }; + +httpdb_strip_creds(LocalDb) -> + LocalDb. + + +state_strip_creds(#rep_state{source = Source, target = Target} = State) -> + State#rep_state{ + source = httpdb_strip_creds(Source), + target = httpdb_strip_creds(Target) + }. + + +adjust_maxconn(Src = #{<<"http_connections">> := 1}, RepId) -> + Msg = "Adjusting minimum number of HTTP source connections to 2 for ~p", + couch_log:notice(Msg, [RepId]), + Src#{<<"http_connections">> := 2}; + +adjust_maxconn(Src, _RepId) -> + Src. + + +do_last_checkpoint(#rep_state{seqs_in_progress = [], + highest_seq_done = {_Ts, ?LOWEST_SEQ}} = State) -> + {stop, normal, cancel_timers(State)}; + +do_last_checkpoint(#rep_state{seqs_in_progress = [], + highest_seq_done = Seq} = State) -> + State1 = State#rep_state{current_through_seq = Seq}, + State2 = cancel_timers(State1), + case do_checkpoint(State2) of + {ok, State3} -> + couch_stats:increment_counter([couch_replicator, checkpoints, + success]), + {stop, normal, State3}; + Error -> + couch_stats:increment_counter([couch_replicator, checkpoints, + failure]), + {stop, Error, State2} + end. + + +start_checkpoint_timer(#rep_state{} = State) -> + CheckpointAfterMSec = State#rep_state.checkpoint_interval, + JobTimeoutMSec = couch_replicator_jobs:get_timeout() * 1000, + Wait1 = min(CheckpointAfterMSec, JobTimeoutMSec div 2), + Wait2 = trunc(Wait1 * 0.75) + rand:uniform(trunc(Wait1 * 0.25)), + TRef = erlang:send_after(Wait2, self(), checkpoint), + State#rep_state{checkpoint_timer = TRef}. + + +cancel_checkpoint_timer(#rep_state{checkpoint_timer = nil} = State) -> + State; +cancel_checkpoint_timer(#rep_state{checkpoint_timer = Timer} = State) -> + erlang:cancel_timer(Timer), + State#rep_state{checkpoint_timer = nil}. + + +start_stats_timer(#rep_state{} = State) -> + MSec = stats_update_interval_sec() * 1000, + TRef = erlang:send_after(MSec, self(), stats_update), + State#rep_state{stats_timer = TRef}. + + +cancel_stats_timer(#rep_state{stats_timer = nil} = State) -> + State; +cancel_stats_timer(#rep_state{stats_timer = Timer} = State) -> + erlang:cancel_timer(Timer), + receive stats_update -> ok after 0 -> ok end, + State#rep_state{stats_timer = nil}. + + +cancel_timers(#rep_state{} = State) -> + State1 = cancel_checkpoint_timer(State), + cancel_stats_timer(State1). + + +init_state(#{} = Job, #{} = JobData) -> + #{ + ?REP := Rep, + ?REP_ID := Id, + ?BASE_ID := BaseId, + ?DB_NAME := DbName, + ?DB_UUID := DbUUID, + ?DOC_ID := DocId, + ?LAST_ERROR := LastError + } = JobData, + #{ + ?SOURCE := Src0, + ?TARGET := Tgt, + ?START_TIME := StartTime, + ?OPTIONS := Options0 + } = Rep, + + % Optimize replication parameters if last time the jobs crashed because it + % was rate limited + Options = optimize_rate_limited_job(Options0, LastError), + + % Adjust minimum number of http source connections to 2 to avoid deadlock + Src = adjust_maxconn(Src0, BaseId), + {ok, Source} = couch_replicator_api_wrap:db_open(Src), + CreateTgt = maps:get(<<"create_target">>, Options, false), + TParams = maps:get(<<"create_target_params">>, Options, #{}), + + {ok, Target} = couch_replicator_api_wrap:db_open(Tgt, CreateTgt, TParams), + + {ok, SourceInfo} = couch_replicator_api_wrap:get_db_info(Source), + {ok, TargetInfo} = couch_replicator_api_wrap:get_db_info(Target), + + [SourceLog, TargetLog] = find_and_migrate_logs([Source, Target], Rep, + BaseId), + + {StartSeq0, History} = compare_replication_logs(SourceLog, TargetLog), + + #{?REP_STATS := Stats0} = JobData, + Stats1 = couch_replicator_stats:new(Stats0), + HistoryStats = case History of + [{[_ | _] = HProps} | _] -> couch_replicator_stats:new(HProps); + _ -> couch_replicator_stats:new() + end, + Stats2 = couch_replicator_stats:max_stats(Stats1, HistoryStats), + + StartSeq1 = maps:get(<<"since_seq">>, Options, StartSeq0), + StartSeq = {0, StartSeq1}, + + SourceSeq = get_value(<<"update_seq">>, SourceInfo, ?LOWEST_SEQ), + + #doc{body={CheckpointHistory}} = SourceLog, + + State = #rep_state{ + job = Job, + job_data = JobData, + id = Id, + base_id = BaseId, + source_name = couch_replicator_api_wrap:db_uri(Source), + target_name = couch_replicator_api_wrap:db_uri(Target), + source = Source, + target = Target, + options = Options, + history = History, + checkpoint_history = {[{<<"no_changes">>, true} | CheckpointHistory]}, + start_seq = StartSeq, + current_through_seq = StartSeq, + committed_seq = StartSeq, + source_log = SourceLog, + target_log = TargetLog, + rep_starttime = StartTime, + src_starttime = get_value(<<"instance_start_time">>, SourceInfo), + tgt_starttime = get_value(<<"instance_start_time">>, TargetInfo), + session_id = couch_uuids:random(), + source_seq = SourceSeq, + use_checkpoints = maps:get(<<"use_checkpoints">>, Options), + checkpoint_interval = maps:get(<<"checkpoint_interval">>, Options), + stats = Stats2, + stats_timer = nil, + doc_id = DocId, + db_name = DbName, + db_uuid = DbUUID + }, + start_checkpoint_timer(State). + + +find_and_migrate_logs(DbList, #{} = Rep, BaseId) when is_binary(BaseId) -> + LogId = ?l2b(?LOCAL_DOC_PREFIX ++ BaseId), + fold_replication_logs(DbList, ?REP_ID_VERSION, LogId, LogId, Rep, []). + + +fold_replication_logs([], _Vsn, _LogId, _NewId, _Rep, Acc) -> + lists:reverse(Acc); + +fold_replication_logs([Db | Rest] = Dbs, Vsn, LogId, NewId, #{} = Rep, Acc) -> + case couch_replicator_api_wrap:open_doc(Db, LogId, [ejson_body]) of + {error, <<"not_found">>} when Vsn > 1 -> + OldRepId = couch_replicator_ids:base_id(Rep, Vsn - 1), + fold_replication_logs(Dbs, Vsn - 1, + ?l2b(?LOCAL_DOC_PREFIX ++ OldRepId), NewId, Rep, Acc); + {error, <<"not_found">>} -> + fold_replication_logs(Rest, ?REP_ID_VERSION, NewId, NewId, Rep, + [#doc{id = NewId} | Acc]); + {ok, Doc} when LogId =:= NewId -> + fold_replication_logs( + Rest, ?REP_ID_VERSION, NewId, NewId, Rep, [Doc | Acc]); + {ok, Doc} -> + MigratedLog = #doc{id = NewId, body = Doc#doc.body}, + maybe_save_migrated_log(Rep, Db, MigratedLog, Doc#doc.id), + fold_replication_logs(Rest, ?REP_ID_VERSION, NewId, NewId, Rep, + [MigratedLog | Acc]) + end. + + +maybe_save_migrated_log(#{?OPTIONS := Options}, Db, #doc{} = Doc, OldId) -> + case maps:get(<<"use_checkpoints">>, Options) of + true -> + update_checkpoint(Db, Doc), + Msg = "Migrated replication checkpoint. Db:~p ~p -> ~p", + couch_log:notice(Msg, [httpdb_strip_creds(Db), OldId, Doc#doc.id]); + false -> + ok + end. + + +spawn_changes_manager(Parent, ChangesQueue, BatchSize) -> + spawn_link(fun() -> + changes_manager_loop_open(Parent, ChangesQueue, BatchSize, 1) + end). + + +changes_manager_loop_open(Parent, ChangesQueue, BatchSize, Ts) -> + receive + {get_changes, From} -> + case couch_work_queue:dequeue(ChangesQueue, BatchSize) of + closed -> + From ! {closed, self()}; + {ok, ChangesOrLastSeqs} -> + ReportSeq = case lists:last(ChangesOrLastSeqs) of + {last_seq, Seq} -> {Ts, Seq}; + #doc_info{high_seq = Seq} -> {Ts, Seq} + end, + Changes = lists:filter(fun + (#doc_info{}) -> true; + ({last_seq, _Seq}) -> false + end, ChangesOrLastSeqs), + ok = gen_server:cast(Parent, {report_seq, ReportSeq}), + From ! {changes, self(), Changes, ReportSeq} + end, + changes_manager_loop_open(Parent, ChangesQueue, BatchSize, Ts + 1) + end. + + +do_checkpoint(#rep_state{use_checkpoints=false} = State) -> + NewState = State#rep_state{ + checkpoint_history = {[{<<"use_checkpoints">>, false}]} + }, + {ok, update_job_state(NewState)}; +do_checkpoint(#rep_state{current_through_seq=S, committed_seq=S} = State) -> + {ok, update_job_state(State)}; +do_checkpoint(State) -> + #rep_state{ + source_name=SourceName, + target_name=TargetName, + source = Source, + target = Target, + history = OldHistory, + start_seq = {_, StartSeq}, + current_through_seq = {_Ts, NewSeq} = NewTsSeq, + source_log = SourceLog, + target_log = TargetLog, + rep_starttime = RepStartTime, + src_starttime = SrcInstanceStartTime, + tgt_starttime = TgtInstanceStartTime, + stats = Stats, + options = Options, + session_id = SessionId + } = State, + case commit_to_both(Source, Target) of + {source_error, Reason} -> + {checkpoint_commit_failure, <<"Failure on source commit: ", + (couch_util:to_binary(Reason))/binary>>}; + {target_error, Reason} -> + {checkpoint_commit_failure, <<"Failure on target commit: ", + (couch_util:to_binary(Reason))/binary>>}; + {SrcInstanceStartTime, TgtInstanceStartTime} -> + couch_log:notice("recording a checkpoint for `~s` -> `~s` at " + "source update_seq ~p", [SourceName, TargetName, NewSeq]), + StartTime = couch_replicator_utils:rfc1123_local(RepStartTime), + EndTime = couch_replicator_utils:rfc1123_local(), + NewHistoryEntry = {[ + {<<"session_id">>, SessionId}, + {<<"start_time">>, StartTime}, + {<<"end_time">>, EndTime}, + {<<"start_last_seq">>, StartSeq}, + {<<"end_last_seq">>, NewSeq}, + {<<"recorded_seq">>, NewSeq}, + {<<"missing_checked">>, + couch_replicator_stats:missing_checked(Stats)}, + {<<"missing_found">>, + couch_replicator_stats:missing_found(Stats)}, + {<<"docs_read">>, + couch_replicator_stats:docs_read(Stats)}, + {<<"docs_written">>, + couch_replicator_stats:docs_written(Stats)}, + {<<"doc_write_failures">>, + couch_replicator_stats:doc_write_failures(Stats)} + ]}, + BaseHistory = [ + {<<"session_id">>, SessionId}, + {<<"source_last_seq">>, NewSeq}, + {<<"replication_id_version">>, ?REP_ID_VERSION} + ] ++ case maps:get(<<"doc_ids">>, Options, undefined) of + undefined -> + []; + _DocIds -> + % backwards compatibility with the result of a replication + % by doc IDs in versions 0.11.x and 1.0.x TODO: deprecate + % (use same history format, simplify code) + [ + {<<"start_time">>, StartTime}, + {<<"end_time">>, EndTime}, + {<<"docs_read">>, + couch_replicator_stats:docs_read(Stats)}, + {<<"docs_written">>, + couch_replicator_stats:docs_written(Stats)}, + {<<"doc_write_failures">>, + couch_replicator_stats:doc_write_failures(Stats)} + ] + end, + % limit history to 50 entries + NewRepHistory = { + BaseHistory ++ [{<<"history">>, + lists:sublist([NewHistoryEntry | OldHistory], 50)}] + }, + + try + {SrcRevPos, SrcRevId} = update_checkpoint(Source, + SourceLog#doc{body = NewRepHistory}, source), + {TgtRevPos, TgtRevId} = update_checkpoint(Target, + TargetLog#doc{body = NewRepHistory}, target), + NewState = State#rep_state{ + checkpoint_history = NewRepHistory, + committed_seq = NewTsSeq, + source_log = SourceLog#doc{revs={SrcRevPos, [SrcRevId]}}, + target_log = TargetLog#doc{revs={TgtRevPos, [TgtRevId]}} + }, + {ok, update_job_state(NewState)} + catch throw:{checkpoint_commit_failure, _} = Failure -> + Failure + end; + {SrcInstanceStartTime, _NewTgtInstanceStartTime} -> + {checkpoint_commit_failure, <<"Target database out of sync. " + "Try to increase max_dbs_open at the target's server.">>}; + {_NewSrcInstanceStartTime, TgtInstanceStartTime} -> + {checkpoint_commit_failure, <<"Source database out of sync. " + "Try to increase max_dbs_open at the source's server.">>}; + {_NewSrcInstanceStartTime, _NewTgtInstanceStartTime} -> + {checkpoint_commit_failure, <<"Source and target databases out of " + "sync. Try to increase max_dbs_open at both servers.">>} + end. + + +update_checkpoint(Db, Doc, DbType) -> + try + update_checkpoint(Db, Doc) + catch throw:{checkpoint_commit_failure, Reason} -> + throw({checkpoint_commit_failure, <<"Error updating the ", + (couch_util:to_binary(DbType))/binary, " checkpoint document: ", + (couch_util:to_binary(Reason))/binary>>}) + end. + + +update_checkpoint(Db, #doc{id = LogId, body = LogBody} = Doc) -> + try + case couch_replicator_api_wrap:update_doc(Db, Doc, [delay_commit]) of + {ok, PosRevId} -> PosRevId; + {error, Reason} -> throw({checkpoint_commit_failure, Reason}) + end + catch throw:conflict -> + Opts = [ejson_body], + case (catch couch_replicator_api_wrap:open_doc(Db, LogId, Opts)) of + {ok, #doc{body = LogBody, revs = {Pos, [RevId | _]}}} -> + % This means that we were able to update successfully the + % checkpoint doc in a previous attempt but we got a connection + % error (timeout for e.g.) before receiving the success + % response. Therefore the request was retried and we got a + % conflict, as the revision we sent is not the current one. We + % confirm this by verifying the doc body we just got is the + % same that we have just sent. + {Pos, RevId}; + _ -> + throw({checkpoint_commit_failure, conflict}) + end + end. + + +commit_to_both(Source, Target) -> + % commit the src async + ParentPid = self(), + SrcCommitPid = spawn_link(fun() -> + Result = (catch couch_replicator_api_wrap:ensure_full_commit(Source)), + ParentPid ! {self(), Result} + end), + + % commit tgt sync + TgtResult = (catch couch_replicator_api_wrap:ensure_full_commit(Target)), + + SrcResult = receive + {SrcCommitPid, Result} -> + unlink(SrcCommitPid), + receive + {'EXIT', SrcCommitPid, _} -> + ok + after + 0 -> ok + end, + Result; + {'EXIT', SrcCommitPid, Reason} -> + {error, Reason} + end, + case TgtResult of + {ok, TargetStartTime} -> + case SrcResult of + {ok, SourceStartTime} -> + {SourceStartTime, TargetStartTime}; + SourceError -> + {source_error, SourceError} + end; + TargetError -> + {target_error, TargetError} + end. + + +compare_replication_logs(SrcDoc, TgtDoc) -> + #doc{body={RepRecProps}} = SrcDoc, + #doc{body={RepRecPropsTgt}} = TgtDoc, + SrcSession = get_value(<<"session_id">>, RepRecProps), + TgtSession = get_value(<<"session_id">>, RepRecPropsTgt), + case SrcSession == TgtSession of + true -> + % if the records have the same session id, + % then we have a valid replication history + OldSeqNum = get_value(<<"source_last_seq">>, RepRecProps, + ?LOWEST_SEQ), + OldHistory = get_value(<<"history">>, RepRecProps, []), + {OldSeqNum, OldHistory}; + false -> + SourceHistory = get_value(<<"history">>, RepRecProps, []), + TargetHistory = get_value(<<"history">>, RepRecPropsTgt, []), + couch_log:notice("Replication records differ. " + "Scanning histories to find a common ancestor.", []), + couch_log:debug("Record on source:~p~nRecord on target:~p~n", + [RepRecProps, RepRecPropsTgt]), + compare_rep_history(SourceHistory, TargetHistory) + end. + + +compare_rep_history(S, T) when S =:= [] orelse T =:= [] -> + couch_log:notice("no common ancestry -- performing full replication", []), + {?LOWEST_SEQ, []}; + +compare_rep_history([{S} | SourceRest], [{T} | TargetRest] = Target) -> + SourceId = get_value(<<"session_id">>, S), + case has_session_id(SourceId, Target) of + true -> + RecordSeqNum = get_value(<<"recorded_seq">>, S, ?LOWEST_SEQ), + couch_log:notice("found a common replication record with " + "source_seq ~p", [RecordSeqNum]), + {RecordSeqNum, SourceRest}; + false -> + TargetId = get_value(<<"session_id">>, T), + case has_session_id(TargetId, SourceRest) of + true -> + RecordSeqNum = get_value(<<"recorded_seq">>, T, + ?LOWEST_SEQ), + couch_log:notice("found a common replication record with " + "source_seq ~p", [RecordSeqNum]), + {RecordSeqNum, TargetRest}; + false -> + compare_rep_history(SourceRest, TargetRest) + end + end. + + +has_session_id(_SessionId, []) -> + false; + +has_session_id(SessionId, [{Props} | Rest]) -> + case get_value(<<"session_id">>, Props, nil) of + SessionId -> true; + _Else -> has_session_id(SessionId, Rest) + end. + + +get_pending_count(#rep_state{} = St) -> + #rep_state{ + highest_seq_done = HighestSeqDone, + source = #httpdb{} = Db0 + } = St, + {_, Seq} = HighestSeqDone, + Db = Db0#httpdb{retries = 3}, + case (catch couch_replicator_api_wrap:get_pending_count(Db, Seq)) of + {ok, Pending} -> + Pending; + _ -> + null + end. + + +maybe_update_job_state(#rep_state{} = State) -> + case State#rep_state.stats_timer of + nil -> start_stats_timer(State); + Ref when is_reference(Ref) -> State + end. + + +update_job_state(#rep_state{} = State0) -> + State = cancel_stats_timer(State0), + #rep_state{ + current_through_seq = {_, ThroughSeq}, + highest_seq_done = {_, HighestSeq}, + committed_seq = {_, CommittedSeq}, + stats = Stats, + job_data = JobData + } = State, + + Now = erlang:system_time(second), + + RevisionsChecked = couch_replicator_stats:missing_checked(Stats), + MissingRevisions = couch_replicator_stats:missing_found(Stats), + DocsRead = couch_replicator_stats:docs_read(Stats), + DocsWritten = couch_replicator_stats:docs_written(Stats), + DocWriteFailures = couch_replicator_stats:doc_write_failures(Stats), + PendingCount = get_pending_count(State), + + StatsMap = #{ + <<"checkpointed_source_seq">> => CommittedSeq, + <<"source_seq">> => HighestSeq, + <<"through_seq">> => ThroughSeq, + <<"revisions_checked">> => RevisionsChecked, + <<"missing_revisions_found">> => MissingRevisions, + <<"docs_read">> => DocsRead, + <<"docs_written">> => DocsWritten, + <<"doc_write_failures">> => DocWriteFailures, + <<"changes_pending">> => PendingCount + }, + + JobData1 = JobData#{ + ?REP_STATS := StatsMap, + ?LAST_UPDATED := Now + }, + + JobData2 = maybe_heal(JobData1, Now), + + State1 = State#rep_state{job_data = JobData2}, + State2 = update_active_task_info(State1), + update_job_data(undefined, State2). + + +replication_start_error({unauthorized, DbUri}) -> + {unauthorized, <<"unauthorized to access or create database ", + DbUri/binary>>}; + +replication_start_error({db_not_found, DbUri}) -> + {db_not_found, <<"could not open ", DbUri/binary>>}; + +replication_start_error({http_request_failed, _Method, Url0, + {error, {error, {conn_failed, {error, nxdomain}}}}}) -> + Url = ?l2b(couch_util:url_strip_password(Url0)), + {nxdomain, <<"could not resolve ", Url/binary>>}; + +replication_start_error({http_request_failed, Method0, Url0, + {error, {code, Code}}}) when is_integer(Code) -> + Url = ?l2b(couch_util:url_strip_password(Url0)), + Method = ?l2b(Method0), + CodeBin = integer_to_binary(Code), + {http_error_code, <>}; + +replication_start_error(Error) -> + Error. + + +log_replication_start(#rep_state{} = RepState) -> + #rep_state{ + id = Id, + doc_id = DocId, + db_name = DbName, + options = Options, + source_name = Source, + target_name = Target, + session_id = Sid + } = RepState, + Workers = maps:get(<<"worker_processes">>, Options), + BatchSize = maps:get(<<"worker_batch_size">>, Options), + From = case DbName of + Name when is_binary(Name) -> + io_lib:format("from doc ~s:~s", [Name, DocId]); + _ -> + "from _replicate endpoint" + end, + Msg = "Starting replication ~s (~s -> ~s) ~s worker_procesess:~p" + " worker_batch_size:~p session_id:~s", + couch_log:notice(Msg, [Id, Source, Target, From, Workers, BatchSize, Sid]). + + +check_user_filter(#rep_state{} = State) -> + #rep_state{ + id = RepId, + base_id = BaseId, + job = Job, + job_data = JobData + } = State, + case get_rep_id(undefined, Job, JobData) of + {RepId, BaseId} -> + ok; + {NewId, NewBaseId} when is_binary(NewId), is_binary(NewBaseId) -> + LogMsg = "~p : Replication id was updated ~p -> ~p", + couch_log:error(LogMsg, [?MODULE, RepId, NewId]), + reschedule(undefined, Job, JobData), + exit({shutdown, finished}) + end. + + +hist_append(Type, Now, #{} = JobData, Info) when is_integer(Now), + is_binary(Type) -> + #{?JOB_HISTORY := Hist} = JobData, + Evt1 = #{?HIST_TYPE => Type, ?HIST_TIMESTAMP => Now}, + Evt2 = case Info of + undefined -> + Evt1; + null -> + Evt1#{?HIST_REASON => null}; + <<_/binary>> -> + Evt1#{?HIST_REASON => Info}; + #{<<"error">> := Err, <<"reason">> := Reason} when is_binary(Err), + is_binary(Reason) -> + Evt1#{?HIST_REASON => Reason} + end, + Hist1 = [Evt2 | Hist], + Hist2 = lists:sublist(Hist1, max_history()), + JobData#{?JOB_HISTORY := Hist2}. + + +optimize_rate_limited_job(#{} = Options, <<"max_backoff">>) -> + OptimizedSettings = #{ + <<"checkpoint_interval">> => 5000, + <<"worker_processes">> => 2, + <<"worker_batch_size">> => 100, + <<"http_connections">> => 2 + }, + maps:merge(Options, OptimizedSettings); + +optimize_rate_limited_job(#{} = Options, _Other) -> + Options. + + +close_endpoints(State) -> + State1 = cancel_timers(State), + couch_replicator_api_wrap:db_close(State1#rep_state.source), + couch_replicator_api_wrap:db_close(State1#rep_state.target), + ok. + + +get_value(K, Props) -> + couch_util:get_value(K, Props). + + +get_value(K, Props, Default) -> + couch_util:get_value(K, Props, Default). + + +accept_jitter_msec() -> + couch_rand:uniform(erlang:max(1, max_startup_jitter_msec())). + + +max_startup_jitter_msec() -> + config:get_integer("replicator", "startup_jitter", + ?STARTUP_JITTER_DEFAULT). + + +min_backoff_penalty_sec() -> + config:get_integer("replicator", "min_backoff_penalty_sec", + ?DEFAULT_MIN_BACKOFF_PENALTY_SEC). + + +max_backoff_penalty_sec() -> + config:get_integer("replicator", "max_backoff_penalty_sec", + ?DEFAULT_MAX_BACKOFF_PENALTY_SEC). + + +max_history() -> + config:get_integer("replicator", "max_history", ?DEFAULT_MAX_HISTORY). + + +stats_update_interval_sec() -> + config:get_integer("replicator", "stats_update_interval_sec", + ?DEFAULT_STATS_UPDATE_INTERVAL_SEC). + + +-ifdef(TEST). + +-include_lib("eunit/include/eunit.hrl"). +-include_lib("fabric/test/fabric2_test.hrl"). + + +replication_start_error_test() -> + ?assertEqual({unauthorized, <<"unauthorized to access or create database" + " http://x/y">>}, replication_start_error({unauthorized, + <<"http://x/y">>})), + ?assertEqual({db_not_found, <<"could not open http://x/y">>}, + replication_start_error({db_not_found, <<"http://x/y">>})), + ?assertEqual({nxdomain, <<"could not resolve http://x/y">>}, + replication_start_error({http_request_failed, "GET", "http://x/y", + {error, {error, {conn_failed, {error, nxdomain}}}}})), + ?assertEqual({http_error_code, <<"503 GET http://x/y">>}, + replication_start_error({http_request_failed, "GET", "http://x/y", + {error, {code, 503}}})). + + +scheduler_job_format_status_test_() -> + { + foreach, + fun setup/0, + fun teardown/1, + [ + ?TDEF_FE(t_format_status) + ] + }. + + +setup() -> + meck:expect(config, get, fun(_, _, Default) -> Default end). + + +teardown(_) -> + meck:unload(). + + +t_format_status(_) -> + {ok, Rep} = couch_replicator_parse:parse_rep(#{ + <<"source">> => <<"http://u:p@h1/d1">>, + <<"target">> => <<"http://u:p@h2/d2">>, + <<"create_target">> => true + }, null), + State = #rep_state{ + id = <<"base+ext">>, + job_data = #{?REP => Rep}, + doc_id = <<"mydoc">>, + db_name = <<"mydb">>, + source = maps:get(?SOURCE, Rep), + target = maps:get(?TARGET, Rep), + options = maps:get(?OPTIONS, Rep), + session_id = <<"a">>, + start_seq = <<"1">>, + source_seq = <<"2">>, + committed_seq = <<"3">>, + current_through_seq = <<"4">>, + highest_seq_done = <<"5">> + }, + Format = format_status(opts_ignored, [pdict, State]), + FmtOptions = proplists:get_value(options, Format), + ?assertEqual("http://u:*****@h1/d1/", proplists:get_value(source, Format)), + ?assertEqual("http://u:*****@h2/d2/", proplists:get_value(target, Format)), + ?assertEqual(<<"base+ext">>, proplists:get_value(rep_id, Format)), + ?assertEqual(true, maps:get(<<"create_target">>, FmtOptions)), + ?assertEqual(<<"mydoc">>, proplists:get_value(doc_id, Format)), + ?assertEqual(<<"mydb">>, proplists:get_value(db_name, Format)), + ?assertEqual(<<"a">>, proplists:get_value(session_id, Format)), + ?assertEqual(<<"1">>, proplists:get_value(start_seq, Format)), + ?assertEqual(<<"2">>, proplists:get_value(source_seq, Format)), + ?assertEqual(<<"3">>, proplists:get_value(committed_seq, Format)), + ?assertEqual(<<"4">>, proplists:get_value(current_through_seq, Format)), + ?assertEqual(<<"5">>, proplists:get_value(highest_seq_done, Format)). + + +-endif. diff --git a/src/couch_replicator/src/couch_replicator_job_server.erl b/src/couch_replicator/src/couch_replicator_job_server.erl new file mode 100644 index 000000000..a2e90b061 --- /dev/null +++ b/src/couch_replicator/src/couch_replicator_job_server.erl @@ -0,0 +1,370 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_replicator_job_server). + + +-behaviour(gen_server). + + +-export([ + start_link/1 +]). + +-export([ + init/1, + terminate/2, + handle_call/3, + handle_cast/2, + handle_info/2, + format_status/2, + code_change/3 +]). + +-export([ + accepted/2, + scheduling_interval_sec/0, + reschedule/0 +]). + + +-include("couch_replicator.hrl"). + + +-define(MAX_ACCEPTORS, 2). +-define(MAX_JOBS, 500). +-define(MAX_CHURN, 100). +-define(INTERVAL_SEC, 15). +-define(MIN_RUN_TIME_SEC, 60). +-define(TRANSIENT_JOB_MAX_AGE_SEC, 86400). % 1 day + + +start_link(Timeout) when is_integer(Timeout) -> + gen_server:start_link({local, ?MODULE}, ?MODULE, Timeout, []). + + +init(Timeout) when is_integer(Timeout) -> + process_flag(trap_exit, true), + couch_replicator_jobs:set_timeout(), + St = #{ + acceptors => #{}, + workers => #{}, + churn => 0, + config => get_config(), + timer => undefined, + timeout => Timeout + }, + St1 = spawn_acceptors(St), + St2 = do_send_after(St1), + {ok, St2}. + + +terminate(_, #{} = St) -> + #{ + workers := Workers, + timeout := Timeout + } = St, + [stop_job(Pid) || Pid <- maps:keys(Workers)], + % Give jobs a chance to checkpoint and release their locks + wait_jobs_exit(Workers, Timeout), + ok. + + +handle_call({accepted, Pid, Normal}, _From, #{} = St) -> + #{ + acceptors := Acceptors, + workers := Workers, + churn := Churn + } = St, + case maps:is_key(Pid, Acceptors) of + true -> + Val = {Normal, erlang:system_time(second)}, + St1 = St#{ + acceptors := maps:remove(Pid, Acceptors), + workers := Workers#{Pid => Val}, + churn := Churn + 1 + }, + {reply, ok, spawn_acceptors(St1)}; + false -> + LogMsg = "~p : unknown acceptor processs ~p", + couch_log:error(LogMsg, [?MODULE, Pid]), + {stop, {unknown_acceptor_pid, Pid}, St} + end; + +handle_call(reschedule, _From, St) -> + {reply, ok, reschedule(St)}; + +handle_call(Msg, _From, St) -> + {stop, {bad_call, Msg}, {bad_call, Msg}, St}. + + +handle_cast(Msg, St) -> + {stop, {bad_cast, Msg}, St}. + + +handle_info(reschedule, #{} = St) -> + {noreply, reschedule(St)}; + +handle_info({'EXIT', Pid, Reason}, #{} = St) -> + #{ + acceptors := Acceptors, + workers := Workers + } = St, + case {maps:is_key(Pid, Acceptors), maps:is_key(Pid, Workers)} of + {true, false} -> handle_acceptor_exit(St, Pid, Reason); + {false, true} -> handle_worker_exit(St, Pid, Reason); + {false, false} -> handle_unknown_exit(St, Pid, Reason) + end; + +handle_info(Msg, St) -> + {stop, {bad_info, Msg}, St}. + + +format_status(_Opt, [_PDict, #{} = St]) -> + #{ + acceptors := Acceptors, + workers := Workers, + churn := Churn, + config := Config + } = St, + [ + {acceptors, map_size(Acceptors)}, + {workers, map_size(Workers)}, + {churn, Churn}, + {config, Config} + ]. + + +code_change(_OldVsn, St, _Extra) -> + {ok, St}. + + +accepted(Worker, Normal) when is_pid(Worker), is_boolean(Normal) -> + gen_server:call(?MODULE, {accepted, Worker, Normal}, infinity). + + +scheduling_interval_sec() -> + config:get_integer("replicator", "interval_sec", ?INTERVAL_SEC). + + +reschedule() -> + gen_server:call(?MODULE, reschedule, infinity). + + +% Scheduling logic + +do_send_after(#{} = St) -> + #{config := #{interval_sec := IntervalSec}} = St, + IntervalMSec = IntervalSec * 1000, + Jitter = IntervalMSec div 3, + WaitMSec = IntervalMSec + rand:uniform(max(1, Jitter)), + TRef = erlang:send_after(WaitMSec, self(), reschedule), + St#{timer := TRef}. + + +cancel_timer(#{timer := undefined} = St) -> + St; + +cancel_timer(#{timer := TRef} = St) when is_reference(TRef) -> + erlang:cancel_timer(TRef), + St#{timer := undefined}. + + +reschedule(#{} = St) -> + St1 = cancel_timer(St), + St2 = St1#{config := get_config()}, + St3 = trim_jobs(St2), + St4 = start_excess_acceptors(St3), + St5 = transient_job_cleanup(St4), + St6 = update_stats(St5), + St7 = do_send_after(St6), + St7#{churn := 0}. + + +start_excess_acceptors(#{} = St) -> + #{ + churn := Churn, + acceptors := Acceptors, + workers := Workers, + config := #{max_jobs := MaxJobs, max_churn := MaxChurn} + } = St, + + ACnt = maps:size(Acceptors), + WCnt = maps:size(Workers), + + ChurnLeft = MaxChurn - Churn, + Slots = (MaxJobs + MaxChurn) - (ACnt + WCnt), + MinSlotsChurn = min(Slots, ChurnLeft), + + Pending = if MinSlotsChurn =< 0 -> 0; true -> + % Don't fetch pending if we don't have enough slots or churn budget + couch_replicator_jobs:pending_count(undefined, MinSlotsChurn) + end, + + couch_stats:update_gauge([couch_replicator, jobs, pending], Pending), + + % Start new acceptors only if we have churn budget, there are pending jobs + % and we won't start more than max jobs + churn total acceptors + ToStart = max(0, lists:min([ChurnLeft, Pending, Slots])), + + lists:foldl(fun(_, #{} = StAcc) -> + #{acceptors := AccAcceptors} = StAcc, + {ok, Pid} = couch_replicator_job:start_link(), + StAcc#{acceptors := AccAcceptors#{Pid => true}} + end, St, lists:seq(1, ToStart)). + + +transient_job_cleanup(#{} = St) -> + #{ + config := #{transient_job_max_age_sec := MaxAge} + } = St, + Now = erlang:system_time(second), + FoldFun = fun(_JTx, JobId, State, #{} = Data, ok) -> + IsTransient = maps:get(?DB_NAME, Data) =:= null, + IsOld = Now - maps:get(?LAST_UPDATED, Data) >= MaxAge, + case State =:= finished andalso IsTransient andalso IsOld of + true -> + ok = couch_replicator_jobs:remove_job(undefined, JobId), + couch_log:info("~p : Removed old job ~p", [?MODULE, JobId]), + ok; + false -> + ok + end + end, + ok = couch_replicator_jobs:fold_jobs(undefined, FoldFun, ok), + St. + + +update_stats(#{} = St) -> + ACnt = maps:size(maps:get(acceptors, St)), + WCnt = maps:size(maps:get(workers, St)), + couch_stats:update_gauge([couch_replicator, jobs, accepting], ACnt), + couch_stats:update_gauge([couch_replicator, jobs, running], WCnt), + couch_stats:increment_counter([couch_replicator, jobs, reschedules]), + St. + + +trim_jobs(#{} = St) -> + #{ + workers := Workers, + churn := Churn, + config := #{max_jobs := MaxJobs} + } = St, + Excess = max(0, maps:size(Workers) - MaxJobs), + lists:foreach(fun stop_job/1, stop_candidates(St, Excess)), + St#{churn := Churn + Excess}. + + +stop_candidates(#{}, Top) when is_integer(Top), Top =< 0 -> + []; + +stop_candidates(#{} = St, Top) when is_integer(Top), Top > 0 -> + #{ + workers := Workers, + config := #{min_run_time_sec := MinRunTime} + } = St, + + WList1 = maps:to_list(Workers), % [{Pid, {Normal, StartTime}},...] + + % Filter out normal jobs and those which have just started running + MaxT = erlang:system_time(second) - MinRunTime, + WList2 = lists:filter(fun({_Pid, {Normal, T}}) -> + not Normal andalso T =< MaxT + end, WList1), + + Sorted = lists:keysort(2, WList2), + Pids = lists:map(fun({Pid, _}) -> Pid end, Sorted), + lists:sublist(Pids, Top). + + +stop_job(Pid) when is_pid(Pid) -> + % Replication jobs handle the shutdown signal and then checkpoint in + % terminate handler + exit(Pid, shutdown). + + +wait_jobs_exit(#{} = Jobs, _) when map_size(Jobs) =:= 0 -> + ok; + +wait_jobs_exit(#{} = Jobs, Timeout) -> + receive + {'EXIT', Pid, _} -> + wait_jobs_exit(maps:remove(Pid, Jobs), Timeout) + after + Timeout -> + LogMsg = "~p : ~p jobs didn't terminate cleanly", + couch_log:error(LogMsg, [?MODULE, map_size(Jobs)]), + ok + end. + + +spawn_acceptors(St) -> + #{ + workers := Workers, + acceptors := Acceptors, + config := #{max_jobs := MaxJobs, max_acceptors := MaxAcceptors} + } = St, + ACnt = maps:size(Acceptors), + WCnt = maps:size(Workers), + case ACnt < MaxAcceptors andalso (ACnt + WCnt) < MaxJobs of + true -> + {ok, Pid} = couch_replicator_job:start_link(), + NewSt = St#{acceptors := Acceptors#{Pid => true}}, + spawn_acceptors(NewSt); + false -> + St + end. + + +% Worker process exit handlers + +handle_acceptor_exit(#{acceptors := Acceptors} = St, Pid, Reason) -> + St1 = St#{acceptors := maps:remove(Pid, Acceptors)}, + LogMsg = "~p : acceptor process ~p exited with ~p", + couch_log:error(LogMsg, [?MODULE, Pid, Reason]), + {noreply, spawn_acceptors(St1)}. + + +handle_worker_exit(#{workers := Workers} = St, Pid, Reason) -> + St1 = St#{workers := maps:remove(Pid, Workers)}, + case Reason of + normal -> + ok; + shutdown -> + ok; + {shutdown, _} -> + ok; + _ -> + LogMsg = "~p : replicator job process ~p exited with ~p", + couch_log:error(LogMsg, [?MODULE, Pid, Reason]) + end, + {noreply, spawn_acceptors(St1)}. + + +handle_unknown_exit(St, Pid, Reason) -> + LogMsg = "~p : unknown process ~p exited with ~p", + couch_log:error(LogMsg, [?MODULE, Pid, Reason]), + {stop, {unknown_pid_exit, Pid}, St}. + + +get_config() -> + Defaults = #{ + max_acceptors => ?MAX_ACCEPTORS, + interval_sec => ?INTERVAL_SEC, + max_jobs => ?MAX_JOBS, + max_churn => ?MAX_CHURN, + min_run_time_sec => ?MIN_RUN_TIME_SEC, + transient_job_max_age_sec => ?TRANSIENT_JOB_MAX_AGE_SEC + }, + maps:map(fun(K, Default) -> + config:get_integer("replicator", atom_to_list(K), Default) + end, Defaults). -- cgit v1.2.1 From aa8836434655b9b1b34c6d72035eb306266c484b Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Fri, 28 Aug 2020 12:49:59 -0400 Subject: Update replicator http handlers and supervisor Stich everything together: the backend, frontend and http handlers. The supervisor `couch_replicator_sup` handles starting a set of fronted or backend children. It may also start both or neither. The HTTP layer for monitoring and creating jobs is simpler than before since there is rpc and clustering involved. --- src/chttpd/src/chttpd_misc.erl | 55 +------ src/couch_replicator/src/couch_replicator.app.src | 11 +- .../src/couch_replicator_httpd.erl | 163 ++++++++++----------- src/couch_replicator/src/couch_replicator_sup.erl | 113 +++++++------- 4 files changed, 149 insertions(+), 193 deletions(-) diff --git a/src/chttpd/src/chttpd_misc.erl b/src/chttpd/src/chttpd_misc.erl index ec2435c41..79c291462 100644 --- a/src/chttpd/src/chttpd_misc.erl +++ b/src/chttpd/src/chttpd_misc.erl @@ -302,17 +302,22 @@ handle_task_status_req(Req) -> handle_replicate_req(#httpd{method='POST', user_ctx=Ctx, req_body=PostBody} = Req) -> chttpd:validate_ctype(Req, "application/json"), %% see HACK in chttpd.erl about replication - case replicate(PostBody, Ctx) of + case couch_replicator:replicate(PostBody, Ctx) of {ok, {continuous, RepId}} -> send_json(Req, 202, {[{ok, true}, {<<"_local_id">>, RepId}]}); {ok, {cancelled, RepId}} -> send_json(Req, 200, {[{ok, true}, {<<"_local_id">>, RepId}]}); - {ok, {JsonResults}} -> - send_json(Req, {[{ok, true} | JsonResults]}); + {ok, #{} = JsonResults} -> + send_json(Req, maps:merge(#{<<"ok">> => true}, JsonResults)); {ok, stopped} -> send_json(Req, 200, {[{ok, stopped}]}); {error, not_found=Error} -> chttpd:send_error(Req, Error); + {error, #{<<"error">> := Err, <<"reason">> := Reason}} when + is_binary(Err), is_binary(Reason) -> + % Safe to use binary_to_atom since this is only built + % from couch_replicator_jobs:error_info/1 + chttpd:send_error(Req, {binary_to_atom(Err, utf8), Reason}); {error, {_, _}=Error} -> chttpd:send_error(Req, Error); {_, _}=Error -> @@ -321,50 +326,6 @@ handle_replicate_req(#httpd{method='POST', user_ctx=Ctx, req_body=PostBody} = Re handle_replicate_req(Req) -> send_method_not_allowed(Req, "POST"). -replicate({Props} = PostBody, Ctx) -> - case couch_util:get_value(<<"cancel">>, Props) of - true -> - cancel_replication(PostBody, Ctx); - _ -> - Node = choose_node([ - couch_util:get_value(<<"source">>, Props), - couch_util:get_value(<<"target">>, Props) - ]), - case rpc:call(Node, couch_replicator, replicate, [PostBody, Ctx]) of - {badrpc, Reason} -> - erlang:error(Reason); - Res -> - Res - end - end. - -cancel_replication(PostBody, Ctx) -> - {Res, _Bad} = rpc:multicall(couch_replicator, replicate, [PostBody, Ctx]), - case [X || {ok, {cancelled, _}} = X <- Res] of - [Success|_] -> - % Report success if at least one node canceled the replication - Success; - [] -> - case lists:usort(Res) of - [UniqueReply] -> - % Report a universally agreed-upon reply - UniqueReply; - [] -> - {error, badrpc}; - Else -> - % Unclear what to do here -- pick the first error? - % Except try ignoring any {error, not_found} responses - % because we'll always get two of those - hd(Else -- [{error, not_found}]) - end - end. - -choose_node(Key) when is_binary(Key) -> - Checksum = erlang:crc32(Key), - Nodes = lists:sort([node()|erlang:nodes()]), - lists:nth(1 + Checksum rem length(Nodes), Nodes); -choose_node(Key) -> - choose_node(term_to_binary(Key)). handle_reload_query_servers_req(#httpd{method='POST'}=Req) -> chttpd:validate_ctype(Req, "application/json"), diff --git a/src/couch_replicator/src/couch_replicator.app.src b/src/couch_replicator/src/couch_replicator.app.src index 2e0e191d3..81789f155 100644 --- a/src/couch_replicator/src/couch_replicator.app.src +++ b/src/couch_replicator/src/couch_replicator.app.src @@ -18,20 +18,15 @@ couch_replicator_sup, couch_replicator_rate_limiter, couch_replicator_connection, - couch_replication, % couch_replication_event gen_event - couch_replicator_clustering, - couch_replicator_scheduler, - couch_replicator_scheduler_sup, - couch_replicator_doc_processor + couch_replicator_job_server ]}, {applications, [ kernel, stdlib, couch_log, - mem3, config, couch, - couch_event, - couch_stats + couch_stats, + couch_jobs ]} ]}. diff --git a/src/couch_replicator/src/couch_replicator_httpd.erl b/src/couch_replicator/src/couch_replicator_httpd.erl index abd9f7fd0..196fcf203 100644 --- a/src/couch_replicator/src/couch_replicator_httpd.erl +++ b/src/couch_replicator/src/couch_replicator_httpd.erl @@ -12,9 +12,6 @@ -module(couch_replicator_httpd). --include_lib("couch/include/couch_db.hrl"). --include_lib("couch_mrview/include/couch_mrview.hrl"). - -export([ handle_req/1, handle_scheduler_req/1 @@ -26,48 +23,40 @@ send_method_not_allowed/2 ]). --import(couch_util, [ - to_binary/1 -]). + +-include_lib("couch/include/couch_db.hrl"). +-include("couch_replicator.hrl"). -define(DEFAULT_TASK_LIMIT, 100). --define(REPDB, <<"_replicator">>). -% This is a macro so it can be used as a guard --define(ISREPDB(X), X =:= ?REPDB orelse binary_part(X, {byte_size(X), -12}) - =:= <<"/_replicator">>). handle_scheduler_req(#httpd{method='GET', path_parts=[_,<<"jobs">>]}=Req) -> - Limit = couch_replicator_httpd_util:parse_int_param(Req, "limit", + Limit = couch_replicator_utils:parse_int_param(Req, "limit", ?DEFAULT_TASK_LIMIT, 0, infinity), - Skip = couch_replicator_httpd_util:parse_int_param(Req, "skip", 0, 0, + Skip = couch_replicator_utils:parse_int_param(Req, "skip", 0, 0, infinity), - {Replies, _BadNodes} = rpc:multicall(couch_replicator_scheduler, jobs, []), - Flatlist = lists:concat(Replies), - % couch_replicator_scheduler:job_ejson/1 guarantees {id, Id} to be the - % the first item in the list - Sorted = lists:sort(fun({[{id,A}|_]},{[{id,B}|_]}) -> A =< B end, Flatlist), - Total = length(Sorted), + Jobs1 = couch_replicator:jobs(), + Total = length(Jobs1), Offset = min(Skip, Total), - Sublist = lists:sublist(Sorted, Offset+1, Limit), - Sublist1 = [couch_replicator_httpd_util:update_db_name(Task) - || Task <- Sublist], - send_json(Req, {[{total_rows, Total}, {offset, Offset}, {jobs, Sublist1}]}); + Jobs2 = lists:sublist(Jobs1, Offset + 1, Limit), + send_json(Req, #{ + <<"total_rows">> => Total, + <<"offset">> => Offset, + <<"jobs">> => Jobs2 + }); handle_scheduler_req(#httpd{method='GET', path_parts=[_,<<"jobs">>,JobId]}=Req) -> case couch_replicator:job(JobId) of - {ok, JobInfo} -> - send_json(Req, couch_replicator_httpd_util:update_db_name(JobInfo)); - {error, not_found} -> - throw(not_found) + {ok, JobInfo} -> send_json(Req, JobInfo); + {error, not_found} -> throw(not_found) end; handle_scheduler_req(#httpd{method='GET', path_parts=[_,<<"docs">>]}=Req) -> - handle_scheduler_docs(?REPDB, Req); + handle_scheduler_docs(?REP_DB_NAME, Req); handle_scheduler_req(#httpd{method='GET', path_parts=[_,<<"docs">>,Db]}=Req) - when ?ISREPDB(Db) -> + when ?IS_REP_DB(Db) -> handle_scheduler_docs(Db, Req); handle_scheduler_req(#httpd{method='GET', path_parts=[_,<<"docs">>,Db,DocId]} - = Req) when ?ISREPDB(Db) -> + = Req) when ?IS_REP_DB(Db) -> handle_scheduler_doc(Db, DocId, Req); % Allow users to pass in unencoded _replicator database names (/ are not % escaped). This is possible here because _replicator is not a valid document @@ -82,77 +71,80 @@ handle_scheduler_req(#httpd{method='GET', path_parts=[_,<<"docs">>|Unquoted]} {error, invalid} -> throw(bad_request) end; -handle_scheduler_req(#httpd{method='GET'} = Req) -> - send_json(Req, 404, {[{error, <<"not found">>}]}); +handle_scheduler_req(#httpd{method='GET'} = _Req) -> + throw(not_found); handle_scheduler_req(Req) -> send_method_not_allowed(Req, "GET,HEAD"). handle_req(#httpd{method = 'POST', user_ctx = UserCtx} = Req) -> couch_httpd:validate_ctype(Req, "application/json"), - RepDoc = {Props} = couch_httpd:json_body_obj(Req), - couch_replicator_httpd_util:validate_rep_props(Props), + RepDoc = couch_httpd:json_body_obj(Req), case couch_replicator:replicate(RepDoc, UserCtx) of - {error, {Error, Reason}} -> - send_json( - Req, 500, - {[{error, to_binary(Error)}, {reason, to_binary(Reason)}]}); - {error, not_found} -> - % Tried to cancel a replication that didn't exist. - send_json(Req, 404, {[{error, <<"not found">>}]}); - {error, Reason} -> - send_json(Req, 500, {[{error, to_binary(Reason)}]}); - {ok, {cancelled, RepId}} -> - send_json(Req, 200, {[{ok, true}, {<<"_local_id">>, RepId}]}); - {ok, {continuous, RepId}} -> - send_json(Req, 202, {[{ok, true}, {<<"_local_id">>, RepId}]}); - {ok, {HistoryResults}} -> - send_json(Req, {[{ok, true} | HistoryResults]}) - end; + {error, {Error, Reason}} -> + send_json(Req, 500, #{ + <<"error">> => couch_util:to_binary(Error), + <<"reason">> => couch_util:to_binary(Reason) + }); + {error, not_found} -> + throw(not_found); + {error, Reason} -> + send_json(Req, 500, #{<<"error">> => couch_util:to_binary(Reason)}); + {ok, {cancelled, JobId}} -> + send_json(Req, 200, #{<<"ok">> => true, <<"_local_id">> => JobId}); + {ok, {continuous, JobId}} -> + send_json(Req, 202, #{<<"ok">> => true, <<"_local_id">> => JobId}); + {ok, #{} = CheckpointHistory} -> + Res = maps:merge(#{<<"ok">> => true}, CheckpointHistory), + send_json(Req, Res) + end; handle_req(Req) -> send_method_not_allowed(Req, "POST"). -handle_scheduler_docs(Db, Req) when is_binary(Db) -> - VArgs0 = couch_mrview_http:parse_params(Req, undefined), - StatesQs = chttpd:qs_value(Req, "states"), - States = couch_replicator_httpd_util:parse_replication_state_filter(StatesQs), - VArgs1 = VArgs0#mrargs{ - view_type = map, - include_docs = true, - reduce = false, - extra = [{filter_states, States}] - }, - VArgs2 = couch_mrview_util:validate_args(VArgs1), - Opts = [{user_ctx, Req#httpd.user_ctx}], - Max = chttpd:chunked_response_buffer_size(), - Acc = couch_replicator_httpd_util:docs_acc_new(Req, Db, Max), - Cb = fun couch_replicator_httpd_util:docs_cb/2, - {ok, RAcc} = couch_replicator_fabric:docs(Db, Opts, VArgs2, Cb, Acc), - {ok, couch_replicator_httpd_util:docs_acc_response(RAcc)}. - - -handle_scheduler_doc(Db, DocId, Req) when is_binary(Db), is_binary(DocId) -> - UserCtx = Req#httpd.user_ctx, - case couch_replicator:doc(Db, DocId, UserCtx#user_ctx.roles) of - {ok, DocInfo} -> - send_json(Req, couch_replicator_httpd_util:update_db_name(DocInfo)); - {error, not_found} -> +handle_scheduler_docs(DbName, #httpd{user_ctx = UserCtx} = Req) -> + try fabric2_db:open(DbName, [{user_ctx, UserCtx}]) of + {ok, Db} -> + ok = fabric2_db:check_is_member(Db), + StatesQs = chttpd:qs_value(Req, "states"), + States = couch_replicator_utils:parse_replication_states(StatesQs), + Docs = couch_replicator:docs(Db, States), + send_json(Req, #{ + <<"total_rows">> => length(Docs), + <<"offset">> => 0, + <<"docs">> => Docs + }) + catch + error:database_does_not_exist -> throw(not_found) end. +handle_scheduler_doc(DbName, DocId, #httpd{user_ctx = UserCtx} = Req) -> + try fabric2_db:open(DbName, [{user_ctx, UserCtx}]) of + {ok, Db} -> + ok = fabric2_db:check_is_member(Db), + case couch_replicator:doc(Db, DocId) of + {ok, DocInfo} -> send_json(Req, DocInfo); + {error, not_found} -> throw(not_found) + end + catch + error:database_does_not_exist -> + throw(not_found) + end. + + parse_unquoted_docs_path([_, _ | _] = Unquoted) -> - DbAndAfter = lists:dropwhile(fun(E) -> E =/= ?REPDB end, Unquoted), - BeforeRDb = lists:takewhile(fun(E) -> E =/= ?REPDB end, Unquoted), + DbAndAfter = lists:dropwhile(fun(E) -> E =/= ?REP_DB_NAME end, Unquoted), + BeforeRDb = lists:takewhile(fun(E) -> E =/= ?REP_DB_NAME end, Unquoted), case DbAndAfter of [] -> {error, invalid}; - [?REPDB] -> - {db_only, filename:join(BeforeRDb ++ [?REPDB])}; - [?REPDB, DocId] -> - {db_and_doc, filename:join(BeforeRDb ++ [?REPDB]), DocId} + [?REP_DB_NAME] -> + {db_only, filename:join(BeforeRDb ++ [?REP_DB_NAME])}; + [?REP_DB_NAME, DocId] -> + {db_and_doc, filename:join(BeforeRDb ++ [?REP_DB_NAME]), DocId} end. @@ -163,10 +155,13 @@ parse_unquoted_docs_path([_, _ | _] = Unquoted) -> unquoted_scheduler_docs_path_test_() -> [?_assertEqual(Res, parse_unquoted_docs_path(Path)) || {Res, Path} <- [ {{error, invalid}, [<<"a">>,<< "b">>]}, - {{db_only, <<"a/_replicator">>}, [<<"a">>, ?REPDB]}, - {{db_only, <<"a/b/_replicator">>}, [<<"a">>, <<"b">>, ?REPDB]}, - {{db_and_doc, <<"_replicator">>, <<"x">>}, [?REPDB, <<"x">>]}, - {{db_and_doc, <<"a/_replicator">>, <<"x">>}, [<<"a">>, ?REPDB, <<"x">>]}, + {{db_only, <<"a/_replicator">>}, [<<"a">>, ?REP_DB_NAME]}, + {{db_only, <<"a/b/_replicator">>}, [<<"a">>, <<"b">>, + ?REP_DB_NAME]}, + {{db_and_doc, <<"_replicator">>, <<"x">>}, + [?REP_DB_NAME, <<"x">>]}, + {{db_and_doc, <<"a/_replicator">>, <<"x">>}, [<<"a">>, + ?REP_DB_NAME, <<"x">>]}, {{error, invalid}, [<<"a/_replicator">>,<<"x">>]} ]]. diff --git a/src/couch_replicator/src/couch_replicator_sup.erl b/src/couch_replicator/src/couch_replicator_sup.erl index cd4512c54..49d412aaa 100644 --- a/src/couch_replicator/src/couch_replicator_sup.erl +++ b/src/couch_replicator/src/couch_replicator_sup.erl @@ -12,61 +12,66 @@ % the License. -module(couch_replicator_sup). + + -behaviour(supervisor). --export([start_link/0, init/1]). + + +-export([ + start_link/0 +]). + +-export([ + init/1 +]). + start_link() -> - supervisor:start_link({local, ?MODULE}, ?MODULE, []). + Backend = fabric2_node_types:is_type(replication), + Frontend = fabric2_node_types:is_type(api_frontend), + Arg = {Backend, Frontend}, + supervisor:start_link({local, ?MODULE}, ?MODULE, Arg). + + +init({Backend, Frontend}) -> + Children = case {Backend, Frontend} of + {true, true} -> backend() ++ frontend(); + {true, false} -> backend(); + {false, true} -> frontend(); + {false, false} -> [] + end, + Flags = #{ + strategy => rest_for_one, + intensity => 1, + period => 5 + }, + {ok, {Flags, Children}}. + + +backend() -> + Timeout = 5000, + [ + #{ + id => couch_replicator_connection, + start => {couch_replicator_connection, start_link, []} + }, + #{ + id => couch_replicator_rate_limiter, + start => {couch_replicator_rate_limiter, start_link, []} + }, + #{ + id => couch_replicator_job_server, + start => {couch_replicator_job_server, start_link, [Timeout]}, + shutdown => Timeout + } + ]. + -init(_Args) -> - Children = [ - {couch_replication_event, - {gen_event, start_link, [{local, couch_replication}]}, - permanent, - brutal_kill, - worker, - dynamic}, - {couch_replicator_clustering, - {couch_replicator_clustering, start_link, []}, - permanent, - brutal_kill, - worker, - [couch_replicator_clustering]}, - {couch_replicator_connection, - {couch_replicator_connection, start_link, []}, - permanent, - brutal_kill, - worker, - [couch_replicator_connection]}, - {couch_replicator_rate_limiter, - {couch_replicator_rate_limiter, start_link, []}, - permanent, - brutal_kill, - worker, - [couch_replicator_rate_limiter]}, - {couch_replicator_scheduler_sup, - {couch_replicator_scheduler_sup, start_link, []}, - permanent, - infinity, - supervisor, - [couch_replicator_scheduler_sup]}, - {couch_replicator_scheduler, - {couch_replicator_scheduler, start_link, []}, - permanent, - brutal_kill, - worker, - [couch_replicator_scheduler]}, - {couch_replicator_doc_processor, - {couch_replicator_doc_processor, start_link, []}, - permanent, - brutal_kill, - worker, - [couch_replicator_doc_processor]}, - {couch_replicator_db_changes, - {couch_replicator_db_changes, start_link, []}, - permanent, - brutal_kill, - worker, - [couch_multidb_changes]} - ], - {ok, {{rest_for_one,10,1}, Children}}. +frontend() -> + [ + #{ + id => couch_replicator, + start => {couch_replicator, ensure_rep_db_exists, []}, + restart => transient + } + ] ++ couch_epi:register_service(couch_replicator_epi, []). -- cgit v1.2.1 From b718d3380cbc8dc5799f36e19e1a94b8c2e00c2d Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Fri, 28 Aug 2020 04:36:05 -0400 Subject: Update and cleanup default.ini replicator entries Update settings with defaults. Also comment out values which are already set to default in the code. --- rel/overlay/etc/default.ini | 78 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 59 insertions(+), 19 deletions(-) diff --git a/rel/overlay/etc/default.ini b/rel/overlay/etc/default.ini index b837082f6..712150b18 100644 --- a/rel/overlay/etc/default.ini +++ b/rel/overlay/etc/default.ini @@ -436,55 +436,99 @@ compression_level = 8 ; from 1 (lowest, fastest) to 9 (highest, slowest), 0 to d compressible_types = text/*, application/javascript, application/json, application/xml [replicator] -; Random jitter applied on replication job startup (milliseconds) -startup_jitter = 5000 -; Number of actively running replications -max_jobs = 500 -;Scheduling interval in milliseconds. During each reschedule cycle -interval = 60000 +; Number of actively running replications per replication backend node +;max_jobs = 500 + +; Scheduling interval in seconds +;interval_sec = 15 + ; Maximum number of replications to start and stop during rescheduling. -max_churn = 20 +;max_churn = 100 + +; Max number of acceptors running per node. If they are available job slots +; left then up to these many acceptors are kept open. +;max_acceptors = 2 + +; The amount of jitter (in milliseconds) to apply to replication job acceptors. +; This will allow multiple acceptors to avoid generating too many transaction +; conflicts on busy clusters. +;accept_jitter = 2000 + +; Minimum time in seconds replication jobs will be left running before being +; rotated when all the schedule slots are filled. This migth be useful if +; max_jobs is very low, but jobs should be left running long enough to make at +; least some progress before being replaced +;min_run_time_sec = 60 + +; Health threshold is the minimum amount of time an unhealthy job should run +; crashing before it is considered to be healthy again. +;health_threshold_sec = 120 + +; These are applied when jobs are pentalized after repeatedly crashing. On +; first error the minimum value is applied. Then the penalty is doubled, but +; only up to the maximum value. +;min_backoff_penalty_sec = 32 +;max_backoff_penalty_sec = 172800 + +; How many per-job history events to keep +;max_history = 10 + ; More worker processes can give higher network throughput but can also ; imply more disk and network IO. -worker_processes = 4 +;worker_processes = 4 + ; With lower batch sizes checkpoints are done more frequently. Lower batch sizes ; also reduce the total amount of used RAM memory. -worker_batch_size = 500 +;worker_batch_size = 500 + ; Maximum number of HTTP connections per replication. -http_connections = 20 +;http_connections = 20 + ; HTTP connection timeout per replication. ; Even for very fast/reliable networks it might need to be increased if a remote ; database is too busy. -connection_timeout = 30000 +;connection_timeout = 30000 + ; Request timeout ;request_timeout = infinity ; If a request fails, the replicator will retry it up to N times. -retries_per_request = 5 +;retries_per_request = 5 + ; Use checkpoints ;use_checkpoints = true + ; Checkpoint interval ;checkpoint_interval = 30000 + ; Some socket options that might boost performance in some scenarios: ; {nodelay, boolean()} ; {sndbuf, integer()} ; {recbuf, integer()} ; {priority, integer()} ; See the `inet` Erlang module's man page for the full list of options. -socket_options = [{keepalive, true}, {nodelay, false}] +;socket_options = [{keepalive, true}, {nodelay, false}] + ; Path to a file containing the user's certificate. ;cert_file = /full/path/to/server_cert.pem + ; Path to file containing user's private PEM encoded key. ;key_file = /full/path/to/server_key.pem + ; String containing the user's password. Only used if the private keyfile is password protected. ;password = somepassword + ; Set to true to validate peer certificates. -verify_ssl_certificates = false +;verify_ssl_certificates = false + ; File containing a list of peer trusted certificates (in the PEM format). ;ssl_trusted_certificates_file = /etc/ssl/certs/ca-certificates.crt + ; Maximum peer certificate depth (must be set even if certificate validation is off). -ssl_certificate_max_depth = 3 +;ssl_certificate_max_depth = 3 + ; Maximum document ID length for replication. ;max_document_id_length = infinity + ; How much time to wait before retrying after a missing doc exception. This ; exception happens if the document was seen in the changes feed, but internal ; replication hasn't caught up yet, and fetching document's revisions @@ -494,10 +538,6 @@ ssl_certificate_max_depth = 3 ; avoid crashing the whole replication job, which would consume more resources ; and add log noise. ;missing_doc_retry_msec = 2000 -; Wait this many seconds after startup before attaching changes listeners -; cluster_start_period = 5 -; Re-check cluster state at least every cluster_quiet_period seconds -; cluster_quiet_period = 60 ; List of replicator client authentication plugins to try. Plugins will be ; tried in order. The first to initialize successfully will be used for that -- cgit v1.2.1 From 99262909129602bceac82e7907ebfcafc9eba629 Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Fri, 28 Aug 2020 04:36:12 -0400 Subject: Update replicator's readme file Remove sections which don't apply anymore and describe briefly how frontend and backend interact. --- src/couch_replicator/README.md | 312 ++++++++--------------------------------- 1 file changed, 61 insertions(+), 251 deletions(-) diff --git a/src/couch_replicator/README.md b/src/couch_replicator/README.md index 6a2a5cfdd..4eced760f 100644 --- a/src/couch_replicator/README.md +++ b/src/couch_replicator/README.md @@ -3,41 +3,67 @@ Developer Oriented Replicator Description This description of scheduling replicator's functionality is mainly geared to CouchDB developers. It dives a bit into the internal and explains how -everything is connected together. +everything is connected together. A higher level overview is available in the +[RFC](https://github.com/apache/couchdb-documentation/pull/581). This +documention assumes the audience is familiar with that description as well as +with the [Couch Jobs +RFC](https://github.com/apache/couchdb-documentation/blob/master/rfcs/007-background-jobs.md) +as well as with the [Node Types +RFC](https://github.com/apache/couchdb-documentation/blob/master/rfcs/013-node-types.md). A natural place to start is the top application supervisor: -`couch_replicator_sup`. It's a `rest_for_one` restart strategy supervisor, -so if a child process terminates, the rest of the children in the hierarchy -following it are also terminated. This structure implies a useful constraint -- -children lower in the list can safely call their siblings which are higher in -the list. - -A description of each child: - - * `couch_replication_event`: Starts a gen_event publication bus to handle some - replication related events. This used for example, to publish cluster - membership changes by the `couch_replicator_clustering` process. But is - also used in replication tests to monitor for replication events. - Notification is performed via the `couch_replicator_notifier:notify/1` - function. It's the first (left-most) child because - `couch_replicator_clustering` uses it. - - * `couch_replicator_clustering`: This module maintains cluster membership - information for the replication application and provides functions to check - ownership of replication jobs. A cluster membership change is published via - the `gen_event` event server named `couch_replication_event` as previously - covered. Published events are `{cluster, stable}` when cluster membership - has stabilized, that it, no node membership changes in a given period, and - `{cluster, unstable}` which indicates there was a recent change to the - cluster membership and now it's considered unstable. Listeners for cluster - membership change include `couch_replicator_doc_processor` and - `couch_replicator_db_changes`. When doc processor gets an `{cluster, - stable}` event it will remove all the replication jobs not belonging to the - current node. When `couch_replicator_db_changes` gets a `{cluster, - stable}` event, it will restart the `couch_multidb_changes` process it - controls, which will launch an new scan of all the replicator databases. - - * `couch_replicator_connection`: Maintains a global replication connection +`couch_replicator_sup`. The set of children in the supervisor is split into +`frontend` and `backend`. The `frontend` set is started on nodes which have the +`api_frontend` node type label set to `true`, and `backend` ones are started on +nodes which have the `replication` label set to `true`. The same node could +have both them set to `true`, and it could act as a replication front and +backend node. However, it is not guaranteed that jobs which are created by the +frontend part will necessarily run on the backend on the same node. + + +Frontend Description +-- + +The "frontend" consists of the parts which handle HTTP requests and monitor +`_replicator` databases for changes and then create `couch_jobs` replication +job records. Some of the modules involved in this are: + + * `couch_replicator` : Contains the main API "entry" point into the + `couch_replicator` application. The `replicate/2` function creates transient + replication jobs. `after_db_create/2`, `after_db_delete/2`, + `after_doc_write/6` functions are called from `couch_epi` callbacks to + create replication jobs from `_replicator` db events. Eventually they all + call `couch_replicator_jobs:add_job/3` to create a `couch_jobs` replication + job. Before the job is created, either the HTTP request body or the + `_replicator` doc body is parsed into a `Rep` map object. An important + property of this object is that it can be serialized to JSON and + deserialized from JSON. This object is saved in the `?REP` field of the + replication `couch_jobs` job data. Besides creating replication job + `couch_replicator` is also responsible for handling `_scheduler/jobs` and + `_scheduler/docs` monitoring API response. That happens in the `jobs/0`, + `job/1`, `docs/` and `doc/2` function. + +Backend Description +-- + +The "backend" consists of parts which run replication jobs, update their state, +and handle rescheduling on intermettent errors. All the job activity on these +nodes is ultumately driven from `couch_jobs` acceptors which wait in +`couch_jobs:accept/2` for replication jobs. + + * `couch_replicator_job_server` : A singleton process in charge of which + spawning and keeping track of `couch_replicator_job` processes. It ensures + there is a limited number of replication jobs running on each node. It + periodically accepts new jobs and stopping the oldest running ones in order + to give other pending jobs a chance to run. It runs this logic in the + `reschedule/1` function. That function is called with a frequency defined by + the `interval_sec` configuration setting. The other pramers which determine + how jobs start and stop are `max_jobs` and `max_churn`. The node will try to + limit running up to `max_jobs` job on average with periodic spikes of up to + `max_jobs + max_churn` job at a time, and it will try not to start more than + `max_churn` number of job during each rescheduling cycle. + + * `couch_replicator_connection`: Maintains a global replication connection pool. It allows reusing connections across replication tasks. The main interface is `acquire/1` and `release/1`. The general idea is once a connection is established, it is kept around for @@ -62,224 +88,8 @@ A description of each child: interval is updated accordingly on each call to `failure/1` or `success/1` calls. For a successful request, a client should call `success/1`. Whenever a 429 response is received the client should call `failure/1`. When no - failures are happening the code ensures the ETS tables are empty in - order to have a lower impact on a running system. - - * `couch_replicator_scheduler`: This is the core component of the scheduling - replicator. It's main task is to switch between replication jobs, by - stopping some and starting others to ensure all of them make progress. - Replication jobs which fail are penalized using an exponential backoff. - That is, each consecutive failure will double the time penalty. This frees - up system resources for more useful work than just continuously trying to - run the same subset of failing jobs. - - The main API function is `add_job/1`. Its argument is an instance of the - `#rep{}` record, which could be the result of a document update from a - `_replicator` db or the result of a POST to `_replicate` endpoint. - - Each job internally is represented by the `#job{}` record. It contains the - original `#rep{}` but also, maintains an event history. The history is a - sequence of past events for each job. These are timestamped and ordered - such that the most recent event is at the head. History length is limited - based on the `replicator.max_history` configuration value. The default is - 20 entries. History events types are: - - * `added` : job was just added to the scheduler. This is the first event. - * `started` : job was started. This was an attempt to run the job. - * `stopped` : job was stopped by the scheduler. - * `crashed` : job has crashed (instead of stopping cleanly). - - The core of the scheduling algorithm is the `reschedule/1` function. This - function is called every `replicator.interval` milliseconds (default is - 60000 i.e. a minute). During each call the scheduler will try to stop some - jobs, start some new ones and will also try to keep the maximum number of - jobs running less than `replicator.max_jobs` (default 500). So the - functions does these operations (actual code paste): - - ``` - Running = running_job_count(), - Pending = pending_job_count(), - stop_excess_jobs(State, Running), - start_pending_jobs(State, Running, Pending), - rotate_jobs(State, Running, Pending), - update_running_jobs_stats(State#state.stats_pid) - ``` - - `Running` is the total number of currently running jobs. `Pending` is the - total number of jobs waiting to be run. `stop_excess_jobs` will stop any - exceeding the `replicator.max_jobs` configured limit. This code takes - effect if user reduces the `max_jobs` configuration value. - `start_pending_jobs` will start any jobs if there is more room available. - This will take effect on startup or when user increases the `max_jobs` - configuration value. `rotate_jobs` is where all the action happens. The - scheduler picks `replicator.max_churn` running jobs to stop and then picks - the same number of pending jobs to start. The default value of `max_churn` - is 20. So by default every minute, 20 running jobs are stopped, and 20 new - pending jobs are started. - - Before moving on it is worth pointing out that scheduler treats continuous - and non-continuous replications differently. Normal (non-continuous) - replications once started will be allowed to run to completion. That - behavior is to preserve their semantics of replicating a snapshot of the - source database to the target. For example if new documents are added to - the source after the replication are started, those updates should not show - up on the target database. Stopping and restarting a normal replication - would violate that constraint. The only exception to the rule is the user - explicitly reduces `replicator.max_jobs` configuration value. Even then - scheduler will first attempt to stop as many continuous jobs as possible - and only if it has no choice left will it stop normal jobs. - - Keeping that in mind and going back to the scheduling algorithm, the next - interesting part is how the scheduler picks which jobs to stop and which - ones to start: - - * Stopping: When picking jobs to stop the scheduler will pick longest - running continuous jobs first. The sorting callback function to get the - longest running jobs is unsurprisingly called `longest_running/2`. To - pick the longest running jobs it looks at the most recent `started` - event. After it gets a sorted list by longest running, it simply picks - first few depending on the value of `max_churn` using `lists:sublist/2`. - Then those jobs are stopped. - - * Starting: When starting the scheduler will pick the jobs which have been - waiting the longest. Surprisingly, in this case it also looks at the - `started` timestamp and picks the jobs which have the oldest `started` - timestamp. If there are 3 jobs, A[started=10], B[started=7], - C[started=9], then B will be picked first, then C then A. This ensures - that jobs are not starved, which is a classic scheduling pitfall. - - In the code, the list of pending jobs is picked slightly differently than - how the list of running jobs is picked. `pending_jobs/1` uses `ets:foldl` - to iterate over all the pending jobs. As it iterates it tries to keep only - up to `max_churn` oldest items in the accumulator. The reason this is done - is that there could be a very large number of pending jobs and loading them - all in a list (making a copy from ETS) and then sorting it can be quite - expensive performance-wise. The tricky part of the iteration is happening - in `pending_maybe_replace/2`. A `gb_sets` ordered set is used to keep top-N - longest waiting jobs so far. The code has a comment with a helpful example - on how this algorithm works. - - The last part is how the scheduler treats jobs which keep crashing. If a - job is started but then crashes then that job is considered unhealthy. The - main idea is to penalize such jobs such that they are forced to wait an - exponentially larger amount of time with each consecutive crash. A central - part to this algorithm is determining what forms a sequence of consecutive - crashes. If a job starts then quickly crashes, and after its next start it - crashes again, then that would become a sequence of 2 consecutive crashes. - The penalty then would be calculated by `backoff_micros/1` function where - the consecutive crash count would end up as the exponent. However for - practical concerns there is also maximum penalty specified and that's the - equivalent of 10 consecutive crashes. Timewise it ends up being about 8 - hours. That means even a job which keep crashing will still get a chance to - retry once in 8 hours. - - There is subtlety when calculating consecutive crashes and that is deciding - when the sequence stops. That is, figuring out when a job becomes healthy - again. The scheduler considers a job healthy again if it started and hasn't - crashed in a while. The "in a while" part is a configuration parameter - `replicator.health_threshold` defaulting to 2 minutes. This means if job - has been crashing, for example 5 times in a row, but then on the 6th - attempt it started and ran for more than 2 minutes then it is considered - healthy again. The next time it crashes its sequence of consecutive crashes - will restart at 1. - - * `couch_replicator_scheduler_sup`: This module is a supervisor for running - replication tasks. The most interesting thing about it is perhaps that it is - not used to restart children. The scheduler handles restarts and error - handling backoffs. - - * `couch_replicator_doc_processor`: The doc processor component is in charge - of processing replication document updates, turning them into replication - jobs and adding those jobs to the scheduler. Unfortunately the only reason - there is even a `couch_replicator_doc_processor` gen_server, instead of - replication documents being turned to jobs and inserted into the scheduler - directly, is because of one corner case -- filtered replications using - custom (JavaScript mostly) filters. More about this later. It is better to - start with how updates flow through the doc processor: - - Document updates come via the `db_change/3` callback from - `couch_multidb_changes`, then go to the `process_change/2` function. - - In `process_change/2` a few decisions are made regarding how to proceed. The - first is "ownership" check. That is a check if the replication document - belongs on the current node. If not, then it is ignored. In a cluster, in - general there would be N copies of a document change and we only want to run - the replication once. Another check is to see if the update has arrived - during a time when the cluster is considered "unstable". If so, it is - ignored, because soon enough a rescan will be launched and all the documents - will be reprocessed anyway. Another noteworthy thing in `process_change/2` - is handling of upgrades from the previous version of the replicator when - transient states were written to the documents. Two such states were - `triggered` and `error`. Both of those states are removed from the document - then then update proceeds in the regular fashion. `failed` documents are - also ignored here. `failed` is a terminal state which indicates the document - was somehow unsuitable to become a replication job (it was malformed or a - duplicate). Otherwise the state update proceeds to `process_updated/2`. - - `process_updated/2` is where replication document updates are parsed and - translated to `#rep{}` records. The interesting part here is that the - replication ID isn't calculated yet. Unsurprisingly the parsing function - used is called `parse_rep_doc_without_id/1`. Also note that up until now - everything is still running in the context of the `db_change/3` callback. - After replication filter type is determined the update gets passed to the - `couch_replicator_doc_processor` gen_server. - - The `couch_replicator_doc_processor` gen_server's main role is to try to - calculate replication IDs for each `#rep{}` record passed to it, then add - that as a scheduler job. As noted before, `#rep{}` records parsed up until - this point lack a replication ID. The reason is replication ID calculation - includes a hash of the filter code. And because user defined replication - filters live in the source DB, which most likely involves a remote network - fetch there is a possibility of blocking and a need to handle various - network failures and retries. Because of that `replication_doc_processor` - dispatches all of that blocking and retrying to a separate `worker` process - (`couch_replicator_doc_processor_worker` module). - - `couch_replicator_doc_processor_worker` is where replication IDs are - calculated for each individual doc update. There are two separate modules - which contain utilities related to replication ID calculation: - `couch_replicator_ids` and `couch_replicator_filters`. The first one - contains ID calculation algorithms and the second one knows how to parse and - fetch user filters from a remote source DB. One interesting thing about the - worker is that it is time-bounded and is guaranteed to not be stuck forever. - That's why it spawns an extra process with `spawn_monitor`, just so it can - do an `after` clause in receive and bound the maximum time this worker will - take. - - A doc processor worker will either succeed or fail but never block for too - long. Success and failure are returned as exit values. Those are handled in - the `worker_returned/3` doc processor clauses. The most common pattern is - that a worker is spawned to add a replication job, it does so and returns a - `{ok, ReplicationID}` value in `worker_returned`. - - In case of a filtered replication with custom user code there are two case to - consider: - - 1. Filter fetching code has failed. In that case worker returns an error. - But because the error could be a transient network error, another - worker is started to try again. It could fail and return an error - again, then another one is started and so on. However each consecutive - worker will do an exponential backoff, not unlike the scheduler code. - `error_backoff/1` is where the backoff period is calculated. - Consecutive errors are held in the `errcnt` field in the ETS table. - - 2. Fetching filter code succeeds, replication ID is calculated and job is - added to the scheduler. However, because this is a filtered replication - the source database could get an updated filter. Which means - replication ID could change again. So the worker is spawned to - periodically check the filter and see if it changed. In other words doc - processor will do the work of checking for filtered replications, get - an updated filter and will then refresh the replication job (remove the - old one and add a new one with a different ID). The filter checking - interval is determined by the `filter_backoff` function. An unusual - thing about that function is it calculates the period based on the size - of the ETS table. The idea there is for a few replications in a - cluster, it's ok to check filter changes often. But when there are lots - of replications running, having each one checking their filter often is - not a good idea. + failures are happening the code ensures the ETS tables are empty in order + to have a lower impact on a running system. - * `couch_replicator_db_changes`: This process specializes and configures - `couch_multidb_changes` so that it looks for `_replicator` suffixed shards - and makes sure to restart it when node membership changes. -- cgit v1.2.1 From ae858196848cf9533dfa03a2006227481f47388d Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Fri, 28 Aug 2020 04:36:18 -0400 Subject: Update and clean up tests Update tests to use the new replicator. Also clean up redundancy and re-use some of the newer macros from fabric2 (?TDEF_FE). Make sure replicator tests are included in `make check` --- Makefile | 2 +- .../couch_replicator_attachments_too_large.erl | 90 ++--- .../eunit/couch_replicator_connection_tests.erl | 274 +++++++------ ...replicator_create_target_with_options_tests.erl | 129 +++--- .../test/eunit/couch_replicator_db_tests.erl | 332 ++++++++++++++++ .../test/eunit/couch_replicator_filtered_tests.erl | 348 ++++++++-------- .../eunit/couch_replicator_httpc_pool_tests.erl | 125 +++--- .../eunit/couch_replicator_id_too_long_tests.erl | 91 ++--- .../eunit/couch_replicator_job_server_tests.erl | 437 +++++++++++++++++++++ .../eunit/couch_replicator_large_atts_tests.erl | 123 +++--- .../eunit/couch_replicator_many_leaves_tests.erl | 241 +++++------- .../eunit/couch_replicator_missing_stubs_tests.erl | 179 ++++----- .../test/eunit/couch_replicator_proxy_tests.erl | 135 +++---- .../eunit/couch_replicator_rate_limiter_tests.erl | 77 ++-- ...ch_replicator_retain_stats_between_job_runs.erl | 223 +++-------- .../test/eunit/couch_replicator_selector_tests.erl | 136 +++---- ...ch_replicator_small_max_request_size_target.erl | 190 +++------ .../test/eunit/couch_replicator_test_helper.erl | 323 +++++++++++---- .../couch_replicator_transient_jobs_tests.erl | 106 +++++ .../couch_replicator_use_checkpoints_tests.erl | 207 +++------- test/elixir/test/replication_test.exs | 5 +- 21 files changed, 2210 insertions(+), 1563 deletions(-) create mode 100644 src/couch_replicator/test/eunit/couch_replicator_db_tests.erl create mode 100644 src/couch_replicator/test/eunit/couch_replicator_job_server_tests.erl create mode 100644 src/couch_replicator/test/eunit/couch_replicator_transient_jobs_tests.erl diff --git a/Makefile b/Makefile index e8d366296..35b62f949 100644 --- a/Makefile +++ b/Makefile @@ -163,7 +163,7 @@ endif .PHONY: check check: all @$(MAKE) emilio - make eunit apps=couch_eval,couch_expiring_cache,ctrace,couch_jobs,couch_views,fabric,mango,chttpd + make eunit apps=couch_eval,couch_expiring_cache,ctrace,couch_jobs,couch_views,fabric,mango,chttpd,couch_replicator make elixir tests=test/elixir/test/basics_test.exs,test/elixir/test/replication_test.exs,test/elixir/test/map_test.exs,test/elixir/test/all_docs_test.exs,test/elixir/test/bulk_docs_test.exs make exunit apps=chttpd make mango-test diff --git a/src/couch_replicator/test/eunit/couch_replicator_attachments_too_large.erl b/src/couch_replicator/test/eunit/couch_replicator_attachments_too_large.erl index ac4bb84f3..0e7e0ea5a 100644 --- a/src/couch_replicator/test/eunit/couch_replicator_attachments_too_large.erl +++ b/src/couch_replicator/test/eunit/couch_replicator_attachments_too_large.erl @@ -12,72 +12,60 @@ -module(couch_replicator_attachments_too_large). + -include_lib("couch/include/couch_eunit.hrl"). -include_lib("couch/include/couch_db.hrl"). -include_lib("couch_replicator/src/couch_replicator.hrl"). - - -setup(_) -> - Ctx = test_util:start_couch([couch_replicator]), - Source = create_db(), - create_doc_with_attachment(Source, <<"doc">>, 1000), - Target = create_db(), - {Ctx, {Source, Target}}. - - -teardown(_, {Ctx, {Source, Target}}) -> - delete_db(Source), - delete_db(Target), - config:delete("couchdb", "max_attachment_size"), - ok = test_util:stop_couch(Ctx). +-include_lib("fabric/test/fabric2_test.hrl"). attachment_too_large_replication_test_() -> - Pairs = [{remote, remote}], { - "Attachment size too large replication tests", + setup, + fun couch_replicator_test_helper:start_couch/0, + fun couch_replicator_test_helper:stop_couch/1, { - foreachx, - fun setup/1, fun teardown/2, - [{Pair, fun should_succeed/2} || Pair <- Pairs] ++ - [{Pair, fun should_fail/2} || Pair <- Pairs] + foreach, + fun setup/0, + fun teardown/1, + [ + ?TDEF_FE(t_should_succeed), + ?TDEF_FE(t_should_fail) + ] } }. -should_succeed({From, To}, {_Ctx, {Source, Target}}) -> - RepObject = {[ - {<<"source">>, db_url(From, Source)}, - {<<"target">>, db_url(To, Target)} - ]}, - config:set("couchdb", "max_attachment_size", "1000", _Persist = false), - {ok, _} = couch_replicator:replicate(RepObject, ?ADMIN_USER), - ?_assertEqual(ok, couch_replicator_test_helper:compare_dbs(Source, Target)). +setup() -> + Source = couch_replicator_test_helper:create_db(), + create_doc_with_attachment(Source, <<"doc">>, 1000), + Target = couch_replicator_test_helper:create_db(), + {Source, Target}. -should_fail({From, To}, {_Ctx, {Source, Target}}) -> - RepObject = {[ - {<<"source">>, db_url(From, Source)}, - {<<"target">>, db_url(To, Target)} - ]}, - config:set("couchdb", "max_attachment_size", "999", _Persist = false), - {ok, _} = couch_replicator:replicate(RepObject, ?ADMIN_USER), - ?_assertError({badmatch, {not_found, missing}}, - couch_replicator_test_helper:compare_dbs(Source, Target)). +teardown({Source, Target}) -> + config:delete("couchdb", "max_attachment_size", false), + couch_replicator_test_helper:delete_db(Source), + couch_replicator_test_helper:delete_db(Target). -create_db() -> - DbName = ?tempdb(), - {ok, Db} = couch_db:create(DbName, [?ADMIN_CTX]), - ok = couch_db:close(Db), - DbName. +t_should_succeed({Source, Target}) -> + config:set("couchdb", "max_attachment_size", "1000", false), + {ok, _} = couch_replicator_test_helper:replicate(Source, Target), + ?assertEqual(ok, couch_replicator_test_helper:compare_dbs(Source, Target)). + + +t_should_fail({Source, Target}) -> + config:set("couchdb", "max_attachment_size", "999", false), + {ok, _} = couch_replicator_test_helper:replicate(Source, Target), + ExceptIds = [<<"doc">>], + ?assertEqual(ok, couch_replicator_test_helper:compare_dbs(Source, + Target, ExceptIds)). create_doc_with_attachment(DbName, DocId, AttSize) -> - {ok, Db} = couch_db:open(DbName, [?ADMIN_CTX]), Doc = #doc{id = DocId, atts = att(AttSize)}, - {ok, _} = couch_db:update_doc(Db, Doc, []), - couch_db:close(Db), + couch_replicator_test_helper:create_docs(DbName, [Doc]), ok. @@ -90,13 +78,3 @@ att(Size) when is_integer(Size), Size >= 1 -> << <<"x">> || _ <- lists:seq(1, Size) >> end} ])]. - - -delete_db(DbName) -> - ok = couch_server:delete(DbName, [?ADMIN_CTX]). - - -db_url(remote, DbName) -> - Addr = config:get("httpd", "bind_address", "127.0.0.1"), - Port = mochiweb_socket_server:get(couch_httpd, port), - ?l2b(io_lib:format("http://~s:~b/~s", [Addr, Port, DbName])). diff --git a/src/couch_replicator/test/eunit/couch_replicator_connection_tests.erl b/src/couch_replicator/test/eunit/couch_replicator_connection_tests.erl index e75cc5a63..df30db25d 100644 --- a/src/couch_replicator/test/eunit/couch_replicator_connection_tests.erl +++ b/src/couch_replicator/test/eunit/couch_replicator_connection_tests.erl @@ -12,187 +12,176 @@ -module(couch_replicator_connection_tests). + -include_lib("couch/include/couch_eunit.hrl"). -include_lib("couch/include/couch_db.hrl"). - --define(TIMEOUT, 1000). - - -setup() -> - Host = config:get("httpd", "bind_address", "127.0.0.1"), - Port = config:get("httpd", "port", "5984"), - {Host, Port}. - -teardown(_) -> - ok. +-include_lib("fabric/test/fabric2_test.hrl"). httpc_pool_test_() -> { - "replicator connection sharing tests", + "Replicator connection sharing tests", { setup, - fun() -> test_util:start_couch([couch_replicator]) end, fun test_util:stop_couch/1, + fun couch_replicator_test_helper:start_couch/0, + fun couch_replicator_test_helper:stop_couch/1, { foreach, - fun setup/0, fun teardown/1, + fun setup/0, + fun teardown/1, [ - fun connections_shared_after_release/1, - fun connections_not_shared_after_owner_death/1, - fun idle_connections_closed/1, - fun test_owner_monitors/1, - fun worker_discards_creds_on_create/1, - fun worker_discards_url_creds_after_request/1, - fun worker_discards_creds_in_headers_after_request/1, - fun worker_discards_proxy_creds_after_request/1 + ?TDEF_FE(connections_shared_after_release), + ?TDEF_FE(connections_not_shared_after_owner_death), + ?TDEF_FE(idle_connections_closed), + ?TDEF_FE(test_owner_monitors), + ?TDEF_FE(worker_discards_creds_on_create), + ?TDEF_FE(worker_discards_url_creds_after_request), + ?TDEF_FE(worker_discards_creds_in_headers_after_request), + ?TDEF_FE(worker_discards_proxy_creds_after_request) ] } } }. +setup() -> + Host = config:get("chttpd", "bind_address", "127.0.0.1"), + Port = config:get("chttpd", "port", "5984"), + {Host, Port}. + + +teardown(_) -> + ok. + + connections_shared_after_release({Host, Port}) -> - ?_test(begin - URL = "http://" ++ Host ++ ":" ++ Port, - Self = self(), - {ok, Pid} = couch_replicator_connection:acquire(URL), - couch_replicator_connection:release(Pid), - spawn(fun() -> - Self ! couch_replicator_connection:acquire(URL) - end), - receive - {ok, Pid2} -> - ?assertEqual(Pid, Pid2) - end - end). + URL = "http://" ++ Host ++ ":" ++ Port, + Self = self(), + {ok, Pid} = couch_replicator_connection:acquire(URL), + couch_replicator_connection:release(Pid), + spawn(fun() -> + Self ! couch_replicator_connection:acquire(URL) + end), + receive + {ok, Pid2} -> + ?assertEqual(Pid, Pid2) + end. connections_not_shared_after_owner_death({Host, Port}) -> - ?_test(begin - URL = "http://" ++ Host ++ ":" ++ Port, - Self = self(), - spawn(fun() -> - Self ! couch_replicator_connection:acquire(URL), - error("simulate division by zero without compiler warning") - end), - receive - {ok, Pid} -> - {ok, Pid2} = couch_replicator_connection:acquire(URL), - ?assertNotEqual(Pid, Pid2), - MRef = monitor(process, Pid), - receive {'DOWN', MRef, process, Pid, _Reason} -> + URL = "http://" ++ Host ++ ":" ++ Port, + Self = self(), + spawn(fun() -> + Self ! couch_replicator_connection:acquire(URL), + error("simulate division by zero without compiler warning") + end), + receive + {ok, Pid} -> + {ok, Pid2} = couch_replicator_connection:acquire(URL), + ?assertNotEqual(Pid, Pid2), + MRef = monitor(process, Pid), + receive + {'DOWN', MRef, process, Pid, _Reason} -> ?assert(not is_process_alive(Pid)); - Other -> throw(Other) - end - end - end). + Other -> + throw(Other) + end + end. idle_connections_closed({Host, Port}) -> - ?_test(begin - URL = "http://" ++ Host ++ ":" ++ Port, - {ok, Pid} = couch_replicator_connection:acquire(URL), - couch_replicator_connection ! close_idle_connections, - ?assert(ets:member(couch_replicator_connection, Pid)), - % block until idle connections have closed - sys:get_status(couch_replicator_connection), - couch_replicator_connection:release(Pid), - couch_replicator_connection ! close_idle_connections, - % block until idle connections have closed - sys:get_status(couch_replicator_connection), - ?assert(not ets:member(couch_replicator_connection, Pid)) - end). + URL = "http://" ++ Host ++ ":" ++ Port, + {ok, Pid} = couch_replicator_connection:acquire(URL), + couch_replicator_connection ! close_idle_connections, + ?assert(ets:member(couch_replicator_connection, Pid)), + % block until idle connections have closed + sys:get_status(couch_replicator_connection), + couch_replicator_connection:release(Pid), + couch_replicator_connection ! close_idle_connections, + % block until idle connections have closed + sys:get_status(couch_replicator_connection), + ?assert(not ets:member(couch_replicator_connection, Pid)). test_owner_monitors({Host, Port}) -> - ?_test(begin - URL = "http://" ++ Host ++ ":" ++ Port, - {ok, Worker0} = couch_replicator_connection:acquire(URL), - assert_monitors_equal([{process, self()}]), - couch_replicator_connection:release(Worker0), - assert_monitors_equal([]), - {Workers, Monitors} = lists:foldl(fun(_, {WAcc, MAcc}) -> - {ok, Worker1} = couch_replicator_connection:acquire(URL), - MAcc1 = [{process, self()} | MAcc], - assert_monitors_equal(MAcc1), - {[Worker1 | WAcc], MAcc1} - end, {[], []}, lists:seq(1,5)), - lists:foldl(fun(Worker2, Acc) -> - [_ | NewAcc] = Acc, - couch_replicator_connection:release(Worker2), - assert_monitors_equal(NewAcc), - NewAcc - end, Monitors, Workers) - end). + URL = "http://" ++ Host ++ ":" ++ Port, + {ok, Worker0} = couch_replicator_connection:acquire(URL), + assert_monitors_equal([{process, self()}]), + couch_replicator_connection:release(Worker0), + assert_monitors_equal([]), + {Workers, Monitors} = lists:foldl(fun(_, {WAcc, MAcc}) -> + {ok, Worker1} = couch_replicator_connection:acquire(URL), + MAcc1 = [{process, self()} | MAcc], + assert_monitors_equal(MAcc1), + {[Worker1 | WAcc], MAcc1} + end, {[], []}, lists:seq(1, 5)), + lists:foldl(fun(Worker2, Acc) -> + [_ | NewAcc] = Acc, + couch_replicator_connection:release(Worker2), + assert_monitors_equal(NewAcc), + NewAcc + end, Monitors, Workers). worker_discards_creds_on_create({Host, Port}) -> - ?_test(begin - {User, Pass, B64Auth} = user_pass(), - URL = "http://" ++ User ++ ":" ++ Pass ++ "@" ++ Host ++ ":" ++ Port, - {ok, WPid} = couch_replicator_connection:acquire(URL), - Internals = worker_internals(WPid), - ?assert(string:str(Internals, B64Auth) =:= 0), - ?assert(string:str(Internals, Pass) =:= 0) - end). + {User, Pass, B64Auth} = user_pass(), + URL = "http://" ++ User ++ ":" ++ Pass ++ "@" ++ Host ++ ":" ++ Port, + {ok, WPid} = couch_replicator_connection:acquire(URL), + Internals = worker_internals(WPid), + ?assert(string:str(Internals, B64Auth) =:= 0), + ?assert(string:str(Internals, Pass) =:= 0). worker_discards_url_creds_after_request({Host, _}) -> - ?_test(begin - {User, Pass, B64Auth} = user_pass(), - {Port, ServerPid} = server(), - PortStr = integer_to_list(Port), - URL = "http://" ++ User ++ ":" ++ Pass ++ "@" ++ Host ++ ":" ++ PortStr, - {ok, WPid} = couch_replicator_connection:acquire(URL), - ?assertMatch({ok, "200", _, _}, send_req(WPid, URL, [], [])), - Internals = worker_internals(WPid), - ?assert(string:str(Internals, B64Auth) =:= 0), - ?assert(string:str(Internals, Pass) =:= 0), - couch_replicator_connection:release(WPid), - unlink(ServerPid), - exit(ServerPid, kill) - end). + {User, Pass, B64Auth} = user_pass(), + {Port, ServerPid} = server(), + PortStr = integer_to_list(Port), + URL = "http://" ++ User ++ ":" ++ Pass ++ "@" ++ Host ++ ":" ++ PortStr, + {ok, WPid} = couch_replicator_connection:acquire(URL), + ?assertMatch({ok, "200", _, _}, send_req(WPid, URL, [], [])), + Internals = worker_internals(WPid), + ?assert(string:str(Internals, B64Auth) =:= 0), + ?assert(string:str(Internals, Pass) =:= 0), + couch_replicator_connection:release(WPid), + unlink(ServerPid), + exit(ServerPid, kill). worker_discards_creds_in_headers_after_request({Host, _}) -> - ?_test(begin - {_User, Pass, B64Auth} = user_pass(), - {Port, ServerPid} = server(), - PortStr = integer_to_list(Port), - URL = "http://" ++ Host ++ ":" ++ PortStr, - {ok, WPid} = couch_replicator_connection:acquire(URL), - Headers = [{"Authorization", "Basic " ++ B64Auth}], - ?assertMatch({ok, "200", _, _}, send_req(WPid, URL, Headers, [])), - Internals = worker_internals(WPid), - ?assert(string:str(Internals, B64Auth) =:= 0), - ?assert(string:str(Internals, Pass) =:= 0), - couch_replicator_connection:release(WPid), - unlink(ServerPid), - exit(ServerPid, kill) - end). + {_User, Pass, B64Auth} = user_pass(), + {Port, ServerPid} = server(), + PortStr = integer_to_list(Port), + URL = "http://" ++ Host ++ ":" ++ PortStr, + {ok, WPid} = couch_replicator_connection:acquire(URL), + Headers = [{"Authorization", "Basic " ++ B64Auth}], + ?assertMatch({ok, "200", _, _}, send_req(WPid, URL, Headers, [])), + Internals = worker_internals(WPid), + ?assert(string:str(Internals, B64Auth) =:= 0), + ?assert(string:str(Internals, Pass) =:= 0), + couch_replicator_connection:release(WPid), + unlink(ServerPid), + exit(ServerPid, kill). worker_discards_proxy_creds_after_request({Host, _}) -> - ?_test(begin - {User, Pass, B64Auth} = user_pass(), - {Port, ServerPid} = server(), - PortStr = integer_to_list(Port), - URL = "http://" ++ Host ++ ":" ++ PortStr, - {ok, WPid} = couch_replicator_connection:acquire(URL), - Opts = [ - {proxy_host, Host}, - {proxy_port, Port}, - {proxy_user, User}, - {proxy_pass, Pass} - ], - ?assertMatch({ok, "200", _, _}, send_req(WPid, URL, [], Opts)), - Internals = worker_internals(WPid), - ?assert(string:str(Internals, B64Auth) =:= 0), - ?assert(string:str(Internals, Pass) =:= 0), - couch_replicator_connection:release(WPid), - unlink(ServerPid), - exit(ServerPid, kill) - end). + {User, Pass, B64Auth} = user_pass(), + {Port, ServerPid} = server(), + PortStr = integer_to_list(Port), + URL = "http://" ++ Host ++ ":" ++ PortStr, + {ok, WPid} = couch_replicator_connection:acquire(URL), + Opts = [ + {proxy_host, Host}, + {proxy_port, Port}, + {proxy_user, User}, + {proxy_pass, Pass} + ], + ?assertMatch({ok, "200", _, _}, send_req(WPid, URL, [], Opts)), + Internals = worker_internals(WPid), + ?assert(string:str(Internals, B64Auth) =:= 0), + ?assert(string:str(Internals, Pass) =:= 0), + couch_replicator_connection:release(WPid), + unlink(ServerPid), + exit(ServerPid, kill). send_req(WPid, URL, Headers, Opts) -> @@ -237,5 +226,6 @@ server_responder(LSock) -> assert_monitors_equal(ShouldBe) -> sys:get_status(couch_replicator_connection), - {monitors, Monitors} = process_info(whereis(couch_replicator_connection), monitors), + {monitors, Monitors} = process_info(whereis(couch_replicator_connection), + monitors), ?assertEqual(Monitors, ShouldBe). diff --git a/src/couch_replicator/test/eunit/couch_replicator_create_target_with_options_tests.erl b/src/couch_replicator/test/eunit/couch_replicator_create_target_with_options_tests.erl index 63310d39e..c957fc199 100644 --- a/src/couch_replicator/test/eunit/couch_replicator_create_target_with_options_tests.erl +++ b/src/couch_replicator/test/eunit/couch_replicator_create_target_with_options_tests.erl @@ -12,132 +12,137 @@ -module(couch_replicator_create_target_with_options_tests). + -include_lib("couch/include/couch_eunit.hrl"). -include_lib("couch/include/couch_db.hrl"). -include_lib("couch_replicator/src/couch_replicator.hrl"). - --define(USERNAME, "rep_admin"). --define(PASSWORD, "secret"). - -setup() -> - Ctx = test_util:start_couch([fabric, mem3, couch_replicator, chttpd]), - Hashed = couch_passwords:hash_admin_password(?PASSWORD), - ok = config:set("admins", ?USERNAME, ?b2l(Hashed), _Persist=false), - Source = ?tempdb(), - Target = ?tempdb(), - {Ctx, {Source, Target}}. - - -teardown({Ctx, {_Source, _Target}}) -> - config:delete("admins", ?USERNAME), - ok = test_util:stop_couch(Ctx). +-include_lib("fabric/test/fabric2_test.hrl"). create_target_with_options_replication_test_() -> { "Create target with range partitions tests", { - foreach, - fun setup/0, fun teardown/1, - [ - fun should_create_target_with_q_4/1, - fun should_create_target_with_q_2_n_1/1, - fun should_create_target_with_default/1, - fun should_not_create_target_with_q_any/1 - ] + setup, + fun couch_replicator_test_helper:start_couch/0, + fun couch_replicator_test_helper:stop_couch/1, + { + foreach, + fun setup/0, + fun teardown/1, + [ + ?TDEF_FE(should_create_target_with_q_4), + ?TDEF_FE(should_create_target_with_q_2_n_1), + ?TDEF_FE(should_create_target_with_default), + ?TDEF_FE(should_not_create_target_with_q_any) + ] + } } }. -should_create_target_with_q_4({_Ctx, {Source, Target}}) -> +setup() -> + Source = ?tempdb(), + Target = ?tempdb(), + {Source, Target}. + + +teardown({Source, Target}) -> + delete_db(Source), + delete_db(Target). + + +should_create_target_with_q_4({Source, Target}) -> RepObject = {[ - {<<"source">>, db_url(Source)}, - {<<"target">>, db_url(Target)}, + {<<"source">>, Source}, + {<<"target">>, Target}, {<<"create_target">>, true}, {<<"create_target_params">>, {[{<<"q">>, <<"4">>}]}} ]}, create_db(Source), create_doc(Source), - {ok, _} = couch_replicator:replicate(RepObject, ?ADMIN_USER), + {ok, _} = couch_replicator_test_helper:replicate(RepObject), - {ok, TargetInfo} = fabric:get_db_info(Target), + TargetInfo = db_info(Target), {ClusterInfo} = couch_util:get_value(cluster, TargetInfo), delete_db(Source), delete_db(Target), - ?_assertEqual(4, couch_util:get_value(q, ClusterInfo)). + ?assertEqual(0, couch_util:get_value(q, ClusterInfo)). -should_create_target_with_q_2_n_1({_Ctx, {Source, Target}}) -> +should_create_target_with_q_2_n_1({Source, Target}) -> RepObject = {[ - {<<"source">>, db_url(Source)}, - {<<"target">>, db_url(Target)}, + {<<"source">>, Source}, + {<<"target">>, Target}, {<<"create_target">>, true}, {<<"create_target_params">>, {[{<<"q">>, <<"2">>}, {<<"n">>, <<"1">>}]}} ]}, create_db(Source), create_doc(Source), - {ok, _} = couch_replicator:replicate(RepObject, ?ADMIN_USER), + {ok, _} = couch_replicator_test_helper:replicate(RepObject), - {ok, TargetInfo} = fabric:get_db_info(Target), + TargetInfo = db_info(Target), {ClusterInfo} = couch_util:get_value(cluster, TargetInfo), delete_db(Source), delete_db(Target), - [ - ?_assertEqual(2, couch_util:get_value(q, ClusterInfo)), - ?_assertEqual(1, couch_util:get_value(n, ClusterInfo)) - ]. + ?assertEqual(0, couch_util:get_value(q, ClusterInfo)), + ?assertEqual(0, couch_util:get_value(n, ClusterInfo)). -should_create_target_with_default({_Ctx, {Source, Target}}) -> +should_create_target_with_default({Source, Target}) -> RepObject = {[ - {<<"source">>, db_url(Source)}, - {<<"target">>, db_url(Target)}, + {<<"source">>, Source}, + {<<"target">>, Target}, {<<"create_target">>, true} ]}, create_db(Source), create_doc(Source), - {ok, _} = couch_replicator:replicate(RepObject, ?ADMIN_USER), + {ok, _} = couch_replicator_test_helper:replicate(RepObject), - {ok, TargetInfo} = fabric:get_db_info(Target), + TargetInfo = db_info(Target), {ClusterInfo} = couch_util:get_value(cluster, TargetInfo), - Q = config:get("cluster", "q", "8"), delete_db(Source), delete_db(Target), - ?_assertEqual(list_to_integer(Q), couch_util:get_value(q, ClusterInfo)). + ?assertEqual(0, couch_util:get_value(q, ClusterInfo)). -should_not_create_target_with_q_any({_Ctx, {Source, Target}}) -> +should_not_create_target_with_q_any({Source, Target}) -> RepObject = {[ - {<<"source">>, db_url(Source)}, - {<<"target">>, db_url(Target)}, + {<<"source">>, Source}, + {<<"target">>, Target}, {<<"create_target">>, false}, {<<"create_target_params">>, {[{<<"q">>, <<"1">>}]}} ]}, create_db(Source), create_doc(Source), - {error, _} = couch_replicator:replicate(RepObject, ?ADMIN_USER), - DbExist = is_list(catch mem3:shards(Target)), + {error, _} = couch_replicator_test_helper:replicate(RepObject), + Exists = try + fabric2_db:open(Target, [?ADMIN_CTX]), + ?assert(false) + catch + error:database_does_not_exist -> + database_does_not_exist + end, delete_db(Source), - ?_assertEqual(false, DbExist). + ?assertEqual(Exists, database_does_not_exist). create_doc(DbName) -> - Body = {[{<<"foo">>, <<"bar">>}]}, - NewDoc = #doc{body = Body}, - {ok, _} = fabric:update_doc(DbName, NewDoc, [?ADMIN_CTX]). + couch_replicator_test_helper:create_docs(DbName, [ + #{<<"_id">> => fabric2_util:uuid(), <<"foo">> => <<"bar">>} + ]). create_db(DbName) -> - ok = fabric:create_db(DbName, [?ADMIN_CTX]). + couch_replicator_test_helper:create_db(DbName). delete_db(DbName) -> - ok = fabric:delete_db(DbName, [?ADMIN_CTX]). + couch_replicator_test_helper:delete_db(DbName). -db_url(DbName) -> - Addr = config:get("chttpd", "bind_address", "127.0.0.1"), - Port = mochiweb_socket_server:get(chttpd, port), - ?l2b(io_lib:format("http://~s:~s@~s:~b/~s", [?USERNAME, ?PASSWORD, Addr, - Port, DbName])). +db_info(DbName) -> + {ok, Db} = fabric2_db:open(DbName, [?ADMIN_CTX]), + {ok, Info} = fabric2_db:get_db_info(Db), + Info. diff --git a/src/couch_replicator/test/eunit/couch_replicator_db_tests.erl b/src/couch_replicator/test/eunit/couch_replicator_db_tests.erl new file mode 100644 index 000000000..053441007 --- /dev/null +++ b/src/couch_replicator/test/eunit/couch_replicator_db_tests.erl @@ -0,0 +1,332 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_replicator_db_tests). + +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("couch/include/couch_db.hrl"). +-include_lib("couch_replicator/src/couch_replicator.hrl"). +-include_lib("fabric/test/fabric2_test.hrl"). + + +couch_replicator_db_test_() -> + { + "Replications are started from docs in _replicator dbs", + { + setup, + fun couch_replicator_test_helper:start_couch/0, + fun couch_replicator_test_helper:stop_couch/1, + { + foreach, + fun setup/0, + fun teardown/1, + [ + ?TDEF_FE(default_replicator_db_is_created), + ?TDEF_FE(continuous_replication_created_from_doc, 15), + ?TDEF_FE(normal_replication_created_from_doc, 15), + ?TDEF_FE(replicator_db_deleted, 15), + ?TDEF_FE(replicator_db_recreated, 15), + ?TDEF_FE(invalid_replication_docs), + ?TDEF_FE(duplicate_persistent_replication, 15), + ?TDEF_FE(duplicate_transient_replication, 30) + ] + } + } + }. + + +setup() -> + Source = couch_replicator_test_helper:create_db(), + create_doc(Source, #{<<"_id">> => <<"doc1">>}), + Target = couch_replicator_test_helper:create_db(), + Name = ?tempdb(), + RepDb = couch_replicator_test_helper:create_db(<>), + config:set("replicator", "stats_update_interval_sec", "0", false), + config:set("replicator", "create_replicator_db", "false", false), + config:set("couchdb", "enable_database_recovery", "false", false), + config:set("replicator", "min_backoff_penalty_sec", "1", false), + {Source, Target, RepDb}. + + +teardown({Source, Target, RepDb}) -> + config:delete("replicator", "stats_update_interval_sec", false), + config:delete("replicator", "create_replicator_db", false), + config:delete("couchdb", "enable_database_recovery", false), + config:delete("replicator", "min_backoff_penalty_sec", false), + + couch_replicator_test_helper:delete_db(RepDb), + couch_replicator_test_helper:delete_db(?REP_DB_NAME), + couch_replicator_test_helper:delete_db(Source), + couch_replicator_test_helper:delete_db(Target). + + +default_replicator_db_is_created({_, _, _}) -> + config:set("replicator", "create_replicator_db", "true", false), + ?assertEqual(ignore, couch_replicator:ensure_rep_db_exists()), + ?assertMatch({ok, #{}}, fabric2_db:open(?REP_DB_NAME, [])). + + +continuous_replication_created_from_doc({Source, Target, RepDb}) -> + DocId = <<"rdoc1">>, + RDoc = rep_doc(Source, Target, DocId, #{<<"continuous">> => true}), + create_doc(RepDb, RDoc), + wait_scheduler_docs_state(RepDb, DocId, <<"running">>), + + {Code, DocInfo} = scheduler_docs(RepDb, DocId), + ?assertEqual(200, Code), + ?assertMatch(#{ + <<"database">> := RepDb, + <<"doc_id">> := DocId + }, DocInfo), + + RepId = maps:get(<<"id">>, DocInfo), + + ?assertMatch([#{ + <<"database">> := RepDb, + <<"doc_id">> := DocId, + <<"id">> := RepId, + <<"state">> := <<"running">> + }], couch_replicator_test_helper:scheduler_jobs()), + + ?assertMatch({200, #{ + <<"database">> := RepDb, + <<"doc_id">> := DocId, + <<"id">> := RepId, + <<"state">> := <<"running">> + }}, scheduler_jobs(RepId)), + + delete_doc(RepDb, DocId), + wait_scheduler_docs_not_found(RepDb, DocId), + ?assertMatch({404, #{}}, scheduler_jobs(RepId)). + + +normal_replication_created_from_doc({Source, Target, RepDb}) -> + DocId = <<"rdoc2">>, + RDoc = rep_doc(Source, Target, DocId), + create_doc(RepDb, RDoc), + wait_scheduler_docs_state(RepDb, DocId, <<"completed">>), + + {Code, DocInfo} = scheduler_docs(RepDb, DocId), + ?assertEqual(200, Code), + ?assertMatch(#{ + <<"database">> := RepDb, + <<"doc_id">> := DocId, + <<"state">> := <<"completed">>, + <<"info">> := #{ + <<"docs_written">> := 1, + <<"docs_read">> := 1, + <<"missing_revisions_found">> := 1 + } + }, DocInfo), + + wait_doc_state(RepDb, DocId, <<"completed">>), + ?assertMatch(#{ + <<"_replication_state">> := <<"completed">>, + <<"_replication_stats">> := #{ + <<"docs_written">> := 1, + <<"docs_read">> := 1, + <<"missing_revisions_found">> := 1 + } + }, read_doc(RepDb, DocId)), + + delete_doc(RepDb, DocId), + wait_scheduler_docs_not_found(RepDb, DocId). + + +replicator_db_deleted({Source, Target, RepDb}) -> + DocId = <<"rdoc3">>, + RDoc = rep_doc(Source, Target, DocId, #{<<"continuous">> => true}), + create_doc(RepDb, RDoc), + wait_scheduler_docs_state(RepDb, DocId, <<"running">>), + fabric2_db:delete(RepDb, [?ADMIN_CTX]), + wait_scheduler_docs_not_found(RepDb, DocId). + + +replicator_db_recreated({Source, Target, RepDb}) -> + DocId = <<"rdoc4">>, + RDoc = rep_doc(Source, Target, DocId, #{<<"continuous">> => true}), + create_doc(RepDb, RDoc), + wait_scheduler_docs_state(RepDb, DocId, <<"running">>), + + config:set("couchdb", "enable_database_recovery", "true", false), + fabric2_db:delete(RepDb, [?ADMIN_CTX]), + wait_scheduler_docs_not_found(RepDb, DocId), + + Opts = [{start_key, RepDb}, {end_key, RepDb}], + {ok, [DbInfo]} = fabric2_db:list_deleted_dbs_info(Opts), + {_, Timestamp} = lists:keyfind(timestamp, 1, DbInfo), + ok = fabric2_db:undelete(RepDb, RepDb, Timestamp, [?ADMIN_CTX]), + wait_scheduler_docs_state(RepDb, DocId, <<"running">>), + + config:set("couchdb", "enable_database_recovery", "false", false), + fabric2_db:delete(RepDb, [?ADMIN_CTX]), + wait_scheduler_docs_not_found(RepDb, DocId). + + +invalid_replication_docs({_, _, RepDb}) -> + Docs = [ + #{ + <<"_id">> => <<"1">>, + <<"source">> => <<"http://127.0.0.1:1000">> + }, + #{ + <<"_id">> => <<"1">>, + <<"target">> => <<"http://127.0.0.1:1001">> + }, + #{ + <<"_id">> => <<"1">> + }, + #{ + <<"_id">> => <<"1">>, + <<"source">> => <<"http://127.0.0.1:1002">>, + <<"target">> => <<"http://127.0.0.1:1003">>, + <<"create_target">> => <<"bad">> + }, + #{ + <<"_id">> => <<"1">>, + <<"source">> => #{<<"junk">> => 42}, + <<"target">> => <<"http://127.0.0.1:1004">> + }, + #{ + <<"_id">> => <<"1">>, + <<"source">> => <<"http://127.0.0.1:1005">>, + <<"target">> => <<"http://127.0.0.1:1006">>, + <<"selector">> => #{}, + <<"filter">> => <<"a/b">> + }, + #{ + <<"_id">> => <<"1">>, + <<"source">> => <<"http://127.0.0.1:1007">>, + <<"target">> => <<"https://127.0.0.1:1008">>, + <<"doc_ids">> => 42 + } + ], + lists:foreach(fun(Doc) -> + ?assertThrow({forbidden, _}, create_doc(RepDb, Doc)) + end, Docs). + + +duplicate_persistent_replication({Source, Target, RepDb}) -> + DocId1 = <<"rdoc5">>, + RDoc1 = rep_doc(Source, Target, DocId1, #{<<"continuous">> => true}), + create_doc(RepDb, RDoc1), + wait_scheduler_docs_state(RepDb, DocId1, <<"running">>), + + DocId2 = <<"rdoc6">>, + RDoc2 = rep_doc(Source, Target, DocId2, #{<<"continuous">> => true}), + create_doc(RepDb, RDoc2), + wait_scheduler_docs_state(RepDb, DocId2, <<"failed">>), + + delete_doc(RepDb, DocId1), + delete_doc(RepDb, DocId2), + + wait_scheduler_docs_not_found(RepDb, DocId1), + wait_scheduler_docs_not_found(RepDb, DocId2). + + +duplicate_transient_replication({Source, Target, RepDb}) -> + {ok, _Pid, RepId} = couch_replicator_test_helper:replicate_continuous( + Source, Target), + + DocId = <<"rdoc7">>, + RDoc = rep_doc(Source, Target, DocId, #{<<"continuous">> => true}), + create_doc(RepDb, RDoc), + wait_scheduler_docs_state(RepDb, DocId, <<"crashing">>), + + couch_replicator_test_helper:cancel(RepId), + wait_reschedule_docs_state(RepDb, DocId, <<"running">>), + + delete_doc(RepDb, DocId), + wait_scheduler_docs_not_found(RepDb, DocId). + + +scheduler_jobs(Id) -> + SUrl = couch_replicator_test_helper:server_url(), + Url = lists:flatten(io_lib:format("~s/_scheduler/jobs/~s", [SUrl, Id])), + {ok, Code, _, Body} = test_request:get(Url, []), + {Code, jiffy:decode(Body, [return_maps])}. + + +scheduler_docs(DbName, DocId) -> + SUrl = couch_replicator_test_helper:server_url(), + Fmt = "~s/_scheduler/docs/~s/~s", + Url = lists:flatten(io_lib:format(Fmt, [SUrl, DbName, DocId])), + {ok, Code, _, Body} = test_request:get(Url, []), + {Code, jiffy:decode(Body, [return_maps])}. + + +rep_doc(Source, Target, DocId) -> + rep_doc(Source, Target, DocId, #{}). + + +rep_doc(Source, Target, DocId, #{} = Extra) -> + maps:merge(#{ + <<"_id">> => DocId, + <<"source">> => couch_replicator_test_helper:db_url(Source), + <<"target">> => couch_replicator_test_helper:db_url(Target) + }, Extra). + + +create_doc(DbName, Doc) -> + couch_replicator_test_helper:create_docs(DbName, [Doc]). + + +delete_doc(DbName, DocId) -> + {ok, Db} = fabric2_db:open(DbName, [?ADMIN_CTX]), + {ok, Doc} = fabric2_db:open_doc(Db, DocId), + Doc1 = Doc#doc{deleted = true}, + {ok, _} = fabric2_db:update_doc(Db, Doc1, []). + + +read_doc(DbName, DocId) -> + {ok, Db} = fabric2_db:open(DbName, [?ADMIN_CTX]), + {ok, Doc} = fabric2_db:open_doc(Db, DocId, [ejson_body]), + Body = Doc#doc.body, + couch_util:json_decode(couch_util:json_encode(Body), [return_maps]). + + +wait_scheduler_docs_state(DbName, DocId, State) -> + test_util:wait(fun() -> + case scheduler_docs(DbName, DocId) of + {200, #{<<"state">> := State} = Res} -> Res; + {_, _} -> wait + end + end, 10000, 250). + + +wait_scheduler_docs_not_found(DbName, DocId) -> + test_util:wait(fun() -> + case scheduler_docs(DbName, DocId) of + {404, _} -> ok; + {_, _} -> wait + end + end, 10000, 250). + + +wait_reschedule_docs_state(DbName, DocId, State) -> + test_util:wait(fun() -> + couch_replicator_job_server:reschedule(), + case scheduler_docs(DbName, DocId) of + {200, #{<<"state">> := State} = Res} -> Res; + {_, _} -> wait + end + end, 10000, 500). + + +wait_doc_state(DbName, DocId, State) -> + test_util:wait(fun() -> + case read_doc(DbName, DocId) of + #{<<"_replication_state">> := State} -> ok; + #{} -> wait + end + end, 10000, 250). diff --git a/src/couch_replicator/test/eunit/couch_replicator_filtered_tests.erl b/src/couch_replicator/test/eunit/couch_replicator_filtered_tests.erl index 7ac9a4d71..4d72c84f2 100644 --- a/src/couch_replicator/test/eunit/couch_replicator_filtered_tests.erl +++ b/src/couch_replicator/test/eunit/couch_replicator_filtered_tests.erl @@ -12,17 +12,20 @@ -module(couch_replicator_filtered_tests). + -include_lib("couch/include/couch_eunit.hrl"). -include_lib("couch/include/couch_db.hrl"). -include_lib("couch_replicator/src/couch_replicator.hrl"). +-include_lib("fabric/test/fabric2_test.hrl"). --define(DDOC, {[ - {<<"_id">>, <<"_design/filter_ddoc">>}, - {<<"filters">>, {[ - {<<"testfilter">>, <<" +-define(DDOC_ID, <<"_design/filter_ddoc">>). +-define(DDOC, #{ + <<"_id">> => ?DDOC_ID, + <<"filters">> => #{ + <<"testfilter">> => <<" function(doc, req){if (doc.class == 'mammal') return true;} - ">>}, - {<<"queryfilter">>, <<" + ">>, + <<"queryfilter">> => <<" function(doc, req) { if (doc.class && req.query.starts) { return doc.class.indexOf(req.query.starts) === 0; @@ -31,99 +34,87 @@ return false; } } - ">>} - ]}}, - {<<"views">>, {[ - {<<"mammals">>, {[ - {<<"map">>, <<" + ">> + }, + <<"views">> => #{ + <<"mammals">> => #{ + <<"map">> => <<" function(doc) { if (doc.class == 'mammal') { emit(doc._id, null); } } - ">>} - ]}} - ]}} -]}). - -setup(_) -> - Ctx = test_util:start_couch([couch_replicator]), - Source = create_db(), - create_docs(Source), - Target = create_db(), - {Ctx, {Source, Target}}. + ">> + } + } +}). -teardown(_, {Ctx, {Source, Target}}) -> - delete_db(Source), - delete_db(Target), - ok = application:stop(couch_replicator), - ok = test_util:stop_couch(Ctx). filtered_replication_test_() -> - Pairs = [{remote, remote}], { - "Filtered replication tests", + "Replications with filters tests", { - foreachx, - fun setup/1, fun teardown/2, - [{Pair, fun should_succeed/2} || Pair <- Pairs] + setup, + fun couch_replicator_test_helper:start_couch/0, + fun couch_replicator_test_helper:stop_couch/1, + { + foreach, + fun setup/0, + fun teardown/1, + [ + ?TDEF_FE(filtered_replication_test), + ?TDEF_FE(query_filtered_replication_test), + ?TDEF_FE(view_filtered_replication_test), + ?TDEF_FE(replication_id_changes_if_filter_changes, 15) + ] + } } }. -query_filtered_replication_test_() -> - Pairs = [{remote, remote}], - { - "Filtered with query replication tests", - { - foreachx, - fun setup/1, fun teardown/2, - [{Pair, fun should_succeed_with_query/2} || Pair <- Pairs] - } - }. -view_filtered_replication_test_() -> - Pairs = [{remote, remote}], - { - "Filtered with a view replication tests", - { - foreachx, - fun setup/1, fun teardown/2, - [{Pair, fun should_succeed_with_view/2} || Pair <- Pairs] - } - }. +setup() -> + Source = couch_replicator_test_helper:create_db(), + create_docs(Source), + Target = couch_replicator_test_helper:create_db(), + config:set("replicator", "stats_update_interval_sec", "0", false), + config:set("replicator", "interval_sec", "1", false), + {Source, Target}. + + +teardown({Source, Target}) -> + config:delete("replicator", "stats_update_interval_sec", false), + config:delete("replicator", "checkpoint_interval", false), + config:delete("replicator", "interval_sec", false), + couch_replicator_test_helper:delete_db(Source), + couch_replicator_test_helper:delete_db(Target). -should_succeed({From, To}, {_Ctx, {Source, Target}}) -> + +filtered_replication_test({Source, Target}) -> RepObject = {[ - {<<"source">>, db_url(From, Source)}, - {<<"target">>, db_url(To, Target)}, + {<<"source">>, Source}, + {<<"target">>, Target}, {<<"filter">>, <<"filter_ddoc/testfilter">>} ]}, - {ok, _} = couch_replicator:replicate(RepObject, ?ADMIN_USER), - %% FilteredFun is an Erlang version of following JS function - %% function(doc, req){if (doc.class == 'mammal') return true;} + {ok, _} = couch_replicator_test_helper:replicate(RepObject), FilterFun = fun(_DocId, {Props}) -> couch_util:get_value(<<"class">>, Props) == <<"mammal">> end, {ok, TargetDbInfo, AllReplies} = compare_dbs(Source, Target, FilterFun), - {lists:flatten(io_lib:format("~p -> ~p", [From, To])), [ - {"Target DB has proper number of docs", - ?_assertEqual(1, proplists:get_value(doc_count, TargetDbInfo))}, - {"Target DB doesn't have deleted docs", - ?_assertEqual(0, proplists:get_value(doc_del_count, TargetDbInfo))}, - {"All the docs filtered as expected", - ?_assert(lists:all(fun(Valid) -> Valid end, AllReplies))} - ]}. - -should_succeed_with_query({From, To}, {_Ctx, {Source, Target}}) -> + ?assertEqual(1, proplists:get_value(doc_count, TargetDbInfo)), + ?assertEqual(0, proplists:get_value(doc_del_count, TargetDbInfo)), + ?assert(lists:all(fun(Valid) -> Valid end, AllReplies)). + + +query_filtered_replication_test({Source, Target}) -> RepObject = {[ - {<<"source">>, db_url(From, Source)}, - {<<"target">>, db_url(To, Target)}, + {<<"source">>, Source}, + {<<"target">>, Target}, {<<"filter">>, <<"filter_ddoc/queryfilter">>}, {<<"query_params">>, {[ {<<"starts">>, <<"a">>} ]}} ]}, - {ok, _} = couch_replicator:replicate(RepObject, ?ADMIN_USER), + {ok, _} = couch_replicator_test_helper:replicate(RepObject), FilterFun = fun(_DocId, {Props}) -> case couch_util:get_value(<<"class">>, Props) of <<"a", _/binary>> -> true; @@ -131,109 +122,144 @@ should_succeed_with_query({From, To}, {_Ctx, {Source, Target}}) -> end end, {ok, TargetDbInfo, AllReplies} = compare_dbs(Source, Target, FilterFun), - {lists:flatten(io_lib:format("~p -> ~p", [From, To])), [ - {"Target DB has proper number of docs", - ?_assertEqual(2, proplists:get_value(doc_count, TargetDbInfo))}, - {"Target DB doesn't have deleted docs", - ?_assertEqual(0, proplists:get_value(doc_del_count, TargetDbInfo))}, - {"All the docs filtered as expected", - ?_assert(lists:all(fun(Valid) -> Valid end, AllReplies))} - ]}. - -should_succeed_with_view({From, To}, {_Ctx, {Source, Target}}) -> + ?assertEqual(2, proplists:get_value(doc_count, TargetDbInfo)), + ?assertEqual(0, proplists:get_value(doc_del_count, TargetDbInfo)), + ?assert(lists:all(fun(Valid) -> Valid end, AllReplies)). + + +view_filtered_replication_test({Source, Target}) -> RepObject = {[ - {<<"source">>, db_url(From, Source)}, - {<<"target">>, db_url(To, Target)}, + {<<"source">>, Source}, + {<<"target">>, Target}, {<<"filter">>, <<"_view">>}, {<<"query_params">>, {[ {<<"view">>, <<"filter_ddoc/mammals">>} ]}} ]}, - {ok, _} = couch_replicator:replicate(RepObject, ?ADMIN_USER), + {ok, _} = couch_replicator_test_helper:replicate(RepObject), FilterFun = fun(_DocId, {Props}) -> couch_util:get_value(<<"class">>, Props) == <<"mammal">> end, {ok, TargetDbInfo, AllReplies} = compare_dbs(Source, Target, FilterFun), - {lists:flatten(io_lib:format("~p -> ~p", [From, To])), [ - {"Target DB has proper number of docs", - ?_assertEqual(1, proplists:get_value(doc_count, TargetDbInfo))}, - {"Target DB doesn't have deleted docs", - ?_assertEqual(0, proplists:get_value(doc_del_count, TargetDbInfo))}, - {"All the docs filtered as expected", - ?_assert(lists:all(fun(Valid) -> Valid end, AllReplies))} - ]}. + ?assertEqual(1, proplists:get_value(doc_count, TargetDbInfo)), + ?assertEqual(0, proplists:get_value(doc_del_count, TargetDbInfo)), + ?assert(lists:all(fun(Valid) -> Valid end, AllReplies)). + + +replication_id_changes_if_filter_changes({Source, Target}) -> + config:set("replicator", "checkpoint_interval", "500", false), + Rep = {[ + {<<"source">>, Source}, + {<<"target">>, Target}, + {<<"filter">>, <<"filter_ddoc/testfilter">>}, + {<<"continuous">>, true} + ]}, + {ok, _, RepId1} = couch_replicator_test_helper:replicate_continuous(Rep), + + wait_scheduler_docs_written(1), + + ?assertMatch([#{<<"id">> := RepId1}], + couch_replicator_test_helper:scheduler_jobs()), + + FilterFun1 = fun(_, {Props}) -> + couch_util:get_value(<<"class">>, Props) == <<"mammal">> + end, + {ok, TargetDbInfo1, AllReplies1} = compare_dbs(Source, Target, FilterFun1), + ?assertEqual(1, proplists:get_value(doc_count, TargetDbInfo1)), + ?assert(lists:all(fun(Valid) -> Valid end, AllReplies1)), + + {ok, SourceDb} = fabric2_db:open(Source, [?ADMIN_CTX]), + {ok, DDoc1} = fabric2_db:open_doc(SourceDb, ?DDOC_ID), + Flt = <<"function(doc, req) {if (doc.class == 'reptiles') return true};">>, + DDoc2 = DDoc1#doc{body = {[ + {<<"filters">>, {[ + {<<"testfilter">>, Flt} + ]}} + ]}}, + {ok, {_, _}} = fabric2_db:update_doc(SourceDb, DDoc2), + Info = wait_scheduler_repid_change(RepId1), + + RepId2 = maps:get(<<"id">>, Info), + ?assert(RepId1 =/= RepId2), + + wait_scheduler_docs_written(1), + + FilterFun2 = fun(_, {Props}) -> + Class = couch_util:get_value(<<"class">>, Props), + Class == <<"mammal">> orelse Class == <<"reptiles">> + end, + {ok, TargetDbInfo2, AllReplies2} = compare_dbs(Source, Target, FilterFun2), + ?assertEqual(2, proplists:get_value(doc_count, TargetDbInfo2)), + ?assert(lists:all(fun(Valid) -> Valid end, AllReplies2)), + + couch_replicator_test_helper:cancel(RepId2). + compare_dbs(Source, Target, FilterFun) -> - {ok, SourceDb} = couch_db:open_int(Source, []), - {ok, TargetDb} = couch_db:open_int(Target, []), - {ok, TargetDbInfo} = couch_db:get_db_info(TargetDb), - Fun = fun(FullDocInfo, Acc) -> - {ok, DocId, SourceDoc} = read_doc(SourceDb, FullDocInfo), - TargetReply = read_doc(TargetDb, DocId), - case FilterFun(DocId, SourceDoc) of - true -> - ValidReply = {ok, DocId, SourceDoc} == TargetReply, - {ok, [ValidReply|Acc]}; - false -> - ValidReply = {not_found, missing} == TargetReply, - {ok, [ValidReply|Acc]} + {ok, TargetDb} = fabric2_db:open(Target, [?ADMIN_CTX]), + {ok, TargetDbInfo} = fabric2_db:get_db_info(TargetDb), + Fun = fun(SrcDoc, TgtDoc, Acc) -> + case FilterFun(SrcDoc#doc.id, SrcDoc#doc.body) of + true -> [SrcDoc == TgtDoc | Acc]; + false -> [not_found == TgtDoc | Acc] end end, - {ok, AllReplies} = couch_db:fold_docs(SourceDb, Fun, [], []), - ok = couch_db:close(SourceDb), - ok = couch_db:close(TargetDb), - {ok, TargetDbInfo, AllReplies}. - -read_doc(Db, DocIdOrInfo) -> - case couch_db:open_doc(Db, DocIdOrInfo) of - {ok, Doc} -> - {Props} = couch_doc:to_json_obj(Doc, [attachments]), - DocId = couch_util:get_value(<<"_id">>, Props), - {ok, DocId, {Props}}; - Error -> - Error - end. - -create_db() -> - DbName = ?tempdb(), - {ok, Db} = couch_db:create(DbName, [?ADMIN_CTX]), - ok = couch_db:close(Db), - DbName. + Res = couch_replicator_test_helper:compare_fold(Source, Target, Fun, []), + {ok, TargetDbInfo, Res}. + create_docs(DbName) -> - {ok, Db} = couch_db:open(DbName, [?ADMIN_CTX]), - DDoc = couch_doc:from_json_obj(?DDOC), - Doc1 = couch_doc:from_json_obj({[ - {<<"_id">>, <<"doc1">>}, - {<<"class">>, <<"mammal">>}, - {<<"value">>, 1} - - ]}), - Doc2 = couch_doc:from_json_obj({[ - {<<"_id">>, <<"doc2">>}, - {<<"class">>, <<"amphibians">>}, - {<<"value">>, 2} - - ]}), - Doc3 = couch_doc:from_json_obj({[ - {<<"_id">>, <<"doc3">>}, - {<<"class">>, <<"reptiles">>}, - {<<"value">>, 3} - - ]}), - Doc4 = couch_doc:from_json_obj({[ - {<<"_id">>, <<"doc4">>}, - {<<"class">>, <<"arthropods">>}, - {<<"value">>, 2} - - ]}), - {ok, _} = couch_db:update_docs(Db, [DDoc, Doc1, Doc2, Doc3, Doc4]), - couch_db:close(Db). - -delete_db(DbName) -> - ok = couch_server:delete(DbName, [?ADMIN_CTX]). - -db_url(remote, DbName) -> - Addr = config:get("httpd", "bind_address", "127.0.0.1"), - Port = mochiweb_socket_server:get(couch_httpd, port), - ?l2b(io_lib:format("http://~s:~b/~s", [Addr, Port, DbName])). + couch_replicator_test_helper:create_docs(DbName, [ + ?DDOC, + #{ + <<"_id">> => <<"doc1">>, + <<"class">> => <<"mammal">>, + <<"value">> => 1 + }, + #{ + <<"_id">> => <<"doc2">>, + <<"class">> => <<"amphibians">>, + <<"value">> => 2 + }, + #{ + <<"_id">> => <<"doc3">>, + <<"class">> => <<"reptiles">>, + <<"value">> => 3 + }, + #{ + <<"_id">> => <<"doc4">>, + <<"class">> => <<"arthropods">>, + <<"value">> => 2 + } + ]). + + +wait_scheduler_docs_written(DocsWritten) -> + test_util:wait(fun() -> + case couch_replicator_test_helper:scheduler_jobs() of + [] -> + wait; + [#{<<"info">> := null}] -> + wait; + [#{<<"info">> := Info}] -> + case Info of + #{<<"docs_written">> := DocsWritten} -> Info; + #{} -> wait + end + end + end, 10000, 250). + + +wait_scheduler_repid_change(OldRepId) -> + test_util:wait(fun() -> + case couch_replicator_test_helper:scheduler_jobs() of + [] -> + wait; + [#{<<"id">> := OldRepId}] -> + wait; + [#{<<"id">> := null}] -> + wait; + [#{<<"id">> := NewId} = Info] when is_binary(NewId) -> + Info + end + end, 10000, 250). diff --git a/src/couch_replicator/test/eunit/couch_replicator_httpc_pool_tests.erl b/src/couch_replicator/test/eunit/couch_replicator_httpc_pool_tests.erl index c4ad4e9b6..6c61446cc 100644 --- a/src/couch_replicator/test/eunit/couch_replicator_httpc_pool_tests.erl +++ b/src/couch_replicator/test/eunit/couch_replicator_httpc_pool_tests.erl @@ -12,17 +12,13 @@ -module(couch_replicator_httpc_pool_tests). + -include_lib("couch/include/couch_eunit.hrl"). -include_lib("couch/include/couch_db.hrl"). - --define(TIMEOUT, 1000). +-include_lib("fabric/test/fabric2_test.hrl"). -setup() -> - spawn_pool(). - -teardown(Pool) -> - stop_pool(Pool). +-define(TIMEOUT, 1000). httpc_pool_test_() -> @@ -30,75 +26,81 @@ httpc_pool_test_() -> "httpc pool tests", { setup, - fun() -> test_util:start_couch([couch_replicator]) end, fun test_util:stop_couch/1, + fun couch_replicator_test_helper:start_couch/0, + fun couch_replicator_test_helper:stop_couch/1, { foreach, - fun setup/0, fun teardown/1, + fun setup/0, + fun teardown/1, [ - fun should_block_new_clients_when_full/1, - fun should_replace_worker_on_death/1 + ?TDEF_FE(should_block_new_clients_when_full), + ?TDEF_FE(should_replace_worker_on_death) ] } } }. +setup() -> + spawn_pool(). + + +teardown(Pool) -> + stop_pool(Pool). + + should_block_new_clients_when_full(Pool) -> - ?_test(begin - Client1 = spawn_client(Pool), - Client2 = spawn_client(Pool), - Client3 = spawn_client(Pool), + Client1 = spawn_client(Pool), + Client2 = spawn_client(Pool), + Client3 = spawn_client(Pool), + + ?assertEqual(ok, ping_client(Client1)), + ?assertEqual(ok, ping_client(Client2)), + ?assertEqual(ok, ping_client(Client3)), - ?assertEqual(ok, ping_client(Client1)), - ?assertEqual(ok, ping_client(Client2)), - ?assertEqual(ok, ping_client(Client3)), + Worker1 = get_client_worker(Client1, "1"), + Worker2 = get_client_worker(Client2, "2"), + Worker3 = get_client_worker(Client3, "3"), - Worker1 = get_client_worker(Client1, "1"), - Worker2 = get_client_worker(Client2, "2"), - Worker3 = get_client_worker(Client3, "3"), + ?assert(is_process_alive(Worker1)), + ?assert(is_process_alive(Worker2)), + ?assert(is_process_alive(Worker3)), - ?assert(is_process_alive(Worker1)), - ?assert(is_process_alive(Worker2)), - ?assert(is_process_alive(Worker3)), + ?assertNotEqual(Worker1, Worker2), + ?assertNotEqual(Worker2, Worker3), + ?assertNotEqual(Worker3, Worker1), - ?assertNotEqual(Worker1, Worker2), - ?assertNotEqual(Worker2, Worker3), - ?assertNotEqual(Worker3, Worker1), + Client4 = spawn_client(Pool), + ?assertEqual(timeout, ping_client(Client4)), - Client4 = spawn_client(Pool), - ?assertEqual(timeout, ping_client(Client4)), + ?assertEqual(ok, stop_client(Client1)), + ?assertEqual(ok, ping_client(Client4)), - ?assertEqual(ok, stop_client(Client1)), - ?assertEqual(ok, ping_client(Client4)), + Worker4 = get_client_worker(Client4, "4"), + ?assertEqual(Worker1, Worker4), - Worker4 = get_client_worker(Client4, "4"), - ?assertEqual(Worker1, Worker4), + lists:foreach(fun(C) -> + ?assertEqual(ok, stop_client(C)) + end, [Client2, Client3, Client4]). - lists:foreach( - fun(C) -> - ?assertEqual(ok, stop_client(C)) - end, [Client2, Client3, Client4]) - end). should_replace_worker_on_death(Pool) -> - ?_test(begin - Client1 = spawn_client(Pool), - ?assertEqual(ok, ping_client(Client1)), - Worker1 = get_client_worker(Client1, "1"), - ?assert(is_process_alive(Worker1)), + Client1 = spawn_client(Pool), + ?assertEqual(ok, ping_client(Client1)), + Worker1 = get_client_worker(Client1, "1"), + ?assert(is_process_alive(Worker1)), - ?assertEqual(ok, kill_client_worker(Client1)), - ?assertNot(is_process_alive(Worker1)), - ?assertEqual(ok, stop_client(Client1)), + ?assertEqual(ok, kill_client_worker(Client1)), + ?assertNot(is_process_alive(Worker1)), + ?assertEqual(ok, stop_client(Client1)), - Client2 = spawn_client(Pool), - ?assertEqual(ok, ping_client(Client2)), - Worker2 = get_client_worker(Client2, "2"), - ?assert(is_process_alive(Worker2)), + Client2 = spawn_client(Pool), + ?assertEqual(ok, ping_client(Client2)), + Worker2 = get_client_worker(Client2, "2"), + ?assert(is_process_alive(Worker2)), - ?assertNotEqual(Worker1, Worker2), - ?assertEqual(ok, stop_client(Client2)) - end). + ?assertNotEqual(Worker1, Worker2), + ?assertEqual(ok, stop_client(Client2)). spawn_client(Pool) -> @@ -110,6 +112,7 @@ spawn_client(Pool) -> end), {Pid, Ref}. + ping_client({Pid, Ref}) -> Pid ! ping, receive @@ -119,18 +122,18 @@ ping_client({Pid, Ref}) -> timeout end. + get_client_worker({Pid, Ref}, ClientName) -> Pid ! get_worker, receive {worker, Ref, Worker} -> Worker after ?TIMEOUT -> - erlang:error( - {assertion_failed, - [{module, ?MODULE}, {line, ?LINE}, - {reason, "Timeout getting client " ++ ClientName ++ " worker"}]}) + erlang:error({assertion_failed, [{module, ?MODULE}, {line, ?LINE}, + {reason, "Timeout getting client " ++ ClientName ++ " worker"}]}) end. + stop_client({Pid, Ref}) -> Pid ! stop, receive @@ -140,6 +143,7 @@ stop_client({Pid, Ref}) -> timeout end. + kill_client_worker({Pid, Ref}) -> Pid ! get_worker, receive @@ -150,6 +154,7 @@ kill_client_worker({Pid, Ref}) -> timeout end. + loop(Parent, Ref, Worker, Pool) -> receive ping -> @@ -163,12 +168,14 @@ loop(Parent, Ref, Worker, Pool) -> Parent ! {stop, Ref} end. + spawn_pool() -> - Host = config:get("httpd", "bind_address", "127.0.0.1"), - Port = config:get("httpd", "port", "5984"), + Host = config:get("chttpd", "bind_address", "127.0.0.1"), + Port = config:get("chttpd", "port", "5984"), {ok, Pool} = couch_replicator_httpc_pool:start_link( "http://" ++ Host ++ ":" ++ Port, [{max_connections, 3}]), Pool. + stop_pool(Pool) -> ok = couch_replicator_httpc_pool:stop(Pool). diff --git a/src/couch_replicator/test/eunit/couch_replicator_id_too_long_tests.erl b/src/couch_replicator/test/eunit/couch_replicator_id_too_long_tests.erl index a4696c4b8..3a0e6f7bd 100644 --- a/src/couch_replicator/test/eunit/couch_replicator_id_too_long_tests.erl +++ b/src/couch_replicator/test/eunit/couch_replicator_id_too_long_tests.erl @@ -15,76 +15,57 @@ -include_lib("couch/include/couch_eunit.hrl"). -include_lib("couch/include/couch_db.hrl"). -include_lib("couch_replicator/src/couch_replicator.hrl"). - - -setup(_) -> - Ctx = test_util:start_couch([couch_replicator]), - Source = create_db(), - create_doc(Source), - Target = create_db(), - {Ctx, {Source, Target}}. - - -teardown(_, {Ctx, {Source, Target}}) -> - delete_db(Source), - delete_db(Target), - config:set("replicator", "max_document_id_length", "infinity"), - ok = test_util:stop_couch(Ctx). +-include_lib("fabric/test/fabric2_test.hrl"). id_too_long_replication_test_() -> - Pairs = [{remote, remote}], { "Doc id too long tests", { - foreachx, - fun setup/1, fun teardown/2, - [{Pair, fun should_succeed/2} || Pair <- Pairs] ++ - [{Pair, fun should_fail/2} || Pair <- Pairs] + setup, + fun couch_replicator_test_helper:start_couch/0, + fun couch_replicator_test_helper:stop_couch/1, + { + foreach, + fun setup/0, + fun teardown/1, + [ + ?TDEF_FE(should_succeed), + ?TDEF_FE(should_fail) + + ] + } } }. -should_succeed({From, To}, {_Ctx, {Source, Target}}) -> - RepObject = {[ - {<<"source">>, db_url(From, Source)}, - {<<"target">>, db_url(To, Target)} - ]}, - config:set("replicator", "max_document_id_length", "5"), - {ok, _} = couch_replicator:replicate(RepObject, ?ADMIN_USER), - ?_assertEqual(ok, couch_replicator_test_helper:compare_dbs(Source, Target)). - +setup() -> + Source = couch_replicator_test_helper:create_db(), + create_doc(Source), + Target = couch_replicator_test_helper:create_db(), + {Source, Target}. -should_fail({From, To}, {_Ctx, {Source, Target}}) -> - RepObject = {[ - {<<"source">>, db_url(From, Source)}, - {<<"target">>, db_url(To, Target)} - ]}, - config:set("replicator", "max_document_id_length", "4"), - {ok, _} = couch_replicator:replicate(RepObject, ?ADMIN_USER), - ?_assertError({badmatch, {not_found, missing}}, - couch_replicator_test_helper:compare_dbs(Source, Target)). +teardown({Source, Target}) -> + config:delete("replicator", "max_document_id_length", false), + couch_replicator_test_helper:delete_db(Source), + couch_replicator_test_helper:delete_db(Target). -create_db() -> - DbName = ?tempdb(), - {ok, Db} = couch_db:create(DbName, [?ADMIN_CTX]), - ok = couch_db:close(Db), - DbName. +should_succeed({Source, Target}) -> + config:set("replicator", "max_document_id_length", "5", false), + {ok, _} = couch_replicator_test_helper:replicate(Source, Target), + ?assertEqual(ok, couch_replicator_test_helper:compare_dbs(Source, Target)). -create_doc(DbName) -> - {ok, Db} = couch_db:open(DbName, [?ADMIN_CTX]), - Doc = couch_doc:from_json_obj({[{<<"_id">>, <<"12345">>}]}), - {ok, _} = couch_db:update_doc(Db, Doc, []), - couch_db:close(Db). +should_fail({Source, Target}) -> + config:set("replicator", "max_document_id_length", "4", false), + {ok, _} = couch_replicator_test_helper:replicate(Source, Target), + ExceptIds = [<<"12345">>], + ?assertEqual(ok, couch_replicator_test_helper:compare_dbs(Source, Target, + ExceptIds)). -delete_db(DbName) -> - ok = couch_server:delete(DbName, [?ADMIN_CTX]). - -db_url(remote, DbName) -> - Addr = config:get("httpd", "bind_address", "127.0.0.1"), - Port = mochiweb_socket_server:get(couch_httpd, port), - ?l2b(io_lib:format("http://~s:~b/~s", [Addr, Port, DbName])). +create_doc(DbName) -> + Docs = [#{<<"_id">> => <<"12345">>}], + couch_replicator_test_helper:create_docs(DbName, Docs). diff --git a/src/couch_replicator/test/eunit/couch_replicator_job_server_tests.erl b/src/couch_replicator/test/eunit/couch_replicator_job_server_tests.erl new file mode 100644 index 000000000..698a84400 --- /dev/null +++ b/src/couch_replicator/test/eunit/couch_replicator_job_server_tests.erl @@ -0,0 +1,437 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_replicator_job_server_tests). + +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("fabric/test/fabric2_test.hrl"). + + +-define(SHUTDOWN_TIMEOUT, 1000). +-define(JOB_SERVER, couch_replicator_job_server). + + +job_server_test_() -> + { + "Test job server", + { + setup, + fun setup_all/0, + fun teardown_all/1, + { + foreach, + fun setup/0, + fun teardown/1, + [ + ?TDEF_FE(should_start_up), + ?TDEF_FE(reschedule_resets_timer), + ?TDEF_FE(reschedule_reads_config), + ?TDEF_FE(acceptors_spawned_if_pending), + ?TDEF_FE(acceptors_not_spawned_if_no_pending), + ?TDEF_FE(acceptors_not_spawned_if_no_max_churn), + ?TDEF_FE(acceptors_not_spawned_if_no_churn_budget), + ?TDEF_FE(acceptors_spawned_on_acceptor_exit), + ?TDEF_FE(acceptor_turns_into_worker), + ?TDEF_FE(acceptors_spawned_on_worker_exit), + ?TDEF_FE(excess_acceptors_spawned), + ?TDEF_FE(excess_workers_trimmed_on_reschedule), + ?TDEF_FE(recent_workers_are_not_stopped) + ] + } + } + }. + + +setup_all() -> + Ctx = test_util:start_couch(), + meck:new(couch_replicator_job_server, [passthrough]), + mock_pending(0), + meck:expect(couch_replicator_jobs, set_timeout, 0, ok), + meck:expect(couch_replicator_jobs, fold_jobs, 3, ok), + meck:expect(couch_replicator_job, start_link, fun() -> + {ok, spawn_link(fun() -> start_job() end)} + end), + Ctx. + + +teardown_all(Ctx) -> + meck:unload(), + config_delete("interval_sec"), + config_delete("max_acceptors"), + config_delete("max_jobs"), + config_delete("max_churn"), + config_delete("min_run_time_sec"), + config_delete("transient_job_max_age_sec"), + test_util:stop_couch(Ctx). + + +setup() -> + config_set("interval_sec", "99999"), + config_set("max_acceptors", "0"), + config_set("max_jobs", "0"), + config_set("max_churn", "1"), + config_set("min_run_time_sec", "0"), + config_set("transient_job_max_age_sec", "99999"), + + mock_pending(0), + + {ok, SPid} = ?JOB_SERVER:start_link(?SHUTDOWN_TIMEOUT), + SPid. + + +teardown(SPid) when is_pid(SPid) -> + unlink(SPid), + Ref = monitor(process, SPid), + exit(SPid, kill), + receive {'DOWN', Ref, _, _, _} -> ok end, + + meck:reset(couch_replicator_jobs), + meck:reset(couch_replicator_job), + meck:reset(couch_replicator_job_server), + + config_delete("interval_sec"), + config_delete("max_acceptors"), + config_delete("max_jobs"), + config_delete("max_churn"), + config_delete("min_run_time_sec"), + config_delete("transient_job_max_age_sec"). + + +should_start_up(SPid) -> + ?assert(is_process_alive(SPid)), + ?assertEqual(SPid, whereis(?JOB_SERVER)), + State = sys:get_state(?JOB_SERVER), + #{ + acceptors := #{}, + workers := #{}, + churn := 0, + config := Config, + timer := Timer, + timeout := ?SHUTDOWN_TIMEOUT + } = State, + + % Make sure it read the config + ?assertMatch(#{ + max_acceptors := 0, + interval_sec := 99999, + max_jobs := 0, + max_churn := 1, + min_run_time_sec := 0, + transient_job_max_age_sec := 99999 + }, Config), + + % Timer was set up + ?assert(is_reference(Timer)), + ?assert(is_integer(erlang:read_timer(Timer))). + + +reschedule_resets_timer(_) -> + #{timer := OldTimer} = sys:get_state(?JOB_SERVER), + + ?assertEqual(ok, ?JOB_SERVER:reschedule()), + + #{timer := Timer} = sys:get_state(?JOB_SERVER), + ?assert(is_reference(Timer)), + ?assert(Timer =/= OldTimer). + + +reschedule_reads_config(_) -> + config_set("interval_sec", "99998"), + + ?JOB_SERVER:reschedule(), + + #{config := Config} = sys:get_state(?JOB_SERVER), + ?assertMatch(#{interval_sec := 99998}, Config). + + +acceptors_spawned_if_pending(_) -> + config_set("max_acceptors", "1"), + mock_pending(1), + + ?JOB_SERVER:reschedule(), + + ?assertMatch([Pid] when is_pid(Pid), acceptors()). + + +acceptors_not_spawned_if_no_pending(_) -> + config_set("max_acceptors", "1"), + mock_pending(0), + + ?JOB_SERVER:reschedule(), + + ?assertEqual([], acceptors()). + + +acceptors_not_spawned_if_no_max_churn(_) -> + config_set("max_churn", "0"), + config_set("max_acceptors", "1"), + mock_pending(1), + + ?JOB_SERVER:reschedule(), + + ?assertEqual([], acceptors()). + + +acceptors_not_spawned_if_no_churn_budget(_) -> + config_set("max_churn", "1"), + config_set("max_acceptors", "1"), + mock_pending(0), + + % To read the config + ?JOB_SERVER:reschedule(), + + ?assertEqual([], acceptors()), + + mock_pending(1), + + % Exhaust churn budget + sys:replace_state(couch_replicator_job_server, fun(#{} = St) -> + St#{churn := 1} + end), + + ?JOB_SERVER:reschedule(), + + ?assertEqual([], acceptors()). + + +acceptors_spawned_on_acceptor_exit(_) -> + config_set("max_acceptors", "3"), + config_set("max_jobs", "4"), + mock_pending(1), + + ?JOB_SERVER:reschedule(), + + [A1] = acceptors(), + + exit(A1, kill), + meck:wait(?JOB_SERVER, handle_info, [{'EXIT', A1, killed}, '_'], 2000), + + ?assertEqual(3, length(acceptors())). + + +acceptor_turns_into_worker(_) -> + config_set("max_acceptors", "3"), + config_set("max_jobs", "4"), + mock_pending(1), + + ?JOB_SERVER:reschedule(), + + [A1] = acceptors(), + accept_job(A1, true), + ?assertEqual(3, length(acceptors())), + #{workers := Workers} = sys:get_state(?JOB_SERVER), + ?assertMatch([{A1, {true, _}}], maps:to_list(Workers)). + + +acceptors_spawned_on_worker_exit(_) -> + config_set("max_acceptors", "1"), + config_set("max_jobs", "1"), + mock_pending(1), + + ?JOB_SERVER:reschedule(), + + [A1] = acceptors(), + accept_job(A1, true), + + % Since max_jobs = 1 no more acceptors are spawned + ?assertEqual(0, length(acceptors())), + + % Same acceptor process is now a worker + ?assertEqual([A1], workers()), + + exit(A1, shutdown), + meck:wait(?JOB_SERVER, handle_info, [{'EXIT', A1, shutdown}, '_'], 2000), + + % New acceptor process started + ?assertEqual(1, length(acceptors())), + ?assertEqual(0, length(workers())). + + +excess_acceptors_spawned(_) -> + config_set("max_acceptors", "2"), + config_set("max_churn", "3"), + config_set("max_jobs", "4"), + mock_pending(100), + + ?JOB_SERVER:reschedule(), + + ?assertEqual(3, length(acceptors())), + + accept_all(), + + ?assertEqual(3, length(workers())), + ?assertEqual(1, length(acceptors())), + % Check that the churn budget was consumed + ?assertMatch(#{churn := 3}, sys:get_state(?JOB_SERVER)), + + accept_all(), + + % No more acceptors spawned after reaching max_jobs + ?assertEqual(0, length(acceptors())), + ?assertEqual(4, length(workers())), + + ?JOB_SERVER:reschedule(), + + % Since all churn budget was consumed, no new acceptors should have beens + % spawned this cycle but churn budget should have been reset + ?assertEqual(0, length(acceptors())), + ?assertEqual(4, length(workers())), + ?assertMatch(#{churn := 0}, sys:get_state(?JOB_SERVER)), + + ?JOB_SERVER:reschedule(), + + % Should have spawned 3 excess acceptors + ?assertEqual(3, length(acceptors())), + ?assertEqual(4, length(workers())), + + accept_all(), + + % Running with an excess number of workers + ?assertEqual(0, length(acceptors())), + ?assertEqual(7, length(workers())). + + +excess_workers_trimmed_on_reschedule(_) -> + config_set("max_acceptors", "2"), + config_set("max_churn", "3"), + config_set("max_jobs", "4"), + mock_pending(100), + + ?JOB_SERVER:reschedule(), + + [A1, A2, A3] = acceptors(), + accept_job(A1, true), + accept_job(A2, false), + accept_job(A3, false), + [A4] = acceptors(), + accept_job(A4, true), + + ?JOB_SERVER:reschedule(), + + % First reschedule was to reset the churn budget, this next one is to spawn + % an excess number of acceptors. + ?JOB_SERVER:reschedule(), + + [A5, A6, A7] = acceptors(), + accept_job(A5, true), + accept_job(A6, false), + accept_job(A7, false), + + ?assertEqual(7, length(workers())), + + % Running with an excess number of workers. These should be trimmed on the + % during the next cycle + ?JOB_SERVER:reschedule(), + + Workers = workers(), + ?assertEqual(4, length(Workers)), + ?assertEqual(0, length(acceptors())), + + % Check that A1 and A4 were skipped since they are not continuous + ?assertEqual(Workers, Workers -- [A2, A3, A6]). + + +recent_workers_are_not_stopped(_) -> + config_set("max_acceptors", "2"), + config_set("max_churn", "3"), + config_set("max_jobs", "4"), + mock_pending(100), + + ?JOB_SERVER:reschedule(), + + [A1, A2, A3] = acceptors(), + accept_job(A1, true), + accept_job(A2, false), + accept_job(A3, false), + [A4] = acceptors(), + accept_job(A4, true), + + ?JOB_SERVER:reschedule(), + + % First reschedule was to reset the churn budget, this next one is to spawn + % an excess number of acceptors. + ?JOB_SERVER:reschedule(), + + [A5, A6, A7] = acceptors(), + accept_job(A5, true), + accept_job(A6, false), + accept_job(A7, false), + + ?assertEqual(7, length(workers())), + + % Running with an excess number of workers. But they won't be stopped on + % reschedule if they ran for a period less than min_run_time_sec during the + % next cycle + config_set("min_run_time_sec", "9999"), + + % don't want to start new acceptors anymore + mock_pending(0), + config_set("max_acceptors", "0"), + + ?JOB_SERVER:reschedule(), + + ?assertEqual(7, length(workers())), + ?assertEqual(0, length(acceptors())), + + config_set("min_run_time_sec", "0"), + + ?JOB_SERVER:reschedule(), + + ?assertEqual(4, length(workers())), + ?assertEqual(0, length(acceptors())). + + +config_set(K, V) -> + config:set("replicator", K, V, _Persist = false). + + +config_delete(K) -> + config:delete("replicator", K, _Persist = false). + + +mock_pending(N) -> + meck:expect(couch_replicator_jobs, pending_count, 2, N). + + +acceptors() -> + #{acceptors := Acceptors} = sys:get_state(?JOB_SERVER), + maps:keys(Acceptors). + + +workers() -> + #{workers := Workers} = sys:get_state(?JOB_SERVER), + maps:keys(Workers). + + +accept_job(APid, Normal) -> + APid ! {accept_job, Normal, self()}, + receive + {job_accepted, APid} -> ok + after + 5000 -> + error(test_job_accept_timeout) + end. + + +accept_all() -> + [accept_job(APid, true) || APid <- acceptors()]. + + +start_job() -> + receive + {accept_job, Normal, From} -> + ok = ?JOB_SERVER:accepted(self(), Normal), + From ! {job_accepted, self()}, + start_job(); + {exit_job, ExitSig} -> + exit(ExitSig) + end. diff --git a/src/couch_replicator/test/eunit/couch_replicator_large_atts_tests.erl b/src/couch_replicator/test/eunit/couch_replicator_large_atts_tests.erl index 27c89a0cd..fcbdf229f 100644 --- a/src/couch_replicator/test/eunit/couch_replicator_large_atts_tests.erl +++ b/src/couch_replicator/test/eunit/couch_replicator_large_atts_tests.erl @@ -14,12 +14,8 @@ -include_lib("couch/include/couch_eunit.hrl"). -include_lib("couch/include/couch_db.hrl"). +-include_lib("fabric/test/fabric2_test.hrl"). --import(couch_replicator_test_helper, [ - db_url/1, - replicate/2, - compare_dbs/2 -]). -define(ATT_SIZE_1, 2 * 1024 * 1024). -define(ATT_SIZE_2, round(6.6 * 1024 * 1024)). @@ -27,90 +23,65 @@ -define(TIMEOUT_EUNIT, 120). -setup() -> - DbName = ?tempdb(), - {ok, Db} = couch_db:create(DbName, [?ADMIN_CTX]), - ok = couch_db:close(Db), - DbName. - -setup(remote) -> - {remote, setup()}; -setup({A, B}) -> - Ctx = test_util:start_couch([couch_replicator]), - config:set("attachments", "compressible_types", "text/*", false), - Source = setup(A), - Target = setup(B), - {Ctx, {Source, Target}}. - -teardown({remote, DbName}) -> - teardown(DbName); -teardown(DbName) -> - ok = couch_server:delete(DbName, [?ADMIN_CTX]), - ok. - -teardown(_, {Ctx, {Source, Target}}) -> - teardown(Source), - teardown(Target), - - ok = application:stop(couch_replicator), - ok = test_util:stop_couch(Ctx). - large_atts_test_() -> - Pairs = [{remote, remote}], { - "Replicate docs with large attachments", + "Large attachment replication test", { - foreachx, - fun setup/1, fun teardown/2, - [{Pair, fun should_populate_replicate_compact/2} - || Pair <- Pairs] + setup, + fun couch_replicator_test_helper:start_couch/0, + fun couch_replicator_test_helper:stop_couch/1, + { + foreach, + fun setup/0, + fun teardown/1, + [ + ?TDEF_FE(should_replicate_attachments, 120) + ] + } } }. -should_populate_replicate_compact({From, To}, {_Ctx, {Source, Target}}) -> - {lists:flatten(io_lib:format("~p -> ~p", [From, To])), - {inorder, [should_populate_source(Source), - should_replicate(Source, Target), - should_compare_databases(Source, Target)]}}. +setup() -> + AttCfg = config:get("attachments", "compressible_types"), + config:set("attachments", "compressible_types", "text/*", false), + Source = couch_replicator_test_helper:create_db(), + ok = populate_db(Source, ?DOCS_COUNT), + Target = couch_replicator_test_helper:create_db(), + {AttCfg, Source, Target}. + -should_populate_source({remote, Source}) -> - should_populate_source(Source); -should_populate_source(Source) -> - {timeout, ?TIMEOUT_EUNIT, ?_test(populate_db(Source, ?DOCS_COUNT))}. +teardown({AttCfg, Source, Target}) -> + couch_replicator_test_helper:delete_db(Source), + couch_replicator_test_helper:delete_db(Target), + case AttCfg of + undefined -> + config:delete("attachments", "compressible_types", false); + _ -> + config:set("attachments", "compressible_types", AttCfg) + end. -should_replicate({remote, Source}, Target) -> - should_replicate(db_url(Source), Target); -should_replicate(Source, {remote, Target}) -> - should_replicate(Source, db_url(Target)); -should_replicate(Source, Target) -> - {timeout, ?TIMEOUT_EUNIT, ?_test(replicate(Source, Target))}. -should_compare_databases({remote, Source}, Target) -> - should_compare_databases(Source, Target); -should_compare_databases(Source, {remote, Target}) -> - should_compare_databases(Source, Target); -should_compare_databases(Source, Target) -> - {timeout, ?TIMEOUT_EUNIT, ?_test(compare_dbs(Source, Target))}. +should_replicate_attachments({_AttCfg, Source, Target}) -> + ?assertMatch({ok, _}, + couch_replicator_test_helper:replicate(Source, Target)), + ?assertEqual(ok, couch_replicator_test_helper:compare_dbs(Source, Target)). populate_db(DbName, DocCount) -> - {ok, Db} = couch_db:open_int(DbName, []), - Docs = lists:foldl( - fun(DocIdCounter, Acc) -> - Doc = #doc{ - id = iolist_to_binary(["doc", integer_to_list(DocIdCounter)]), - body = {[]}, - atts = [ - att(<<"att1">>, ?ATT_SIZE_1, <<"text/plain">>), - att(<<"att2">>, ?ATT_SIZE_2, <<"app/binary">>) - ] - }, - [Doc | Acc] - end, - [], lists:seq(1, DocCount)), - {ok, _} = couch_db:update_docs(Db, Docs, []), - couch_db:close(Db). + Docs = lists:foldl(fun(DocIdCounter, Acc) -> + Doc = #doc{ + id = iolist_to_binary(["doc", integer_to_list(DocIdCounter)]), + body = {[]}, + atts = [ + att(<<"att1">>, ?ATT_SIZE_1, <<"text/plain">>), + att(<<"att2">>, ?ATT_SIZE_2, <<"app/binary">>) + ] + }, + [Doc | Acc] + end, [], lists:seq(1, DocCount)), + couch_replicator_test_helper:create_docs(DbName, Docs). + att(Name, Size, Type) -> couch_att:new([ diff --git a/src/couch_replicator/test/eunit/couch_replicator_many_leaves_tests.erl b/src/couch_replicator/test/eunit/couch_replicator_many_leaves_tests.erl index c7933b472..3dbfa6aba 100644 --- a/src/couch_replicator/test/eunit/couch_replicator_many_leaves_tests.erl +++ b/src/couch_replicator/test/eunit/couch_replicator_many_leaves_tests.erl @@ -14,11 +14,8 @@ -include_lib("couch/include/couch_eunit.hrl"). -include_lib("couch/include/couch_db.hrl"). +-include_lib("fabric/test/fabric2_test.hrl"). --import(couch_replicator_test_helper, [ - db_url/1, - replicate/2 -]). -define(DOCS_CONFLICTS, [ {<<"doc1">>, 10}, @@ -28,178 +25,150 @@ {<<"doc3">>, 210} ]). -define(NUM_ATTS, 2). --define(TIMEOUT_EUNIT, 60). -define(i2l(I), integer_to_list(I)). -define(io2b(Io), iolist_to_binary(Io)). -setup() -> - DbName = ?tempdb(), - {ok, Db} = couch_db:create(DbName, [?ADMIN_CTX]), - ok = couch_db:close(Db), - DbName. - - -setup(remote) -> - {remote, setup()}; -setup({A, B}) -> - Ctx = test_util:start_couch([couch_replicator]), - Source = setup(A), - Target = setup(B), - {Ctx, {Source, Target}}. - -teardown({remote, DbName}) -> - teardown(DbName); -teardown(DbName) -> - ok = couch_server:delete(DbName, [?ADMIN_CTX]), - ok. - -teardown(_, {Ctx, {Source, Target}}) -> - teardown(Source), - teardown(Target), - ok = application:stop(couch_replicator), - ok = test_util:stop_couch(Ctx). docs_with_many_leaves_test_() -> - Pairs = [{remote, remote}], { "Replicate documents with many leaves", { - foreachx, - fun setup/1, fun teardown/2, - [{Pair, fun should_populate_replicate_compact/2} - || Pair <- Pairs] + setup, + fun couch_replicator_test_helper:start_couch/0, + fun couch_replicator_test_helper:stop_couch/1, + { + foreach, + fun setup/0, + fun teardown/1, + [ + ?TDEF_FE(should_replicate_doc_with_many_leaves, 180) + ] + } } }. -should_populate_replicate_compact({From, To}, {_Ctx, {Source, Target}}) -> - {lists:flatten(io_lib:format("~p -> ~p", [From, To])), - {inorder, [ - should_populate_source(Source), - should_replicate(Source, Target), - should_verify_target(Source, Target), - should_add_attachments_to_source(Source), - should_replicate(Source, Target), - should_verify_target(Source, Target) - ]}}. - -should_populate_source({remote, Source}) -> - should_populate_source(Source); -should_populate_source(Source) -> - {timeout, ?TIMEOUT_EUNIT, ?_test(populate_db(Source))}. - -should_replicate({remote, Source}, Target) -> - should_replicate(db_url(Source), Target); -should_replicate(Source, {remote, Target}) -> - should_replicate(Source, db_url(Target)); -should_replicate(Source, Target) -> - {timeout, ?TIMEOUT_EUNIT, ?_test(replicate(Source, Target))}. - -should_verify_target({remote, Source}, Target) -> - should_verify_target(Source, Target); -should_verify_target(Source, {remote, Target}) -> - should_verify_target(Source, Target); -should_verify_target(Source, Target) -> - {timeout, ?TIMEOUT_EUNIT, ?_test(begin - {ok, SourceDb} = couch_db:open_int(Source, []), - {ok, TargetDb} = couch_db:open_int(Target, []), - verify_target(SourceDb, TargetDb, ?DOCS_CONFLICTS), - ok = couch_db:close(SourceDb), - ok = couch_db:close(TargetDb) - end)}. - -should_add_attachments_to_source({remote, Source}) -> - should_add_attachments_to_source(Source); -should_add_attachments_to_source(Source) -> - {timeout, ?TIMEOUT_EUNIT, ?_test(begin - {ok, SourceDb} = couch_db:open_int(Source, [?ADMIN_CTX]), - add_attachments(SourceDb, ?NUM_ATTS, ?DOCS_CONFLICTS), - ok = couch_db:close(SourceDb) - end)}. +setup() -> + Source = couch_replicator_test_helper:create_db(), + populate_db(Source), + Target = couch_replicator_test_helper:create_db(), + {Source, Target}. + + +teardown({Source, Target}) -> + couch_replicator_test_helper:delete_db(Source), + couch_replicator_test_helper:delete_db(Target). + + +should_replicate_doc_with_many_leaves({Source, Target}) -> + replicate(Source, Target), + {ok, SourceDb} = fabric2_db:open(Source, [?ADMIN_CTX]), + {ok, TargetDb} = fabric2_db:open(Target, [?ADMIN_CTX]), + verify_target(SourceDb, TargetDb, ?DOCS_CONFLICTS), + add_attachments(SourceDb, ?NUM_ATTS, ?DOCS_CONFLICTS), + replicate(Source, Target), + verify_target(SourceDb, TargetDb, ?DOCS_CONFLICTS). + populate_db(DbName) -> - {ok, Db} = couch_db:open_int(DbName, [?ADMIN_CTX]), - lists:foreach( - fun({DocId, NumConflicts}) -> - Value = <<"0">>, - Doc = #doc{ - id = DocId, - body = {[ {<<"value">>, Value} ]} - }, - {ok, _} = couch_db:update_doc(Db, Doc, [?ADMIN_CTX]), - {ok, _} = add_doc_siblings(Db, DocId, NumConflicts) - end, ?DOCS_CONFLICTS), - couch_db:close(Db). - -add_doc_siblings(Db, DocId, NumLeaves) when NumLeaves > 0 -> + {ok, Db} = fabric2_db:open(DbName, [?ADMIN_CTX]), + lists:foreach(fun({DocId, NumConflicts}) -> + Doc = #doc{ + id = DocId, + body = {[{<<"value">>, <<"0">>}]} + }, + {ok, _} = fabric2_db:update_doc(Db, Doc), + {ok, _} = add_doc_siblings(Db, DocId, NumConflicts) + end, ?DOCS_CONFLICTS). + + +add_doc_siblings(#{} = Db, DocId, NumLeaves) when NumLeaves > 0 -> add_doc_siblings(Db, DocId, NumLeaves, [], []). -add_doc_siblings(Db, _DocId, 0, AccDocs, AccRevs) -> - {ok, []} = couch_db:update_docs(Db, AccDocs, [], replicated_changes), + +add_doc_siblings(#{} = Db, _DocId, 0, AccDocs, AccRevs) -> + {ok, []} = fabric2_db:update_docs(Db, AccDocs, [replicated_changes]), {ok, AccRevs}; -add_doc_siblings(Db, DocId, NumLeaves, AccDocs, AccRevs) -> +add_doc_siblings(#{} = Db, DocId, NumLeaves, AccDocs, AccRevs) -> Value = ?l2b(?i2l(NumLeaves)), Rev = couch_hash:md5_hash(Value), Doc = #doc{ id = DocId, revs = {1, [Rev]}, - body = {[ {<<"value">>, Value} ]} + body = {[{<<"value">>, Value}]} }, add_doc_siblings(Db, DocId, NumLeaves - 1, - [Doc | AccDocs], [{1, Rev} | AccRevs]). + [Doc | AccDocs], [{1, Rev} | AccRevs]). + verify_target(_SourceDb, _TargetDb, []) -> ok; -verify_target(SourceDb, TargetDb, [{DocId, NumConflicts} | Rest]) -> - {ok, SourceLookups} = couch_db:open_doc_revs( - SourceDb, - DocId, - all, - [conflicts, deleted_conflicts]), - {ok, TargetLookups} = couch_db:open_doc_revs( - TargetDb, - DocId, - all, - [conflicts, deleted_conflicts]), + +verify_target(#{} = SourceDb, #{} = TargetDb, + [{DocId, NumConflicts} | Rest]) -> + Opts = [conflicts, deleted_conflicts], + {ok, SourceLookups} = open_doc_revs(SourceDb, DocId, Opts), + {ok, TargetLookups} = open_doc_revs(TargetDb, DocId, Opts), SourceDocs = [Doc || {ok, Doc} <- SourceLookups], TargetDocs = [Doc || {ok, Doc} <- TargetLookups], Total = NumConflicts + 1, ?assertEqual(Total, length(TargetDocs)), - lists:foreach( - fun({SourceDoc, TargetDoc}) -> - SourceJson = couch_doc:to_json_obj(SourceDoc, [attachments]), - TargetJson = couch_doc:to_json_obj(TargetDoc, [attachments]), - ?assertEqual(SourceJson, TargetJson) - end, - lists:zip(SourceDocs, TargetDocs)), + lists:foreach(fun({SourceDoc, TargetDoc}) -> + ?assertEqual(json_doc(SourceDoc), json_doc(TargetDoc)) + end, lists:zip(SourceDocs, TargetDocs)), verify_target(SourceDb, TargetDb, Rest). -add_attachments(_SourceDb, _NumAtts, []) -> + +add_attachments(_SourceDb, _NumAtts, []) -> ok; -add_attachments(SourceDb, NumAtts, [{DocId, NumConflicts} | Rest]) -> - {ok, SourceLookups} = couch_db:open_doc_revs(SourceDb, DocId, all, []), + +add_attachments(#{} = SourceDb, NumAtts, + [{DocId, NumConflicts} | Rest]) -> + {ok, SourceLookups} = open_doc_revs(SourceDb, DocId, []), SourceDocs = [Doc || {ok, Doc} <- SourceLookups], Total = NumConflicts + 1, ?assertEqual(Total, length(SourceDocs)), - NewDocs = lists:foldl( - fun(#doc{atts = Atts, revs = {Pos, [Rev | _]}} = Doc, Acc) -> + NewDocs = lists:foldl(fun + (#doc{atts = Atts, revs = {Pos, [Rev | _]}} = Doc, Acc) -> NewAtts = lists:foldl(fun(I, AttAcc) -> - AttData = crypto:strong_rand_bytes(100), - NewAtt = couch_att:new([ - {name, ?io2b(["att_", ?i2l(I), "_", - couch_doc:rev_to_str({Pos, Rev})])}, - {type, <<"application/foobar">>}, - {att_len, byte_size(AttData)}, - {data, AttData} - ]), - [NewAtt | AttAcc] + [att(I, {Pos, Rev}, 100) | AttAcc] end, [], lists:seq(1, NumAtts)), [Doc#doc{atts = Atts ++ NewAtts} | Acc] - end, - [], SourceDocs), - {ok, UpdateResults} = couch_db:update_docs(SourceDb, NewDocs, []), - NewRevs = [R || {ok, R} <- UpdateResults], - ?assertEqual(length(NewDocs), length(NewRevs)), + end, [], SourceDocs), + lists:foreach(fun(#doc{} = Doc) -> + ?assertMatch({ok, _}, fabric2_db:update_doc(SourceDb, Doc)) + end, NewDocs), add_attachments(SourceDb, NumAtts, Rest). + +att(I, PosRev, Size) -> + Name = ?io2b(["att_", ?i2l(I), "_", couch_doc:rev_to_str(PosRev)]), + AttData = crypto:strong_rand_bytes(Size), + couch_att:new([ + {name, Name}, + {type, <<"application/foobar">>}, + {att_len, byte_size(AttData)}, + {data, AttData} + ]). + + +open_doc_revs(#{} = Db, DocId, Opts) -> + fabric2_db:open_doc_revs(Db, DocId, all, Opts). + + +json_doc(#doc{} = Doc) -> + couch_doc:to_json_obj(Doc, [attachments]). + + +replicate(Source, Target) -> + % Serialize the concurrent updates of the same document in order + % to prevent having to set higher timeouts due to FDB conflicts + RepObject = #{ + <<"source">> => Source, + <<"target">> => Target, + <<"worker_processes">> => 1, + <<"http_connections">> => 1 + }, + ?assertMatch({ok, _}, + couch_replicator_test_helper:replicate(RepObject)). diff --git a/src/couch_replicator/test/eunit/couch_replicator_missing_stubs_tests.erl b/src/couch_replicator/test/eunit/couch_replicator_missing_stubs_tests.erl index ff08b5ee5..e672c76b7 100644 --- a/src/couch_replicator/test/eunit/couch_replicator_missing_stubs_tests.erl +++ b/src/couch_replicator/test/eunit/couch_replicator_missing_stubs_tests.erl @@ -14,103 +14,59 @@ -include_lib("couch/include/couch_eunit.hrl"). -include_lib("couch/include/couch_db.hrl"). +-include_lib("fabric/test/fabric2_test.hrl"). --import(couch_replicator_test_helper, [ - db_url/1, - replicate/2, - compare_dbs/2 -]). -define(REVS_LIMIT, 3). --define(TIMEOUT_EUNIT, 30). -setup() -> - DbName = ?tempdb(), - {ok, Db} = couch_db:create(DbName, [?ADMIN_CTX]), - ok = couch_db:close(Db), - DbName. - -setup(remote) -> - {remote, setup()}; -setup({A, B}) -> - Ctx = test_util:start_couch([couch_replicator]), - Source = setup(A), - Target = setup(B), - {Ctx, {Source, Target}}. - -teardown({remote, DbName}) -> - teardown(DbName); -teardown(DbName) -> - ok = couch_server:delete(DbName, [?ADMIN_CTX]), - ok. - -teardown(_, {Ctx, {Source, Target}}) -> - teardown(Source), - teardown(Target), - ok = application:stop(couch_replicator), - ok = test_util:stop_couch(Ctx). missing_stubs_test_() -> - Pairs = [{remote, remote}], { "Replicate docs with missing stubs (COUCHDB-1365)", { - foreachx, - fun setup/1, fun teardown/2, - [{Pair, fun should_replicate_docs_with_missed_att_stubs/2} - || Pair <- Pairs] + setup, + fun couch_replicator_test_helper:start_couch/0, + fun couch_replicator_test_helper:stop_couch/1, + { + foreach, + fun setup/0, + fun teardown/1, + [ + ?TDEF_FE(should_replicate_docs_with_missed_att_stubs, 60) + ] + } } }. -should_replicate_docs_with_missed_att_stubs({From, To}, {_Ctx, {Source, Target}}) -> - {lists:flatten(io_lib:format("~p -> ~p", [From, To])), - {inorder, [ - should_populate_source(Source), - should_set_target_revs_limit(Target, ?REVS_LIMIT), - should_replicate(Source, Target), - should_compare_databases(Source, Target), - should_update_source_docs(Source, ?REVS_LIMIT * 2), - should_replicate(Source, Target), - should_compare_databases(Source, Target) - ]}}. - -should_populate_source({remote, Source}) -> - should_populate_source(Source); -should_populate_source(Source) -> - {timeout, ?TIMEOUT_EUNIT, ?_test(populate_db(Source))}. - -should_replicate({remote, Source}, Target) -> - should_replicate(db_url(Source), Target); -should_replicate(Source, {remote, Target}) -> - should_replicate(Source, db_url(Target)); -should_replicate(Source, Target) -> - {timeout, ?TIMEOUT_EUNIT, ?_test(replicate(Source, Target))}. - -should_set_target_revs_limit({remote, Target}, RevsLimit) -> - should_set_target_revs_limit(Target, RevsLimit); -should_set_target_revs_limit(Target, RevsLimit) -> - ?_test(begin - {ok, Db} = couch_db:open_int(Target, [?ADMIN_CTX]), - ?assertEqual(ok, couch_db:set_revs_limit(Db, RevsLimit)), - ok = couch_db:close(Db) - end). - -should_compare_databases({remote, Source}, Target) -> - should_compare_databases(Source, Target); -should_compare_databases(Source, {remote, Target}) -> - should_compare_databases(Source, Target); -should_compare_databases(Source, Target) -> - {timeout, ?TIMEOUT_EUNIT, ?_test(compare_dbs(Source, Target))}. - -should_update_source_docs({remote, Source}, Times) -> - should_update_source_docs(Source, Times); -should_update_source_docs(Source, Times) -> - {timeout, ?TIMEOUT_EUNIT, ?_test(update_db_docs(Source, Times))}. +setup() -> + Source = couch_replicator_test_helper:create_db(), + populate_db(Source), + Target = couch_replicator_test_helper:create_db(), + {Source, Target}. + + +teardown({Source, Target}) -> + couch_replicator_test_helper:delete_db(Source), + couch_replicator_test_helper:delete_db(Target). + + +should_replicate_docs_with_missed_att_stubs({Source, Target}) -> + {ok, TargetDb} = fabric2_db:open(Target, [?ADMIN_CTX]), + ?assertEqual(ok, fabric2_db:set_revs_limit(TargetDb, ?REVS_LIMIT)), + + ?assertMatch({ok, _}, + couch_replicator_test_helper:replicate(Source, Target)), + ?assertEqual(ok, couch_replicator_test_helper:compare_dbs(Source, Target)), + + ok = update_db_docs(Source, ?REVS_LIMIT * 2), + + ?assertMatch({ok, _}, + couch_replicator_test_helper:replicate(Source, Target)), + ?assertEqual(ok, couch_replicator_test_helper:compare_dbs(Source, Target)). populate_db(DbName) -> - {ok, Db} = couch_db:open_int(DbName, []), AttData = crypto:strong_rand_bytes(6000), Doc = #doc{ id = <<"doc1">>, @@ -120,35 +76,40 @@ populate_db(DbName) -> {type, <<"application/foobar">>}, {att_len, byte_size(AttData)}, {data, AttData} - ]) + ]) ] }, - {ok, _} = couch_db:update_doc(Db, Doc, []), - couch_db:close(Db). + couch_replicator_test_helper:create_docs(DbName, [Doc]). + update_db_docs(DbName, Times) -> - {ok, Db} = couch_db:open_int(DbName, []), - {ok, _} = couch_db:fold_docs( - Db, - fun(FDI, Acc) -> db_fold_fun(FDI, Acc) end, - {DbName, Times}, - []), - ok = couch_db:close(Db). - -db_fold_fun(FullDocInfo, {DbName, Times}) -> - {ok, Db} = couch_db:open_int(DbName, []), - {ok, Doc} = couch_db:open_doc(Db, FullDocInfo), - lists:foldl( - fun(_, {Pos, RevId}) -> - {ok, Db2} = couch_db:reopen(Db), - NewDocVersion = Doc#doc{ - revs = {Pos, [RevId]}, - body = {[{<<"value">>, base64:encode(crypto:strong_rand_bytes(100))}]} - }, - {ok, NewRev} = couch_db:update_doc(Db2, NewDocVersion, []), - NewRev - end, - {element(1, Doc#doc.revs), hd(element(2, Doc#doc.revs))}, - lists:seq(1, Times)), - ok = couch_db:close(Db), - {ok, {DbName, Times}}. + {ok, Db} = fabric2_db:open(DbName, [?ADMIN_CTX]), + FoldFun = fun + ({meta, _Meta}, Acc) -> + {ok, Acc}; + (complete, Acc) -> + {ok, Acc}; + ({row, Row}, Acc) -> + {_, DocId} = lists:keyfind(id, 1, Row), + ok = update_doc(DbName, DocId, Times), + {ok, Acc} + end, + Opts = [{restart_tx, true}], + {ok, _} = fabric2_db:fold_docs(Db, FoldFun, ok, Opts), + ok. + + +update_doc(_DbName, _DocId, 0) -> + ok; + +update_doc(DbName, DocId, Times) -> + {ok, Db} = fabric2_db:open(DbName, [?ADMIN_CTX]), + {ok, Doc} = fabric2_db:open_doc(Db, DocId, []), + #doc{revs = {Pos, [Rev | _]}} = Doc, + Val = base64:encode(crypto:strong_rand_bytes(100)), + Doc1 = Doc#doc{ + revs = {Pos, [Rev]}, + body = {[{<<"value">>, Val}]} + }, + {ok, _} = fabric2_db:update_doc(Db, Doc1), + update_doc(DbName, DocId, Times - 1). diff --git a/src/couch_replicator/test/eunit/couch_replicator_proxy_tests.erl b/src/couch_replicator/test/eunit/couch_replicator_proxy_tests.erl index da46b8a26..f5e745d90 100644 --- a/src/couch_replicator/test/eunit/couch_replicator_proxy_tests.erl +++ b/src/couch_replicator/test/eunit/couch_replicator_proxy_tests.erl @@ -14,15 +14,7 @@ -include_lib("couch/include/couch_eunit.hrl"). -include_lib("couch_replicator/src/couch_replicator.hrl"). --include_lib("couch_replicator/include/couch_replicator_api_wrap.hrl"). - - -setup() -> - ok. - - -teardown(_) -> - ok. +-include_lib("fabric/test/fabric2_test.hrl"). replicator_proxy_test_() -> @@ -30,87 +22,78 @@ replicator_proxy_test_() -> "replicator proxy tests", { setup, - fun() -> test_util:start_couch([couch_replicator]) end, fun test_util:stop_couch/1, - { - foreach, - fun setup/0, fun teardown/1, - [ - fun parse_rep_doc_without_proxy/1, - fun parse_rep_doc_with_proxy/1, - fun parse_rep_source_target_proxy/1, - fun mutually_exclusive_proxy_and_source_proxy/1, - fun mutually_exclusive_proxy_and_target_proxy/1 - ] - } + fun couch_replicator_test_helper:start_couch/0, + fun couch_replicator_test_helper:stop_couch/1, + with([ + ?TDEF(parse_rep_doc_without_proxy), + ?TDEF(parse_rep_doc_with_proxy), + ?TDEF(parse_rep_source_target_proxy), + ?TDEF(mutually_exclusive_proxy_and_source_proxy), + ?TDEF(mutually_exclusive_proxy_and_target_proxy) + ]) } }. parse_rep_doc_without_proxy(_) -> - ?_test(begin - NoProxyDoc = {[ - {<<"source">>, <<"http://unproxied.com">>}, - {<<"target">>, <<"http://otherunproxied.com">>} - ]}, - Rep = couch_replicator_docs:parse_rep_doc(NoProxyDoc), - ?assertEqual((Rep#rep.source)#httpdb.proxy_url, undefined), - ?assertEqual((Rep#rep.target)#httpdb.proxy_url, undefined) - end). + NoProxyDoc = {[ + {<<"source">>, <<"http://unproxied.com">>}, + {<<"target">>, <<"http://otherunproxied.com">>} + ]}, + Rep = couch_replicator_parse:parse_rep_doc(NoProxyDoc), + Src = maps:get(?SOURCE, Rep), + Tgt = maps:get(?TARGET, Rep), + ?assertEqual(null, maps:get(<<"proxy_url">>, Src)), + ?assertEqual(null, maps:get(<<"proxy_url">>, Tgt)). parse_rep_doc_with_proxy(_) -> - ?_test(begin - ProxyURL = <<"http://myproxy.com">>, - ProxyDoc = {[ - {<<"source">>, <<"http://unproxied.com">>}, - {<<"target">>, <<"http://otherunproxied.com">>}, - {<<"proxy">>, ProxyURL} - ]}, - Rep = couch_replicator_docs:parse_rep_doc(ProxyDoc), - ?assertEqual((Rep#rep.source)#httpdb.proxy_url, binary_to_list(ProxyURL)), - ?assertEqual((Rep#rep.target)#httpdb.proxy_url, binary_to_list(ProxyURL)) - end). + ProxyURL = <<"http://myproxy.com">>, + ProxyDoc = {[ + {<<"source">>, <<"http://unproxied.com">>}, + {<<"target">>, <<"http://otherunproxied.com">>}, + {<<"proxy">>, ProxyURL} + ]}, + Rep = couch_replicator_parse:parse_rep_doc(ProxyDoc), + Src = maps:get(?SOURCE, Rep), + Tgt = maps:get(?TARGET, Rep), + ?assertEqual(ProxyURL, maps:get(<<"proxy_url">>, Src)), + ?assertEqual(ProxyURL, maps:get(<<"proxy_url">>, Tgt)). parse_rep_source_target_proxy(_) -> - ?_test(begin - SrcProxyURL = <<"http://mysrcproxy.com">>, - TgtProxyURL = <<"http://mytgtproxy.com:9999">>, - ProxyDoc = {[ - {<<"source">>, <<"http://unproxied.com">>}, - {<<"target">>, <<"http://otherunproxied.com">>}, - {<<"source_proxy">>, SrcProxyURL}, - {<<"target_proxy">>, TgtProxyURL} - ]}, - Rep = couch_replicator_docs:parse_rep_doc(ProxyDoc), - ?assertEqual((Rep#rep.source)#httpdb.proxy_url, - binary_to_list(SrcProxyURL)), - ?assertEqual((Rep#rep.target)#httpdb.proxy_url, - binary_to_list(TgtProxyURL)) - end). + SrcProxyURL = <<"http://mysrcproxy.com">>, + TgtProxyURL = <<"http://mytgtproxy.com:9999">>, + ProxyDoc = {[ + {<<"source">>, <<"http://unproxied.com">>}, + {<<"target">>, <<"http://otherunproxied.com">>}, + {<<"source_proxy">>, SrcProxyURL}, + {<<"target_proxy">>, TgtProxyURL} + ]}, + Rep = couch_replicator_parse:parse_rep_doc(ProxyDoc), + Src = maps:get(?SOURCE, Rep), + Tgt = maps:get(?TARGET, Rep), + ?assertEqual(SrcProxyURL, maps:get(<<"proxy_url">>, Src)), + ?assertEqual(TgtProxyURL, maps:get(<<"proxy_url">>, Tgt)). mutually_exclusive_proxy_and_source_proxy(_) -> - ?_test(begin - ProxyDoc = {[ - {<<"source">>, <<"http://unproxied.com">>}, - {<<"target">>, <<"http://otherunproxied.com">>}, - {<<"proxy">>, <<"oldstyleproxy.local">>}, - {<<"source_proxy">>, <<"sourceproxy.local">>} - ]}, - ?assertThrow({bad_rep_doc, _}, - couch_replicator_docs:parse_rep_doc(ProxyDoc)) - end). + ProxyDoc = {[ + {<<"source">>, <<"http://unproxied.com">>}, + {<<"target">>, <<"http://otherunproxied.com">>}, + {<<"proxy">>, <<"oldstyleproxy.local">>}, + {<<"source_proxy">>, <<"sourceproxy.local">>} + ]}, + ?assertThrow({bad_rep_doc, _}, + couch_replicator_parse:parse_rep_doc(ProxyDoc)). mutually_exclusive_proxy_and_target_proxy(_) -> - ?_test(begin - ProxyDoc = {[ - {<<"source">>, <<"http://unproxied.com">>}, - {<<"target">>, <<"http://otherunproxied.com">>}, - {<<"proxy">>, <<"oldstyleproxy.local">>}, - {<<"target_proxy">>, <<"targetproxy.local">>} - ]}, - ?assertThrow({bad_rep_doc, _}, - couch_replicator_docs:parse_rep_doc(ProxyDoc)) - end). + ProxyDoc = {[ + {<<"source">>, <<"http://unproxied.com">>}, + {<<"target">>, <<"http://otherunproxied.com">>}, + {<<"proxy">>, <<"oldstyleproxy.local">>}, + {<<"target_proxy">>, <<"targetproxy.local">>} + ]}, + ?assertThrow({bad_rep_doc, _}, + couch_replicator_parse:parse_rep_doc(ProxyDoc)). diff --git a/src/couch_replicator/test/eunit/couch_replicator_rate_limiter_tests.erl b/src/couch_replicator/test/eunit/couch_replicator_rate_limiter_tests.erl index 034550aec..fb9892017 100644 --- a/src/couch_replicator/test/eunit/couch_replicator_rate_limiter_tests.erl +++ b/src/couch_replicator/test/eunit/couch_replicator_rate_limiter_tests.erl @@ -1,6 +1,7 @@ -module(couch_replicator_rate_limiter_tests). -include_lib("couch/include/couch_eunit.hrl"). +-include_lib("fabric/test/fabric2_test.hrl"). rate_limiter_test_() -> @@ -9,64 +10,52 @@ rate_limiter_test_() -> fun setup/0, fun teardown/1, [ - t_new_key(), - t_1_failure(), - t_2_failures_back_to_back(), - t_2_failures(), - t_success_threshold(), - t_1_failure_2_successes() + ?TDEF_FE(t_new_key), + ?TDEF_FE(t_1_failure), + ?TDEF_FE(t_2_failures_back_to_back), + ?TDEF_FE(t_2_failures), + ?TDEF_FE(t_success_threshold), + ?TDEF_FE(t_1_failure_2_successes) ] }. -t_new_key() -> - ?_test(begin - ?assertEqual(0, couch_replicator_rate_limiter:interval({"foo", get})) - end). +t_new_key(_) -> + ?assertEqual(0, couch_replicator_rate_limiter:interval({"foo", get})). -t_1_failure() -> - ?_test(begin - ?assertEqual(24, couch_replicator_rate_limiter:failure({"foo", get})) - end). +t_1_failure(_) -> + ?assertEqual(24, couch_replicator_rate_limiter:failure({"foo", get})). -t_2_failures() -> - ?_test(begin - couch_replicator_rate_limiter:failure({"foo", get}), - low_pass_filter_delay(), - Interval = couch_replicator_rate_limiter:failure({"foo", get}), - ?assertEqual(29, Interval) - end). +t_2_failures(_) -> + couch_replicator_rate_limiter:failure({"foo", get}), + low_pass_filter_delay(), + Interval = couch_replicator_rate_limiter:failure({"foo", get}), + ?assertEqual(29, Interval). -t_2_failures_back_to_back() -> - ?_test(begin - couch_replicator_rate_limiter:failure({"foo", get}), - Interval = couch_replicator_rate_limiter:failure({"foo", get}), - ?assertEqual(24, Interval) - end). +t_2_failures_back_to_back(_) -> + couch_replicator_rate_limiter:failure({"foo", get}), + Interval = couch_replicator_rate_limiter:failure({"foo", get}), + ?assertEqual(24, Interval). -t_success_threshold() -> - ?_test(begin - Interval = couch_replicator_rate_limiter:success({"foo", get}), - ?assertEqual(0, Interval), - Interval = couch_replicator_rate_limiter:success({"foo", get}), - ?assertEqual(0, Interval) - end). +t_success_threshold(_) -> + Interval = couch_replicator_rate_limiter:success({"foo", get}), + ?assertEqual(0, Interval), + Interval = couch_replicator_rate_limiter:success({"foo", get}), + ?assertEqual(0, Interval). -t_1_failure_2_successes() -> - ?_test(begin - couch_replicator_rate_limiter:failure({"foo", get}), - low_pass_filter_delay(), - Succ1 = couch_replicator_rate_limiter:success({"foo", get}), - ?assertEqual(20, Succ1), - low_pass_filter_delay(), - Succ2 = couch_replicator_rate_limiter:success({"foo", get}), - ?assertEqual(0, Succ2) - end). +t_1_failure_2_successes(_) -> + couch_replicator_rate_limiter:failure({"foo", get}), + low_pass_filter_delay(), + Succ1 = couch_replicator_rate_limiter:success({"foo", get}), + ?assertEqual(20, Succ1), + low_pass_filter_delay(), + Succ2 = couch_replicator_rate_limiter:success({"foo", get}), + ?assertEqual(0, Succ2). low_pass_filter_delay() -> diff --git a/src/couch_replicator/test/eunit/couch_replicator_retain_stats_between_job_runs.erl b/src/couch_replicator/test/eunit/couch_replicator_retain_stats_between_job_runs.erl index 037f37191..4b7c37d9e 100644 --- a/src/couch_replicator/test/eunit/couch_replicator_retain_stats_between_job_runs.erl +++ b/src/couch_replicator/test/eunit/couch_replicator_retain_stats_between_job_runs.erl @@ -15,139 +15,72 @@ -include_lib("couch/include/couch_eunit.hrl"). -include_lib("couch/include/couch_db.hrl"). -include_lib("couch_replicator/src/couch_replicator.hrl"). - --define(DELAY, 500). --define(TIMEOUT, 60000). - - -setup_all() -> - test_util:start_couch([couch_replicator, chttpd, mem3, fabric]). - - -teardown_all(Ctx) -> - ok = test_util:stop_couch(Ctx). - - -setup() -> - Source = setup_db(), - Target = setup_db(), - {Source, Target}. +-include_lib("fabric/test/fabric2_test.hrl"). -teardown({Source, Target}) -> - teardown_db(Source), - teardown_db(Target), - ok. +-define(DELAY, 500). stats_retained_test_() -> { setup, - fun setup_all/0, - fun teardown_all/1, + fun couch_replicator_test_helper:start_couch/0, + fun couch_replicator_test_helper:stop_couch/1, { foreach, fun setup/0, fun teardown/1, [ - fun t_stats_retained_by_scheduler/1, - fun t_stats_retained_on_job_removal/1 + ?TDEF_FE(t_stats_retained_on_job_removal, 60) ] } }. -t_stats_retained_by_scheduler({Source, Target}) -> - ?_test(begin - {ok, _} = add_vdu(Target), - populate_db_reject_even_docs(Source, 1, 10), - {ok, RepPid, RepId} = replicate(Source, Target), - wait_target_in_sync(6, Target), - - check_active_tasks(10, 5, 5), - check_scheduler_jobs(10, 5, 5), +setup() -> + Source = couch_replicator_test_helper:create_db(), + Target = couch_replicator_test_helper:create_db(), + config:set("replicator", "stats_update_interval_sec", "0", false), + config:set("replicator", "checkpoint_interval", "1000", false), + {Source, Target}. - stop_job(RepPid), - check_scheduler_jobs(10, 5, 5), - start_job(), - check_active_tasks(10, 5, 5), - check_scheduler_jobs(10, 5, 5), - couch_replicator_scheduler:remove_job(RepId) - end). +teardown({Source, Target}) -> + config:delete("replicator", "stats_update_interval_sec", false), + config:delete("replicator", "checkpoint_interval", false), + couch_replicator_test_helper:delete_db(Source), + couch_replicator_test_helper:delete_db(Target). t_stats_retained_on_job_removal({Source, Target}) -> - ?_test(begin - {ok, _} = add_vdu(Target), - populate_db_reject_even_docs(Source, 1, 10), - {ok, _, RepId} = replicate(Source, Target), - wait_target_in_sync(6, Target), % 5 + 1 vdu - - check_active_tasks(10, 5, 5), - check_scheduler_jobs(10, 5, 5), + {ok, _} = add_vdu(Target), + populate_db_reject_even_docs(Source, 1, 10), + {ok, Pid1, RepId} = replicate(Source, Target), + wait_target_in_sync(6, Target), % 5 + 1 vdu - couch_replicator_scheduler:remove_job(RepId), + check_scheduler_jobs(10, 5, 5), - populate_db_reject_even_docs(Source, 11, 20), - {ok, _, RepId} = replicate(Source, Target), - wait_target_in_sync(11, Target), % 6 + 5 + cancel(RepId, Pid1), - check_scheduler_jobs(20, 10, 10), - check_active_tasks(20, 10, 10), + populate_db_reject_even_docs(Source, 11, 20), + {ok, Pid2, RepId} = replicate(Source, Target), + wait_target_in_sync(11, Target), % 6 + 5 - couch_replicator_scheduler:remove_job(RepId), + check_scheduler_jobs(20, 10, 10), - populate_db_reject_even_docs(Source, 21, 30), - {ok, _, RepId} = replicate(Source, Target), - wait_target_in_sync(16, Target), % 11 + 5 + cancel(RepId, Pid2), - check_scheduler_jobs(30, 15, 15), - check_active_tasks(30, 15, 15), - - couch_replicator_scheduler:remove_job(RepId) - end). + populate_db_reject_even_docs(Source, 21, 30), + {ok, Pid3, RepId} = replicate(Source, Target), + wait_target_in_sync(16, Target), % 11 + 5 + check_scheduler_jobs(30, 15, 15), -setup_db() -> - DbName = ?tempdb(), - {ok, Db} = couch_db:create(DbName, [?ADMIN_CTX]), - ok = couch_db:close(Db), - DbName. - - -teardown_db(DbName) -> - ok = couch_server:delete(DbName, [?ADMIN_CTX]), - ok. - - -stop_job(RepPid) -> - Ref = erlang:monitor(process, RepPid), - gen_server:cast(couch_replicator_scheduler, {set_max_jobs, 0}), - couch_replicator_scheduler:reschedule(), - receive - {'DOWN', Ref, _, _, _} -> ok - after ?TIMEOUT -> - erlang:error(timeout) - end. - - -start_job() -> - gen_server:cast(couch_replicator_scheduler, {set_max_jobs, 500}), - couch_replicator_scheduler:reschedule(). - - -check_active_tasks(DocsRead, DocsWritten, DocsFailed) -> - RepTask = wait_for_task_status(), - ?assertNotEqual(timeout, RepTask), - ?assertEqual(DocsRead, couch_util:get_value(docs_read, RepTask)), - ?assertEqual(DocsWritten, couch_util:get_value(docs_written, RepTask)), - ?assertEqual(DocsFailed, couch_util:get_value(doc_write_failures, - RepTask)). + cancel(RepId, Pid3). check_scheduler_jobs(DocsRead, DocsWritten, DocFailed) -> - Info = wait_scheduler_info(), + Info = wait_scheduler_info(DocsRead), ?assert(maps:is_key(<<"changes_pending">>, Info)), ?assert(maps:is_key(<<"doc_write_failures">>, Info)), ?assert(maps:is_key(<<"docs_read">>, Info)), @@ -161,27 +94,18 @@ check_scheduler_jobs(DocsRead, DocsWritten, DocFailed) -> ?assertMatch(#{<<"doc_write_failures">> := DocFailed}, Info). -replication_tasks() -> - lists:filter(fun(P) -> - couch_util:get_value(type, P) =:= replication - end, couch_task_status:all()). - - -wait_for_task_status() -> +wait_scheduler_info(DocsRead) -> test_util:wait(fun() -> - case replication_tasks() of - [] -> wait; - [RepTask] -> RepTask - end - end). - - -wait_scheduler_info() -> - test_util:wait(fun() -> - case scheduler_jobs() of - [] -> wait; - [#{<<"info">> := null}] -> wait; - [#{<<"info">> := Info}] -> Info + case couch_replicator_test_helper:scheduler_jobs() of + [] -> + wait; + [#{<<"info">> := null}] -> + wait; + [#{<<"info">> := Info}] -> + case Info of + #{<<"docs_read">> := DocsRead} -> Info; + #{} -> wait + end end end). @@ -197,16 +121,12 @@ populate_db_reject_even_docs(DbName, Start, End) -> populate_db(DbName, Start, End, BodyFun) when is_function(BodyFun, 1) -> - {ok, Db} = couch_db:open_int(DbName, []), - Docs = lists:foldl( - fun(DocIdCounter, Acc) -> - Id = integer_to_binary(DocIdCounter), - Doc = #doc{id = Id, body = BodyFun(DocIdCounter)}, - [Doc | Acc] - end, - [], lists:seq(Start, End)), - {ok, _} = couch_db:update_docs(Db, Docs, []), - ok = couch_db:close(Db). + Docs = lists:foldl(fun(DocIdCounter, Acc) -> + Id = integer_to_binary(DocIdCounter), + Doc = #doc{id = Id, body = BodyFun(DocIdCounter)}, + [Doc | Acc] + end, [], lists:seq(Start, End)), + couch_replicator_test_helper:create_docs(DbName, Docs). wait_target_in_sync(DocCount, Target) when is_integer(DocCount) -> @@ -215,14 +135,13 @@ wait_target_in_sync(DocCount, Target) when is_integer(DocCount) -> wait_target_in_sync_loop(_DocCount, _TargetName, 0) -> erlang:error({assertion_failed, [ - {module, ?MODULE}, {line, ?LINE}, - {reason, "Could not get source and target databases in sync"} + {module, ?MODULE}, {line, ?LINE}, + {reason, "Could not get source and target databases in sync"} ]}); wait_target_in_sync_loop(DocCount, TargetName, RetriesLeft) -> - {ok, Target} = couch_db:open_int(TargetName, []), - {ok, TargetInfo} = couch_db:get_db_info(Target), - ok = couch_db:close(Target), + {ok, Db} = fabric2_db:open(TargetName, [?ADMIN_CTX]), + {ok, TargetInfo} = fabric2_db:get_db_info(Db), TargetDocCount = couch_util:get_value(doc_count, TargetInfo), case TargetDocCount == DocCount of true -> @@ -234,27 +153,11 @@ wait_target_in_sync_loop(DocCount, TargetName, RetriesLeft) -> replicate(Source, Target) -> - SrcUrl = couch_replicator_test_helper:db_url(Source), - TgtUrl = couch_replicator_test_helper:db_url(Target), - RepObject = {[ - {<<"source">>, SrcUrl}, - {<<"target">>, TgtUrl}, - {<<"continuous">>, true} - ]}, - {ok, Rep} = couch_replicator_utils:parse_rep_doc(RepObject, ?ADMIN_USER), - ok = couch_replicator_scheduler:add_job(Rep), - couch_replicator_scheduler:reschedule(), - Pid = couch_replicator_test_helper:get_pid(Rep#rep.id), - {ok, Pid, Rep#rep.id}. - - -scheduler_jobs() -> - Addr = config:get("chttpd", "bind_address", "127.0.0.1"), - Port = mochiweb_socket_server:get(chttpd, port), - Url = lists:flatten(io_lib:format("http://~s:~b/_scheduler/jobs", [Addr, Port])), - {ok, 200, _, Body} = test_request:get(Url, []), - Json = jiffy:decode(Body, [return_maps]), - maps:get(<<"jobs">>, Json). + couch_replicator_test_helper:replicate_continuous(Source, Target). + + +cancel(RepId, Pid) -> + couch_replicator_test_helper:cancel(RepId, Pid). vdu() -> @@ -274,9 +177,5 @@ add_vdu(DbName) -> {<<"validate_doc_update">>, vdu()} ], Doc = couch_doc:from_json_obj({DocProps}, []), - {ok, Db} = couch_db:open_int(DbName, [?ADMIN_CTX]), - try - {ok, _Rev} = couch_db:update_doc(Db, Doc, []) - after - couch_db:close(Db) - end. + {ok, Db} = fabric2_db:open(DbName, [?ADMIN_CTX]), + {ok, _} = fabric2_db:update_doc(Db, Doc, []). diff --git a/src/couch_replicator/test/eunit/couch_replicator_selector_tests.erl b/src/couch_replicator/test/eunit/couch_replicator_selector_tests.erl index 5026c1435..5dfe4ba91 100644 --- a/src/couch_replicator/test/eunit/couch_replicator_selector_tests.erl +++ b/src/couch_replicator/test/eunit/couch_replicator_selector_tests.erl @@ -15,103 +15,69 @@ -include_lib("couch/include/couch_eunit.hrl"). -include_lib("couch/include/couch_db.hrl"). -include_lib("couch_replicator/src/couch_replicator.hrl"). +-include_lib("fabric/test/fabric2_test.hrl"). -setup(_) -> - Ctx = test_util:start_couch([couch_replicator]), - Source = create_db(), - create_docs(Source), - Target = create_db(), - {Ctx, {Source, Target}}. - -teardown(_, {Ctx, {Source, Target}}) -> - delete_db(Source), - delete_db(Target), - ok = application:stop(couch_replicator), - ok = test_util:stop_couch(Ctx). - selector_replication_test_() -> - Pairs = [{remote, remote}], { "Selector filtered replication tests", { - foreachx, - fun setup/1, fun teardown/2, - [{Pair, fun should_succeed/2} || Pair <- Pairs] + setup, + fun couch_replicator_test_helper:start_couch/0, + fun couch_replicator_test_helper:stop_couch/1, + { + foreach, + fun setup/0, + fun teardown/1, + [ + ?TDEF_FE(should_replicate_with_selector) + ] + } } }. -should_succeed({From, To}, {_Ctx, {Source, Target}}) -> - RepObject = {[ - {<<"source">>, db_url(From, Source)}, - {<<"target">>, db_url(To, Target)}, - {<<"selector">>, {[{<<"_id">>, <<"doc2">>}]}} - ]}, - {ok, _} = couch_replicator:replicate(RepObject, ?ADMIN_USER), - %% FilteredFun is an Erlang version of following mango selector - FilterFun = fun(_DocId, {Props}) -> - couch_util:get_value(<<"_id">>, Props) == <<"doc2">> - end, - {ok, TargetDbInfo, AllReplies} = compare_dbs(Source, Target, FilterFun), - {lists:flatten(io_lib:format("~p -> ~p", [From, To])), [ - {"Target DB has proper number of docs", - ?_assertEqual(1, proplists:get_value(doc_count, TargetDbInfo))}, - {"All the docs selected as expected", - ?_assert(lists:all(fun(Valid) -> Valid end, AllReplies))} - ]}. -compare_dbs(Source, Target, FilterFun) -> - {ok, SourceDb} = couch_db:open_int(Source, []), - {ok, TargetDb} = couch_db:open_int(Target, []), - {ok, TargetDbInfo} = couch_db:get_db_info(TargetDb), - Fun = fun(FullDocInfo, Acc) -> - {ok, DocId, SourceDoc} = read_doc(SourceDb, FullDocInfo), - TargetReply = read_doc(TargetDb, DocId), - case FilterFun(DocId, SourceDoc) of - true -> - ValidReply = {ok, DocId, SourceDoc} == TargetReply, - {ok, [ValidReply|Acc]}; - false -> - ValidReply = {not_found, missing} == TargetReply, - {ok, [ValidReply|Acc]} - end - end, - {ok, AllReplies} = couch_db:fold_docs(SourceDb, Fun, [], []), - ok = couch_db:close(SourceDb), - ok = couch_db:close(TargetDb), - {ok, TargetDbInfo, AllReplies}. +setup() -> + Source = couch_replicator_test_helper:create_db(), + create_docs(Source), + Target = couch_replicator_test_helper:create_db(), + {Source, Target}. -read_doc(Db, DocIdOrInfo) -> - case couch_db:open_doc(Db, DocIdOrInfo) of - {ok, Doc} -> - {Props} = couch_doc:to_json_obj(Doc, [attachments]), - DocId = couch_util:get_value(<<"_id">>, Props), - {ok, DocId, {Props}}; - Error -> - Error - end. -create_db() -> - DbName = ?tempdb(), - {ok, Db} = couch_db:create(DbName, [?ADMIN_CTX]), - ok = couch_db:close(Db), - DbName. +teardown({Source, Target}) -> + couch_replicator_test_helper:delete_db(Source), + couch_replicator_test_helper:delete_db(Target). -create_docs(DbName) -> - {ok, Db} = couch_db:open(DbName, [?ADMIN_CTX]), - Doc1 = couch_doc:from_json_obj({[ - {<<"_id">>, <<"doc1">>} - ]}), - Doc2 = couch_doc:from_json_obj({[ - {<<"_id">>, <<"doc2">>} - ]}), - {ok, _} = couch_db:update_docs(Db, [Doc1, Doc2]), - couch_db:close(Db). -delete_db(DbName) -> - ok = couch_server:delete(DbName, [?ADMIN_CTX]). +should_replicate_with_selector({Source, Target}) -> + RepObject = #{ + <<"source">> => Source, + <<"target">> => Target, + <<"selector">> => #{ + <<"_id">> => <<"doc2">> + } + }, + ?assertMatch({ok, _}, couch_replicator_test_helper:replicate(RepObject)), + {ok, TargetDbInfo, AllReplies} = compare_dbs(Source, Target), + ?assertEqual(1, proplists:get_value(doc_count, TargetDbInfo)), + ?assert(lists:all(fun(Valid) -> Valid end, AllReplies)). + -db_url(remote, DbName) -> - Addr = config:get("httpd", "bind_address", "127.0.0.1"), - Port = mochiweb_socket_server:get(couch_httpd, port), - ?l2b(io_lib:format("http://~s:~b/~s", [Addr, Port, DbName])). +compare_dbs(Source, Target) -> + {ok, TargetDb} = fabric2_db:open(Target, []), + {ok, TargetDbInfo} = fabric2_db:get_db_info(TargetDb), + Fun = fun(SrcDoc, TgtDoc, Acc) -> + case SrcDoc#doc.id == <<"doc2">> of + true -> [SrcDoc#doc.body == TgtDoc#doc.body | Acc]; + false -> [not_found == TgtDoc | Acc] + end + end, + Res = couch_replicator_test_helper:compare_fold(Source, Target, Fun, []), + {ok, TargetDbInfo, Res}. + + +create_docs(DbName) -> + couch_replicator_test_helper:create_docs(DbName, [ + #{<<"_id">> => <<"doc1">>}, + #{<<"_id">> => <<"doc2">>} + ]). diff --git a/src/couch_replicator/test/eunit/couch_replicator_small_max_request_size_target.erl b/src/couch_replicator/test/eunit/couch_replicator_small_max_request_size_target.erl index 8aebbe151..b113c5392 100644 --- a/src/couch_replicator/test/eunit/couch_replicator_small_max_request_size_target.erl +++ b/src/couch_replicator/test/eunit/couch_replicator_small_max_request_size_target.erl @@ -1,139 +1,70 @@ -module(couch_replicator_small_max_request_size_target). + -include_lib("couch/include/couch_eunit.hrl"). -include_lib("couch/include/couch_db.hrl"). - --import(couch_replicator_test_helper, [ - db_url/1, - replicate/1, - compare_dbs/3 -]). - --define(TIMEOUT_EUNIT, 360). - - -setup() -> - DbName = ?tempdb(), - {ok, Db} = couch_db:create(DbName, [?ADMIN_CTX]), - ok = couch_db:close(Db), - DbName. - - -setup(remote) -> - {remote, setup()}; - -setup({A, B}) -> - Ctx = test_util:start_couch([couch_replicator]), - config:set("httpd", "max_http_request_size", "10000", false), - Source = setup(A), - Target = setup(B), - {Ctx, {Source, Target}}. - - -teardown({remote, DbName}) -> - teardown(DbName); -teardown(DbName) -> - ok = couch_server:delete(DbName, [?ADMIN_CTX]), - ok. - -teardown(_, {Ctx, {Source, Target}}) -> - teardown(Source), - teardown(Target), - ok = application:stop(couch_replicator), - ok = test_util:stop_couch(Ctx). +-include_lib("fabric/test/fabric2_test.hrl"). reduce_max_request_size_test_() -> - Pairs = [{remote, remote}], { "Replicate docs when target has a small max_http_request_size", { - foreachx, - fun setup/1, fun teardown/2, - [{Pair, fun should_replicate_all_docs/2} - || Pair <- Pairs] - ++ [{Pair, fun should_replicate_one/2} - || Pair <- Pairs] - % Disabled. See issue 574. Sometimes PUTs with a doc and - % attachment which exceed maximum request size are simply - % closed instead of returning a 413 request. That makes these - % tests flaky. - ++ [{Pair, fun should_replicate_one_with_attachment/2} - || Pair <- Pairs] + setup, + fun couch_replicator_test_helper:start_couch/0, + fun couch_replicator_test_helper:stop_couch/1, + { + foreach, + fun setup/0, + fun teardown/1, + [ + ?TDEF_FE(should_replicate_all_docs, 120), + ?TDEF_FE(should_replicate_one, 120), + ?TDEF_FE(should_replicate_one_with_attachment, 120) + ] + } } }. -% Test documents which are below max_http_request_size but when batched, batch size -% will be greater than max_http_request_size. Replicator could automatically split -% the batch into smaller batches and POST those separately. -should_replicate_all_docs({From, To}, {_Ctx, {Source, Target}}) -> - {lists:flatten(io_lib:format("~p -> ~p", [From, To])), - {inorder, [should_populate_source(Source), - should_replicate(Source, Target), - should_compare_databases(Source, Target, [])]}}. - - -% If a document is too large to post as a single request, that document is -% skipped but replication overall will make progress and not crash. -should_replicate_one({From, To}, {_Ctx, {Source, Target}}) -> - {lists:flatten(io_lib:format("~p -> ~p", [From, To])), - {inorder, [should_populate_source_one_large_one_small(Source), - should_replicate(Source, Target), - should_compare_databases(Source, Target, [<<"doc0">>])]}}. - - -% If a document has an attachment > 64 * 1024 bytes, replicator will switch to -% POST-ing individual documents directly and skip bulk_docs. Test that case -% separately -% See note in main test function why this was disabled. -should_replicate_one_with_attachment({From, To}, {_Ctx, {Source, Target}}) -> - {lists:flatten(io_lib:format("~p -> ~p", [From, To])), - {inorder, [should_populate_source_one_large_attachment(Source), - should_populate_source(Source), - should_replicate(Source, Target), - should_compare_databases(Source, Target, [<<"doc0">>])]}}. - - -should_populate_source({remote, Source}) -> - should_populate_source(Source); - -should_populate_source(Source) -> - {timeout, ?TIMEOUT_EUNIT, ?_test(add_docs(Source, 5, 3000, 0))}. - - -should_populate_source_one_large_one_small({remote, Source}) -> - should_populate_source_one_large_one_small(Source); - -should_populate_source_one_large_one_small(Source) -> - {timeout, ?TIMEOUT_EUNIT, ?_test(one_large_one_small(Source, 12000, 3000))}. - - -should_populate_source_one_large_attachment({remote, Source}) -> - should_populate_source_one_large_attachment(Source); - -should_populate_source_one_large_attachment(Source) -> - {timeout, ?TIMEOUT_EUNIT, ?_test(one_large_attachment(Source, 70000, 70000))}. +setup() -> + Source = couch_replicator_test_helper:create_db(), + Target = couch_replicator_test_helper:create_db(), + config:set("httpd", "max_http_request_size", "10000", false), + {Source, Target}. -should_replicate({remote, Source}, Target) -> - should_replicate(db_url(Source), Target); +teardown({Source, Target}) -> + config:delete("httpd", "max_http_request_size", false), + couch_replicator_test_helper:delete_db(Source), + couch_replicator_test_helper:delete_db(Target). -should_replicate(Source, {remote, Target}) -> - should_replicate(Source, db_url(Target)); -should_replicate(Source, Target) -> - {timeout, ?TIMEOUT_EUNIT, ?_test(replicate(Source, Target))}. +% Test documents which are below max_http_request_size but when batched, batch +% size will be greater than max_http_request_size. Replicator could +% automatically split the batch into smaller batches and POST those separately. +should_replicate_all_docs({Source, Target}) -> + ?assertEqual(ok, add_docs(Source, 5, 3000, 0)), + replicate(Source, Target), + compare_dbs(Source, Target, []). -should_compare_databases({remote, Source}, Target, ExceptIds) -> - should_compare_databases(Source, Target, ExceptIds); +% If a document is too large to post as a single request, that document is +% skipped but replication overall will make progress and not crash. +should_replicate_one({Source, Target}) -> + ?assertEqual(ok, one_large_one_small(Source, 12000, 3000)), + replicate(Source, Target), + compare_dbs(Source, Target, [<<"doc0">>]). -should_compare_databases(Source, {remote, Target}, ExceptIds) -> - should_compare_databases(Source, Target, ExceptIds); -should_compare_databases(Source, Target, ExceptIds) -> - {timeout, ?TIMEOUT_EUNIT, ?_test(compare_dbs(Source, Target, ExceptIds))}. +% If a document has an attachment > 64 * 1024 bytes, replicator will switch to +% POST-ing individual documents directly and skip bulk_docs. Test that case +% separately See note in main test function why this was disabled. +should_replicate_one_with_attachment({Source, Target}) -> + ?assertEqual(ok, one_large_attachment(Source, 70000, 70000)), + ?assertEqual(ok, add_docs(Source, 5, 3000, 0)), + replicate(Source, Target), + compare_dbs(Source, Target, [<<"doc0">>]). binary_chunk(Size) when is_integer(Size), Size > 0 -> @@ -150,19 +81,21 @@ add_docs(DbName, DocCount, DocSize, AttSize) -> one_large_one_small(DbName, Large, Small) -> add_doc(DbName, <<"doc0">>, Large, 0), - add_doc(DbName, <<"doc1">>, Small, 0). + add_doc(DbName, <<"doc1">>, Small, 0), + ok. one_large_attachment(DbName, Size, AttSize) -> - add_doc(DbName, <<"doc0">>, Size, AttSize). + add_doc(DbName, <<"doc0">>, Size, AttSize), + ok. add_doc(DbName, DocId, Size, AttSize) when is_binary(DocId) -> - {ok, Db} = couch_db:open_int(DbName, []), - Doc0 = #doc{id = DocId, body = {[{<<"x">>, binary_chunk(Size)}]}}, - Doc = Doc0#doc{atts = atts(AttSize)}, - {ok, _} = couch_db:update_doc(Db, Doc, []), - couch_db:close(Db). + {ok, Db} = fabric2_db:open(DbName, [?ADMIN_CTX]), + Doc0 = #doc{id = DocId, body = {[{<<"x">>, binary_chunk(Size)}]}}, + Doc = Doc0#doc{atts = atts(AttSize)}, + {ok, _} = fabric2_db:update_doc(Db, Doc, []), + ok. atts(0) -> @@ -178,8 +111,13 @@ atts(Size) -> replicate(Source, Target) -> - replicate({[ - {<<"source">>, Source}, - {<<"target">>, Target}, - {<<"worker_processes">>, "1"} % This make batch_size predictable - ]}). + ?assertMatch({ok, _}, couch_replicator_test_helper:replicate(#{ + <<"source">> => Source, + <<"target">> => Target, + <<"worker_processes">> => 1 % This make batch_size predictable + })). + + +compare_dbs(Source, Target, ExceptIds) -> + ?assertEqual(ok, couch_replicator_test_helper:compare_dbs(Source, Target, + ExceptIds)). diff --git a/src/couch_replicator/test/eunit/couch_replicator_test_helper.erl b/src/couch_replicator/test/eunit/couch_replicator_test_helper.erl index fd0409164..2ac447eb3 100644 --- a/src/couch_replicator/test/eunit/couch_replicator_test_helper.erl +++ b/src/couch_replicator/test/eunit/couch_replicator_test_helper.erl @@ -1,51 +1,166 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + -module(couch_replicator_test_helper). --include_lib("couch/include/couch_eunit.hrl"). --include_lib("couch/include/couch_db.hrl"). --include_lib("couch_replicator/src/couch_replicator.hrl"). -export([ + start_couch/0, + stop_couch/1, + + create_db/0, + create_db/1, + delete_db/1, + + server_url/0, + db_url/1, + + create_docs/2, + compare_dbs/2, compare_dbs/3, - db_url/1, - replicate/1, + compare_fold/4, + + compare_docs/2, + get_pid/1, - replicate/2 + + replicate/1, + replicate/2, + replicate_continuous/1, + replicate_continuous/2, + + cancel/1, + cancel/2, + + scheduler_jobs/0 ]). +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("couch/include/couch_db.hrl"). +-include_lib("couch_replicator/src/couch_replicator.hrl"). + + +-define(USERNAME, "rep_eunit_admin"). +-define(PASSWORD, "rep_eunit_password"). + + +start_couch() -> + Ctx = test_util:start_couch([fabric, chttpd, couch_replicator]), + Hashed = couch_passwords:hash_admin_password(?PASSWORD), + ok = config:set("admins", ?USERNAME, ?b2l(Hashed), _Persist = false), + Ctx. + + +stop_couch(Ctx) -> + config:delete("admins", ?USERNAME, _Persist = false), + test_util:stop_couch(Ctx). + + +create_db() -> + {ok, Db} = fabric2_db:create(?tempdb(), [?ADMIN_CTX]), + fabric2_db:name(Db). + + +create_db(DbName) when is_binary(DbName) -> + {ok, Db} = fabric2_db:create(DbName, [?ADMIN_CTX]), + fabric2_db:name(Db). + + +delete_db(DbName) -> + try + ok = fabric2_db:delete(DbName, [?ADMIN_CTX]) + catch + error:database_does_not_exist -> + ok + end. + + +server_url() -> + Addr = config:get("chttpd", "bind_address", "127.0.0.1"), + Port = mochiweb_socket_server:get(chttpd, port), + Fmt = "http://~s:~s@~s:~b", + ?l2b(io_lib:format(Fmt, [?USERNAME, ?PASSWORD, Addr, Port])). + + +db_url(DbName) -> + ?l2b(io_lib:format("~s/~s", [server_url(), DbName])). + + +create_docs(DbName, Docs) when is_binary(DbName), is_list(Docs) -> + {ok, Db} = fabric2_db:open(DbName, [?ADMIN_CTX]), + Docs1 = lists:map(fun(Doc) -> + case Doc of + #{} -> + Doc1 = couch_util:json_decode(couch_util:json_encode(Doc)), + couch_doc:from_json_obj(Doc1); + #doc{} -> + Doc + end + end, Docs), + {ok, ResList} = fabric2_db:update_docs(Db, Docs1), + lists:foreach(fun(Res) -> + ?assertMatch({ok, {_, Rev}} when is_binary(Rev), Res) + end, ResList). + + compare_dbs(Source, Target) -> - compare_dbs(Source, Target, []). - - -compare_dbs(Source, Target, ExceptIds) -> - {ok, SourceDb} = couch_db:open_int(Source, []), - {ok, TargetDb} = couch_db:open_int(Target, []), - - Fun = fun(FullDocInfo, Acc) -> - {ok, DocSource} = couch_db:open_doc(SourceDb, FullDocInfo), - Id = DocSource#doc.id, - case lists:member(Id, ExceptIds) of - true -> - ?assertEqual(not_found, couch_db:get_doc_info(TargetDb, Id)); - false -> - {ok, TDoc} = couch_db:open_doc(TargetDb, Id), - compare_docs(DocSource, TDoc) + Fun = fun(SrcDoc, TgtDoc, ok) -> compare_docs(SrcDoc, TgtDoc) end, + compare_fold(Source, Target, Fun, ok). + + +compare_dbs(Source, Target, ExceptIds) when is_binary(Source), + is_binary(Target), is_list(ExceptIds) -> + Fun = fun(SrcDoc, TgtDoc, ok) -> + case lists:member(SrcDoc#doc.id, ExceptIds) of + true -> ?assertEqual(not_found, TgtDoc); + false -> compare_docs(SrcDoc, TgtDoc) end, - {ok, Acc} + ok end, + compare_fold(Source, Target, Fun, ok). + - {ok, _} = couch_db:fold_docs(SourceDb, Fun, [], []), - ok = couch_db:close(SourceDb), - ok = couch_db:close(TargetDb). +compare_fold(Source, Target, Fun, Acc0) when + is_binary(Source), is_binary(Target), is_function(Fun, 3) -> + {ok, SourceDb} = fabric2_db:open(Source, [?ADMIN_CTX]), + {ok, TargetDb} = fabric2_db:open(Target, [?ADMIN_CTX]), + fabric2_fdb:transactional(SourceDb, fun(TxSourceDb) -> + FoldFun = fun + ({meta, _Meta}, Acc) -> + {ok, Acc}; + (complete, Acc) -> + {ok, Acc}; + ({row, Row}, Acc) -> + {_, Id} = lists:keyfind(id, 1, Row), + SrcDoc = open_doc(TxSourceDb, Id), + TgtDoc = open_doc(TargetDb, Id), + {ok, Fun(SrcDoc, TgtDoc, Acc)} + end, + Opts = [{restart_tx, true}], + {ok, AccF} = fabric2_db:fold_docs(TxSourceDb, FoldFun, Acc0, Opts), + AccF + end). -compare_docs(Doc1, Doc2) -> +compare_docs(#doc{} = Doc1, Doc2) when + is_record(Doc2, doc) orelse Doc2 =:= not_found -> + ?assert(Doc2 =/= not_found), ?assertEqual(Doc1#doc.body, Doc2#doc.body), #doc{atts = Atts1} = Doc1, #doc{atts = Atts2} = Doc2, ?assertEqual(lists:sort([couch_att:fetch(name, Att) || Att <- Atts1]), - lists:sort([couch_att:fetch(name, Att) || Att <- Atts2])), + lists:sort([couch_att:fetch(name, Att) || Att <- Atts2])), FunCompareAtts = fun(Att) -> AttName = couch_att:fetch(name, Att), {ok, AttTarget} = find_att(Atts2, AttName), @@ -68,19 +183,109 @@ compare_docs(Doc1, Doc2) -> ?assert(is_integer(couch_att:fetch(disk_len, AttTarget))), ?assert(is_integer(couch_att:fetch(att_len, AttTarget))), ?assertEqual(couch_att:fetch(disk_len, Att), - couch_att:fetch(disk_len, AttTarget)), + couch_att:fetch(disk_len, AttTarget)), ?assertEqual(couch_att:fetch(att_len, Att), - couch_att:fetch(att_len, AttTarget)), + couch_att:fetch(att_len, AttTarget)), ?assertEqual(couch_att:fetch(type, Att), - couch_att:fetch(type, AttTarget)), + couch_att:fetch(type, AttTarget)), ?assertEqual(couch_att:fetch(md5, Att), - couch_att:fetch(md5, AttTarget)) + couch_att:fetch(md5, AttTarget)) end, lists:foreach(FunCompareAtts, Atts1). +get_pid(RepId) -> + JobId = case couch_replicator_jobs:get_job_id(undefined, RepId) of + {ok, JobId0} -> JobId0; + {error, not_found} -> RepId + end, + {ok, #{<<"state">> := <<"running">>, <<"pid">> := Pid0}} = + couch_replicator_jobs:get_job_data(undefined, JobId), + Pid = list_to_pid(binary_to_list(Pid0)), + ?assert(is_pid(Pid)), + ?assert(is_process_alive(Pid)), + Pid. + + +replicate({[_ | _]} = EJson) -> + Str = couch_util:json_encode(EJson), + replicate(couch_util:json_decode(Str, [return_maps])); + +replicate(#{} = Rep0) -> + Rep = maybe_db_urls(Rep0), + {ok, Id, _} = couch_replicator_parse:parse_transient_rep(Rep, null), + ok = cancel(Id), + try + couch_replicator:replicate(Rep, ?ADMIN_USER) + after + ok = cancel(Id) + end. + + +replicate(Source, Target) -> + replicate(#{ + <<"source">> => Source, + <<"target">> => Target + }). + + +replicate_continuous({[_ | _]} = EJson) -> + Str = couch_util:json_encode(EJson), + replicate_continuous(couch_util:json_decode(Str, [return_maps])); + +replicate_continuous(#{<<"continuous">> := true} = Rep0) -> + Rep = maybe_db_urls(Rep0), + {ok, {continuous, RepId}} = couch_replicator:replicate(Rep, ?ADMIN_USER), + {ok, get_pid(RepId), RepId}. + + +replicate_continuous(Source, Target) -> + replicate_continuous(#{ + <<"source">> => Source, + <<"target">> => Target, + <<"continuous">> => true + }). + + +cancel(Id) when is_binary(Id) -> + CancelRep = #{<<"cancel">> => true, <<"id">> => Id}, + case couch_replicator:replicate(CancelRep, ?ADMIN_USER) of + {ok, {cancelled, <<_/binary>>}} -> ok; + {error, not_found} -> ok + end. + + +cancel(Id, Pid) when is_pid(Pid), is_binary(Id) -> + Ref = monitor(process, Pid), + try + cancel(Id) + after + receive + {'DOWN', Ref, _, _, _} -> ok + after 60000 -> + error(replicator_pid_death_timeout) + end + end. + + +scheduler_jobs() -> + ServerUrl = couch_replicator_test_helper:server_url(), + Url = lists:flatten(io_lib:format("~s/_scheduler/jobs", [ServerUrl])), + {ok, 200, _, Body} = test_request:get(Url, []), + Json = jiffy:decode(Body, [return_maps]), + maps:get(<<"jobs">>, Json). + + +open_doc(Db, DocId) -> + case fabric2_db:open_doc(Db, DocId, []) of + {ok, #doc{deleted = false} = Doc} -> Doc; + {not_found, missing} -> not_found + end. + + find_att([], _Name) -> nil; + find_att([Att | Rest], Name) -> case couch_att:fetch(name, Att) of Name -> @@ -91,45 +296,29 @@ find_att([Att | Rest], Name) -> att_md5(Att) -> - Md50 = couch_att:foldl( - Att, - fun(Chunk, Acc) -> couch_hash:md5_hash_update(Acc, Chunk) end, - couch_hash:md5_hash_init()), + Md50 = couch_att:foldl(Att, fun(Chunk, Acc) -> + couch_hash:md5_hash_update(Acc, Chunk) + end, couch_hash:md5_hash_init()), couch_hash:md5_hash_final(Md50). + att_decoded_md5(Att) -> - Md50 = couch_att:foldl_decode( - Att, - fun(Chunk, Acc) -> couch_hash:md5_hash_update(Acc, Chunk) end, - couch_hash:md5_hash_init()), + Md50 = couch_att:foldl_decode(Att, fun(Chunk, Acc) -> + couch_hash:md5_hash_update(Acc, Chunk) + end, couch_hash:md5_hash_init()), couch_hash:md5_hash_final(Md50). -db_url(DbName) -> - iolist_to_binary([ - "http://", config:get("httpd", "bind_address", "127.0.0.1"), - ":", integer_to_list(mochiweb_socket_server:get(couch_httpd, port)), - "/", DbName - ]). - -get_pid(RepId) -> - Pid = global:whereis_name({couch_replicator_scheduler_job,RepId}), - ?assert(is_pid(Pid)), - Pid. -replicate(Source, Target) -> - replicate({[ - {<<"source">>, Source}, - {<<"target">>, Target} - ]}). - -replicate({[_ | _]} = RepObject) -> - {ok, Rep} = couch_replicator_utils:parse_rep_doc(RepObject, ?ADMIN_USER), - ok = couch_replicator_scheduler:add_job(Rep), - couch_replicator_scheduler:reschedule(), - Pid = get_pid(Rep#rep.id), - MonRef = erlang:monitor(process, Pid), - receive - {'DOWN', MonRef, process, Pid, _} -> - ok +maybe_db_urls(#{} = Rep) -> + #{<<"source">> := Src, <<"target">> := Tgt} = Rep, + Src1 = case Src of + <<"http://", _/binary>> -> Src; + <<"https://", _/binary>> -> Src; + <<_/binary>> -> db_url(Src) + end, + Tgt1 = case Tgt of + <<"http://", _/binary>> -> Tgt; + <<"https://", _/binary>> -> Tgt; + <<_/binary>> -> db_url(Tgt) end, - ok = couch_replicator_scheduler:remove_job(Rep#rep.id). + Rep#{<<"source">> := Src1, <<"target">> := Tgt1}. diff --git a/src/couch_replicator/test/eunit/couch_replicator_transient_jobs_tests.erl b/src/couch_replicator/test/eunit/couch_replicator_transient_jobs_tests.erl new file mode 100644 index 000000000..25fc6a3ff --- /dev/null +++ b/src/couch_replicator/test/eunit/couch_replicator_transient_jobs_tests.erl @@ -0,0 +1,106 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_replicator_transient_jobs_tests). + +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("couch/include/couch_db.hrl"). +-include_lib("couch_replicator/src/couch_replicator.hrl"). +-include_lib("fabric/test/fabric2_test.hrl"). + + +transient_jobs_test_() -> + { + "Transient jobs tests", + { + setup, + fun couch_replicator_test_helper:start_couch/0, + fun couch_replicator_test_helper:stop_couch/1, + { + foreach, + fun setup/0, + fun teardown/1, + [ + ?TDEF_FE(transient_job_is_removed, 10), + ?TDEF_FE(posting_same_job_is_a_noop, 10) + ] + } + } + }. + + +setup() -> + Source = couch_replicator_test_helper:create_db(), + couch_replicator_test_helper:create_docs(Source, [ + #{<<"_id">> => <<"doc1">>} + ]), + Target = couch_replicator_test_helper:create_db(), + config:set("replicator", "stats_update_interval_sec", "0", false), + config:set("replicator", "transient_job_max_age_sec", "9999", false), + {Source, Target}. + + +teardown({Source, Target}) -> + config:delete("replicator", "stats_update_interval_sec", false), + config:delete("replicator", "transient_job_max_age_sec", false), + couch_replicator_test_helper:delete_db(Source), + couch_replicator_test_helper:delete_db(Target). + + +transient_job_is_removed({Source, Target}) -> + {ok, #{}} = replicate(Source, Target), + JobId = get_rep_id(Source, Target), + + couch_replicator_job_server:reschedule(), + + % Still there after clean up attempt ran + ?assertMatch({200, #{}}, scheduler_jobs(JobId)), + + config:set("replicator", "transient_job_max_age_sec", "0", false), + couch_replicator_job_server:reschedule(), + + % Should be gone now + ?assertMatch({404, #{}}, scheduler_jobs(JobId)). + + +posting_same_job_is_a_noop({Source, Target}) -> + {ok, Pid1, RepId1} = replicate_continuous(Source, Target), + {ok, Pid2, RepId2} = replicate_continuous(Source, Target), + ?assertEqual(RepId1, RepId2), + ?assertEqual(Pid1, Pid2), + couch_replicator_test_helper:cancel(RepId1). + + +get_rep_id(Source, Target) -> + {ok, Id, _} = couch_replicator_parse:parse_transient_rep(#{ + <<"source">> => couch_replicator_test_helper:db_url(Source), + <<"target">> => couch_replicator_test_helper:db_url(Target) + }, null), + Id. + + +replicate(Source, Target) -> + couch_replicator:replicate(#{ + <<"source">> => couch_replicator_test_helper:db_url(Source), + <<"target">> => couch_replicator_test_helper:db_url(Target) + }, ?ADMIN_USER). + + +replicate_continuous(Source, Target) -> + couch_replicator_test_helper:replicate_continuous(Source, Target). + + +scheduler_jobs(Id) -> + SUrl = couch_replicator_test_helper:server_url(), + Url = lists:flatten(io_lib:format("~s/_scheduler/jobs/~s", [SUrl, Id])), + {ok, Code, _, Body} = test_request:get(Url, []), + {Code, jiffy:decode(Body, [return_maps])}. diff --git a/src/couch_replicator/test/eunit/couch_replicator_use_checkpoints_tests.erl b/src/couch_replicator/test/eunit/couch_replicator_use_checkpoints_tests.erl index 8e4a21dbb..4371eff1f 100644 --- a/src/couch_replicator/test/eunit/couch_replicator_use_checkpoints_tests.erl +++ b/src/couch_replicator/test/eunit/couch_replicator_use_checkpoints_tests.erl @@ -14,165 +14,82 @@ -include_lib("couch/include/couch_eunit.hrl"). -include_lib("couch/include/couch_db.hrl"). +-include_lib("fabric/test/fabric2_test.hrl"). --import(couch_replicator_test_helper, [ - db_url/1, - replicate/1 -]). -define(DOCS_COUNT, 100). --define(TIMEOUT_EUNIT, 30). -define(i2l(I), integer_to_list(I)). -define(io2b(Io), iolist_to_binary(Io)). -start(false) -> - fun - ({finished, _, {CheckpointHistory}}) -> - ?assertEqual([{<<"use_checkpoints">>,false}], CheckpointHistory); - (_) -> - ok - end; -start(true) -> - fun - ({finished, _, {CheckpointHistory}}) -> - ?assertNotEqual(false, lists:keyfind(<<"session_id">>, - 1, CheckpointHistory)); - (_) -> - ok - end. - -stop(_, _) -> - ok. - -setup() -> - DbName = ?tempdb(), - {ok, Db} = couch_db:create(DbName, [?ADMIN_CTX]), - ok = couch_db:close(Db), - DbName. - -setup(remote) -> - {remote, setup()}; -setup({_, Fun, {A, B}}) -> - Ctx = test_util:start_couch([couch_replicator]), - {ok, Listener} = couch_replicator_notifier:start_link(Fun), - Source = setup(A), - Target = setup(B), - {Ctx, {Source, Target, Listener}}. - -teardown({remote, DbName}) -> - teardown(DbName); -teardown(DbName) -> - ok = couch_server:delete(DbName, [?ADMIN_CTX]), - ok. - -teardown(_, {Ctx, {Source, Target, Listener}}) -> - teardown(Source), - teardown(Target), - - couch_replicator_notifier:stop(Listener), - ok = application:stop(couch_replicator), - ok = test_util:stop_couch(Ctx). - use_checkpoints_test_() -> { - "Replication use_checkpoints feature tests", + setup, + fun couch_replicator_test_helper:start_couch/0, + fun couch_replicator_test_helper:stop_couch/1, { - foreachx, - fun start/1, fun stop/2, - [{UseCheckpoints, fun use_checkpoints_tests/2} - || UseCheckpoints <- [false, true]] + foreach, + fun setup/0, + fun teardown/1, + [ + ?TDEF_FE(t_replicate_with_checkpoints, 15), + ?TDEF_FE(t_replicate_without_checkpoints, 15) + ] } }. -use_checkpoints_tests(UseCheckpoints, Fun) -> - Pairs = [{remote, remote}], - { - "use_checkpoints: " ++ atom_to_list(UseCheckpoints), - { - foreachx, - fun setup/1, fun teardown/2, - [{{UseCheckpoints, Fun, Pair}, fun should_test_checkpoints/2} - || Pair <- Pairs] - } - }. -should_test_checkpoints({UseCheckpoints, _, {From, To}}, {_Ctx, {Source, Target, _}}) -> - should_test_checkpoints(UseCheckpoints, {From, To}, {Source, Target}). -should_test_checkpoints(UseCheckpoints, {From, To}, {Source, Target}) -> - {lists:flatten(io_lib:format("~p -> ~p", [From, To])), - {inorder, [ - should_populate_source(Source, ?DOCS_COUNT), - should_replicate(Source, Target, UseCheckpoints), - should_compare_databases(Source, Target) - ]}}. - -should_populate_source({remote, Source}, DocCount) -> - should_populate_source(Source, DocCount); -should_populate_source(Source, DocCount) -> - {timeout, ?TIMEOUT_EUNIT, ?_test(populate_db(Source, DocCount))}. - -should_replicate({remote, Source}, Target, UseCheckpoints) -> - should_replicate(db_url(Source), Target, UseCheckpoints); -should_replicate(Source, {remote, Target}, UseCheckpoints) -> - should_replicate(Source, db_url(Target), UseCheckpoints); -should_replicate(Source, Target, UseCheckpoints) -> - {timeout, ?TIMEOUT_EUNIT, ?_test(replicate(Source, Target, UseCheckpoints))}. - -should_compare_databases({remote, Source}, Target) -> - should_compare_databases(Source, Target); -should_compare_databases(Source, {remote, Target}) -> - should_compare_databases(Source, Target); -should_compare_databases(Source, Target) -> - {timeout, ?TIMEOUT_EUNIT, ?_test(compare_dbs(Source, Target))}. +setup() -> + Source = couch_replicator_test_helper:create_db(), + Target = couch_replicator_test_helper:create_db(), + {Source, Target}. -populate_db(DbName, DocCount) -> - {ok, Db} = couch_db:open_int(DbName, []), - Docs = lists:foldl( - fun(DocIdCounter, Acc) -> - Id = ?io2b(["doc", ?i2l(DocIdCounter)]), - Value = ?io2b(["val", ?i2l(DocIdCounter)]), - Doc = #doc{ - id = Id, - body = {[ {<<"value">>, Value} ]} - }, - [Doc | Acc] - end, - [], lists:seq(1, DocCount)), - {ok, _} = couch_db:update_docs(Db, Docs, []), - ok = couch_db:close(Db). - -compare_dbs(Source, Target) -> - {ok, SourceDb} = couch_db:open_int(Source, []), - {ok, TargetDb} = couch_db:open_int(Target, []), - Fun = fun(FullDocInfo, Acc) -> - {ok, Doc} = couch_db:open_doc(SourceDb, FullDocInfo), - {Props} = DocJson = couch_doc:to_json_obj(Doc, [attachments]), - DocId = couch_util:get_value(<<"_id">>, Props), - DocTarget = case couch_db:open_doc(TargetDb, DocId) of - {ok, DocT} -> - DocT; - Error -> - erlang:error( - {assertion_failed, - [{module, ?MODULE}, {line, ?LINE}, - {reason, lists:concat(["Error opening document '", - ?b2l(DocId), "' from target: ", - couch_util:to_list(Error)])}]}) - end, - DocTargetJson = couch_doc:to_json_obj(DocTarget, [attachments]), - ?assertEqual(DocJson, DocTargetJson), - {ok, Acc} - end, - {ok, _} = couch_db:fold_docs(SourceDb, Fun, [], []), - ok = couch_db:close(SourceDb), - ok = couch_db:close(TargetDb). - -replicate(Source, Target, UseCheckpoints) -> - replicate({[ - {<<"source">>, Source}, - {<<"target">>, Target}, - {<<"use_checkpoints">>, UseCheckpoints} - ]}). +teardown({Source, Target}) -> + couch_replicator_test_helper:delete_db(Source), + couch_replicator_test_helper:delete_db(Target). + +t_replicate_with_checkpoints({Source, Target}) -> + populate_db(Source, ?DOCS_COUNT), + Res = couch_replicator_test_helper:replicate(#{ + <<"source">> => Source, + <<"target">> => Target, + <<"use_checkpoints">> => true + }), + ?assertMatch({ok, _}, Res), + + {ok, History} = Res, + ?assertMatch(#{<<"history">> := _, <<"session_id">> := _}, History), + + Checkpoints = maps:get(<<"history">>, History), + SessionId = maps:get(<<"session_id">>, History), + ?assert(is_binary(SessionId)), + ?assert(is_list(Checkpoints)), + ?assert(length(Checkpoints) >= 1), + + couch_replicator_test_helper:compare_dbs(Source, Target). + + +t_replicate_without_checkpoints({Source, Target}) -> + populate_db(Source, ?DOCS_COUNT), + Res = couch_replicator_test_helper:replicate(#{ + <<"source">> => Source, + <<"target">> => Target, + <<"use_checkpoints">> => false + }), + ?assertEqual({ok, #{<<"use_checkpoints">> => false}}, Res), + couch_replicator_test_helper:compare_dbs(Source, Target). + + +populate_db(DbName, DocCount) -> + Docs = lists:foldl(fun(DocIdCounter, Acc) -> + Id = ?io2b(["doc", ?i2l(DocIdCounter)]), + Value = ?io2b(["val", ?i2l(DocIdCounter)]), + Doc = #doc{ + id = Id, + body = {[{<<"value">>, Value}]} + }, + [Doc | Acc] + end, [], lists:seq(1, DocCount)), + couch_replicator_test_helper:create_docs(DbName, Docs). diff --git a/test/elixir/test/replication_test.exs b/test/elixir/test/replication_test.exs index 8b657d916..9af5ef81a 100644 --- a/test/elixir/test/replication_test.exs +++ b/test/elixir/test/replication_test.exs @@ -14,7 +14,10 @@ defmodule ReplicationTest do # This should probably go into `make elixir` like what # happens for JavaScript tests. - @moduletag config: [{"replicator", "startup_jitter", "0"}] + @moduletag config: [ + {"replicator", "startup_jitter", "0"}, + {"replicator", "stats_update_interval_sec", "0"} + ] test "source database not found with host" do name = random_db_name() -- cgit v1.2.1 From 41e75e63d533e9e74e7ca7e00fafe3139980a68e Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Thu, 17 Sep 2020 12:00:24 -0400 Subject: Add url validation in replicator creds stripping logic Previously, in 3.x we re-parsed the endpoint URLs with `ibrowse_lib:parse_url/1` when stripping credentials, which threw an error if the URL was invalid. So we try to preserve that same logic. Backport some tests from 3.x to make sure URL stripping works when URL is valid and, also use the nicer ?TDEF and ?TDEF_FE test helpers. --- src/couch_replicator/src/couch_replicator.erl | 104 ++++++++++++++------------ 1 file changed, 58 insertions(+), 46 deletions(-) diff --git a/src/couch_replicator/src/couch_replicator.erl b/src/couch_replicator/src/couch_replicator.erl index a53aa1045..8ab36e587 100644 --- a/src/couch_replicator/src/couch_replicator.erl +++ b/src/couch_replicator/src/couch_replicator.erl @@ -35,6 +35,7 @@ ]). +-include_lib("ibrowse/include/ibrowse.hrl"). -include_lib("couch/include/couch_db.hrl"). -include("couch_replicator.hrl"). @@ -483,6 +484,10 @@ ejson_url(null) -> -spec strip_url_creds(binary()) -> binary() | null. strip_url_creds(Url) -> try + case ibrowse_lib:parse_url(binary_to_list(Url)) of + #url{} -> ok; + {error, Error} -> error(Error) + end, iolist_to_binary(couch_util:url_strip_password(Url)) catch error:_ -> @@ -510,6 +515,8 @@ check_authorization(JobId, #user_ctx{} = Ctx) when is_binary(JobId) -> -ifdef(TEST). -include_lib("eunit/include/eunit.hrl"). +-include_lib("fabric/test/fabric2_test.hrl"). + authorization_test_() -> { @@ -517,40 +524,34 @@ authorization_test_() -> fun () -> ok end, fun (_) -> meck:unload() end, [ - t_admin_is_always_authorized(), - t_username_must_match(), - t_replication_not_found() + ?TDEF_FE(t_admin_is_always_authorized), + ?TDEF_FE(t_username_must_match), + ?TDEF_FE(t_replication_not_found) ] }. -t_admin_is_always_authorized() -> - ?_test(begin - expect_job_data({ok, #{?REP => #{?REP_USER => <<"someuser">>}}}), - UserCtx = #user_ctx{name = <<"adm">>, roles = [<<"_admin">>]}, - ?assertEqual(ok, check_authorization(<<"RepId">>, UserCtx)) - end). +t_admin_is_always_authorized(_) -> + expect_job_data({ok, #{?REP => #{?REP_USER => <<"someuser">>}}}), + UserCtx = #user_ctx{name = <<"adm">>, roles = [<<"_admin">>]}, + ?assertEqual(ok, check_authorization(<<"RepId">>, UserCtx)). -t_username_must_match() -> - ?_test(begin - expect_job_data({ok, #{?REP => #{?REP_USER => <<"user1">>}}}), - UserCtx1 = #user_ctx{name = <<"user1">>, roles = [<<"somerole">>]}, - ?assertEqual(ok, check_authorization(<<"RepId">>, UserCtx1)), - UserCtx2 = #user_ctx{name = <<"other">>, roles = [<<"somerole">>]}, - ?assertThrow({unauthorized, _}, check_authorization(<<"RepId">>, - UserCtx2)) - end). +t_username_must_match(_) -> + expect_job_data({ok, #{?REP => #{?REP_USER => <<"user1">>}}}), + UserCtx1 = #user_ctx{name = <<"user1">>, roles = [<<"somerole">>]}, + ?assertEqual(ok, check_authorization(<<"RepId">>, UserCtx1)), + UserCtx2 = #user_ctx{name = <<"other">>, roles = [<<"somerole">>]}, + ?assertThrow({unauthorized, _}, check_authorization(<<"RepId">>, + UserCtx2)). -t_replication_not_found() -> - ?_test(begin - expect_job_data({error, not_found}), - UserCtx1 = #user_ctx{name = <<"user">>, roles = [<<"somerole">>]}, - ?assertEqual(not_found, check_authorization(<<"RepId">>, UserCtx1)), - UserCtx2 = #user_ctx{name = <<"adm">>, roles = [<<"_admin">>]}, - ?assertEqual(not_found, check_authorization(<<"RepId">>, UserCtx2)) - end). +t_replication_not_found(_) -> + expect_job_data({error, not_found}), + UserCtx1 = #user_ctx{name = <<"user">>, roles = [<<"somerole">>]}, + ?assertEqual(not_found, check_authorization(<<"RepId">>, UserCtx1)), + UserCtx2 = #user_ctx{name = <<"adm">>, roles = [<<"_admin">>]}, + ?assertEqual(not_found, check_authorization(<<"RepId">>, UserCtx2)). expect_job_data(JobDataRes) -> @@ -558,7 +559,7 @@ expect_job_data(JobDataRes) -> strip_url_creds_test_() -> - { + { setup, fun() -> meck:expect(config, get, fun(_, _, Default) -> Default end) @@ -566,28 +567,39 @@ strip_url_creds_test_() -> fun(_) -> meck:unload() end, - [ - t_strip_url_creds_errors() - ] + with([ + ?TDEF(t_strip_http_basic_creds), + ?TDEF(t_strip_url_creds_errors) + ]) }. -t_strip_url_creds_errors() -> - ?_test(begin - Bad1 = <<"http://adm:pass/bad">>, - ?assertEqual(null, strip_url_creds(Bad1)), - Bad2 = <<"more garbage">>, - ?assertEqual(null, strip_url_creds(Bad2)), - Bad3 = <<"http://a:b:c">>, - ?assertEqual(null, strip_url_creds(Bad3)), - Bad4 = <<"http://adm:pass:pass/bad">>, - ?assertEqual(null, strip_url_creds(Bad4)), - ?assertEqual(null, strip_url_creds(null)), - ?assertEqual(null, strip_url_creds(42)), - ?assertEqual(null, strip_url_creds([<<"a">>, <<"b">>])), - Bad5 = <<"http://adm:pass/bad">>, - ?assertEqual(null, strip_url_creds(Bad5)) - end). +t_strip_http_basic_creds(_) -> + Url1 = <<"http://adm:pass@host/db/">>, + ?assertEqual(<<"http://adm:*****@host/db/">>, strip_url_creds(Url1)), + Url2 = <<"https://adm:pass@host/db/">>, + ?assertEqual(<<"https://adm:*****@host/db/">>, strip_url_creds(Url2)), + Url3 = <<"http://adm:pass@host:80/db/">>, + ?assertEqual(<<"http://adm:*****@host:80/db/">>, strip_url_creds(Url3)), + Url4 = <<"http://adm:pass@host/db?a=b&c=d">>, + ?assertEqual(<<"http://adm:*****@host/db?a=b&c=d">>, + strip_url_creds(Url4)). + + +t_strip_url_creds_errors(_) -> + Bad1 = <<"http://adm:pass/bad">>, + ?assertEqual(null, strip_url_creds(Bad1)), + Bad2 = <<"more garbage">>, + ?assertEqual(null, strip_url_creds(Bad2)), + Bad3 = <<"http://a:b:c">>, + ?assertEqual(null, strip_url_creds(Bad3)), + Bad4 = <<"http://adm:pass:pass/bad">>, + ?assertEqual(null, strip_url_creds(Bad4)), + ?assertEqual(null, strip_url_creds(null)), + ?assertEqual(null, strip_url_creds(42)), + ?assertEqual(null, strip_url_creds([<<"a">>, <<"b">>])), + Bad5 = <<"http://adm:pass/bad">>, + ?assertEqual(null, strip_url_creds(Bad5)). -endif. -- cgit v1.2.1 From ffb85f17714330e0ef859e7f79c9c5d230e331f4 Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Thu, 17 Sep 2020 14:21:52 -0400 Subject: Fix flaky couch_replicator_job_server tests Job exits are asynchronous so we ensure we wait for exit signals to be handled before checking the state. --- .../eunit/couch_replicator_job_server_tests.erl | 24 ++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/src/couch_replicator/test/eunit/couch_replicator_job_server_tests.erl b/src/couch_replicator/test/eunit/couch_replicator_job_server_tests.erl index 698a84400..921f29fed 100644 --- a/src/couch_replicator/test/eunit/couch_replicator_job_server_tests.erl +++ b/src/couch_replicator/test/eunit/couch_replicator_job_server_tests.erl @@ -213,7 +213,7 @@ acceptors_spawned_on_acceptor_exit(_) -> [A1] = acceptors(), exit(A1, kill), - meck:wait(?JOB_SERVER, handle_info, [{'EXIT', A1, killed}, '_'], 2000), + wait_job_exit(A1, killed), ?assertEqual(3, length(acceptors())). @@ -248,8 +248,9 @@ acceptors_spawned_on_worker_exit(_) -> % Same acceptor process is now a worker ?assertEqual([A1], workers()), + meck:reset(couch_replicator_job_server), exit(A1, shutdown), - meck:wait(?JOB_SERVER, handle_info, [{'EXIT', A1, shutdown}, '_'], 2000), + wait_job_exit(A1, shutdown), % New acceptor process started ?assertEqual(1, length(acceptors())), @@ -330,7 +331,9 @@ excess_workers_trimmed_on_reschedule(_) -> % Running with an excess number of workers. These should be trimmed on the % during the next cycle + meck:reset(couch_replicator_job_server), ?JOB_SERVER:reschedule(), + wait_jobs_exit([A2, A3, A6], shutdown), Workers = workers(), ?assertEqual(4, length(Workers)), @@ -383,8 +386,10 @@ recent_workers_are_not_stopped(_) -> ?assertEqual(0, length(acceptors())), config_set("min_run_time_sec", "0"), - + + meck:reset(couch_replicator_job_server), ?JOB_SERVER:reschedule(), + wait_jobs_exit([A2, A3, A6], shutdown), ?assertEqual(4, length(workers())), ?assertEqual(0, length(acceptors())). @@ -431,7 +436,14 @@ start_job() -> {accept_job, Normal, From} -> ok = ?JOB_SERVER:accepted(self(), Normal), From ! {job_accepted, self()}, - start_job(); - {exit_job, ExitSig} -> - exit(ExitSig) + start_job() end. + + +wait_jobs_exit(PidList, Signal) when is_list(PidList) -> + [wait_job_exit(Pid, Signal) || Pid <- PidList], + ok. + + +wait_job_exit(Pid, Signal) when is_pid(Pid) -> + meck:wait(?JOB_SERVER, handle_info, [{'EXIT', Pid, Signal}, '_'], 2000). -- cgit v1.2.1 From 39aa7423b2e79d9859f8626b1678636dba738c9f Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Wed, 16 Sep 2020 14:23:51 -0500 Subject: Fix bug in ebtree:umerge_members/4 Caught during Elixir tests. I've added a unit test to `ebtree.erl` to ensure we don't regress in the future. --- src/ebtree/src/ebtree.erl | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/src/ebtree/src/ebtree.erl b/src/ebtree/src/ebtree.erl index 3cfb82030..97a820304 100644 --- a/src/ebtree/src/ebtree.erl +++ b/src/ebtree/src/ebtree.erl @@ -1187,7 +1187,7 @@ umerge_members_int(_Collate, List1, [], Acc) -> umerge_members_int(Collate, [H1 | T1], [H2 | T2], Acc) -> case Collate(H1, H2) of lt -> umerge_members_int(Collate, T1, [H2 | T2], [H1 | Acc]); - eq -> umerge_members_int(Collate, T1, [H2 | T2], [H1 | Acc]); + eq -> umerge_members_int(Collate, T1, T2, [H1 | Acc]); gt -> umerge_members_int(Collate, [H1 | T1], T2, [H2 | Acc]) end. @@ -1800,4 +1800,20 @@ cache_test_() -> ?assertEqual(3, length(NodeCache)) end]}. + +umerge_members_test() -> + Tree = #tree{collate_fun = fun collate_raw/2}, + NewList = fun() -> + Raw = [{rand:uniform(100), rand:uniform()} || _ <- lists:seq(1, 100)], + lists:ukeysort(1, Raw) + end, + lists:foreach(fun(_) -> + A = NewList(), + B = NewList(), + Stdlib = lists:ukeymerge(1, A, B), + Custom = umerge_members(Tree, 0, A, B), + ?assertEqual(Stdlib, Custom) + end, lists:seq(1, 100)). + + -endif. -- cgit v1.2.1 From 5cc430f0ff8428a57d95f3179983d170fb948cfc Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Mon, 21 Sep 2020 15:16:15 -0500 Subject: Fix include directive in couch_views_batch_impl This fixes compilation if CouchDB is used as a dependency. --- src/couch_views/src/couch_views_batch_impl.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/couch_views/src/couch_views_batch_impl.erl b/src/couch_views/src/couch_views_batch_impl.erl index cacd53b1b..d315a3bf6 100644 --- a/src/couch_views/src/couch_views_batch_impl.erl +++ b/src/couch_views/src/couch_views_batch_impl.erl @@ -22,7 +22,7 @@ ]). --include("couch_mrview/include/couch_mrview.hrl"). +-include_lib("couch_mrview/include/couch_mrview.hrl"). -record(batch_st, { -- cgit v1.2.1 From f1906774e727982621a1acd8961a7a0483314ffb Mon Sep 17 00:00:00 2001 From: Russell Branca Date: Wed, 23 Sep 2020 14:05:49 -0700 Subject: Workaround dirty schedulers in run_queue stats (#3168) --- src/chttpd/src/chttpd_node.erl | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/src/chttpd/src/chttpd_node.erl b/src/chttpd/src/chttpd_node.erl index 1ca4bbd5e..0159672f5 100644 --- a/src/chttpd/src/chttpd_node.erl +++ b/src/chttpd/src/chttpd_node.erl @@ -15,7 +15,8 @@ -export([ handle_node_req/1, - get_stats/0 + get_stats/0, + run_queues/0 ]). -include_lib("couch/include/couch_db.hrl"). @@ -212,10 +213,12 @@ get_stats() -> {CF, CDU} = db_pid_stats(), MessageQueues0 = [{couch_file, {CF}}, {couch_db_updater, {CDU}}], MessageQueues = MessageQueues0 ++ message_queues(registered()), + {SQ, DCQ} = run_queues(), [ {uptime, couch_app:uptime() div 1000}, {memory, {Memory}}, - {run_queue, statistics(run_queue)}, + {run_queue, SQ}, + {run_queue_dirty_cpu, DCQ}, {ets_table_count, length(ets:all())}, {context_switches, element(1, statistics(context_switches))}, {reductions, element(1, statistics(reductions))}, @@ -287,3 +290,13 @@ message_queues(Registered) -> {Type, Length} = process_info(whereis(Name), Type), {Name, Length} end, Registered). + +%% Workaround for https://bugs.erlang.org/browse/ERL-1355 +run_queues() -> + case erlang:system_info(dirty_cpu_schedulers) > 0 of + false -> + {statistics(run_queue), 0}; + true -> + [DCQ | SQs] = lists:reverse(statistics(run_queue_lengths)), + {lists:sum(SQs), DCQ} + end. -- cgit v1.2.1 From ddae72a707ce51c229da9188275f18664302cba7 Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Thu, 6 Aug 2020 12:34:29 -0500 Subject: Export fabric2_fdb:chunkify_binary/1,2 --- src/fabric/src/fabric2_fdb.erl | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/src/fabric/src/fabric2_fdb.erl b/src/fabric/src/fabric2_fdb.erl index 52303cef1..36fa451ab 100644 --- a/src/fabric/src/fabric2_fdb.erl +++ b/src/fabric/src/fabric2_fdb.erl @@ -77,6 +77,9 @@ get_approximate_tx_size/1, + chunkify_binary/1, + chunkify_binary/2, + debug_cluster/0, debug_cluster/2 ]). @@ -1176,6 +1179,21 @@ get_approximate_tx_size(#{} = TxDb) -> erlfdb:wait(erlfdb:get_approximate_size(Tx)). +chunkify_binary(Data) -> + chunkify_binary(Data, binary_chunk_size()). + + +chunkify_binary(Data, Size) -> + case Data of + <<>> -> + []; + <> -> + [Head | chunkify_binary(Rest, Size)]; + <<_/binary>> when size(Data) < Size -> + [Data] + end. + + debug_cluster() -> debug_cluster(<<>>, <<16#FE, 16#FF, 16#FF>>). @@ -1677,21 +1695,6 @@ sum_rem_rev_sizes(RevInfos) -> end, 0, RevInfos). -chunkify_binary(Data) -> - chunkify_data(Data, binary_chunk_size()). - - -chunkify_data(Data, Size) -> - case Data of - <<>> -> - []; - <> -> - [Head | chunkify_data(Rest, Size)]; - <<_/binary>> when size(Data) < Size -> - [Data] - end. - - get_fold_acc(Db, RangePrefix, UserCallback, UserAcc, Options) when is_map(Db) orelse Db =:= undefined -> -- cgit v1.2.1 From b91f193563c9b3dadd4f8de4c49de9cbf4304837 Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Fri, 24 Jul 2020 10:59:05 -0500 Subject: Views on ebtree --- rel/overlay/etc/default.ini | 6 + src/couch_views/include/couch_views.hrl | 5 + src/couch_views/src/couch_views.erl | 55 +-- src/couch_views/src/couch_views_fdb.erl | 331 ++--------------- src/couch_views/src/couch_views_indexer.erl | 54 +-- src/couch_views/src/couch_views_reader.erl | 115 +++--- src/couch_views/src/couch_views_trees.erl | 429 ++++++++++++++++++++++ src/couch_views/src/couch_views_updater.erl | 13 +- src/couch_views/src/couch_views_util.erl | 35 ++ src/couch_views/test/couch_views_cleanup_test.erl | 2 +- src/couch_views/test/couch_views_indexer_test.erl | 64 ++-- src/couch_views/test/couch_views_size_test.erl | 25 +- src/couch_views/test/couch_views_updater_test.erl | 4 +- src/mango/src/mango_cursor_view.erl | 14 +- src/mango/src/mango_idx_view.erl | 7 +- src/mango/src/mango_idx_view.hrl | 13 + 16 files changed, 687 insertions(+), 485 deletions(-) create mode 100644 src/couch_views/src/couch_views_trees.erl create mode 100644 src/mango/src/mango_idx_view.hrl diff --git a/rel/overlay/etc/default.ini b/rel/overlay/etc/default.ini index abcf0bda2..3a377c7c8 100644 --- a/rel/overlay/etc/default.ini +++ b/rel/overlay/etc/default.ini @@ -337,6 +337,12 @@ iterations = 10 ; iterations for password hashing ; The maximum allowed value size emitted from a view for a document (in bytes) ;value_size_limit = 64000 ; +; The maximum size of B+Tree nodes used by the id btree +;id_btree_node_size = 100 +; +; The maximum size of B+Tree nodes used by view btrees +;view_btree_node_size = 100 +; ; Batch size sensing parameters ; batch_initial_size = 100 ; Initial batch size in number of documents ; batch_search_increment = 500 ; Size change when searching for the threshold diff --git a/src/couch_views/include/couch_views.hrl b/src/couch_views/include/couch_views.hrl index 3d0110f65..388219118 100644 --- a/src/couch_views/include/couch_views.hrl +++ b/src/couch_views/include/couch_views.hrl @@ -13,6 +13,7 @@ % Index info/data subspaces -define(VIEW_INFO, 0). -define(VIEW_DATA, 1). +-define(VIEW_TREES, 3). % Index info keys -define(VIEW_UPDATE_SEQ, 0). @@ -25,6 +26,10 @@ -define(VIEW_ID_RANGE, 0). -define(VIEW_MAP_RANGE, 1). +% Tree keys +-define(VIEW_ID_TREE, 0). +-define(VIEW_ROW_TREES, 1). + % jobs api -define(INDEX_JOB_TYPE, <<"views">>). diff --git a/src/couch_views/src/couch_views.erl b/src/couch_views/src/couch_views.erl index d9ba0c16b..da8a142f9 100644 --- a/src/couch_views/src/couch_views.erl +++ b/src/couch_views/src/couch_views.erl @@ -48,11 +48,7 @@ query(Db, DDoc, ViewName, Callback, Acc0, Args0) -> Args1 = to_mrargs(Args0), Args2 = couch_mrview_util:set_view_type(Args1, ViewName, Views), Args3 = couch_mrview_util:validate_args(Args2), - ok = check_range(Args3), - case is_reduce_view(Args3) of - true -> throw(not_implemented); - false -> ok - end, + ok = check_range(Mrst, ViewName, Args3), try fabric2_fdb:transactional(Db, fun(TxDb) -> @@ -100,9 +96,10 @@ get_info(Db, DDoc) -> {ok, Mrst} = couch_views_util:ddoc_to_mrst(DbName, DDoc), Sig = fabric2_util:to_hex(Mrst#mrst.sig), {UpdateSeq, DataSize, Status} = fabric2_fdb:transactional(Db, fun(TxDb) -> - Seq = couch_views_fdb:get_update_seq(TxDb, Mrst), - DataSize = get_total_view_size(TxDb, Mrst), - JobStatus = case couch_views_jobs:job_state(TxDb, Mrst) of + Mrst1 = couch_views_trees:open(TxDb, Mrst), + Seq = couch_views_fdb:get_update_seq(TxDb, Mrst1), + DataSize = get_total_view_size(TxDb, Mrst1), + JobStatus = case couch_views_jobs:job_state(TxDb, Mrst1) of {ok, pending} -> true; {ok, running} -> true; {ok, finished} -> false; @@ -124,10 +121,9 @@ get_info(Db, DDoc) -> get_total_view_size(TxDb, Mrst) -> - ViewIds = [View#mrview.id_num || View <- Mrst#mrst.views], - lists:foldl(fun (ViewId, Total) -> - Total + couch_views_fdb:get_kv_size(TxDb, Mrst, ViewId) - end, 0, ViewIds). + lists:foldl(fun(View, Total) -> + Total + couch_views_trees:get_kv_size(TxDb, View) + end, 0, Mrst#mrst.views). read_view(Db, Mrst, ViewName, Callback, Acc0, Args) -> @@ -185,16 +181,29 @@ to_mrargs(#{} = Args) -> end, #mrargs{}, Args). -check_range(#mrargs{start_key = undefined}) -> +check_range(Mrst, ViewName, Args) -> + #mrst{ + language = Lang, + views = Views + } = Mrst, + View = case couch_mrview_util:extract_view(Lang, Args, ViewName, Views) of + {map, V, _} -> V; + {red, {_, _, V}, _} -> V + end, + Cmp = couch_views_util:collate_fun(View), + check_range(Args, Cmp). + + +check_range(#mrargs{start_key = undefined}, _Cmp) -> ok; -check_range(#mrargs{end_key = undefined}) -> +check_range(#mrargs{end_key = undefined}, _Cmp) -> ok; -check_range(#mrargs{start_key = K, end_key = K}) -> +check_range(#mrargs{start_key = K, end_key = K}, _Cmp) -> ok; -check_range(Args) -> +check_range(Args, Cmp) -> #mrargs{ direction = Dir, start_key = SK, @@ -203,10 +212,10 @@ check_range(Args) -> end_key_docid = EKD } = Args, - case {Dir, view_cmp(SK, SKD, EK, EKD)} of - {fwd, false} -> + case {Dir, Cmp({SK, SKD}, {EK, EKD})} of + {fwd, gt} -> throw(check_range_error(<<"true">>)); - {rev, true} -> + {rev, lt} -> throw(check_range_error(<<"false">>)); _ -> ok @@ -220,14 +229,6 @@ check_range_error(Descending) -> Descending/binary>>}. -view_cmp(SK, SKD, EK, EKD) -> - BinSK = couch_views_encoding:encode(SK, key), - BinEK = couch_views_encoding:encode(EK, key), - PackedSK = erlfdb_tuple:pack({BinSK, SKD}), - PackedEK = erlfdb_tuple:pack({BinEK, EKD}), - PackedSK =< PackedEK. - - get_update_options(#mrst{design_opts = Opts}) -> IncDesign = couch_util:get_value(<<"include_design">>, Opts, false), LocalSeq = couch_util:get_value(<<"local_seq">>, Opts, false), diff --git a/src/couch_views/src/couch_views_fdb.erl b/src/couch_views/src/couch_views_fdb.erl index c95722230..e813f2b61 100644 --- a/src/couch_views/src/couch_views_fdb.erl +++ b/src/couch_views/src/couch_views_fdb.erl @@ -22,15 +22,10 @@ get_update_seq/2, set_update_seq/3, - get_row_count/3, - get_kv_size/3, - - fold_map_idx/6, - - write_doc/4, - list_signatures/1, - clear_index/2 + clear_index/2, + + persist_chunks/3 ]). -ifdef(TEST). @@ -38,10 +33,6 @@ -compile(nowarn_export_all). -endif. --define(LIST_VALUE, 0). --define(JSON_VALUE, 1). --define(VALUE, 2). - -include("couch_views.hrl"). -include_lib("couch_mrview/include/couch_mrview.hrl"). @@ -126,94 +117,6 @@ set_update_seq(TxDb, Sig, Seq) -> ok = erlfdb:set(Tx, seq_key(DbPrefix, Sig), Seq). -get_row_count(TxDb, #mrst{sig = Sig}, ViewId) -> - #{ - tx := Tx, - db_prefix := DbPrefix - } = TxDb, - - case erlfdb:wait(erlfdb:get(Tx, row_count_key(DbPrefix, Sig, ViewId))) of - not_found -> 0; % Can this happen? - CountBin -> ?bin2uint(CountBin) - end. - - -get_kv_size(TxDb, #mrst{sig = Sig}, ViewId) -> - #{ - tx := Tx, - db_prefix := DbPrefix - } = TxDb, - - case erlfdb:wait(erlfdb:get(Tx, kv_size_key(DbPrefix, Sig, ViewId))) of - not_found -> 0; % Can this happen? - SizeBin -> ?bin2uint(SizeBin) - end. - - -fold_map_idx(TxDb, Sig, ViewId, Options, Callback, Acc0) -> - #{ - db_prefix := DbPrefix - } = TxDb, - - MapIdxPrefix = map_idx_prefix(DbPrefix, Sig, ViewId), - FoldAcc = #{ - prefix => MapIdxPrefix, - callback => Callback, - acc => Acc0 - }, - Fun = aegis:wrap_fold_fun(TxDb, fun fold_fwd/2), - - #{ - acc := Acc1 - } = fabric2_fdb:fold_range(TxDb, MapIdxPrefix, Fun, FoldAcc, Options), - - Acc1. - - -write_doc(TxDb, Sig, _ViewIds, #{deleted := true} = Doc) -> - #{ - id := DocId - } = Doc, - - ExistingViewKeys = get_view_keys(TxDb, Sig, DocId), - - clear_id_idx(TxDb, Sig, DocId), - lists:foreach(fun({ViewId, TotalKeys, TotalSize, UniqueKeys}) -> - clear_map_idx(TxDb, Sig, ViewId, DocId, UniqueKeys), - update_row_count(TxDb, Sig, ViewId, -TotalKeys), - update_kv_size(TxDb, Sig, ViewId, -TotalSize) - end, ExistingViewKeys); - -write_doc(TxDb, Sig, ViewIds, Doc) -> - #{ - id := DocId, - results := Results, - kv_sizes := KVSizes - } = Doc, - - ExistingViewKeys = get_view_keys(TxDb, Sig, DocId), - - clear_id_idx(TxDb, Sig, DocId), - - lists:foreach(fun({ViewId, NewRows, KVSize}) -> - update_id_idx(TxDb, Sig, ViewId, DocId, NewRows, KVSize), - - ExistingKeys = case lists:keyfind(ViewId, 1, ExistingViewKeys) of - {ViewId, TotalRows, TotalSize, EKeys} -> - RowChange = length(NewRows) - TotalRows, - update_row_count(TxDb, Sig, ViewId, RowChange), - update_kv_size(TxDb, Sig, ViewId, KVSize - TotalSize), - EKeys; - false -> - RowChange = length(NewRows), - update_row_count(TxDb, Sig, ViewId, RowChange), - update_kv_size(TxDb, Sig, ViewId, KVSize), - [] - end, - update_map_idx(TxDb, Sig, ViewId, DocId, ExistingKeys, NewRows) - end, lists:zip3(ViewIds, Results, KVSizes)). - - list_signatures(Db) -> #{ db_prefix := DbPrefix @@ -244,145 +147,38 @@ clear_index(Db, Signature) -> end, Keys), % Clear index data - RangeTuple = {?DB_VIEWS, ?VIEW_DATA, Signature}, - RangePrefix = erlfdb_tuple:pack(RangeTuple, DbPrefix), - erlfdb:clear_range_startswith(Tx, RangePrefix). - - -% For each row in a map view we store the the key/value -% in FoundationDB: -% -% `(EncodedSortKey, (EncodedKey, EncodedValue))` -% -% The difference between `EncodedSortKey` and `EndcodedKey` is -% the use of `couch_util:get_sort_key/1` which turns UTF-8 -% strings into binaries that are byte comparable. Given a sort -% key binary we cannot recover the input so to return unmodified -% user data we are forced to store the original. - -fold_fwd({RowKey, PackedKeyValue}, Acc) -> - #{ - prefix := Prefix, - callback := UserCallback, - acc := UserAcc0 - } = Acc, - - {{_SortKey, DocId}, _DupeId} = - erlfdb_tuple:unpack(RowKey, Prefix), - - {EncodedOriginalKey, EncodedValue} = erlfdb_tuple:unpack(PackedKeyValue), - Value = couch_views_encoding:decode(EncodedValue), - Key = couch_views_encoding:decode(EncodedOriginalKey), - - UserAcc1 = UserCallback(DocId, Key, Value, UserAcc0), - - Acc#{ - acc := UserAcc1 - }. - - -clear_id_idx(TxDb, Sig, DocId) -> - #{ - tx := Tx, - db_prefix := DbPrefix - } = TxDb, - - {Start, End} = id_idx_range(DbPrefix, Sig, DocId), - ok = erlfdb:clear_range(Tx, Start, End). - - -clear_map_idx(TxDb, Sig, ViewId, DocId, ViewKeys) -> - #{ - tx := Tx, - db_prefix := DbPrefix - } = TxDb, + DataTuple = {?DB_VIEWS, ?VIEW_DATA, Signature}, + DataPrefix = erlfdb_tuple:pack(DataTuple, DbPrefix), + erlfdb:clear_range_startswith(Tx, DataPrefix), - lists:foreach(fun(ViewKey) -> - {Start, End} = map_idx_range(DbPrefix, Sig, ViewId, ViewKey, DocId), - ok = erlfdb:clear_range(Tx, Start, End) - end, ViewKeys). + % Clear tree data + TreeTuple = {?DB_VIEWS, ?VIEW_TREES, Signature}, + TreePrefix = erlfdb_tuple:pack(TreeTuple, DbPrefix), + erlfdb:clear_range_startswith(Tx, TreePrefix). -update_id_idx(TxDb, Sig, ViewId, DocId, [], _KVSize) -> - #{ - tx := Tx, - db_prefix := DbPrefix - } = TxDb, - Key = id_idx_key(DbPrefix, Sig, DocId, ViewId), - ok = erlfdb:clear(Tx, Key); - -update_id_idx(TxDb, Sig, ViewId, DocId, NewRows, KVSize) -> - #{ - tx := Tx, - db_prefix := DbPrefix - } = TxDb, - - Unique = lists:usort([K || {K, _V} <- NewRows]), - - Key = id_idx_key(DbPrefix, Sig, DocId, ViewId), - Val = couch_views_encoding:encode([length(NewRows), KVSize, Unique]), - ok = erlfdb:set(Tx, Key, aegis:encrypt(TxDb, Key, Val)). - - -update_map_idx(TxDb, Sig, ViewId, DocId, ExistingKeys, NewRows) -> - #{ - tx := Tx, - db_prefix := DbPrefix - } = TxDb, - - lists:foreach(fun(RemKey) -> - {Start, End} = map_idx_range(DbPrefix, Sig, ViewId, RemKey, DocId), - ok = erlfdb:clear_range(Tx, Start, End) - end, ExistingKeys), - - KVsToAdd = process_rows(NewRows), - MapIdxPrefix = map_idx_prefix(DbPrefix, Sig, ViewId), - - lists:foreach(fun({DupeId, Key1, Key2, EV}) -> - KK = map_idx_key(MapIdxPrefix, {Key1, DocId}, DupeId), - Val = erlfdb_tuple:pack({Key2, EV}), - ok = erlfdb:set(Tx, KK, aegis:encrypt(TxDb, KK, Val)) - end, KVsToAdd). - - -get_view_keys(TxDb, Sig, DocId) -> - #{ - tx := Tx, - db_prefix := DbPrefix - } = TxDb, - {Start, End} = id_idx_range(DbPrefix, Sig, DocId), - lists:map(fun({K, V}) -> - {?DB_VIEWS, ?VIEW_DATA, Sig, ?VIEW_ID_RANGE, DocId, ViewId} = - erlfdb_tuple:unpack(K, DbPrefix), - [TotalKeys, TotalSize, UniqueKeys] = couch_views_encoding:decode(V), - {ViewId, TotalKeys, TotalSize, UniqueKeys} - end, aegis:decrypt(TxDb, erlfdb:get_range(Tx, Start, End, []))). - - -update_row_count(TxDb, Sig, ViewId, Increment) -> - #{ - tx := Tx, - db_prefix := DbPrefix - } = TxDb, - Key = row_count_key(DbPrefix, Sig, ViewId), - erlfdb:add(Tx, Key, Increment). +persist_chunks(Tx, set, [Key, Value]) -> + Chunks = fabric2_fdb:chunkify_binary(Value), + LastId = lists:foldl(fun(Chunk, Id) -> + ChunkKey = erlfdb_tuple:pack({Id}, Key), + erlfdb:set(Tx, ChunkKey, Chunk), + Id + 1 + end, 0, Chunks), + % We update nodes in place, so its possible that + % a node shrank. This clears any keys that we haven't + % just overwritten for the provided key. + LastIdKey = erlfdb_tuple:pack({LastId}, Key), + EndRange = <>, + erlfdb:clear_range(Tx, LastIdKey, EndRange); -update_kv_size(TxDb, Sig, ViewId, Increment) -> - #{ - tx := Tx, - db_prefix := DbPrefix - } = TxDb, - - % Track a view specific size for calls to - % GET /dbname/_design/doc/_info` - IdxKey = kv_size_key(DbPrefix, Sig, ViewId), - erlfdb:add(Tx, IdxKey, Increment), +persist_chunks(Tx, get, Key) -> + Rows = erlfdb:get_range_startswith(Tx, Key), + Values = [V || {_K, V} <- Rows], + iolist_to_binary(Values); - % Track a database level rollup for calls to - % GET /dbname - DbKey = db_kv_size_key(DbPrefix), - erlfdb:add(Tx, DbKey, Increment). +persist_chunks(Tx, clear, Key) -> + erlfdb:clear_range_startswith(Tx, Key). seq_key(DbPrefix, Sig) -> @@ -390,54 +186,6 @@ seq_key(DbPrefix, Sig) -> erlfdb_tuple:pack(Key, DbPrefix). -row_count_key(DbPrefix, Sig, ViewId) -> - Key = {?DB_VIEWS, ?VIEW_INFO, ?VIEW_ROW_COUNT, Sig, ViewId}, - erlfdb_tuple:pack(Key, DbPrefix). - - -kv_size_key(DbPrefix, Sig, ViewId) -> - Key = {?DB_VIEWS, ?VIEW_INFO, ?VIEW_KV_SIZE, Sig, ViewId}, - erlfdb_tuple:pack(Key, DbPrefix). - - -db_kv_size_key(DbPrefix) -> - Key = {?DB_STATS, <<"sizes">>, <<"views">>}, - erlfdb_tuple:pack(Key, DbPrefix). - - -id_idx_key(DbPrefix, Sig, DocId, ViewId) -> - Key = {?DB_VIEWS, ?VIEW_DATA, Sig, ?VIEW_ID_RANGE, DocId, ViewId}, - erlfdb_tuple:pack(Key, DbPrefix). - - -id_idx_range(DbPrefix, Sig, DocId) -> - Key = {?DB_VIEWS, ?VIEW_DATA, Sig, ?VIEW_ID_RANGE, DocId}, - erlfdb_tuple:range(Key, DbPrefix). - - -map_idx_prefix(DbPrefix, Sig, ViewId) -> - Key = {?DB_VIEWS, ?VIEW_DATA, Sig, ?VIEW_MAP_RANGE, ViewId}, - erlfdb_tuple:pack(Key, DbPrefix). - - -map_idx_key(MapIdxPrefix, MapKey, DupeId) -> - Key = {MapKey, DupeId}, - erlfdb_tuple:pack(Key, MapIdxPrefix). - - -map_idx_range(DbPrefix, Sig, ViewId, MapKey, DocId) -> - Encoded = couch_views_encoding:encode(MapKey, key), - Key = { - ?DB_VIEWS, - ?VIEW_DATA, - Sig, - ?VIEW_MAP_RANGE, - ViewId, - {Encoded, DocId} - }, - erlfdb_tuple:range(Key, DbPrefix). - - creation_vs_key(Db, Sig) -> #{ db_prefix := DbPrefix @@ -452,24 +200,3 @@ build_status_key(Db, Sig) -> } = Db, Key = {?DB_VIEWS, ?VIEW_INFO, ?VIEW_BUILD_STATUS, Sig}, erlfdb_tuple:pack(Key, DbPrefix). - - -process_rows(Rows) -> - Encoded = lists:map(fun({K, V}) -> - EK1 = couch_views_encoding:encode(K, key), - EK2 = couch_views_encoding:encode(K, value), - EV = couch_views_encoding:encode(V, value), - {EK1, EK2, EV} - end, Rows), - - Grouped = lists:foldl(fun({K1, K2, V}, Acc) -> - dict:append(K1, {K2, V}, Acc) - end, dict:new(), Encoded), - - dict:fold(fun(K1, Vals, DAcc) -> - Vals1 = lists:keysort(2, Vals), - {_, Labeled} = lists:foldl(fun({K2, V}, {Count, Acc}) -> - {Count + 1, [{Count, K1, K2, V} | Acc]} - end, {0, []}, Vals1), - Labeled ++ DAcc - end, [], Grouped). diff --git a/src/couch_views/src/couch_views_indexer.erl b/src/couch_views/src/couch_views_indexer.erl index 17b0daab7..da2393999 100644 --- a/src/couch_views/src/couch_views_indexer.erl +++ b/src/couch_views/src/couch_views_indexer.erl @@ -110,6 +110,10 @@ init() -> error:database_does_not_exist -> fail_job(Job, Data, db_deleted, "Database was deleted"); Error:Reason -> + Stack = erlang:get_stacktrace(), + Fmt = "Error building view for ddoc ~s in ~s: ~p:~p ~p", + couch_log:error(Fmt, [DbName, DDocId, Error, Reason, Stack]), + NewRetry = Retries + 1, RetryLimit = retry_limit(), @@ -196,6 +200,7 @@ do_update(Db, Mrst0, State0) -> tx := Tx } = TxDb, + Mrst1 = couch_views_trees:open(TxDb, Mrst0), State1 = get_update_start_state(TxDb, Mrst0, State0), {ok, State2} = fold_changes(State1), @@ -212,7 +217,7 @@ do_update(Db, Mrst0, State0) -> DocAcc1 = fetch_docs(TxDb, DesignOpts, DocAcc), - {Mrst1, MappedDocs} = map_docs(Mrst0, DocAcc1), + {Mrst2, MappedDocs} = map_docs(Mrst0, DocAcc1), TotalKVs = write_docs(TxDb, Mrst1, MappedDocs, State2), ChangesDone = ChangesDone0 + length(DocAcc), @@ -225,14 +230,14 @@ do_update(Db, Mrst0, State0) -> case Count < Limit of true -> - maybe_set_build_status(TxDb, Mrst1, ViewVS, + maybe_set_build_status(TxDb, Mrst2, ViewVS, ?INDEX_READY), report_progress(State2#{changes_done := ChangesDone}, finished), - {Mrst1, finished}; + {Mrst2, finished}; false -> State3 = report_progress(State2, update), - {Mrst1, State3#{ + {Mrst2, State3#{ tx_db := undefined, count := 0, doc_acc := [], @@ -339,7 +344,7 @@ map_docs(Mrst, Docs) -> end, Docs), Deleted1 = lists:map(fun(Doc) -> - Doc#{results => []} + Doc#{results => [[] || _ <- Mrst1#mrst.views]} end, Deleted0), DocsToMap = lists:map(fun(Doc) -> @@ -370,9 +375,8 @@ map_docs(Mrst, Docs) -> {Mrst1, MappedDocs}. -write_docs(TxDb, Mrst, Docs, State) -> +write_docs(TxDb, Mrst, Docs0, State) -> #mrst{ - views = Views, sig = Sig } = Mrst, @@ -380,15 +384,15 @@ write_docs(TxDb, Mrst, Docs, State) -> last_seq := LastSeq } = State, - ViewIds = [View#mrview.id_num || View <- Views], KeyLimit = key_size_limit(), ValLimit = value_size_limit(), - TotalKVCount = lists:foldl(fun(Doc0, KVCount) -> - Doc1 = calculate_kv_sizes(Mrst, Doc0, KeyLimit, ValLimit), - couch_views_fdb:write_doc(TxDb, Sig, ViewIds, Doc1), - KVCount + count_kvs(Doc1) - end, 0, Docs), + {Docs1, TotalKVCount} = lists:mapfoldl(fun(Doc0, KVCount) -> + Doc1 = check_kv_size_limit(Mrst, Doc0, KeyLimit, ValLimit), + {Doc1, KVCount + count_kvs(Doc1)} + end, 0, Docs0), + + couch_views_trees:update_views(TxDb, Mrst, Docs1), if LastSeq == false -> ok; true -> couch_views_fdb:set_update_seq(TxDb, Sig, LastSeq) @@ -479,7 +483,7 @@ start_query_server(#mrst{} = Mrst) -> Mrst. -calculate_kv_sizes(Mrst, Doc, KeyLimit, ValLimit) -> +check_kv_size_limit(Mrst, Doc, KeyLimit, ValLimit) -> #mrst{ db_name = DbName, idx_name = IdxName @@ -488,10 +492,10 @@ calculate_kv_sizes(Mrst, Doc, KeyLimit, ValLimit) -> results := Results } = Doc, try - KVSizes = lists:map(fun(ViewRows) -> - lists:foldl(fun({K, V}, Acc) -> - KeySize = erlang:external_size(K), - ValSize = erlang:external_size(V), + lists:foreach(fun(ViewRows) -> + lists:foreach(fun({K, V}) -> + KeySize = couch_ejson_size:encoded_size(K), + ValSize = couch_ejson_size:encoded_size(V), if KeySize =< KeyLimit -> ok; true -> throw({size_error, key}) @@ -499,18 +503,20 @@ calculate_kv_sizes(Mrst, Doc, KeyLimit, ValLimit) -> if ValSize =< ValLimit -> ok; true -> throw({size_error, value}) - end, - - Acc + KeySize + ValSize - end, 0, ViewRows) + end + end, ViewRows) end, Results), - Doc#{kv_sizes => KVSizes} + Doc catch throw:{size_error, Type} -> #{id := DocId} = Doc, Fmt = "View ~s size error for docid `~s`, excluded from indexing " "in db `~s` for design doc `~s`", couch_log:error(Fmt, [Type, DocId, DbName, IdxName]), - Doc#{deleted := true, results := [], kv_sizes => []} + Doc#{ + deleted := true, + results := [[] || _ <- Mrst#mrst.views], + kv_sizes => [] + } end. diff --git a/src/couch_views/src/couch_views_reader.erl b/src/couch_views/src/couch_views_reader.erl index 61a78d7f8..a785c7b35 100644 --- a/src/couch_views/src/couch_views_reader.erl +++ b/src/couch_views/src/couch_views_reader.erl @@ -23,24 +23,24 @@ -include_lib("fabric/include/fabric2.hrl"). -read(Db, Mrst, ViewName, UserCallback, UserAcc0, Args) -> - #mrst{ - language = Lang, - sig = Sig, - views = Views - } = Mrst, - - ViewId = get_view_id(Lang, Args, ViewName, Views), - Fun = fun handle_row/4, - +read(Db, Mrst0, ViewName, UserCallback, UserAcc0, Args) -> try fabric2_fdb:transactional(Db, fun(TxDb) -> - Meta = get_meta(TxDb, Mrst, ViewId, Args), + #mrst{ + language = Lang, + views = Views + } = Mrst = couch_views_trees:open(TxDb, Mrst0), + + View = get_map_view(Lang, Args, ViewName, Views), + Fun = fun handle_map_row/4, + + Meta = get_map_meta(TxDb, Mrst, View, Args), UserAcc1 = maybe_stop(UserCallback(Meta, UserAcc0)), Acc0 = #{ db => TxDb, skip => Args#mrargs.skip, + limit => Args#mrargs.limit, mrargs => undefined, callback => UserCallback, acc => UserAcc1 @@ -51,14 +51,7 @@ read(Db, Mrst, ViewName, UserCallback, UserAcc0, Args) -> KeyAcc1 = KeyAcc0#{ mrargs := KeyArgs }, - couch_views_fdb:fold_map_idx( - TxDb, - Sig, - ViewId, - Opts, - Fun, - KeyAcc1 - ) + couch_views_trees:fold_map_idx(TxDb, View, Opts, Fun, KeyAcc1) end, Acc0, expand_keys_args(Args)), #{ @@ -66,27 +59,35 @@ read(Db, Mrst, ViewName, UserCallback, UserAcc0, Args) -> } = Acc1, {ok, maybe_stop(UserCallback(complete, UserAcc2))} end) - catch throw:{done, Out} -> - {ok, Out} + catch + throw:{complete, Out} -> + {_, Final} = UserCallback(complete, Out), + {ok, Final}; + throw:{done, Out} -> + {ok, Out} end. -get_meta(TxDb, Mrst, ViewId, #mrargs{update_seq = true}) -> - TotalRows = couch_views_fdb:get_row_count(TxDb, Mrst, ViewId), +get_map_meta(TxDb, Mrst, View, #mrargs{update_seq = true}) -> + TotalRows = couch_views_trees:get_row_count(TxDb, View), ViewSeq = couch_views_fdb:get_update_seq(TxDb, Mrst), {meta, [{update_seq, ViewSeq}, {total, TotalRows}, {offset, null}]}; -get_meta(TxDb, Mrst, ViewId, #mrargs{}) -> - TotalRows = couch_views_fdb:get_row_count(TxDb, Mrst, ViewId), +get_map_meta(TxDb, _Mrst, View, #mrargs{}) -> + TotalRows = couch_views_trees:get_row_count(TxDb, View), {meta, [{total, TotalRows}, {offset, null}]}. -handle_row(_DocId, _Key, _Value, #{skip := Skip} = Acc) when Skip > 0 -> +handle_map_row(_DocId, _Key, _Value, #{skip := Skip} = Acc) when Skip > 0 -> Acc#{skip := Skip - 1}; -handle_row(DocId, Key, Value, Acc) -> +handle_map_row(_DocID, _Key, _Value, #{limit := 0, acc := UserAcc}) -> + throw({complete, UserAcc}); + +handle_map_row(DocId, Key, Value, Acc) -> #{ db := TxDb, + limit := Limit, mrargs := Args, callback := UserCallback, acc := UserAcc0 @@ -111,13 +112,13 @@ handle_row(DocId, Key, Value, Acc) -> end, UserAcc1 = maybe_stop(UserCallback({row, Row}, UserAcc0)), - Acc#{acc := UserAcc1}. + Acc#{limit := Limit - 1, acc := UserAcc1}. -get_view_id(Lang, Args, ViewName, Views) -> +get_map_view(Lang, Args, ViewName, Views) -> case couch_mrview_util:extract_view(Lang, Args, ViewName, Views) of - {map, View, _Args} -> View#mrview.id_num; - {red, {_Idx, _Lang, View}} -> View#mrview.id_num + {map, View, _Args} -> View; + {red, {_Idx, _Lang, View}, _} -> View end. @@ -135,57 +136,33 @@ expand_keys_args(#mrargs{keys = Keys} = Args) -> mrargs_to_fdb_options(Args) -> #mrargs{ - start_key = StartKey0, + start_key = StartKey, start_key_docid = StartKeyDocId, - end_key = EndKey0, - end_key_docid = EndKeyDocId, + end_key = EndKey, + end_key_docid = EndKeyDocId0, direction = Direction, - limit = Limit, - skip = Skip, inclusive_end = InclusiveEnd } = Args, - StartKey1 = if StartKey0 == undefined -> undefined; true -> - couch_views_encoding:encode(StartKey0, key) - end, - - StartKeyOpts = case {StartKey1, StartKeyDocId} of - {undefined, _} -> - []; - {StartKey1, StartKeyDocId} -> - [{start_key, {StartKey1, StartKeyDocId}}] + StartKeyOpts = if StartKey == undefined -> []; true -> + [{start_key, {StartKey, StartKeyDocId}}] end, - EndKey1 = if EndKey0 == undefined -> undefined; true -> - couch_views_encoding:encode(EndKey0, key) + EndKeyDocId = case {Direction, EndKeyDocId0} of + {fwd, <<255>>} when InclusiveEnd -> <<255>>; + {fwd, <<255>>} when not InclusiveEnd -> <<>>; + {rev, <<>>} when InclusiveEnd -> <<>>; + {rev, <<>>} when not InclusiveEnd -> <<255>>; + _ -> EndKeyDocId0 end, - EndKeyOpts = case {EndKey1, EndKeyDocId, Direction} of - {undefined, _, _} -> - []; - {EndKey1, <<>>, rev} when not InclusiveEnd -> - % When we iterate in reverse with - % inclusive_end=false we have to set the - % EndKeyDocId to <<255>> so that we don't - % include matching rows. - [{end_key_gt, {EndKey1, <<255>>}}]; - {EndKey1, <<255>>, _} when not InclusiveEnd -> - % When inclusive_end=false we need to - % elide the default end_key_docid so as - % to not sort past the docids with the - % given end key. - [{end_key_gt, {EndKey1}}]; - {EndKey1, EndKeyDocId, _} when not InclusiveEnd -> - [{end_key_gt, {EndKey1, EndKeyDocId}}]; - {EndKey1, EndKeyDocId, _} when InclusiveEnd -> - [{end_key, {EndKey1, EndKeyDocId}}] + EndKeyOpts = if EndKey == undefined -> []; true -> + [{end_key, {EndKey, EndKeyDocId}}] end, [ {dir, Direction}, - {limit, Limit + Skip}, - {streaming_mode, want_all}, - {restart_tx, true} + {inclusive_end, InclusiveEnd} ] ++ StartKeyOpts ++ EndKeyOpts. diff --git a/src/couch_views/src/couch_views_trees.erl b/src/couch_views/src/couch_views_trees.erl new file mode 100644 index 000000000..0f680a623 --- /dev/null +++ b/src/couch_views/src/couch_views_trees.erl @@ -0,0 +1,429 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_views_trees). + +-export([ + open/2, + + get_row_count/2, + get_kv_size/2, + + fold_map_idx/5, + + update_views/3 +]). + +-ifdef(TEST). +-compile(export_all). +-compile(nowarn_export_all). +-endif. + + +-include("couch_views.hrl"). +-include_lib("couch_mrview/include/couch_mrview.hrl"). +-include_lib("fabric/include/fabric2.hrl"). + + +open(TxDb, Mrst) -> + #mrst{ + sig = Sig, + language = Lang, + views = Views + } = Mrst, + Mrst#mrst{ + id_btree = open_id_tree(TxDb, Sig), + views = [open_view_tree(TxDb, Sig, Lang, V) || V <- Views] + }. + + +get_row_count(TxDb, View) -> + #{ + tx := Tx + } = TxDb, + {Count, _} = ebtree:full_reduce(Tx, View#mrview.btree), + Count. + + +get_kv_size(TxDb, View) -> + #{ + tx := Tx + } = TxDb, + {_, TotalSize} = ebtree:full_reduce(Tx, View#mrview.btree), + TotalSize. + + +fold_map_idx(TxDb, View, Options, Callback, Acc0) -> + #{ + tx := Tx + } = TxDb, + #mrview{ + btree = Btree + } = View, + + CollateFun = couch_views_util:collate_fun(View), + + {Dir, StartKey, EndKey, InclusiveEnd} = to_map_opts(Options), + + Wrapper = fun(KVs0, WAcc) -> + % Remove any keys that match Start or End key + % depending on direction + KVs1 = case InclusiveEnd of + true -> + KVs0; + false when Dir == fwd -> + lists:filter(fun({K, _V}) -> + case CollateFun(K, EndKey) of + lt -> true; + eq -> false; + gt -> false + end + end, KVs0); + false when Dir == rev -> + lists:filter(fun({K, _V}) -> + case CollateFun(K, EndKey) of + lt -> false; + eq -> false; + gt -> true + end + end, KVs0) + end, + % Expand dups + KVs2 = lists:flatmap(fun({K, V}) -> + case V of + {dups, Dups} when Dir == fwd -> + [{K, D} || D <- Dups]; + {dups, Dups} when Dir == rev -> + [{K, D} || D <- lists:reverse(Dups)]; + _ -> + [{K, V}] + end + end, KVs1), + lists:foldl(fun({{Key, DocId}, Value}, WAccInner) -> + Callback(DocId, Key, Value, WAccInner) + end, WAcc, KVs2) + end, + + case Dir of + fwd -> + ebtree:range(Tx, Btree, StartKey, EndKey, Wrapper, Acc0); + rev -> + % Start/End keys swapped on purpose because ebtree + ebtree:reverse_range(Tx, Btree, EndKey, StartKey, Wrapper, Acc0) + end. + + +update_views(TxDb, Mrst, Docs) -> + #{ + tx := Tx + } = TxDb, + + % Collect update information + #{ + ids := IdMap, + views := ViewMaps, + delete_ref := DeleteRef + } = gather_update_info(Tx, Mrst, Docs), + + % Update the IdBtree + update_btree(Tx, Mrst#mrst.id_btree, IdMap, DeleteRef), + + % Update each view's BTree + lists:foreach(fun(View) -> + #mrview{ + id_num = ViewId, + btree = BTree + } = View, + + ViewMap = maps:get(ViewId, ViewMaps, #{}), + update_btree(Tx, BTree, ViewMap, DeleteRef) + end, Mrst#mrst.views). + + +open_id_tree(TxDb, Sig) -> + #{ + tx := Tx, + db_prefix := DbPrefix + } = TxDb, + Prefix = id_tree_prefix(DbPrefix, Sig), + TreeOpts = [ + {persist_fun, fun couch_views_fdb:persist_chunks/3}, + {cache_fun, create_cache_fun(id_tree)} + ], + ebtree:open(Tx, Prefix, get_order(id_btree), TreeOpts). + + +open_view_tree(TxDb, Sig, Lang, View) -> + #{ + tx := Tx, + db_prefix := DbPrefix + } = TxDb, + #mrview{ + id_num = ViewId + } = View, + Prefix = view_tree_prefix(DbPrefix, Sig, ViewId), + TreeOpts = [ + {collate_fun, couch_views_util:collate_fun(View)}, + {reduce_fun, make_reduce_fun(Lang, View)}, + {persist_fun, fun couch_views_fdb:persist_chunks/3}, + {cache_fun, create_cache_fun({view, ViewId})} + ], + View#mrview{ + btree = ebtree:open(Tx, Prefix, get_order(view_btree), TreeOpts) + }. + + +get_order(id_btree) -> + min_order(config:get_integer("couch_views", "id_btree_node_size", 100)); +get_order(view_btree) -> + min_order(config:get_integer("couch_views", "view_btree_node_size", 100)). + + +min_order(V) when is_integer(V), V < 2 -> + 2; +min_order(V) when is_integer(V), V rem 2 == 0 -> + V; +min_order(V) -> + V + 1. + + +make_reduce_fun(_Lang, #mrview{}) -> + fun + (KVs, _ReReduce = false) -> + TotalSize = lists:foldl(fun({{K, _DocId}, V}, Acc) -> + KSize = couch_ejson_size:encoded_size(K), + VSize = case V of + {dups, Dups} -> + lists:foldl(fun(D, DAcc) -> + DAcc + couch_ejson_size:encoded_size(D) + end, 0, Dups); + _ -> + couch_ejson_size:encoded_size(V) + end, + KSize + VSize + Acc + end, 0, KVs), + {length(KVs), TotalSize}; + (KRs, _ReReduce = true) -> + lists:foldl(fun({Count, Size}, {CountAcc, SizeAcc}) -> + {Count + CountAcc, Size + SizeAcc} + end, {0, 0}, KRs) + end. + + +create_cache_fun(TreeId) -> + CacheTid = case get(TreeId) of + undefined -> + Tid = ets:new(?MODULE, [protected, set]), + put(TreeId, {ebtree_cache, Tid}), + Tid; + {ebtree_cache, Tid} -> + Tid + end, + fun + (set, [Id, Node]) -> + true = ets:insert_new(CacheTid, {Id, Node}), + ok; + (clear, Id) -> + ets:delete(CacheTid, Id), + ok; + (get, Id) -> + case ets:lookup(CacheTid, Id) of + [{Id, Node}] -> Node; + [] -> undefined + end + end. + + +to_map_opts(Options) -> + Dir = case lists:keyfind(dir, 1, Options) of + {dir, D} -> D; + _ -> fwd + end, + + InclusiveEnd = case lists:keyfind(inclusive_end, 1, Options) of + {inclusive_end, IE} -> IE; + _ -> true + end, + + StartKey = case lists:keyfind(start_key, 1, Options) of + {start_key, SK} -> SK; + false when Dir == fwd -> ebtree:min(); + false when Dir == rev -> ebtree:max() + end, + + EndKey = case lists:keyfind(end_key, 1, Options) of + {end_key, EK} -> EK; + false when Dir == fwd -> ebtree:max(); + false when Dir == rev -> ebtree:min() + end, + + {Dir, StartKey, EndKey, InclusiveEnd}. + + +gather_update_info(Tx, Mrst, Docs) -> + % A special token used to indicate that the row should be deleted + DeleteRef = erlang:make_ref(), + + AllDocIds = [DocId || #{id := DocId} <- Docs], + + BaseIdMap = lists:foldl(fun(DocId, Acc) -> + maps:put(DocId, DeleteRef, Acc) + end, #{}, AllDocIds), + + % Build the initial set of rows to delete + % ExistingViewKeys is a list of {DocId, [{ViewId, [Key | _]} | _]} + ExistingViewKeys = ebtree:lookup_multi(Tx, Mrst#mrst.id_btree, AllDocIds), + + % For each view, create an initial map that contains the + % list of keys to delete. The final result is a map of + % maps: + % #{ViewId => #{Key => DeleteRef}} + BaseViewMaps = lists:foldl(fun({DocId, ViewIdKeys}, ViewIdAcc1) -> + lists:foldl(fun({ViewId, Keys}, ViewIdAcc2) -> + OldViewMap = maps:get(ViewId, ViewIdAcc2, #{}), + NewViewMap = lists:foldl(fun(Key, ViewMapAcc) -> + maps:put({Key, DocId}, DeleteRef, ViewMapAcc) + end, OldViewMap, Keys), + maps:put(ViewId, NewViewMap, ViewIdAcc2) + end, ViewIdAcc1, ViewIdKeys) + end, #{}, ExistingViewKeys), + + % Build our base accumulator + InfoAcc1 = #{ + ids => BaseIdMap, + views => BaseViewMaps, + delete_ref => DeleteRef + }, + + % Insert results from each document into the map of + % maps which leaves us with a final shape of: + % #{ViewId => #{Key => Value}} + % where Value may be a copy of `DeleteRef` which flags + % that the Key should be deleted from the view. + lists:foldl(fun(Doc, InfoAcc2) -> + insert_doc(Mrst, Doc, InfoAcc2) + end, InfoAcc1, Docs). + + +insert_doc(_Mrst, #{deleted := true} = _Doc, InfoAcc) -> + InfoAcc; +insert_doc(Mrst, Doc, InfoAcc0) -> + #{ + id := DocId, + results := Results + } = Doc, + + FinalAcc = lists:foldl(fun({View, RawNewRows}, {IdKeyAcc, InfoAcc1}) -> + #mrview{ + id_num = ViewId + } = View, + #{ + views := ViewMaps + } = InfoAcc1, + + DedupedRows = dedupe_rows(View, RawNewRows), + IdKeys = lists:usort([K || {K, _V} <- DedupedRows]), + + OldViewMap = maps:get(ViewId, ViewMaps, #{}), + NewViewMap = lists:foldl(fun({K, V}, ViewMapAcc) -> + maps:put({K, DocId}, V, ViewMapAcc) + end, OldViewMap, DedupedRows), + + {[{ViewId, IdKeys} | IdKeyAcc], InfoAcc1#{ + views := maps:put(ViewId, NewViewMap, ViewMaps) + }} + end, {[], InfoAcc0}, lists:zip(Mrst#mrst.views, Results)), + + {IdRows, #{ids := IdMap} = InfoAcc2} = FinalAcc, + + % Don't store a row in the id_btree if it hasn't got any + % keys that will need to be deleted. + NonEmptyRows = [1 || {_ViewId, Rows} <- IdRows, Rows /= []], + if length(NonEmptyRows) == 0 -> InfoAcc2; true -> + InfoAcc2#{ids := maps:put(DocId, IdRows, IdMap)} + end. + + +update_btree(Tx, BTree, Map, DeleteRef) -> + {ToRemove, ToInsert} = maps:fold(fun(Key, Value, {Keys, Rows}) -> + case Value of + DeleteRef -> {[Key | Keys], Rows}; + _ -> {Keys, [{Key, Value} | Rows]} + end + end, {[], []}, Map), + + lists:foreach(fun(Key) -> + ebtree:delete(Tx, BTree, Key) + end, ToRemove), + + ebtree:insert_multi(Tx, BTree, ToInsert). + + +dedupe_rows(View, KVs0) -> + CollateFun = couch_views_util:collate_fun(View), + KVs1 = lists:sort(fun({KeyA, ValA}, {KeyB, ValB}) -> + case CollateFun({KeyA, <<>>}, {KeyB, <<>>}) of + lt -> true; + eq -> ValA =< ValB; + gt -> false + end + end, KVs0), + dedupe_rows_int(CollateFun, KVs1). + + +dedupe_rows_int(_CollateFun, []) -> + []; + +dedupe_rows_int(_CollateFun, [KV]) -> + [KV]; + +dedupe_rows_int(CollateFun, [{K1, V1} | RestKVs]) -> + RestDeduped = dedupe_rows_int(CollateFun, RestKVs), + case RestDeduped of + [{K2, V2} | RestRestDeduped] -> + case CollateFun({K1, <<>>}, {K2, <<>>}) of + eq -> [{K1, combine_vals(V1, V2)} | RestRestDeduped]; + _ -> [{K1, V1} | RestDeduped] + end; + [] -> + [{K1, V1}] + end. + + +combine_vals(V1, {dups, V2}) -> + {dups, [V1 | V2]}; +combine_vals(V1, V2) -> + {dups, [V1, V2]}. + + +id_tree_prefix(DbPrefix, Sig) -> + Key = {?DB_VIEWS, ?VIEW_TREES, Sig, ?VIEW_ID_TREE}, + erlfdb_tuple:pack(Key, DbPrefix). + + +view_tree_prefix(DbPrefix, Sig, ViewId) -> + Key = {?DB_VIEWS, ?VIEW_TREES, Sig, ?VIEW_ROW_TREES, ViewId}, + erlfdb_tuple:pack(Key, DbPrefix). + + +-ifdef(TEST). +-include_lib("eunit/include/eunit.hrl"). + +dedupe_basic_test() -> + View = #mrview{}, + ?assertEqual([{1, 1}], dedupe_rows(View, [{1, 1}])). + +dedupe_simple_test() -> + View = #mrview{}, + ?assertEqual([{1, {dups, [1, 2]}}], dedupe_rows(View, [{1, 1}, {1, 2}])). + +-endif. diff --git a/src/couch_views/src/couch_views_updater.erl b/src/couch_views/src/couch_views_updater.erl index ba9fadb51..7e5466eb8 100644 --- a/src/couch_views/src/couch_views_updater.erl +++ b/src/couch_views/src/couch_views_updater.erl @@ -87,16 +87,17 @@ write_doc(Db, #doc{deleted = Deleted} = Doc) -> }, lists:foreach(fun(DDoc) -> - {ok, Mrst} = couch_mrview_util:ddoc_to_mrst(DbName, DDoc), + {ok, Mrst0} = couch_mrview_util:ddoc_to_mrst(DbName, DDoc), + Mrst1 = couch_views_trees:open(Db, Mrst0), - case should_index_doc(Doc, Mrst) of + case should_index_doc(Doc, Mrst1) of true -> - {Mrst1, Result1} = couch_views_indexer:map_docs(Mrst, Result0), - DocNumber = couch_views_indexer:write_docs(Db, Mrst1, + {Mrst2, Result1} = couch_views_indexer:map_docs(Mrst1, Result0), + DocNumber = couch_views_indexer:write_docs(Db, Mrst2, Result1, State), - couch_views_plugin:after_interactive_write(Db, Mrst1, + couch_views_plugin:after_interactive_write(Db, Mrst2, Result1, DocNumber), - couch_eval:release_map_context(Mrst1#mrst.qserver); + couch_eval:release_map_context(Mrst2#mrst.qserver); false -> ok end diff --git a/src/couch_views/src/couch_views_util.erl b/src/couch_views/src/couch_views_util.erl index 6298acf33..1e3e4beef 100644 --- a/src/couch_views/src/couch_views_util.erl +++ b/src/couch_views/src/couch_views_util.erl @@ -15,6 +15,7 @@ -export([ ddoc_to_mrst/2, + collate_fun/1, validate_args/1, validate_args/2, is_paginated/1, @@ -82,6 +83,40 @@ ddoc_to_mrst(DbName, #doc{id=Id, body={Fields}}) -> {ok, IdxState#mrst{sig=couch_hash:md5_hash(term_to_binary(SigInfo))}}. +collate_fun(View) -> + #mrview{ + options = Options + } = View, + case couch_util:get_value(<<"collation">>, Options) of + <<"raw">> -> fun collate_raw/2; + _ -> fun collate_rows/2 + end. + + +collate_raw(A, A) -> eq; +collate_raw(A, B) when A < B -> lt; +collate_raw(A, B) when A > B -> gt. + + +collate_rows({KeyA, DocIdA}, {KeyB, DocIdB}) -> + case couch_ejson_compare:less(KeyA, KeyB) of + N when N < 0 -> lt; + 0 when DocIdA < DocIdB -> lt; + 0 when DocIdA == DocIdB -> eq; + 0 -> gt; % when DocIdA > DocIdB + N when N > 0 -> gt + end; + +collate_rows(KeyA, KeyB) -> + % When collating reduce group keys they don't + % come with a docid. + case couch_ejson_compare:less(KeyA, KeyB) of + N when N < 0 -> lt; + 0 -> eq; + N when N > 0 -> gt + end. + + validate_args(Args) -> validate_args(Args, []). diff --git a/src/couch_views/test/couch_views_cleanup_test.erl b/src/couch_views/test/couch_views_cleanup_test.erl index e4dcdceea..54048c968 100644 --- a/src/couch_views/test/couch_views_cleanup_test.erl +++ b/src/couch_views/test/couch_views_cleanup_test.erl @@ -302,7 +302,7 @@ view_has_data(Db, DDoc) -> SigKey = erlfdb_tuple:pack(SigKeyTuple, DbPrefix), SigVal = erlfdb:wait(erlfdb:get(Tx, SigKey)), - RangeKeyTuple = {?DB_VIEWS, ?VIEW_DATA, Sig}, + RangeKeyTuple = {?DB_VIEWS, ?VIEW_TREES, Sig}, RangeKey = erlfdb_tuple:pack(RangeKeyTuple, DbPrefix), Range = erlfdb:wait(erlfdb:get_range_startswith(Tx, RangeKey)), diff --git a/src/couch_views/test/couch_views_indexer_test.erl b/src/couch_views/test/couch_views_indexer_test.erl index 86c0a8195..75be2459f 100644 --- a/src/couch_views/test/couch_views_indexer_test.erl +++ b/src/couch_views/test/couch_views_indexer_test.erl @@ -126,13 +126,12 @@ updated_docs_are_reindexed(Db) -> % Check that our id index is updated properly % as well. DbName = fabric2_db:name(Db), - {ok, Mrst} = couch_views_util:ddoc_to_mrst(DbName, DDoc), - Sig = Mrst#mrst.sig, + {ok, Mrst0} = couch_views_util:ddoc_to_mrst(DbName, DDoc), fabric2_fdb:transactional(Db, fun(TxDb) -> - ?assertMatch( - [{0, 1, _, [1]}], - couch_views_fdb:get_view_keys(TxDb, Sig, <<"0">>) - ) + #{tx := Tx} = TxDb, + Mrst1 = couch_views_trees:open(TxDb, Mrst0), + IdRow = ebtree:lookup(Tx, Mrst1#mrst.id_btree, <<"0">>), + ?assertEqual({<<"0">>, [{1, []}, {0, [1]}]}, IdRow) end). @@ -160,13 +159,12 @@ updated_docs_without_changes_are_reindexed(Db) -> % Check fdb directly to make sure we've also % removed the id idx keys properly. DbName = fabric2_db:name(Db), - {ok, Mrst} = couch_views_util:ddoc_to_mrst(DbName, DDoc), - Sig = Mrst#mrst.sig, + {ok, Mrst0} = couch_views_util:ddoc_to_mrst(DbName, DDoc), fabric2_fdb:transactional(Db, fun(TxDb) -> - ?assertMatch( - [{0, 1, _, [0]}], - couch_views_fdb:get_view_keys(TxDb, Sig, <<"0">>) - ) + #{tx := Tx} = TxDb, + Mrst1 = couch_views_trees:open(TxDb, Mrst0), + IdRow = ebtree:lookup(Tx, Mrst1#mrst.id_btree, <<"0">>), + ?assertEqual({<<"0">>, [{1, []}, {0, [0]}]}, IdRow) end). @@ -208,10 +206,12 @@ deleted_docs_are_unindexed(Db) -> % Check fdb directly to make sure we've also % removed the id idx keys properly. DbName = fabric2_db:name(Db), - {ok, Mrst} = couch_views_util:ddoc_to_mrst(DbName, DDoc), - Sig = Mrst#mrst.sig, + {ok, Mrst0} = couch_views_util:ddoc_to_mrst(DbName, DDoc), fabric2_fdb:transactional(Db, fun(TxDb) -> - ?assertEqual([], couch_views_fdb:get_view_keys(TxDb, Sig, <<"0">>)) + #{tx := Tx} = TxDb, + Mrst1 = couch_views_trees:open(TxDb, Mrst0), + IdRow = ebtree:lookup(Tx, Mrst1#mrst.id_btree, <<"0">>), + ?assertEqual(false, IdRow) end). @@ -296,11 +296,9 @@ fewer_multipe_identical_keys_from_same_doc(Db) -> handle_size_key_limits(Db) -> ok = meck:new(config, [passthrough]), - ok = meck:expect(config, get_integer, fun(Section, Key, Default) -> - case Section == "couch_views" andalso Key == "key_size_limit" of - true -> 15; - _ -> Default - end + ok = meck:expect(config, get_integer, fun + ("couch_views", "key_size_limit", _Default) -> 15; + (_Section, _Key, Default) -> Default end), DDoc = create_ddoc(multi_emit_key_limit), @@ -328,11 +326,9 @@ handle_size_key_limits(Db) -> handle_size_value_limits(Db) -> ok = meck:new(config, [passthrough]), - ok = meck:expect(config, get_integer, fun(Section, _, Default) -> - case Section of - "couch_views" -> 15; - _ -> Default - end + ok = meck:expect(config, get_integer, fun + ("couch_views", "value_size_limit", _Default) -> 15; + (_Section, _Key, Default) -> Default end), DDoc = create_ddoc(multi_emit_key_limit), @@ -386,12 +382,6 @@ multiple_design_docs(Db) -> end) end, - % This is how we check that no index updates took place - meck:new(couch_views_fdb, [passthrough]), - meck:expect(couch_views_fdb, write_doc, fun(TxDb, Sig, ViewIds, Doc) -> - meck:passthrough([TxDb, Sig, ViewIds, Doc]) - end), - DDoc1 = create_ddoc(simple, <<"_design/bar1">>), DDoc2 = create_ddoc(simple, <<"_design/bar2">>), @@ -399,7 +389,7 @@ multiple_design_docs(Db) -> {ok, {Pos1, Rev1}} = fabric2_db:update_doc(Db, DDoc1, []), ?assertEqual({ok, [row(<<"0">>, 0, 0)]}, run_query(Db, DDoc1, ?MAP_FUN1)), - % Because run_query/3 can return, and unsurbscribe from the job, + % Because run_query/3 can return, and unsubscribe from the job, % before it actually finishes, ensure we wait for the job to % finish so we get a deterministic setup every time. JobId = get_job_id(Db, DDoc1), @@ -413,10 +403,16 @@ multiple_design_docs(Db) -> Cleanup(), - meck:reset(couch_views_fdb), + % Assert that no updates are applied + meck:new(couch_views_fdb, [passthrough]), + meck:expect(couch_views_trees, update_views, fun(TxDb, Mrst, Docs) -> + case Docs of + [] -> meck:passthrough([TxDb, Mrst, Docs]); + [_ | _] -> erlang:error(update_triggered) + end + end), ?assertEqual({ok, [row(<<"0">>, 0, 0)]}, run_query(Db, DDoc2, ?MAP_FUN1)), ?assertEqual(ok, wait_job_finished(JobId, 5000)), - ?assertEqual(0, meck:num_calls(couch_views_fdb, write_doc, 4)), DDoc2Del = DDoc2#doc{revs = {Pos2, [Rev2]}, deleted = true}, {ok, _} = fabric2_db:update_doc(Db, DDoc2Del, []), diff --git a/src/couch_views/test/couch_views_size_test.erl b/src/couch_views/test/couch_views_size_test.erl index 18fa9e628..cc2fe39fc 100644 --- a/src/couch_views/test/couch_views_size_test.erl +++ b/src/couch_views/test/couch_views_size_test.erl @@ -193,16 +193,21 @@ cleanup({Ctx, Db}) -> create_transition_tests({_Ctx, Db}) -> - Transitions = generate_transitions(), - Single = lists:flatmap(fun(T) -> - Name = lists:flatten(io_lib:format("single ~s", [tname(T)])), - [{Name, fun() -> check_single_transition(Db, T) end}] - end, lists:sort(Transitions)), - Multi = lists:flatmap(fun(T) -> - Name = lists:flatten(io_lib:format("multi ~s", [tname(T)])), - [{Name, fun() -> check_multi_transition(Db, T) end}] - end, lists:sort(group(shuffle(Transitions)))), - subset(?NUM_SINGLE_TESTS, Single) ++ subset(?NUM_MULTI_TESTS, Multi). + try + throw(disabled), + Transitions = generate_transitions(), + Single = lists:flatmap(fun(T) -> + Name = lists:flatten(io_lib:format("single ~s", [tname(T)])), + [{Name, fun() -> check_single_transition(Db, T) end}] + end, lists:sort(Transitions)), + Multi = lists:flatmap(fun(T) -> + Name = lists:flatten(io_lib:format("multi ~s", [tname(T)])), + [{Name, fun() -> check_multi_transition(Db, T) end}] + end, lists:sort(group(shuffle(Transitions)))), + subset(?NUM_SINGLE_TESTS, Single) ++ subset(?NUM_MULTI_TESTS, Multi) + catch throw:disabled -> + [{"Disabled", fun() -> ok end}] + end. check_single_transition(Db, {Set1, Set2, Transition}) -> diff --git a/src/couch_views/test/couch_views_updater_test.erl b/src/couch_views/test/couch_views_updater_test.erl index 89c341a17..aadbe940b 100644 --- a/src/couch_views/test/couch_views_updater_test.erl +++ b/src/couch_views/test/couch_views_updater_test.erl @@ -69,7 +69,7 @@ foreach_setup() -> Docs = make_docs(3), fabric2_db:update_docs(Db, Docs), - meck:new(couch_views_fdb, [passthrough]), + meck:new(couch_views_trees, [passthrough]), {Db, DDoc}. @@ -135,7 +135,7 @@ includes_design_docs({Db, _}) -> handle_erlfdb_errors({Db, _}) -> - meck:expect(couch_views_fdb, write_doc, fun(_, _, _, _) -> + meck:expect(couch_views_trees, update_views, fun(_, _, _) -> error({erlfdb_error, 1009}) end), ?assertError({erlfdb_error, 1009}, fabric2_db:update_docs(Db, [doc(4)])). diff --git a/src/mango/src/mango_cursor_view.erl b/src/mango/src/mango_cursor_view.erl index 44ae22093..411f4af65 100644 --- a/src/mango/src/mango_cursor_view.erl +++ b/src/mango/src/mango_cursor_view.erl @@ -31,6 +31,7 @@ -include_lib("fabric/include/fabric.hrl"). -include("mango_cursor.hrl"). +-include("mango_idx_view.hrl"). create(Db, Indexes, Selector, Opts) -> @@ -85,16 +86,15 @@ explain(Cursor) -> maybe_replace_max_json([]) -> []; +maybe_replace_max_json([?MAX_JSON_OBJ | T]) -> + [<<"">> | maybe_replace_max_json(T)]; + +maybe_replace_max_json([H | T]) -> + [H | maybe_replace_max_json(T)]; + maybe_replace_max_json(?MAX_STR) -> <<"">>; -maybe_replace_max_json([H | T] = EndKey) when is_list(EndKey) -> - MAX_VAL = couch_views_encoding:max(), - H1 = if H == MAX_VAL -> <<"">>; - true -> H - end, - [H1 | maybe_replace_max_json(T)]; - maybe_replace_max_json(EndKey) -> EndKey. diff --git a/src/mango/src/mango_idx_view.erl b/src/mango/src/mango_idx_view.erl index f80cc217b..a73d82ae6 100644 --- a/src/mango/src/mango_idx_view.erl +++ b/src/mango/src/mango_idx_view.erl @@ -34,6 +34,7 @@ -include_lib("couch/include/couch_db.hrl"). -include("mango.hrl"). -include("mango_idx.hrl"). +-include("mango_idx_view.hrl"). validate_new(#idx{}=Idx, _Db) -> @@ -131,7 +132,7 @@ is_usable(Idx, Selector, SortFields) -> % and the selector is not a text search (so requires a text index) RequiredFields = columns(Idx), - % sort fields are required to exist in the results so + % sort fields are required to exist in the results so % we don't need to check the selector for these RequiredFields1 = ordsets:subtract(lists:usort(RequiredFields), lists:usort(SortFields)), @@ -182,11 +183,11 @@ start_key([{'$eq', Key, '$eq', Key} | Rest]) -> end_key([]) -> - [couch_views_encoding:max()]; + [?MAX_JSON_OBJ]; end_key([{_, _, '$lt', Key} | Rest]) -> case mango_json:special(Key) of true -> - [couch_views_encoding:max()]; + [?MAX_JSON_OBJ]; false -> [Key | end_key(Rest)] end; diff --git a/src/mango/src/mango_idx_view.hrl b/src/mango/src/mango_idx_view.hrl new file mode 100644 index 000000000..d0f46748b --- /dev/null +++ b/src/mango/src/mango_idx_view.hrl @@ -0,0 +1,13 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-define(MAX_JSON_OBJ, {[{<<255, 255, 255, 255>>, <<>>}]}). \ No newline at end of file -- cgit v1.2.1 From a8cebfa6d3ddbfb6419c83ca7b3b607da330de1a Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Thu, 3 Sep 2020 12:05:45 -0500 Subject: Reimplement db wide view size tracking --- src/couch_views/src/couch_views_fdb.erl | 30 +- src/couch_views/src/couch_views_trees.erl | 27 +- src/couch_views/test/couch_views_size_test.erl | 829 +++++++++---------------- 3 files changed, 357 insertions(+), 529 deletions(-) diff --git a/src/couch_views/src/couch_views_fdb.erl b/src/couch_views/src/couch_views_fdb.erl index e813f2b61..f238a8f18 100644 --- a/src/couch_views/src/couch_views_fdb.erl +++ b/src/couch_views/src/couch_views_fdb.erl @@ -25,7 +25,8 @@ list_signatures/1, clear_index/2, - persist_chunks/3 + persist_chunks/3, + update_kv_size/4 ]). -ifdef(TEST). @@ -135,6 +136,11 @@ clear_index(Db, Signature) -> db_prefix := DbPrefix } = Db, + % Get view size to remove from global counter + SizeTuple = {?DB_VIEWS, ?VIEW_INFO, ?VIEW_KV_SIZE, Signature}, + SizeKey = erlfdb_tuple:pack(SizeTuple, DbPrefix), + ViewSize = ?bin2uint(erlfdb:wait(erlfdb:get(Tx, SizeKey))), + % Clear index info keys Keys = [ {?DB_VIEWS, ?VIEW_INFO, ?VIEW_UPDATE_SEQ, Signature}, @@ -154,7 +160,12 @@ clear_index(Db, Signature) -> % Clear tree data TreeTuple = {?DB_VIEWS, ?VIEW_TREES, Signature}, TreePrefix = erlfdb_tuple:pack(TreeTuple, DbPrefix), - erlfdb:clear_range_startswith(Tx, TreePrefix). + erlfdb:clear_range_startswith(Tx, TreePrefix), + + % Decrement db wide view size counter + DbSizeTuple = {?DB_STATS, <<"sizes">>, <<"views">>}, + DbSizeKey = erlfdb_tuple:pack(DbSizeTuple, DbPrefix), + erlfdb:add(Tx, DbSizeKey, -ViewSize). persist_chunks(Tx, set, [Key, Value]) -> @@ -181,6 +192,21 @@ persist_chunks(Tx, clear, Key) -> erlfdb:clear_range_startswith(Tx, Key). +update_kv_size(TxDb, Sig, OldSize, NewSize) -> + #{ + tx := Tx, + db_prefix := DbPrefix + } = TxDb, + + ViewTuple = {?DB_VIEWS, ?VIEW_INFO, ?VIEW_KV_SIZE, Sig}, + ViewKey = erlfdb_tuple:pack(ViewTuple, DbPrefix), + erlfdb:set(Tx, ViewKey, ?uint2bin(NewSize)), + + DbTuple = {?DB_STATS, <<"sizes">>, <<"views">>}, + DbKey = erlfdb_tuple:pack(DbTuple, DbPrefix), + erlfdb:add(Tx, DbKey, NewSize - OldSize). + + seq_key(DbPrefix, Sig) -> Key = {?DB_VIEWS, ?VIEW_INFO, ?VIEW_UPDATE_SEQ, Sig}, erlfdb_tuple:pack(Key, DbPrefix). diff --git a/src/couch_views/src/couch_views_trees.erl b/src/couch_views/src/couch_views_trees.erl index 0f680a623..7ce350506 100644 --- a/src/couch_views/src/couch_views_trees.erl +++ b/src/couch_views/src/couch_views_trees.erl @@ -127,6 +127,12 @@ update_views(TxDb, Mrst, Docs) -> tx := Tx } = TxDb, + % Get initial KV size + OldKVSize = lists:foldl(fun(View, SizeAcc) -> + {_, Size} = ebtree:full_reduce(Tx, View#mrview.btree), + SizeAcc + Size + end, 0, Mrst#mrst.views), + % Collect update information #{ ids := IdMap, @@ -146,7 +152,15 @@ update_views(TxDb, Mrst, Docs) -> ViewMap = maps:get(ViewId, ViewMaps, #{}), update_btree(Tx, BTree, ViewMap, DeleteRef) - end, Mrst#mrst.views). + end, Mrst#mrst.views), + + % Get new KV size after update + NewKVSize = lists:foldl(fun(View, SizeAcc) -> + {_, Size} = ebtree:full_reduce(Tx, View#mrview.btree), + SizeAcc + Size + end, 0, Mrst#mrst.views), + + couch_views_fdb:update_kv_size(TxDb, Mrst#mrst.sig, OldKVSize, NewKVSize). open_id_tree(TxDb, Sig) -> @@ -201,15 +215,16 @@ make_reduce_fun(_Lang, #mrview{}) -> (KVs, _ReReduce = false) -> TotalSize = lists:foldl(fun({{K, _DocId}, V}, Acc) -> KSize = couch_ejson_size:encoded_size(K), - VSize = case V of + Acc + case V of {dups, Dups} -> lists:foldl(fun(D, DAcc) -> - DAcc + couch_ejson_size:encoded_size(D) + VSize = couch_ejson_size:encoded_size(D), + DAcc + KSize + VSize end, 0, Dups); _ -> - couch_ejson_size:encoded_size(V) - end, - KSize + VSize + Acc + VSize = couch_ejson_size:encoded_size(V), + KSize + VSize + end end, 0, KVs), {length(KVs), TotalSize}; (KRs, _ReReduce = true) -> diff --git a/src/couch_views/test/couch_views_size_test.erl b/src/couch_views/test/couch_views_size_test.erl index cc2fe39fc..e69b5b292 100644 --- a/src/couch_views/test/couch_views_size_test.erl +++ b/src/couch_views/test/couch_views_size_test.erl @@ -16,162 +16,38 @@ -include_lib("couch/include/couch_db.hrl"). -include_lib("couch/include/couch_eunit.hrl"). -include_lib("couch_mrview/include/couch_mrview.hrl"). --include_lib("fabric/include/fabric2.hrl"). -include_lib("couch_views/include/couch_views.hrl"). +-include_lib("fabric/test/fabric2_test.hrl"). -% N.B., we should move to couch_ejson_size instead -% of erlang:external_size -% -% to calculate view size: -% total = 0 -% for (fdb_k, fdb_v) in VIEW_MAP_RANGE: -% {EncUserKey, EncUserval} = erlfdb_tuple:unpack(fdb_v), -% UserKey = couch_views_encoding:decode(EncUserKey), -% UserVal = couch_views_encoding:decode(EncUserVal), -% total += erlang:external_size(UserKey), -% total += erlang:external_size(UserVal) -% -% Our goal in checking the size calculations is that we cover -% as much of the possible key mutation space as possible while -% not relying on fuzzing out the edge cases. Conceptually we have -% two sets of keys E and U. E is keys as currently exist in the -% view, and U is the new set of keys corresponding to an update. -% -% Both sets E and U have the same possible set of state variables: -% -% 1. N unique keys, where 0 =< N =< infinity -% 2. D keys with duplicates, where 0 =< D =< N, -% 3. R repeats for each member of D, for 2 =< R =< infinity -% -% Given two sets S1 and S2, we then have a set of transition variables: -% -% 1. deltaN - shared unique keys, where 0 =< deltaN =< N -% 2. deltaD - shared duplicates, where 0 =< deltaD =< N -% 3. deltaR - shared repeats for each D, where 2 =< deltaR =< infinity -% -% To search our state transition space, we can create two functions to -% first define our start and end states, and for each transition we have -% a function that defines the shared overlap between states. -% -% Given a list of transitions are checks then become simple in that -% we can iterate over each transition checking that our index is valid -% after each one. Index validation will purely look at the existing -% state of the index in fdb and validate correctness. - --define(NUM_SINGLE_TESTS, 100). --define(NUM_MULTI_TESTS, 100). - --define(N_DOMAIN, [0, 1, 2, 5]). --define(D_DOMAIN, [0, 1, 2, 5]). --define(R_DOMAIN, [2, 4]). - --define(DELTA_N_DOMAIN, [0, 1, 2, 5]). --define(DELTA_D_DOMAIN, [0, 1, 2, 5]). --define(DELTA_R_DOMAIN, [1, 2, 4]). - - -generate_sets() -> - permute(?N_DOMAIN, ?D_DOMAIN, ?R_DOMAIN, fun(N, D, R) -> - % We can't have more duplicates than total keys - case D > N of - true -> throw(skip); - false -> ok - end, - - % Only include one of the repeat values - % for our zero sets - case D == 0 of - true when R == 2 -> ok; - true -> throw(skip); - false -> ok - end, - - % Replace R with a sentinel value for sanity - % when there are no dupes to have repeats - ActualR = if D == 0 -> 0; true -> R end, - - {N, D, ActualR} - end). - - -generate_transitions() -> - Sets = generate_sets(), - Pairs = [{Set1, Set2} || Set1 <- Sets, Set2 <- Sets], - lists:flatmap(fun({{N1, D1, _R1} = S1, {N2, D2, _R2} = S2}) -> - Filter = fun(DeltaN, DeltaD, DeltaR) -> - % Can't share more keys than the smaller of the - % two sets - case DeltaN > min(N1, N2) of - true -> throw(skip); - false -> ok - end, - - % For DeltaD == 0, all combinations of DeltaD and - % DeltaR are equivalent tests - case DeltaN == 0 of - true when DeltaD == 0, DeltaR == 1 -> ok; - true -> throw(skip); - false -> ok - end, - - % Can't share more dupes than exist in either set - % or the total number of shared keys - case DeltaD > min(D1, D2) orelse DeltaD > DeltaN of - true -> throw(skip); - false -> ok - end, - - % For DeltaD == 0, all DeltaR correspond to the - % same test so only include one instance - case DeltaD == 0 of - true when DeltaR == 1 -> ok; - true -> throw(skip); - false -> ok - end, - - % If we have more non-repeated keys in our - % transition than there's "room" for in the target - % set it isn't a valid test case. - TransitionNonRepeats = DeltaN - DeltaD, - TargetNonRepeats = N2 - D2, - case TransitionNonRepeats > TargetNonRepeats of - true -> throw(skip); - false -> ok - end, - - {S1, S2, {DeltaN, DeltaD, DeltaR}} - end, - permute(?DELTA_N_DOMAIN, ?DELTA_D_DOMAIN, ?DELTA_R_DOMAIN, Filter) - end, Pairs). - - -permute(NList, DList, RList, Filter) -> - % Technically we could call into Filter in each - % outer loops to conditionally skip inner loops. - % If someone comes along looking to speed up the - % fixture setup time, this would likely be an - % easy win. - lists:foldl(fun(N, NAcc) -> - lists:foldl(fun(D, DAcc) -> - lists:foldl(fun(R, RAcc) -> - try - [Filter(N, D, R) | RAcc] - catch throw:skip -> - RAcc - end - end, DAcc, RList) - end, NAcc, DList) - end, [], NList). - - -row_transition_test_() -> + +-define(MAP_FUN1, <<"map_fun1">>). +-define(MAP_FUN2, <<"map_fun2">>). + + +indexer_test_() -> { - "Test view size tracking", + "Test view indexing", { setup, fun setup/0, fun cleanup/1, - fun create_transition_tests/1 + { + foreach, + fun foreach_setup/0, + fun foreach_teardown/1, + [ + ?TDEF_FE(empty_view), + ?TDEF_FE(single_doc), + ?TDEF_FE(multiple_docs), + ?TDEF_FE(update_no_size_change), + ?TDEF_FE(update_increases_size), + ?TDEF_FE(update_decreases_size), + ?TDEF_FE(deleting_docs_decreases_size), + ?TDEF_FE(multi_identical_keys_count_twice), + ?TDEF_FE(multiple_design_docs), + ?TDEF_FE(multiple_identical_design_docs) + ] + } } }. @@ -183,387 +59,298 @@ setup() -> couch_js, couch_views ]), - {ok, Db} = fabric2_db:create(?tempdb(), [{user_ctx, ?ADMIN_USER}]), - {Ctx, Db}. + Ctx. -cleanup({Ctx, Db}) -> - ok = fabric2_db:delete(fabric2_db:name(Db), []), +cleanup(Ctx) -> test_util:stop_couch(Ctx). -create_transition_tests({_Ctx, Db}) -> - try - throw(disabled), - Transitions = generate_transitions(), - Single = lists:flatmap(fun(T) -> - Name = lists:flatten(io_lib:format("single ~s", [tname(T)])), - [{Name, fun() -> check_single_transition(Db, T) end}] - end, lists:sort(Transitions)), - Multi = lists:flatmap(fun(T) -> - Name = lists:flatten(io_lib:format("multi ~s", [tname(T)])), - [{Name, fun() -> check_multi_transition(Db, T) end}] - end, lists:sort(group(shuffle(Transitions)))), - subset(?NUM_SINGLE_TESTS, Single) ++ subset(?NUM_MULTI_TESTS, Multi) - catch throw:disabled -> - [{"Disabled", fun() -> ok end}] - end. - - -check_single_transition(Db, {Set1, Set2, Transition}) -> - clear_views(Db), - InitKVs = init_set(Set1, [a, b, c, d, e]), - CommonKVs = reduce_set(Transition, InitKVs), - FinalKVs = fill_set(Set2, CommonKVs, [v, w, x, y, z]), - {InitJSONKVs, Bindings} = unlabel(InitKVs, #{}), - {FinalJSONKVs, _} = unlabel(FinalKVs, Bindings), - - Sig = couch_uuids:random(), - DocId = couch_uuids:random(), - - fabric2_fdb:transactional(Db, fun(TxDb) -> - write_docs(TxDb, Sig, [make_doc(DocId, InitJSONKVs)]) - end), - - fabric2_fdb:transactional(Db, fun(TxDb) -> - write_docs(TxDb, Sig, [make_doc(DocId, FinalJSONKVs)]) - end), - - validate_index(Db, Sig, #{DocId => FinalJSONKVs}). - - -check_multi_transition(Db, Transitions) -> - clear_views(Db), - - {Docs, IdMap} = lists:mapfoldl(fun({Set1, Set2, Transition}, IdMapAcc) -> - DocId = couch_uuids:random(), - InitKVs = init_set(Set1, [a, b, c, d, e]), - CommonKVs = reduce_set(Transition, InitKVs), - FinalKVs = fill_set(Set2, CommonKVs, [v, w, x, y, z]), - {InitJSONKVs, Bindings} = unlabel(InitKVs, #{}), - {FinalJSONKVs, _} = unlabel(FinalKVs, Bindings), - InitDoc = make_doc(DocId, InitJSONKVs), - FinalDoc = make_doc(DocId, FinalJSONKVs), - {{InitDoc, FinalDoc}, maps:put(DocId, FinalJSONKVs, IdMapAcc)} - end, #{}, Transitions), - - {InitDocs, FinalDocs} = lists:unzip(Docs), - - Sig = couch_uuids:random(), - - fabric2_fdb:transactional(Db, fun(TxDb) -> - write_docs(TxDb, Sig, InitDocs) - end), - - fabric2_fdb:transactional(Db, fun(TxDb) -> - write_docs(TxDb, Sig, FinalDocs) - end), - - validate_index(Db, Sig, IdMap). - - -clear_views(Db) -> - fabric2_fdb:transactional(Db, fun(TxDb) -> - #{ - tx := Tx, - db_prefix := DbPrefix - } = TxDb, - {Start, End} = erlfdb_tuple:range({?DB_VIEWS}, DbPrefix), - erlfdb:clear_range(Tx, Start, End), - - GlobalKey = {?DB_STATS, <<"sizes">>, <<"views">>}, - BinGlobalKey = erlfdb_tuple:pack(GlobalKey, DbPrefix), - erlfdb:set(Tx, BinGlobalKey, ?uint2bin(0)) - end). - - -write_docs(TxDb, Sig, Docs) -> - Mrst = #mrst{ - sig = Sig, - views = [#mrview{ - id_num = 1 - }] +foreach_setup() -> + config:set("couch_views", "view_btree_node_size", "4", false), + {ok, Db} = fabric2_db:create(?tempdb(), [{user_ctx, ?ADMIN_USER}]), + Db. + + +foreach_teardown(Db) -> + meck:unload(), + config:delete("couch_views", "change_limit"), + ok = fabric2_db:delete(fabric2_db:name(Db), []). + + +empty_view(Db) -> + DDoc = create_ddoc(), + ?assertEqual(0, view_size(Db)), + {ok, _} = fabric2_db:update_doc(Db, DDoc, []), + {ok, _} = run_query(Db, DDoc, ?MAP_FUN1), + ?assertEqual(0, view_size(Db)). + + +single_doc(Db) -> + DDoc = create_ddoc(), + Doc1 = doc(0), + + {ok, _} = fabric2_db:update_doc(Db, DDoc, []), + {ok, _} = fabric2_db:update_doc(Db, Doc1, []), + {ok, _} = run_query(Db, DDoc, ?MAP_FUN1), + + % Row: key: 0, row: 0 + % Bytes: key: 1, row: 1 + % Total: 1 + 1 = 2 + ?assertEqual(2, view_size(Db)). + + +multiple_docs(Db) -> + DDoc = create_ddoc(), + Docs = [doc(I) || I <- lists:seq(0, 49)], + + {ok, _} = fabric2_db:update_doc(Db, DDoc, []), + {ok, _} = fabric2_db:update_docs(Db, Docs, []), + {ok, _} = run_query(Db, DDoc, ?MAP_FUN1), + + % Rows 0-9: 1 + 1 = 2 + % Rows 10->49: 2 + 2 = 4 + % 10 * 2 + 40 * 4 = 180 + ?assertEqual(180, view_size(Db)). + + +update_no_size_change(Db) -> + DDoc = create_ddoc(), + Doc1 = doc(0), + + {ok, _} = fabric2_db:update_doc(Db, DDoc, []), + {ok, {Pos, Rev}} = fabric2_db:update_doc(Db, Doc1, []), + {ok, _} = run_query(Db, DDoc, ?MAP_FUN1), + + ?assertEqual(2, view_size(Db)), + + Doc2 = Doc1#doc{ + revs = {Pos, [Rev]}, + body = {[{<<"val">>, 1}]} }, - IdxState = #{ - last_seq => <<"foo">> + {ok, _} = fabric2_db:update_doc(Db, Doc2, []), + {ok, _} = run_query(Db, DDoc, ?MAP_FUN1), + + % Row became: key: 1, val: 1 + % 1 + 1 = 2 so samesies + ?assertEqual(2, view_size(Db)). + + +update_increases_size(Db) -> + DDoc = create_ddoc(), + Doc1 = doc(0), + + {ok, _} = fabric2_db:update_doc(Db, DDoc, []), + {ok, {Pos, Rev}} = fabric2_db:update_doc(Db, Doc1, []), + {ok, _} = run_query(Db, DDoc, ?MAP_FUN1), + + ?assertEqual(2, view_size(Db)), + + Doc2 = Doc1#doc{ + revs = {Pos, [Rev]}, + body = {[{<<"val">>, 10}]} }, - couch_views_indexer:write_docs(TxDb, Mrst, Docs, IdxState). - - -validate_index(Db, Sig, JSONRows) -> - #{ - db_prefix := DbPrefix - } = Db, - Rows = fabric2_fdb:transactional(Db, fun(TxDb) -> - #{ - tx := Tx - } = TxDb, - {Start, End} = erlfdb_tuple:range({?DB_VIEWS}, DbPrefix), - erlfdb:get_range(Tx, Start, End) - end), - - InitAcc = #{ - row_count => 0, - kv_size => 0, - ids => #{}, - rows => [] + {ok, _} = fabric2_db:update_doc(Db, Doc2, []), + {ok, _} = run_query(Db, DDoc, ?MAP_FUN1), + + % Row became: key: 10, val: 10 + % 2 + 2 = 4 + ?assertEqual(4, view_size(Db)). + + +update_decreases_size(Db) -> + DDoc = create_ddoc(), + Doc1 = doc(10), + + {ok, _} = fabric2_db:update_doc(Db, DDoc, []), + {ok, {Pos, Rev}} = fabric2_db:update_doc(Db, Doc1, []), + {ok, _} = run_query(Db, DDoc, ?MAP_FUN1), + + % Row: key: 10, val: 10 + % 2 + 2 = 4 + ?assertEqual(4, view_size(Db)), + + Doc2 = Doc1#doc{ + revs = {Pos, [Rev]}, + body = {[{<<"val">>, 0}]} }, + {ok, _} = fabric2_db:update_doc(Db, Doc2, []), + {ok, _} = run_query(Db, DDoc, ?MAP_FUN1), - MapData = lists:foldl(fun({Key, Value}, Acc) -> - case erlfdb_tuple:unpack(Key, DbPrefix) of - {?DB_VIEWS, ?VIEW_INFO, ?VIEW_UPDATE_SEQ, Sig} -> - ?assertEqual(<<"foo">>, Value), - Acc; - {?DB_VIEWS, ?VIEW_INFO, ?VIEW_ROW_COUNT, Sig, 1} -> - maps:put(row_count, ?bin2uint(Value), Acc); - {?DB_VIEWS, ?VIEW_INFO, ?VIEW_KV_SIZE, Sig, 1} -> - maps:put(kv_size, ?bin2uint(Value), Acc); - {?DB_VIEWS, ?VIEW_DATA, Sig, ?VIEW_ID_RANGE, DocId, 1} -> - [ - TotalKeys, TotalSize, UniqueKeys - ] = couch_views_encoding:decode(Value), - maps:update_with(ids, fun(Ids) -> - false = maps:is_key(DocId, Ids), - maps:put(DocId, {TotalKeys, TotalSize, UniqueKeys}, Ids) - end, Acc); - {?DB_VIEWS, ?VIEW_DATA, Sig, ?VIEW_MAP_RANGE, 1, MapKey, _DupeId} -> - {EncKey, DocId} = MapKey, - {UserKey, UserVal} = erlfdb_tuple:unpack(Value), - - UserJsonKey = couch_views_encoding:decode(UserKey), - UserJsonVal = couch_views_encoding:decode(UserVal), - - ?assertEqual( - EncKey, - couch_views_encoding:encode(UserJsonKey, key) - ), - - maps:update_with(rows, fun(RAcc) -> - [{DocId, UserJsonKey, UserJsonVal} | RAcc] - end, Acc) - end - end, InitAcc, Rows), - - #{ - row_count := RowCount, - kv_size := KVSize, - ids := MapIds, - rows := MapRows - } = MapData, - - SumFun = fun(_DocId, {TotalKVs, TotalSize, _UniqueKeys}, {KVAcc, SAcc}) -> - {KVAcc + TotalKVs, SAcc + TotalSize} - end, - {SumKVCount, SumKVSize} = maps:fold(SumFun, {0, 0}, MapIds), - ?assertEqual(RowCount, length(MapRows)), - ?assertEqual(RowCount, SumKVCount), - ?assertEqual(KVSize, SumKVSize), - ?assert(KVSize >= 0), - - fabric2_fdb:transactional(Db, fun(TxDb) -> - GlobalSize = get_global_size(TxDb), - ?assertEqual(KVSize, GlobalSize), - - ViewSize = couch_views_fdb:get_kv_size(TxDb, #mrst{sig = Sig}, 1), - ?assertEqual(KVSize, ViewSize) - end), - - % Compare our raw JSON rows to what was indexed - IdsFromJSONRows = maps:fold(fun(DocId, DocRows, IdAcc) -> - FinalAcc = lists:foldl(fun({JsonKey, JsonVal}, {CAcc, SAcc, UAcc}) -> - KeySize = erlang:external_size(JsonKey), - ValSize = erlang:external_size(JsonVal), - NewUnique = lists:usort([JsonKey | UAcc]), - {CAcc + 1, SAcc + KeySize + ValSize, NewUnique} - end, {0, 0, []}, DocRows), - if FinalAcc == {0, 0, []} -> IdAcc; true -> - maps:put(DocId, FinalAcc, IdAcc) - end - end, #{}, JSONRows), - ?assertEqual(MapIds, IdsFromJSONRows), - - % Compare the found id entries to our row data - IdsFromMapRows = lists:foldl(fun({DocId, JsonKey, JsonVal}, Acc) -> - KeySize = erlang:external_size(JsonKey), - ValSize = erlang:external_size(JsonVal), - Default = {1, KeySize + ValSize, [JsonKey]}, - maps:update_with(DocId, fun({TotalKVs, TotalSize, UniqueKeys}) -> - NewUnique = lists:usort([JsonKey | UniqueKeys]), - {TotalKVs + 1, TotalSize + KeySize + ValSize, NewUnique} - end, Default, Acc) - end, #{}, MapRows), - ?assertEqual(MapIds, IdsFromMapRows). - - -make_doc(DocId, []) -> - case rand:uniform() < 0.5 of - true -> - #{ - id => DocId, - deleted => true, - results => [[]] - }; - false -> - #{ - id => DocId, - deleted => false, - results => [[]] - } - end; -make_doc(DocId, Results) -> - #{ - id => DocId, - deleted => false, - results => [Results] - }. + % Row became: key: 0, val: 0 + % 1 + 1 = 2 + ?assertEqual(2, view_size(Db)). + + +deleting_docs_decreases_size(Db) -> + DDoc = create_ddoc(), + Doc1 = doc(0), + + {ok, _} = fabric2_db:update_doc(Db, DDoc, []), + {ok, {Pos, Rev}} = fabric2_db:update_doc(Db, Doc1, []), + {ok, _} = run_query(Db, DDoc, ?MAP_FUN1), + + ?assertEqual(2, view_size(Db)), + + Doc2 = Doc1#doc{ + revs = {Pos, [Rev]}, + deleted = true, + body = {[{<<"val">>, 1}]} + }, + {ok, _} = fabric2_db:update_doc(Db, Doc2, []), + {ok, []} = run_query(Db, DDoc, ?MAP_FUN1), + + ?assertEqual(0, view_size(Db)). -get_global_size(TxDb) -> - #{ - tx := Tx, - db_prefix := DbPrefix - } = TxDb, - GlobalKey = {?DB_STATS, <<"sizes">>, <<"views">>}, - BinGlobalKey = erlfdb_tuple:pack(GlobalKey, DbPrefix), - ?bin2uint(erlfdb:wait(erlfdb:get(Tx, BinGlobalKey))). - - -init_set({N, D, R}, Labels) -> - {Dupes, RestLabels} = fill_keys(D, Labels, []), - {Unique, _} = fill_keys(N - D, RestLabels, []), - % Sanity assertions - N = length(Unique) + length(Dupes), - D = length(Dupes), - {Unique, [{Key, R} || Key <- Dupes]}. - - -reduce_set({DeltaN, DeltaD, DeltaR}, {Unique, Dupes}) -> - NewDupes = lists:sublist(Dupes, DeltaD), - NewUnique = lists:sublist(Unique, DeltaN - DeltaD), - {NewUnique, [{Key, DeltaR} || {Key, _} <- NewDupes]}. - - -fill_set({N, D, R}, {Unique, Dupes}, Labels) -> - AddDupes = D - length(Dupes), - {NewDupes, RestLabels} = fill_keys(AddDupes, Labels, Dupes), - - AddUnique = N - length(Unique) - length(NewDupes), - {NewUnique, _} = fill_keys(AddUnique, RestLabels, Unique), - % Sanity assertions - N = length(NewUnique) + length(NewDupes), - D = length(NewDupes), - {NewUnique, lists:map(fun(Dupe) -> - case Dupe of - {_, _} -> Dupe; - A when is_atom(A) -> {A, R} - end - end, NewDupes)}. - - -fill_keys(0, Labels, Acc) -> - {Acc, Labels}; -fill_keys(Count, [Label | RestLabels], Acc) when Count > 0 -> - fill_keys(Count - 1, RestLabels, [Label | Acc]). - - -unlabel({Unique, Dupes}, Bindings) -> - lists:foldl(fun(Item, {KVAcc, BindingsAcc}) -> - {KVs, NewBindingsAcc} = unlabel_item(Item, BindingsAcc), - {KVs ++ KVAcc, NewBindingsAcc} - end, {[], Bindings}, Unique ++ Dupes). - - -unlabel_item(Label, Bindings) when is_atom(Label) -> - NewBindings = maybe_bind(Label, Bindings), - KV = maps:get(Label, NewBindings), - {[KV], NewBindings}; -unlabel_item({Label, Count}, Bindings) when is_atom(Label), is_integer(Count) -> - NewBindings = maybe_bind(Label, Bindings), - {K, _} = KV = maps:get(Label, NewBindings), - ToAdd = lists:map(fun(_) -> - {K, gen_value()} - end, lists:seq(1, Count - 1)), - {[KV | ToAdd], NewBindings}. - - -maybe_bind(Label, Bindings) -> - case maps:is_key(Label, Bindings) of - true -> - case rand:uniform() < 0.5 of - true -> - rebind(Label, Bindings); - false -> - Bindings - end; - false -> - bind(Label, Bindings) - end. - - -bind(Label, Bindings) -> - maps:put(Label, {gen_key(), gen_value()}, Bindings). - - -rebind(Label, Bindings) -> - {Key, _} = maps:get(Label, Bindings), - maps:put(Label, {Key, gen_value()}, Bindings). - - -gen_key() -> - Unique = couch_uuids:random(), - case rand:uniform() of - N when N < 0.2 -> - [Unique, true, rand:uniform()]; - N when N < 0.4 -> - {[{Unique, true}, {<<"foo">>, [<<"bar">>, null, 1, {[]}]}]}; - _ -> - Unique - end. - - -gen_value() -> - case rand:uniform() of - N when N < 0.2 -> - [false, rand:uniform(), {[]}]; - N when N < 0.4 -> - {[{<<"a">>, 1}, {<<"b">>, 2}]}; - N when N < 0.6 -> - rand:uniform(100); - N when N < 0.8 -> - rand:uniform(); - _ -> - 1 - end. - - -group(Items) -> - case length(Items) > 5 of - true -> - {Group, Rest} = lists:split(5, Items), - [lists:sort(Group) | group(Rest)]; - false when Items == [] -> - []; - false -> - [lists:sort(Items)] - end. - - -shuffle(Items) -> - Tagged = [{rand:uniform(), I} || I <- Items], - Sorted = lists:sort(Tagged), - [I || {_T, I} <- Sorted]. - - -subset(Count, Items) -> - Random = shuffle(Items), - Take = lists:sublist(Random, Count), - lists:sort(Take). - - -tname([]) -> - []; -tname([Transition | RestTransitions]) -> - [tname(Transition) | tname(RestTransitions)]; -tname({{N1, D1, R1}, {N2, D2, R2}, {DN, DD, DR}}) -> - io_lib:format("~b~b~b~b~b~b~b~b~b", [N1, D1, R1, N2, D2, R2, DN, DD, DR]). +multi_identical_keys_count_twice(Db) -> + DDoc = create_ddoc(multi_emit_same), + Doc = doc(0), + + {ok, _} = fabric2_db:update_doc(Db, DDoc, []), + {ok, _} = fabric2_db:update_doc(Db, Doc, []), + {ok, _} = run_query(Db, DDoc, ?MAP_FUN1), + + % Two rows that are the same + ?assertEqual(4, view_size(Db)). + + +multiple_design_docs(Db) -> + Cleanup = fun() -> + fabric2_fdb:transactional(Db, fun(TxDb) -> + DDocs = fabric2_db:get_design_docs(Db), + ok = couch_views:cleanup_indices(TxDb, DDocs) + end) + end, + + DDoc1 = create_ddoc(simple, <<"_design/bar1">>), + DDoc2 = create_ddoc(multi_emit_same, <<"_design/bar2">>), + + % Simple test as before + {ok, _} = fabric2_db:update_doc(Db, doc(0), []), + {ok, {Pos1, Rev1}} = fabric2_db:update_doc(Db, DDoc1, []), + {ok, _} = run_query(Db, DDoc1, ?MAP_FUN1), + ?assertEqual(2, view_size(Db)), + + % Adding a second ddoc increases the size + {ok, {Pos2, Rev2}} = fabric2_db:update_doc(Db, DDoc2, []), + {ok, _} = run_query(Db, DDoc2, ?MAP_FUN1), + ?assertEqual(6, view_size(Db)), + + % Removing the first ddoc decreases the size + DDoc1Del = DDoc1#doc{revs = {Pos1, [Rev1]}, deleted = true}, + {ok, _} = fabric2_db:update_doc(Db, DDoc1Del, []), + Cleanup(), + ?assertEqual(4, view_size(Db)), + + % Removing the second ddoc drops the size + DDoc2Del = DDoc2#doc{revs = {Pos2, [Rev2]}, deleted = true}, + {ok, _} = fabric2_db:update_doc(Db, DDoc2Del, []), + Cleanup(), + ?assertEqual(0, view_size(Db)). + + +multiple_identical_design_docs(Db) -> + Cleanup = fun() -> + fabric2_fdb:transactional(Db, fun(TxDb) -> + DDocs = fabric2_db:get_design_docs(Db), + ok = couch_views:cleanup_indices(TxDb, DDocs) + end) + end, + DDoc1 = create_ddoc(simple, <<"_design/bar1">>), + DDoc2 = create_ddoc(simple, <<"_design/bar2">>), + + % Simple test as before + {ok, _} = fabric2_db:update_doc(Db, doc(0), []), + {ok, {Pos1, Rev1}} = fabric2_db:update_doc(Db, DDoc1, []), + {ok, _} = run_query(Db, DDoc1, ?MAP_FUN1), + ?assertEqual(2, view_size(Db)), + + % Adding a second ddoc with the same sig does not double the size + {ok, {Pos2, Rev2}} = fabric2_db:update_doc(Db, DDoc2, []), + {ok, _} = run_query(Db, DDoc2, ?MAP_FUN1), + ?assertEqual(2, view_size(Db)), + + % Removing the first ddoc does not decrease the size + DDoc1Del = DDoc1#doc{revs = {Pos1, [Rev1]}, deleted = true}, + {ok, _} = fabric2_db:update_doc(Db, DDoc1Del, []), + Cleanup(), + ?assertEqual(2, view_size(Db)), + + % Removing the second ddoc drops the size + DDoc2Del = DDoc2#doc{revs = {Pos2, [Rev2]}, deleted = true}, + {ok, _} = fabric2_db:update_doc(Db, DDoc2Del, []), + Cleanup(), + ?assertEqual(0, view_size(Db)). + + +view_size(Db) -> + {ok, Info} = fabric2_db:get_db_info(Db), + {sizes, {Sizes}} = lists:keyfind(sizes, 1, Info), + {<<"views">>, ViewSize} = lists:keyfind(<<"views">>, 1, Sizes), + ViewSize. + + +create_ddoc() -> + create_ddoc(simple). + + +create_ddoc(Type) -> + create_ddoc(Type, <<"_design/bar">>). + + +create_ddoc(simple, DocId) when is_binary(DocId) -> + couch_doc:from_json_obj({[ + {<<"_id">>, DocId}, + {<<"views">>, {[ + {?MAP_FUN1, {[ + {<<"map">>, <<"function(doc) {emit(doc.val, doc.val);}">>} + ]}}, + {?MAP_FUN2, {[ + {<<"map">>, <<"function(doc) {}">>} + ]}} + ]}} + ]}); + +create_ddoc(multi_emit_same, DocId) when is_binary(DocId) -> + couch_doc:from_json_obj({[ + {<<"_id">>, DocId}, + {<<"views">>, {[ + {?MAP_FUN1, {[ + {<<"map">>, <<"function(doc) { " + "emit(doc.val, doc.val * 2); " + "emit(doc.val, doc.val); " + "if(doc.extra) {" + " emit(doc.val, doc.extra);" + "}" + "}">>} + ]}}, + {?MAP_FUN2, {[ + {<<"map">>, <<"function(doc) {}">>} + ]}} + ]}} + ]}). + + +doc(Id) -> + doc(Id, Id). + + +doc(Id, Val) -> + couch_doc:from_json_obj({[ + {<<"_id">>, list_to_binary(integer_to_list(Id))}, + {<<"val">>, Val} + ]}). + + +run_query(#{} = Db, DDoc, <<_/binary>> = View) -> + couch_views:query(Db, DDoc, View, fun fold_fun/2, [], #mrargs{}). + + +fold_fun({meta, _Meta}, Acc) -> + {ok, Acc}; +fold_fun({row, _} = Row, Acc) -> + {ok, [Row | Acc]}; +fold_fun(complete, Acc) -> + {ok, lists:reverse(Acc)}. \ No newline at end of file -- cgit v1.2.1 From ef6b60b760a79542df31df9c7ad64d737b860c92 Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Fri, 18 Sep 2020 11:05:00 -0500 Subject: Upgrade legacy views --- src/couch_views/include/couch_views.hrl | 4 + src/couch_views/src/couch_views_fdb.erl | 180 ++++++++-- src/couch_views/src/couch_views_indexer.erl | 8 +- src/couch_views/test/couch_views_upgrade_test.erl | 400 ++++++++++++++++++++++ 4 files changed, 551 insertions(+), 41 deletions(-) create mode 100644 src/couch_views/test/couch_views_upgrade_test.erl diff --git a/src/couch_views/include/couch_views.hrl b/src/couch_views/include/couch_views.hrl index 388219118..92b8f46fb 100644 --- a/src/couch_views/include/couch_views.hrl +++ b/src/couch_views/include/couch_views.hrl @@ -10,6 +10,9 @@ % License for the specific language governing permissions and limitations under % the License. +% Current implementation version +-define(CURRENT_VIEW_IMPL_VERSION, 1). + % Index info/data subspaces -define(VIEW_INFO, 0). -define(VIEW_DATA, 1). @@ -21,6 +24,7 @@ -define(VIEW_KV_SIZE, 2). -define(VIEW_BUILD_STATUS, 3). -define(VIEW_CREATION_VS, 4). +-define(VIEW_IMPL_VERSION, 5). % Data keys -define(VIEW_ID_RANGE, 0). diff --git a/src/couch_views/src/couch_views_fdb.erl b/src/couch_views/src/couch_views_fdb.erl index f238a8f18..28a60b872 100644 --- a/src/couch_views/src/couch_views_fdb.erl +++ b/src/couch_views/src/couch_views_fdb.erl @@ -13,6 +13,8 @@ -module(couch_views_fdb). -export([ + get_view_state/2, + new_interactive_index/3, new_creation_vs/3, get_creation_vs/2, @@ -40,52 +42,89 @@ -include_lib("fabric/include/fabric2.hrl"). -new_interactive_index(Db, Mrst, VS) -> - couch_views_fdb:new_creation_vs(Db, Mrst, VS), - couch_views_fdb:set_build_status(Db, Mrst, ?INDEX_BUILDING). +get_view_state(Db, #mrst{} = Mrst) -> + get_view_state(Db, Mrst#mrst.sig); + +get_view_state(Db, Sig) when is_binary(Sig) -> + #{ + tx := Tx + } = Db, + + VersionF = erlfdb:get(Tx, version_key(Db, Sig)), + ViewSeqF = erlfdb:get(Tx, seq_key(Db, Sig)), + ViewVSF = erlfdb:get(Tx, creation_vs_key(Db, Sig)), + BuildStatusF = erlfdb:get(Tx, build_status_key(Db, Sig)), + + Version = case erlfdb:wait(VersionF) of + not_found -> not_found; + VsnVal -> element(1, erlfdb_tuple:unpack(VsnVal)) + end, + + ViewSeq = case erlfdb:wait(ViewSeqF) of + not_found -> <<>>; + SeqVal -> SeqVal + end, + + ViewVS = case erlfdb:wait(ViewVSF) of + not_found -> not_found; + VSVal -> element(1, erlfdb_tuple:unpack(VSVal)) + end, + + State = #{ + version => Version, + view_seq => ViewSeq, + view_vs => ViewVS, + build_status => erlfdb:wait(BuildStatusF) + }, + + maybe_upgrade_view(Db, Sig, State). + + +new_interactive_index(Db, #mrst{} = Mrst, VS) -> + new_interactive_index(Db, Mrst#mrst.sig, VS); + +new_interactive_index(Db, Sig, VS) -> + set_version(Db, Sig), + new_creation_vs(Db, Sig, VS), + set_build_status(Db, Sig, ?INDEX_BUILDING). %Interactive View Creation Versionstamp %(, ?DB_VIEWS, ?VIEW_INFO, ?VIEW_CREATION_VS, Sig) = VS new_creation_vs(TxDb, #mrst{} = Mrst, VS) -> + new_creation_vs(TxDb, Mrst#mrst.sig, VS); + +new_creation_vs(TxDb, Sig, VS) -> #{ tx := Tx } = TxDb, - Key = creation_vs_key(TxDb, Mrst#mrst.sig), + Key = creation_vs_key(TxDb, Sig), Value = erlfdb_tuple:pack_vs({VS}), ok = erlfdb:set_versionstamped_value(Tx, Key, Value). -get_creation_vs(TxDb, #mrst{} = Mrst) -> - get_creation_vs(TxDb, Mrst#mrst.sig); - -get_creation_vs(TxDb, Sig) -> +get_creation_vs(TxDb, MrstOrSig) -> #{ - tx := Tx - } = TxDb, - Key = creation_vs_key(TxDb, Sig), - case erlfdb:wait(erlfdb:get(Tx, Key)) of - not_found -> - not_found; - EK -> - {VS} = erlfdb_tuple:unpack(EK), - VS - end. + view_vs := ViewVS + } = get_view_state(TxDb, MrstOrSig), + ViewVS. %Interactive View Build Status %(, ?DB_VIEWS, ?VIEW_INFO, ?VIEW_BUILD_STATUS, Sig) = INDEX_BUILDING | INDEX_READY -get_build_status(TxDb, #mrst{sig = Sig}) -> +get_build_status(TxDb, MrstOrSig) -> #{ - tx := Tx - } = TxDb, - Key = build_status_key(TxDb, Sig), - erlfdb:wait(erlfdb:get(Tx, Key)). + build_status := BuildStatus + } = get_view_state(TxDb, MrstOrSig), + BuildStatus. -set_build_status(TxDb, #mrst{sig = Sig}, State) -> +set_build_status(TxDb, #mrst{} = Mrst, State) -> + set_build_status(TxDb, Mrst#mrst.sig, State); + +set_build_status(TxDb, Sig, State) -> #{ tx := Tx } = TxDb, @@ -98,24 +137,18 @@ set_build_status(TxDb, #mrst{sig = Sig}, State) -> % (, ?DB_VIEWS, Sig, ?VIEW_UPDATE_SEQ) = Sequence -get_update_seq(TxDb, #mrst{sig = Sig}) -> +get_update_seq(TxDb, MrstOrSig) -> #{ - tx := Tx, - db_prefix := DbPrefix - } = TxDb, - - case erlfdb:wait(erlfdb:get(Tx, seq_key(DbPrefix, Sig))) of - not_found -> <<>>; - UpdateSeq -> UpdateSeq - end. + view_seq := ViewSeq + } = get_view_state(TxDb, MrstOrSig), + ViewSeq. set_update_seq(TxDb, Sig, Seq) -> #{ - tx := Tx, - db_prefix := DbPrefix + tx := Tx } = TxDb, - ok = erlfdb:set(Tx, seq_key(DbPrefix, Sig), Seq). + ok = erlfdb:set(Tx, seq_key(TxDb, Sig), Seq). list_signatures(Db) -> @@ -139,7 +172,10 @@ clear_index(Db, Signature) -> % Get view size to remove from global counter SizeTuple = {?DB_VIEWS, ?VIEW_INFO, ?VIEW_KV_SIZE, Signature}, SizeKey = erlfdb_tuple:pack(SizeTuple, DbPrefix), - ViewSize = ?bin2uint(erlfdb:wait(erlfdb:get(Tx, SizeKey))), + ViewSize = case erlfdb:wait(erlfdb:get(Tx, SizeKey)) of + not_found -> 0; + SizeVal -> ?bin2uint(SizeVal) + end, % Clear index info keys Keys = [ @@ -207,7 +243,75 @@ update_kv_size(TxDb, Sig, OldSize, NewSize) -> erlfdb:add(Tx, DbKey, NewSize - OldSize). -seq_key(DbPrefix, Sig) -> +maybe_upgrade_view(_Db, _Sig, #{version := ?CURRENT_VIEW_IMPL_VERSION} = St) -> + St; +maybe_upgrade_view(Db, Sig, #{version := not_found, view_seq := <<>>} = St) -> + % If we haven't started building the view yet + % then we don't change view_vs and build_status + % as they're still correct. + set_version(Db, Sig), + St#{ + version => ?CURRENT_VIEW_IMPL_VERSION, + view_seq => <<>> + }; +maybe_upgrade_view(Db, Sig, #{version := not_found} = St) -> + clear_index(Db, Sig), + set_version(Db, Sig), + {ViewVS, BuildStatus} = reset_interactive_index(Db, Sig, St), + #{ + version => ?CURRENT_VIEW_IMPL_VERSION, + view_seq => <<>>, + view_vs => ViewVS, + build_status => BuildStatus + }. + + +set_version(Db, Sig) -> + #{ + tx := Tx + } = Db, + Key = version_key(Db, Sig), + Val = erlfdb_tuple:pack({?CURRENT_VIEW_IMPL_VERSION}), + erlfdb:set(Tx, Key, Val). + + +reset_interactive_index(_Db, _Sig, #{view_vs := not_found}) -> + % Not an interactive index + {not_found, not_found}; +reset_interactive_index(Db, Sig, _St) -> + % We have to reset the creation versionstamp + % to the current update seq of the database + % or else we'll not have indexed any documents + % inserted since the creation of the interactive + % index. + #{ + tx := Tx + } = Db, + + DbSeq = fabric2_db:get_update_seq(Db), + VS = fabric2_fdb:seq_to_vs(DbSeq), + Key = creation_vs_key(Db, Sig), + Val = erlfdb_tuple:pack({VS}), + ok = erlfdb:set(Tx, Key, Val), + + set_build_status(Db, Sig, ?INDEX_BUILDING), + + {VS, ?INDEX_BUILDING}. + + + +version_key(Db, Sig) -> + #{ + db_prefix := DbPrefix + } = Db, + Key = {?DB_VIEWS, ?VIEW_INFO, ?VIEW_IMPL_VERSION, Sig}, + erlfdb_tuple:pack(Key, DbPrefix). + + +seq_key(Db, Sig) -> + #{ + db_prefix := DbPrefix + } = Db, Key = {?DB_VIEWS, ?VIEW_INFO, ?VIEW_UPDATE_SEQ, Sig}, erlfdb_tuple:pack(Key, DbPrefix). diff --git a/src/couch_views/src/couch_views_indexer.erl b/src/couch_views/src/couch_views_indexer.erl index da2393999..2735f66b7 100644 --- a/src/couch_views/src/couch_views_indexer.erl +++ b/src/couch_views/src/couch_views_indexer.erl @@ -200,8 +200,8 @@ do_update(Db, Mrst0, State0) -> tx := Tx } = TxDb, - Mrst1 = couch_views_trees:open(TxDb, Mrst0), State1 = get_update_start_state(TxDb, Mrst0, State0), + Mrst1 = couch_views_trees:open(TxDb, Mrst0), {ok, State2} = fold_changes(State1), @@ -259,8 +259,10 @@ maybe_set_build_status(TxDb, Mrst1, _ViewVS, State) -> % In the first iteration of update we need % to populate our db and view sequences get_update_start_state(TxDb, Mrst, #{db_seq := undefined} = State) -> - ViewVS = couch_views_fdb:get_creation_vs(TxDb, Mrst), - ViewSeq = couch_views_fdb:get_update_seq(TxDb, Mrst), + #{ + view_vs := ViewVS, + view_seq := ViewSeq + } = couch_views_fdb:get_view_state(TxDb, Mrst), State#{ tx_db := TxDb, diff --git a/src/couch_views/test/couch_views_upgrade_test.erl b/src/couch_views/test/couch_views_upgrade_test.erl new file mode 100644 index 000000000..556a76297 --- /dev/null +++ b/src/couch_views/test/couch_views_upgrade_test.erl @@ -0,0 +1,400 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_views_upgrade_test). + +-include_lib("eunit/include/eunit.hrl"). +-include_lib("couch/include/couch_db.hrl"). +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("couch_mrview/include/couch_mrview.hrl"). +-include_lib("couch_views/include/couch_views.hrl"). +-include_lib("fabric/include/fabric2.hrl"). +-include_lib("fabric/test/fabric2_test.hrl"). + + +-define(MAP_FUN1, <<"map_fun1">>). +-define(MAP_FUN2, <<"map_fun2">>). + + +upgrade_test_() -> + { + "Test view upgrades", + { + setup, + fun setup/0, + fun cleanup/1, + { + foreach, + fun foreach_setup/0, + fun foreach_teardown/1, + [ + ?TDEF_FE(empty_state), + ?TDEF_FE(indexed_state), + ?TDEF_FE(upgrade_non_interactive), + ?TDEF_FE(upgrade_unbuilt_interactive), + ?TDEF_FE(upgrade_partially_built_interactive), + ?TDEF_FE(upgrade_built_interactive) + ] + } + } + }. + + +setup() -> + Ctx = test_util:start_couch([ + fabric, + couch_jobs, + couch_js, + couch_views + ]), + Ctx. + + +cleanup(Ctx) -> + test_util:stop_couch(Ctx). + + +foreach_setup() -> + {ok, Db} = fabric2_db:create(?tempdb(), [{user_ctx, ?ADMIN_USER}]), + Db. + + +foreach_teardown(Db) -> + meck:unload(), + config:delete("couch_views", "change_limit"), + ok = fabric2_db:delete(fabric2_db:name(Db), []). + + +empty_state(Db) -> + DDoc = create_ddoc(), + {ok, Mrst} = couch_views_util:ddoc_to_mrst(fabric2_db:name(Db), DDoc), + State = fabric2_fdb:transactional(Db, fun(TxDb) -> + couch_views_fdb:get_view_state(TxDb, Mrst) + end), + + Expect = #{ + version => ?CURRENT_VIEW_IMPL_VERSION, + view_seq => <<>>, + view_vs => not_found, + build_status => not_found + }, + ?assertEqual(Expect, State), + assert_fdb_state(Db, Mrst, Expect). + + +indexed_state(Db) -> + DDoc = create_ddoc(), + Doc1 = doc(0), + + {ok, _} = fabric2_db:update_doc(Db, DDoc, []), + {ok, _} = fabric2_db:update_doc(Db, Doc1, []), + + {ok, Out} = run_query(Db, DDoc, ?MAP_FUN1), + ?assertEqual([row(<<"0">>, 0, 0)], Out), + + assert_fdb_state(Db, DDoc, #{ + version => ?CURRENT_VIEW_IMPL_VERSION, + view_seq => fabric2_db:get_update_seq(Db), + view_vs => not_found, + build_status => not_found + }). + + +upgrade_non_interactive(Db) -> + DDoc = create_ddoc(), + Doc1 = doc(0), + + {ok, _} = fabric2_db:update_docs(Db, [DDoc, Doc1], []), + DbSeq = fabric2_db:get_update_seq(Db), + + init_fdb_state(Db, DDoc, #{view_seq => DbSeq}), + + {ok, Out} = run_query(Db, DDoc, ?MAP_FUN1), + ?assertEqual([row(<<"0">>, 0, 0)], Out), + + assert_fdb_state(Db, DDoc, #{ + version => ?CURRENT_VIEW_IMPL_VERSION, + view_seq => DbSeq, + view_vs => not_found, + build_status => not_found + }). + + +upgrade_unbuilt_interactive(Db) -> + DDoc = create_ddoc(), + {ok, Mrst} = couch_views_util:ddoc_to_mrst(fabric2_db:name(Db), DDoc), + Doc1 = doc(0), + + {ok, _} = fabric2_db:update_docs(Db, [DDoc, Doc1], []), + DbSeq = fabric2_db:get_update_seq(Db), + + init_fdb_state(Db, DDoc, #{ + view_vs => fabric2_fdb:seq_to_vs(DbSeq), + build_status => ?INDEX_BUILDING + }), + + % Trigger an upgrade + fabric2_fdb:transactional(Db, fun(TxDb) -> + couch_views_fdb:get_view_state(TxDb, Mrst) + end), + + assert_fdb_state(Db, DDoc, #{ + version => ?CURRENT_VIEW_IMPL_VERSION, + view_seq => <<>>, + view_vs => fabric2_fdb:seq_to_vs(DbSeq), + build_status => ?INDEX_BUILDING + }), + + % Build the view + {ok, Out} = run_query(Db, DDoc, ?MAP_FUN1), + ?assertEqual([row(<<"0">>, 0, 0)], Out), + + assert_fdb_state(Db, DDoc, #{ + version => ?CURRENT_VIEW_IMPL_VERSION, + view_seq => DbSeq, + view_vs => fabric2_fdb:seq_to_vs(DbSeq), + build_status => ?INDEX_READY + }). + + +upgrade_partially_built_interactive(Db) -> + DDoc = create_ddoc(), + {ok, Mrst} = couch_views_util:ddoc_to_mrst(fabric2_db:name(Db), DDoc), + {ok, _} = fabric2_db:update_doc(Db, DDoc, []), + + MidSeq = fabric2_db:get_update_seq(Db), + + Doc1 = doc(0), + {ok, _} = fabric2_db:update_doc(Db, Doc1, []), + + DbSeq = fabric2_db:get_update_seq(Db), + + init_fdb_state(Db, DDoc, #{ + view_seq => MidSeq, + view_vs => fabric2_fdb:seq_to_vs(DbSeq), + build_status => ?INDEX_BUILDING + }), + + % Trigger an upgrade + fabric2_fdb:transactional(Db, fun(TxDb) -> + couch_views_fdb:get_view_state(TxDb, Mrst) + end), + + assert_fdb_state(Db, DDoc, #{ + version => ?CURRENT_VIEW_IMPL_VERSION, + view_seq => <<>>, + view_vs => fabric2_fdb:seq_to_vs(DbSeq), + build_status => ?INDEX_BUILDING + }), + + % Build the view + {ok, Out} = run_query(Db, DDoc, ?MAP_FUN1), + ?assertEqual([row(<<"0">>, 0, 0)], Out), + + assert_fdb_state(Db, DDoc, #{ + version => ?CURRENT_VIEW_IMPL_VERSION, + view_seq => DbSeq, + view_vs => fabric2_fdb:seq_to_vs(DbSeq), + build_status => ?INDEX_READY + }). + + +upgrade_built_interactive(Db) -> + DDoc = create_ddoc(), + Doc1 = doc(0), + + {ok, Mrst} = couch_views_util:ddoc_to_mrst(fabric2_db:name(Db), DDoc), + {ok, _} = fabric2_db:update_doc(Db, DDoc, []), + {ok, _} = fabric2_db:update_doc(Db, Doc1, []), + + DbSeq = fabric2_db:get_update_seq(Db), + + init_fdb_state(Db, DDoc, #{ + view_seq => DbSeq, + view_vs => fabric2_fdb:seq_to_vs(DbSeq), + build_status => ?INDEX_READY + }), + + % Trigger an upgrade + fabric2_fdb:transactional(Db, fun(TxDb) -> + couch_views_fdb:get_view_state(TxDb, Mrst) + end), + + assert_fdb_state(Db, DDoc, #{ + version => ?CURRENT_VIEW_IMPL_VERSION, + view_seq => <<>>, + view_vs => fabric2_fdb:seq_to_vs(DbSeq), + build_status => ?INDEX_BUILDING + }), + + % Build the view + {ok, Out} = run_query(Db, DDoc, ?MAP_FUN1), + ?assertEqual([row(<<"0">>, 0, 0)], Out), + + assert_fdb_state(Db, DDoc, #{ + version => ?CURRENT_VIEW_IMPL_VERSION, + view_seq => DbSeq, + view_vs => fabric2_fdb:seq_to_vs(DbSeq), + build_status => ?INDEX_READY + }). + + +init_fdb_state(Db, #doc{} = DDoc, Values) -> + {ok, Mrst} = couch_views_util:ddoc_to_mrst(fabric2_db:name(Db), DDoc), + init_fdb_state(Db, Mrst, Values); +init_fdb_state(Db, #mrst{sig = Sig}, Values) -> + init_fdb_state(Db, Sig, Values); +init_fdb_state(Db, Sig, Values) -> + VersionRow = case maps:get(version, Values, undefined) of + undefined -> []; + Version -> [{pack(Db, key(version, Sig)), pack({Version})}] + end, + + SeqRow = case maps:get(view_seq, Values, undefined) of + undefined -> []; + Seq -> [{pack(Db, key(seq, Sig)), Seq}] + end, + + VSRow = case maps:get(view_vs, Values, undefined) of + undefined -> []; + VS -> [{pack(Db, key(vs, Sig)), pack({VS})}] + end, + + BSRow = case maps:get(build_status, Values, undefined) of + undefined -> []; + BS -> [{pack(Db, key(bs, Sig)), BS}] + end, + + Rows = VersionRow ++ SeqRow ++ VSRow ++ BSRow, + + fabric2_fdb:transactional(Db, fun(TxDb) -> + #{ + tx := Tx + } = TxDb, + lists:foreach(fun({K, V}) -> + erlfdb:set(Tx, K, V) + end, Rows) + end). + + +assert_fdb_state(Db, #doc{} = DDoc, Expect) -> + {ok, Mrst} = couch_views_util:ddoc_to_mrst(fabric2_db:name(Db), DDoc), + assert_fdb_state(Db, Mrst, Expect); +assert_fdb_state(Db, #mrst{sig = Sig}, Expect) -> + assert_fdb_state(Db, Sig, Expect); +assert_fdb_state(Db, Sig, Expect) -> + #{ + version := Version, + view_seq := ViewSeq, + view_vs := ViewVS, + build_status := BuildStatus + } = Expect, + + VersionRow = case Version of + not_found -> []; + _ -> [{pack(Db, key(version, Sig)), pack({Version})}] + end, + + SeqRow = case ViewSeq of + <<>> -> []; + _ -> [{pack(Db, key(seq, Sig)), ViewSeq}] + end, + + VSRow = case ViewVS of + not_found -> []; + _ -> [{pack(Db, key(vs, Sig)), pack({ViewVS})}] + end, + + BSRow = case BuildStatus of + not_found -> []; + _ -> [{pack(Db, key(bs, Sig)), BuildStatus}] + end, + + ExpectRows = lists:sort(VersionRow ++ SeqRow ++ VSRow ++ BSRow), + + RawExistingRows = fabric2_fdb:transactional(Db, fun(TxDb) -> + #{ + tx := Tx, + db_prefix := DbPrefix + } = TxDb, + RangePrefix = erlfdb_tuple:pack({?DB_VIEWS, ?VIEW_INFO}, DbPrefix), + erlfdb:wait(erlfdb:get_range_startswith(Tx, RangePrefix)) + end), + + % Ignore the KV size key in the view info rows + KVSizeKey = pack(Db, key(kv_size, Sig)), + ExistingRows = lists:keydelete(KVSizeKey, 1, RawExistingRows), + + ?assertEqual(ExpectRows, ExistingRows). + + +key(version, Sig) -> {?DB_VIEWS, ?VIEW_INFO, ?VIEW_IMPL_VERSION, Sig}; +key(seq, Sig) -> {?DB_VIEWS, ?VIEW_INFO, ?VIEW_UPDATE_SEQ, Sig}; +key(kv_size, Sig) -> {?DB_VIEWS, ?VIEW_INFO, ?VIEW_KV_SIZE, Sig}; +key(vs, Sig) -> {?DB_VIEWS, ?VIEW_INFO, ?VIEW_CREATION_VS, Sig}; +key(bs, Sig) -> {?DB_VIEWS, ?VIEW_INFO, ?VIEW_BUILD_STATUS, Sig}. + + +pack(Db, Key) -> + #{ + db_prefix := DbPrefix + } = Db, + erlfdb_tuple:pack(Key, DbPrefix). + + +pack(Value) -> + erlfdb_tuple:pack(Value). + + +row(Id, Key, Value) -> + {row, [ + {id, Id}, + {key, Key}, + {value, Value} + ]}. + + +fold_fun({meta, _Meta}, Acc) -> + {ok, Acc}; +fold_fun({row, _} = Row, Acc) -> + {ok, [Row | Acc]}; +fold_fun(complete, Acc) -> + {ok, lists:reverse(Acc)}. + + +create_ddoc() -> + couch_doc:from_json_obj({[ + {<<"_id">>, <<"_design/bar">>}, + {<<"views">>, {[ + {?MAP_FUN1, {[ + {<<"map">>, <<"function(doc) {emit(doc.val, doc.val);}">>} + ]}}, + {?MAP_FUN2, {[ + {<<"map">>, <<"function(doc) {}">>} + ]}} + ]}} + ]}). + + +doc(Id) -> + doc(Id, Id). + + +doc(Id, Val) -> + couch_doc:from_json_obj({[ + {<<"_id">>, list_to_binary(integer_to_list(Id))}, + {<<"val">>, Val} + ]}). + + +run_query(#{} = Db, DDoc, <<_/binary>> = View) -> + couch_views:query(Db, DDoc, View, fun fold_fun/2, [], #mrargs{}). \ No newline at end of file -- cgit v1.2.1 From 69f0ba1cc0af0c6008f63fb7342a60efa794634b Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Wed, 29 Jul 2020 10:34:48 -0500 Subject: Use ebtree for reduce functions --- src/couch_views/src/couch_views.erl | 6 - src/couch_views/src/couch_views_fdb.erl | 1 - src/couch_views/src/couch_views_reader.erl | 159 ++++++++++++++++++++++- src/couch_views/src/couch_views_trees.erl | 199 +++++++++++++++++++++++++---- 4 files changed, 327 insertions(+), 38 deletions(-) diff --git a/src/couch_views/src/couch_views.erl b/src/couch_views/src/couch_views.erl index da8a142f9..2d916314f 100644 --- a/src/couch_views/src/couch_views.erl +++ b/src/couch_views/src/couch_views.erl @@ -161,12 +161,6 @@ maybe_update_view(TxDb, Mrst, false, _Args) -> end. -is_reduce_view(#mrargs{view_type = ViewType}) -> - ViewType =:= red; -is_reduce_view({Reduce, _, _}) -> - Reduce =:= red. - - to_mrargs(#mrargs{} = Args) -> Args; diff --git a/src/couch_views/src/couch_views_fdb.erl b/src/couch_views/src/couch_views_fdb.erl index 28a60b872..b0fb82e85 100644 --- a/src/couch_views/src/couch_views_fdb.erl +++ b/src/couch_views/src/couch_views_fdb.erl @@ -299,7 +299,6 @@ reset_interactive_index(Db, Sig, _St) -> {VS, ?INDEX_BUILDING}. - version_key(Db, Sig) -> #{ db_prefix := DbPrefix diff --git a/src/couch_views/src/couch_views_reader.erl b/src/couch_views/src/couch_views_reader.erl index a785c7b35..3c5862749 100644 --- a/src/couch_views/src/couch_views_reader.erl +++ b/src/couch_views/src/couch_views_reader.erl @@ -23,7 +23,15 @@ -include_lib("fabric/include/fabric2.hrl"). -read(Db, Mrst0, ViewName, UserCallback, UserAcc0, Args) -> +read(Db, Mrst, ViewName, UserCallback, UserAcc, Args) -> + ReadFun = case Args of + #mrargs{view_type = map} -> fun read_map_view/6; + #mrargs{view_type = red} -> fun read_red_view/6 + end, + ReadFun(Db, Mrst, ViewName, UserCallback, UserAcc, Args). + + +read_map_view(Db, Mrst0, ViewName, UserCallback, UserAcc0, Args) -> try fabric2_fdb:transactional(Db, fun(TxDb) -> #mrst{ @@ -68,6 +76,79 @@ read(Db, Mrst0, ViewName, UserCallback, UserAcc0, Args) -> end. +read_red_view(Db, Mrst0, ViewName, UserCallback, UserAcc0, Args) -> + #mrst{ + language = Lang, + views = Views + } = Mrst0, + {Idx, Lang, View0} = get_red_view(Lang, Args, ViewName, Views), + Mrst1 = Mrst0#mrst{views = [View0]}, + ReadOpts = [{read_only, Idx}], + try + fabric2_fdb:transactional(Db, fun(TxDb) -> + #mrst{ + language = Lang, + views = [View1] + } = Mrst = couch_views_trees:open(TxDb, Mrst1, ReadOpts), + + #mrargs{ + extra = Extra + } = Args, + + Fun = fun handle_red_row/3, + + Meta = get_red_meta(TxDb, Mrst, View1, Args), + UserAcc1 = maybe_stop(UserCallback(Meta, UserAcc0)), + + Finalizer = case couch_util:get_value(finalizer, Extra) of + undefined -> + {_, FunSrc} = lists:nth(Idx, View1#mrview.reduce_funs), + FunSrc; + CustomFun-> + CustomFun + end, + + Acc0 = #{ + db => TxDb, + skip => Args#mrargs.skip, + limit => Args#mrargs.limit, + mrargs => undefined, + finalizer => Finalizer, + red_idx => Idx, + language => Lang, + callback => UserCallback, + acc => UserAcc1 + }, + + Acc1 = lists:foldl(fun(KeyArgs, KeyAcc0) -> + Opts = mrargs_to_fdb_options(KeyArgs), + KeyAcc1 = KeyAcc0#{ + mrargs := KeyArgs + }, + couch_views_trees:fold_red_idx( + TxDb, + View1, + Idx, + Opts, + Fun, + KeyAcc1 + ) + end, Acc0, expand_keys_args(Args)), + + #{ + acc := UserAcc2 + } = Acc1, + {ok, maybe_stop(UserCallback(complete, UserAcc2))} + end) + catch + throw:{complete, Out} -> + {_, Final} = UserCallback(complete, Out), + {ok, Final}; + throw:{done, Out} -> + {ok, Out} + end. + + get_map_meta(TxDb, Mrst, View, #mrargs{update_seq = true}) -> TotalRows = couch_views_trees:get_row_count(TxDb, View), ViewSeq = couch_views_fdb:get_update_seq(TxDb, Mrst), @@ -78,6 +159,14 @@ get_map_meta(TxDb, _Mrst, View, #mrargs{}) -> {meta, [{total, TotalRows}, {offset, null}]}. +get_red_meta(TxDb, Mrst, _View, #mrargs{update_seq = true}) -> + ViewSeq = couch_views_fdb:get_update_seq(TxDb, Mrst), + {meta, [{update_seq, ViewSeq}]}; + +get_red_meta(_TxDb, _Mrst, _View, #mrargs{}) -> + {meta, []}. + + handle_map_row(_DocId, _Key, _Value, #{skip := Skip} = Acc) when Skip > 0 -> Acc#{skip := Skip - 1}; @@ -115,6 +204,38 @@ handle_map_row(DocId, Key, Value, Acc) -> Acc#{limit := Limit - 1, acc := UserAcc1}. +handle_red_row(_Key, _Red, #{skip := Skip} = Acc) when Skip > 0 -> + Acc#{skip := Skip - 1}; + +handle_red_row(_Key, _Value, #{limit := 0, acc := UserAcc}) -> + throw({complete, UserAcc}); + +handle_red_row(Key0, Value0, Acc) -> + #{ + limit := Limit, + finalizer := Finalizer, + callback := UserCallback, + acc := UserAcc0 + } = Acc, + + Key1 = case Key0 of + undefined -> null; + _ -> Key0 + end, + Value1 = maybe_finalize(Finalizer, Value0), + Row = [{key, Key1}, {value, Value1}], + + UserAcc1 = maybe_stop(UserCallback({row, Row}, UserAcc0)), + Acc#{limit := Limit - 1, acc := UserAcc1}. + + +maybe_finalize(null, Red) -> + Red; +maybe_finalize(Finalizer, Red) -> + {ok, Finalized} = couch_query_servers:finalize(Finalizer, Red), + Finalized. + + get_map_view(Lang, Args, ViewName, Views) -> case couch_mrview_util:extract_view(Lang, Args, ViewName, Views) of {map, View, _Args} -> View; @@ -122,6 +243,13 @@ get_map_view(Lang, Args, ViewName, Views) -> end. +get_red_view(Lang, Args, ViewName, Views) -> + case couch_mrview_util:extract_view(Lang, Args, ViewName, Views) of + {red, {Idx, Lang, View}, _} -> {Idx, Lang, View}; + _ -> throw({not_found, missing_named_view}) + end. + + expand_keys_args(#mrargs{keys = undefined} = Args) -> [Args]; @@ -136,12 +264,14 @@ expand_keys_args(#mrargs{keys = Keys} = Args) -> mrargs_to_fdb_options(Args) -> #mrargs{ + view_type = ViewType, start_key = StartKey, start_key_docid = StartKeyDocId, end_key = EndKey, end_key_docid = EndKeyDocId0, direction = Direction, - inclusive_end = InclusiveEnd + inclusive_end = InclusiveEnd, + group_level = GroupLevel } = Args, StartKeyOpts = if StartKey == undefined -> []; true -> @@ -160,10 +290,33 @@ mrargs_to_fdb_options(Args) -> [{end_key, {EndKey, EndKeyDocId}}] end, + GroupFunOpt = make_group_key_fun(ViewType, GroupLevel), + [ {dir, Direction}, {inclusive_end, InclusiveEnd} - ] ++ StartKeyOpts ++ EndKeyOpts. + ] ++ StartKeyOpts ++ EndKeyOpts ++ GroupFunOpt. + + +make_group_key_fun(map, _) -> + []; + +make_group_key_fun(red, exact) -> + [ + {group_key_fun, fun({Key, _DocId}) -> Key end} + ]; + +make_group_key_fun(red, 0) -> + [ + {group_key_fun, group_all} + ]; + +make_group_key_fun(red, N) when is_integer(N), N > 0 -> + GKFun = fun + ({Key, _DocId}) when is_list(Key) -> lists:sublist(Key, N); + ({Key, _DocId}) -> Key + end, + [{group_key_fun, GKFun}]. maybe_stop({ok, Acc}) -> Acc; diff --git a/src/couch_views/src/couch_views_trees.erl b/src/couch_views/src/couch_views_trees.erl index 7ce350506..b45750be9 100644 --- a/src/couch_views/src/couch_views_trees.erl +++ b/src/couch_views/src/couch_views_trees.erl @@ -14,11 +14,13 @@ -export([ open/2, + open/3, get_row_count/2, get_kv_size/2, fold_map_idx/5, + fold_red_idx/6, update_views/3 ]). @@ -35,6 +37,10 @@ open(TxDb, Mrst) -> + open(TxDb, Mrst, []). + + +open(TxDb, Mrst, Options) -> #mrst{ sig = Sig, language = Lang, @@ -42,7 +48,7 @@ open(TxDb, Mrst) -> } = Mrst, Mrst#mrst{ id_btree = open_id_tree(TxDb, Sig), - views = [open_view_tree(TxDb, Sig, Lang, V) || V <- Views] + views = [open_view_tree(TxDb, Sig, Lang, V, Options) || V <- Views] }. @@ -50,7 +56,7 @@ get_row_count(TxDb, View) -> #{ tx := Tx } = TxDb, - {Count, _} = ebtree:full_reduce(Tx, View#mrview.btree), + {Count, _, _} = ebtree:full_reduce(Tx, View#mrview.btree), Count. @@ -58,7 +64,7 @@ get_kv_size(TxDb, View) -> #{ tx := Tx } = TxDb, - {_, TotalSize} = ebtree:full_reduce(Tx, View#mrview.btree), + {_, TotalSize, _} = ebtree:full_reduce(Tx, View#mrview.btree), TotalSize. @@ -122,6 +128,74 @@ fold_map_idx(TxDb, View, Options, Callback, Acc0) -> end. +fold_red_idx(TxDb, View, Idx, Options, Callback, Acc0) -> + #{ + tx := Tx + } = TxDb, + #mrview{ + btree = Btree + } = View, + + {Dir, StartKey, EndKey, InclusiveEnd, GroupKeyFun} = to_red_opts(Options), + + Wrapper = fun({GroupKey, Reduction}, WAcc) -> + {_RowCount, _RowSize, UserReds} = Reduction, + RedValue = lists:nth(Idx, UserReds), + Callback(GroupKey, RedValue, WAcc) + end, + + case {GroupKeyFun, Dir} of + {group_all, fwd} -> + EBtreeOpts = [ + {dir, fwd}, + {inclusive_end, InclusiveEnd} + ], + Reduction = ebtree:reduce(Tx, Btree, StartKey, EndKey, EBtreeOpts), + Wrapper({null, Reduction}, Acc0); + {F, fwd} when is_function(F) -> + EBtreeOpts = [ + {dir, fwd}, + {inclusive_end, InclusiveEnd} + ], + ebtree:group_reduce( + Tx, + Btree, + StartKey, + EndKey, + GroupKeyFun, + Wrapper, + Acc0, + EBtreeOpts + ); + {group_all, rev} -> + % Start/End keys swapped on purpose because ebtree. Also + % inclusive_start for same reason. + EBtreeOpts = [ + {dir, rev}, + {inclusive_start, InclusiveEnd} + ], + Reduction = ebtree:reduce(Tx, Btree, EndKey, StartKey, EBtreeOpts), + Wrapper({null, Reduction}, Acc0); + {F, rev} when is_function(F) -> + % Start/End keys swapped on purpose because ebtree. Also + % inclusive_start for same reason. + EBtreeOpts = [ + {dir, rev}, + {inclusive_start, InclusiveEnd} + ], + ebtree:group_reduce( + Tx, + Btree, + EndKey, + StartKey, + GroupKeyFun, + Wrapper, + Acc0, + EBtreeOpts + ) + end. + + update_views(TxDb, Mrst, Docs) -> #{ tx := Tx @@ -129,7 +203,7 @@ update_views(TxDb, Mrst, Docs) -> % Get initial KV size OldKVSize = lists:foldl(fun(View, SizeAcc) -> - {_, Size} = ebtree:full_reduce(Tx, View#mrview.btree), + {_, Size, _} = ebtree:full_reduce(Tx, View#mrview.btree), SizeAcc + Size end, 0, Mrst#mrst.views), @@ -156,7 +230,7 @@ update_views(TxDb, Mrst, Docs) -> % Get new KV size after update NewKVSize = lists:foldl(fun(View, SizeAcc) -> - {_, Size} = ebtree:full_reduce(Tx, View#mrview.btree), + {_, Size, _} = ebtree:full_reduce(Tx, View#mrview.btree), SizeAcc + Size end, 0, Mrst#mrst.views), @@ -176,7 +250,7 @@ open_id_tree(TxDb, Sig) -> ebtree:open(Tx, Prefix, get_order(id_btree), TreeOpts). -open_view_tree(TxDb, Sig, Lang, View) -> +open_view_tree(TxDb, Sig, Lang, View, Options) -> #{ tx := Tx, db_prefix := DbPrefix @@ -185,12 +259,21 @@ open_view_tree(TxDb, Sig, Lang, View) -> id_num = ViewId } = View, Prefix = view_tree_prefix(DbPrefix, Sig, ViewId), - TreeOpts = [ + BaseOpts = [ {collate_fun, couch_views_util:collate_fun(View)}, - {reduce_fun, make_reduce_fun(Lang, View)}, - {persist_fun, fun couch_views_fdb:persist_chunks/3}, - {cache_fun, create_cache_fun({view, ViewId})} + {persist_fun, fun couch_views_fdb:persist_chunks/3} ], + ExtraOpts = case lists:keyfind(read_only, 1, Options) of + {read_only, Idx} -> + RedFun = make_read_only_reduce_fun(Lang, View, Idx), + [{reduce_fun, RedFun}]; + false -> + [ + {reduce_fun, make_reduce_fun(Lang, View)}, + {cache_fun, create_cache_fun({view, ViewId})} + ] + end, + TreeOpts = BaseOpts ++ ExtraOpts, View#mrview{ btree = ebtree:open(Tx, Prefix, get_order(view_btree), TreeOpts) }. @@ -210,27 +293,60 @@ min_order(V) -> V + 1. -make_reduce_fun(_Lang, #mrview{}) -> +make_read_only_reduce_fun(Lang, View, NthRed) -> + RedFuns = [Src || {_, Src} <- View#mrview.reduce_funs], + if RedFuns /= [] -> ok; true -> + io:format(standard_error, "~p~n", [process_info(self(), current_stacktrace)]) + end, + LPad = lists:duplicate(NthRed - 1, []), + RPad = lists:duplicate(length(RedFuns) - NthRed, []), + FunSrc = lists:nth(NthRed, RedFuns), fun - (KVs, _ReReduce = false) -> + (KVs0, _ReReduce = false) -> + KVs1 = detuple_kvs(expand_dupes(KVs0)), + {ok, Result} = couch_query_servers:reduce(Lang, [FunSrc], KVs1), + {0, 0, LPad ++ Result ++ RPad}; + (Reductions, _ReReduce = true) -> + ExtractFun = fun(Reds) -> + {_Count, _Size, UReds} = Reds, + [lists:nth(NthRed, UReds)] + end, + UReds = lists:map(ExtractFun, Reductions), + {ok, Result} = case UReds of + [RedVal] -> + {ok, RedVal}; + _ -> + couch_query_servers:rereduce(Lang, [FunSrc], UReds) + end, + {0, 0, LPad ++ Result ++ RPad} + end. + + +make_reduce_fun(Lang, #mrview{} = View) -> + RedFuns = [Src || {_, Src} <- View#mrview.reduce_funs], + fun + (KVs0, _ReReduce = false) -> + KVs1 = expand_dupes(KVs0), TotalSize = lists:foldl(fun({{K, _DocId}, V}, Acc) -> KSize = couch_ejson_size:encoded_size(K), - Acc + case V of - {dups, Dups} -> - lists:foldl(fun(D, DAcc) -> - VSize = couch_ejson_size:encoded_size(D), - DAcc + KSize + VSize - end, 0, Dups); - _ -> - VSize = couch_ejson_size:encoded_size(V), - KSize + VSize - end - end, 0, KVs), - {length(KVs), TotalSize}; - (KRs, _ReReduce = true) -> - lists:foldl(fun({Count, Size}, {CountAcc, SizeAcc}) -> - {Count + CountAcc, Size + SizeAcc} - end, {0, 0}, KRs) + VSize = couch_ejson_size:encoded_size(V), + KSize + VSize + Acc + end, 0, KVs1), + KVs2 = detuple_kvs(KVs1), + {ok, UserReds} = couch_query_servers:reduce(Lang, RedFuns, KVs2), + {length(KVs1), TotalSize, UserReds}; + (Reductions, _ReReduce = true) -> + FoldFun = fun({Count, Size, UserReds}, {CAcc, SAcc, URedAcc}) -> + NewCAcc = Count + CAcc, + NewSAcc = Size + SAcc, + NewURedAcc = [UserReds | URedAcc], + {NewCAcc, NewSAcc, NewURedAcc} + end, + InitAcc = {0, 0, []}, + FinalAcc = lists:foldl(FoldFun, InitAcc, Reductions), + {FinalCount, FinalSize, UReds} = FinalAcc, + {ok, Result} = couch_query_servers:rereduce(Lang, RedFuns, UReds), + {FinalCount, FinalSize, Result} end. @@ -284,6 +400,17 @@ to_map_opts(Options) -> {Dir, StartKey, EndKey, InclusiveEnd}. +to_red_opts(Options) -> + {Dir, StartKey, EndKey, InclusiveEnd} = to_map_opts(Options), + + GroupKeyFun = case lists:keyfind(group_key_fun, 1, Options) of + {group_key_fun, GKF} -> GKF; + false -> fun({_Key, _DocId}) -> global_group end + end, + + {Dir, StartKey, EndKey, InclusiveEnd, GroupKeyFun}. + + gather_update_info(Tx, Mrst, Docs) -> % A special token used to indicate that the row should be deleted DeleteRef = erlang:make_ref(), @@ -420,6 +547,22 @@ combine_vals(V1, V2) -> {dups, [V1, V2]}. +expand_dupes([]) -> + []; +expand_dupes([{K, {dups, Dups}} | Rest]) -> + Expanded = [{K, D} || D <- Dups], + Expanded ++ expand_dupes(Rest); +expand_dupes([{K, V} | Rest]) -> + [{K, V} | expand_dupes(Rest)]. + + +detuple_kvs([]) -> + []; +detuple_kvs([KV | Rest]) -> + {{Key, Id}, Value} = KV, + [[[Key, Id], Value] | detuple_kvs(Rest)]. + + id_tree_prefix(DbPrefix, Sig) -> Key = {?DB_VIEWS, ?VIEW_TREES, Sig, ?VIEW_ID_TREE}, erlfdb_tuple:pack(Key, DbPrefix). -- cgit v1.2.1 From 2f1a4562e98a423d3d93dab06682bd1be9b39435 Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Wed, 5 Aug 2020 12:47:37 -0500 Subject: Add test suite for reduce views --- src/couch_views/test/couch_views_red_test.erl | 745 ++++++++++++++++++++++++++ 1 file changed, 745 insertions(+) create mode 100644 src/couch_views/test/couch_views_red_test.erl diff --git a/src/couch_views/test/couch_views_red_test.erl b/src/couch_views/test/couch_views_red_test.erl new file mode 100644 index 000000000..707611f6e --- /dev/null +++ b/src/couch_views/test/couch_views_red_test.erl @@ -0,0 +1,745 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_views_red_test). + +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("couch/include/couch_db.hrl"). +-include_lib("fabric/test/fabric2_test.hrl"). +-include("couch_views.hrl"). + + +-define(NUM_DOCS, 2000). + + +reduce_views_shraed_db_test_() -> + { + "Reduce views", + { + setup, + fun setup_db_with_docs/0, + fun teardown_db/1, + with([ + ?TDEF(should_reduce), + ?TDEF(should_reduce_rev), + ?TDEF(should_reduce_start_key), + ?TDEF(should_reduce_start_key_rev), + ?TDEF(should_reduce_end_key), + ?TDEF(should_reduce_end_key_rev), + ?TDEF(should_reduce_inclusive_end_false), + ?TDEF(should_reduce_inclusive_end_false_rev), + ?TDEF(should_reduce_start_and_end_key), + ?TDEF(should_reduce_start_and_end_key_rev), + ?TDEF(should_reduce_empty_range), + ?TDEF(should_reduce_empty_range_rev), + ?TDEF(should_reduce_grouped), + ?TDEF(should_reduce_grouped_rev), + ?TDEF(should_reduce_grouped_start_key), + ?TDEF(should_reduce_grouped_start_key_rev), + ?TDEF(should_reduce_grouped_end_key), + ?TDEF(should_reduce_grouped_end_key_rev), + ?TDEF(should_reduce_grouped_inclusive_end_false), + ?TDEF(should_reduce_grouped_inclusive_end_false_rev), + ?TDEF(should_reduce_grouped_start_and_end_key), + ?TDEF(should_reduce_grouped_start_and_end_key_rev), + ?TDEF(should_reduce_grouped_empty_range), + ?TDEF(should_reduce_grouped_empty_range_rev), + ?TDEF(should_reduce_array_keys), + ?TDEF(should_reduce_grouped_array_keys), + ?TDEF(should_reduce_group_1_array_keys), + ?TDEF(should_reduce_group_1_array_keys_start_key), + ?TDEF(should_reduce_group_1_array_keys_start_key_rev), + ?TDEF(should_reduce_group_1_array_keys_end_key), + ?TDEF(should_reduce_group_1_array_keys_end_key_rev), + ?TDEF(should_reduce_group_1_array_keys_inclusive_end_false), + ?TDEF(should_reduce_group_1_array_keys_inclusive_end_false_rev), + ?TDEF(should_reduce_group_1_array_keys_start_and_end_key), + ?TDEF(should_reduce_group_1_array_keys_start_and_end_key_rev), + ?TDEF(should_reduce_group_1_array_keys_sub_array_select), + ?TDEF(should_reduce_group_1_array_keys_sub_array_select_rev), + ?TDEF(should_reduce_group_1_array_keys_sub_array_inclusive_end), + ?TDEF(should_reduce_group_1_array_keys_empty_range), + ?TDEF(should_reduce_group_1_array_keys_empty_range_rev) + ]) + } + }. + + +reduce_views_collation_test_() -> + { + "Reduce collation test", + { + setup, + fun setup_db/0, + fun teardown_db/1, + with([ + ?TDEF(should_collate_group_keys) + ]) + } + }. + + +setup_db() -> + Ctx = test_util:start_couch([ + fabric, + couch_jobs, + couch_js, + couch_views + ]), + {ok, Db} = fabric2_db:create(?tempdb(), [{user_ctx, ?ADMIN_USER}]), + {Db, Ctx}. + + +setup_db_with_docs() -> + {Db, Ctx} = setup_db(), + fabric2_db:update_docs(Db, [create_ddoc()]), + make_docs(Db, ?NUM_DOCS), + run_query(Db, <<"baz">>, #{limit => 0}), + {Db, Ctx}. + + +teardown_db({Db, Ctx}) -> + fabric2_db:delete(fabric2_db:name(Db), [{user_ctx, ?ADMIN_USER}]), + test_util:stop_couch(Ctx). + + +should_reduce({Db, _}) -> + Result = run_query(Db, <<"baz_count">>, #{}), + Expect = {ok, [row(null, ?NUM_DOCS)]}, + ?assertEqual(Expect, Result). + + +should_reduce_rev({Db, _}) -> + Args = #{ + direction => rev + }, + Result = run_query(Db, <<"baz_count">>, Args), + Expect = {ok, [row(null, ?NUM_DOCS)]}, + ?assertEqual(Expect, Result). + + +should_reduce_start_key({Db, _}) -> + Args = #{ + start_key => 4 + }, + Result = run_query(Db, <<"baz_count">>, Args), + Expect = {ok, [row(null, ?NUM_DOCS - 3)]}, + ?assertEqual(Expect, Result). + + +should_reduce_start_key_rev({Db, _}) -> + Args = #{ + direction => rev, + start_key => 4 + }, + Result = run_query(Db, <<"baz_count">>, Args), + Expect = {ok, [row(null, 4)]}, + ?assertEqual(Expect, Result). + + +should_reduce_end_key({Db, _}) -> + Args = #{ + end_key => 6 + }, + Result = run_query(Db, <<"baz_count">>, Args), + Expect = {ok, [row(null, 6)]}, + ?assertEqual(Expect, Result). + + +should_reduce_end_key_rev({Db, _}) -> + Args = #{ + direction => rev, + end_key => 6 + }, + Result = run_query(Db, <<"baz_count">>, Args), + Expect = {ok, [row(null, ?NUM_DOCS - 5)]}, + ?assertEqual(Expect, Result). + + +should_reduce_inclusive_end_false({Db, _}) -> + Args = #{ + end_key => 6, + inclusive_end => false + }, + Result = run_query(Db, <<"baz_count">>, Args), + Expect = {ok, [row(null, 5)]}, + ?assertEqual(Expect, Result). + + +should_reduce_inclusive_end_false_rev({Db, _}) -> + Args = #{ + direction => rev, + end_key => 6, + inclusive_end => false + }, + Result = run_query(Db, <<"baz_count">>, Args), + Expect = {ok, [row(null, ?NUM_DOCS - 6)]}, + ?assertEqual(Expect, Result). + + +should_reduce_start_and_end_key({Db, _}) -> + Args = #{ + start_key => 3, + end_key => 5 + }, + Result = run_query(Db, <<"baz_count">>, Args), + Expect = {ok, [row(null, 3)]}, + ?assertEqual(Expect, Result). + + +should_reduce_start_and_end_key_rev({Db, _}) -> + Args = #{ + direction => rev, + start_key => 5, + end_key => 3 + }, + Result = run_query(Db, <<"baz_count">>, Args), + Expect = {ok, [row(null, 3)]}, + ?assertEqual(Expect, Result). + + +should_reduce_empty_range({Db, _}) -> + Args = #{ + start_key => 100000, + end_key => 100001 + }, + Result = run_query(Db, <<"baz_count">>, Args), + Expect = {ok, [row(null, 0)]}, + ?assertEqual(Expect, Result). + + +should_reduce_empty_range_rev({Db, _}) -> + Args = #{ + direction => rev, + start_key => 100001, + end_key => 100000 + }, + Result = run_query(Db, <<"baz_count">>, Args), + Expect = {ok, [row(null, 0)]}, + ?assertEqual(Expect, Result). + + +should_reduce_grouped({Db, _}) -> + Args = #{ + group_level => exact + }, + Result = run_query(Db, <<"baz_count">>, Args), + Expect = {ok, [ + row(I, 1) || I <- lists:seq(1, ?NUM_DOCS) + ]}, + ?assertEqual(Expect, Result). + + +should_reduce_grouped_rev({Db, _}) -> + Args = #{ + direction => rev, + group_level => exact + }, + Result = run_query(Db, <<"baz_count">>, Args), + Expect = {ok, [ + row(I, 1) || I <- lists:seq(?NUM_DOCS, 1, -1) + ]}, + ?assertEqual(Expect, Result). + + +should_reduce_grouped_start_key({Db, _}) -> + Args = #{ + group_level => exact, + start_key => 3 + }, + Result = run_query(Db, <<"baz_count">>, Args), + Expect = {ok, [ + row(I, 1) || I <- lists:seq(3, ?NUM_DOCS) + ]}, + ?assertEqual(Expect, Result). + + +should_reduce_grouped_start_key_rev({Db, _}) -> + Args = #{ + direction => rev, + group_level => exact, + start_key => 3 + }, + Result = run_query(Db, <<"baz_count">>, Args), + Expect = {ok, [ + row(3, 1), + row(2, 1), + row(1, 1) + ]}, + ?assertEqual(Expect, Result). + + +should_reduce_grouped_end_key({Db, _}) -> + Args = #{ + group_level => exact, + end_key => 6 + }, + Result = run_query(Db, <<"baz_count">>, Args), + Expect = {ok, [ + row(I, 1) || I <- lists:seq(1, 6) + ]}, + ?assertEqual(Expect, Result). + + +should_reduce_grouped_end_key_rev({Db, _}) -> + Args = #{ + direction => rev, + group_level => exact, + end_key => 6 + }, + Result = run_query(Db, <<"baz_count">>, Args), + Expect = {ok, [ + row(I, 1) || I <- lists:seq(?NUM_DOCS, 6, -1) + ]}, + ?assertEqual(Expect, Result). + + +should_reduce_grouped_inclusive_end_false({Db, _}) -> + Args = #{ + group_level => exact, + end_key => 4, + inclusive_end => false + }, + Result = run_query(Db, <<"baz_count">>, Args), + Expect = {ok, [ + row(I, 1) || I <- lists:seq(1, 3) + ]}, + ?assertEqual(Expect, Result). + + +should_reduce_grouped_inclusive_end_false_rev({Db, _}) -> + Args = #{ + direction => rev, + group_level => exact, + end_key => 4, + inclusive_end => false + }, + Result = run_query(Db, <<"baz_count">>, Args), + Expect = {ok, [ + row(I, 1) || I <- lists:seq(?NUM_DOCS, 5, -1) + ]}, + ?assertEqual(Expect, Result). + + +should_reduce_grouped_start_and_end_key({Db, _}) -> + Args = #{ + group_level => exact, + start_key => 2, + end_key => 4 + }, + Result = run_query(Db, <<"baz_count">>, Args), + Expect = {ok, [ + row(I, 1) || I <- lists:seq(2, 4) + ]}, + ?assertEqual(Expect, Result). + + +should_reduce_grouped_start_and_end_key_rev({Db, _}) -> + Args = #{ + direction => rev, + group_level => exact, + start_key => 4, + end_key => 2 + }, + Result = run_query(Db, <<"baz_count">>, Args), + Expect = {ok, [ + row(I, 1) || I <- lists:seq(4, 2, -1) + ]}, + ?assertEqual(Expect, Result). + + +should_reduce_grouped_empty_range({Db, _}) -> + Args = #{ + group_level => exact, + start_key => 100000, + end_key => 100001 + }, + Result = run_query(Db, <<"baz_count">>, Args), + Expect = {ok, []}, + ?assertEqual(Expect, Result). + + +should_reduce_grouped_empty_range_rev({Db, _}) -> + Args = #{ + direction => rev, + group_level => exact, + start_key => 100001, + end_key => 100000 + }, + Result = run_query(Db, <<"baz_count">>, Args), + Expect = {ok, []}, + ?assertEqual(Expect, Result). + + +should_reduce_array_keys({Db, _}) -> + Result = run_query(Db, <<"boom">>, #{}), + Expect = {ok, [row(null, 1.5 * ?NUM_DOCS)]}, + ?assertEqual(Expect, Result). + + +should_reduce_grouped_array_keys({Db, _}) -> + Args = #{ + group_level => exact + }, + Result = run_query(Db, <<"boom">>, Args), + Expect = {ok, lists:sort([ + row([I rem 3, I], 1.5) || I <- lists:seq(1, ?NUM_DOCS) + ])}, + ?assertEqual(Expect, Result). + + +should_reduce_group_1_array_keys({Db, _}) -> + Args = #{ + group_level => 1 + }, + Result = run_query(Db, <<"boom">>, Args), + Expect = {ok, [ + row([0], rem_count(0, ?NUM_DOCS) * 1.5), + row([1], rem_count(1, ?NUM_DOCS) * 1.5), + row([2], rem_count(2, ?NUM_DOCS) * 1.5) + ]}, + ?assertEqual(Expect, Result). + + +should_reduce_group_1_array_keys_start_key({Db, _}) -> + Args = #{ + group_level => 1, + start_key => [1] + }, + Result = run_query(Db, <<"boom">>, Args), + Expect = {ok, [ + row([1], rem_count(1, ?NUM_DOCS) * 1.5), + row([2], rem_count(2, ?NUM_DOCS) * 1.5) + ]}, + ?assertEqual(Expect, Result). + + +should_reduce_group_1_array_keys_start_key_rev({Db, _}) -> + Args = #{ + direction => rev, + group_level => 1, + start_key => [1, ?NUM_DOCS + 1] + }, + Result = run_query(Db, <<"boom">>, Args), + Expect = {ok, [ + row([1], rem_count(1, ?NUM_DOCS) * 1.5), + row([0], rem_count(0, ?NUM_DOCS) * 1.5) + ]}, + ?assertEqual(Expect, Result). + + +should_reduce_group_1_array_keys_end_key({Db, _}) -> + Args = #{ + group_level => 1, + end_key => [1, ?NUM_DOCS + 1] + }, + Result = run_query(Db, <<"boom">>, Args), + Expect = {ok, [ + row([0], rem_count(0, ?NUM_DOCS) * 1.5), + row([1], rem_count(1, ?NUM_DOCS) * 1.5) + ]}, + ?assertEqual(Expect, Result). + + +should_reduce_group_1_array_keys_end_key_rev({Db, _}) -> + Args = #{ + direction => rev, + group_level => 1, + end_key => [1] + }, + Result = run_query(Db, <<"boom">>, Args), + Expect = {ok, [ + row([2], rem_count(2, ?NUM_DOCS) * 1.5), + row([1], rem_count(1, ?NUM_DOCS) * 1.5) + ]}, + ?assertEqual(Expect, Result). + + +should_reduce_group_1_array_keys_inclusive_end_false({Db, _}) -> + Args = #{ + group_level => 1, + end_key => [1], + inclusive_end => false + }, + Result = run_query(Db, <<"boom">>, Args), + Expect = {ok, [ + row([0], rem_count(0, ?NUM_DOCS) * 1.5) + ]}, + ?assertEqual(Expect, Result). + + +should_reduce_group_1_array_keys_inclusive_end_false_rev({Db, _}) -> + Args = #{ + direction => rev, + group_level => 1, + end_key => [1, ?NUM_DOCS + 1], + inclusive_end => false + }, + Result = run_query(Db, <<"boom">>, Args), + Expect = {ok, [ + row([2], rem_count(2, ?NUM_DOCS) * 1.5) + ]}, + ?assertEqual(Expect, Result). + + +should_reduce_group_1_array_keys_start_and_end_key({Db, _}) -> + Args = #{ + group_level => 1, + start_key => [1], + end_key => [1, ?NUM_DOCS + 1] + }, + Result = run_query(Db, <<"boom">>, Args), + Expect = {ok, [ + row([1], rem_count(1, ?NUM_DOCS) * 1.5) + ]}, + ?assertEqual(Expect, Result). + + +should_reduce_group_1_array_keys_start_and_end_key_rev({Db, _}) -> + Args = #{ + direction => rev, + group_level => 1, + start_key => [1, ?NUM_DOCS + 1], + end_key => [1] + }, + Result = run_query(Db, <<"boom">>, Args), + Expect = {ok, [ + row([1], rem_count(1, ?NUM_DOCS) * 1.5) + ]}, + ?assertEqual(Expect, Result). + + +should_reduce_group_1_array_keys_sub_array_select({Db, _}) -> + % Test that keys are applied below the key grouping + Args = #{ + group_level => 1, + start_key => [0, ?NUM_DOCS - 6], + end_key => [1, 4] + }, + Result = run_query(Db, <<"boom">>, Args), + Expect = {ok, [ + row([0], 3.0), + row([1], 3.0) + ]}, + ?assertEqual(Expect, Result). + + +should_reduce_group_1_array_keys_sub_array_select_rev({Db, _}) -> + % Test that keys are applied below the key grouping + Args = #{ + direction => rev, + group_level => 1, + start_key => [1, 4], + end_key => [0, ?NUM_DOCS - 6] + }, + Result = run_query(Db, <<"boom">>, Args), + Expect = {ok, [ + row([1], 3.0), + row([0], 3.0) + ]}, + ?assertEqual(Expect, Result). + + +should_reduce_group_1_array_keys_sub_array_inclusive_end({Db, _}) -> + % Test that keys are applied below the key grouping + Args = #{ + group_level => 1, + start_key => [0, ?NUM_DOCS - 6], + end_key => [1, 4], + inclusive_end => false + }, + Result = run_query(Db, <<"boom">>, Args), + Expect = {ok, [ + row([0], 3.0), + row([1], 1.5) + ]}, + ?assertEqual(Expect, Result). + + +should_reduce_group_1_array_keys_empty_range({Db, _}) -> + Args = #{ + group_level => 1, + start_key => [100], + end_key => [101] + }, + Result = run_query(Db, <<"boom">>, Args), + Expect = {ok, []}, + ?assertEqual(Expect, Result). + + +should_reduce_group_1_array_keys_empty_range_rev({Db, _}) -> + Args = #{ + direction => rev, + group_level => 1, + start_key => [101], + end_key => [100] + }, + Result = run_query(Db, <<"boom">>, Args), + Expect = {ok, []}, + ?assertEqual(Expect, Result). + + +should_collate_group_keys({Db, _}) -> + DDoc = couch_doc:from_json_obj({[ + {<<"_id">>, <<"_design/bar">>}, + {<<"views">>, {[ + {<<"group">>, {[ + {<<"map">>, <<"function(doc) {emit([doc.val], 1);}">>}, + {<<"reduce">>, <<"_count">>} + ]}} + ]}} + ]}), + + % val is "föö" without combining characters + Doc1 = couch_doc:from_json_obj({[ + {<<"_id">>, <<"a">>}, + {<<"val">>, <<16#66, 16#C3, 16#B6, 16#C3, 16#B6>>} + ]}), + + % val is "föö" without combining characters + Doc2 = couch_doc:from_json_obj({[ + {<<"_id">>, <<"b">>}, + {<<"val">>, <<16#66, 16#6F, 16#CC, 16#88, 16#6F, 16#CC, 16#88>>} + ]}), + {ok, _} = fabric2_db:update_docs(Db, [DDoc, Doc1, Doc2]), + + % An implementation detail we have is that depending on + % the direction of the view read we'll get the first + % or last key to represent a group. In this particular + % implementation the document ID breaks the sort tie + % in the map view data. + + ArgsFwd = #{ + group_level => exact + }, + ResultFwd = run_query(Db, DDoc, <<"group">>, ArgsFwd), + ExpectFwd = {ok, [ + row([<<16#66, 16#C3, 16#B6, 16#C3, 16#B6>>], 2) + ]}, + ?assertEqual(ExpectFwd, ResultFwd), + + ArgsRev = #{ + direction => rev, + group_level => exact + }, + ResultRev = run_query(Db, DDoc, <<"group">>, ArgsRev), + ExpectRev = {ok, [ + row([<<16#66, 16#6F, 16#CC, 16#88, 16#6F, 16#CC, 16#88>>], 2) + ]}, + ?assertEqual(ExpectRev, ResultRev). + + +rem_count(Rem, Count) -> + Members = [I || I <- lists:seq(1, Count), I rem 3 == Rem], + length(Members). + + +run_query(Db, Idx, Args) -> + DDoc = create_ddoc(), + run_query(Db, DDoc, Idx, Args). + + +run_query(Db, DDoc, Idx, Args) -> + couch_views:query(Db, DDoc, Idx, fun default_cb/2, [], Args). + + +default_cb(complete, Acc) -> + {ok, lists:reverse(Acc)}; +default_cb({final, Info}, []) -> + {ok, [Info]}; +default_cb({final, _}, Acc) -> + {ok, Acc}; +default_cb({meta, _}, Acc) -> + {ok, Acc}; +default_cb(ok, ddoc_updated) -> + {ok, ddoc_updated}; +default_cb(Row, Acc) -> + {ok, [Row | Acc]}. + + +row(Key, Value) -> + {row, [{key, Key}, {value, Value}]}. + + +create_ddoc() -> + couch_doc:from_json_obj({[ + {<<"_id">>, <<"_design/bar">>}, + {<<"views">>, {[ + {<<"baz">>, {[ + {<<"map">>, <<"function(doc) {emit(doc.val, doc.val);}">>} + ]}}, + {<<"baz_count">>, {[ + {<<"map">>, <<"function(doc) {emit(doc.val, doc.val);}">>}, + {<<"reduce">>, <<"_count">>} + ]}}, + {<<"baz_size">>, {[ + {<<"map">>, <<"function(doc) {emit(doc.val, doc.val);}">>}, + {<<"reduce">>, <<"_sum">>} + ]}}, + {<<"boom">>, {[ + {<<"map">>, << + "function(doc) {\n" + " emit([doc.val % 3, doc.val], 1.5);\n" + "}" + >>}, + {<<"reduce">>, <<"_sum">>} + ]}}, + {<<"bing">>, {[ + {<<"map">>, <<"function(doc) {}">>}, + {<<"reduce">>, <<"_count">>} + ]}}, + {<<"bing_hyper">>, {[ + {<<"map">>, <<"function(doc) {}">>}, + {<<"reduce">>, <<"_approx_count_distinct">>} + ]}}, + {<<"doc_emit">>, {[ + {<<"map">>, <<"function(doc) {emit(doc.val, doc)}">>} + ]}}, + {<<"duplicate_keys">>, {[ + {<<"map">>, << + "function(doc) {\n" + " emit(doc._id, doc.val);\n" + " emit(doc._id, doc.val + 1);\n" + "}">>}, + {<<"reduce">>, <<"_count">>} + ]}}, + {<<"zing">>, {[ + {<<"map">>, << + "function(doc) {\n" + " if(doc.foo !== undefined)\n" + " emit(doc.foo, 0);\n" + "}" + >>} + ]}} + ]}} + ]}). + + +make_docs(Db, TotalDocs) when TotalDocs > 0 -> + make_docs(Db, TotalDocs, 0). + + +make_docs(Db, TotalDocs, DocsMade) when TotalDocs > DocsMade -> + DocCount = min(TotalDocs - DocsMade, 500), + Docs = [doc(I + DocsMade) || I <- lists:seq(1, DocCount)], + fabric2_db:update_docs(Db, Docs), + make_docs(Db, TotalDocs, DocsMade + DocCount); + +make_docs(_Db, TotalDocs, DocsMade) when TotalDocs =< DocsMade -> + ok. + + +doc(Id) -> + couch_doc:from_json_obj({[ + {<<"_id">>, list_to_binary(integer_to_list(Id))}, + {<<"val">>, Id} + ]}). -- cgit v1.2.1 From 811741d9b6dce3403b0330d9bdb860518764e1a9 Mon Sep 17 00:00:00 2001 From: Garren Smith Date: Thu, 24 Sep 2020 12:10:29 -0500 Subject: Add elixir tests for builtin reduce group levels --- .../test/reduce_builtin_group_level_tests.exs | 549 +++++++++++++++++++++ 1 file changed, 549 insertions(+) create mode 100644 test/elixir/test/reduce_builtin_group_level_tests.exs diff --git a/test/elixir/test/reduce_builtin_group_level_tests.exs b/test/elixir/test/reduce_builtin_group_level_tests.exs new file mode 100644 index 000000000..2a183494e --- /dev/null +++ b/test/elixir/test/reduce_builtin_group_level_tests.exs @@ -0,0 +1,549 @@ +defmodule ReduceBuiltinGroupLevelTests do + use CouchTestCase + + setup do + db_name = random_db_name() + {:ok, _} = create_db(db_name) + on_exit(fn -> delete_db(db_name) end) + + docs = create_docs() + ddoc = create_ddoc() + + body = %{ + docs: [ddoc | docs] + } + + resp = Couch.post("/#{db_name}/_bulk_docs", body: body) + Enum.each(resp.body, &assert(&1["ok"])) + + %{ + :db_name => db_name, + :ddoc => ddoc + } + end + + test "group_level=0 reduce startkey/endkey", context do + args = %{ + reduce: true, + group_level: 0, + start_key: [2018, 3, 2], + end_key: [2019, 5, 1] + } + + correct = [ + %{"key" => :null, "value" => 31} + ] + + run_query(context, args, "dates_sum", correct) + end + + test "group_level=0 reduce", context do + args = %{ + reduce: true, + group_level: 0 + } + + correct = [ + %{"key" => :null, "value" => 68} + ] + + run_query(context, args, "dates_sum", correct) + end + + test "group_level=1 reduce", context do + args = %{ + reduce: true, + group_level: 1 + } + + correct = [ + %{"key" => [2017], "value" => 31}, + %{"key" => [2018], "value" => 20}, + %{"key" => [2019], "value" => 17} + ] + + run_query(context, args, "dates_sum", correct) + end + + test "group_level=1 reduce with startkey/endkey", context do + args = %{ + reduce: true, + group_level: 1, + start_key: [2017, 4, 1], + end_key: [2018, 3, 1] + } + + correct = [ + %{"key" => [2017], "value" => 22}, + %{"key" => [2018], "value" => 6} + ] + + run_query(context, args, "dates_sum", correct) + end + + test "group_level=1 reduce with startkey/endkey take 2", context do + args = %{ + reduce: true, + group_level: 1, + start_key: [2017, 4, 1], + end_key: [2019, 3, 2] + } + + correct = [ + %{"key" => [2017], "value" => 22}, + %{"key" => [2018], "value" => 20}, + %{"key" => [2019], "value" => 4} + ] + + run_query(context, args, "dates_sum", correct) + end + + test "group_level=1 reduce with startkey/endkey take 3", context do + args = %{ + reduce: true, + group_level: 1, + start_key: [2017, 4, 1], + end_key: [2019, 05, 1] + } + + correct = [ + %{"key" => [2017], "value" => 22}, + %{"key" => [2018], "value" => 20}, + %{"key" => [2019], "value" => 17} + ] + + run_query(context, args, "dates_sum", correct) + end + + test "group_level=1 reduce with startkey", context do + args = %{ + reduce: true, + group_level: 1, + start_key: [2017, 4, 1] + } + + correct = [ + %{"key" => [2017], "value" => 22}, + %{"key" => [2018], "value" => 20}, + %{"key" => [2019], "value" => 17} + ] + + run_query(context, args, "dates_sum", correct) + end + + test "group_level=1 reduce with endkey", context do + args = %{ + reduce: true, + group_level: 1, + end_key: [2018, 5, 2] + } + + correct = [ + %{"key" => [2017], "value" => 31}, + %{"key" => [2018], "value" => 20} + ] + + run_query(context, args, "dates_sum", correct) + end + + test "group=true reduce with startkey/endkey", context do + args = %{ + reduce: true, + group: true, + start_key: [2018, 5, 1], + end_key: [2019, 04, 1] + } + + correct = [ + %{"key" => [2018, 5, 1], "value" => 7}, + %{"key" => [2019, 3, 1], "value" => 4}, + %{"key" => [2019, 4, 1], "value" => 6} + ] + + run_query(context, args, "dates_sum", correct) + end + + test "mixed count reduce group_level=1", context do + args = %{ + reduce: true, + group_level: 1, + limit: 6 + } + + correct = [ + %{"key" => 1, "value" => 2}, + %{"key" => 2, "value" => 2}, + %{"key" => 3, "value" => 2}, + %{"key" => [1], "value" => 3}, + %{"key" => [2], "value" => 2}, + %{"key" => [3], "value" => 3} + ] + + run_query(context, args, "count", correct) + end + + test "mixed count reduce group_level=2", context do + args = %{ + :reduce => true, + :group_level => 2, + :limit => 9 + } + + correct = [ + %{"key" => 1, "value" => 2}, + %{"key" => 2, "value" => 2}, + %{"key" => 3, "value" => 2}, + %{"key" => [1, 1], "value" => 2}, + %{"key" => [1, 2], "value" => 1}, + %{"key" => [2, 1], "value" => 1}, + %{"key" => [2, 3], "value" => 1}, + %{"key" => [3, 1], "value" => 2}, + %{"key" => [3, 4], "value" => 1} + ] + + run_query(context, args, "count", correct) + end + + test "mixed _count reduce group=2 reduce with startkey/endkey", context do + args = %{ + reduce: true, + group_level: 2, + start_key: 3, + end_key: [3, 1] + } + + correct = [ + %{"key" => 3, "value" => 2}, + %{"key" => [1, 1], "value" => 2}, + %{"key" => [1, 2], "value" => 1}, + %{"key" => [2, 1], "value" => 1}, + %{"key" => [2, 3], "value" => 1}, + %{"key" => [3, 1], "value" => 1} + ] + + run_query(context, args, "count", correct) + end + + test "mixed _count reduce group=2 reduce with startkey/endkey direction = rev", + context do + args = %{ + reduce: true, + group_level: 2, + start_key: [3, 1], + end_key: [1, 1], + descending: true + } + + correct = [ + %{"key" => [3, 1], "value" => 1}, + %{"key" => [2, 3], "value" => 1}, + %{"key" => [2, 1], "value" => 1}, + %{"key" => [1, 2], "value" => 1}, + %{"key" => [1, 1], "value" => 2} + ] + + run_query(context, args, "count", correct) + + args1 = %{ + reduce: true, + group_level: 2, + start_key: [3, 1], + descending: true + } + + correct1 = [ + %{"key" => [3, 1], "value" => 1}, + %{"key" => [2, 3], "value" => 1}, + %{"key" => [2, 1], "value" => 1}, + %{"key" => [1, 2], "value" => 1}, + %{"key" => [1, 1], "value" => 2}, + %{"key" => 3, "value" => 2}, + %{"key" => 2, "value" => 2}, + %{"key" => 1, "value" => 2} + ] + + run_query(context, args1, "count", correct1) + + args2 = %{ + reduce: true, + group_level: 2, + end_key: [1, 1], + descending: true + } + + correct2 = [ + %{"key" => [3, 4], "value" => 1}, + %{"key" => [3, 1], "value" => 2}, + %{"key" => [2, 3], "value" => 1}, + %{"key" => [2, 1], "value" => 1}, + %{"key" => [1, 2], "value" => 1}, + %{"key" => [1, 1], "value" => 2} + ] + + run_query(context, args2, "count", correct2) + + args3 = %{ + reduce: true, + group_level: 0, + descending: true + } + + correct3 = [ + %{"key" => :null, "value" => 14} + ] + + run_query(context, args3, "count", correct3) + end + + test "mixed _count reduce group=2 reduce with skip", context do + args = %{ + reduce: true, + group_level: 2, + start_key: 3, + skip: 2, + end_key: [3, 1] + } + + correct = [ + %{"key" => [1, 2], "value" => 1}, + %{"key" => [2, 1], "value" => 1}, + %{"key" => [2, 3], "value" => 1}, + %{"key" => [3, 1], "value" => 1} + ] + + run_query(context, args, "count", correct) + end + + test "mixed _count reduce group=2 reduce inclusive_end = false", context do + args = %{ + reduce: true, + group_level: 2, + start_key: [1, 1], + end_key: [3, 1], + inclusive_end: false + } + + correct = [ + %{"key" => [1, 1], "value" => 2}, + %{"key" => [1, 2], "value" => 1}, + %{"key" => [2, 1], "value" => 1}, + %{"key" => [2, 3], "value" => 1} + ] + + run_query(context, args, "count", correct) + + end + + test "mixed _count reduce group=2 reduce inclusive_end = false descending", context do + + args1 = %{ + reduce: true, + group_level: 2, + start_key: [3, 1], + end_key: [1, 1], + descending: true, + inclusive_end: false + } + + correct1 = [ + %{"key" => [3, 1], "value" => 1}, + %{"key" => [2, 3], "value" => 1}, + %{"key" => [2, 1], "value" => 1}, + %{"key" => [1, 2], "value" => 1}, + %{"key" => [1, 1], "value" => 1} + ] + + run_query(context, args1, "count", correct1) + end + + test "strings count reduce group_level=1", context do + args = %{ + reduce: true, + group_level: 1, + start_key: "4" + } + + correct = [ + %{"key" => "5", "value" => 1}, + %{"key" => "6", "value" => 1}, + %{"key" => "7", "value" => 1}, + %{"key" => "8", "value" => 2}, + %{"key" => "9", "value" => 1} + ] + + run_query(context, args, "count_strings", correct) + end + + test "_stats reduce works", context do + args = %{ + reduce: true, + group_level: 1 + } + + correct = [ + %{ + "key" => [2017], + "value" => %{"sum" => 31, "count" => 4, "min" => 6, "max" => 9, "sumsqr" => 247} + }, + %{ + "key" => [2018], + "value" => %{"sum" => 20, "count" => 4, "min" => 3, "max" => 7, "sumsqr" => 110} + }, + %{ + "key" => [2019], + "value" => %{"sum" => 17, "count" => 3, "min" => 4, "max" => 7, "sumsqr" => 101} + } + ] + + run_query(context, args, "stats", correct) + end + + test "_approx_count_distinct reduce works", context do + args = %{ + reduce: true, + group_level: 1 + } + + correct = [ + %{"key" => [2017], "value" => 4}, + %{"key" => [2018], "value" => 3}, + %{"key" => [2019], "value" => 3} + ] + + run_query(context, args, "distinct", correct) + end + + test "get view info with reduce size", context do + db_name = context[:db_name] + + resp = Couch.post("/#{db_name}/_design/bar/_view/distinct/", body: %{reduce: true}) + assert resp.status_code == 200 + + resp = Couch.get("/#{db_name}/_design/bar/_info") + assert resp.status_code == 200 + assert resp.body["view_index"]["sizes"]["active"] == 1073 + end + + defp run_query(context, args, view, correct_resp) do + db_name = context[:db_name] + + resp = Couch.post("/#{db_name}/_design/bar/_view/#{view}/", body: args) + assert resp.status_code == 200 + rows = resp.body["rows"] + + assert(rows == correct_resp) + end + + defp create_docs() do + dates = [ + {[2017, 3, 1], 9}, + {[2017, 4, 1], 7}, + # out of order check + {[2019, 3, 1], 4}, + {[2017, 4, 15], 6}, + {[2018, 4, 1], 3}, + {[2017, 5, 1], 9}, + {[2018, 3, 1], 6}, + # duplicate check + {[2018, 4, 1], 4}, + {[2018, 5, 1], 7}, + {[2019, 4, 1], 6}, + {[2019, 5, 1], 7} + ] + + for i <- 1..11 do + group = + if rem(i, 3) == 0 do + "first" + else + "second" + end + + {date_key, date_val} = Enum.at(dates, i - 1) + + val = + if i == 4 do + 8 + else + i + end + + %{ + _id: "doc-id-#{i}", + value: i, + some: "field", + group: group, + date: date_key, + date_val: date_val, + random_val: val + } + end + end + + defp create_ddoc() do + %{ + "_id" => "_design/bar", + "views" => %{ + "dates_sum" => %{ + "map" => """ + + function(doc) { + emit(doc.date, doc.date_val); + } + """, + "reduce" => "_sum" + }, + "count_strings" => %{ + "map" => """ + function(doc) { + emit(doc.random_val.toString(), 1); + } + """, + "reduce" => "_count" + }, + "count" => %{ + "map" => """ + function(doc) { + if (doc.value > 3) { + return; + } + emit(doc.value, doc.value); + emit(doc.value, doc.value); + emit([doc.value, 1], doc.value); + emit([doc.value, doc.value + 1, doc.group.length], doc.value); + + if (doc.value === 3) { + emit([1, 1, 5], 1); + emit([doc.value, 1, 5], 1); + } + } + """, + "reduce" => "_count" + }, + "distinct" => %{ + "map" => """ + function(doc) { + emit(doc.date, doc.date_val); + } + """, + "reduce" => "_approx_count_distinct" + }, + "stats" => %{ + "map" => """ + function(doc) { + emit(doc.date, doc.date_val); + } + """, + "reduce" => "_stats" + }, + "no_reduce" => %{ + "map" => """ + function (doc) { + emit(doc._id, doc.value); + } + """ + } + } + } + end +end -- cgit v1.2.1 From 4a4515e01d82a6f88ecd3a6b61b5beeab6127424 Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Tue, 29 Sep 2020 15:23:32 -0500 Subject: Optimizations for reading reduce views These are a few micro optimizations to avoid unnecessary work when reading from a single reduce function during a view read. --- src/couch/src/couch_query_servers.erl | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/couch/src/couch_query_servers.erl b/src/couch/src/couch_query_servers.erl index 447daea61..8eb07abbf 100644 --- a/src/couch/src/couch_query_servers.erl +++ b/src/couch/src/couch_query_servers.erl @@ -111,6 +111,8 @@ rereduce(Lang, RedSrcs, ReducedValues) -> reduce(_Lang, [], _KVs) -> {ok, []}; +reduce(Lang, [<<"_", _/binary>>] = RedSrcs, KVs) -> + builtin_reduce(reduce, RedSrcs, KVs, []); reduce(Lang, RedSrcs, KVs) -> {OsRedSrcs, BuiltinReds} = lists:partition(fun (<<"_", _/binary>>) -> false; @@ -171,7 +173,10 @@ builtin_reduce(_Re, [], _KVs, Acc) -> {ok, lists:reverse(Acc)}; builtin_reduce(Re, [<<"_sum",_/binary>>|BuiltinReds], KVs, Acc) -> Sum = builtin_sum_rows(KVs, 0), - Red = check_sum_overflow(?term_size(KVs), ?term_size(Sum), Sum), + Red = case is_number(Sum) of + true -> Sum; + false -> check_sum_overflow(?term_size(KVs), ?term_size(Sum), Sum) + end, builtin_reduce(Re, BuiltinReds, KVs, [Red|Acc]); builtin_reduce(reduce, [<<"_count",_/binary>>|BuiltinReds], KVs, Acc) -> Count = length(KVs), -- cgit v1.2.1 From a07413385b741e044ce9d7aa588c2918faacbebf Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Wed, 30 Sep 2020 15:41:25 -0400 Subject: Fix transient replication job state wait logic Make sure to handle both `finished` and `pending` states when waiting for a transient jobs. A transient job will go to the `failed` state if it cannot fetch the filter from the source endpoint. For completeness, we also account for `pending` states in there in the remote chance the job get rescheduled again. --- src/couch_replicator/src/couch_replicator_jobs.erl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/couch_replicator/src/couch_replicator_jobs.erl b/src/couch_replicator/src/couch_replicator_jobs.erl index a602b0c62..51f441caf 100644 --- a/src/couch_replicator/src/couch_replicator_jobs.erl +++ b/src/couch_replicator/src/couch_replicator_jobs.erl @@ -170,12 +170,14 @@ wait_running(JobId) -> wait_running(JobId, SubId) -> - case couch_jobs:wait(SubId, running, infinity) of + case couch_jobs:wait(SubId, infinity) of {?REP_JOBS, _, running, #{?STATE := ?ST_PENDING}} -> wait_running(JobId, SubId); {?REP_JOBS, _, running, JobData} -> ok = couch_jobs:unsubscribe(SubId), {ok, JobData}; + {?REP_JOBS, _, pending, _} -> + wait_running(JobId, SubId); {?REP_JOBS, _, finished, JobData} -> ok = couch_jobs:unsubscribe(SubId), {ok, JobData} -- cgit v1.2.1 From eaf13d35338fa4eecf660879c0b469c1db850a5b Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Wed, 30 Sep 2020 15:49:25 -0400 Subject: Fix error reporting when fetching replication filters Don't unnecessarily unwrap the fetch error since `error_info/1` can already handle the current shape. Also, make sure to translate the reason to binary for consistency with the other filter fetching errors in the `couch_replicator_filters` module. Add a test to ensure we return the `filter_fetch_error` term as that is explicitly turned into a 404 error in chttpd, so we try to maintain compatibility with CouchDB <= 3.x code. --- src/couch_replicator/src/couch_replicator_filters.erl | 2 +- src/couch_replicator/src/couch_replicator_job.erl | 5 ++--- .../eunit/couch_replicator_transient_jobs_tests.erl | 17 +++++++++++++++-- 3 files changed, 18 insertions(+), 6 deletions(-) diff --git a/src/couch_replicator/src/couch_replicator_filters.erl b/src/couch_replicator/src/couch_replicator_filters.erl index 50c37335d..1cadce1dd 100644 --- a/src/couch_replicator/src/couch_replicator_filters.erl +++ b/src/couch_replicator/src/couch_replicator_filters.erl @@ -150,7 +150,7 @@ fetch_internal(DDocName, FilterName, Source) -> couch_replicator_api_wrap:db_uri(Source), couch_util:to_binary(CodeError)] ), - throw({fetch_error, CodeErrorMsg}) + throw({fetch_error, iolist_to_binary(CodeErrorMsg)}) end after couch_replicator_api_wrap:db_close(Db) diff --git a/src/couch_replicator/src/couch_replicator_job.erl b/src/couch_replicator/src/couch_replicator_job.erl index ed3d00d7b..eaa661e77 100644 --- a/src/couch_replicator/src/couch_replicator_job.erl +++ b/src/couch_replicator/src/couch_replicator_job.erl @@ -810,9 +810,8 @@ get_rep_id(JTx, Job, #{} = JobData) -> try couch_replicator_ids:replication_id(Rep) catch - throw:{filter_fetch_error, Error} -> - Error1 = io_lib:format("Filter fetch error ~p", [Error]), - reschedule_on_error(JTx, Job, JobData, Error1), + throw:{filter_fetch_error, _} = Error -> + reschedule_on_error(JTx, Job, JobData, {error, Error}), exit({shutdown, finished}) end. diff --git a/src/couch_replicator/test/eunit/couch_replicator_transient_jobs_tests.erl b/src/couch_replicator/test/eunit/couch_replicator_transient_jobs_tests.erl index 25fc6a3ff..222d13809 100644 --- a/src/couch_replicator/test/eunit/couch_replicator_transient_jobs_tests.erl +++ b/src/couch_replicator/test/eunit/couch_replicator_transient_jobs_tests.erl @@ -31,7 +31,8 @@ transient_jobs_test_() -> fun teardown/1, [ ?TDEF_FE(transient_job_is_removed, 10), - ?TDEF_FE(posting_same_job_is_a_noop, 10) + ?TDEF_FE(posting_same_job_is_a_noop, 10), + ?TDEF_FE(transient_job_with_a_bad_filter, 10) ] } } @@ -79,7 +80,19 @@ posting_same_job_is_a_noop({Source, Target}) -> ?assertEqual(Pid1, Pid2), couch_replicator_test_helper:cancel(RepId1). - + +transient_job_with_a_bad_filter({Source, Target}) -> + DDoc = #{<<"_id">> => <<"_design/myddoc">>}, + couch_replicator_test_helper:create_docs(Source, [DDoc]), + Result = couch_replicator:replicate(#{ + <<"source">> => couch_replicator_test_helper:db_url(Source), + <<"target">> => couch_replicator_test_helper:db_url(Target), + <<"continuous">> => true, + <<"filter">> => <<"myddoc/myfilter">> + }, ?ADMIN_USER), + ?assertMatch({error, #{<<"error">> := <<"filter_fetch_error">>}}, Result). + + get_rep_id(Source, Target) -> {ok, Id, _} = couch_replicator_parse:parse_transient_rep(#{ <<"source">> => couch_replicator_test_helper:db_url(Source), -- cgit v1.2.1 From f143beb96546931314fe636cc485b1d4c84f0f4a Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Wed, 30 Sep 2020 18:10:10 -0400 Subject: Add node and pid to _scheduler/jobs output This is mainly for compatibility with CouchDB 3.x Ref: https://docs.couchdb.org/en/stable/api/server/common.html#scheduler-jobs --- src/couch_replicator/src/couch_replicator.erl | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/couch_replicator/src/couch_replicator.erl b/src/couch_replicator/src/couch_replicator.erl index 8ab36e587..a690d37c3 100644 --- a/src/couch_replicator/src/couch_replicator.erl +++ b/src/couch_replicator/src/couch_replicator.erl @@ -438,7 +438,9 @@ job_ejson(#{} = JobData) -> ?STATE := State, ?STATE_INFO := Info0, ?JOB_HISTORY := History, - ?REP_STATS := Stats + ?REP_STATS := Stats, + ?REP_NODE := Node, + ?REP_PID := Pid } = JobData, #{ @@ -470,7 +472,9 @@ job_ejson(#{} = JobData) -> <<"info">> => Info, <<"user">> => User, <<"history">> => History1, - <<"start_time">> => StartISO8601 + <<"start_time">> => StartISO8601, + <<"node">> => Node, + <<"pid">> => Pid }. -- cgit v1.2.1 From d560cb7c251f5f9a3c8a02138244ba3c0c4a9fe9 Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Mon, 5 Oct 2020 15:25:12 -0400 Subject: Keep auth properties as a map in replicator's httpdb record Previously there was an attempt to keep backwards compatibility with 3.x replicator plugins by transforming the auth into a proplist with `maps:to_list/1`. However, that didn't account for nested properties, so we could have ended up with a top level of props with maps for some values. Instead of making things too complicating, and doing a nested transform to proplists, just keep the auth object as a map and let the plugins handle the compatibility issue. --- src/couch_replicator/include/couch_replicator_api_wrap.hrl | 2 +- src/couch_replicator/src/couch_replicator_api_wrap.erl | 2 +- src/couch_replicator/src/couch_replicator_utils.erl | 5 ++++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/couch_replicator/include/couch_replicator_api_wrap.hrl b/src/couch_replicator/include/couch_replicator_api_wrap.hrl index 0f8213c51..6212ab412 100644 --- a/src/couch_replicator/include/couch_replicator_api_wrap.hrl +++ b/src/couch_replicator/include/couch_replicator_api_wrap.hrl @@ -14,7 +14,7 @@ -record(httpdb, { url, - auth_props = [], + auth_props = #{}, headers = [ {"Accept", "application/json"}, {"User-Agent", "CouchDB-Replicator/" ++ couch_server:get_version()} diff --git a/src/couch_replicator/src/couch_replicator_api_wrap.erl b/src/couch_replicator/src/couch_replicator_api_wrap.erl index da6f28800..1df8ee0c7 100644 --- a/src/couch_replicator/src/couch_replicator_api_wrap.erl +++ b/src/couch_replicator/src/couch_replicator_api_wrap.erl @@ -917,7 +917,7 @@ db_from_json(#{} = DbMap) -> end, #httpdb{ url = binary_to_list(Url), - auth_props = maps:to_list(Auth), + auth_props = Auth, headers = Headers, ibrowse_options = IBrowseOptions, timeout = Timeout, diff --git a/src/couch_replicator/src/couch_replicator_utils.erl b/src/couch_replicator/src/couch_replicator_utils.erl index cbed78ead..523de5f54 100644 --- a/src/couch_replicator/src/couch_replicator_utils.erl +++ b/src/couch_replicator/src/couch_replicator_utils.erl @@ -281,7 +281,10 @@ normalize_rep_test_() -> normalize_endpoint() -> HttpDb = #httpdb{ url = "http://host/db", - auth_props = [{"key", "val"}], + auth_props = #{ + "key" => "val", + "nested" => #{<<"other_key">> => "other_val"} + }, headers = [{"k2","v2"}, {"k1","v1"}], timeout = 30000, ibrowse_options = [{k2, v2}, {k1, v1}], -- cgit v1.2.1 From 990a3bd7240971cc0274b26fa496b88fd6c66f44 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Tue, 6 Oct 2020 15:18:31 +0100 Subject: simplify max_document_size comment --- rel/overlay/etc/default.ini | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/rel/overlay/etc/default.ini b/rel/overlay/etc/default.ini index 3a377c7c8..8970be572 100644 --- a/rel/overlay/etc/default.ini +++ b/rel/overlay/etc/default.ini @@ -49,10 +49,8 @@ changes_doc_ids_optimization_threshold = 100 ; Limit maximum document size. Requests to create / update documents with a body ; size larger than this will fail with a 413 http error. This limit applies to ; requests which update a single document as well as individual documents from -; a _bulk_docs request. Since there is no canonical size of json encoded data, -; due to variabiliy in what is escaped or how floats are encoded, this limit is -; applied conservatively. For example 1.0e+16 could be encoded as 1e16, so 4 used -; for size calculation instead of 7. +; a _bulk_docs request. The size limit is approximate due to the nature of JSON +; encoding. max_document_size = 8000000 ; bytes ; ; Maximum number of documents in a _bulk_docs request. Anything larger -- cgit v1.2.1 From e42d33fcc77a09e99d1bc70557810a8536a80027 Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Tue, 6 Oct 2020 17:48:07 -0400 Subject: Properly initialize `user` in replication job's state This value is emitted in _active_tasks and was previously emitting `null` values from the state record's defaults. --- src/couch_replicator/src/couch_replicator_job.erl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/couch_replicator/src/couch_replicator_job.erl b/src/couch_replicator/src/couch_replicator_job.erl index eaa661e77..e13fb1120 100644 --- a/src/couch_replicator/src/couch_replicator_job.erl +++ b/src/couch_replicator/src/couch_replicator_job.erl @@ -947,7 +947,8 @@ init_state(#{} = Job, #{} = JobData) -> ?SOURCE := Src0, ?TARGET := Tgt, ?START_TIME := StartTime, - ?OPTIONS := Options0 + ?OPTIONS := Options0, + ?REP_USER := User } = Rep, % Optimize replication parameters if last time the jobs crashed because it @@ -1013,7 +1014,8 @@ init_state(#{} = Job, #{} = JobData) -> stats_timer = nil, doc_id = DocId, db_name = DbName, - db_uuid = DbUUID + db_uuid = DbUUID, + user = User }, start_checkpoint_timer(State). -- cgit v1.2.1 From 666d391a743562afe386a24d38afc81e75aa8d49 Mon Sep 17 00:00:00 2001 From: Juanjo Rodriguez Date: Thu, 24 Sep 2020 07:45:05 +0200 Subject: Preserve query string rewrite when the request contains a body --- src/chttpd/src/chttpd_rewrite.erl | 3 ++- test/javascript/tests/rewrite_js.js | 16 ++++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/src/chttpd/src/chttpd_rewrite.erl b/src/chttpd/src/chttpd_rewrite.erl index 019651374..1c2c1f333 100644 --- a/src/chttpd/src/chttpd_rewrite.erl +++ b/src/chttpd/src/chttpd_rewrite.erl @@ -71,8 +71,9 @@ do_rewrite(#httpd{mochi_req=MochiReq}=Req, {Props}=Rewrite) when is_list(Props) undefined -> erlang:get(mochiweb_request_body); B -> B end, + NewMochiReq:cleanup(), case Body of - undefined -> NewMochiReq:cleanup(); + undefined -> []; _ -> erlang:put(mochiweb_request_body, Body) end, couch_log:debug("rewrite to ~p", [Path]), diff --git a/test/javascript/tests/rewrite_js.js b/test/javascript/tests/rewrite_js.js index 22de6c940..0ccdd6515 100644 --- a/test/javascript/tests/rewrite_js.js +++ b/test/javascript/tests/rewrite_js.js @@ -345,6 +345,22 @@ couchTests.rewrite = function(debug) { var xhr = CouchDB.request("GET", url); TEquals(400, xhr.status); + // test requests with body preserve the query string rewrite + var ddoc_qs = { + "_id": "_design/qs", + "rewrites": "function (r) { return {path: '../../_changes', query: {'filter': '_doc_ids'}};};" + } + db.save(ddoc_qs); + db.save({"_id": "qs1", "foo": "bar"}); + db.save({"_id": "qs2", "foo": "bar"}); + + var url = "/"+dbName+"/_design/qs/_rewrite"; + + var xhr = CouchDB.request("POST", url, {body: JSON.stringify({"doc_ids": ["qs2"]})}); + var result = JSON.parse(xhr.responseText); + T(xhr.status == 200); + T(result.results.length == 1, "Only one doc is expected"); + TEquals(result.results[0].id, "qs2"); // cleanup db.deleteDb(); } -- cgit v1.2.1 From b616cc17873bb45bdbe88efed6a93ecaafe39669 Mon Sep 17 00:00:00 2001 From: Juanjo Rodriguez Date: Thu, 23 Jul 2020 08:26:36 +0200 Subject: port rewrite and rewrite_js tests into elixir --- test/elixir/README.md | 2 +- test/elixir/lib/couch_raw.ex | 105 ++++++++ test/elixir/test/changes_async_test.exs | 103 -------- test/elixir/test/rewrite_js_test.exs | 411 ++++++++++++++++++++++++++++++++ test/elixir/test/rewrite_test.exs | 181 +++++++++++++- test/javascript/tests/rewrite.js | 2 +- test/javascript/tests/rewrite_js.js | 3 +- 7 files changed, 691 insertions(+), 116 deletions(-) create mode 100644 test/elixir/lib/couch_raw.ex create mode 100644 test/elixir/test/rewrite_js_test.exs diff --git a/test/elixir/README.md b/test/elixir/README.md index 52ce45a75..7e19d3964 100644 --- a/test/elixir/README.md +++ b/test/elixir/README.md @@ -89,7 +89,7 @@ X means done, - means partially - [ ] Port replicator_db_write_auth.js - [X] Port rev_stemming.js - [X] Port rewrite.js - - [ ] Port rewrite_js.js + - [X] Port rewrite_js.js - [X] Port security_validation.js - [ ] Port show_documents.js - [ ] Port stats.js diff --git a/test/elixir/lib/couch_raw.ex b/test/elixir/lib/couch_raw.ex new file mode 100644 index 000000000..62a0bbd0e --- /dev/null +++ b/test/elixir/lib/couch_raw.ex @@ -0,0 +1,105 @@ +defmodule Rawresp do + use HTTPotion.Base + + @moduledoc """ + HTTP client that provides raw response as result + """ + @request_timeout 60_000 + @inactivity_timeout 55_000 + + def process_url("http://" <> _ = url) do + url + end + + def process_url(url) do + base_url = System.get_env("EX_COUCH_URL") || "http://127.0.0.1:15984" + base_url <> url + end + + def process_request_headers(headers, _body, options) do + headers = + headers + |> Keyword.put(:"User-Agent", "couch-potion") + + headers = + if headers[:"Content-Type"] do + headers + else + Keyword.put(headers, :"Content-Type", "application/json") + end + + case Keyword.get(options, :cookie) do + nil -> + headers + + cookie -> + Keyword.put(headers, :Cookie, cookie) + end + end + + def process_options(options) do + options + |> set_auth_options() + |> set_inactivity_timeout() + |> set_request_timeout() + end + + def process_request_body(body) do + if is_map(body) do + :jiffy.encode(body) + else + body + end + end + + def set_auth_options(options) do + if Keyword.get(options, :cookie) == nil do + headers = Keyword.get(options, :headers, []) + + if headers[:basic_auth] != nil or headers[:authorization] != nil do + options + else + username = System.get_env("EX_USERNAME") || "adm" + password = System.get_env("EX_PASSWORD") || "pass" + Keyword.put(options, :basic_auth, {username, password}) + end + else + options + end + end + + def set_inactivity_timeout(options) do + Keyword.update( + options, + :ibrowse, + [{:inactivity_timeout, @inactivity_timeout}], + fn ibrowse -> + Keyword.put_new(ibrowse, :inactivity_timeout, @inactivity_timeout) + end + ) + end + + def set_request_timeout(options) do + timeout = Application.get_env(:httpotion, :default_timeout, @request_timeout) + Keyword.put_new(options, :timeout, timeout) + end + + def login(userinfo) do + [user, pass] = String.split(userinfo, ":", parts: 2) + login(user, pass) + end + + def login(user, pass, expect \\ :success) do + resp = Couch.post("/_session", body: %{:username => user, :password => pass}) + + if expect == :success do + true = resp.body["ok"] + cookie = resp.headers[:"set-cookie"] + [token | _] = String.split(cookie, ";") + %Couch.Session{cookie: token} + else + true = Map.has_key?(resp.body, "error") + %Couch.Session{error: resp.body["error"]} + end + end +end diff --git a/test/elixir/test/changes_async_test.exs b/test/elixir/test/changes_async_test.exs index 36876aedf..001c5d58c 100644 --- a/test/elixir/test/changes_async_test.exs +++ b/test/elixir/test/changes_async_test.exs @@ -441,106 +441,3 @@ defmodule ChangesAsyncTest do create_doc(db_name, ddoc) end end - -defmodule Rawresp do - use HTTPotion.Base - - @request_timeout 60_000 - @inactivity_timeout 55_000 - - def process_url("http://" <> _ = url) do - url - end - - def process_url(url) do - base_url = System.get_env("EX_COUCH_URL") || "http://127.0.0.1:15984" - base_url <> url - end - - def process_request_headers(headers, _body, options) do - headers = - headers - |> Keyword.put(:"User-Agent", "couch-potion") - - headers = - if headers[:"Content-Type"] do - headers - else - Keyword.put(headers, :"Content-Type", "application/json") - end - - case Keyword.get(options, :cookie) do - nil -> - headers - - cookie -> - Keyword.put(headers, :Cookie, cookie) - end - end - - def process_options(options) do - options - |> set_auth_options() - |> set_inactivity_timeout() - |> set_request_timeout() - end - - def process_request_body(body) do - if is_map(body) do - :jiffy.encode(body) - else - body - end - end - - def set_auth_options(options) do - if Keyword.get(options, :cookie) == nil do - headers = Keyword.get(options, :headers, []) - - if headers[:basic_auth] != nil or headers[:authorization] != nil do - options - else - username = System.get_env("EX_USERNAME") || "adm" - password = System.get_env("EX_PASSWORD") || "pass" - Keyword.put(options, :basic_auth, {username, password}) - end - else - options - end - end - - def set_inactivity_timeout(options) do - Keyword.update( - options, - :ibrowse, - [{:inactivity_timeout, @inactivity_timeout}], - fn ibrowse -> - Keyword.put_new(ibrowse, :inactivity_timeout, @inactivity_timeout) - end - ) - end - - def set_request_timeout(options) do - timeout = Application.get_env(:httpotion, :default_timeout, @request_timeout) - Keyword.put_new(options, :timeout, timeout) - end - - def login(userinfo) do - [user, pass] = String.split(userinfo, ":", parts: 2) - login(user, pass) - end - - def login(user, pass, expect \\ :success) do - resp = Couch.post("/_session", body: %{:username => user, :password => pass}) - - if expect == :success do - true = resp.body["ok"] - cookie = resp.headers[:"set-cookie"] - [token | _] = String.split(cookie, ";") - %Couch.Session{cookie: token} - else - true = Map.has_key?(resp.body, "error") - %Couch.Session{error: resp.body["error"]} - end - end -end diff --git a/test/elixir/test/rewrite_js_test.exs b/test/elixir/test/rewrite_js_test.exs new file mode 100644 index 000000000..a3adb3e7d --- /dev/null +++ b/test/elixir/test/rewrite_js_test.exs @@ -0,0 +1,411 @@ +defmodule RewriteJSTest do + use CouchTestCase + + @moduletag :js_engine + @moduletag kind: :single_node + + @moduledoc """ + Test CouchDB rewrites JS + This is a port of the rewrite_js.js suite + """ + + @ddoc %{ + _id: "_design/test", + language: "javascript", + _attachments: %{ + "foo.txt": %{ + content_type: "text/plain", + data: "VGhpcyBpcyBhIGJhc2U2NCBlbmNvZGVkIHRleHQ=" + } + }, + rewrites: """ + function(req) { + prefix = req.path[4]; + if (prefix === 'foo') { + return 'foo.txt'; + } + if (prefix === 'foo2') { + return {path: 'foo.txt', method: 'GET'}; + } + if (prefix === 'hello') { + if (req.method != 'PUT') { + return + } + id = req.path[5]; + return {path: '_update/hello/' + id}; + } + if (prefix === 'welcome') { + if (req.path.length == 6){ + name = req.path[5]; + return {path: '_show/welcome', query: {'name': name}}; + } + return '_show/welcome'; + } + if (prefix === 'welcome2') { + return {path: '_show/welcome', query: {'name': 'user'}}; + } + if (prefix === 'welcome3') { + name = req.path[5]; + if (req.method == 'PUT') { + path = '_update/welcome2/' + name; + } else if (req.method == 'GET') { + path = '_show/welcome2/' + name; + } else { + return; + } + return path; + } + if (prefix === 'welcome4') { + return {path: '_show/welcome3', query: {name: req.path[5]}}; + } + if (prefix === 'welcome5') { + rest = req.path.slice(5).join('/'); + return {path: '_show/' + rest, query: {name: rest}}; + } + if (prefix === 'basicView') { + rest = req.path.slice(5).join('/'); + return {path: '_view/basicView'}; + } + if (req.path.slice(4).join('/') === 'simpleForm/basicView') { + return {path: '_list/simpleForm/basicView'}; + } + if (req.path.slice(4).join('/') === 'simpleForm/basicViewFixed') { + return {path: '_list/simpleForm/basicView', + query: {startkey: '"3"', endkey: '"8"'}}; + } + if (req.path.slice(4).join('/') === 'simpleForm/complexView') { + return {path: '_list/simpleForm/complexView', + query: {key: JSON.stringify([1,2])}}; + } + if (req.path.slice(4).join('/') === 'simpleForm/complexView2') { + return {path: '_list/simpleForm/complexView', + query: {key: JSON.stringify(['test', {}])}}; + } + if (req.path.slice(4).join('/') === 'simpleForm/complexView3') { + return {path: '_list/simpleForm/complexView', + query: {key: JSON.stringify(['test', ['test', 'essai']])}}; + } + if (req.path.slice(4).join('/') === 'simpleForm/complexView4') { + return {path: '_list/simpleForm/complexView2', + query: {key: JSON.stringify({"c": 1})}}; + } + if (req.path.slice(4).join('/') === 'simpleForm/sendBody1') { + return {path: '_list/simpleForm/complexView2', + method: 'POST', + query: {limit: '1'}, + headers:{'Content-type':'application/json'}, + body: JSON.stringify( {keys: [{"c": 1}]} )}; + } + if (req.path.slice(4).join('/') === '/') { + return {path: '_view/basicView'}; + } + if (prefix === 'db') { + return {path: '../../' + req.path.slice(5).join('/')}; + } + } + """, + lists: %{ + simpleForm: """ + function(head, req) { + send('
    '); + var row, row_number = 0, prevKey, firstKey = null; + while (row = getRow()) { + row_number += 1; + if (!firstKey) firstKey = row.key; + prevKey = row.key; + send('\\n
  • Key: '+row.key + +' Value: '+row.value + +' LineNo: '+row_number+'
  • '); + } + return '

FirstKey: '+ firstKey + ' LastKey: '+ prevKey+'

'; + } + """ + }, + shows: %{ + welcome: """ + function(doc,req) { + return "Welcome " + req.query["name"]; + } + """, + welcome2: """ + function(doc, req) { + return "Welcome " + doc.name; + } + """, + welcome3: """ + function(doc,req) { + return "Welcome " + req.query["name"]; + } + """ + }, + updates: %{ + hello: """ + function(doc, req) { + if (!doc) { + if (req.id) { + return [{ + _id : req.id + }, "New World"] + } + return [null, "Empty World"]; + } + doc.world = "hello"; + doc.edited_by = req.userCtx; + return [doc, "hello doc"]; + } + """, + welcome2: """ + function(doc, req) { + if (!doc) { + if (req.id) { + return [{ + _id: req.id, + name: req.id + }, "New World"] + } + return [null, "Empty World"]; + } + return [doc, "hello doc"]; + } + """ + }, + views: %{ + basicView: %{ + map: """ + function(doc) { + if (doc.integer) { + emit(doc.integer, doc.string); + } + } + """ + }, + complexView: %{ + map: """ + function(doc) { + if (doc.type == "complex") { + emit([doc.a, doc.b], doc.string); + } + } + """ + }, + complexView2: %{ + map: """ + function(doc) { + if (doc.type == "complex") { + emit(doc.a, doc.string); + } + } + """ + }, + complexView3: %{ + map: """ + function(doc) { + if (doc.type == "complex") { + emit(doc.b, doc.string); + } + } + """ + } + } + } + + Enum.each( + ["test_rewrite_suite_db", "test_rewrite_suite_db%2Fwith_slashes"], + fn db_name -> + @tag with_random_db: db_name + test "Test basic js rewrites on #{db_name}", context do + db_name = context[:db_name] + + create_doc(db_name, @ddoc) + + docs1 = make_docs(0..9) + bulk_save(db_name, docs1) + + docs2 = [ + %{"a" => 1, "b" => 1, "string" => "doc 1", "type" => "complex"}, + %{"a" => 1, "b" => 2, "string" => "doc 2", "type" => "complex"}, + %{"a" => "test", "b" => %{}, "string" => "doc 3", "type" => "complex"}, + %{ + "a" => "test", + "b" => ["test", "essai"], + "string" => "doc 4", + "type" => "complex" + }, + %{"a" => %{"c" => 1}, "b" => "", "string" => "doc 5", "type" => "complex"} + ] + + bulk_save(db_name, docs2) + + # Test simple rewriting + resp = Couch.get("/#{db_name}/_design/test/_rewrite/foo") + assert resp.body == "This is a base64 encoded text" + assert resp.headers["Content-Type"] == "text/plain" + + resp = Couch.get("/#{db_name}/_design/test/_rewrite/foo2") + assert resp.body == "This is a base64 encoded text" + assert resp.headers["Content-Type"] == "text/plain" + + # Test POST, hello update world + resp = + Couch.post("/#{db_name}", body: %{"word" => "plankton", "name" => "Rusty"}).body + + assert resp["ok"] + doc_id = resp["id"] + assert doc_id + + resp = Couch.put("/#{db_name}/_design/test/_rewrite/hello/#{doc_id}") + assert resp.status_code in [201, 202] + assert resp.body == "hello doc" + assert String.match?(resp.headers["Content-Type"], ~r/charset=utf-8/) + + assert Couch.get("/#{db_name}/#{doc_id}").body["world"] == "hello" + + resp = Couch.get("/#{db_name}/_design/test/_rewrite/welcome?name=user") + assert resp.body == "Welcome user" + + resp = Couch.get("/#{db_name}/_design/test/_rewrite/welcome/user") + assert resp.body == "Welcome user" + + resp = Couch.get("/#{db_name}/_design/test/_rewrite/welcome2") + assert resp.body == "Welcome user" + + resp = Couch.put("/#{db_name}/_design/test/_rewrite/welcome3/test") + assert resp.status_code in [201, 202] + assert resp.body == "New World" + assert String.match?(resp.headers["Content-Type"], ~r/charset=utf-8/) + + resp = Couch.get("/#{db_name}/_design/test/_rewrite/welcome3/test") + assert resp.body == "Welcome test" + + resp = Couch.get("/#{db_name}/_design/test/_rewrite/welcome4/user") + assert resp.body == "Welcome user" + + resp = Couch.get("/#{db_name}/_design/test/_rewrite/welcome5/welcome3") + assert resp.body == "Welcome welcome3" + + resp = Couch.get("/#{db_name}/_design/test/_rewrite/basicView") + assert resp.status_code == 200 + assert resp.body["total_rows"] == 9 + + resp = Rawresp.get("/#{db_name}/_design/test/_rewrite/simpleForm/complexView") + assert resp.status_code == 200 + assert String.match?(resp.body, ~r/FirstKey: [1, 2]/) + + resp = Rawresp.get("/#{db_name}/_design/test/_rewrite/simpleForm/complexView2") + assert resp.status_code == 200 + assert String.match?(resp.body, ~r/Value: doc 3/) + + resp = Rawresp.get("/#{db_name}/_design/test/_rewrite/simpleForm/complexView3") + assert resp.status_code == 200 + assert String.match?(resp.body, ~r/Value: doc 4/) + + resp = Rawresp.get("/#{db_name}/_design/test/_rewrite/simpleForm/complexView4") + assert resp.status_code == 200 + assert String.match?(resp.body, ~r/Value: doc 5/) + + # COUCHDB-1612 - send body rewriting get to post + resp = Rawresp.get("/#{db_name}/_design/test/_rewrite/simpleForm/sendBody1") + assert resp.status_code == 200 + assert String.match?(resp.body, ~r/Value: doc 5 LineNo: 1/) + + resp = Couch.get("/#{db_name}/_design/test/_rewrite/db/_design/test?meta=true") + assert resp.status_code == 200 + assert resp.body["_id"] == "_design/test" + assert Map.has_key?(resp.body, "_revs_info") + end + + @tag with_random_db: db_name + test "early response on #{db_name}", context do + db_name = context[:db_name] + + ddoc = %{ + _id: "_design/response", + rewrites: """ + function(req){ + status = parseInt(req.query.status); + return {code: status, + body: JSON.stringify({"status": status}), + headers: {'x-foo': 'bar', 'Content-Type': 'application/json'}}; + } + """ + } + + create_doc(db_name, ddoc) + + resp = Couch.get("/#{db_name}/_design/response/_rewrite?status=200") + assert resp.status_code == 200 + assert resp.headers["x-foo"] == "bar" + assert resp.body["status"] == 200 + + resp = Couch.get("/#{db_name}/_design/response/_rewrite?status=451") + assert resp.status_code == 451 + assert resp.headers["Content-Type"] == "application/json" + + resp = Couch.get("/#{db_name}/_design/response/_rewrite?status=500") + assert resp.status_code == 500 + end + + @tag with_random_db: db_name + test "path relative to server on #{db_name}", context do + db_name = context[:db_name] + + ddoc = %{ + _id: "_design/relative", + rewrites: """ + function(req){ + return '../../../_uuids' + } + """ + } + + create_doc(db_name, ddoc) + resp = Couch.get("/#{db_name}/_design/relative/_rewrite/uuids") + assert resp.status_code == 200 + assert length(resp.body["uuids"]) == 1 + end + + @tag with_random_db: db_name + test "loop on #{db_name}", context do + db_name = context[:db_name] + + ddoc_loop = %{ + _id: "_design/loop", + rewrites: """ + function(req) { + return '_rewrite/loop'; + } + """ + } + + create_doc(db_name, ddoc_loop) + resp = Couch.get("/#{db_name}/_design/loop/_rewrite/loop") + assert resp.status_code == 400 + end + + @tag with_random_db: db_name + test "requests with body preserve the query string rewrite on #{db_name}", + context do + db_name = context[:db_name] + + ddoc_qs = %{ + _id: "_design/qs", + rewrites: + "function (r) { return {path: '../../_changes', query: {'filter': '_doc_ids'}};};" + } + + create_doc(db_name, ddoc_qs) + create_doc(db_name, %{_id: "qs1"}) + create_doc(db_name, %{_id: "qs2"}) + + resp = + Couch.post("/#{db_name}/_design/qs/_rewrite", + body: %{doc_ids: ["qs2"]} + ) + + assert resp.status_code == 200 + assert length(resp.body["results"]) == 1 + assert Enum.at(resp.body["results"], 0)["id"] == "qs2" + end + end + ) +end diff --git a/test/elixir/test/rewrite_test.exs b/test/elixir/test/rewrite_test.exs index daa2a80a8..75f198568 100644 --- a/test/elixir/test/rewrite_test.exs +++ b/test/elixir/test/rewrite_test.exs @@ -349,15 +349,178 @@ defmodule RewriteTest do assert resp.status_code == 200 assert resp.body["total_rows"] == 9 - # TODO: port _list function tests and everything below in rewrite.js - # This is currently broken because _list funcitons default to application/json - # response bodies and my attempts to change the content-type from within the - # _list function have not yet succeeded. - # - # Test GET with query params - # resp = Couch.get("/#{db_name}/_design/test/_rewrite/simpleForm/basicView", query: %{startkey: 3, endkey: 8}) - # Logger.error("GOT RESP: #{inspect resp.body}") - # assert resp.status_code == 200 + resp = + Rawresp.get( + "/#{db_name}/_design/test/_rewrite/simpleForm/basicView?startkey=3&endkey=8" + ) + + assert resp.status_code == 200 + assert not String.match?(resp.body, ~r/Key: 1/) + assert String.match?(resp.body, ~r/FirstKey: 3/) + assert String.match?(resp.body, ~r/LastKey: 8/) + + resp = Rawresp.get("/#{db_name}/_design/test/_rewrite/simpleForm/basicViewFixed") + assert resp.status_code == 200 + assert not String.match?(resp.body, ~r/Key: 1/) + assert String.match?(resp.body, ~r/FirstKey: 3/) + assert String.match?(resp.body, ~r/LastKey: 8/) + + resp = + Rawresp.get( + "/#{db_name}/_design/test/_rewrite/simpleForm/basicViewFixed?startkey=4" + ) + + assert resp.status_code == 200 + assert not String.match?(resp.body, ~r/Key: 1/) + assert String.match?(resp.body, ~r/FirstKey: 3/) + assert String.match?(resp.body, ~r/LastKey: 8/) + + resp = + Rawresp.get("/#{db_name}/_design/test/_rewrite/simpleForm/basicViewPath/3/8") + + assert resp.status_code == 200 + assert not String.match?(resp.body, ~r/Key: 1/) + assert String.match?(resp.body, ~r/FirstKey: 3/) + assert String.match?(resp.body, ~r/LastKey: 8/) + + resp = Rawresp.get("/#{db_name}/_design/test/_rewrite/simpleForm/complexView") + assert resp.status_code == 200 + assert String.match?(resp.body, ~r/FirstKey: [1, 2]/) + + resp = Rawresp.get("/#{db_name}/_design/test/_rewrite/simpleForm/complexView2") + assert resp.status_code == 200 + assert String.match?(resp.body, ~r/Value: doc 3/) + + resp = Rawresp.get("/#{db_name}/_design/test/_rewrite/simpleForm/complexView3") + assert resp.status_code == 200 + assert String.match?(resp.body, ~r/Value: doc 4/) + + resp = Rawresp.get("/#{db_name}/_design/test/_rewrite/simpleForm/complexView4") + assert resp.status_code == 200 + assert String.match?(resp.body, ~r/Value: doc 5/) + + resp = + Rawresp.get( + "/#{db_name}/_design/test/_rewrite/simpleForm/complexView5/test/essai" + ) + + assert resp.status_code == 200 + assert String.match?(resp.body, ~r/Value: doc 4/) + + resp = + Rawresp.get( + "/#{db_name}/_design/test/_rewrite/simpleForm/complexView6?a=test&b=essai" + ) + + assert resp.status_code == 200 + assert String.match?(resp.body, ~r/Value: doc 4/) + + resp = + Rawresp.get( + "/#{db_name}/_design/test/_rewrite/simpleForm/complexView7/test/essai?doc=true" + ) + + assert resp.status_code == 200 + result = resp.body |> IO.iodata_to_binary() |> :jiffy.decode([:return_maps]) + first_row = Enum.at(result["rows"], 0) + assert Map.has_key?(first_row, "doc") + + # COUCHDB-2031 - path normalization versus qs params + resp = Rawresp.get("/#{db_name}/_design/test/_rewrite/db/_design/test?meta=true") + assert resp.status_code == 200 + result = resp.body |> IO.iodata_to_binary() |> :jiffy.decode([:return_maps]) + assert result["_id"] == "_design/test" + assert Map.has_key?(result, "_revs_info") + + ddoc2 = %{ + _id: "_design/test2", + rewrites: [ + %{ + from: "uuids", + to: "../../../_uuids" + } + ] + } + + create_doc(db_name, ddoc2) + resp = Couch.get("/#{db_name}/_design/test2/_rewrite/uuids") + assert resp.status_code == 500 + assert resp.body["error"] == "insecure_rewrite_rule" + end + + @tag with_random_db: db_name + @tag config: [ + {"httpd", "secure_rewrites", "false"} + ] + test "path relative to server on #{db_name}", context do + db_name = context[:db_name] + + ddoc = %{ + _id: "_design/test2", + rewrites: [ + %{ + from: "uuids", + to: "../../../_uuids" + } + ] + } + + create_doc(db_name, ddoc) + + resp = Couch.get("/#{db_name}/_design/test2/_rewrite/uuids") + assert resp.status_code == 200 + assert length(resp.body["uuids"]) == 1 + end + + @tag with_random_db: db_name + @tag config: [ + {"httpd", "rewrite_limit", "2"} + ] + test "loop detection on #{db_name}", context do + db_name = context[:db_name] + + ddoc_loop = %{ + _id: "_design/loop", + rewrites: [%{from: "loop", to: "_rewrite/loop"}] + } + + create_doc(db_name, ddoc_loop) + + resp = Couch.get("/#{db_name}/_design/loop/_rewrite/loop") + assert resp.status_code == 400 + end + + @tag with_random_db: db_name + @tag config: [ + {"httpd", "rewrite_limit", "2"}, + {"httpd", "secure_rewrites", "false"} + ] + test "serial execution is not spuriously counted as loop on #{db_name}", context do + db_name = context[:db_name] + + ddoc = %{ + _id: "_design/test", + language: "javascript", + _attachments: %{ + "foo.txt": %{ + content_type: "text/plain", + data: "VGhpcyBpcyBhIGJhc2U2NCBlbmNvZGVkIHRleHQ=" + } + }, + rewrites: [ + %{ + from: "foo", + to: "foo.txt" + } + ] + } + + create_doc(db_name, ddoc) + + for _i <- 0..4 do + resp = Couch.get("/#{db_name}/_design/test/_rewrite/foo") + assert resp.status_code == 200 + end end end ) diff --git a/test/javascript/tests/rewrite.js b/test/javascript/tests/rewrite.js index a984936d1..88479b877 100644 --- a/test/javascript/tests/rewrite.js +++ b/test/javascript/tests/rewrite.js @@ -10,7 +10,7 @@ // License for the specific language governing permissions and limitations under // the License. - +couchTests.elixir = true; couchTests.rewrite = function(debug) { if (debug) debugger; diff --git a/test/javascript/tests/rewrite_js.js b/test/javascript/tests/rewrite_js.js index 0ccdd6515..7179fc9f5 100644 --- a/test/javascript/tests/rewrite_js.js +++ b/test/javascript/tests/rewrite_js.js @@ -11,7 +11,7 @@ // the License. - +couchTests.elixir = true; couchTests.rewrite = function(debug) { if (debug) debugger; var dbNames = [get_random_db_name(), get_random_db_name() + "test_suite_db/with_slashes"]; @@ -116,7 +116,6 @@ couchTests.rewrite = function(debug) { }), lists: { simpleForm: stringFun(function(head, req) { - log("simpleForm"); send('