Diffstat (limited to 'src')
400 files changed, 52178 insertions, 16546 deletions
diff --git a/src/aegis/rebar.config.script b/src/aegis/rebar.config.script new file mode 100644 index 000000000..ef148bfbe --- /dev/null +++ b/src/aegis/rebar.config.script @@ -0,0 +1,35 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + + +CouchConfig = case filelib:is_file(os:getenv("COUCHDB_CONFIG")) of + true -> + {ok, Result} = file:consult(os:getenv("COUCHDB_CONFIG")), + Result; + false -> + [] +end. + +AegisKeyManager = case lists:keyfind(aegis_key_manager, 1, CouchConfig) of + {aegis_key_manager, Module} when Module /= "" -> + list_to_atom(Module); + _ -> + aegis_noop_key_manager +end, + +CurrentOpts = case lists:keyfind(erl_opts, 1, CONFIG) of + {erl_opts, Opts} -> Opts; + false -> [] +end, + +AegisOpts = {d, 'AEGIS_KEY_MANAGER', AegisKeyManager}, +lists:keystore(erl_opts, 1, CONFIG, {erl_opts, [AegisOpts | CurrentOpts]}). diff --git a/src/aegis/src/aegis.app.src.script b/src/aegis/src/aegis.app.src.script new file mode 100644 index 000000000..f54688cf2 --- /dev/null +++ b/src/aegis/src/aegis.app.src.script @@ -0,0 +1,53 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +CouchConfig = case filelib:is_file(os:getenv("COUCHDB_CONFIG")) of + true -> + {ok, Result} = file:consult(os:getenv("COUCHDB_CONFIG")), + Result; + false -> + [] +end. + +AegisKeyManagerApp = case lists:keyfind(aegis_key_manager_app, 1, CouchConfig) of + {aegis_key_manager_app, AppName} when AppName /= "" -> + [list_to_atom(AppName)]; + _ -> + [] +end. + +BaseApplications = [ + kernel, + stdlib, + crypto, + couch_log, + erlfdb +]. + +Applications = AegisKeyManagerApp ++ BaseApplications. + +{application, aegis, + [ + {description, "If it's good enough for Zeus, it's good enough for CouchDB"}, + {vsn, git}, + {mod, {aegis_app, []}}, + {registered, [ + aegis_server + ]}, + {applications, Applications}, + {env,[]}, + {modules, []}, + {maintainers, []}, + {licenses, []}, + {links, []} + ] +}. diff --git a/src/aegis/src/aegis.erl b/src/aegis/src/aegis.erl new file mode 100644 index 000000000..e8a0b4bfb --- /dev/null +++ b/src/aegis/src/aegis.erl @@ -0,0 +1,72 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(aegis). +-include_lib("fabric/include/fabric2.hrl"). + + +-define(WRAPPED_KEY, {?DB_AEGIS, 1}). + + +-export([ + init_db/2, + open_db/1, + + decrypt/2, + decrypt/3, + encrypt/3, + wrap_fold_fun/2 +]). + +init_db(#{} = Db, Options) -> + Db#{ + is_encrypted => aegis_server:init_db(Db, Options) + }. + + +open_db(#{} = Db) -> + Db#{ + is_encrypted => aegis_server:open_db(Db) + }. + + +encrypt(#{} = _Db, _Key, <<>>) -> + <<>>; + +encrypt(#{is_encrypted := false}, _Key, Value) when is_binary(Value) -> + Value; + +encrypt(#{is_encrypted := true} = Db, Key, Value) + when is_binary(Key), is_binary(Value) -> + aegis_server:encrypt(Db, Key, Value). + + +decrypt(#{} = Db, Rows) when is_list(Rows) -> + lists:map(fun({Key, Value}) -> + {Key, decrypt(Db, Key, Value)} + end, Rows). + +decrypt(#{} = _Db, _Key, <<>>) -> + <<>>; + +decrypt(#{is_encrypted := false}, _Key, Value) when is_binary(Value) -> + Value; + +decrypt(#{is_encrypted := true} = Db, Key, Value) + when is_binary(Key), is_binary(Value) -> + aegis_server:decrypt(Db, Key, Value). + + +wrap_fold_fun(Db, Fun) when is_function(Fun, 2) -> + fun({Key, Value}, Acc) -> + Fun({Key, decrypt(Db, Key, Value)}, Acc) + end. diff --git a/src/aegis/src/aegis.hrl b/src/aegis/src/aegis.hrl new file mode 100644 index 000000000..2a2a2dcde --- /dev/null +++ b/src/aegis/src/aegis.hrl @@ -0,0 +1,57 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +%% Assume old crypto api + +-define(sha256_hmac(Key, PlainText), crypto:hmac(sha256, Key, PlainText)). + +-define(aes_gcm_encrypt(Key, IV, AAD, Data), + crypto:block_encrypt(aes_gcm, Key, IV, {AAD, Data, 16})). + +-define(aes_gcm_decrypt(Key, IV, AAD, CipherText, CipherTag), + crypto:block_decrypt(aes_gcm, Key, IV, {AAD, CipherText, CipherTag})). + +-define(aes_ecb_encrypt(Key, Data), + crypto:block_encrypt(aes_ecb, Key, Data)). + +-define(aes_ecb_decrypt(Key, Data), + crypto:block_decrypt(aes_ecb, Key, Data)). + +%% Replace macros if new crypto api is available +-ifdef(OTP_RELEASE). +-if(?OTP_RELEASE >= 22). + +-undef(sha256_hmac). +-define(sha256_hmac(Key, PlainText), crypto:mac(hmac, sha256, Key, PlainText)). + +-undef(aes_gcm_encrypt). +-define(aes_gcm_encrypt(Key, IV, AAD, Data), + crypto:crypto_one_time_aead(aes_256_gcm, Key, IV, Data, AAD, 16, true)). + +-undef(aes_gcm_decrypt). +-define(aes_gcm_decrypt(Key, IV, AAD, CipherText, CipherTag), + crypto:crypto_one_time_aead(aes_256_gcm, Key, IV, CipherText, + AAD, CipherTag, false)). + +-define(key_alg(Key), case bit_size(Key) of + 128 -> aes_128_ecb; 192 -> aes_192_ecb; 256 -> aes_256_ecb end). + +-undef(aes_ecb_encrypt). +-define(aes_ecb_encrypt(Key, Data), + crypto:crypto_one_time(?key_alg(Key), Key, Data, true)). + +-undef(aes_ecb_decrypt). +-define(aes_ecb_decrypt(Key, Data), + crypto:crypto_one_time(?key_alg(Key), Key, Data, false)). + +-endif. +-endif.
\ No newline at end of file diff --git a/src/aegis/src/aegis_app.erl b/src/aegis/src/aegis_app.erl new file mode 100644 index 000000000..4a5a11f0c --- /dev/null +++ b/src/aegis/src/aegis_app.erl @@ -0,0 +1,26 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(aegis_app). + +-behaviour(application). + + +-export([start/2, stop/1]). + + +start(_StartType, _StartArgs) -> + aegis_sup:start_link(). + + +stop(_State) -> + ok. diff --git a/src/aegis/src/aegis_key_manager.erl b/src/aegis/src/aegis_key_manager.erl new file mode 100644 index 000000000..aa9e3429a --- /dev/null +++ b/src/aegis/src/aegis_key_manager.erl @@ -0,0 +1,22 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(aegis_key_manager). + + + +-callback init_db( + Db :: #{}, + DbOptions :: list()) -> {ok, binary()} | false. + + +-callback open_db(Db :: #{}) -> {ok, binary()} | false. diff --git a/src/aegis/src/aegis_keywrap.erl b/src/aegis/src/aegis_keywrap.erl new file mode 100644 index 000000000..58c7668e8 --- /dev/null +++ b/src/aegis/src/aegis_keywrap.erl @@ -0,0 +1,97 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(aegis_keywrap). +-include("aegis.hrl"). + +%% Implementation of NIST Special Publication 800-38F +%% For wrapping and unwrapping keys with AES. + +-export([key_wrap/2, key_unwrap/2]). + +-define(ICV1, 16#A6A6A6A6A6A6A6A6). + +-spec key_wrap(WrappingKey :: binary(), KeyToWrap :: binary()) -> binary(). +key_wrap(WrappingKey, KeyToWrap) + when is_binary(WrappingKey), bit_size(KeyToWrap) rem 64 == 0 -> + N = bit_size(KeyToWrap) div 64, + wrap(WrappingKey, <<?ICV1:64>>, KeyToWrap, 1, 6 * N). + +wrap(_WrappingKey, A, R, T, End) when T > End -> + <<A/binary, R/binary>>; +wrap(WrappingKey, A, R, T, End) -> + <<R1:64, Rest/binary>> = R, + <<MSB_B:64, LSB_B:64>> = ?aes_ecb_encrypt(WrappingKey, <<A/binary, R1:64>>), + wrap(WrappingKey, <<(MSB_B bxor T):64>>, <<Rest/binary, LSB_B:64>>, T + 1, End). + + +-spec key_unwrap(WrappingKey :: binary(), KeyToUnwrap :: binary()) -> binary() | fail. 
+key_unwrap(WrappingKey, KeyToUnwrap) + when is_binary(WrappingKey), bit_size(KeyToUnwrap) rem 64 == 0 -> + N = (bit_size(KeyToUnwrap) div 64), + <<A:64, R/binary>> = KeyToUnwrap, + case unwrap(WrappingKey, <<A:64>>, R, 6 * (N - 1)) of + <<?ICV1:64, UnwrappedKey/binary>> -> + UnwrappedKey; + _ -> + fail + end. + +unwrap(_WrappingKey, A, R, 0) -> + <<A/binary, R/binary>>; +unwrap(WrappingKey, <<A:64>>, R, T) -> + RestSize = bit_size(R) - 64, + <<Rest:RestSize, R2: 64>> = R, + <<MSB_B:64, LSB_B:64>> = ?aes_ecb_decrypt(WrappingKey, <<(A bxor T):64, R2:64>>), + unwrap(WrappingKey, <<MSB_B:64>>, <<LSB_B:64, Rest:RestSize>>, T - 1). + + +-ifdef(TEST). +-include_lib("eunit/include/eunit.hrl"). + +wrap_test_() -> + [ + %% 128 KEK / 128 DATA + test_wrap_unwrap(<<16#000102030405060708090A0B0C0D0E0F:128>>, + <<16#00112233445566778899AABBCCDDEEFF:128>>, + <<16#1FA68B0A8112B447AEF34BD8FB5A7B829D3E862371D2CFE5:192>>), + %% 192 KEK / 128 DATA + test_wrap_unwrap(<<16#000102030405060708090A0B0C0D0E0F1011121314151617:192>>, + <<16#00112233445566778899AABBCCDDEEFF:128>>, + <<16#96778B25AE6CA435F92B5B97C050AED2468AB8A17AD84E5D:192>>), + %% 256 KEK / 128 DATA + test_wrap_unwrap(<<16#000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F:256>>, + <<16#00112233445566778899AABBCCDDEEFF:128>>, + <<16#64E8C3F9CE0F5BA263E9777905818A2A93C8191E7D6E8AE7:192>>), + %% 192 KEK / 192 DATA + test_wrap_unwrap(<<16#000102030405060708090A0B0C0D0E0F1011121314151617:192>>, + <<16#00112233445566778899AABBCCDDEEFF0001020304050607:192>>, + <<16#031D33264E15D33268F24EC260743EDCE1C6C7DDEE725A936BA814915C6762D2:256>>), + %% 256 KEK / 192 DATA + test_wrap_unwrap(<<16#000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F:256>>, + <<16#00112233445566778899AABBCCDDEEFF0001020304050607:192>>, + <<16#A8F9BC1612C68B3FF6E6F4FBE30E71E4769C8B80A32CB8958CD5D17D6B254DA1:256>>), + %% 256 KEK / 256 DATA + test_wrap_unwrap(<<16#000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F:256>>, + <<16#00112233445566778899AABBCCDDEEFF000102030405060708090A0B0C0D0E0F:256>>, + <<16#28C9F404C4B810F4CBCCB35CFB87F8263F5786E2D80ED326CBC7F0E71A99F43BFB988B9B7A02DD21:320>>)]. + +test_wrap_unwrap(WrappingKey, KeyToWrap, ExpectedWrappedKey) -> + [?_assertEqual(ExpectedWrappedKey, key_wrap(WrappingKey, KeyToWrap)), + ?_assertEqual(KeyToWrap, key_unwrap(WrappingKey, key_wrap(WrappingKey, KeyToWrap)))]. + +fail_test() -> + KEK = <<16#000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F:256>>, + CipherText = <<16#28C9F404C4B810F4CBCCB35CFB87F8263F5786E2D80ED326CBC7F0E71A99F43BFB988B9B7A02DD20:320>>, + ?assertEqual(fail, key_unwrap(KEK, CipherText)). + +-endif. diff --git a/src/aegis/src/aegis_noop_key_manager.erl b/src/aegis/src/aegis_noop_key_manager.erl new file mode 100644 index 000000000..2b61f1d29 --- /dev/null +++ b/src/aegis/src/aegis_noop_key_manager.erl @@ -0,0 +1,31 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(aegis_noop_key_manager). + + +-behaviour(aegis_key_manager). 
+ + +-export([ + init_db/2, + open_db/1 +]). + + + +init_db(#{} = _Db, _Options) -> + false. + + +open_db(#{} = _Db) -> + false. diff --git a/src/aegis/src/aegis_server.erl b/src/aegis/src/aegis_server.erl new file mode 100644 index 000000000..15fea4c63 --- /dev/null +++ b/src/aegis/src/aegis_server.erl @@ -0,0 +1,421 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(aegis_server). + +-behaviour(gen_server). + +-vsn(1). + + +-include("aegis.hrl"). + + +%% aegis_server API +-export([ + start_link/0, + init_db/2, + open_db/1, + encrypt/3, + decrypt/3 +]). + +%% gen_server callbacks +-export([ + init/1, + terminate/2, + handle_call/3, + handle_cast/2, + handle_info/2, + code_change/3 +]). + + + +-define(KEY_CHECK, aegis_key_check). +-define(INIT_TIMEOUT, 60000). +-define(TIMEOUT, 10000). +-define(CACHE_LIMIT, 100000). +-define(CACHE_MAX_AGE_SEC, 1800). +-define(CACHE_EXPIRATION_CHECK_SEC, 10). +-define(LAST_ACCESSED_INACTIVITY_SEC, 10). + + +-record(entry, {uuid, encryption_key, counter, last_accessed, expires_at}). + + +start_link() -> + gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). + + +-spec init_db(Db :: #{}, Options :: list()) -> boolean(). +init_db(#{uuid := UUID} = Db, Options) -> + sensitive(fun() -> + case ?AEGIS_KEY_MANAGER:init_db(Db, Options) of + {ok, DbKey} -> + gen_server:call(?MODULE, {insert_key, UUID, DbKey}), + true; + false -> + false + end + end). + + +-spec open_db(Db :: #{}) -> boolean(). +open_db(#{} = Db) -> + sensitive(fun() -> + case do_open_db(Db) of + {ok, _DbKey} -> + true; + false -> + false + end + end). + + +-spec encrypt(Db :: #{}, Key :: binary(), Value :: binary()) -> binary(). +encrypt(#{} = Db, Key, Value) when is_binary(Key), is_binary(Value) -> + #{ + uuid := UUID + } = Db, + + case is_key_fresh(UUID) of + true -> + case gen_server:call(?MODULE, {encrypt, Db, Key, Value}) of + CipherText when is_binary(CipherText) -> + CipherText; + {error, {_Tag, {_C_FileName,_LineNumber}, _Desc} = Reason} -> + couch_log:error("aegis encryption failure: ~p ", [Reason]), + erlang:error(decryption_failed); + {error, Reason} -> + erlang:error(Reason) + end; + false -> + sensitive(fun() -> + {ok, DbKey} = do_open_db(Db), + do_encrypt(DbKey, Db, Key, Value) + end) + end. + + +-spec decrypt(Db :: #{}, Key :: binary(), Value :: binary()) -> binary(). +decrypt(#{} = Db, Key, Value) when is_binary(Key), is_binary(Value) -> + #{ + uuid := UUID + } = Db, + + case is_key_fresh(UUID) of + true -> + case gen_server:call(?MODULE, {decrypt, Db, Key, Value}) of + PlainText when is_binary(PlainText) -> + PlainText; + {error, {_Tag, {_C_FileName,_LineNumber}, _Desc} = Reason} -> + couch_log:error("aegis decryption failure: ~p ", [Reason]), + erlang:error(decryption_failed); + {error, Reason} -> + erlang:error(Reason) + end; + false -> + sensitive(fun() -> + {ok, DbKey} = do_open_db(Db), + do_decrypt(DbKey, Db, Key, Value) + end) + end. 
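init_db/2, open_db/1, encrypt/3 and decrypt/3 above all defer to ?AEGIS_KEY_MANAGER, which is bound at compile time via the aegis_key_manager entry read from COUCHDB_CONFIG in rebar.config.script and defaults to the no-op manager shown earlier. A minimal sketch of a custom manager implementing the aegis_key_manager behaviour; the module name and key-derivation scheme are assumptions, not part of this changeset, and it requires an OTP with crypto:mac/4:

% Sketch of a custom key manager (illustrative; not from this diff).
-module(my_key_manager).
-behaviour(aegis_key_manager).

-export([
    init_db/2,
    open_db/1
]).

% Called when a database is created; returning {ok, DbKey} enables
% encryption for that database, returning false leaves it unencrypted.
init_db(#{} = Db, _Options) ->
    {ok, derive_key(Db)}.

% Called when an existing database is opened; must return the same
% 256-bit key that init_db/2 produced for this database.
open_db(#{} = Db) ->
    {ok, derive_key(Db)}.

% Hypothetical helper: derive a per-database key from a master secret
% and the database uuid. A production manager would fetch or unwrap
% keys from a real key management service instead.
derive_key(#{uuid := UUID}) ->
    MasterKey = <<0:256>>,  % placeholder secret for illustration only
    crypto:mac(hmac, sha256, MasterKey, UUID).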
+ + +%% gen_server functions + +init([]) -> + process_flag(sensitive, true), + Cache = ets:new(?MODULE, [set, private, {keypos, #entry.uuid}]), + ByAccess = ets:new(?MODULE, + [ordered_set, private, {keypos, #entry.counter}]), + ets:new(?KEY_CHECK, [named_table, protected, {read_concurrency, true}]), + + erlang:send_after(0, self(), maybe_remove_expired), + + St = #{ + cache => Cache, + by_access => ByAccess, + counter => 0 + }, + {ok, St, ?INIT_TIMEOUT}. + + +terminate(_Reason, _St) -> + ok. + + +handle_call({insert_key, UUID, DbKey}, _From, #{cache := Cache} = St) -> + case ets:lookup(Cache, UUID) of + [#entry{uuid = UUID} = Entry] -> + delete(St, Entry); + [] -> + ok + end, + NewSt = insert(St, UUID, DbKey), + {reply, ok, NewSt, ?TIMEOUT}; + +handle_call({encrypt, #{uuid := UUID} = Db, Key, Value}, From, St) -> + + {ok, DbKey} = lookup(St, UUID), + + erlang:spawn(fun() -> + process_flag(sensitive, true), + try + do_encrypt(DbKey, Db, Key, Value) + of + Resp -> + gen_server:reply(From, Resp) + catch + _:Error -> + gen_server:reply(From, {error, Error}) + end + end), + + {noreply, St, ?TIMEOUT}; + +handle_call({decrypt, #{uuid := UUID} = Db, Key, Value}, From, St) -> + + {ok, DbKey} = lookup(St, UUID), + + erlang:spawn(fun() -> + process_flag(sensitive, true), + try + do_decrypt(DbKey, Db, Key, Value) + of + Resp -> + gen_server:reply(From, Resp) + catch + _:Error -> + gen_server:reply(From, {error, Error}) + end + end), + + {noreply, St, ?TIMEOUT}; + +handle_call(_Msg, _From, St) -> + {noreply, St}. + + +handle_cast({accessed, UUID}, St) -> + NewSt = bump_last_accessed(St, UUID), + {noreply, NewSt}; + + +handle_cast(_Msg, St) -> + {noreply, St}. + + +handle_info(maybe_remove_expired, St) -> + remove_expired_entries(St), + CheckInterval = erlang:convert_time_unit( + expiration_check_interval(), second, millisecond), + erlang:send_after(CheckInterval, self(), maybe_remove_expired), + {noreply, St}; + +handle_info(_Msg, St) -> + {noreply, St}. + + +code_change(_OldVsn, St, _Extra) -> + {ok, St}. + + +%% private functions + +do_open_db(#{uuid := UUID} = Db) -> + case ?AEGIS_KEY_MANAGER:open_db(Db) of + {ok, DbKey} -> + gen_server:call(?MODULE, {insert_key, UUID, DbKey}), + {ok, DbKey}; + false -> + false + end. + + +do_encrypt(DbKey, #{uuid := UUID}, Key, Value) -> + EncryptionKey = crypto:strong_rand_bytes(32), + <<WrappedKey:320>> = aegis_keywrap:key_wrap(DbKey, EncryptionKey), + + {CipherText, <<CipherTag:128>>} = + ?aes_gcm_encrypt( + EncryptionKey, + <<0:96>>, + <<UUID/binary, 0:8, Key/binary>>, + Value), + <<1:8, WrappedKey:320, CipherTag:128, CipherText/binary>>. + + +do_decrypt(DbKey, #{uuid := UUID}, Key, Value) -> + case Value of + <<1:8, WrappedKey:320, CipherTag:128, CipherText/binary>> -> + case aegis_keywrap:key_unwrap(DbKey, <<WrappedKey:320>>) of + fail -> + erlang:error(decryption_failed); + DecryptionKey -> + Decrypted = + ?aes_gcm_decrypt( + DecryptionKey, + <<0:96>>, + <<UUID/binary, 0:8, Key/binary>>, + CipherText, + <<CipherTag:128>>), + if Decrypted /= error -> Decrypted; true -> + erlang:error(decryption_failed) + end + end; + _ -> + erlang:error(not_ciphertext) + end. + + +is_key_fresh(UUID) -> + Now = fabric2_util:now(sec), + + case ets:lookup(?KEY_CHECK, UUID) of + [{UUID, ExpiresAt}] when ExpiresAt >= Now -> true; + _ -> false + end. 
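do_encrypt/4 above wraps a fresh 256-bit per-value key with the database key and prefixes a version byte; do_decrypt/4 reverses this, binding the database uuid and the FDB key into the AAD. A rough, self-contained sketch of that envelope using the OTP 22+ crypto API directly instead of the aegis.hrl macros (all variable values are placeholders):

% Illustrative round trip of the value envelope written by do_encrypt/4.
DbKey = crypto:strong_rand_bytes(32),
UUID = <<"db-uuid">>,
Key = <<"fdb-key">>,
Value = <<"plaintext">>,
AAD = <<UUID/binary, 0:8, Key/binary>>,

DataKey = crypto:strong_rand_bytes(32),
<<WrappedKey:320>> = aegis_keywrap:key_wrap(DbKey, DataKey),
{CipherText, <<CipherTag:128>>} =
    crypto:crypto_one_time_aead(aes_256_gcm, DataKey, <<0:96>>, Value, AAD, 16, true),
Envelope = <<1:8, WrappedKey:320, CipherTag:128, CipherText/binary>>,

% Decryption reverses the steps: unwrap the per-value key, then decrypt
% with the same IV and AAD while verifying the tag.
<<1:8, Wrapped:320, Tag:128, CT/binary>> = Envelope,
DataKey = aegis_keywrap:key_unwrap(DbKey, <<Wrapped:320>>),
Value = crypto:crypto_one_time_aead(aes_256_gcm, DataKey, <<0:96>>, CT, AAD, <<Tag:128>>, false).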
+ + +%% cache functions + +insert(St, UUID, DbKey) -> + #{ + cache := Cache, + by_access := ByAccess, + counter := Counter + } = St, + + Now = fabric2_util:now(sec), + ExpiresAt = Now + max_age(), + + Entry = #entry{ + uuid = UUID, + encryption_key = DbKey, + counter = Counter, + last_accessed = Now, + expires_at = ExpiresAt + }, + + true = ets:insert(Cache, Entry), + true = ets:insert_new(ByAccess, Entry), + true = ets:insert(?KEY_CHECK, {UUID, ExpiresAt}), + + CacheLimit = cache_limit(), + CacheSize = ets:info(Cache, size), + + case CacheSize > CacheLimit of + true -> + LRUKey = ets:first(ByAccess), + [LRUEntry] = ets:lookup(ByAccess, LRUKey), + delete(St, LRUEntry); + false -> + ok + end, + + St#{counter := Counter + 1}. + + +lookup(#{cache := Cache}, UUID) -> + case ets:lookup(Cache, UUID) of + [#entry{uuid = UUID, encryption_key = DbKey} = Entry] -> + maybe_bump_last_accessed(Entry), + {ok, DbKey}; + [] -> + {error, not_found} + end. + + +delete(St, #entry{uuid = UUID} = Entry) -> + #{ + cache := Cache, + by_access := ByAccess + } = St, + + true = ets:delete(?KEY_CHECK, UUID), + true = ets:delete_object(Cache, Entry), + true = ets:delete_object(ByAccess, Entry). + + +maybe_bump_last_accessed(#entry{last_accessed = LastAccessed} = Entry) -> + case fabric2_util:now(sec) > LastAccessed + ?LAST_ACCESSED_INACTIVITY_SEC of + true -> + gen_server:cast(?MODULE, {accessed, Entry#entry.uuid}); + false -> + ok + end. + + +bump_last_accessed(St, UUID) -> + #{ + cache := Cache, + by_access := ByAccess, + counter := Counter + } = St, + + + [#entry{counter = OldCounter} = Entry0] = ets:lookup(Cache, UUID), + + Entry = Entry0#entry{ + last_accessed = fabric2_util:now(sec), + counter = Counter + }, + + true = ets:insert(Cache, Entry), + true = ets:insert_new(ByAccess, Entry), + + ets:delete(ByAccess, OldCounter), + + St#{counter := Counter + 1}. + + +remove_expired_entries(St) -> + #{ + cache := Cache, + by_access := ByAccess + } = St, + + MatchConditions = [{'=<', '$1', fabric2_util:now(sec)}], + + KeyCheckMatchHead = {'_', '$1'}, + KeyCheckExpired = [{KeyCheckMatchHead, MatchConditions, [true]}], + Count = ets:select_delete(?KEY_CHECK, KeyCheckExpired), + + CacheMatchHead = #entry{expires_at = '$1', _ = '_'}, + CacheExpired = [{CacheMatchHead, MatchConditions, [true]}], + Count = ets:select_delete(Cache, CacheExpired), + Count = ets:select_delete(ByAccess, CacheExpired). + + + +max_age() -> + config:get_integer("aegis", "cache_max_age_sec", ?CACHE_MAX_AGE_SEC). + + +expiration_check_interval() -> + config:get_integer( + "aegis", "cache_expiration_check_sec", ?CACHE_EXPIRATION_CHECK_SEC). + + +cache_limit() -> + config:get_integer("aegis", "cache_limit", ?CACHE_LIMIT). + + +sensitive(Fun) when is_function(Fun, 0) -> + OldValue = process_flag(sensitive, true), + try + Fun() + after + process_flag(sensitive, OldValue) + end. diff --git a/src/couch_replicator/src/couch_replicator_job_sup.erl b/src/aegis/src/aegis_sup.erl index 9ea65e85f..6d3ee83d8 100644 --- a/src/couch_replicator/src/couch_replicator_job_sup.erl +++ b/src/aegis/src/aegis_sup.erl @@ -10,25 +10,37 @@ % License for the specific language governing permissions and limitations under % the License. --module(couch_replicator_job_sup). +-module(aegis_sup). -behaviour(supervisor). +-vsn(1). + + -export([ - init/1, start_link/0 ]). +-export([ + init/1 +]). + + start_link() -> - supervisor:start_link({local,?MODULE}, ?MODULE, []). + supervisor:start_link({local, ?MODULE}, ?MODULE, []). 
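max_age/0, expiration_check_interval/0 and cache_limit/0 above read the "aegis" config section, with the module defines as defaults. An assumed local.ini override using the keys and defaults from the code:

[aegis]
; number of cached database keys before LRU eviction (default 100000)
cache_limit = 100000
; how long a cached key stays valid, in seconds (default 1800)
cache_max_age_sec = 1800
; how often expired entries are swept, in seconds (default 10)
cache_expiration_check_sec = 10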
-%%============================================================================= -%% supervisor callbacks -%%============================================================================= init([]) -> - {ok, {{one_for_one, 3, 10}, []}}. - -%%============================================================================= -%% internal functions -%%============================================================================= + Flags = #{ + strategy => one_for_one, + intensity => 5, + period => 10 + }, + Children = [ + #{ + id => aegis_server, + start => {aegis_server, start_link, []}, + shutdown => 5000 + } + ], + {ok, {Flags, Children}}. diff --git a/src/aegis/test/aegis_basic_test.erl b/src/aegis/test/aegis_basic_test.erl new file mode 100644 index 000000000..61d9737dd --- /dev/null +++ b/src/aegis/test/aegis_basic_test.erl @@ -0,0 +1,17 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(aegis_basic_test). + +-include_lib("eunit/include/eunit.hrl"). + +-define(DB, #{uuid => <<"foo">>}). diff --git a/src/aegis/test/aegis_server_test.erl b/src/aegis/test/aegis_server_test.erl new file mode 100644 index 000000000..0f96798b7 --- /dev/null +++ b/src/aegis/test/aegis_server_test.erl @@ -0,0 +1,314 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(aegis_server_test). + +-include_lib("eunit/include/eunit.hrl"). +-include_lib("couch/include/couch_eunit.hrl"). + +-define(DB, #{uuid => <<0:64>>}). +-define(VALUE, <<0:8>>). +-define(ENCRYPTED, <<1,155,242,89,190,54,112,151,18,145,25,251,217, + 49,147,125,14,162,146,201,189,100,232,38,239,111,163,84,25,60, + 147,167,237,107,24,204,171,232,227,16,72,203,101,118,150,252, + 204,80,245,66,98,213,223,63,111,105,101,154>>). +-define(TIMEOUT, 10000). + + + +basic_test_() -> + { + foreach, + fun setup/0, + fun teardown/1, + [ + {"init_db returns true when encryption enabled", + {timeout, ?TIMEOUT, fun test_init_db/0}}, + {"open_db returns true when encryption enabled", + {timeout, ?TIMEOUT, fun test_open_db/0}}, + {"init_db caches key", + {timeout, ?TIMEOUT, fun test_init_db_cache/0}}, + {"open_db caches key", + {timeout, ?TIMEOUT, fun test_open_db_cache/0}}, + {"encrypt fetches and caches key when it's missing", + {timeout, ?TIMEOUT, fun test_encrypt_cache/0}}, + {"decrypt fetches and caches key when it's missing", + {timeout, ?TIMEOUT, fun test_decrypt_cache/0}} + ] + }. 
+ + +setup() -> + Ctx = test_util:start_couch([fabric]), + meck:new([?AEGIS_KEY_MANAGER], [passthrough]), + ok = meck:expect(?AEGIS_KEY_MANAGER, init_db, 2, {ok, <<0:256>>}), + ok = meck:expect(?AEGIS_KEY_MANAGER, open_db, 1, {ok, <<0:256>>}), + Ctx. + + +teardown(Ctx) -> + meck:unload(), + test_util:stop_couch(Ctx). + + +test_init_db() -> + ?assert(aegis_server:init_db(?DB, [])), + ?assertEqual(1, meck:num_calls(?AEGIS_KEY_MANAGER, init_db, 2)). + + +test_open_db() -> + ?assert(aegis_server:open_db(?DB)), + ?assertEqual(1, meck:num_calls(?AEGIS_KEY_MANAGER, open_db, 1)). + + +test_init_db_cache() -> + ?assertEqual(0, meck:num_calls(?AEGIS_KEY_MANAGER, init_db, 2)), + + ?assert(aegis_server:init_db(?DB, [])), + + lists:foreach(fun(I) -> + Encrypted = aegis_server:encrypt(?DB, <<I:64>>, ?VALUE), + ?assertNotEqual(?VALUE, Encrypted), + ?assertMatch(<<1:8, _/binary>>, Encrypted) + end, lists:seq(1, 12)), + + ?assertEqual(1, meck:num_calls(?AEGIS_KEY_MANAGER, init_db, 2)). + + +test_open_db_cache() -> + ?assertEqual(0, meck:num_calls(?AEGIS_KEY_MANAGER, open_db, 1)), + + ?assert(aegis_server:open_db(?DB)), + + lists:foreach(fun(I) -> + Encrypted = aegis_server:encrypt(?DB, <<I:64>>, ?VALUE), + ?assertNotEqual(?VALUE, Encrypted), + ?assertMatch(<<1:8, _/binary>>, Encrypted) + end, lists:seq(1, 12)), + + ?assertEqual(1, meck:num_calls(?AEGIS_KEY_MANAGER, open_db, 1)). + + +test_encrypt_cache() -> + ?assertEqual(0, meck:num_calls(?AEGIS_KEY_MANAGER, open_db, 1)), + + Encrypted = aegis_server:encrypt(?DB, <<1:64>>, ?VALUE), + ?assertNotEqual(?VALUE, Encrypted), + ?assertMatch(<<1:8, _/binary>>, Encrypted), + + ?assertEqual(1, meck:num_calls(?AEGIS_KEY_MANAGER, open_db, 1)). + + +test_decrypt_cache() -> + ?assertEqual(0, meck:num_calls(?AEGIS_KEY_MANAGER, open_db, 1)), + + Decrypted = aegis_server:decrypt(?DB, <<1:64>>, ?ENCRYPTED), + ?assertEqual(<<0>>, Decrypted), + + ?assertEqual(1, meck:num_calls(?AEGIS_KEY_MANAGER, open_db, 1)). + + + +disabled_test_() -> + { + foreach, + fun() -> + Ctx = setup(), + ok = meck:delete(?AEGIS_KEY_MANAGER, init_db, 2), + ok = meck:expect(?AEGIS_KEY_MANAGER, init_db, 2, false), + ok = meck:delete(?AEGIS_KEY_MANAGER, open_db, 1), + ok = meck:expect(?AEGIS_KEY_MANAGER, open_db, 1, false), + Ctx + end, + fun teardown/1, + [ + {"init_db returns false when encryptions disabled", + {timeout, ?TIMEOUT, fun test_disabled_init_db/0}}, + {"open_db returns false when encryptions disabled", + {timeout, ?TIMEOUT, fun test_disabled_open_db/0}}, + {"pass through on encrypt when encryption disabled", + {timeout, ?TIMEOUT, fun test_disabled_encrypt/0}}, + {"pass through on decrypt when encryption disabled", + {timeout, ?TIMEOUT, fun test_disabled_decrypt/0}} + ] + }. + + +test_disabled_init_db() -> + ?assertNot(aegis_server:init_db(?DB, [])), + ?assertEqual(1, meck:num_calls(?AEGIS_KEY_MANAGER, init_db, 2)). + + +test_disabled_open_db() -> + ?assertNot(aegis_server:open_db(?DB)), + ?assertEqual(1, meck:num_calls(?AEGIS_KEY_MANAGER, open_db, 1)). + + +test_disabled_encrypt() -> + Db = ?DB#{is_encrypted => aegis_server:open_db(?DB)}, + Encrypted = aegis:encrypt(Db, <<1:64>>, ?VALUE), + ?assertEqual(?VALUE, Encrypted). + + +test_disabled_decrypt() -> + Db = ?DB#{is_encrypted => aegis_server:open_db(?DB)}, + Decrypted = aegis:decrypt(Db, <<1:64>>, ?ENCRYPTED), + ?assertEqual(?ENCRYPTED, Decrypted). 
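The disabled tests below rely on aegis:open_db/1 stamping is_encrypted into the db map so that aegis:encrypt/3 and aegis:decrypt/3 can short-circuit. A minimal sketch of that caller-side pattern (Db0, FdbKey and Value stand in for the caller's own values):

% Sketch of the aegis public API usage (illustrative).
Db1 = aegis:open_db(Db0),                    % Db1 now carries is_encrypted => boolean()
Stored = aegis:encrypt(Db1, FdbKey, Value),  % pass-through when is_encrypted is false
Value = aegis:decrypt(Db1, FdbKey, Stored),  % returns the original value either way
% wrap_fold_fun/2 decrypts rows transparently inside a fold:
FoldFun = aegis:wrap_fold_fun(Db1, fun({K, V}, Acc) -> [{K, V} | Acc] end).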
+ + + +lru_cache_with_expiration_test_() -> + { + foreach, + fun() -> + %% this has to be be set before start of aegis server + %% for config param "cache_expiration_check_sec" to be picked up + meck:new([config, aegis_server, fabric2_util], [passthrough]), + ok = meck:expect(config, get_integer, fun + ("aegis", "cache_limit", _) -> 5; + ("aegis", "cache_max_age_sec", _) -> 130; + ("aegis", "cache_expiration_check_sec", _) -> 1; + (_, _, Default) -> Default + end), + Ctx = setup(), + ok = meck:expect(fabric2_util, now, fun(sec) -> + get(time) == undefined andalso put(time, 10), + Now = get(time), + put(time, Now + 10), + Now + end), + Ctx + end, + fun teardown/1, + [ + {"counter moves forward on access bump", + {timeout, ?TIMEOUT, fun test_advance_counter/0}}, + {"oldest entries evicted", + {timeout, ?TIMEOUT, fun test_evict_old_entries/0}}, + {"access bump preserves entries", + {timeout, ?TIMEOUT, fun test_bump_accessed/0}}, + {"expired entries removed", + {timeout, ?TIMEOUT, fun test_remove_expired/0}} + ] + }. + + +test_advance_counter() -> + ?assertEqual(0, meck:num_calls(?AEGIS_KEY_MANAGER, open_db, 1)), + + ok = meck:expect(aegis_server, handle_cast, fun({accessed, _} = Msg, St) -> + #{counter := Counter} = St, + get(counter) == undefined andalso put(counter, 0), + OldCounter = get(counter), + put(counter, Counter), + ?assert(Counter > OldCounter), + meck:passthrough([Msg, St]) + end), + + lists:foreach(fun(I) -> + Db = ?DB#{uuid => <<I:64>>}, + aegis_server:encrypt(Db, <<I:64>>, ?VALUE), + aegis_server:encrypt(Db, <<(I+1):64>>, ?VALUE) + end, lists:seq(1, 10)), + + ?assertEqual(10, meck:num_calls(?AEGIS_KEY_MANAGER, open_db, 1)). + + +test_evict_old_entries() -> + ?assertEqual(0, meck:num_calls(?AEGIS_KEY_MANAGER, open_db, 1)), + + %% overflow cache + lists:foreach(fun(I) -> + Db = ?DB#{uuid => <<I:64>>}, + aegis_server:encrypt(Db, <<I:64>>, ?VALUE) + end, lists:seq(1, 10)), + + ?assertEqual(10, meck:num_calls(?AEGIS_KEY_MANAGER, open_db, 1)), + + %% confirm that newest keys are still in cache + lists:foreach(fun(I) -> + Db = ?DB#{uuid => <<I:64>>}, + aegis_server:encrypt(Db, <<(I+1):64>>, ?VALUE) + end, lists:seq(6, 10)), + + ?assertEqual(10, meck:num_calls(?AEGIS_KEY_MANAGER, open_db, 1)), + + %% confirm that oldest keys been eviced and needed re-fetch + lists:foreach(fun(I) -> + Db = ?DB#{uuid => <<I:64>>}, + aegis_server:encrypt(Db, <<(I+1):64>>, ?VALUE) + end, lists:seq(1, 5)), + + ?assertEqual(15, meck:num_calls(?AEGIS_KEY_MANAGER, open_db, 1)). + + +test_bump_accessed() -> + ?assertEqual(0, meck:num_calls(?AEGIS_KEY_MANAGER, open_db, 1)), + + %% fill the cache + lists:foreach(fun(I) -> + Db = ?DB#{uuid => <<I:64>>}, + aegis_server:encrypt(Db, <<I:64>>, ?VALUE) + end, lists:seq(1, 5)), + + ?assertEqual(5, meck:num_calls(?AEGIS_KEY_MANAGER, open_db, 1)), + + %% bump oldest key and then insert a new key to trigger eviction + aegis_server:encrypt(?DB#{uuid => <<1:64>>}, <<1:64>>, ?VALUE), + aegis_server:encrypt(?DB#{uuid => <<6:64>>}, <<6:64>>, ?VALUE), + ?assertEqual(6, meck:num_calls(?AEGIS_KEY_MANAGER, open_db, 1)), + + %% confirm that former oldest key is still in cache + aegis_server:encrypt(?DB#{uuid => <<1:64>>}, <<2:64>>, ?VALUE), + ?assertEqual(6, meck:num_calls(?AEGIS_KEY_MANAGER, open_db, 1)), + + %% confirm that the second oldest key been evicted by the new insert + aegis_server:encrypt(?DB#{uuid => <<2:64>>}, <<3:64>>, ?VALUE), + ?assertEqual(7, meck:num_calls(?AEGIS_KEY_MANAGER, open_db, 1)). 
+ + +test_remove_expired() -> + ?assertEqual(0, meck:num_calls(?AEGIS_KEY_MANAGER, open_db, 1)), + + %% to detect when maybe_remove_expired called + ok = meck:expect(aegis_server, handle_info,fun + (maybe_remove_expired, St) -> + meck:passthrough([maybe_remove_expired, St]) + end), + + %% fill the cache. first key expires a 140, last at 180 of "our" time + lists:foreach(fun(I) -> + Db = ?DB#{uuid => <<I:64>>}, + aegis_server:encrypt(Db, <<I:64>>, ?VALUE) + end, lists:seq(1, 5)), + + ?assertEqual(5, meck:num_calls(?AEGIS_KEY_MANAGER, open_db, 1)), + + %% confirm enties are still in cache and wind up our "clock" to 160 + lists:foreach(fun(I) -> + Db = ?DB#{uuid => <<I:64>>}, + aegis_server:encrypt(Db, <<I:64>>, ?VALUE) + end, lists:seq(1, 5)), + + ?assertEqual(5, meck:num_calls(?AEGIS_KEY_MANAGER, open_db, 1)), + + %% wait for remove_expired_entries to be triggered + meck:reset(aegis_server), + meck:wait(aegis_server, handle_info, [maybe_remove_expired, '_'], 2500), + + %% 3 "oldest" entries should be removed, 2 yet to expire still in cache + lists:foreach(fun(I) -> + Db = ?DB#{uuid => <<I:64>>}, + aegis_server:encrypt(Db, <<I:64>>, ?VALUE) + end, lists:seq(1, 5)), + + ?assertEqual(8, meck:num_calls(?AEGIS_KEY_MANAGER, open_db, 1)). diff --git a/src/chttpd/src/chttpd.app.src b/src/chttpd/src/chttpd.app.src index 3526745df..af330e0df 100644 --- a/src/chttpd/src/chttpd.app.src +++ b/src/chttpd/src/chttpd.app.src @@ -26,6 +26,7 @@ couch_stats, config, couch, + ctrace, ets_lru, fabric ]}, diff --git a/src/chttpd/src/chttpd.erl b/src/chttpd/src/chttpd.erl index adde0730f..1a9b19bb1 100644 --- a/src/chttpd/src/chttpd.erl +++ b/src/chttpd/src/chttpd.erl @@ -25,7 +25,7 @@ error_info/1, parse_form/1, json_body/1, json_body_obj/1, body/1, doc_etag/1, make_etag/1, etag_respond/3, etag_match/2, partition/1, serve_file/3, serve_file/4, - server_header/0, start_chunked_response/3,send_chunk/2, + server_header/0, start_chunked_response/3,send_chunk/2,last_chunk/1, start_response_length/4, send/2, start_json_response/2, start_json_response/3, end_json_response/1, send_response/4, send_response_no_cors/4, @@ -52,8 +52,9 @@ req, code, headers, - first_chunk, - resp=nil + chunks, + resp=nil, + buffer_response=false }). start_link() -> @@ -238,6 +239,8 @@ handle_request_int(MochiReq) -> erlang:put(dont_log_request, true), erlang:put(dont_log_response, true), + maybe_trace_fdb(MochiReq:get_header_value("x-couchdb-fdb-trace")), + {HttpReq2, Response} = case before_request(HttpReq0) of {ok, HttpReq1} -> process_request(HttpReq1); @@ -257,6 +260,7 @@ handle_request_int(MochiReq) -> case after_request(HttpReq2, HttpResp) of #httpd_resp{status = ok, response = Resp} -> + span_ok(HttpResp), {ok, Resp}; #httpd_resp{status = aborted, reason = Reason} -> couch_log:error("Response abnormally terminated: ~p", [Reason]), @@ -264,9 +268,11 @@ handle_request_int(MochiReq) -> end. before_request(HttpReq) -> + ctrace:is_enabled() andalso start_span(HttpReq), try - chttpd_stats:init(), - chttpd_plugin:before_request(HttpReq) + {ok, HttpReq1} = chttpd_plugin:before_request(HttpReq), + chttpd_stats:init(HttpReq1), + {ok, HttpReq1} catch Tag:Error -> {error, catch_error(HttpReq, Tag, Error)} end. @@ -281,7 +287,7 @@ after_request(HttpReq, HttpResp0) -> {ok, HttpResp0#httpd_resp{status = aborted}} end, HttpResp2 = update_stats(HttpReq, HttpResp1), - chttpd_stats:report(HttpReq, HttpResp2), + chttpd_stats:report(HttpResp2), maybe_log(HttpReq, HttpResp2), HttpResp2. 
@@ -314,6 +320,8 @@ process_request(#httpd{mochi_req = MochiReq} = HttpReq) -> end. handle_req_after_auth(HandlerKey, HttpReq) -> + #httpd{user_ctx = #user_ctx{name = User}} = HttpReq, + ctrace:tag(#{user => User}), try HandlerFun = chttpd_handlers:url_handler(HandlerKey, fun chttpd_db:handle_request/1), @@ -350,6 +358,10 @@ catch_error(HttpReq, throw, Error) -> send_error(HttpReq, Error); catch_error(HttpReq, error, database_does_not_exist) -> send_error(HttpReq, database_does_not_exist); +catch_error(HttpReq, error, decryption_failed) -> + send_error(HttpReq, decryption_failed); +catch_error(HttpReq, error, not_ciphertext) -> + send_error(HttpReq, not_ciphertext); catch_error(HttpReq, Tag, Error) -> Stack = erlang:get_stacktrace(), % TODO improve logging and metrics collection for client disconnects @@ -412,8 +424,7 @@ possibly_hack(#httpd{path_parts=[<<"_replicate">>]}=Req) -> {Props0} = chttpd:json_body_obj(Req), Props1 = fix_uri(Req, Props0, <<"source">>), Props2 = fix_uri(Req, Props1, <<"target">>), - put(post_body, {Props2}), - Req; + Req#httpd{req_body={Props2}}; possibly_hack(Req) -> Req. @@ -666,13 +677,16 @@ body(#httpd{mochi_req=MochiReq, req_body=ReqBody}) -> validate_ctype(Req, Ctype) -> couch_httpd:validate_ctype(Req, Ctype). -json_body(Httpd) -> +json_body(#httpd{req_body=undefined} = Httpd) -> case body(Httpd) of undefined -> throw({bad_request, "Missing request body"}); Body -> ?JSON_DECODE(maybe_decompress(Httpd, Body)) - end. + end; + +json_body(#httpd{req_body=ReqBody}) -> + ReqBody. json_body_obj(Httpd) -> case json_body(Httpd) of @@ -745,7 +759,14 @@ start_chunked_response(#httpd{mochi_req=MochiReq}=Req, Code, Headers0) -> send_chunk({remote, _Pid, _Ref} = Resp, Data) -> couch_httpd:send_chunk(Resp, Data); send_chunk(Resp, Data) -> - Resp:write_chunk(Data), + case iolist_size(Data) of + 0 -> ok; % do nothing + _ -> Resp:write_chunk(Data) + end, + {ok, Resp}. + +last_chunk(Resp) -> + Resp:write_chunk([]), {ok, Resp}. send_response(Req, Code, Headers0, Body) -> @@ -780,40 +801,54 @@ start_json_response(Req, Code, Headers0) -> end_json_response(Resp) -> couch_httpd:end_json_response(Resp). + start_delayed_json_response(Req, Code) -> start_delayed_json_response(Req, Code, []). + start_delayed_json_response(Req, Code, Headers) -> start_delayed_json_response(Req, Code, Headers, ""). + start_delayed_json_response(Req, Code, Headers, FirstChunk) -> {ok, #delayed_resp{ start_fun = fun start_json_response/3, req = Req, code = Code, headers = Headers, - first_chunk = FirstChunk}}. + chunks = [FirstChunk], + buffer_response = buffer_response(Req)}}. + start_delayed_chunked_response(Req, Code, Headers) -> start_delayed_chunked_response(Req, Code, Headers, ""). + start_delayed_chunked_response(Req, Code, Headers, FirstChunk) -> {ok, #delayed_resp{ start_fun = fun start_chunked_response/3, req = Req, code = Code, headers = Headers, - first_chunk = FirstChunk}}. + chunks = [FirstChunk], + buffer_response = buffer_response(Req)}}. + -send_delayed_chunk(#delayed_resp{}=DelayedResp, Chunk) -> +send_delayed_chunk(#delayed_resp{buffer_response=false}=DelayedResp, Chunk) -> {ok, #delayed_resp{resp=Resp}=DelayedResp1} = start_delayed_response(DelayedResp), {ok, Resp} = send_chunk(Resp, Chunk), - {ok, DelayedResp1}. + {ok, DelayedResp1}; + +send_delayed_chunk(#delayed_resp{buffer_response=true}=DelayedResp, Chunk) -> + #delayed_resp{chunks = Chunks} = DelayedResp, + {ok, DelayedResp#delayed_resp{chunks = [Chunk | Chunks]}}. + send_delayed_last_chunk(Req) -> send_delayed_chunk(Req, []). 
+ send_delayed_error(#delayed_resp{req=Req,resp=nil}=DelayedResp, Reason) -> {Code, ErrorStr, ReasonStr} = error_info(Reason), {ok, Resp} = send_error(Req, Code, ErrorStr, ReasonStr), @@ -823,6 +858,7 @@ send_delayed_error(#delayed_resp{resp=Resp, req=Req}, Reason) -> log_error_with_stack_trace(Reason), throw({http_abort, Resp, Reason}). + close_delayed_json_object(Resp, Buffer, Terminator, 0) -> % Use a separate chunk to close the streamed array to maintain strict % compatibility with earlier versions. See COUCHDB-2724 @@ -831,11 +867,28 @@ close_delayed_json_object(Resp, Buffer, Terminator, 0) -> close_delayed_json_object(Resp, Buffer, Terminator, _Threshold) -> send_delayed_chunk(Resp, [Buffer | Terminator]). -end_delayed_json_response(#delayed_resp{}=DelayedResp) -> + +end_delayed_json_response(#delayed_resp{buffer_response=false}=DelayedResp) -> {ok, #delayed_resp{resp=Resp}} = start_delayed_response(DelayedResp), + end_json_response(Resp); + +end_delayed_json_response(#delayed_resp{buffer_response=true}=DelayedResp) -> + #delayed_resp{ + start_fun = StartFun, + req = Req, + code = Code, + headers = Headers, + chunks = Chunks + } = DelayedResp, + {ok, Resp} = StartFun(Req, Code, Headers), + lists:foreach(fun + ([]) -> ok; + (Chunk) -> send_chunk(Resp, Chunk) + end, lists:reverse(Chunks)), end_json_response(Resp). + get_delayed_req(#delayed_resp{req=#httpd{mochi_req=MochiReq}}) -> MochiReq; get_delayed_req(Resp) -> @@ -847,7 +900,7 @@ start_delayed_response(#delayed_resp{resp=nil}=DelayedResp) -> req=Req, code=Code, headers=Headers, - first_chunk=FirstChunk + chunks=[FirstChunk] }=DelayedResp, {ok, Resp} = StartFun(Req, Code, Headers), case FirstChunk of @@ -858,6 +911,18 @@ start_delayed_response(#delayed_resp{resp=nil}=DelayedResp) -> start_delayed_response(#delayed_resp{}=DelayedResp) -> {ok, DelayedResp}. + +buffer_response(Req) -> + case chttpd:qs_value(Req, "buffer_response") of + "false" -> + false; + "true" -> + true; + _ -> + config:get_boolean("chttpd", "buffer_response", false) + end. 
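buffer_response/1 above checks the buffer_response query parameter first (e.g. ?buffer_response=true on a request) and falls back to a chttpd config default; when enabled, delayed JSON responses are accumulated in the chunks list and flushed in one pass by end_delayed_json_response/1. An assumed local.ini form of the default:

[chttpd]
; buffer entire delayed JSON responses in memory and send them at once
; instead of streaming chunks as they are produced
buffer_response = false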
+ + error_info({Error, Reason}) when is_list(Reason) -> error_info({Error, couch_util:to_binary(Reason)}); error_info(bad_request) -> @@ -930,12 +995,18 @@ error_info({error, {database_name_too_long, DbName}}) -> <<"At least one path segment of `", DbName/binary, "` is too long.">>}; error_info({doc_validation, Reason}) -> {400, <<"doc_validation">>, Reason}; +error_info({invalid_since_seq, Reason}) -> + {400, <<"invalid_since_seq">>, Reason}; error_info({missing_stub, Reason}) -> {412, <<"missing_stub">>, Reason}; error_info(request_entity_too_large) -> {413, <<"too_large">>, <<"the request entity is too large">>}; error_info({request_entity_too_large, {attachment, AttName}}) -> {413, <<"attachment_too_large">>, AttName}; +error_info({request_entity_too_large, {bulk_docs, Max}}) when is_integer(Max) -> + {413, <<"max_bulk_docs_count_exceeded">>, integer_to_binary(Max)}; +error_info({request_entity_too_large, {bulk_get, Max}}) when is_integer(Max) -> + {413, <<"max_bulk_get_count_exceeded">>, integer_to_binary(Max)}; error_info({request_entity_too_large, DocID}) -> {413, <<"document_too_large">>, DocID}; error_info({error, security_migration_updates_disabled}) -> @@ -949,6 +1020,10 @@ error_info(not_implemented) -> error_info(timeout) -> {500, <<"timeout">>, <<"The request could not be processed in a reasonable" " amount of time.">>}; +error_info(decryption_failed) -> + {500, <<"decryption_failed">>, <<"Decryption failed">>}; +error_info(not_ciphertext) -> + {500, <<"not_ciphertext">>, <<"Not Ciphertext">>}; error_info({service_unavailable, Reason}) -> {503, <<"service unavailable">>, Reason}; error_info({timeout, _Reason}) -> @@ -970,6 +1045,8 @@ maybe_handle_error(Error) -> Result; {Err, Reason} -> {500, couch_util:to_binary(Err), couch_util:to_binary(Reason)}; + normal -> + exit(normal); Error -> {500, <<"unknown_error">>, couch_util:to_binary(Error)} end. @@ -1043,16 +1120,20 @@ send_error(#httpd{} = Req, Code, ErrorStr, ReasonStr) -> send_error(Req, Code, [], ErrorStr, ReasonStr, []). send_error(Req, Code, Headers, ErrorStr, ReasonStr, []) -> - send_json(Req, Code, Headers, + Return = send_json(Req, Code, Headers, {[{<<"error">>, ErrorStr}, - {<<"reason">>, ReasonStr}]}); + {<<"reason">>, ReasonStr}]}), + span_error(Code, ErrorStr, ReasonStr, []), + Return; send_error(Req, Code, Headers, ErrorStr, ReasonStr, Stack) -> log_error_with_stack_trace({ErrorStr, ReasonStr, Stack}), - send_json(Req, Code, [stack_trace_id(Stack) | Headers], + Return = send_json(Req, Code, [stack_trace_id(Stack) | Headers], {[{<<"error">>, ErrorStr}, {<<"reason">>, ReasonStr} | case Stack of [] -> []; _ -> [{<<"ref">>, stack_hash(Stack)}] end - ]}). + ]}), + span_error(Code, ErrorStr, ReasonStr, Stack), + Return. update_timeout_stats(<<"timeout">>, #httpd{requested_path_parts = PathParts}) -> update_timeout_stats(PathParts); @@ -1206,6 +1287,126 @@ get_user(#httpd{user_ctx = #user_ctx{name = User}}) -> get_user(#httpd{user_ctx = undefined}) -> "undefined". +maybe_trace_fdb("true") -> + % Remember to also enable tracing in erlfdb application environment: + % network_options = [{trace_enable, ...}] + % Or via the OS environment variable: + % FDB_NETWORK_OPTION_TRACE_ENABLE = "" + case config:get_boolean("fabric", "fdb_trace", false) of + true -> + Nonce = erlang:get(nonce), + erlang:put(erlfdb_trace, list_to_binary(Nonce)); + false -> + ok + end; +maybe_trace_fdb(_) -> + ok. 
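maybe_trace_fdb/1 above enables per-request FoundationDB client tracing when the request carries an x-couchdb-fdb-trace: true header and the fabric fdb_trace flag is set, using the request nonce as the trace identifier. An assumed configuration to turn it on (as the code comment notes, tracing must also be enabled in the erlfdb network options):

[fabric]
fdb_trace = true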
+ +start_span(Req) -> + #httpd{ + mochi_req = MochiReq, + begin_ts = Begin, + peer = Peer, + nonce = Nonce, + method = Method, + path_parts = PathParts + } = Req, + {OperationName, ExtraTags} = get_action(Req), + Path = case PathParts of + [] -> <<"">>; + [_ | _] -> filename:join(PathParts) + end, + {IsExternalSpan, RootOptions} = root_span_options(MochiReq), + Tags = maps:merge(#{ + peer => Peer, + 'http.method' => Method, + nonce => Nonce, + 'http.url' => MochiReq:get(raw_path), + path_parts => Path, + 'span.kind' => <<"server">>, + component => <<"couchdb.chttpd">>, + external => IsExternalSpan + }, ExtraTags), + + ctrace:start_span(OperationName, [ + {tags, Tags}, + {time, Begin} + ] ++ RootOptions). + +root_span_options(MochiReq) -> + case get_trace_headers(MochiReq) of + [undefined, _, _] -> + {false, []}; + [TraceId, SpanId, ParentSpanId] -> + Span = ctrace:external_span(TraceId, SpanId, ParentSpanId), + {true, [{root, Span}]} + end. + +parse_trace_id(undefined) -> + undefined; +parse_trace_id(Hex) -> + to_int(Hex, 32). + +parse_span_id(undefined) -> + undefined; +parse_span_id(Hex) -> + to_int(Hex, 16). + +to_int(Hex, N) when length(Hex) =:= N -> + try + list_to_integer(Hex, 16) + catch error:badarg -> + undefined + end. + +get_trace_headers(MochiReq) -> + case MochiReq:get_header_value("b3") of + undefined -> + [ + parse_trace_id(MochiReq:get_header_value("X-B3-TraceId")), + parse_span_id(MochiReq:get_header_value("X-B3-SpanId")), + parse_span_id(MochiReq:get_header_value("X-B3-ParentSpanId")) + ]; + Value -> + case string:split(Value, "-", all) of + [TraceIdStr, SpanIdStr, _SampledStr, ParentSpanIdStr] -> + [ + parse_trace_id(TraceIdStr), + parse_span_id(SpanIdStr), + parse_span_id(ParentSpanIdStr) + ]; + _ -> + [undefined, undefined, undefined] + end + end. + +get_action(#httpd{} = Req) -> + try + chttpd_handlers:handler_info(Req) + catch Tag:Error -> + couch_log:error("Cannot set tracing action ~p:~p", [Tag, Error]), + {undefind, #{}} + end. + +span_ok(#httpd_resp{code = Code}) -> + ctrace:tag(#{ + error => false, + 'http.status_code' => Code + }), + ctrace:finish_span(). + +span_error(Code, ErrorStr, ReasonStr, Stack) -> + ctrace:tag(#{ + error => true, + 'http.status_code' => Code + }), + ctrace:log(#{ + 'error.kind' => ErrorStr, + message => ReasonStr, + stack => Stack + }), + ctrace:finish_span(). + -ifdef(TEST). -include_lib("eunit/include/eunit.hrl"). diff --git a/src/chttpd/src/chttpd_app.erl b/src/chttpd/src/chttpd_app.erl index d7a5aef86..770b78ef9 100644 --- a/src/chttpd/src/chttpd_app.erl +++ b/src/chttpd/src/chttpd_app.erl @@ -14,8 +14,8 @@ -behaviour(application). -export([start/2, stop/1]). -start(_Type, StartArgs) -> - chttpd_sup:start_link(StartArgs). +start(_Type, _StartArgs) -> + chttpd_sup:start_link(). stop(_State) -> ok. diff --git a/src/chttpd/src/chttpd_auth.erl b/src/chttpd/src/chttpd_auth.erl index 607f09a8a..ffae78171 100644 --- a/src/chttpd/src/chttpd_auth.erl +++ b/src/chttpd/src/chttpd_auth.erl @@ -18,6 +18,7 @@ -export([default_authentication_handler/1]). -export([cookie_authentication_handler/1]). -export([proxy_authentication_handler/1]). +-export([jwt_authentication_handler/1]). -export([party_mode_handler/1]). -export([handle_session_req/1]). @@ -51,22 +52,30 @@ cookie_authentication_handler(Req) -> proxy_authentication_handler(Req) -> couch_httpd_auth:proxy_authentication_handler(Req). +jwt_authentication_handler(Req) -> + couch_httpd_auth:jwt_authentication_handler(Req). 
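jwt_authentication_handler/1 below simply delegates to couch_httpd_auth; it only takes effect if it is listed in chttpd's authentication handler chain. A hedged example of the usual handler-chain form in local.ini (the exact chain is an assumption, not part of this diff):

[chttpd]
authentication_handlers = {chttpd_auth, jwt_authentication_handler}, {chttpd_auth, cookie_authentication_handler}, {chttpd_auth, default_authentication_handler}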
+ party_mode_handler(#httpd{method='POST', path_parts=[<<"_session">>]} = Req) -> % See #1947 - users should always be able to attempt a login Req#httpd{user_ctx=#user_ctx{}}; +party_mode_handler(#httpd{path_parts=[<<"_up">>]} = Req) -> + RequireValidUser = config:get_boolean("chttpd", "require_valid_user", false), + RequireValidUserExceptUp = config:get_boolean("chttpd", "require_valid_user_except_for_up", false), + require_valid_user(Req, RequireValidUser andalso not RequireValidUserExceptUp); + party_mode_handler(Req) -> RequireValidUser = config:get_boolean("chttpd", "require_valid_user", false), - ExceptUp = config:get_boolean("chttpd", "require_valid_user_except_for_up", true), - case RequireValidUser andalso not ExceptUp of - true -> - throw({unauthorized, <<"Authentication required.">>}); - false -> - case config:get("admins") of + RequireValidUserExceptUp = config:get_boolean("chttpd", "require_valid_user_except_for_up", false), + require_valid_user(Req, RequireValidUser orelse RequireValidUserExceptUp). + +require_valid_user(_Req, true) -> + throw({unauthorized, <<"Authentication required.">>}); +require_valid_user(Req, false) -> + case config:get("admins") of [] -> Req#httpd{user_ctx = ?ADMIN_USER}; _ -> Req#httpd{user_ctx=#user_ctx{}} - end end. handle_session_req(Req) -> diff --git a/src/chttpd/src/chttpd_auth_cache.erl b/src/chttpd/src/chttpd_auth_cache.erl index fdae27b79..c5a56bddb 100644 --- a/src/chttpd/src/chttpd_auth_cache.erl +++ b/src/chttpd/src/chttpd_auth_cache.erl @@ -12,16 +12,19 @@ -module(chttpd_auth_cache). -behaviour(gen_server). +-behaviour(config_listener). -export([start_link/0, get_user_creds/2, update_user_creds/3]). -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]). -export([listen_for_changes/1, changes_callback/2]). +-export([handle_config_change/5, handle_config_terminate/3]). -include_lib("couch/include/couch_db.hrl"). -include_lib("couch/include/couch_js_functions.hrl"). -define(CACHE, chttpd_auth_cache_lru). +-define(RELISTEN_DELAY, 5000). -record(state, { changes_pid, @@ -52,7 +55,8 @@ get_user_creds(_Req, UserName) when is_binary(UserName) -> update_user_creds(_Req, UserDoc, _Ctx) -> {_, Ref} = spawn_monitor(fun() -> - case fabric:update_doc(dbname(), UserDoc, []) of + {ok, Db} = fabric2_db:open(dbname(), [?ADMIN_CTX]), + case fabric2_db:update_doc(Db, UserDoc) of {ok, _} -> exit(ok); Else -> @@ -100,9 +104,28 @@ maybe_increment_auth_cache_miss(UserName) -> %% gen_server callbacks init([]) -> + ensure_auth_db(), + ok = config:listen_for_changes(?MODULE, nil), self() ! {start_listener, 0}, {ok, #state{}}. +handle_call(reinit_cache, _From, State) -> + #state{ + changes_pid = Pid + } = State, + + % The database may currently be cached. This + % ensures that we've removed it so that the + % system db callbacks are installed. + fabric2_server:remove(dbname()), + + ensure_auth_db(), + ets_lru:clear(?CACHE), + exit(Pid, shutdown), + self() ! {start_listener, 0}, + + {reply, ok, State#state{changes_pid = undefined}}; + handle_call(_Call, _From, State) -> {noreply, State}. @@ -124,6 +147,9 @@ handle_info({'DOWN', _, _, Pid, Reason}, #state{changes_pid=Pid} = State) -> {noreply, State#state{last_seq=Seq}}; handle_info({start_listener, Seq}, State) -> {noreply, State#state{changes_pid = spawn_changes(Seq)}}; +handle_info(restart_config_listener, State) -> + ok = config:listen_for_changes(?MODULE, nil), + {noreply, State}; handle_info(_Msg, State) -> {noreply, State}. @@ -142,7 +168,8 @@ spawn_changes(Since) -> Pid. 
listen_for_changes(Since) -> - ensure_auth_ddoc_exists(dbname(), <<"_design/_auth">>), + {ok, Db} = fabric2_db:open(dbname(), [?ADMIN_CTX]), + ensure_auth_ddoc_exists(Db, <<"_design/_auth">>), CBFun = fun ?MODULE:changes_callback/2, Args = #changes_args{ feed = "continuous", @@ -150,7 +177,8 @@ listen_for_changes(Since) -> heartbeat = true, filter = {default, main_only} }, - fabric:changes(dbname(), CBFun, Since, Args). + ChangesFun = chttpd_changes:handle_db_changes(Args, nil, Db), + ChangesFun({CBFun, Since}). changes_callback(waiting_for_updates, Acc) -> {ok, Acc}; @@ -159,7 +187,7 @@ changes_callback(start, Since) -> changes_callback({stop, EndSeq, _Pending}, _) -> exit({seq, EndSeq}); changes_callback({change, {Change}}, _) -> - case couch_util:get_value(id, Change) of + case couch_util:get_value(<<"id">>, Change) of <<"_design/", _/binary>> -> ok; DocId -> @@ -168,13 +196,27 @@ changes_callback({change, {Change}}, _) -> ets_lru:remove(?CACHE, UserName) end, {ok, couch_util:get_value(seq, Change)}; -changes_callback(timeout, Acc) -> +changes_callback({timeout, _ResponseType}, Acc) -> {ok, Acc}; changes_callback({error, _}, EndSeq) -> exit({seq, EndSeq}). + +handle_config_change("chttpd_auth", "authentication_db", _DbName, _, _) -> + {ok, gen_server:call(?MODULE, reinit_cache, infinity)}; +handle_config_change(_, _, _, _, _) -> + {ok, nil}. + +handle_config_terminate(_, stop, _) -> + ok; +handle_config_terminate(_Server, _Reason, _State) -> + Dst = whereis(?MODULE), + erlang:send_after(?RELISTEN_DELAY, Dst, restart_config_listener). + + load_user_from_db(UserName) -> - try fabric:open_doc(dbname(), docid(UserName), [?ADMIN_CTX, ejson_body, conflicts]) of + {ok, Db} = fabric2_db:open(dbname(), [?ADMIN_CTX]), + try fabric2_db:open_doc(Db, docid(UserName), [conflicts]) of {ok, Doc} -> {Props} = couch_doc:to_json_obj(Doc, []), Props; @@ -185,8 +227,21 @@ load_user_from_db(UserName) -> nil end. + +ensure_auth_db() -> + try + fabric2_db:open(dbname(), [?ADMIN_CTX]) + catch error:database_does_not_exist -> + case fabric2_db:create(dbname(), [?ADMIN_CTX]) of + {ok, _} -> ok; + {error, file_exists} -> ok + end + end. + + dbname() -> - config:get("chttpd_auth", "authentication_db", "_users"). + DbNameStr = config:get("chttpd_auth", "authentication_db", "_users"), + iolist_to_binary(DbNameStr). docid(UserName) -> <<"org.couchdb.user:", UserName/binary>>. @@ -194,11 +249,11 @@ docid(UserName) -> username(<<"org.couchdb.user:", UserName/binary>>) -> UserName. 
-ensure_auth_ddoc_exists(DbName, DDocId) -> - case fabric:open_doc(DbName, DDocId, [?ADMIN_CTX, ejson_body]) of +ensure_auth_ddoc_exists(Db, DDocId) -> + case fabric2_db:open_doc(Db, DDocId) of {not_found, _Reason} -> {ok, AuthDesign} = couch_auth_cache:auth_design_doc(DDocId), - update_doc_ignoring_conflict(DbName, AuthDesign, [?ADMIN_CTX]); + update_doc_ignoring_conflict(Db, AuthDesign); {ok, Doc} -> {Props} = couch_doc:to_json_obj(Doc, []), case couch_util:get_value(<<"validate_doc_update">>, Props, []) of @@ -208,19 +263,20 @@ ensure_auth_ddoc_exists(DbName, DDocId) -> Props1 = lists:keyreplace(<<"validate_doc_update">>, 1, Props, {<<"validate_doc_update">>, ?AUTH_DB_DOC_VALIDATE_FUNCTION}), - update_doc_ignoring_conflict(DbName, couch_doc:from_json_obj({Props1}), [?ADMIN_CTX]) + NewDoc = couch_doc:from_json_obj({Props1}), + update_doc_ignoring_conflict(Db, NewDoc) end; {error, Reason} -> - couch_log:notice("Failed to ensure auth ddoc ~s/~s exists for reason: ~p", [DbName, DDocId, Reason]), + couch_log:notice("Failed to ensure auth ddoc ~s/~s exists for reason: ~p", [dbname(), DDocId, Reason]), ok end, ok. -update_doc_ignoring_conflict(DbName, Doc, Options) -> +update_doc_ignoring_conflict(DbName, Doc) -> try - fabric:update_doc(DbName, Doc, Options) + fabric2_db:update_doc(DbName, Doc) catch - throw:conflict -> + error:conflict -> ok end. diff --git a/src/chttpd/src/chttpd_auth_request.erl b/src/chttpd/src/chttpd_auth_request.erl index fa47f5bfa..3f6f97602 100644 --- a/src/chttpd/src/chttpd_auth_request.erl +++ b/src/chttpd/src/chttpd_auth_request.erl @@ -34,7 +34,7 @@ authorize_request_int(#httpd{path_parts=[]}=Req) -> authorize_request_int(#httpd{path_parts=[<<"favicon.ico">>|_]}=Req) -> Req; authorize_request_int(#httpd{path_parts=[<<"_all_dbs">>|_]}=Req) -> - case config:get_boolean("chttpd", "admin_only_all_dbs", false) of + case config:get_boolean("chttpd", "admin_only_all_dbs", true) of true -> require_admin(Req); false -> Req end; @@ -106,8 +106,8 @@ server_authorization_check(#httpd{path_parts=[<<"_node">>,_ , <<"_system">>|_]}= server_authorization_check(#httpd{path_parts=[<<"_", _/binary>>|_]}=Req) -> require_admin(Req). -db_authorization_check(#httpd{path_parts=[DbName|_],user_ctx=Ctx}=Req) -> - {_} = fabric:get_security(DbName, [{user_ctx, Ctx}]), +db_authorization_check(#httpd{path_parts=[_DbName|_]}=Req) -> + % Db authorization checks are performed in fabric before every FDB operation Req. @@ -125,8 +125,8 @@ require_admin(Req) -> Req. require_db_admin(#httpd{path_parts=[DbName|_],user_ctx=Ctx}=Req) -> - Sec = fabric:get_security(DbName, [{user_ctx, Ctx}]), - + {ok, Db} = fabric2_db:open(DbName, [{user_ctx, Ctx}]), + Sec = fabric2_db:get_security(Db), case is_db_admin(Ctx,Sec) of true -> Req; false -> throw({unauthorized, <<"You are not a server or db admin.">>}) diff --git a/src/chttpd/src/chttpd_changes.erl b/src/chttpd/src/chttpd_changes.erl new file mode 100644 index 000000000..45c7d57b9 --- /dev/null +++ b/src/chttpd/src/chttpd_changes.erl @@ -0,0 +1,760 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(chttpd_changes). +-include_lib("couch/include/couch_db.hrl"). +-include_lib("couch_mrview/include/couch_mrview.hrl"). + +-export([ + handle_db_changes/3, + get_changes_timeout/2, + wait_updated/3, + get_rest_updated/1, + configure_filter/4, + filter/3, + handle_db_event/3, + handle_view_event/3, + send_changes_doc_ids/6, + send_changes_design_docs/6 +]). + +-export([changes_enumerator/2]). + +%% export so we can use fully qualified call to facilitate hot-code upgrade +-export([ + keep_sending_changes/3 +]). + +-record(changes_acc, { + db, + seq, + prepend, + filter, + callback, + user_acc, + resp_type, + limit, + include_docs, + doc_options, + conflicts, + timeout, + timeout_fun, + aggregation_kvs, + aggregation_results +}). + +handle_db_changes(Args0, Req, Db0) -> + #changes_args{ + style = Style, + filter = FilterName, + feed = Feed, + dir = Dir, + since = Since + } = Args0, + Filter = configure_filter(FilterName, Style, Req, Db0), + Args = Args0#changes_args{filter_fun = Filter}, + DbName = fabric2_db:name(Db0), + StartListenerFun = fun() -> + fabric2_events:link_listener( + ?MODULE, handle_db_event, self(), [{dbname, DbName}] + ) + end, + Start = fun() -> + StartSeq = case Dir =:= rev orelse Since =:= now of + true -> fabric2_db:get_update_seq(Db0); + false -> Since + end, + {Db0, StartSeq} + end, + % begin timer to deal with heartbeat when filter function fails + case Args#changes_args.heartbeat of + undefined -> + erlang:erase(last_changes_heartbeat); + Val when is_integer(Val); Val =:= true -> + put(last_changes_heartbeat, os:timestamp()) + end, + + case lists:member(Feed, ["continuous", "longpoll", "eventsource"]) of + true -> + fun(CallbackAcc) -> + {Callback, UserAcc} = get_callback_acc(CallbackAcc), + {ok, Listener} = StartListenerFun(), + + {Db, StartSeq} = Start(), + UserAcc2 = start_sending_changes(Callback, UserAcc), + {Timeout, TimeoutFun} = get_changes_timeout(Args, Callback), + Acc0 = build_acc(Args, Callback, UserAcc2, Db, StartSeq, + <<"">>, Timeout, TimeoutFun), + try + keep_sending_changes( + Args#changes_args{dir=fwd}, + Acc0, + true) + after + fabric2_events:stop_listener(Listener), + get_rest_updated(ok) % clean out any remaining update messages + end + end; + false -> + fun(CallbackAcc) -> + {Callback, UserAcc} = get_callback_acc(CallbackAcc), + UserAcc2 = start_sending_changes(Callback, UserAcc), + {Timeout, TimeoutFun} = get_changes_timeout(Args, Callback), + {Db, StartSeq} = Start(), + Acc0 = build_acc(Args#changes_args{feed="normal"}, Callback, + UserAcc2, Db, StartSeq, <<>>, + Timeout, TimeoutFun), + {ok, #changes_acc{seq = LastSeq, user_acc = UserAcc3}} = + send_changes( + Acc0, + Dir, + true), + end_sending_changes(Callback, UserAcc3, LastSeq) + end + end. + + +handle_db_event(_DbName, updated, Parent) -> + Parent ! updated, + {ok, Parent}; +handle_db_event(_DbName, deleted, Parent) -> + Parent ! deleted, + {ok, Parent}; +handle_db_event(_DbName, _Event, Parent) -> + {ok, Parent}. + + +handle_view_event(_DbName, Msg, {Parent, DDocId}) -> + case Msg of + {index_commit, DDocId} -> + Parent ! updated; + {index_delete, DDocId} -> + Parent ! deleted; + _ -> + ok + end, + {ok, {Parent, DDocId}}. + +get_callback_acc({Callback, _UserAcc} = Pair) when is_function(Callback, 2) -> + Pair; +get_callback_acc(Callback) when is_function(Callback, 1) -> + {fun(Ev, _) -> Callback(Ev) end, ok}. 
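Reviewer note: handle_db_changes/3 no longer runs the feed itself; it returns a fun that the caller drives with a {Callback, UserAcc} pair, which is how chttpd_auth_cache and chttpd_db invoke it elsewhere in this change. A minimal sketch of a standalone caller, assuming -include_lib("couch/include/couch_db.hrl") for #changes_args{} and a Db handle obtained from fabric2_db:open/2 (illustration only, not part of the diff):

    %% Collect all rows of a one-shot ("normal") changes feed.
    collect_changes(Db, Since) ->
        Args = #changes_args{feed = "normal", since = Since},
        ChangesFun = chttpd_changes:handle_db_changes(Args, nil, Db),
        Callback = fun
            (start, Acc) -> {ok, Acc};
            ({change, {Row}}, Acc) -> {ok, [Row | Acc]};      % Row is the EJSON proplist
            ({timeout, _Feed}, Acc) -> {ok, Acc};             % heartbeat, keep going
            ({stop, _EndSeq, _Pending}, Acc) -> lists:reverse(Acc)
        end,
        ChangesFun({Callback, []}).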
+ + +configure_filter(Filter, _Style, _Req, _Db) when is_tuple(Filter) -> + % Filter has already been configured + Filter; +configure_filter("_doc_ids", Style, Req, _Db) -> + {doc_ids, Style, get_doc_ids(Req)}; +configure_filter("_selector", Style, Req, _Db) -> + {selector, Style, get_selector_and_fields(Req)}; +configure_filter("_design", Style, _Req, _Db) -> + {design_docs, Style}; +configure_filter("_view", Style, Req, Db) -> + ViewName = get_view_qs(Req), + if ViewName /= "" -> ok; true -> + throw({bad_request, "`view` filter parameter is not provided."}) + end, + ViewNameParts = string:tokens(ViewName, "/"), + case [?l2b(couch_httpd:unquote(Part)) || Part <- ViewNameParts] of + [DName, VName] -> + {ok, DDoc} = open_ddoc(Db, <<"_design/", DName/binary>>), + check_member_exists(DDoc, [<<"views">>, VName]), + case fabric2_db:is_clustered(Db) of + true -> + DIR = fabric_util:doc_id_and_rev(DDoc), + {fetch, view, Style, DIR, VName}; + false -> + {view, Style, DDoc, VName} + end; + [] -> + Msg = "`view` must be of the form `designname/viewname`", + throw({bad_request, Msg}) + end; +configure_filter([$_ | _], _Style, _Req, _Db) -> + throw({bad_request, "unknown builtin filter name"}); +configure_filter("", main_only, _Req, _Db) -> + {default, main_only}; +configure_filter("", all_docs, _Req, _Db) -> + {default, all_docs}; +configure_filter(FilterName, Style, Req, Db) -> + FilterNameParts = string:tokens(FilterName, "/"), + case [?l2b(couch_httpd:unquote(Part)) || Part <- FilterNameParts] of + [DName, FName] -> + {ok, DDoc} = open_ddoc(Db, <<"_design/", DName/binary>>), + check_member_exists(DDoc, [<<"filters">>, FName]), + {custom, Style, Req, DDoc, FName}; + [] -> + {default, Style}; + _Else -> + Msg = "`filter` must be of the form `designname/filtername`", + throw({bad_request, Msg}) + end. + + +filter(Db, Change, {default, Style}) -> + apply_style(Db, Change, Style); +filter(Db, Change, {doc_ids, Style, DocIds}) -> + case lists:member(maps:get(id, Change), DocIds) of + true -> + apply_style(Db, Change, Style); + false -> + [] + end; +filter(Db, Change, {selector, Style, {Selector, _Fields}}) -> + Docs = open_revs(Db, Change, Style), + Passes = [mango_selector:match(Selector, couch_doc:to_json_obj(Doc, [])) + || Doc <- Docs], + filter_revs(Passes, Docs); +filter(Db, Change, {design_docs, Style}) -> + case maps:get(id, Change) of + <<"_design", _/binary>> -> + apply_style(Db, Change, Style); + _ -> + [] + end; +filter(Db, Change, {view, Style, DDoc, VName}) -> + Docs = open_revs(Db, Change, Style), + {ok, Passes} = couch_query_servers:filter_view(DDoc, VName, Docs), + filter_revs(Passes, Docs); +filter(Db, Change, {custom, Style, Req0, DDoc, FName}) -> + Req = case Req0 of + {json_req, _} -> Req0; + #httpd{} -> {json_req, chttpd_external:json_req_obj(Req0, Db)} + end, + Docs = open_revs(Db, Change, Style), + {ok, Passes} = couch_query_servers:filter_docs(Req, Db, DDoc, FName, Docs), + filter_revs(Passes, Docs); +filter(Db, Change, Filter) -> + erlang:error({filter_error, Db, Change, Filter}). + + +get_view_qs({json_req, {Props}}) -> + {Query} = couch_util:get_value(<<"query">>, Props, {[]}), + binary_to_list(couch_util:get_value(<<"view">>, Query, "")); +get_view_qs(Req) -> + couch_httpd:qs_value(Req, "view", ""). 
+ +get_doc_ids({json_req, {Props}}) -> + check_docids(couch_util:get_value(<<"doc_ids">>, Props)); +get_doc_ids(#httpd{method='POST'}=Req) -> + couch_httpd:validate_ctype(Req, "application/json"), + {Props} = couch_httpd:json_body_obj(Req), + check_docids(couch_util:get_value(<<"doc_ids">>, Props)); +get_doc_ids(#httpd{method='GET'}=Req) -> + DocIds = ?JSON_DECODE(couch_httpd:qs_value(Req, "doc_ids", "null")), + check_docids(DocIds); +get_doc_ids(_) -> + throw({bad_request, no_doc_ids_provided}). + + +get_selector_and_fields({json_req, {Props}}) -> + Selector = check_selector(couch_util:get_value(<<"selector">>, Props)), + Fields = check_fields(couch_util:get_value(<<"fields">>, Props, nil)), + {Selector, Fields}; +get_selector_and_fields(#httpd{method='POST'}=Req) -> + couch_httpd:validate_ctype(Req, "application/json"), + get_selector_and_fields({json_req, couch_httpd:json_body_obj(Req)}); +get_selector_and_fields(_) -> + throw({bad_request, "Selector must be specified in POST payload"}). + + +check_docids(DocIds) when is_list(DocIds) -> + lists:foreach(fun + (DocId) when not is_binary(DocId) -> + Msg = "`doc_ids` filter parameter is not a list of doc ids.", + throw({bad_request, Msg}); + (_) -> ok + end, DocIds), + DocIds; +check_docids(_) -> + Msg = "`doc_ids` filter parameter is not a list of doc ids.", + throw({bad_request, Msg}). + + +check_selector(Selector={_}) -> + try + mango_selector:normalize(Selector) + catch + {mango_error, Mod, Reason0} -> + {_StatusCode, _Error, Reason} = mango_error:info(Mod, Reason0), + throw({bad_request, Reason}) + end; +check_selector(_Selector) -> + throw({bad_request, "Selector error: expected a JSON object"}). + + +check_fields(nil) -> + nil; +check_fields(Fields) when is_list(Fields) -> + try + {ok, Fields1} = mango_fields:new(Fields), + Fields1 + catch + {mango_error, Mod, Reason0} -> + {_StatusCode, _Error, Reason} = mango_error:info(Mod, Reason0), + throw({bad_request, Reason}) + end; +check_fields(_Fields) -> + throw({bad_request, "Selector error: fields must be JSON array"}). + + +open_ddoc(Db, DDocId) -> + case fabric2_db:open_doc(Db, DDocId, [ejson_body, ?ADMIN_CTX]) of + {ok, _} = Resp -> Resp; + Else -> throw(Else) + end. + + +check_member_exists(#doc{body={Props}}, Path) -> + couch_util:get_nested_json_value({Props}, Path). + + +apply_style(_Db, Change, main_only) -> + #{rev_id := RevId} = Change, + [{[{<<"rev">>, couch_doc:rev_to_str(RevId)}]}]; +apply_style(Db, Change, all_docs) -> + % We have to fetch all revs for this row + #{id := DocId} = Change, + {ok, Resps} = fabric2_db:open_doc_revs(Db, DocId, all, [deleted]), + lists:flatmap(fun(Resp) -> + case Resp of + {ok, #doc{revs = {Pos, [Rev | _]}}} -> + [{[{<<"rev">>, couch_doc:rev_to_str({Pos, Rev})}]}]; + _ -> + [] + end + end, Resps); +apply_style(Db, Change, Style) -> + erlang:error({changes_apply_style, Db, Change, Style}). + + +open_revs(Db, Change, Style) -> + #{id := DocId} = Change, + Options = [deleted, conflicts], + try + case Style of + main_only -> + {ok, Doc} = fabric2_db:open_doc(Db, DocId, Options), + [Doc]; + all_docs -> + {ok, Docs} = fabric2_db:open_doc_revs(Db, DocId, all, Options), + [Doc || {ok, Doc} <- Docs] + end + catch _:_ -> + % We didn't log this before, should we now? + [] + end. + + +filter_revs(Passes, Docs) -> + lists:flatmap(fun + ({true, #doc{revs={RevPos, [RevId | _]}}}) -> + RevStr = couch_doc:rev_to_str({RevPos, RevId}), + Change = {[{<<"rev">>, RevStr}]}, + [Change]; + (_) -> + [] + end, lists:zip(Passes, Docs)). 
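The filter specs built by configure_filter/4 are applied per change by the exported filter/3. A hypothetical illustration of the _doc_ids case, assuming a change map shaped like the ones fold_changes and send_lookup_changes produce (the rev_id value here is made up):

    %% Keeps only changes whose id is in the list, then applies the style.
    doc_ids_filter_example(Db) ->
        Filter = {doc_ids, main_only, [<<"doc-a">>]},
        Change = #{id => <<"doc-a">>, sequence => 42,
                   rev_id => {1, <<"deadbeef">>}, deleted => false},
        %% Returns a single {[{<<"rev">>, RevStr}]} row here; an id that is
        %% not in the list would return [] and the change would be skipped.
        chttpd_changes:filter(Db, Change, Filter).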
+ + +get_changes_timeout(Args, Callback) -> + #changes_args{ + heartbeat = Heartbeat, + timeout = Timeout, + feed = ResponseType + } = Args, + DefaultTimeout = list_to_integer( + config:get("httpd", "changes_timeout", "60000") + ), + case Heartbeat of + undefined -> + case Timeout of + undefined -> + {DefaultTimeout, fun(UserAcc) -> {stop, UserAcc} end}; + infinity -> + {infinity, fun(UserAcc) -> {stop, UserAcc} end}; + _ -> + {lists:min([DefaultTimeout, Timeout]), + fun(UserAcc) -> {stop, UserAcc} end} + end; + true -> + {DefaultTimeout, + fun(UserAcc) -> Callback({timeout, ResponseType}, UserAcc) end}; + _ -> + {lists:min([DefaultTimeout, Heartbeat]), + fun(UserAcc) -> Callback({timeout, ResponseType}, UserAcc) end} + end. + +start_sending_changes(Callback, UserAcc) -> + {_, NewUserAcc} = Callback(start, UserAcc), + NewUserAcc. + +build_acc(Args, Callback, UserAcc, Db, StartSeq, Prepend, Timeout, TimeoutFun) -> + #changes_args{ + include_docs = IncludeDocs, + doc_options = DocOpts, + conflicts = Conflicts, + limit = Limit, + feed = ResponseType, + filter_fun = Filter + } = Args, + #changes_acc{ + db = Db, + seq = StartSeq, + prepend = Prepend, + filter = Filter, + callback = Callback, + user_acc = UserAcc, + resp_type = ResponseType, + limit = Limit, + include_docs = IncludeDocs, + doc_options = DocOpts, + conflicts = Conflicts, + timeout = Timeout, + timeout_fun = TimeoutFun, + aggregation_results=[], + aggregation_kvs=[] + }. + +send_changes(Acc, Dir, FirstRound) -> + #changes_acc{ + db = Db, + seq = StartSeq, + filter = Filter + } = maybe_upgrade_changes_acc(Acc), + DbEnumFun = fun changes_enumerator/2, + case can_optimize(FirstRound, Filter) of + {true, Fun} -> + Fun(Db, StartSeq, Dir, DbEnumFun, Acc, Filter); + _ -> + Opts = [{dir, Dir}], + fabric2_db:fold_changes(Db, StartSeq, DbEnumFun, Acc, Opts) + end. + + +can_optimize(true, {doc_ids, _Style, DocIds}) -> + MaxDocIds = config:get_integer("couchdb", + "changes_doc_ids_optimization_threshold", 100), + if length(DocIds) =< MaxDocIds -> + {true, fun send_changes_doc_ids/6}; + true -> + false + end; +can_optimize(true, {design_docs, _Style}) -> + {true, fun send_changes_design_docs/6}; +can_optimize(_, _) -> + false. + + +send_changes_doc_ids(Db, StartSeq, Dir, Fun, Acc0, {doc_ids, _Style, DocIds}) -> + Results = fabric2_db:get_full_doc_infos(Db, DocIds), + FullInfos = lists:foldl(fun + (#full_doc_info{}=FDI, Acc) -> [FDI | Acc]; + (not_found, Acc) -> Acc + end, [], Results), + send_lookup_changes(FullInfos, StartSeq, Dir, Db, Fun, Acc0). + + +send_changes_design_docs(Db, StartSeq, Dir, Fun, Acc0, {design_docs, _Style}) -> + FoldFun = fun(FDI, Acc) -> {ok, [FDI | Acc]} end, + Opts = [ + include_deleted, + {start_key, <<"_design/">>}, + {end_key_gt, <<"_design0">>} + ], + {ok, FullInfos} = fabric2_db:fold_docs(Db, FoldFun, [], Opts), + send_lookup_changes(FullInfos, StartSeq, Dir, Db, Fun, Acc0). 
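can_optimize/2 decides whether the first round can use the direct lookup paths above instead of a full fold_changes pass. A sketch of the dispatch, assuming the default couchdb/changes_doc_ids_optimization_threshold of 100 and a running config application (can_optimize/2 is module-internal, so this is illustration only):

    %% First round with a small _doc_ids or a _design filter short-circuits.
    optimize_dispatch_example() ->
        {true, _} = can_optimize(true, {doc_ids, main_only, [<<"a">>, <<"b">>]}),
        {true, _} = can_optimize(true, {design_docs, main_only}),
        %% Later rounds, and every other filter, fall back to fold_changes.
        false = can_optimize(false, {doc_ids, main_only, [<<"a">>]}),
        false = can_optimize(true, {default, main_only}).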
+ + +send_lookup_changes(FullDocInfos, StartSeq, Dir, Db, Fun, Acc0) -> + FoldFun = case Dir of + fwd -> fun lists:foldl/3; + rev -> fun lists:foldr/3 + end, + GreaterFun = case Dir of + fwd -> fun(A, B) -> A > B end; + rev -> fun(A, B) -> A =< B end + end, + DocInfos = lists:foldl(fun(FDI, Acc) -> + DI = couch_doc:to_doc_info(FDI), + case GreaterFun(DI#doc_info.high_seq, StartSeq) of + true -> [DI | Acc]; + false -> Acc + end + end, [], FullDocInfos), + SortedDocInfos = lists:keysort(#doc_info.high_seq, DocInfos), + FinalAcc = try + FoldFun(fun(DocInfo, Acc) -> + % Kinda gross that we're munging this back to a map + % that will then have to re-read and rebuild the FDI + % for all_docs style. But c'est la vie. + #doc_info{ + id = DocId, + high_seq = Seq, + revs = [#rev_info{rev = Rev, deleted = Deleted} | _] + } = DocInfo, + Change = #{ + id => DocId, + sequence => Seq, + rev_id => Rev, + deleted => Deleted + }, + case Fun(Change, Acc) of + {ok, NewAcc} -> + NewAcc; + {stop, NewAcc} -> + throw({stop, NewAcc}) + end + end, Acc0, SortedDocInfos) + catch + {stop, Acc} -> Acc + end, + case Dir of + fwd -> + FinalAcc0 = case element(1, FinalAcc) of + changes_acc -> % we came here via couch_http or internal call + FinalAcc#changes_acc{seq = fabric2_db:get_update_seq(Db)}; + fabric_changes_acc -> % we came here via chttpd / fabric / rexi + FinalAcc#fabric_changes_acc{seq = couch_db:get_update_seq(Db)} + end, + {ok, FinalAcc0}; + rev -> {ok, FinalAcc} + end. + + +keep_sending_changes(Args, Acc0, FirstRound) -> + #changes_args{ + feed = ResponseType, + limit = Limit, + db_open_options = DbOptions + } = Args, + + {ok, ChangesAcc} = send_changes(Acc0, fwd, FirstRound), + + #changes_acc{ + db = Db, callback = Callback, + timeout = Timeout, timeout_fun = TimeoutFun, seq = EndSeq, + prepend = Prepend2, user_acc = UserAcc2, limit = NewLimit + } = maybe_upgrade_changes_acc(ChangesAcc), + + if Limit > NewLimit, ResponseType == "longpoll" -> + end_sending_changes(Callback, UserAcc2, EndSeq); + true -> + {Go, UserAcc3} = notify_waiting_for_updates(Callback, UserAcc2), + if Go /= ok -> end_sending_changes(Callback, UserAcc3, EndSeq); true -> + case wait_updated(Timeout, TimeoutFun, UserAcc3) of + {updated, UserAcc4} -> + UserCtx = fabric2_db:get_user_ctx(Db), + DbOptions1 = [{user_ctx, UserCtx} | DbOptions], + case fabric2_db:open(fabric2_db:name(Db), DbOptions1) of + {ok, Db2} -> + ?MODULE:keep_sending_changes( + Args#changes_args{limit=NewLimit}, + ChangesAcc#changes_acc{ + db = Db2, + user_acc = UserAcc4, + seq = EndSeq, + prepend = Prepend2, + timeout = Timeout, + timeout_fun = TimeoutFun}, + false); + _Else -> + end_sending_changes(Callback, UserAcc3, EndSeq) + end; + {stop, UserAcc4} -> + end_sending_changes(Callback, UserAcc4, EndSeq) + end + end + end. + +notify_waiting_for_updates(Callback, UserAcc) -> + Callback(waiting_for_updates, UserAcc). + +end_sending_changes(Callback, UserAcc, EndSeq) -> + Callback({stop, EndSeq, null}, UserAcc). 
+ +changes_enumerator(Change, Acc) -> + #changes_acc{ + filter = Filter, + callback = Callback, + user_acc = UserAcc, + limit = Limit, + db = Db, + timeout = Timeout, + timeout_fun = TimeoutFun + } = maybe_upgrade_changes_acc(Acc), + Results0 = filter(Db, Change, Filter), + Results = [Result || Result <- Results0, Result /= null], + Seq = maps:get(sequence, Change), + Go = if (Limit =< 1) andalso Results =/= [] -> stop; true -> ok end, + case Results of + [] -> + {Done, UserAcc2} = maybe_heartbeat(Timeout, TimeoutFun, UserAcc), + case Done of + stop -> + {stop, Acc#changes_acc{seq = Seq, user_acc = UserAcc2}}; + ok -> + {Go, Acc#changes_acc{seq = Seq, user_acc = UserAcc2}} + end; + _ -> + ChangesRow = changes_row(Results, Change, Acc), + {UserGo, UserAcc2} = Callback({change, ChangesRow}, UserAcc), + RealGo = case UserGo of + ok -> Go; + stop -> stop + end, + reset_heartbeat(), + {RealGo, Acc#changes_acc{ + seq = Seq, + user_acc = UserAcc2, + limit = Limit - 1 + }} + end. + + +changes_row(Results, Change, Acc) -> + #{ + id := Id, + sequence := Seq, + deleted := Del + } = Change, + {[ + {<<"seq">>, Seq}, + {<<"id">>, Id}, + {<<"changes">>, Results} + ] ++ deleted_item(Del) ++ maybe_get_changes_doc(Change, Acc)}. + +maybe_get_changes_doc(Value, #changes_acc{include_docs=true}=Acc) -> + #changes_acc{ + db = Db, + doc_options = DocOpts0, + conflicts = Conflicts, + filter = Filter + } = Acc, + OpenOpts = case Conflicts of + true -> [deleted, conflicts]; + false -> [deleted] + end, + DocOpts1 = case Conflicts of + true -> [conflicts | DocOpts0]; + false -> DocOpts0 + end, + load_doc(Db, Value, OpenOpts, DocOpts1, Filter); + +maybe_get_changes_doc(_Value, _Acc) -> + []. + + +load_doc(Db, Value, Opts, DocOpts, Filter) -> + case load_doc(Db, Value, Opts) of + null -> + [{doc, null}]; + Doc -> + [{doc, doc_to_json(Doc, DocOpts, Filter)}] + end. + + +load_doc(Db, Change, Opts) -> + #{ + id := Id, + rev_id := RevId + } = Change, + case fabric2_db:open_doc_revs(Db, Id, [RevId], Opts) of + {ok, [{ok, Doc}]} -> + Doc; + _ -> + null + end. + + +doc_to_json(Doc, DocOpts, {selector, _Style, {_Selector, Fields}}) + when Fields =/= nil -> + mango_fields:extract(couch_doc:to_json_obj(Doc, DocOpts), Fields); +doc_to_json(Doc, DocOpts, _Filter) -> + couch_doc:to_json_obj(Doc, DocOpts). + + +deleted_item(true) -> [{<<"deleted">>, true}]; +deleted_item(_) -> []. + +% waits for a updated msg, if there are multiple msgs, collects them. +wait_updated(Timeout, TimeoutFun, UserAcc) -> + receive + updated -> + get_rest_updated(UserAcc); + deleted -> + {stop, UserAcc} + after Timeout -> + {Go, UserAcc2} = TimeoutFun(UserAcc), + case Go of + ok -> + ?MODULE:wait_updated(Timeout, TimeoutFun, UserAcc2); + stop -> + {stop, UserAcc2} + end + end. + +get_rest_updated(UserAcc) -> + receive + updated -> + get_rest_updated(UserAcc) + after 0 -> + {updated, UserAcc} + end. + +reset_heartbeat() -> + case get(last_changes_heartbeat) of + undefined -> + ok; + _ -> + put(last_changes_heartbeat, os:timestamp()) + end. + +maybe_heartbeat(Timeout, TimeoutFun, Acc) -> + Before = get(last_changes_heartbeat), + case Before of + undefined -> + {ok, Acc}; + _ -> + Now = os:timestamp(), + case timer:now_diff(Now, Before) div 1000 >= Timeout of + true -> + {StopOrGo, Acc2} = TimeoutFun(Acc), + put(last_changes_heartbeat, Now), + {StopOrGo, Acc2}; + false -> + {ok, Acc} + end + end. 
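The {timeout, ResponseType} tuples that the updated callbacks in chttpd_auth_cache and chttpd_db now match come from the timeout fun built by get_changes_timeout/2 above. A small sketch, assuming the config application is running so the default httpd/changes_timeout can be read:

    %% With heartbeat set, the timeout fun emits a heartbeat event
    %% through the user callback instead of stopping the feed.
    heartbeat_example() ->
        Args = #changes_args{feed = "continuous", heartbeat = true},
        Callback = fun({timeout, "continuous"}, N) -> {ok, N + 1} end,
        {_TimeoutMs, TimeoutFun} = chttpd_changes:get_changes_timeout(Args, Callback),
        {ok, 1} = TimeoutFun(0).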
+ + +maybe_upgrade_changes_acc(#changes_acc{} = Acc) -> + Acc; +maybe_upgrade_changes_acc(Acc) when tuple_size(Acc) == 19 -> + #changes_acc{ + db = element(2, Acc), + seq = element(6, Acc), + prepend = element(7, Acc), + filter = element(8, Acc), + callback = element(9, Acc), + user_acc = element(10, Acc), + resp_type = element(11, Acc), + limit = element(12, Acc), + include_docs = element(13, Acc), + doc_options = element(14, Acc), + conflicts = element(15, Acc), + timeout = element(16, Acc), + timeout_fun = element(17, Acc), + aggregation_kvs = element(18, Acc), + aggregation_results = element(19, Acc) + }. diff --git a/src/chttpd/src/chttpd_db.erl b/src/chttpd/src/chttpd_db.erl index 6a3df6def..bfd8f9fc2 100644 --- a/src/chttpd/src/chttpd_db.erl +++ b/src/chttpd/src/chttpd_db.erl @@ -16,6 +16,7 @@ -include_lib("couch/include/couch_db.hrl"). -include_lib("couch_mrview/include/couch_mrview.hrl"). +-include_lib("fabric/include/fabric.hrl"). -include_lib("mem3/include/mem3.hrl"). -export([handle_request/1, handle_compact_req/2, handle_design_req/2, @@ -41,7 +42,6 @@ % Accumulator for changes_callback function -record(cacc, { - etag, feed, mochi, prepend = "", @@ -49,7 +49,8 @@ chunks_sent = 0, buffer = [], bufsize = 0, - threshold + threshold, + include_docs }). -define(IS_ALL_DOCS(T), ( @@ -85,45 +86,38 @@ handle_request(#httpd{path_parts=[DbName|RestParts],method=Method}=Req)-> handle_changes_req(#httpd{method='POST'}=Req, Db) -> chttpd:validate_ctype(Req, "application/json"), - handle_changes_req1(Req, Db); + fabric2_fdb:transactional(Db, fun(TxDb) -> + handle_changes_req_tx(Req, TxDb) + end); handle_changes_req(#httpd{method='GET'}=Req, Db) -> - handle_changes_req1(Req, Db); + fabric2_fdb:transactional(Db, fun(TxDb) -> + handle_changes_req_tx(Req, TxDb) + end); handle_changes_req(#httpd{path_parts=[_,<<"_changes">>]}=Req, _Db) -> send_method_not_allowed(Req, "GET,POST,HEAD"). 
-handle_changes_req1(#httpd{}=Req, Db) -> - #changes_args{filter=Raw, style=Style} = Args0 = parse_changes_query(Req), - ChangesArgs = Args0#changes_args{ - filter_fun = couch_changes:configure_filter(Raw, Style, Req, Db), - db_open_options = [{user_ctx, couch_db:get_user_ctx(Db)}] - }, +handle_changes_req_tx(#httpd{}=Req, Db) -> + ChangesArgs = parse_changes_query(Req), + ChangesFun = chttpd_changes:handle_db_changes(ChangesArgs, Req, Db), Max = chttpd:chunked_response_buffer_size(), case ChangesArgs#changes_args.feed of "normal" -> - T0 = os:timestamp(), - {ok, Info} = fabric:get_db_info(Db), - Suffix = mem3:shard_suffix(Db), - Etag = chttpd:make_etag({Info, Suffix}), - DeltaT = timer:now_diff(os:timestamp(), T0) / 1000, - couch_stats:update_histogram([couchdb, dbinfo], DeltaT), - chttpd:etag_respond(Req, Etag, fun() -> - Acc0 = #cacc{ - feed = normal, - etag = Etag, - mochi = Req, - threshold = Max - }, - fabric:changes(Db, fun changes_callback/2, Acc0, ChangesArgs) - end); + Acc0 = #cacc{ + feed = normal, + mochi = Req, + threshold = Max + }, + ChangesFun({fun changes_callback/2, Acc0}); Feed when Feed =:= "continuous"; Feed =:= "longpoll"; Feed =:= "eventsource" -> couch_stats:increment_counter([couchdb, httpd, clients_requesting_changes]), Acc0 = #cacc{ feed = list_to_atom(Feed), mochi = Req, - threshold = Max + threshold = Max, + include_docs = ChangesArgs#changes_args.include_docs }, try - fabric:changes(Db, fun changes_callback/2, Acc0, ChangesArgs) + ChangesFun({fun changes_callback/2, Acc0}) after couch_stats:decrement_counter([couchdb, httpd, clients_requesting_changes]) end; @@ -136,8 +130,9 @@ handle_changes_req1(#httpd{}=Req, Db) -> changes_callback(start, #cacc{feed = continuous} = Acc) -> {ok, Resp} = chttpd:start_delayed_json_response(Acc#cacc.mochi, 200), {ok, Acc#cacc{mochi = Resp, responding = true}}; -changes_callback({change, Change}, #cacc{feed = continuous} = Acc) -> - chttpd_stats:incr_rows(), +changes_callback({change, Change}, #cacc{feed = continuous, + include_docs = IncludeDocs} = Acc) -> + incr_stats_changes_feed(IncludeDocs), Data = [?JSON_ENCODE(Change) | "\n"], Len = iolist_size(Data), maybe_flush_changes_feed(Acc, Data, Len); @@ -160,8 +155,9 @@ changes_callback(start, #cacc{feed = eventsource} = Acc) -> ], {ok, Resp} = chttpd:start_delayed_json_response(Req, 200, Headers), {ok, Acc#cacc{mochi = Resp, responding = true}}; -changes_callback({change, {ChangeProp}=Change}, #cacc{feed = eventsource} = Acc) -> - chttpd_stats:incr_rows(), +changes_callback({change, {ChangeProp}=Change}, + #cacc{feed = eventsource, include_docs = IncludeDocs} = Acc) -> + incr_stats_changes_feed(IncludeDocs), Seq = proplists:get_value(seq, ChangeProp), Chunk = [ "data: ", ?JSON_ENCODE(Change), @@ -182,18 +178,17 @@ changes_callback({stop, _EndSeq}, #cacc{feed = eventsource} = Acc) -> % callbacks for longpoll and normal (single JSON Object) changes_callback(start, #cacc{feed = normal} = Acc) -> - #cacc{etag = Etag, mochi = Req} = Acc, + #cacc{mochi = Req} = Acc, FirstChunk = "{\"results\":[\n", - {ok, Resp} = chttpd:start_delayed_json_response(Req, 200, - [{"ETag",Etag}], FirstChunk), + {ok, Resp} = chttpd:start_delayed_json_response(Req, 200, [], FirstChunk), {ok, Acc#cacc{mochi = Resp, responding = true}}; changes_callback(start, Acc) -> #cacc{mochi = Req} = Acc, FirstChunk = "{\"results\":[\n", {ok, Resp} = chttpd:start_delayed_json_response(Req, 200, [], FirstChunk), {ok, Acc#cacc{mochi = Resp, responding = true}}; -changes_callback({change, Change}, Acc) -> - 
chttpd_stats:incr_rows(), +changes_callback({change, Change}, #cacc{include_docs = IncludeDocs} = Acc) -> + incr_stats_changes_feed(IncludeDocs), Data = [Acc#cacc.prepend, ?JSON_ENCODE(Change)], Len = iolist_size(Data), maybe_flush_changes_feed(Acc, Data, Len); @@ -227,7 +222,7 @@ changes_callback(waiting_for_updates, Acc) -> mochi = Resp1, chunks_sent = ChunksSent + 1 }}; -changes_callback(timeout, Acc) -> +changes_callback({timeout, _ResponseType}, Acc) -> #cacc{mochi = Resp, chunks_sent = ChunksSent} = Acc, {ok, Resp1} = chttpd:send_delayed_chunk(Resp, "\n"), {ok, Acc#cacc{mochi = Resp1, chunks_sent = ChunksSent + 1}}; @@ -255,27 +250,25 @@ maybe_flush_changes_feed(Acc0, Data, Len) -> }, {ok, Acc}. -handle_compact_req(#httpd{method='POST'}=Req, Db) -> +incr_stats_changes_feed(IncludeDocs) -> + chttpd_stats:incr_rows(), + if not IncludeDocs -> ok; true -> + chttpd_stats:incr_reads() + end. + +% Return the same response as if a compaction succeeded even though _compaction +% isn't a valid operation in CouchDB >= 4.x anymore. This is mostly to not +% break existing user script which maybe periodically call this endpoint. In +% the future this endpoint will return a 410 response then it will be removed. +handle_compact_req(#httpd{method='POST'}=Req, _Db) -> chttpd:validate_ctype(Req, "application/json"), - case Req#httpd.path_parts of - [_DbName, <<"_compact">>] -> - ok = fabric:compact(Db), - send_json(Req, 202, {[{ok, true}]}); - [DbName, <<"_compact">>, DesignName | _] -> - case ddoc_cache:open(DbName, <<"_design/", DesignName/binary>>) of - {ok, _DDoc} -> - ok = fabric:compact(Db, DesignName), - send_json(Req, 202, {[{ok, true}]}); - Error -> - throw(Error) - end - end; + send_json(Req, 202, {[{ok, true}]}); handle_compact_req(Req, _Db) -> send_method_not_allowed(Req, "POST"). handle_view_cleanup_req(Req, Db) -> - ok = fabric:cleanup_index_files_all_nodes(Db), + ok = fabric2_index:cleanup(Db), send_json(Req, 202, {[{ok, true}]}). @@ -355,8 +348,7 @@ update_partition_stats(PathParts) -> handle_design_req(#httpd{ path_parts=[_DbName, _Design, Name, <<"_",_/binary>> = Action | _Rest] }=Req, Db) -> - DbName = mem3:dbname(couch_db:name(Db)), - case ddoc_cache:open(DbName, <<"_design/", Name/binary>>) of + case fabric2_db:open_doc(Db, <<"_design/", Name/binary>>) of {ok, DDoc} -> Handler = chttpd_handlers:design_handler(Action, fun bad_action_req/3), Handler(Req, Db, DDoc); @@ -372,7 +364,7 @@ bad_action_req(#httpd{path_parts=[_, _, Name|FileNameParts]}=Req, Db, _DDoc) -> handle_design_info_req(#httpd{method='GET'}=Req, Db, #doc{} = DDoc) -> [_, _, Name, _] = Req#httpd.path_parts, - {ok, GroupInfoList} = fabric:get_view_group_info(Db, DDoc), + {ok, GroupInfoList} = couch_views:get_info(Db, DDoc), send_json(Req, 200, {[ {name, Name}, {view_index, {GroupInfoList}} @@ -381,81 +373,58 @@ handle_design_info_req(#httpd{method='GET'}=Req, Db, #doc{} = DDoc) -> handle_design_info_req(Req, _Db, _DDoc) -> send_method_not_allowed(Req, "GET"). 
-create_db_req(#httpd{}=Req, DbName) -> +create_db_req(#httpd{user_ctx=Ctx}=Req, DbName) -> couch_httpd:verify_is_server_admin(Req), - N = chttpd:qs_value(Req, "n", config:get("cluster", "n", "3")), - Q = chttpd:qs_value(Req, "q", config:get("cluster", "q", "8")), - P = chttpd:qs_value(Req, "placement", config:get("cluster", "placement")), - EngineOpt = parse_engine_opt(Req), - DbProps = parse_partitioned_opt(Req), - Options = [ - {n, N}, - {q, Q}, - {placement, P}, - {props, DbProps} - ] ++ EngineOpt, DocUrl = absolute_uri(Req, "/" ++ couch_util:url_encode(DbName)), - case fabric:create_db(DbName, Options) of - ok -> - send_json(Req, 201, [{"Location", DocUrl}], {[{ok, true}]}); - accepted -> - send_json(Req, 202, [{"Location", DocUrl}], {[{ok, true}]}); - {error, file_exists} -> - chttpd:send_error(Req, file_exists); - Error -> - throw(Error) + case fabric2_db:create(DbName, [{user_ctx, Ctx}]) of + {ok, _} -> + send_json(Req, 201, [{"Location", DocUrl}], {[{ok, true}]}); + {error, file_exists} -> + chttpd:send_error(Req, file_exists); + Error -> + throw(Error) end. -delete_db_req(#httpd{}=Req, DbName) -> +delete_db_req(#httpd{user_ctx=Ctx}=Req, DbName) -> couch_httpd:verify_is_server_admin(Req), - case fabric:delete_db(DbName, []) of - ok -> - send_json(Req, 200, {[{ok, true}]}); - accepted -> - send_json(Req, 202, {[{ok, true}]}); - Error -> - throw(Error) + case fabric2_db:delete(DbName, [{user_ctx, Ctx}]) of + ok -> + send_json(Req, 200, {[{ok, true}]}); + Error -> + throw(Error) end. do_db_req(#httpd{path_parts=[DbName|_], user_ctx=Ctx}=Req, Fun) -> - Shard = hd(mem3:shards(DbName)), - Props = couch_util:get_value(props, Shard#shard.opts, []), - Opts = case Ctx of - undefined -> - [{props, Props}]; - #user_ctx{} -> - [{user_ctx, Ctx}, {props, Props}] - end, - {ok, Db} = couch_db:clustered_db(DbName, Opts), + Options = [{user_ctx, Ctx}, {interactive, true}], + {ok, Db} = fabric2_db:open(DbName, Options), Fun(Req, Db). 
-db_req(#httpd{method='GET',path_parts=[DbName]}=Req, _Db) -> +db_req(#httpd{method='GET',path_parts=[_DbName]}=Req, Db) -> % measure the time required to generate the etag, see if it's worth it T0 = os:timestamp(), - {ok, DbInfo} = fabric:get_db_info(DbName), + {ok, DbInfo} = fabric2_db:get_db_info(Db), DeltaT = timer:now_diff(os:timestamp(), T0) / 1000, couch_stats:update_histogram([couchdb, dbinfo], DeltaT), send_json(Req, {DbInfo}); -db_req(#httpd{method='POST', path_parts=[DbName], user_ctx=Ctx}=Req, Db) -> +db_req(#httpd{method='POST', path_parts=[DbName]}=Req, Db) -> chttpd:validate_ctype(Req, "application/json"), - W = chttpd:qs_value(Req, "w", integer_to_list(mem3:quorum(Db))), - Options = [{user_ctx,Ctx}, {w,W}], - - Doc = couch_db:doc_from_json_obj_validate(Db, chttpd:json_body(Req)), - Doc2 = case Doc#doc.id of + Doc0 = chttpd:json_body(Req), + Doc1 = couch_doc:from_json_obj_validate(Doc0, fabric2_db:name(Db)), + Doc2 = case Doc1#doc.id of <<"">> -> - Doc#doc{id=couch_uuids:new(), revs={0, []}}; + Doc1#doc{id=couch_uuids:new(), revs={0, []}}; _ -> - Doc + Doc1 end, - DocId = Doc2#doc.id, + Doc3 = read_att_data(Doc2), + DocId = Doc3#doc.id, case chttpd:qs_value(Req, "batch") of "ok" -> % async_batching spawn(fun() -> - case catch(fabric:update_doc(Db, Doc2, Options)) of + case catch(fabric2_db:update_doc(Db, Doc3, [])) of {ok, _} -> chttpd_stats:incr_writes(), ok; @@ -475,7 +444,7 @@ db_req(#httpd{method='POST', path_parts=[DbName], user_ctx=Ctx}=Req, Db) -> % normal DocUrl = absolute_uri(Req, [$/, couch_util:url_encode(DbName), $/, couch_util:url_encode(DocId)]), - case fabric:update_doc(Db, Doc2, Options) of + case fabric2_db:update_doc(Db, Doc3, []) of {ok, NewRev} -> chttpd_stats:incr_writes(), HttpCode = 201; @@ -493,13 +462,10 @@ db_req(#httpd{method='POST', path_parts=[DbName], user_ctx=Ctx}=Req, Db) -> db_req(#httpd{path_parts=[_DbName]}=Req, _Db) -> send_method_not_allowed(Req, "DELETE,GET,HEAD,POST"); -db_req(#httpd{method='POST', path_parts=[DbName, <<"_ensure_full_commit">>], - user_ctx=Ctx}=Req, _Db) -> +db_req(#httpd{method='POST', path_parts=[_DbName, <<"_ensure_full_commit">>] + }=Req, Db) -> chttpd:validate_ctype(Req, "application/json"), - %% use fabric call to trigger a database_does_not_exist exception - %% for missing databases that'd return error 404 from chttpd - %% get_security used to prefer shards on the same node over other nodes - fabric:get_security(DbName, [{user_ctx, Ctx}]), + #{db_prefix := <<_/binary>>} = Db, send_json(Req, 201, {[ {ok, true}, {instance_start_time, <<"0">>} @@ -508,7 +474,7 @@ db_req(#httpd{method='POST', path_parts=[DbName, <<"_ensure_full_commit">>], db_req(#httpd{path_parts=[_,<<"_ensure_full_commit">>]}=Req, _Db) -> send_method_not_allowed(Req, "POST"); -db_req(#httpd{method='POST',path_parts=[_,<<"_bulk_docs">>], user_ctx=Ctx}=Req, Db) -> +db_req(#httpd{method='POST',path_parts=[_,<<"_bulk_docs">>]}=Req, Db) -> couch_stats:increment_counter([couchdb, httpd, bulk_requests]), chttpd:validate_ctype(Req, "application/json"), {JsonProps} = chttpd:json_body_obj(Req), @@ -520,23 +486,23 @@ db_req(#httpd{method='POST',path_parts=[_,<<"_bulk_docs">>], user_ctx=Ctx}=Req, DocsArray0 -> DocsArray0 end, - couch_stats:update_histogram([couchdb, httpd, bulk_docs], length(DocsArray)), - W = case couch_util:get_value(<<"w">>, JsonProps) of - Value when is_integer(Value) -> - integer_to_list(Value); - _ -> - chttpd:qs_value(Req, "w", integer_to_list(mem3:quorum(Db))) + MaxDocs = config:get_integer("couchdb", "max_bulk_docs_count", 10000), + 
case length(DocsArray) =< MaxDocs of + true -> ok; + false -> throw({request_entity_too_large, {bulk_docs, MaxDocs}}) end, - case chttpd:header_value(Req, "X-Couch-Full-Commit") of + couch_stats:update_histogram([couchdb, httpd, bulk_docs], length(DocsArray)), + Options = case chttpd:header_value(Req, "X-Couch-Full-Commit") of "true" -> - Options = [full_commit, {user_ctx,Ctx}, {w,W}]; + [full_commit]; "false" -> - Options = [delay_commit, {user_ctx,Ctx}, {w,W}]; + [delay_commit]; _ -> - Options = [{user_ctx,Ctx}, {w,W}] + [] end, + DbName = fabric2_db:name(Db), Docs = lists:map(fun(JsonObj) -> - Doc = couch_db:doc_from_json_obj_validate(Db, JsonObj), + Doc = couch_doc:from_json_obj_validate(JsonObj, DbName), validate_attachment_names(Doc), case Doc#doc.id of <<>> -> Doc#doc{id = couch_uuids:new()}; @@ -550,7 +516,7 @@ db_req(#httpd{method='POST',path_parts=[_,<<"_bulk_docs">>], user_ctx=Ctx}=Req, true -> [all_or_nothing|Options]; _ -> Options end, - case fabric:update_docs(Db, Docs, Options2) of + case fabric2_db:update_docs(Db, Docs, Options2) of {ok, Results} -> % output the results chttpd_stats:incr_writes(length(Results)), @@ -569,7 +535,7 @@ db_req(#httpd{method='POST',path_parts=[_,<<"_bulk_docs">>], user_ctx=Ctx}=Req, send_json(Req, 417, ErrorsJson) end; false -> - case fabric:update_docs(Db, Docs, [replicated_changes|Options]) of + case fabric2_db:update_docs(Db, Docs, [replicated_changes|Options]) of {ok, Errors} -> chttpd_stats:incr_writes(length(Docs)), ErrorsJson = lists:map(fun update_doc_result_to_json/1, Errors), @@ -596,11 +562,14 @@ db_req(#httpd{method='POST', path_parts=[_, <<"_bulk_get">>], undefined -> throw({bad_request, <<"Missing JSON list of 'docs'.">>}); Docs -> + MaxDocs = config:get_integer("couchdb", "max_bulk_get_count", 10000), + case length(Docs) =< MaxDocs of + true -> ok; + false -> throw({request_entity_too_large, {bulk_get, MaxDocs}}) + end, #doc_query_args{ - options = Options0 + options = Options } = bulk_get_parse_doc_query(Req), - Options = [{user_ctx, Req#httpd.user_ctx} | Options0], - AcceptJson = MochiReq:accepts_content_type("application/json"), AcceptMixedMp = MochiReq:accepts_content_type("multipart/mixed"), AcceptRelatedMp = MochiReq:accepts_content_type("multipart/related"), @@ -665,8 +634,6 @@ db_req(#httpd{path_parts=[_, <<"_bulk_get">>]}=Req, _Db) -> db_req(#httpd{method='POST',path_parts=[_,<<"_purge">>]}=Req, Db) -> couch_stats:increment_counter([couchdb, httpd, purge_requests]), chttpd:validate_ctype(Req, "application/json"), - W = chttpd:qs_value(Req, "w", integer_to_list(mem3:quorum(Db))), - Options = [{user_ctx, Req#httpd.user_ctx}, {w, W}], {IdsRevs} = chttpd:json_body_obj(Req), IdsRevs2 = [{Id, couch_doc:parse_revs(Revs)} || {Id, Revs} <- IdsRevs], MaxIds = config:get_integer("purge", "max_document_id_number", 100), @@ -683,7 +650,7 @@ db_req(#httpd{method='POST',path_parts=[_,<<"_purge">>]}=Req, Db) -> true -> ok end, couch_stats:increment_counter([couchdb, document_purges, total], length(IdsRevs2)), - Results2 = case fabric:purge_docs(Db, IdsRevs2, Options) of + Results2 = case fabric:purge_docs(Db, IdsRevs2, []) of {ok, Results} -> chttpd_stats:incr_writes(length(Results)), Results; @@ -741,7 +708,7 @@ db_req(#httpd{path_parts=[_,OP]}=Req, _Db) when ?IS_ALL_DOCS(OP) -> db_req(#httpd{method='POST',path_parts=[_,<<"_missing_revs">>]}=Req, Db) -> chttpd:validate_ctype(Req, "application/json"), {JsonDocIdRevs} = chttpd:json_body_obj(Req), - case fabric:get_missing_revs(Db, JsonDocIdRevs) of + case fabric2_db:get_missing_revs(Db, 
JsonDocIdRevs) of {error, Reason} -> chttpd:send_error(Req, Reason); {ok, Results} -> @@ -758,7 +725,7 @@ db_req(#httpd{path_parts=[_,<<"_missing_revs">>]}=Req, _Db) -> db_req(#httpd{method='POST',path_parts=[_,<<"_revs_diff">>]}=Req, Db) -> chttpd:validate_ctype(Req, "application/json"), {JsonDocIdRevs} = chttpd:json_body_obj(Req), - case fabric:get_missing_revs(Db, JsonDocIdRevs) of + case fabric2_db:get_missing_revs(Db, JsonDocIdRevs) of {error, Reason} -> chttpd:send_error(Req, Reason); {ok, Results} -> @@ -779,12 +746,10 @@ db_req(#httpd{method='POST',path_parts=[_,<<"_revs_diff">>]}=Req, Db) -> db_req(#httpd{path_parts=[_,<<"_revs_diff">>]}=Req, _Db) -> send_method_not_allowed(Req, "POST"); -db_req(#httpd{method='PUT',path_parts=[_,<<"_security">>],user_ctx=Ctx}=Req, - Db) -> - DbName = ?b2l(couch_db:name(Db)), - validate_security_can_be_edited(DbName), +db_req(#httpd{method = 'PUT',path_parts = [_, <<"_security">>]} = Req, Db) -> + validate_security_can_be_edited(fabric2_db:name(Db)), SecObj = chttpd:json_body(Req), - case fabric:set_security(Db, SecObj, [{user_ctx, Ctx}]) of + case fabric2_db:set_security(Db, SecObj) of ok -> send_json(Req, {[{<<"ok">>, true}]}); Else -> @@ -792,28 +757,26 @@ db_req(#httpd{method='PUT',path_parts=[_,<<"_security">>],user_ctx=Ctx}=Req, end; db_req(#httpd{method='GET',path_parts=[_,<<"_security">>]}=Req, Db) -> - send_json(Req, fabric:get_security(Db)); + send_json(Req, fabric2_db:get_security(Db)); db_req(#httpd{path_parts=[_,<<"_security">>]}=Req, _Db) -> send_method_not_allowed(Req, "PUT,GET"); -db_req(#httpd{method='PUT',path_parts=[_,<<"_revs_limit">>],user_ctx=Ctx}=Req, - Db) -> +db_req(#httpd{method='PUT',path_parts=[_,<<"_revs_limit">>]}=Req, Db) -> Limit = chttpd:json_body(Req), - ok = fabric:set_revs_limit(Db, Limit, [{user_ctx,Ctx}]), + ok = fabric2_db:set_revs_limit(Db, Limit), send_json(Req, {[{<<"ok">>, true}]}); db_req(#httpd{method='GET',path_parts=[_,<<"_revs_limit">>]}=Req, Db) -> - send_json(Req, fabric:get_revs_limit(Db)); + send_json(Req, fabric2_db:get_revs_limit(Db)); db_req(#httpd{path_parts=[_,<<"_revs_limit">>]}=Req, _Db) -> send_method_not_allowed(Req, "PUT,GET"); db_req(#httpd{method='PUT',path_parts=[_,<<"_purged_infos_limit">>]}=Req, Db) -> - Options = [{user_ctx, Req#httpd.user_ctx}], case chttpd:json_body(Req) of Limit when is_integer(Limit), Limit > 0 -> - case fabric:set_purge_infos_limit(Db, Limit, Options) of + case fabric:set_purge_infos_limit(Db, Limit, []) of ok -> send_json(Req, {[{<<"ok">>, true}]}); Error -> @@ -861,49 +824,250 @@ db_req(#httpd{path_parts=[_, DocId | FileNameParts]}=Req, Db) -> db_attachment_req(Req, Db, DocId, FileNameParts). multi_all_docs_view(Req, Db, OP, Queries) -> - Args0 = couch_mrview_http:parse_params(Req, undefined), + Args = couch_views_http:parse_params(Req, undefined), + case couch_views_util:is_paginated(Args) of + false -> + stream_multi_all_docs_view(Req, Db, OP, Args, Queries); + true -> + paginate_multi_all_docs_view(Req, Db, OP, Args, Queries) + end. 
+ + +stream_multi_all_docs_view(Req, Db, OP, Args0, Queries) -> Args1 = Args0#mrargs{view_type=map}, - ArgQueries = lists:map(fun({Query}) -> - QueryArg1 = couch_mrview_http:parse_params(Query, undefined, - Args1, [decoded]), - QueryArgs2 = fabric_util:validate_all_docs_args(Db, QueryArg1), - set_namespace(OP, QueryArgs2) - end, Queries), - Options = [{user_ctx, Req#httpd.user_ctx}], - VAcc0 = #vacc{db=Db, req=Req, prepend="\r\n"}, - FirstChunk = "{\"results\":[", - {ok, Resp0} = chttpd:start_delayed_json_response(VAcc0#vacc.req, - 200, [], FirstChunk), - VAcc1 = VAcc0#vacc{resp=Resp0}, - VAcc2 = lists:foldl(fun(Args, Acc0) -> - {ok, Acc1} = fabric:all_docs(Db, Options, - fun view_cb/2, Acc0, Args), - Acc1 - end, VAcc1, ArgQueries), - {ok, Resp1} = chttpd:send_delayed_chunk(VAcc2#vacc.resp, "\r\n]}"), + ArgQueries = chttpd_view:parse_queries(Req, Args1, Queries, fun(QArgs) -> + set_namespace(OP, QArgs) + end), + Max = chttpd:chunked_response_buffer_size(), + First = "{\"results\":[", + {ok, Resp0} = chttpd:start_delayed_json_response(Req, 200, [], First), + VAcc0 = #vacc{ + db = Db, + req = Req, + resp = Resp0, + threshold = Max, + prepend = "\r\n" + }, + VAcc1 = lists:foldl(fun + (#mrargs{keys = undefined} = ArgsIn, Acc0) -> + send_all_docs(Db, ArgsIn, Acc0); + (#mrargs{keys = Keys} = ArgsIn, Acc0) when is_list(Keys) -> + Acc1 = send_all_docs_keys(Db, ArgsIn, Acc0), + {ok, Acc2} = view_cb(complete, Acc1), + Acc2 + end, VAcc0, ArgQueries), + {ok, Resp1} = chttpd:send_delayed_chunk(VAcc1#vacc.resp, "\r\n]}"), chttpd:end_delayed_json_response(Resp1). + +paginate_multi_all_docs_view(Req, Db, OP, Args0, Queries) -> + Args1 = Args0#mrargs{view_type=map}, + ArgQueries = chttpd_view:parse_queries(Req, Args1, Queries, fun(QArgs) -> + set_namespace(OP, QArgs) + end), + KeyFun = fun({Props}) -> + {couch_util:get_value(id, Props), undefined} + end, + #mrargs{page_size = PageSize} = Args0, + #httpd{path_parts = Parts} = Req, + UpdateSeq = fabric2_db:get_update_seq(Db), + EtagTerm = {Parts, UpdateSeq, Args0}, + Response = couch_views_http:paginated( + Req, EtagTerm, PageSize, ArgQueries, KeyFun, + fun(Args) -> + all_docs_paginated_cb(Db, Args) + end), + chttpd:send_json(Req, Response). + + all_docs_view(Req, Db, Keys, OP) -> - Args0 = couch_mrview_http:parse_body_and_query(Req, Keys), + Args = couch_views_http:parse_body_and_query(Req, Keys), + case couch_views_util:is_paginated(Args) of + false -> + stream_all_docs_view(Req, Db, Args, OP); + true -> + paginate_all_docs_view(Req, Db, Args, OP) + end. + +stream_all_docs_view(Req, Db, Args0, OP) -> Args1 = Args0#mrargs{view_type=map}, - Args2 = fabric_util:validate_all_docs_args(Db, Args1), + Args2 = couch_views_util:validate_args(Args1), Args3 = set_namespace(OP, Args2), - Options = [{user_ctx, Req#httpd.user_ctx}], Max = chttpd:chunked_response_buffer_size(), - VAcc = #vacc{db=Db, req=Req, threshold=Max}, - {ok, Resp} = fabric:all_docs(Db, Options, fun view_cb/2, VAcc, Args3), - {ok, Resp#vacc.resp}. - -view_cb({row, Row} = Msg, Acc) -> - case lists:keymember(doc, 1, Row) of - true -> chttpd_stats:incr_reads(); - false -> ok + VAcc0 = #vacc{ + db = Db, + req = Req, + threshold = Max + }, + case Args3#mrargs.keys of + undefined -> + VAcc1 = send_all_docs(Db, Args3, VAcc0), + {ok, VAcc1#vacc.resp}; + Keys when is_list(Keys) -> + VAcc1 = send_all_docs_keys(Db, Args3, VAcc0), + {ok, VAcc2} = view_cb(complete, VAcc1), + {ok, VAcc2#vacc.resp} + end. 
+ + +paginate_all_docs_view(Req, Db, Args0, OP) -> + Args1 = Args0#mrargs{view_type=map}, + Args2 = chttpd_view:validate_args(Req, Args1), + Args3 = set_namespace(OP, Args2), + KeyFun = fun({Props}) -> + {couch_util:get_value(id, Props), undefined} + end, + #httpd{path_parts = Parts} = Req, + UpdateSeq = fabric2_db:get_update_seq(Db), + EtagTerm = {Parts, UpdateSeq, Args3}, + Response = couch_views_http:paginated( + Req, EtagTerm, Args3, KeyFun, + fun(Args) -> + all_docs_paginated_cb(Db, Args) + end), + chttpd:send_json(Req, Response). + + +all_docs_paginated_cb(Db, Args) -> + #vacc{meta=MetaMap, buffer=Items} = case Args#mrargs.keys of + undefined -> + send_all_docs(Db, Args, #vacc{paginated=true}); + Keys when is_list(Keys) -> + send_all_docs_keys(Db, Args, #vacc{paginated=true}) + end, + {MetaMap, Items}. + + +send_all_docs(Db, #mrargs{keys = undefined} = Args, VAcc0) -> + Opts0 = fabric2_util:all_docs_view_opts(Args), + NS = couch_util:get_value(namespace, Opts0), + FoldFun = case NS of + <<"_all_docs">> -> fold_docs; + <<"_design">> -> fold_design_docs; + <<"_local">> -> fold_local_docs + end, + Opts = case couch_views_util:is_paginated(Args) of + false -> + Opts0 ++ [{restart_tx, true}]; + true -> + Opts0 + end, + ViewCb = fun view_cb/2, + Acc = {iter, Db, Args, VAcc0}, + {ok, {iter, _, _, VAcc1}} = fabric2_db:FoldFun(Db, ViewCb, Acc, Opts), + VAcc1. + + +send_all_docs_keys(Db, #mrargs{} = Args, VAcc0) -> + Keys = apply_args_to_keylist(Args, Args#mrargs.keys), + NS = couch_util:get_value(namespace, Args#mrargs.extra), + TotalRows = fabric2_db:get_doc_count(Db, NS), + Meta = case Args#mrargs.update_seq of + true -> + UpdateSeq = fabric2_db:get_update_seq(Db), + [{update_seq, UpdateSeq}]; + false -> + [] + end ++ [{total, TotalRows}, {offset, null}], + {ok, VAcc1} = view_cb({meta, Meta}, VAcc0), + DocOpts = case Args#mrargs.conflicts of + true -> [conflicts | Args#mrargs.doc_options]; + _ -> Args#mrargs.doc_options + end, + IncludeDocs = Args#mrargs.include_docs, + OpenOpts = [deleted | DocOpts], + + CB = fun(DocId, Doc, Acc) -> + Row0 = case Doc of + {not_found, missing} -> + #view_row{key = DocId}; + {ok, #doc{deleted = true, revs = Revs}} -> + {RevPos, [RevId | _]} = Revs, + Value = {[ + {rev, couch_doc:rev_to_str({RevPos, RevId})}, + {deleted, true} + ]}, + DocValue = if not IncludeDocs -> undefined; true -> + null + end, + #view_row{ + key = DocId, + id = DocId, + value = Value, + doc = DocValue + }; + {ok, #doc{revs = Revs} = Doc0} -> + {RevPos, [RevId | _]} = Revs, + Value = {[ + {rev, couch_doc:rev_to_str({RevPos, RevId})} + ]}, + DocValue = if not IncludeDocs -> undefined; true -> + couch_doc:to_json_obj(Doc0, DocOpts) + end, + #view_row{ + key = DocId, + id = DocId, + value = Value, + doc = DocValue + } + end, + Row1 = fabric_view:transform_row(Row0), + view_cb(Row1, Acc) + end, + {ok, VAcc2} = fabric2_db:fold_docs(Db, Keys, CB, VAcc1, OpenOpts), + VAcc2. + + +apply_args_to_keylist(Args, Keys0) -> + Keys1 = case Args#mrargs.direction of + fwd -> Keys0; + _ -> lists:reverse(Keys0) + end, + Keys2 = case Args#mrargs.skip < length(Keys1) of + true -> lists:nthtail(Args#mrargs.skip, Keys1); + false -> [] + end, + case Args#mrargs.limit < length(Keys2) of + true -> lists:sublist(Keys2, Args#mrargs.limit); + false -> Keys2 + end. 
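apply_args_to_keylist/2 applies direction, skip and limit, in that order, to an explicit keys list before the per-key doc lookups in send_all_docs_keys/3. A worked example, assuming #mrargs{} from couch_mrview.hrl (the function is internal to chttpd_db; shown only to illustrate the ordering of the three steps):

    keylist_example() ->
        Args = #mrargs{direction = rev, skip = 1, limit = 2},
        Keys = [<<"a">>, <<"b">>, <<"c">>, <<"d">>],
        %% rev -> [d,c,b,a]; skip 1 -> [c,b,a]; limit 2 -> [c,b]
        [<<"c">>, <<"b">>] = apply_args_to_keylist(Args, Keys).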
+ + +view_cb({row, Row}, {iter, Db, Args, VAcc}) -> + NewRow = case lists:keymember(doc, 1, Row) of + true -> + chttpd_stats:incr_reads(), + Row; + false when Args#mrargs.include_docs -> + {id, DocId} = lists:keyfind(id, 1, Row), + chttpd_stats:incr_reads(), + DocOpts = case Args#mrargs.conflicts of + true -> [conflicts | Args#mrargs.doc_options]; + _ -> Args#mrargs.doc_options + end, + OpenOpts = [deleted | DocOpts], + DocMember = case fabric2_db:open_doc(Db, DocId, OpenOpts) of + {not_found, missing} -> + []; + {ok, #doc{deleted = true}} -> + [{doc, null}]; + {ok, #doc{} = Doc} -> + [{doc, couch_doc:to_json_obj(Doc, DocOpts)}] + end, + Row ++ DocMember; + _ -> + Row end, chttpd_stats:incr_rows(), - couch_mrview_http:view_cb(Msg, Acc); + {Go, NewVAcc} = couch_views_http:view_cb({row, NewRow}, VAcc), + {Go, {iter, Db, Args, NewVAcc}}; + +view_cb(Msg, {iter, Db, Args, VAcc}) -> + {Go, NewVAcc} = couch_views_http:view_cb(Msg, VAcc), + {Go, {iter, Db, Args, NewVAcc}}; view_cb(Msg, Acc) -> - couch_mrview_http:view_cb(Msg, Acc). + couch_views_http:view_cb(Msg, Acc). db_doc_req(#httpd{method='DELETE'}=Req, Db, DocId) -> % check for the existence of the doc to handle the 404 case. @@ -921,10 +1085,9 @@ db_doc_req(#httpd{method='GET', mochi_req=MochiReq}=Req, Db, DocId) -> #doc_query_args{ rev = Rev, open_revs = Revs, - options = Options0, + options = Options, atts_since = AttsSince } = parse_doc_query(Req), - Options = [{user_ctx, Req#httpd.user_ctx} | Options0], case Revs of [] -> Options2 = @@ -935,7 +1098,7 @@ db_doc_req(#httpd{method='GET', mochi_req=MochiReq}=Req, Db, DocId) -> Doc = couch_doc_open(Db, DocId, Rev, Options2), send_doc(Req, Doc, Options2); _ -> - case fabric:open_revs(Db, DocId, Revs, Options) of + case fabric2_db:open_doc_revs(Db, DocId, Revs, Options) of {ok, []} when Revs == all -> chttpd:send_error(Req, {not_found, missing}); {ok, Results} -> @@ -971,14 +1134,11 @@ db_doc_req(#httpd{method='GET', mochi_req=MochiReq}=Req, Db, DocId) -> end end; -db_doc_req(#httpd{method='POST', user_ctx=Ctx}=Req, Db, DocId) -> +db_doc_req(#httpd{method='POST'}=Req, Db, DocId) -> couch_httpd:validate_referer(Req), - couch_db:validate_docid(Db, DocId), + fabric2_db:validate_docid(DocId), chttpd:validate_ctype(Req, "multipart/form-data"), - W = chttpd:qs_value(Req, "w", integer_to_list(mem3:quorum(Db))), - Options = [{user_ctx,Ctx}, {w,W}], - Form = couch_httpd:parse_form(Req), case proplists:is_defined("_doc", Form) of true -> @@ -986,7 +1146,7 @@ db_doc_req(#httpd{method='POST', user_ctx=Ctx}=Req, Db, DocId) -> Doc = couch_doc_from_req(Req, Db, DocId, Json); false -> Rev = couch_doc:parse_rev(list_to_binary(couch_util:get_value("_rev", Form))), - Doc = case fabric:open_revs(Db, DocId, [Rev], []) of + Doc = case fabric2_db:open_doc_revs(Db, DocId, [Rev], []) of {ok, [{ok, Doc0}]} -> chttpd_stats:incr_reads(), Doc0; @@ -1015,7 +1175,8 @@ db_doc_req(#httpd{method='POST', user_ctx=Ctx}=Req, Db, DocId) -> NewDoc = Doc#doc{ atts = UpdatedAtts ++ OldAtts2 }, - case fabric:update_doc(Db, NewDoc, Options) of + NewDoc1 = read_att_data(NewDoc), + case fabric2_db:update_doc(Db, NewDoc1, []) of {ok, NewRev} -> chttpd_stats:incr_writes(), HttpCode = 201; @@ -1029,15 +1190,12 @@ db_doc_req(#httpd{method='POST', user_ctx=Ctx}=Req, Db, DocId) -> {rev, couch_doc:rev_to_str(NewRev)} ]}); -db_doc_req(#httpd{method='PUT', user_ctx=Ctx}=Req, Db, DocId) -> +db_doc_req(#httpd{method='PUT'}=Req, Db, DocId) -> #doc_query_args{ update_type = UpdateType } = parse_doc_query(Req), - DbName = couch_db:name(Db), - 
couch_db:validate_docid(Db, DocId), - - W = chttpd:qs_value(Req, "w", integer_to_list(mem3:quorum(Db))), - Options = [{user_ctx,Ctx}, {w,W}], + DbName = fabric2_db:name(Db), + fabric2_db:validate_docid(DocId), Loc = absolute_uri(Req, [$/, couch_util:url_encode(DbName), $/, couch_util:url_encode(DocId)]), @@ -1045,7 +1203,7 @@ db_doc_req(#httpd{method='PUT', user_ctx=Ctx}=Req, Db, DocId) -> case couch_util:to_list(couch_httpd:header_value(Req, "Content-Type")) of ("multipart/related;" ++ _) = ContentType -> couch_httpd:check_max_request_length(Req), - couch_httpd_multipart:num_mp_writers(mem3:n(mem3:dbname(DbName), DocId)), + couch_httpd_multipart:num_mp_writers(1), {ok, Doc0, WaitFun, Parser} = couch_doc:doc_from_multi_part_stream(ContentType, fun() -> receive_request_data(Req) end), Doc = couch_doc_from_req(Req, Db, DocId, Doc0), @@ -1062,10 +1220,10 @@ db_doc_req(#httpd{method='PUT', user_ctx=Ctx}=Req, Db, DocId) -> case chttpd:qs_value(Req, "batch") of "ok" -> % batch - Doc = couch_doc_from_req(Req, Db, DocId, chttpd:json_body(Req)), - + Doc0 = couch_doc_from_req(Req, Db, DocId, chttpd:json_body(Req)), + Doc = read_att_data(Doc0), spawn(fun() -> - case catch(fabric:update_doc(Db, Doc, Options)) of + case catch(fabric2_db:update_doc(Db, Doc, [])) of {ok, _} -> chttpd_stats:incr_writes(), ok; @@ -1088,7 +1246,7 @@ db_doc_req(#httpd{method='PUT', user_ctx=Ctx}=Req, Db, DocId) -> end end; -db_doc_req(#httpd{method='COPY', user_ctx=Ctx}=Req, Db, SourceDocId) -> +db_doc_req(#httpd{method='COPY'}=Req, Db, SourceDocId) -> SourceRev = case extract_header_rev(Req, chttpd:qs_value(Req, "rev")) of missing_rev -> nil; @@ -1099,8 +1257,8 @@ db_doc_req(#httpd{method='COPY', user_ctx=Ctx}=Req, Db, SourceDocId) -> % open old doc Doc = couch_doc_open(Db, SourceDocId, SourceRev, []), % save new doc - case fabric:update_doc(Db, - Doc#doc{id=TargetDocId, revs=TargetRevs}, [{user_ctx,Ctx}]) of + case fabric2_db:update_doc(Db, + Doc#doc{id=TargetDocId, revs=TargetRevs}, []) of {ok, NewTargetRev} -> chttpd_stats:incr_writes(), HttpCode = 201; @@ -1109,13 +1267,13 @@ db_doc_req(#httpd{method='COPY', user_ctx=Ctx}=Req, Db, SourceDocId) -> HttpCode = 202 end, % respond - DbName = couch_db:name(Db), + DbName = fabric2_db:name(Db), {PartRes} = update_doc_result_to_json(TargetDocId, {ok, NewTargetRev}), Loc = absolute_uri(Req, "/" ++ couch_util:url_encode(DbName) ++ "/" ++ couch_util:url_encode(TargetDocId)), send_json(Req, HttpCode, [{"Location", Loc}, {"ETag", "\"" ++ ?b2l(couch_doc:rev_to_str(NewTargetRev)) ++ "\""}], - {[{ok, true}] ++ PartRes}); + {PartRes}); db_doc_req(Req, _Db, _DocId) -> send_method_not_allowed(Req, "DELETE,GET,HEAD,POST,PUT,COPY"). 
@@ -1200,7 +1358,7 @@ send_docs_multipart(Req, Results, Options1) -> CType = {"Content-Type", "multipart/mixed; boundary=\"" ++ ?b2l(OuterBoundary) ++ "\""}, {ok, Resp} = start_chunked_response(Req, 200, [CType]), - couch_httpd:send_chunk(Resp, <<"--", OuterBoundary/binary>>), + chttpd:send_chunk(Resp, <<"--", OuterBoundary/binary>>), lists:foreach( fun({ok, #doc{atts=Atts}=Doc}) -> Refs = monitor_attachments(Doc#doc.atts), @@ -1208,25 +1366,25 @@ send_docs_multipart(Req, Results, Options1) -> JsonBytes = ?JSON_ENCODE(couch_doc:to_json_obj(Doc, Options)), {ContentType, _Len} = couch_doc:len_doc_to_multi_part_stream( InnerBoundary, JsonBytes, Atts, true), - couch_httpd:send_chunk(Resp, <<"\r\nContent-Type: ", + chttpd:send_chunk(Resp, <<"\r\nContent-Type: ", ContentType/binary, "\r\n\r\n">>), couch_doc:doc_to_multi_part_stream(InnerBoundary, JsonBytes, Atts, - fun(Data) -> couch_httpd:send_chunk(Resp, Data) + fun(Data) -> chttpd:send_chunk(Resp, Data) end, true), - couch_httpd:send_chunk(Resp, <<"\r\n--", OuterBoundary/binary>>) + chttpd:send_chunk(Resp, <<"\r\n--", OuterBoundary/binary>>) after demonitor_refs(Refs) end; ({{not_found, missing}, RevId}) -> RevStr = couch_doc:rev_to_str(RevId), Json = ?JSON_ENCODE({[{<<"missing">>, RevStr}]}), - couch_httpd:send_chunk(Resp, + chttpd:send_chunk(Resp, [<<"\r\nContent-Type: application/json; error=\"true\"\r\n\r\n">>, Json, <<"\r\n--", OuterBoundary/binary>>]) end, Results), - couch_httpd:send_chunk(Resp, <<"--">>), - couch_httpd:last_chunk(Resp). + chttpd:send_chunk(Resp, <<"--">>), + chttpd:last_chunk(Resp). bulk_get_multipart_headers({0, []}, Id, Boundary) -> [ @@ -1266,6 +1424,8 @@ update_doc_result_to_json(DocId, {ok, NewRev}) -> {[{ok, true}, {id, DocId}, {rev, couch_doc:rev_to_str(NewRev)}]}; update_doc_result_to_json(DocId, {accepted, NewRev}) -> {[{ok, true}, {id, DocId}, {rev, couch_doc:rev_to_str(NewRev)}, {accepted, true}]}; +update_doc_result_to_json(DocId, {{DocId, _}, Error}) -> + update_doc_result_to_json(DocId, Error); update_doc_result_to_json(DocId, Error) -> {_Code, ErrorStr, Reason} = chttpd:error_info(Error), {[{id, DocId}, {error, ErrorStr}, {reason, Reason}]}. @@ -1294,17 +1454,16 @@ send_updated_doc(Req, Db, DocId, Json) -> send_updated_doc(Req, Db, DocId, Doc, Headers) -> send_updated_doc(Req, Db, DocId, Doc, Headers, interactive_edit). -send_updated_doc(#httpd{user_ctx=Ctx} = Req, Db, DocId, #doc{deleted=Deleted}=Doc, +send_updated_doc(#httpd{} = Req, Db, DocId, #doc{deleted=Deleted}=Doc, Headers, UpdateType) -> - W = chttpd:qs_value(Req, "w", integer_to_list(mem3:quorum(Db))), Options = case couch_httpd:header_value(Req, "X-Couch-Full-Commit") of "true" -> - [full_commit, UpdateType, {user_ctx,Ctx}, {w,W}]; + [full_commit, UpdateType]; "false" -> - [delay_commit, UpdateType, {user_ctx,Ctx}, {w,W}]; + [delay_commit, UpdateType]; _ -> - [UpdateType, {user_ctx,Ctx}, {w,W}] + [UpdateType] end, {Status, {etag, Etag}, Body} = update_doc(Db, DocId, #doc{deleted=Deleted}=Doc, Options), @@ -1322,32 +1481,9 @@ http_code_from_status(Status) -> 200 end. 
-update_doc(Db, DocId, #doc{deleted=Deleted, body=DocBody}=Doc, Options) -> - {_, Ref} = spawn_monitor(fun() -> - try fabric:update_doc(Db, Doc, Options) of - Resp -> - exit({exit_ok, Resp}) - catch - throw:Reason -> - exit({exit_throw, Reason}); - error:Reason -> - exit({exit_error, Reason}); - exit:Reason -> - exit({exit_exit, Reason}) - end - end), - Result = receive - {'DOWN', Ref, _, _, {exit_ok, Ret}} -> - Ret; - {'DOWN', Ref, _, _, {exit_throw, Reason}} -> - throw(Reason); - {'DOWN', Ref, _, _, {exit_error, Reason}} -> - erlang:error(Reason); - {'DOWN', Ref, _, _, {exit_exit, Reason}} -> - erlang:exit(Reason) - end, - - case Result of +update_doc(Db, DocId, #doc{deleted=Deleted, body=DocBody}=Doc0, Options) -> + Doc = read_att_data(Doc0), + case fabric2_db:update_doc(Db, Doc, Options) of {ok, NewRev} -> Accepted = false; {accepted, NewRev} -> @@ -1394,7 +1530,7 @@ couch_doc_from_req(Req, _Db, DocId, #doc{revs=Revs} = Doc) -> end, Doc#doc{id=DocId, revs=Revs2}; couch_doc_from_req(Req, Db, DocId, Json) -> - Doc = couch_db:doc_from_json_obj_validate(Db, Json), + Doc = couch_doc:from_json_obj_validate(Json, fabric2_db:name(Db)), couch_doc_from_req(Req, Db, DocId, Doc). @@ -1402,11 +1538,10 @@ couch_doc_from_req(Req, Db, DocId, Json) -> % couch_doc_open(Db, DocId) -> % couch_doc_open(Db, DocId, nil, []). -couch_doc_open(Db, DocId, Rev, Options0) -> - Options = [{user_ctx, couch_db:get_user_ctx(Db)} | Options0], +couch_doc_open(Db, DocId, Rev, Options) -> case Rev of nil -> % open most recent rev - case fabric:open_doc(Db, DocId, Options) of + case fabric2_db:open_doc(Db, DocId, Options) of {ok, Doc} -> chttpd_stats:incr_reads(), Doc; @@ -1414,7 +1549,7 @@ couch_doc_open(Db, DocId, Rev, Options0) -> throw(Error) end; _ -> % open a specific rev (deletions come back as stubs) - case fabric:open_revs(Db, DocId, [Rev], Options) of + case fabric2_db:open_doc_revs(Db, DocId, [Rev], Options) of {ok, [{ok, Doc}]} -> chttpd_stats:incr_reads(), Doc; @@ -1535,8 +1670,11 @@ db_attachment_req(#httpd{method='GET',mochi_req=MochiReq}=Req, Db, DocId, FileNa end; -db_attachment_req(#httpd{method=Method, user_ctx=Ctx}=Req, Db, DocId, FileNameParts) +db_attachment_req(#httpd{method=Method}=Req, Db, DocId, FileNameParts) when (Method == 'PUT') or (Method == 'DELETE') -> + #httpd{ + mochi_req = MochiReq + } = Req, FileName = validate_attachment_name( mochiweb_util:join( lists:map(fun binary_to_list/1, @@ -1546,16 +1684,45 @@ db_attachment_req(#httpd{method=Method, user_ctx=Ctx}=Req, Db, DocId, FileNamePa 'DELETE' -> []; _ -> - MimeType = case couch_httpd:header_value(Req,"Content-Type") of + MimeType = case chttpd:header_value(Req,"Content-Type") of % We could throw an error here or guess by the FileName. % Currently, just giving it a default. 
undefined -> <<"application/octet-stream">>; CType -> list_to_binary(CType) end, - Data = fabric:att_receiver(Req, chttpd:body_length(Req)), + Data = case chttpd:body_length(Req) of + undefined -> + <<"">>; + {unknown_transfer_encoding, Unknown} -> + exit({unknown_transfer_encoding, Unknown}); + chunked -> + fun(MaxChunkSize, ChunkFun, InitState) -> + chttpd:recv_chunked( + Req, MaxChunkSize, ChunkFun, InitState + ) + end; + 0 -> + <<"">>; + Length when is_integer(Length) -> + Expect = case chttpd:header_value(Req, "expect") of + undefined -> + undefined; + Value when is_list(Value) -> + string:to_lower(Value) + end, + case Expect of + "100-continue" -> + MochiReq:start_raw_response({100, gb_trees:empty()}); + _Else -> + ok + end, + fun() -> chttpd:recv(Req, 0) end; + Length -> + exit({length_not_integer, Length}) + end, ContentLen = case couch_httpd:header_value(Req,"Content-Length") of undefined -> undefined; - Length -> list_to_integer(Length) + CL -> list_to_integer(CL) end, ContentEnc = string:to_lower(string:strip( couch_httpd:header_value(Req, "Content-Encoding", "identity") @@ -1587,10 +1754,10 @@ db_attachment_req(#httpd{method=Method, user_ctx=Ctx}=Req, Db, DocId, FileNamePa % check for the existence of the doc to handle the 404 case. couch_doc_open(Db, DocId, nil, []) end, - couch_db:validate_docid(Db, DocId), + fabric2_db:validate_docid(DocId), #doc{id=DocId}; Rev -> - case fabric:open_revs(Db, DocId, [Rev], [{user_ctx,Ctx}]) of + case fabric2_db:open_doc_revs(Db, DocId, [Rev], []) of {ok, [{ok, Doc0}]} -> chttpd_stats:incr_reads(), Doc0; @@ -1602,11 +1769,11 @@ db_attachment_req(#httpd{method=Method, user_ctx=Ctx}=Req, Db, DocId, FileNamePa end, #doc{atts=Atts} = Doc, - DocEdited = Doc#doc{ + DocEdited0 = Doc#doc{ atts = NewAtt ++ [A || A <- Atts, couch_att:fetch(name, A) /= FileName] }, - W = chttpd:qs_value(Req, "w", integer_to_list(mem3:quorum(Db))), - case fabric:update_doc(Db, DocEdited, [{user_ctx,Ctx}, {w,W}]) of + DocEdited = read_att_data(DocEdited0), + case fabric2_db:update_doc(Db, DocEdited, []) of {ok, UpdatedRev} -> chttpd_stats:incr_writes(), HttpCode = 201; @@ -1615,7 +1782,7 @@ db_attachment_req(#httpd{method=Method, user_ctx=Ctx}=Req, Db, DocId, FileNamePa HttpCode = 202 end, erlang:put(mochiweb_request_recv, true), - DbName = couch_db:name(Db), + DbName = fabric2_db:name(Db), {Status, Headers} = case Method of 'DELETE' -> @@ -1702,47 +1869,6 @@ get_md5_header(Req) -> parse_doc_query(Req) -> lists:foldl(fun parse_doc_query/2, #doc_query_args{}, chttpd:qs(Req)). -parse_engine_opt(Req) -> - case chttpd:qs_value(Req, "engine") of - undefined -> - []; - Extension -> - Available = couch_server:get_engine_extensions(), - case lists:member(Extension, Available) of - true -> - [{engine, iolist_to_binary(Extension)}]; - false -> - throw({bad_request, invalid_engine_extension}) - end - end. - - -parse_partitioned_opt(Req) -> - case chttpd:qs_value(Req, "partitioned") of - undefined -> - []; - "false" -> - []; - "true" -> - ok = validate_partitioned_db_enabled(Req), - [ - {partitioned, true}, - {hash, [couch_partition, hash, []]} - ]; - _ -> - throw({bad_request, <<"Invalid `partitioned` parameter">>}) - end. - - -validate_partitioned_db_enabled(Req) -> - case couch_flags:is_enabled(partitioned, Req) of - true -> - ok; - false -> - throw({bad_request, <<"Partitioned feature is not enabled.">>}) - end. 
- - parse_doc_query({Key, Value}, Args) -> case {Key, Value} of {"attachments", "true"} -> @@ -1811,7 +1937,7 @@ parse_changes_query(Req) -> {"descending", "true"} -> Args#changes_args{dir=rev}; {"since", _} -> - Args#changes_args{since=Value}; + Args#changes_args{since=parse_since_seq(Value)}; {"last-event-id", _} -> Args#changes_args{since=Value}; {"limit", _} -> @@ -1872,6 +1998,30 @@ parse_changes_query(Req) -> ChangesArgs end. + +parse_since_seq(<<"now">>) -> + now; + +parse_since_seq(Seq) when is_binary(Seq), size(Seq) > 30 -> + throw({bad_request, url_encoded_since_seq}); + +parse_since_seq(Seq) when is_binary(Seq), size(Seq) > 2 -> + % We have implicitly allowed the since seq to either be + % JSON encoded or a "raw" string. Here we just remove the + % surrounding quotes if they exist and are paired. + SeqSize = size(Seq) - 2, + case Seq of + <<"\"", S:SeqSize/binary, "\"">> -> S; + S -> S + end; + +parse_since_seq(Seq) when is_binary(Seq) -> + Seq; + +parse_since_seq(Seq) when is_list(Seq) -> + parse_since_seq(iolist_to_binary(Seq)). + + extract_header_rev(Req, ExplicitRev) when is_binary(ExplicitRev) or is_list(ExplicitRev)-> extract_header_rev(Req, couch_doc:parse_rev(ExplicitRev)); extract_header_rev(Req, ExplicitRev) -> @@ -1889,7 +2039,7 @@ extract_header_rev(Req, ExplicitRev) -> end. validate_security_can_be_edited(DbName) -> - UserDbName = config:get("chttpd_auth", "authentication_db", "_users"), + UserDbName = ?l2b(config:get("chttpd_auth", "authentication_db", "_users")), CanEditUserSecurityObject = config:get("couchdb","users_db_security_editable","false"), case {DbName,CanEditUserSecurityObject} of {UserDbName,"false"} -> @@ -1921,6 +2071,8 @@ monitor_attachments(Atts) when is_list(Atts) -> case couch_att:fetch(data, Att) of {Fd, _} -> [monitor(process, Fd) | Monitors]; + {loc, _, _, _} -> + Monitors; stub -> Monitors; Else -> @@ -1934,8 +2086,7 @@ monitor_attachments(Att) -> demonitor_refs(Refs) when is_list(Refs) -> [demonitor(Ref) || Ref <- Refs]. -set_namespace(<<"_all_docs">>, Args) -> - set_namespace(undefined, Args); + set_namespace(<<"_local_docs">>, Args) -> set_namespace(<<"_local">>, Args); set_namespace(<<"_design_docs">>, Args) -> @@ -1984,7 +2135,7 @@ bulk_get_open_doc_revs1(Db, Props, Options, {}) -> {null, {error, Error}, Options}; DocId -> try - couch_db:validate_docid(Db, DocId), + fabric2_db:validate_docid(DocId), bulk_get_open_doc_revs1(Db, Props, Options, {DocId}) catch throw:{Error, Reason} -> {DocId, {error, {null, Error, Reason}}, Options} @@ -2018,7 +2169,7 @@ bulk_get_open_doc_revs1(Db, Props, Options, {DocId, Revs}) -> bulk_get_open_doc_revs1(Db, Props, Options, {DocId, Revs, Options1}) end; bulk_get_open_doc_revs1(Db, Props, _, {DocId, Revs, Options}) -> - case fabric:open_revs(Db, DocId, Revs, Options) of + case fabric2_db:open_doc_revs(Db, DocId, Revs, Options) of {ok, []} -> RevStr = couch_util:get_value(<<"rev">>, Props), Error = {RevStr, <<"not_found">>, <<"missing">>}, @@ -2094,68 +2245,7 @@ bulk_get_json_error(DocId, Rev, Error, Reason) -> {<<"reason">>, Reason}]}}]}). --ifdef(TEST). --include_lib("eunit/include/eunit.hrl"). - -monitor_attachments_test_() -> - {"ignore stubs", - fun () -> - Atts = [couch_att:new([{data, stub}])], - ?_assertEqual([], monitor_attachments(Atts)) - end - }. 
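To make the quote handling in the new parse_since_seq/1 above concrete, a few illustrative inputs and results:

%% parse_since_seq(<<"now">>)            -> now
%% parse_since_seq("0")                  -> <<"0">>          (list input is converted to a binary first)
%% parse_since_seq(<<"\"12-abcdef\"">>)  -> <<"12-abcdef">>  (paired surrounding quotes are stripped)
%% parse_since_seq(<<"12-abcdef">>)      -> <<"12-abcdef">>
%% Binaries longer than 30 bytes throw {bad_request, url_encoded_since_seq}.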
- -parse_partitioned_opt_test_() -> - { - foreach, - fun setup/0, - fun teardown/1, - [ - t_should_allow_partitioned_db(), - t_should_throw_on_not_allowed_partitioned_db(), - t_returns_empty_array_for_partitioned_false(), - t_returns_empty_array_for_no_partitioned_qs() - ] - }. - - -setup() -> - ok. - -teardown(_) -> - meck:unload(). - -mock_request(Url) -> - Headers = mochiweb_headers:make([{"Host", "examples.com"}]), - MochiReq = mochiweb_request:new(nil, 'PUT', Url, {1, 1}, Headers), - #httpd{mochi_req = MochiReq}. - -t_should_allow_partitioned_db() -> - ?_test(begin - meck:expect(couch_flags, is_enabled, 2, true), - Req = mock_request("/all-test21?partitioned=true"), - [Partitioned, _] = parse_partitioned_opt(Req), - ?assertEqual(Partitioned, {partitioned, true}) - end). - -t_should_throw_on_not_allowed_partitioned_db() -> - ?_test(begin - meck:expect(couch_flags, is_enabled, 2, false), - Req = mock_request("/all-test21?partitioned=true"), - Throw = {bad_request, <<"Partitioned feature is not enabled.">>}, - ?assertThrow(Throw, parse_partitioned_opt(Req)) - end). - -t_returns_empty_array_for_partitioned_false() -> - ?_test(begin - Req = mock_request("/all-test21?partitioned=false"), - ?assertEqual(parse_partitioned_opt(Req), []) - end). - -t_returns_empty_array_for_no_partitioned_qs() -> - ?_test(begin - Req = mock_request("/all-test21"), - ?assertEqual(parse_partitioned_opt(Req), []) - end). - --endif. +read_att_data(#doc{} = Doc) -> + #doc{atts = Atts} = Doc, + Atts1 = lists:map(fun couch_att:read_data/1, Atts), + Doc#doc{atts = Atts1}. diff --git a/src/chttpd/src/chttpd_external.erl b/src/chttpd/src/chttpd_external.erl index 451d87d2e..7317b7e4b 100644 --- a/src/chttpd/src/chttpd_external.erl +++ b/src/chttpd/src/chttpd_external.erl @@ -38,7 +38,7 @@ json_req_obj_fields() -> <<"peer">>, <<"form">>, <<"cookie">>, <<"userCtx">>, <<"secObj">>]. json_req_obj_field(<<"info">>, #httpd{}, Db, _DocId) -> - {ok, Info} = get_db_info(Db), + {ok, Info} = fabric2_db:get_db_info(Db), {Info}; json_req_obj_field(<<"uuid">>, #httpd{}, _Db, _DocId) -> couch_uuids:new(); @@ -81,27 +81,18 @@ json_req_obj_field(<<"form">>, #httpd{mochi_req=Req, method=Method}=HttpReq, Db, json_req_obj_field(<<"cookie">>, #httpd{mochi_req=Req}, _Db, _DocId) -> to_json_terms(Req:parse_cookie()); json_req_obj_field(<<"userCtx">>, #httpd{}, Db, _DocId) -> - couch_util:json_user_ctx(Db); -json_req_obj_field(<<"secObj">>, #httpd{user_ctx=UserCtx}, Db, _DocId) -> - get_db_security(Db, UserCtx). - - -get_db_info(Db) -> - case couch_db:is_clustered(Db) of - true -> - fabric:get_db_info(Db); - false -> - couch_db:get_db_info(Db) - end. - - -get_db_security(Db, #user_ctx{}) -> - case couch_db:is_clustered(Db) of - true -> - fabric:get_security(Db); - false -> - couch_db:get_security(Db) - end. + json_user_ctx(Db); +json_req_obj_field(<<"secObj">>, #httpd{user_ctx = #user_ctx{}}, Db, _DocId) -> + fabric2_db:get_security(Db). + + +json_user_ctx(Db) -> + Ctx = fabric2_db:get_user_ctx(Db), + {[ + {<<"db">>, fabric2_db:name(Db)}, + {<<"name">>, Ctx#user_ctx.name}, + {<<"roles">>, Ctx#user_ctx.roles} + ]}. to_json_terms(Data) -> diff --git a/src/chttpd/src/chttpd_handlers.erl b/src/chttpd/src/chttpd_handlers.erl index 930563230..17d2952b3 100644 --- a/src/chttpd/src/chttpd_handlers.erl +++ b/src/chttpd/src/chttpd_handlers.erl @@ -15,7 +15,8 @@ -export([ url_handler/2, db_handler/2, - design_handler/2 + design_handler/2, + handler_info/1 ]). -define(SERVICE_ID, chttpd_handlers). 
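For reference, json_user_ctx/1 introduced in the chttpd_external.erl hunk above builds the familiar {db, name, roles} object directly from the fabric2 db handle; with an assumed database name and user context it encodes as:

%% fabric2_db:name(Db) = <<"mydb">>,
%% Ctx = #user_ctx{name = <<"bob">>, roles = [<<"_admin">>]},
%% ?JSON_ENCODE(json_user_ctx(Db))
%%   => {"db":"mydb","name":"bob","roles":["_admin"]}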
@@ -35,6 +36,26 @@ db_handler(HandlerKey, DefaultFun) -> design_handler(HandlerKey, DefaultFun) -> select(collect(design_handler, [HandlerKey]), DefaultFun). +handler_info(HttpReq) -> + #httpd{ + method = Method, + path_parts = PathParts + } = HttpReq, + Default = {'unknown.unknown', #{}}, + try + select(collect(handler_info, [Method, PathParts, HttpReq]), Default) + catch Type:Reason -> + Stack = erlang:get_stacktrace(), + couch_log:error("~s :: handler_info failure for ~p : ~p:~p :: ~p", [ + ?MODULE, + get(nonce), + Type, + Reason, + Stack + ]), + Default + end. + %% ------------------------------------------------------------------ %% Internal Function Definitions %% ------------------------------------------------------------------ diff --git a/src/chttpd/src/chttpd_httpd_handlers.erl b/src/chttpd/src/chttpd_httpd_handlers.erl index 5e86ea87d..d50115917 100644 --- a/src/chttpd/src/chttpd_httpd_handlers.erl +++ b/src/chttpd/src/chttpd_httpd_handlers.erl @@ -12,12 +12,23 @@ -module(chttpd_httpd_handlers). --export([url_handler/1, db_handler/1, design_handler/1]). +-export([url_handler/1, db_handler/1, design_handler/1, handler_info/3]). + +-export([ + not_supported/2, + not_supported/3, + not_implemented/2 +]). + + +-include_lib("couch/include/couch_db.hrl"). + url_handler(<<>>) -> fun chttpd_misc:handle_welcome_req/1; url_handler(<<"favicon.ico">>) -> fun chttpd_misc:handle_favicon_req/1; url_handler(<<"_utils">>) -> fun chttpd_misc:handle_utils_dir_req/1; url_handler(<<"_all_dbs">>) -> fun chttpd_misc:handle_all_dbs_req/1; +url_handler(<<"_deleted_dbs">>) -> fun chttpd_misc:handle_deleted_dbs_req/1; url_handler(<<"_dbs_info">>) -> fun chttpd_misc:handle_dbs_info_req/1; url_handler(<<"_active_tasks">>) -> fun chttpd_misc:handle_task_status_req/1; url_handler(<<"_scheduler">>) -> fun couch_replicator_httpd:handle_scheduler_req/1; @@ -33,14 +44,475 @@ db_handler(<<"_view_cleanup">>) -> fun chttpd_db:handle_view_cleanup_req/2; db_handler(<<"_compact">>) -> fun chttpd_db:handle_compact_req/2; db_handler(<<"_design">>) -> fun chttpd_db:handle_design_req/2; db_handler(<<"_partition">>) -> fun chttpd_db:handle_partition_req/2; -db_handler(<<"_temp_view">>) -> fun chttpd_view:handle_temp_view_req/2; +db_handler(<<"_temp_view">>) -> fun ?MODULE:not_supported/2; db_handler(<<"_changes">>) -> fun chttpd_db:handle_changes_req/2; +db_handler(<<"_purge">>) -> fun ?MODULE:not_implemented/2; +db_handler(<<"_purged_infos_limit">>) -> fun ?MODULE:not_implemented/2; db_handler(_) -> no_match. design_handler(<<"_view">>) -> fun chttpd_view:handle_view_req/3; -design_handler(<<"_show">>) -> fun chttpd_show:handle_doc_show_req/3; -design_handler(<<"_list">>) -> fun chttpd_show:handle_view_list_req/3; +design_handler(<<"_show">>) -> fun ?MODULE:not_supported/3; +design_handler(<<"_list">>) -> fun ?MODULE:not_supported/3; design_handler(<<"_update">>) -> fun chttpd_show:handle_doc_update_req/3; design_handler(<<"_info">>) -> fun chttpd_db:handle_design_info_req/3; -design_handler(<<"_rewrite">>) -> fun chttpd_rewrite:handle_rewrite_req/3; +design_handler(<<"_rewrite">>) -> fun ?MODULE:not_supported/3; design_handler(_) -> no_match. 
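One portability note on handler_info/1 in the chttpd_handlers.erl hunk above: erlang:get_stacktrace/0 is deprecated since OTP 21 and removed in OTP 24, so on newer runtimes the same logging would bind the stacktrace in the catch pattern instead. A minimal sketch of that form:

    try
        select(collect(handler_info, [Method, PathParts, HttpReq]), Default)
    catch Type:Reason:Stack ->
        couch_log:error("~s :: handler_info failure for ~p : ~p:~p :: ~p",
            [?MODULE, get(nonce), Type, Reason, Stack]),
        Default
    end.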
+ + +handler_info('GET', [], _) -> + {'welcome_message.read', #{}}; + +handler_info('GET', [<<"_active_tasks">>], _) -> + {'active_tasks.read', #{}}; + +handler_info('GET', [<<"_all_dbs">>], _) -> + {'all_dbs.read', #{}}; + +handler_info('GET', [<<"_deleted_dbs">>], _) -> + {'account-deleted-dbs.read', #{}}; + +handler_info('POST', [<<"_deleted_dbs">>], _) -> + {'account-deleted-dbs.undelete', #{}}; + +handler_info('DELETE', [<<"_deleted_dbs">>, Db], _) -> + {'account-deleted-dbs.delete', #{'db.name' => Db}}; + +handler_info('POST', [<<"_dbs_info">>], _) -> + {'dbs_info.read', #{}}; + +handler_info('GET', [<<"_node">>, <<"_local">>], _) -> + {'node.name.read', #{}}; + +handler_info(Method, [<<"_node">>, <<"_local">> | Rest], HttpReq) -> + handler_info(Method, [<<"_node">>, node() | Rest], HttpReq); + +handler_info('GET', [<<"_node">>, Node, <<"_config">>], _) -> + {'node.config.all.read', #{node => Node}}; + +handler_info('GET', [<<"_node">>, Node, <<"_config">>, Section], _) -> + {'node.config.section.read', #{node => Node, 'config.section' => Section}}; + +handler_info('GET', [<<"_node">>, Node, <<"_config">>, Section, Key], _) -> + {'node.config.key.read', #{ + node => Node, + 'config.section' => Section, + 'config.key' => Key + }}; + +handler_info('PUT', [<<"_node">>, Node, <<"_config">>, Section, Key], _) -> + {'node.config.key.write', #{ + node => Node, + 'config.section' => Section, + 'config.key' => Key + }}; + +handler_info('DELETE', [<<"_node">>, Node, <<"_config">>, Section, Key], _) -> + {'node.config.key.delete', #{ + node => Node, + 'config.section' => Section, + 'config.key' => Key + }}; + +handler_info('GET', [<<"_node">>, Node, <<"_stats">> | Path], _) -> + {'node.stats.read', #{node => Node, 'stat.path' => Path}}; + +handler_info('GET', [<<"_node">>, Node, <<"_system">>], _) -> + {'node.system.read', #{node => Node}}; + +handler_info('POST', [<<"_node">>, Node, <<"_restart">>], _) -> + {'node.restart.execute', #{node => Node}}; + +handler_info('POST', [<<"_reload_query_servers">>], _) -> + {'query_servers.reload', #{}}; + +handler_info('POST', [<<"_replicate">>], _) -> + {'replication.create', #{}}; + +handler_info('GET', [<<"_scheduler">>, <<"jobs">>], _) -> + {'replication.jobs.read', #{}}; + +handler_info('GET', [<<"_scheduler">>, <<"jobs">>, JobId], _) -> + {'replication.job.read', #{'job.id' => JobId}}; + +handler_info('GET', [<<"_scheduler">>, <<"docs">>], _) -> + {'replication.docs.read', #{'db.name' => <<"_replicator">>}}; + +handler_info('GET', [<<"_scheduler">>, <<"docs">>, Db], _) -> + {'replication.docs.read', #{'db.name' => Db}}; + +handler_info('GET', [<<"_scheduler">>, <<"docs">>, Db, DocId], _) -> + {'replication.doc.read', #{'db.name' => Db, 'doc.id' => DocId}}; + +handler_info('GET', [<<"_scheduler">>, <<"docs">> | Path], _) -> + case lists:splitwith(fun(Elem) -> Elem /= <<"_replicator">> end, Path) of + {_, [<<"_replicator">>]} -> + {'replication.docs.read', #{ + 'db.name' => filename:join(Path) + }}; + {DbParts, [<<"_replicator">>, DocId]} -> + {'replication.doc.read', #{ + 'db.name' => filename:join(DbParts ++ [<<"_replicator">>]), + 'doc.id' => DocId + }}; + _ -> + no_match + end; + +handler_info('GET', [<<"_session">>], _) -> + {'session.read', #{}}; + +handler_info('POST', [<<"_session">>], _) -> + {'session.create', #{}}; + +handler_info('DELETE', [<<"_session">>], _) -> + {'session.delete', #{}}; + +handler_info('GET', [<<"_up">>], _) -> + {'health.read', #{}}; + +handler_info('GET', [<<"_utils">> | Path], _) -> + {'utils.read', #{'file.path' => 
filename:join(Path)}}; + +handler_info('GET', [<<"_uuids">>], _) -> + {'uuids.read', #{}}; + +handler_info('GET', [<<"favicon.ico">>], _) -> + {'favicon.ico.read', #{}}; + + +handler_info(Method, [<<"_", _/binary>> = Part| Rest], Req) -> + % Maybe bail here so that we don't trample over a + % different url_handler plugin. However, we continue + % on for known system databases. + DbName = case Part of + <<"_dbs">> -> '_dbs'; + <<"_global_changes">> -> '_global_changes'; + <<"_metadata">> -> '_metadata'; + <<"_nodes">> -> '_nodes'; + <<"_replicator">> -> '_replicator'; + <<"_users">> -> '_users'; + _ -> no_match + end, + if DbName == no_match -> no_match; true -> + handler_info(Method, [DbName | Rest], Req) + end; + +handler_info('GET', [Db], _) -> + {'db.info.read', #{'db.name' => Db}}; + +handler_info('PUT', [Db], _) -> + {'db.create', #{'db.name' => Db}}; + +handler_info('POST', [Db], _) -> + {'db.doc.write', #{'db.name' => Db}}; + +handler_info('DELETE', [Db], _) -> + {'db.delete', #{'db.name' => Db}}; + +handler_info(M, [Db, <<"_all_docs">>], _) when M == 'GET'; M == 'POST' -> + {'db.all_docs.read', #{'db.name' => Db}}; + +handler_info('POST', [Db, <<"_all_docs">>, <<"queries">>], _) -> + {'db.all_docs.read', #{'db.name' => Db, multi => true}}; + +handler_info('POST', [Db, <<"_bulk_docs">>], _) -> + {'db.docs.write', #{'db.name' => Db, bulk => true}}; + +handler_info('POST', [Db, <<"_bulk_get">>], _) -> + {'db.docs.read', #{'db.name' => Db, bulk => true}}; + +handler_info('GET', [Db, <<"_changes">>], _) -> + {'db.changes.read', #{'db.name' => Db}}; + +handler_info('POST', [Db, <<"_changes">>], _) -> + {'db.changes.read', #{'db.name' => Db}}; + +handler_info('POST', [Db, <<"_compact">>], _) -> + {'db.compact.execute', #{'db.name' => Db}}; + +handler_info('GET', [Db, <<"_design">>, Name], _) -> + {'db.design.doc.read', #{'db.name' => Db, 'design.id' => Name}}; + +handler_info('POST', [Db, <<"_design">>, Name], _) -> + {'db.design.doc.write', #{'db.name' => Db, 'design.id' => Name}}; + +handler_info('PUT', [Db, <<"_design">>, Name], _) -> + {'db.design.doc.write', #{'db.name' => Db, 'design.id' => Name}}; + +handler_info('COPY', [Db, <<"_design">>, Name], Req) -> + {'db.design.doc.write', #{ + 'db.name' => Db, + 'design.id' => get_copy_destination(Req), + 'copy.source.doc.id' => <<"_design/", Name/binary>> + }}; + +handler_info('DELETE', [Db, <<"_design">>, Name], _) -> + {'db.design.doc.delete', #{'db.name' => Db, 'design.id' => Name}}; + +handler_info('GET', [Db, <<"_design">>, Name, <<"_info">>], _) -> + {'db.design.info.read', #{'db.name' => Db, 'design.id' => Name}}; + +handler_info(M, [Db, <<"_design">>, Name, <<"_list">>, List, View], _) + when M == 'GET'; M == 'POST', M == 'OPTIONS' -> + {'db.design.list.read', #{ + 'db.name' => Db, + 'design.id' => Name, + 'design.list.name' => List, + 'design.view.name' => View + }}; + +handler_info(M, [Db, <<"_design">>, Name, <<"_list">>, List, Design, View], _) + when M == 'GET'; M == 'POST', M == 'OPTIONS' -> + {'db.design.list.read', #{ + 'db.name' => Db, + 'design.id' => Name, + 'design.list.name' => List, + 'design.view.source.id' => Design, + 'design.view.name' => View + }}; + +handler_info(_, [Db, <<"_design">>, Name, <<"_rewrite">> | Path], _) -> + {'db.design.rewrite.execute', #{ + 'db.name' => Db, + 'design.id' => Name, + 'rewrite.path' => filename:join(Path) + }}; + +handler_info(_, [Db, <<"_design">>, Name, <<"_show">>, Show, DocId], _) -> + {'db.design.show.execute', #{ + 'db.name' => Db, + 'design.id' => Name, + 'design.show.name' 
=> Show, + 'design.show.doc.id' => DocId + }}; + +handler_info(_, [Db, <<"_design">>, Name, <<"_update">>, Update | Rest], _) -> + BaseTags = #{ + 'db.name' => Db, + 'design.id' => Name, + 'design.update.name' => Update + }, + Tags = case Rest of + [] -> + BaseTags; + _ -> + DocId = filename:join(Rest), + maps:put('design.update.doc.id', DocId, BaseTags) + end, + {'db.design.update.execute', Tags}; + +handler_info('POST', [Db, <<"_design">>, Name, <<"_view">>, View, <<"queries">>], _) -> + {'db.design.view.multi.read', #{ + 'db.name' => Db, + 'design.id' => Name, + 'design.view.name' => View + }}; + +handler_info(M, [Db, <<"_design">>, Name, <<"_view">>, View], _) + when M == 'GET'; M == 'POST' -> + {'db.design.view.read', #{ + 'db.name' => Db, + 'design.id' => Name, + 'design.view.name' => View + }}; + +handler_info(_, [_Db, <<"_design">>, _Name, <<"_", _/binary>> | _], _) -> + % Bail here so that we don't treat a plugin + % design handler in place of a design attachment + no_match; + +handler_info('GET', [Db, <<"_design">>, Name | Path], _) -> + {'db.design.doc.attachment.read', #{ + 'db.name' => Db, + 'design.id' => Name, + 'attachment.name' => filename:join(Path) + }}; + +handler_info('PUT', [Db, <<"_design">>, Name | Path], _) -> + {'db.design.doc.attachment.write', #{ + 'db.name' => Db, + 'design.id' => Name, + 'attachment.name' => filename:join(Path) + }}; + +handler_info('DELETE', [Db, <<"_design">>, Name | Path], _) -> + {'db.design.doc.attachment.delete', #{ + 'db.name' => Db, + 'design.id' => Name, + 'attachment.name' => filename:join(Path) + }}; + +handler_info(_, [Db, <<"_design/", Name/binary>> | Rest], Req) -> + % Recurse if someone sent us `_design%2Fname` + chttpd_handlers:handler_info(Req#httpd{ + path_parts = [Db, <<"_design">>, Name | Rest] + }); + +handler_info(M, [Db, <<"_design_docs">>], _) when M == 'GET'; M == 'POST' -> + {'db.design_docs.read', #{'db.name' => Db}}; + +handler_info('POST', [Db, <<"_design_docs">>, <<"queries">>], _) -> + {'db.design_docs.read', #{'db.name' => Db, multi => true}}; + +handler_info('POST', [Db, <<"_ensure_full_commit">>], _) -> + {'db.ensure_full_commit.execute', #{'db.name' => Db}}; + +handler_info('GET', [Db, <<"_local">>, Name], _) -> + {'db.local.doc.read', #{'db.name' => Db, 'local.id' => Name}}; + +handler_info('POST', [Db, <<"_local">>, Name], _) -> + {'db.local.doc.write', #{'db.name' => Db, 'local.id' => Name}}; + +handler_info('PUT', [Db, <<"_local">>, Name], _) -> + {'db.local.doc.write', #{'db.name' => Db, 'local.id' => Name}}; + +handler_info('COPY', [Db, <<"_local">>, Name], Req) -> + {'db.local.doc.write', #{ + 'db.name' => Db, + 'local.id' => get_copy_destination(Req), + 'copy.source.doc.id' => <<"_local/", Name/binary>> + }}; + +handler_info('DELETE', [Db, <<"_local">>, Name], _) -> + {'db.local.doc.delete', #{'db.name' => Db, 'local.id' => Name}}; + +handler_info(_, [Db, <<"_local">>, Name | _Path], _) -> + {'db.local.doc.invalid_attachment_req', #{ + 'db.name' => Db, + 'local.id' => Name + }}; + +handler_info(M, [Db, <<"_local_docs">>], _) when M == 'GET'; M == 'POST' -> + {'db.local_docs.read', #{'db.name' => Db}}; + +handler_info('POST', [Db, <<"_local_docs">>, <<"queries">>], _) -> + {'db.local_docs.read', #{'db.name' => Db, multi => true}}; + +handler_info('POST', [Db, <<"_missing_revs">>], _) -> + {'db.docs.missing_revs.execute', #{'db.name' => Db}}; + +handler_info('GET', [Db, <<"_partition">>, Partition], _) -> + {'db.partition.info.read', #{'db.name' => Db, partition => Partition}}; + +handler_info(_, [Db, 
<<"_partition">>, Partition | Rest], Req) -> + NewPath = case Rest of + [<<"_all_docs">> | _] -> + [Db | Rest]; + [<<"_index">> | _] -> + [Db | Rest]; + [<<"_find">> | _] -> + [Db | Rest]; + [<<"_explain">> | _] -> + [Db | Rest]; + [<<"_design">>, _Name, <<"_", _/binary>> | _] -> + [Db | Rest]; + _ -> + no_match + end, + if NewPath == no_match -> no_match; true -> + {OpName, Tags} = chttpd_handlers:handler_info(Req#httpd{ + path_parts = NewPath + }), + NewOpName = case atom_to_list(OpName) of + "db." ++ Name -> list_to_atom("db.partition." ++ Name); + Else -> list_to_atom(Else ++ ".partition") + end, + {NewOpName, maps:put(partition, Partition, Tags)} + end; + +handler_info('POST', [Db, <<"_purge">>], _) -> + {'db.docs.purge', #{'db.name' => Db}}; + +handler_info('GET', [Db, <<"_purged_infos_limit">>], _) -> + {'db.purged_infos_limit.read', #{'db.name' => Db}}; + +handler_info('PUT', [Db, <<"_purged_infos_limit">>], _) -> + {'db.purged_infos_limit.write', #{'db.name' => Db}}; + +handler_info('POST', [Db, <<"_revs_diff">>], _) -> + {'db.docs.revs_diff.execute', #{'db.name' => Db}}; + +handler_info('GET', [Db, <<"_revs_limit">>], _) -> + {'db.revs_limit.read', #{'db.name' => Db}}; + +handler_info('PUT', [Db, <<"_revs_limit">>], _) -> + {'db.revs_limit.write', #{'db.name' => Db}}; + +handler_info('GET', [Db, <<"_security">>], _) -> + {'db.security.read', #{'db.name' => Db}}; + +handler_info('PUT', [Db, <<"_security">>], _) -> + {'db.security.write', #{'db.name' => Db}}; + +handler_info(_, [Db, <<"_view_cleanup">>], _) -> + {'views.cleanup.execute', #{'db.name' => Db}}; + +handler_info(_, [_Db, <<"_", _/binary>> | _], _) -> + % Bail here for other possible db_handleres + no_match; + +handler_info('GET', [Db, DocId], _) -> + {'db.doc.read', #{'db.name' => Db, 'doc.id' => DocId}}; + +handler_info('POST', [Db, DocId], _) -> + {'db.doc.write', #{'db.name' => Db, 'design.id' => DocId}}; + +handler_info('PUT', [Db, DocId], _) -> + {'db.doc.write', #{'db.name' => Db, 'design.id' => DocId}}; + +handler_info('COPY', [Db, DocId], Req) -> + {'db.doc.write', #{ + 'db.name' => Db, + 'doc.id' => get_copy_destination(Req), + 'copy.source.doc.id' => DocId + }}; + +handler_info('DELETE', [Db, DocId], _) -> + {'db.doc.delete', #{'db.name' => Db, 'doc.id' => DocId}}; + +handler_info('GET', [Db, DocId | Path], _) -> + {'db.doc.attachment.read', #{ + 'db.name' => Db, + 'doc.id' => DocId, + 'attachment.name' => filename:join(Path) + }}; + +handler_info('PUT', [Db, DocId | Path], _) -> + {'db.doc.attachment.write', #{ + 'db.name' => Db, + 'doc.id' => DocId, + 'attachment.name' => filename:join(Path) + }}; + +handler_info('DELETE', [Db, DocId | Path], _) -> + {'db.doc.attachment.delete', #{ + 'db.name' => Db, + 'doc.id' => DocId, + 'attachment.name' => filename:join(Path) + }}; + +handler_info(_, _, _) -> + no_match. + + +get_copy_destination(Req) -> + try + {DocIdStr, _} = couch_httpd_db:parse_copy_destination_header(Req), + list_to_binary(mochiweb_util:unquote(DocIdStr)) + catch _:_ -> + unknown + end. + + +not_supported(#httpd{} = Req, Db, _DDoc) -> + not_supported(Req, Db). + + +not_supported(#httpd{} = Req, _Db) -> + Msg = <<"resource is not supported in CouchDB >= 4.x">>, + chttpd:send_error(Req, 410, gone, Msg). + + +not_implemented(#httpd{} = Req, _Db) -> + Msg = <<"resource is not implemented">>, + chttpd:send_error(Req, 501, not_implemented, Msg). 
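A few worked examples of the request tagging above, tracing the clause order (paths, names and values are illustrative):

%% GET /_scheduler/docs/prod/accounts/_replicator/rep-001
%%   -> {'replication.doc.read', #{'db.name' => <<"prod/accounts/_replicator">>,
%%                                 'doc.id' => <<"rep-001">>}}
%%
%% GET /_users/org.couchdb.user%3Abob      (known system db, re-dispatched as '_users')
%%   -> {'db.doc.read', #{'db.name' => '_users',
%%                        'doc.id' => <<"org.couchdb.user:bob">>}}
%%
%% GET /db/_design%2Ffoo/_view/bar         (re-dispatched as [<<"_design">>, <<"foo">> | Rest])
%%   -> {'db.design.view.read', #{'db.name' => <<"db">>, 'design.id' => <<"foo">>,
%%                                'design.view.name' => <<"bar">>}}
%%
%% GET /db/_partition/sensors/_all_docs    (op name rewritten, partition tag added)
%%   -> {'db.partition.all_docs.read', #{'db.name' => <<"db">>,
%%                                       partition => <<"sensors">>}}

Requests to the retired endpoints (_temp_view, _show, _list, _rewrite) are answered by not_supported/2,3 above with 410 Gone and a body of {"error":"gone","reason":"resource is not supported in CouchDB >= 4.x"}.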
diff --git a/src/chttpd/src/chttpd_misc.erl b/src/chttpd/src/chttpd_misc.erl index ffb5295b5..5cfd0f7cb 100644 --- a/src/chttpd/src/chttpd_misc.erl +++ b/src/chttpd/src/chttpd_misc.erl @@ -15,6 +15,7 @@ -export([ handle_all_dbs_req/1, handle_dbs_info_req/1, + handle_deleted_dbs_req/1, handle_favicon_req/1, handle_favicon_req/2, handle_replicate_req/1, @@ -105,7 +106,7 @@ handle_utils_dir_req(Req, _) -> send_method_not_allowed(Req, "GET,HEAD"). maybe_add_csp_headers(Headers, "true") -> - DefaultValues = "default-src 'self'; img-src 'self' data:; font-src 'self'; " + DefaultValues = "child-src 'self' data: blob:; default-src 'self'; img-src 'self' data:; font-src 'self'; " "script-src 'self' 'unsafe-eval'; style-src 'self' 'unsafe-inline';", Value = config:get("csp", "header_value", DefaultValues), [{"Content-Security-Policy", Value} | Headers]; @@ -113,22 +114,27 @@ maybe_add_csp_headers(Headers, _) -> Headers. handle_all_dbs_req(#httpd{method='GET'}=Req) -> - Args = couch_mrview_http:parse_params(Req, undefined), - ShardDbName = config:get("mem3", "shards_db", "_dbs"), - %% shard_db is not sharded but mem3:shards treats it as an edge case - %% so it can be pushed thru fabric - {ok, Info} = fabric:get_db_info(ShardDbName), - Etag = couch_httpd:make_etag({Info}), - Options = [{user_ctx, Req#httpd.user_ctx}], - {ok, Resp} = chttpd:etag_respond(Req, Etag, fun() -> - {ok, Resp} = chttpd:start_delayed_json_response(Req, 200, [{"ETag",Etag}]), - VAcc = #vacc{req=Req,resp=Resp}, - fabric:all_docs(ShardDbName, Options, fun all_dbs_callback/2, VAcc, Args) - end), - case is_record(Resp, vacc) of - true -> {ok, Resp#vacc.resp}; - _ -> {ok, Resp} - end; + #mrargs{ + start_key = StartKey, + end_key = EndKey, + direction = Dir, + limit = Limit, + skip = Skip + } = couch_mrview_http:parse_params(Req, undefined), + + Options = [ + {start_key, StartKey}, + {end_key, EndKey}, + {dir, Dir}, + {limit, Limit}, + {skip, Skip} + ], + + {ok, Resp} = chttpd:start_delayed_json_response(Req, 200, []), + Callback = fun all_dbs_callback/2, + Acc = #vacc{req=Req,resp=Resp}, + {ok, Acc1} = fabric2_db:list_dbs(Callback, Acc, Options), + {ok, Acc1#vacc.resp}; handle_all_dbs_req(Req) -> send_method_not_allowed(Req, "GET,HEAD"). @@ -137,12 +143,9 @@ all_dbs_callback({meta, _Meta}, #vacc{resp=Resp0}=Acc) -> {ok, Acc#vacc{resp=Resp1}}; all_dbs_callback({row, Row}, #vacc{resp=Resp0}=Acc) -> Prepend = couch_mrview_http:prepend_val(Acc), - case couch_util:get_value(id, Row) of <<"_design", _/binary>> -> - {ok, Acc}; - DbName -> - {ok, Resp1} = chttpd:send_delayed_chunk(Resp0, [Prepend, ?JSON_ENCODE(DbName)]), - {ok, Acc#vacc{prepend=",", resp=Resp1}} - end; + DbName = couch_util:get_value(id, Row), + {ok, Resp1} = chttpd:send_delayed_chunk(Resp0, [Prepend, ?JSON_ENCODE(DbName)]), + {ok, Acc#vacc{prepend=",", resp=Resp1}}; all_dbs_callback(complete, #vacc{resp=Resp0}=Acc) -> {ok, Resp1} = chttpd:send_delayed_chunk(Resp0, "]"), {ok, Resp2} = chttpd:end_delayed_json_response(Resp1), @@ -151,7 +154,10 @@ all_dbs_callback({error, Reason}, #vacc{resp=Resp0}=Acc) -> {ok, Resp1} = chttpd:send_delayed_error(Resp0, Reason), {ok, Acc#vacc{resp=Resp1}}. 
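With the change above, /_all_dbs no longer reads the mem3 shards db; the usual view-style query parameters are parsed into #mrargs{} and handed to fabric2_db:list_dbs/3 as plain options. Illustratively:

%% GET /_all_dbs?startkey="aa"&limit=10
%%   -> fabric2_db:list_dbs(Callback, Acc,
%%          [{start_key, <<"aa">>}, {limit, 10} | _OtherDefaults])

Design documents are also no longer filtered out in all_dbs_callback/2, since the rows now carry database names rather than shard-map documents.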
-handle_dbs_info_req(#httpd{method='POST'}=Req) -> +handle_dbs_info_req(#httpd{method = 'GET'} = Req) -> + ok = chttpd:verify_is_server_admin(Req), + send_db_infos(Req, list_dbs_info); +handle_dbs_info_req(#httpd{method='POST', user_ctx=UserCtx}=Req) -> chttpd:validate_ctype(Req, "application/json"), Props = chttpd:json_body_obj(Req), Keys = couch_mrview_util:get_view_keys(Props), @@ -168,46 +174,150 @@ handle_dbs_info_req(#httpd{method='POST'}=Req) -> {ok, Resp} = chttpd:start_json_response(Req, 200), send_chunk(Resp, "["), lists:foldl(fun(DbName, AccSeparator) -> - case catch fabric:get_db_info(DbName) of - {ok, Result} -> - Json = ?JSON_ENCODE({[{key, DbName}, {info, {Result}}]}), - send_chunk(Resp, AccSeparator ++ Json); - _ -> - Json = ?JSON_ENCODE({[{key, DbName}, {error, not_found}]}), - send_chunk(Resp, AccSeparator ++ Json) + try + {ok, Db} = fabric2_db:open(DbName, [{user_ctx, UserCtx}]), + {ok, Info} = fabric2_db:get_db_info(Db), + Json = ?JSON_ENCODE({[{key, DbName}, {info, {Info}}]}), + send_chunk(Resp, AccSeparator ++ Json) + catch error:database_does_not_exist -> + ErrJson = ?JSON_ENCODE({[{key, DbName}, {error, not_found}]}), + send_chunk(Resp, AccSeparator ++ ErrJson) end, "," % AccSeparator now has a comma end, "", Keys), send_chunk(Resp, "]"), chttpd:end_json_response(Resp); handle_dbs_info_req(Req) -> - send_method_not_allowed(Req, "POST"). + send_method_not_allowed(Req, "GET,HEAD,POST"). + +handle_deleted_dbs_req(#httpd{method='GET', path_parts=[_]}=Req) -> + ok = chttpd:verify_is_server_admin(Req), + send_db_infos(Req, list_deleted_dbs_info); +handle_deleted_dbs_req(#httpd{method='POST', user_ctx=Ctx, path_parts=[_]}=Req) -> + couch_httpd:verify_is_server_admin(Req), + chttpd:validate_ctype(Req, "application/json"), + GetJSON = fun(Key, Props, Default) -> + case couch_util:get_value(Key, Props) of + undefined when Default == error -> + Fmt = "POST body must include `~s` parameter.", + Msg = io_lib:format(Fmt, [Key]), + throw({bad_request, iolist_to_binary(Msg)}); + undefined -> + Default; + Value -> + Value + end + end, + {BodyProps} = chttpd:json_body_obj(Req), + {UndeleteProps} = GetJSON(<<"undelete">>, BodyProps, error), + DbName = GetJSON(<<"source">>, UndeleteProps, error), + TimeStamp = GetJSON(<<"timestamp">>, UndeleteProps, error), + TgtDbName = GetJSON(<<"target">>, UndeleteProps, DbName), + case fabric2_db:undelete(DbName, TgtDbName, TimeStamp, [{user_ctx, Ctx}]) of + ok -> + send_json(Req, 200, {[{ok, true}]}); + {error, file_exists} -> + chttpd:send_error(Req, file_exists); + {error, not_found} -> + chttpd:send_error(Req, not_found); + Error -> + throw(Error) + end; +handle_deleted_dbs_req(#httpd{path_parts = PP}=Req) when length(PP) == 1 -> + send_method_not_allowed(Req, "GET,HEAD,POST"); +handle_deleted_dbs_req(#httpd{method='DELETE', user_ctx=Ctx, path_parts=[_, DbName]}=Req) -> + couch_httpd:verify_is_server_admin(Req), + TS = case ?JSON_DECODE(couch_httpd:qs_value(Req, "timestamp", "null")) of + null -> + throw({bad_request, "`timestamp` parameter is not provided."}); + TS0 -> + TS0 + end, + case fabric2_db:delete(DbName, [{user_ctx, Ctx}, {deleted_at, TS}]) of + ok -> + send_json(Req, 200, {[{ok, true}]}); + {error, not_found} -> + chttpd:send_error(Req, not_found); + Error -> + throw(Error) + end; +handle_deleted_dbs_req(#httpd{path_parts = PP}=Req) when length(PP) == 2 -> + send_method_not_allowed(Req, "HEAD,DELETE"); +handle_deleted_dbs_req(Req) -> + chttpd:send_error(Req, not_found). 
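The request shapes accepted by the new handle_deleted_dbs_req/1 above, with example values only (all verbs require server admin, and the timestamp is passed through to fabric2_db verbatim; in the query string it must be JSON-encoded, i.e. quoted):

%% List deleted databases:
%%   GET /_deleted_dbs
%%
%% Restore a deleted database, optionally under a new name:
%%   POST /_deleted_dbs
%%   {"undelete": {"source": "db1",
%%                 "timestamp": "2020-02-20T21:35:46Z",
%%                 "target": "db1-restored"}}
%%
%% Permanently remove one deleted-database snapshot:
%%   DELETE /_deleted_dbs/db1?timestamp="2020-02-20T21:35:46Z"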
+ +send_db_infos(Req, ListFunctionName) -> + #mrargs{ + start_key = StartKey, + end_key = EndKey, + direction = Dir, + limit = Limit, + skip = Skip + } = couch_mrview_http:parse_params(Req, undefined), + + Options = [ + {start_key, StartKey}, + {end_key, EndKey}, + {dir, Dir}, + {limit, Limit}, + {skip, Skip} + ], + + % TODO: Figure out if we can't calculate a valid + % ETag for this request. \xFFmetadataVersion won't + % work as we don't bump versions on size changes + + {ok, Resp1} = chttpd:start_delayed_json_response(Req, 200, []), + Callback = fun dbs_info_callback/2, + Acc = #vacc{req = Req, resp = Resp1}, + {ok, Resp2} = fabric2_db:ListFunctionName(Callback, Acc, Options), + case is_record(Resp2, vacc) of + true -> {ok, Resp2#vacc.resp}; + _ -> {ok, Resp2} + end. + +dbs_info_callback({meta, _Meta}, #vacc{resp = Resp0} = Acc) -> + {ok, Resp1} = chttpd:send_delayed_chunk(Resp0, "["), + {ok, Acc#vacc{resp = Resp1}}; +dbs_info_callback({row, Props}, #vacc{resp = Resp0} = Acc) -> + Prepend = couch_mrview_http:prepend_val(Acc), + Chunk = [Prepend, ?JSON_ENCODE({Props})], + {ok, Resp1} = chttpd:send_delayed_chunk(Resp0, Chunk), + {ok, Acc#vacc{prepend = ",", resp = Resp1}}; +dbs_info_callback(complete, #vacc{resp = Resp0} = Acc) -> + {ok, Resp1} = chttpd:send_delayed_chunk(Resp0, "]"), + {ok, Resp2} = chttpd:end_delayed_json_response(Resp1), + {ok, Acc#vacc{resp = Resp2}}; +dbs_info_callback({error, Reason}, #vacc{resp = Resp0} = Acc) -> + {ok, Resp1} = chttpd:send_delayed_error(Resp0, Reason), + {ok, Acc#vacc{resp = Resp1}}. handle_task_status_req(#httpd{method='GET'}=Req) -> ok = chttpd:verify_is_server_admin(Req), - {Replies, _BadNodes} = gen_server:multi_call(couch_task_status, all), - Response = lists:flatmap(fun({Node, Tasks}) -> - [{[{node,Node} | Task]} || Task <- Tasks] - end, Replies), - send_json(Req, lists:sort(Response)); + ActiveTasks = fabric2_active_tasks:get_active_tasks(), + send_json(Req, ActiveTasks); handle_task_status_req(Req) -> send_method_not_allowed(Req, "GET,HEAD"). -handle_replicate_req(#httpd{method='POST', user_ctx=Ctx} = Req) -> +handle_replicate_req(#httpd{method='POST', user_ctx=Ctx, req_body=PostBody} = Req) -> chttpd:validate_ctype(Req, "application/json"), %% see HACK in chttpd.erl about replication - PostBody = get(post_body), - case replicate(PostBody, Ctx) of + case couch_replicator:replicate(PostBody, Ctx) of {ok, {continuous, RepId}} -> send_json(Req, 202, {[{ok, true}, {<<"_local_id">>, RepId}]}); {ok, {cancelled, RepId}} -> send_json(Req, 200, {[{ok, true}, {<<"_local_id">>, RepId}]}); - {ok, {JsonResults}} -> - send_json(Req, {[{ok, true} | JsonResults]}); + {ok, #{} = JsonResults} -> + send_json(Req, maps:merge(#{<<"ok">> => true}, JsonResults)); {ok, stopped} -> send_json(Req, 200, {[{ok, stopped}]}); {error, not_found=Error} -> chttpd:send_error(Req, Error); + {error, #{<<"error">> := Err, <<"reason">> := Reason}} when + is_binary(Err), is_binary(Reason) -> + % Safe to use binary_to_atom since this is only built + % from couch_replicator_jobs:error_info/1 + chttpd:send_error(Req, {binary_to_atom(Err, utf8), Reason}); {error, {_, _}=Error} -> chttpd:send_error(Req, Error); {_, _}=Error -> @@ -216,50 +326,6 @@ handle_replicate_req(#httpd{method='POST', user_ctx=Ctx} = Req) -> handle_replicate_req(Req) -> send_method_not_allowed(Req, "POST"). 
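A small note on send_db_infos/2 above: ListFunctionName is an ordinary variable bound to an atom (list_dbs_info or list_deleted_dbs_info), so fabric2_db:ListFunctionName(Callback, Acc, Options) uses Erlang's dynamic call syntax and is equivalent to:

    {ok, Resp2} = erlang:apply(fabric2_db, ListFunctionName, [Callback, Acc, Options]),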
-replicate({Props} = PostBody, Ctx) -> - case couch_util:get_value(<<"cancel">>, Props) of - true -> - cancel_replication(PostBody, Ctx); - _ -> - Node = choose_node([ - couch_util:get_value(<<"source">>, Props), - couch_util:get_value(<<"target">>, Props) - ]), - case rpc:call(Node, couch_replicator, replicate, [PostBody, Ctx]) of - {badrpc, Reason} -> - erlang:error(Reason); - Res -> - Res - end - end. - -cancel_replication(PostBody, Ctx) -> - {Res, _Bad} = rpc:multicall(couch_replicator, replicate, [PostBody, Ctx]), - case [X || {ok, {cancelled, _}} = X <- Res] of - [Success|_] -> - % Report success if at least one node canceled the replication - Success; - [] -> - case lists:usort(Res) of - [UniqueReply] -> - % Report a universally agreed-upon reply - UniqueReply; - [] -> - {error, badrpc}; - Else -> - % Unclear what to do here -- pick the first error? - % Except try ignoring any {error, not_found} responses - % because we'll always get two of those - hd(Else -- [{error, not_found}]) - end - end. - -choose_node(Key) when is_binary(Key) -> - Checksum = erlang:crc32(Key), - Nodes = lists:sort([node()|erlang:nodes()]), - lists:nth(1 + Checksum rem length(Nodes), Nodes); -choose_node(Key) -> - choose_node(term_to_binary(Key)). handle_reload_query_servers_req(#httpd{method='POST'}=Req) -> chttpd:validate_ctype(Req, "application/json"), @@ -279,12 +345,11 @@ handle_up_req(#httpd{method='GET'} = Req) -> "nolb" -> send_json(Req, 404, {[{status, nolb}]}); _ -> - {ok, {Status}} = mem3_seeds:get_status(), - case couch_util:get_value(status, Status) of - ok -> - send_json(Req, 200, {Status}); - seeding -> - send_json(Req, 404, {Status}) + try + fabric2_db:list_dbs([{limit, 0}]), + send_json(Req, 200, {[{status, ok}]}) + catch error:{timeout, _} -> + send_json(Req, 404, {[{status, backend_unavailable}]}) end end; diff --git a/src/chttpd/src/chttpd_node.erl b/src/chttpd/src/chttpd_node.erl index 033abd68d..0159672f5 100644 --- a/src/chttpd/src/chttpd_node.erl +++ b/src/chttpd/src/chttpd_node.erl @@ -15,7 +15,8 @@ -export([ handle_node_req/1, - get_stats/0 + get_stats/0, + run_queues/0 ]). -include_lib("couch/include/couch_db.hrl"). @@ -70,7 +71,9 @@ handle_node_req(#httpd{method='PUT', path_parts=[_, Node, <<"_config">>, Section Value = couch_util:trim(chttpd:json_body(Req)), Persist = chttpd:header_value(Req, "X-Couch-Persist") /= "false", OldValue = call_node(Node, config, get, [Section, Key, ""]), - case call_node(Node, config, set, [Section, Key, ?b2l(Value), Persist]) of + IsSensitive = Section == <<"admins">>, + Opts = #{persisit => Persist, sensitive => IsSensitive}, + case call_node(Node, config, set, [Section, Key, ?b2l(Value), Opts]) of ok -> send_json(Req, 200, list_to_binary(OldValue)); {error, Reason} -> @@ -210,10 +213,12 @@ get_stats() -> {CF, CDU} = db_pid_stats(), MessageQueues0 = [{couch_file, {CF}}, {couch_db_updater, {CDU}}], MessageQueues = MessageQueues0 ++ message_queues(registered()), + {SQ, DCQ} = run_queues(), [ {uptime, couch_app:uptime() div 1000}, {memory, {Memory}}, - {run_queue, statistics(run_queue)}, + {run_queue, SQ}, + {run_queue_dirty_cpu, DCQ}, {ets_table_count, length(ets:all())}, {context_switches, element(1, statistics(context_switches))}, {reductions, element(1, statistics(reductions))}, @@ -285,3 +290,13 @@ message_queues(Registered) -> {Type, Length} = process_info(whereis(Name), Type), {Name, Length} end, Registered). 
+ +%% Workaround for https://bugs.erlang.org/browse/ERL-1355 +run_queues() -> + case erlang:system_info(dirty_cpu_schedulers) > 0 of + false -> + {statistics(run_queue), 0}; + true -> + [DCQ | SQs] = lists:reverse(statistics(run_queue_lengths)), + {lists:sum(SQs), DCQ} + end. diff --git a/src/chttpd/src/chttpd_rewrite.erl b/src/chttpd/src/chttpd_rewrite.erl index 019651374..1c2c1f333 100644 --- a/src/chttpd/src/chttpd_rewrite.erl +++ b/src/chttpd/src/chttpd_rewrite.erl @@ -71,8 +71,9 @@ do_rewrite(#httpd{mochi_req=MochiReq}=Req, {Props}=Rewrite) when is_list(Props) undefined -> erlang:get(mochiweb_request_body); B -> B end, + NewMochiReq:cleanup(), case Body of - undefined -> NewMochiReq:cleanup(); + undefined -> []; _ -> erlang:put(mochiweb_request_body, Body) end, couch_log:debug("rewrite to ~p", [Path]), diff --git a/src/chttpd/src/chttpd_show.erl b/src/chttpd/src/chttpd_show.erl index a6d0368b9..8a15bdcbe 100644 --- a/src/chttpd/src/chttpd_show.erl +++ b/src/chttpd/src/chttpd_show.erl @@ -123,15 +123,14 @@ send_doc_update_response(Req, Db, DDoc, UpdateName, Doc, DocId) -> JsonReq = chttpd_external:json_req_obj(Req, Db, DocId), JsonDoc = couch_query_servers:json_doc(Doc), Cmd = [<<"updates">>, UpdateName], - W = chttpd:qs_value(Req, "w", integer_to_list(mem3:quorum(Db))), UpdateResp = couch_query_servers:ddoc_prompt(DDoc, Cmd, [JsonDoc, JsonReq]), JsonResp = case UpdateResp of [<<"up">>, {NewJsonDoc}, {JsonResp0}] -> case chttpd:header_value(Req, "X-Couch-Full-Commit", "false") of "true" -> - Options = [full_commit, {user_ctx, Req#httpd.user_ctx}, {w, W}]; + Options = [full_commit, {user_ctx, Req#httpd.user_ctx}]; _ -> - Options = [{user_ctx, Req#httpd.user_ctx}, {w, W}] + Options = [{user_ctx, Req#httpd.user_ctx}] end, NewDoc = couch_db:doc_from_json_obj_validate(Db, {NewJsonDoc}), couch_doc:validate_docid(NewDoc#doc.id), diff --git a/src/chttpd/src/chttpd_stats.erl b/src/chttpd/src/chttpd_stats.erl index 59ec9268d..27e9c3180 100644 --- a/src/chttpd/src/chttpd_stats.erl +++ b/src/chttpd/src/chttpd_stats.erl @@ -14,8 +14,8 @@ -export([ - init/0, - report/2, + init/1, + report/1, incr_reads/0, incr_reads/1, @@ -24,29 +24,40 @@ incr_writes/1, incr_rows/0, - incr_rows/1 + incr_rows/1, + + update_interval/1 ]). -record(st, { reads = 0, writes = 0, - rows = 0 + rows = 0, + reporter, + last_report_ts = 0, + interval, + request }). -define(KEY, chttpd_stats). +-define(INTERVAL_IN_SEC, 60). - -init() -> - put(?KEY, #st{}). +init(Request) -> + Reporter = config:get("chttpd", "stats_reporter"), + Time = erlang:monotonic_time(second), + Interval = config:get_integer("chttpd", "stats_reporting_interval", + ?INTERVAL_IN_SEC), + put(?KEY, #st{reporter = Reporter, last_report_ts = Time, + interval = Interval, request = Request}). -report(HttpReq, HttpResp) -> +report(HttpResp) -> try case get(?KEY) of #st{} = St -> - report(HttpReq, HttpResp, St); + report(HttpResp, St); _ -> ok end @@ -57,19 +68,18 @@ report(HttpReq, HttpResp) -> end. -report(HttpReq, HttpResp, St) -> - case config:get("chttpd", "stats_reporter") of - undefined -> - ok; - ModStr -> - Mod = list_to_existing_atom(ModStr), - #st{ - reads = Reads, - writes = Writes, - rows = Rows - } = St, - Mod:report(HttpReq, HttpResp, Reads, Writes, Rows) - end. +report(HttpResp, #st{reporter = undefined}) -> + ok; + +report(HttpResp, #st{reporter = Reporter} = St) -> + Mod = list_to_existing_atom(Reporter), + #st{ + reads = Reads, + writes = Writes, + rows = Rows, + request = HttpReq + } = St, + Mod:report(HttpReq, HttpResp, Reads, Writes, Rows). 
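To make the run_queues/0 workaround in the chttpd_node.erl hunk above concrete: statistics(run_queue_lengths) reports one length per normal run queue followed by the dirty CPU run queue as the last element, so the function separates the two rather than letting dirty work inflate the plain run_queue figure. An illustrative reading:

%% statistics(run_queue_lengths)  -> [0, 1, 0, 2]   % last element is the dirty CPU queue
%% lists:reverse(...)             -> [2, 0, 1, 0]   % DCQ = 2, SQs = [0, 1, 0]
%% run_queues()                   -> {1, 2}         % {lists:sum(SQs), DCQ}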
incr_reads() -> @@ -101,7 +111,47 @@ incr(Idx, Count) -> #st{} = St -> Total = element(Idx, St) + Count, NewSt = setelement(Idx, St, Total), - put(?KEY, NewSt); + put(?KEY, NewSt), + maybe_report_intermittent(St); + _ -> + ok + end. + + +maybe_report_intermittent(State) -> + #st{last_report_ts = LastTime, interval = Interval} = State, + CurrentTime = erlang:monotonic_time(second), + case CurrentTime - LastTime of + Change when Change >= Interval -> + % Since response is not available during the request, we set + % this undefined. Modules that call: + % Mod:report(HttpReq, HttpResp, Reads, Writes, Rows) should + % be aware of this. Mod:report should also return a boolean + % to indicate if reset should occur + case ?MODULE:report(undefined) of + true -> + reset_stats(State, CurrentTime); + _ -> + ok + end; _ -> ok end. + + +update_interval(Interval) -> + case get(?KEY) of + #st{} = St -> + put(?KEY, St#st{interval = Interval}); + _ -> + ok + end. + + +reset_stats(State, NewTime) -> + put(?KEY, State#st{ + reads = 0, + writes = 0, + rows = 0, + last_report_ts = NewTime + }). diff --git a/src/chttpd/src/chttpd_sup.erl b/src/chttpd/src/chttpd_sup.erl index d4bdb118c..8b51e6c40 100644 --- a/src/chttpd/src/chttpd_sup.erl +++ b/src/chttpd/src/chttpd_sup.erl @@ -18,17 +18,25 @@ -export([init/1]). --export([start_link/1]). +-export([start_link/0]). -export([handle_config_change/5, handle_config_terminate/3]). %% Helper macro for declaring children of supervisor -define(CHILD(I, Type), {I, {I, start_link, []}, permanent, 100, Type, [I]}). -start_link(Args) -> - supervisor:start_link({local,?MODULE}, ?MODULE, Args). +start_link() -> + Arg = case fabric2_node_types:is_type(api_frontend) of + true -> normal; + false -> disabled + end, + supervisor:start_link({local,?MODULE}, ?MODULE, Arg). -init([]) -> +init(disabled) -> + couch_log:notice("~p : api_frontend disabled", [?MODULE]), + {ok, {{one_for_one, 3, 10}, []}}; + +init(normal) -> Children = [ { config_listener_mon, diff --git a/src/chttpd/src/chttpd_test_util.erl b/src/chttpd/src/chttpd_test_util.erl index a1a08eff4..8930a5a5e 100644 --- a/src/chttpd/src/chttpd_test_util.erl +++ b/src/chttpd/src/chttpd_test_util.erl @@ -21,7 +21,7 @@ start_couch() -> start_couch(?CONFIG_CHAIN). start_couch(IniFiles) -> - test_util:start_couch(IniFiles, [chttpd]). + test_util:start_couch(IniFiles, [couch_js, couch_views, chttpd]). stop_couch(Ctx) -> test_util:stop_couch(Ctx). diff --git a/src/chttpd/src/chttpd_view.erl b/src/chttpd/src/chttpd_view.erl index f73a8b7b1..8d401013c 100644 --- a/src/chttpd/src/chttpd_view.erl +++ b/src/chttpd/src/chttpd_view.erl @@ -14,58 +14,120 @@ -include_lib("couch/include/couch_db.hrl"). -include_lib("couch_mrview/include/couch_mrview.hrl"). --export([handle_view_req/3, handle_temp_view_req/2]). +-export([ + handle_view_req/3, + validate_args/2, + parse_queries/4, + view_cb/2 +]). + +-define(DEFAULT_ALL_DOCS_PAGE_SIZE, 2000). +-define(DEFAULT_VIEWS_PAGE_SIZE, 2000). multi_query_view(Req, Db, DDoc, ViewName, Queries) -> - Args0 = couch_mrview_http:parse_params(Req, undefined), + Args = couch_views_http:parse_params(Req, undefined), + case couch_views_util:is_paginated(Args) of + false -> + stream_multi_query_view(Req, Db, DDoc, ViewName, Args, Queries); + true -> + paginate_multi_query_view(Req, Db, DDoc, ViewName, Args, Queries) + end. 
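The intermittent reporting added in the chttpd_stats.erl hunk above implies a small contract for reporter modules: report/5 receives the request, the response (or undefined when invoked mid-request), and the three counters, and returns true when the counters should be reset. A minimal sketch of such a module, under a hypothetical name; note that list_to_existing_atom/1 in report/2 requires the reporter module to already be loaded:

    -module(my_chttpd_stats_reporter).
    -export([report/5]).

    report(_HttpReq, _HttpResp, Reads, Writes, Rows) ->
        couch_log:info("chttpd stats: reads=~p writes=~p rows=~p",
            [Reads, Writes, Rows]),
        true.  % ask chttpd_stats to reset its counters

It would be enabled with something like config:set("chttpd", "stats_reporter", "my_chttpd_stats_reporter", false).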
+ + +stream_multi_query_view(Req, Db, DDoc, ViewName, Args0, Queries) -> {ok, #mrst{views=Views}} = couch_mrview_util:ddoc_to_mrst(Db, DDoc), Args1 = couch_mrview_util:set_view_type(Args0, ViewName, Views), - ArgQueries = lists:map(fun({Query}) -> - QueryArg = couch_mrview_http:parse_params(Query, undefined, - Args1, [decoded]), - QueryArg1 = couch_mrview_util:set_view_type(QueryArg, ViewName, Views), - fabric_util:validate_args(Db, DDoc, QueryArg1) - end, Queries), - Options = [{user_ctx, Req#httpd.user_ctx}], + ArgQueries = parse_queries(Req, Args1, Queries, fun(QueryArg) -> + couch_mrview_util:set_view_type(QueryArg, ViewName, Views) + end), VAcc0 = #vacc{db=Db, req=Req, prepend="\r\n"}, FirstChunk = "{\"results\":[", {ok, Resp0} = chttpd:start_delayed_json_response(VAcc0#vacc.req, 200, [], FirstChunk), VAcc1 = VAcc0#vacc{resp=Resp0}, VAcc2 = lists:foldl(fun(Args, Acc0) -> - {ok, Acc1} = fabric:query_view(Db, Options, DDoc, ViewName, - fun view_cb/2, Acc0, Args), + Fun = fun view_cb/2, + {ok, Acc1} = couch_views:query(Db, DDoc, ViewName, Fun, Acc0, Args), Acc1 end, VAcc1, ArgQueries), {ok, Resp1} = chttpd:send_delayed_chunk(VAcc2#vacc.resp, "\r\n]}"), chttpd:end_delayed_json_response(Resp1). + +paginate_multi_query_view(Req, Db, DDoc, ViewName, Args0, Queries) -> + {ok, #mrst{views=Views}} = couch_mrview_util:ddoc_to_mrst(Db, DDoc), + ArgQueries = parse_queries(Req, Args0, Queries, fun(QueryArg) -> + couch_mrview_util:set_view_type(QueryArg, ViewName, Views) + end), + KeyFun = fun({Props}) -> + {couch_util:get_value(id, Props), couch_util:get_value(key, Props)} + end, + #mrargs{page_size = PageSize} = Args0, + #httpd{path_parts = Parts} = Req, + UpdateSeq = fabric2_db:get_update_seq(Db), + EtagTerm = {Parts, UpdateSeq, Args0}, + Response = couch_views_http:paginated( + Req, EtagTerm, PageSize, ArgQueries, KeyFun, + fun(Args) -> + {ok, #vacc{meta=MetaMap, buffer=Items}} = couch_views:query( + Db, DDoc, ViewName, fun view_cb/2, #vacc{paginated=true}, Args), + {MetaMap, Items} + end), + chttpd:send_json(Req, Response). + + design_doc_post_view(Req, Props, Db, DDoc, ViewName, Keys) -> Args = couch_mrview_http:parse_body_and_query(Req, Props, Keys), fabric_query_view(Db, Req, DDoc, ViewName, Args). design_doc_view(Req, Db, DDoc, ViewName, Keys) -> - Args = couch_mrview_http:parse_params(Req, Keys), + Args = couch_views_http:parse_params(Req, Keys), fabric_query_view(Db, Req, DDoc, ViewName, Args). + fabric_query_view(Db, Req, DDoc, ViewName, Args) -> + case couch_views_util:is_paginated(Args) of + false -> + stream_fabric_query_view(Db, Req, DDoc, ViewName, Args); + true -> + paginate_fabric_query_view(Db, Req, DDoc, ViewName, Args) + end. + + +stream_fabric_query_view(Db, Req, DDoc, ViewName, Args) -> Max = chttpd:chunked_response_buffer_size(), + Fun = fun view_cb/2, VAcc = #vacc{db=Db, req=Req, threshold=Max}, - Options = [{user_ctx, Req#httpd.user_ctx}], - {ok, Resp} = fabric:query_view(Db, Options, DDoc, ViewName, - fun view_cb/2, VAcc, Args), + {ok, Resp} = couch_views:query(Db, DDoc, ViewName, Fun, VAcc, Args), {ok, Resp#vacc.resp}. 
+paginate_fabric_query_view(Db, Req, DDoc, ViewName, Args0) -> + KeyFun = fun({Props}) -> + {couch_util:get_value(id, Props), couch_util:get_value(key, Props)} + end, + #httpd{path_parts = Parts} = Req, + UpdateSeq = fabric2_db:get_update_seq(Db), + ETagTerm = {Parts, UpdateSeq, Args0}, + Response = couch_views_http:paginated( + Req, ETagTerm, Args0, KeyFun, + fun(Args) -> + VAcc0 = #vacc{paginated=true}, + {ok, VAcc1} = couch_views:query(Db, DDoc, ViewName, fun view_cb/2, VAcc0, Args), + #vacc{meta=Meta, buffer=Items} = VAcc1, + {Meta, Items} + end), + chttpd:send_json(Req, Response). + view_cb({row, Row} = Msg, Acc) -> case lists:keymember(doc, 1, Row) of true -> chttpd_stats:incr_reads(); false -> ok end, chttpd_stats:incr_rows(), - couch_mrview_http:view_cb(Msg, Acc); + couch_views_http:view_cb(Msg, Acc); view_cb(Msg, Acc) -> - couch_mrview_http:view_cb(Msg, Acc). + couch_views_http:view_cb(Msg, Acc). handle_view_req(#httpd{method='POST', @@ -102,9 +164,6 @@ handle_view_req(#httpd{method='POST', handle_view_req(Req, _Db, _DDoc) -> chttpd:send_method_not_allowed(Req, "GET,POST,HEAD"). -handle_temp_view_req(Req, _Db) -> - Msg = <<"Temporary views are not supported in CouchDB">>, - chttpd:send_error(Req, 410, gone, Msg). % See https://github.com/apache/couchdb/issues/2168 assert_no_queries_param(undefined) -> @@ -116,6 +175,86 @@ assert_no_queries_param(_) -> }). +validate_args(Req, #mrargs{page_size = PageSize} = Args) when is_integer(PageSize) -> + MaxPageSize = max_page_size(Req), + couch_views_util:validate_args(Args, [{page_size, MaxPageSize}]); + +validate_args(_Req, #mrargs{} = Args) -> + couch_views_util:validate_args(Args, []). + + +max_page_size(#httpd{path_parts=[_Db, <<"_all_docs">>, <<"queries">>]}) -> + config:get_integer( + "request_limits", "_all_docs/queries", ?DEFAULT_ALL_DOCS_PAGE_SIZE); + +max_page_size(#httpd{path_parts=[_Db, <<"_all_docs">>]}) -> + config:get_integer( + "request_limits", "_all_docs", ?DEFAULT_ALL_DOCS_PAGE_SIZE); + +max_page_size(#httpd{path_parts=[_Db, <<"_local_docs">>, <<"queries">>]}) -> + config:get_integer( + "request_limits", "_all_docs/queries", ?DEFAULT_ALL_DOCS_PAGE_SIZE); + +max_page_size(#httpd{path_parts=[_Db, <<"_local_docs">>]}) -> + config:get_integer( + "request_limits", "_all_docs", ?DEFAULT_ALL_DOCS_PAGE_SIZE); + +max_page_size(#httpd{path_parts=[_Db, <<"_design_docs">>, <<"queries">>]}) -> + config:get_integer( + "request_limits", "_all_docs/queries", ?DEFAULT_ALL_DOCS_PAGE_SIZE); + +max_page_size(#httpd{path_parts=[_Db, <<"_design_docs">>]}) -> + config:get_integer( + "request_limits", "_all_docs", ?DEFAULT_ALL_DOCS_PAGE_SIZE); + +max_page_size(#httpd{path_parts=[ + _Db, <<"_design">>, _DDocName, <<"_view">>, _View, <<"queries">>]}) -> + config:get_integer( + "request_limits", "_view/queries", ?DEFAULT_VIEWS_PAGE_SIZE); + +max_page_size(#httpd{path_parts=[ + _Db, <<"_design">>, _DDocName, <<"_view">>, _View]}) -> + config:get_integer( + "request_limits", "_view", ?DEFAULT_VIEWS_PAGE_SIZE). 
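The page-size caps in max_page_size/1 above are read from the request_limits config section, with the keys spelled exactly as in the clauses (_all_docs, _all_docs/queries, _view, _view/queries) and both defaults set to 2000 by the macros at the top of the module. A hedged example of tightening them at runtime:

    ok = config:set("request_limits", "_all_docs", "1000", _Persist = false),
    ok = config:set("request_limits", "_view", "500", _Persist = false).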
+ + +parse_queries(Req, #mrargs{page_size = PageSize} = Args0, Queries, Fun) + when is_integer(PageSize) -> + MaxPageSize = max_page_size(Req), + if length(Queries) < PageSize -> ok; true -> + throw({ + query_parse_error, + <<"Provided number of queries is more than given page_size">> + }) + end, + couch_views_util:validate_args(Fun(Args0), [{page_size, MaxPageSize}]), + Args = Args0#mrargs{page_size = undefined}, + lists:map(fun({Query}) -> + Args1 = couch_views_http:parse_params(Query, undefined, Args, [decoded]), + if not is_integer(Args1#mrargs.page_size) -> ok; true -> + throw({ + query_parse_error, + <<"You cannot specify `page_size` inside the query">> + }) + end, + Args2 = maybe_set_page_size(Args1, MaxPageSize), + couch_views_util:validate_args(Fun(Args2), [{page_size, MaxPageSize}]) + end, Queries); + +parse_queries(_Req, #mrargs{} = Args, Queries, Fun) -> + lists:map(fun({Query}) -> + Args1 = couch_views_http:parse_params(Query, undefined, Args, [decoded]), + couch_views_util:validate_args(Fun(Args1)) + end, Queries). + + +maybe_set_page_size(#mrargs{page_size = undefined} = Args, MaxPageSize) -> + Args#mrargs{page_size = MaxPageSize}; + +maybe_set_page_size(#mrargs{} = Args, _MaxPageSize) -> + Args. + + -ifdef(TEST). -include_lib("eunit/include/eunit.hrl"). @@ -141,7 +280,7 @@ check_multi_query_reduce_view_overrides_test_() -> t_check_include_docs_throw_validation_error() -> ?_test(begin Req = #httpd{qs = []}, - Db = test_util:fake_db([{name, <<"foo">>}]), + Db = #{name => <<"foo">>}, Query = {[{<<"include_docs">>, true}]}, Throw = {query_parse_error, <<"`include_docs` is invalid for reduce">>}, ?assertThrow(Throw, multi_query_view(Req, Db, ddoc, <<"v">>, [Query])) @@ -151,7 +290,7 @@ t_check_include_docs_throw_validation_error() -> t_check_user_can_override_individual_query_type() -> ?_test(begin Req = #httpd{qs = []}, - Db = test_util:fake_db([{name, <<"foo">>}]), + Db = #{name => <<"foo">>}, Query = {[{<<"include_docs">>, true}, {<<"reduce">>, false}]}, multi_query_view(Req, Db, ddoc, <<"v">>, [Query]), ?assertEqual(1, meck:num_calls(chttpd, start_delayed_json_response, '_')) @@ -162,7 +301,7 @@ setup_all() -> Views = [#mrview{reduce_funs = [{<<"v">>, <<"_count">>}]}], meck:expect(couch_mrview_util, ddoc_to_mrst, 2, {ok, #mrst{views = Views}}), meck:expect(chttpd, start_delayed_json_response, 4, {ok, resp}), - meck:expect(fabric, query_view, 7, {ok, #vacc{}}), + meck:expect(couch_views, query, 6, {ok, #vacc{}}), meck:expect(chttpd, send_delayed_chunk, 2, {ok, resp}), meck:expect(chttpd, end_delayed_json_response, 1, ok). @@ -174,8 +313,8 @@ teardown_all(_) -> setup() -> meck:reset([ chttpd, - couch_mrview_util, - fabric + couch_views, + couch_mrview_util ]). diff --git a/src/chttpd/test/eunit/chttpd_auth_tests.erl b/src/chttpd/test/eunit/chttpd_auth_tests.erl new file mode 100644 index 000000000..b4a8eabfb --- /dev/null +++ b/src/chttpd/test/eunit/chttpd_auth_tests.erl @@ -0,0 +1,129 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(chttpd_auth_tests). 
+ +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("couch/include/couch_db.hrl"). + + +setup() -> + Addr = config:get("chttpd", "bind_address", "127.0.0.1"), + Port = mochiweb_socket_server:get(chttpd, port), + BaseUrl = lists:concat(["http://", Addr, ":", Port]), + BaseUrl. + +teardown(_Url) -> + ok. + + +require_valid_user_exception_test_() -> + { + "_up", + { + setup, + fun chttpd_test_util:start_couch/0, + fun chttpd_test_util:stop_couch/1, + { + foreach, + fun setup/0, fun teardown/1, + [ + fun should_handle_require_valid_user_except_up_on_up_route/1, + fun should_handle_require_valid_user_except_up_on_non_up_routes/1 + ] + } + } + }. + +set_require_user_false() -> + ok = config:set("chttpd", "require_valid_user", "false", _Persist=false). + +set_require_user_true() -> + ok = config:set("chttpd", "require_valid_user", "true", _Persist=false). + +set_require_user_except_for_up_false() -> + ok = config:set("chttpd", "require_valid_user_except_for_up", "false", _Persist=false). + +set_require_user_except_for_up_true() -> + ok = config:set("chttpd", "require_valid_user_except_for_up", "true", _Persist=false). + +should_handle_require_valid_user_except_up_on_up_route(_Url) -> + ?_test(begin + % require_valid_user | require_valid_user_except_up | up needs auth + % 1 F | F | F + % 2 F | T | F + % 3 T | F | T + % 4 T | T | F + + UpRequest = #httpd{path_parts=[<<"_up">>]}, + % we use ?ADMIN_USER here because these tests run under admin party + % so this is equivalent to an unauthenticated request + ExpectAuth = {unauthorized, <<"Authentication required.">>}, + ExpectNoAuth = #httpd{user_ctx=?ADMIN_USER,path_parts=[<<"_up">>]}, + + % 1 + set_require_user_false(), + set_require_user_except_for_up_false(), + Result1 = chttpd_auth:party_mode_handler(UpRequest), + ?assertEqual(ExpectNoAuth, Result1), + + % 2 + set_require_user_false(), + set_require_user_except_for_up_true(), + Result2 = chttpd_auth:party_mode_handler(UpRequest), + ?assertEqual(ExpectNoAuth, Result2), + + % 3 + set_require_user_true(), + set_require_user_except_for_up_false(), + ?assertThrow(ExpectAuth, chttpd_auth:party_mode_handler(UpRequest)), + + % 4 + set_require_user_true(), + set_require_user_except_for_up_true(), + Result4 = chttpd_auth:party_mode_handler(UpRequest), + ?assertEqual(ExpectNoAuth, Result4) + + end). + +should_handle_require_valid_user_except_up_on_non_up_routes(_Url) -> + ?_test(begin + % require_valid_user | require_valid_user_except_up | everything not _up requires auth + % 5 F | F | F + % 6 F | T | T + % 7 T | F | T + % 8 T | T | T + + NonUpRequest = #httpd{path_parts=[<<"/">>]}, + ExpectAuth = {unauthorized, <<"Authentication required.">>}, + ExpectNoAuth = #httpd{user_ctx=?ADMIN_USER,path_parts=[<<"/">>]}, + % 5 + set_require_user_false(), + set_require_user_except_for_up_false(), + Result5 = chttpd_auth:party_mode_handler(NonUpRequest), + ?assertEqual(ExpectNoAuth, Result5), + + % 6 + set_require_user_false(), + set_require_user_except_for_up_true(), + ?assertThrow(ExpectAuth, chttpd_auth:party_mode_handler(NonUpRequest)), + + % 7 + set_require_user_true(), + set_require_user_except_for_up_false(), + ?assertThrow(ExpectAuth, chttpd_auth:party_mode_handler(NonUpRequest)), + + % 8 + set_require_user_true(), + set_require_user_except_for_up_true(), + ?assertThrow(ExpectAuth, chttpd_auth:party_mode_handler(NonUpRequest)) + end). 
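The truth tables in the two tests above spell out how require_valid_user and require_valid_user_except_for_up combine. The same decision expressed as a small Erlang sketch (a hypothetical helper for illustration only, not the actual chttpd_auth implementation):

%% RequireUser and ExceptUp mirror the two chttpd config flags;
%% IsUp is true only for requests to the _up endpoint.
%% Returns true when the request must carry valid credentials.
requires_auth(RequireUser, ExceptUp, IsUp) ->
    case {RequireUser, ExceptUp, IsUp} of
        {false, false, _}     -> false;  % cases 1 and 5
        {false, true,  true}  -> false;  % case 2: _up stays open
        {false, true,  false} -> true;   % case 6
        {true,  false, _}     -> true;   % cases 3 and 7
        {true,  true,  true}  -> false;  % case 4: _up stays open
        {true,  true,  false} -> true    % case 8
    end.

In words: require_valid_user_except_for_up behaves like require_valid_user for every route except /_up, which always stays reachable without credentials.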
diff --git a/src/chttpd/test/eunit/chttpd_csp_tests.erl b/src/chttpd/test/eunit/chttpd_csp_tests.erl index e86436254..b80e3fee6 100644 --- a/src/chttpd/test/eunit/chttpd_csp_tests.erl +++ b/src/chttpd/test/eunit/chttpd_csp_tests.erl @@ -56,7 +56,7 @@ should_not_return_any_csp_headers_when_disabled(Url) -> should_apply_default_policy(Url) -> ?_assertEqual( - "default-src 'self'; img-src 'self' data:; font-src 'self'; " + "child-src 'self' data: blob:; default-src 'self'; img-src 'self' data:; font-src 'self'; " "script-src 'self' 'unsafe-eval'; style-src 'self' 'unsafe-inline';", begin {ok, _, Headers, _} = test_request:get(Url), diff --git a/src/chttpd/test/eunit/chttpd_db_bulk_get_multipart_test.erl b/src/chttpd/test/eunit/chttpd_db_bulk_get_multipart_test.erl index 86a8eab1a..c0095d59d 100644 --- a/src/chttpd/test/eunit/chttpd_db_bulk_get_multipart_test.erl +++ b/src/chttpd/test/eunit/chttpd_db_bulk_get_multipart_test.erl @@ -39,7 +39,7 @@ setup() -> couch_epi, couch_httpd, couch_stats, - fabric, + fabric2_db, mochireq ]), spawn_accumulator(). @@ -78,13 +78,13 @@ bulk_get_test_() -> should_require_docs_field(_) -> Req = fake_request({[{}]}), - Db = test_util:fake_db([{name, <<"foo">>}]), + Db = #{name => <<"foo">>}, ?_assertThrow({bad_request, _}, chttpd_db:db_req(Req, Db)). should_not_accept_specific_query_params(_) -> Req = fake_request({[{<<"docs">>, []}]}), - Db = test_util:fake_db([{name, <<"foo">>}]), + Db = #{name => <<"foo">>}, lists:map(fun (Param) -> {Param, ?_assertThrow({bad_request, _}, begin BadReq = Req#httpd{qs = [{Param, ""}]}, @@ -95,7 +95,7 @@ should_not_accept_specific_query_params(_) -> should_return_empty_results_on_no_docs(Pid) -> Req = fake_request({[{<<"docs">>, []}]}), - Db = test_util:fake_db([{name, <<"foo">>}]), + Db = #{name => <<"foo">>}, chttpd_db:db_req(Req, Db), Results = get_results_from_response(Pid), ?_assertEqual([], Results). 
@@ -104,7 +104,7 @@ should_return_empty_results_on_no_docs(Pid) -> should_get_doc_with_all_revs(Pid) -> DocId = <<"docudoc">>, Req = fake_request(DocId), - Db = test_util:fake_db([{name, <<"foo">>}]), + Db = #{name => <<"foo">>}, DocRevA = #doc{id = DocId, body = {[{<<"_rev">>, <<"1-ABC">>}]}}, DocRevB = #doc{id = DocId, body = {[{<<"_rev">>, <<"1-CDE">>}]}}, @@ -120,7 +120,7 @@ should_validate_doc_with_bad_id(Pid) -> DocId = <<"_docudoc">>, Req = fake_request(DocId), - Db = test_util:fake_db([{name, <<"foo">>}]), + Db = #{name => <<"foo">>}, chttpd_db:db_req(Req, Db), Result = get_results_from_response(Pid), @@ -137,7 +137,7 @@ should_validate_doc_with_bad_rev(Pid) -> Rev = <<"revorev">>, Req = fake_request(DocId, Rev), - Db = test_util:fake_db([{name, <<"foo">>}]), + Db = #{name => <<"foo">>}, chttpd_db:db_req(Req, Db), Result = get_results_from_response(Pid), @@ -154,7 +154,7 @@ should_validate_missing_doc(Pid) -> Rev = <<"1-revorev">>, Req = fake_request(DocId, Rev), - Db = test_util:fake_db([{name, <<"foo">>}]), + Db = #{name => <<"foo">>}, mock_open_revs([{1,<<"revorev">>}], {ok, []}), chttpd_db:db_req(Req, Db), @@ -172,7 +172,7 @@ should_validate_bad_atts_since(Pid) -> Rev = <<"1-revorev">>, Req = fake_request(DocId, Rev, <<"badattsince">>), - Db = test_util:fake_db([{name, <<"foo">>}]), + Db = #{name => <<"foo">>}, mock_open_revs([{1,<<"revorev">>}], {ok, []}), chttpd_db:db_req(Req, Db), @@ -190,14 +190,13 @@ should_include_attachments_when_atts_since_specified(_) -> Rev = <<"1-revorev">>, Req = fake_request(DocId, Rev, [<<"1-abc">>]), - Db = test_util:fake_db([{name, <<"foo">>}]), + Db = #{name => <<"foo">>}, mock_open_revs([{1,<<"revorev">>}], {ok, []}), chttpd_db:db_req(Req, Db), - ?_assert(meck:called(fabric, open_revs, - ['_', DocId, [{1, <<"revorev">>}], - [{atts_since, [{1, <<"abc">>}]}, attachments, - {user_ctx, undefined}]])). + Options = [{atts_since, [{1, <<"abc">>}]}, attachments], + ?_assert(meck:called(fabric2_db, open_doc_revs, ['_', DocId, + [{1, <<"revorev">>}], Options])). 
%% helpers @@ -217,7 +216,7 @@ fake_request(DocId, Rev, AttsSince) -> mock_open_revs(RevsReq0, RevsResp) -> - ok = meck:expect(fabric, open_revs, + ok = meck:expect(fabric2_db, open_doc_revs, fun(_, _, RevsReq1, _) -> ?assertEqual(RevsReq0, RevsReq1), RevsResp @@ -259,7 +258,7 @@ mock(couch_stats) -> ok = meck:expect(couch_stats, update_gauge, fun(_, _) -> ok end), ok; mock(fabric) -> - ok = meck:new(fabric, [passthrough]), + ok = meck:new(fabric2_db, [passthrough]), ok; mock(config) -> ok = meck:new(config, [passthrough]), diff --git a/src/chttpd/test/eunit/chttpd_db_bulk_get_test.erl b/src/chttpd/test/eunit/chttpd_db_bulk_get_test.erl index 1a3411254..0e4778371 100644 --- a/src/chttpd/test/eunit/chttpd_db_bulk_get_test.erl +++ b/src/chttpd/test/eunit/chttpd_db_bulk_get_test.erl @@ -99,7 +99,8 @@ should_get_doc_with_all_revs(Pid) -> DocRevB = #doc{id = DocId, body = {[{<<"_rev">>, <<"1-CDE">>}]}}, mock_open_revs(all, {ok, [{ok, DocRevA}, {ok, DocRevB}]}), - chttpd_db:db_req(Req, test_util:fake_db([{name, <<"foo">>}])), + Db = #{name => <<"foo">>}, + chttpd_db:db_req(Req, Db), [{Result}] = get_results_from_response(Pid), ?assertEqual(DocId, couch_util:get_value(<<"id">>, Result)), @@ -119,7 +120,8 @@ should_validate_doc_with_bad_id(Pid) -> DocId = <<"_docudoc">>, Req = fake_request(DocId), - chttpd_db:db_req(Req, test_util:fake_db([{name, <<"foo">>}])), + Db = #{name => <<"foo">>}, + chttpd_db:db_req(Req, Db), [{Result}] = get_results_from_response(Pid), ?assertEqual(DocId, couch_util:get_value(<<"id">>, Result)), @@ -142,7 +144,8 @@ should_validate_doc_with_bad_rev(Pid) -> Rev = <<"revorev">>, Req = fake_request(DocId, Rev), - chttpd_db:db_req(Req, test_util:fake_db([{name, <<"foo">>}])), + Db = #{name => <<"foo">>}, + chttpd_db:db_req(Req, Db), [{Result}] = get_results_from_response(Pid), ?assertEqual(DocId, couch_util:get_value(<<"id">>, Result)), @@ -166,7 +169,8 @@ should_validate_missing_doc(Pid) -> Req = fake_request(DocId, Rev), mock_open_revs([{1,<<"revorev">>}], {ok, []}), - chttpd_db:db_req(Req, test_util:fake_db([{name, <<"foo">>}])), + Db = #{name => <<"foo">>}, + chttpd_db:db_req(Req, Db), [{Result}] = get_results_from_response(Pid), ?assertEqual(DocId, couch_util:get_value(<<"id">>, Result)), @@ -190,7 +194,8 @@ should_validate_bad_atts_since(Pid) -> Req = fake_request(DocId, Rev, <<"badattsince">>), mock_open_revs([{1,<<"revorev">>}], {ok, []}), - chttpd_db:db_req(Req, test_util:fake_db([{name, <<"foo">>}])), + Db = #{name => <<"foo">>}, + chttpd_db:db_req(Req, Db), [{Result}] = get_results_from_response(Pid), ?assertEqual(DocId, couch_util:get_value(<<"id">>, Result)), @@ -214,12 +219,13 @@ should_include_attachments_when_atts_since_specified(_) -> Req = fake_request(DocId, Rev, [<<"1-abc">>]), mock_open_revs([{1,<<"revorev">>}], {ok, []}), - chttpd_db:db_req(Req, test_util:fake_db([{name, <<"foo">>}])), + Db = #{name => <<"foo">>}, + chttpd_db:db_req(Req, Db), + + Options = [{atts_since, [{1, <<"abc">>}]}, attachments], + ?_assert(meck:called(fabric2_db, open_doc_revs, ['_', DocId, + [{1, <<"revorev">>}], Options])). - ?_assert(meck:called(fabric, open_revs, - ['_', DocId, [{1, <<"revorev">>}], - [{atts_since, [{1, <<"abc">>}]}, attachments, - {user_ctx, undefined}]])). 
%% helpers @@ -239,7 +245,7 @@ fake_request(DocId, Rev, AttsSince) -> mock_open_revs(RevsReq0, RevsResp) -> - ok = meck:expect(fabric, open_revs, + ok = meck:expect(fabric2_db, open_doc_revs, fun(_, _, RevsReq1, _) -> ?assertEqual(RevsReq0, RevsReq1), RevsResp @@ -276,7 +282,7 @@ mock(couch_stats) -> ok = meck:expect(couch_stats, update_gauge, fun(_, _) -> ok end), ok; mock(fabric) -> - ok = meck:new(fabric, [passthrough]), + ok = meck:new(fabric2_db, [passthrough]), ok; mock(config) -> ok = meck:new(config, [passthrough]), diff --git a/src/chttpd/test/eunit/chttpd_db_doc_size_tests.erl b/src/chttpd/test/eunit/chttpd_db_doc_size_tests.erl index 88e2797a3..2826cda24 100644 --- a/src/chttpd/test/eunit/chttpd_db_doc_size_tests.erl +++ b/src/chttpd/test/eunit/chttpd_db_doc_size_tests.erl @@ -29,6 +29,8 @@ setup() -> Hashed = couch_passwords:hash_admin_password(?PASS), ok = config:set("admins", ?USER, ?b2l(Hashed), _Persist=false), ok = config:set("couchdb", "max_document_size", "50"), + ok = config:set("couchdb", "max_bulk_docs_count", "2"), + ok = config:set("couchdb", "max_bulk_get_count", "2"), TmpDb = ?tempdb(), Addr = config:get("chttpd", "bind_address", "127.0.0.1"), Port = mochiweb_socket_server:get(chttpd, port), @@ -39,7 +41,10 @@ setup() -> teardown(Url) -> delete_db(Url), ok = config:delete("admins", ?USER, _Persist=false), - ok = config:delete("couchdb", "max_document_size"). + ok = config:delete("couchdb", "max_document_size"), + ok = config:delete("couchdb", "max_bulk_docs_count"), + ok = config:delete("couchdb", "max_bulk_get_count"), + ok. create_db(Url) -> {ok, Status, _, _} = test_request:put(Url, [?CONTENT_JSON, ?AUTH], "{}"), @@ -67,6 +72,8 @@ all_test_() -> fun post_single_doc/1, fun put_single_doc/1, fun bulk_doc/1, + fun bulk_docs_too_many_docs/1, + fun bulk_get_too_many_docs/1, fun put_post_doc_attach_inline/1, fun put_multi_part_related/1, fun post_multi_part_form/1 @@ -100,6 +107,41 @@ bulk_doc(Url) -> Expect = {[{<<"error">>,<<"document_too_large">>},{<<"reason">>,<<>>}]}, ?_assertEqual(Expect, ResultJson). + +bulk_docs_too_many_docs(Url) -> + Docs = "{\"docs\": [" + "{\"doc1\": \"{}\"}, " + "{\"doc2\": \"{}\"}, " + "{\"doc3\": \"{}\"}" + "]}", + {ok, Code, _, ResultBody} = test_request:post(Url ++ "/_bulk_docs/", + [?CONTENT_JSON, ?AUTH], Docs), + ResultJson = ?JSON_DECODE(ResultBody), + ExpectJson = {[ + {<<"error">>,<<"max_bulk_docs_count_exceeded">>}, + {<<"reason">>,<<"2">>} + ]}, + ?_assertEqual({413, ExpectJson}, {Code, ResultJson}). + + +bulk_get_too_many_docs(Url) -> + Docs = lists:map(fun(_) -> + {ok, 201, _, Body} = test_request:post(Url, + [?CONTENT_JSON, ?AUTH], "{}"), + {Props} = ?JSON_DECODE(Body), + {lists:keydelete(<<"ok">>, 1, Props)} + end, [1, 2, 3, 4]), + + {ok, Code, _, ResultBody} = test_request:post(Url ++ "/_bulk_get/", + [?CONTENT_JSON, ?AUTH], ?JSON_ENCODE({[{<<"docs">>, Docs}]})), + ResultJson = ?JSON_DECODE(ResultBody), + ExpectJson = {[ + {<<"error">>,<<"max_bulk_get_count_exceeded">>}, + {<<"reason">>,<<"2">>} + ]}, + ?_assertEqual({413, ExpectJson}, {Code, ResultJson}). 
+ + put_post_doc_attach_inline(Url) -> Body1 = "{\"body\":\"This is a body.\",", Body2 = lists:concat(["{\"body\":\"This is a body it should fail", diff --git a/src/chttpd/test/eunit/chttpd_db_test.erl b/src/chttpd/test/eunit/chttpd_db_test.erl index 204332d7f..cebc3b6d6 100644 --- a/src/chttpd/test/eunit/chttpd_db_test.erl +++ b/src/chttpd/test/eunit/chttpd_db_test.erl @@ -73,8 +73,8 @@ all_test_() -> fun should_return_update_seq_when_set_on_all_docs/1, fun should_not_return_update_seq_when_unset_on_all_docs/1, fun should_return_correct_id_on_doc_copy/1, - fun should_return_400_for_bad_engine/1, - fun should_not_change_db_proper_after_rewriting_shardmap/1, + fun should_ignore_engine_parameter/1, + fun should_return_only_one_ok_on_doc_copy/1, fun should_succeed_on_all_docs_with_queries_keys/1, fun should_succeed_on_all_docs_with_queries_limit_skip/1, fun should_succeed_on_all_docs_with_multiple_queries/1, @@ -269,6 +269,17 @@ should_return_correct_id_on_doc_copy(Url) -> ] end)}. +should_return_only_one_ok_on_doc_copy(Url) -> + {timeout, ?TIMEOUT, ?_test(begin + {ok, _, _, _} = create_doc(Url, "testdoc"), + {_, _, _, ResultBody} = test_request:copy(Url ++ "/testdoc", + [?CONTENT_JSON, ?AUTH, ?DESTHEADER1]), + {ResultJson} = jiffy:decode(ResultBody), + NumOks = length(lists:filter(fun({Key, Value}) -> Key == <<"ok">> end, ResultJson)), + [ + ?assertEqual(1, NumOks) + ] + end)}. attachment_doc() -> {ok, Data} = file:read_file(?FIXTURE_TXT), @@ -282,7 +293,7 @@ attachment_doc() -> ]}. -should_return_400_for_bad_engine(_) -> +should_ignore_engine_parameter(_) -> {timeout, ?TIMEOUT, ?_test(begin TmpDb = ?tempdb(), Addr = config:get("chttpd", "bind_address", "127.0.0.1"), @@ -290,30 +301,7 @@ should_return_400_for_bad_engine(_) -> BaseUrl = lists:concat(["http://", Addr, ":", Port, "/", ?b2l(TmpDb)]), Url = BaseUrl ++ "?engine=cowabunga", {ok, Status, _, _} = test_request:put(Url, [?CONTENT_JSON, ?AUTH], "{}"), - ?assertEqual(400, Status) - end)}. - - -should_not_change_db_proper_after_rewriting_shardmap(_) -> - {timeout, ?TIMEOUT, ?_test(begin - TmpDb = ?tempdb(), - Addr = config:get("chttpd", "bind_address", "127.0.0.1"), - Port = mochiweb_socket_server:get(chttpd, port), - AdmPort = mochiweb_socket_server:get(couch_httpd, port), - - BaseUrl = lists:concat(["http://", Addr, ":", Port, "/", ?b2l(TmpDb)]), - Url = BaseUrl ++ "?partitioned=true&q=1", - {ok, 201, _, _} = test_request:put(Url, [?CONTENT_JSON, ?AUTH], "{}"), - - ShardDbName = ?l2b(config:get("mem3", "shards_db", "_dbs")), - {ok, ShardDb} = mem3_util:ensure_exists(ShardDbName), - {ok, #doc{body = {Props}}} = couch_db:open_doc( - ShardDb, TmpDb, [ejson_body]), - Shards = mem3_util:build_shards(TmpDb, Props), - - {Prop2} = ?JSON_DECODE(?JSON_ENCODE({Props})), - Shards2 = mem3_util:build_shards(TmpDb, Prop2), - ?assertEqual(Shards2, Shards) + ?assertEqual(201, Status) end)}. @@ -341,7 +329,7 @@ should_succeed_on_all_docs_with_queries_limit_skip(Url) -> {ResultJson} = ?JSON_DECODE(RespBody), ResultJsonBody = couch_util:get_value(<<"results">>, ResultJson), {InnerJson} = lists:nth(1, ResultJsonBody), - ?assertEqual(2, couch_util:get_value(<<"offset">>, InnerJson)), + ?assertEqual(null, couch_util:get_value(<<"offset">>, InnerJson)), ?assertEqual(5, length(couch_util:get_value(<<"rows">>, InnerJson))) end)}. 
@@ -359,7 +347,7 @@ should_succeed_on_all_docs_with_multiple_queries(Url) -> {InnerJson1} = lists:nth(1, ResultJsonBody), ?assertEqual(2, length(couch_util:get_value(<<"rows">>, InnerJson1))), {InnerJson2} = lists:nth(2, ResultJsonBody), - ?assertEqual(2, couch_util:get_value(<<"offset">>, InnerJson2)), + ?assertEqual(null, couch_util:get_value(<<"offset">>, InnerJson2)), ?assertEqual(5, length(couch_util:get_value(<<"rows">>, InnerJson2))) end)}. @@ -389,7 +377,7 @@ should_succeed_on_design_docs_with_queries_limit_skip(Url) -> {ResultJson} = ?JSON_DECODE(RespBody), ResultJsonBody = couch_util:get_value(<<"results">>, ResultJson), {InnerJson} = lists:nth(1, ResultJsonBody), - ?assertEqual(2, couch_util:get_value(<<"offset">>, InnerJson)), + ?assertEqual(null, couch_util:get_value(<<"offset">>, InnerJson)), ?assertEqual(5, length(couch_util:get_value(<<"rows">>, InnerJson))) end)}. @@ -407,7 +395,7 @@ should_succeed_on_design_docs_with_multiple_queries(Url) -> {InnerJson1} = lists:nth(1, ResultJsonBody), ?assertEqual(2, length(couch_util:get_value(<<"rows">>, InnerJson1))), {InnerJson2} = lists:nth(2, ResultJsonBody), - ?assertEqual(2, couch_util:get_value(<<"offset">>, InnerJson2)), + ?assertEqual(null, couch_util:get_value(<<"offset">>, InnerJson2)), ?assertEqual(5, length(couch_util:get_value(<<"rows">>, InnerJson2))) end)}. diff --git a/src/chttpd/test/eunit/chttpd_dbs_info_test.erl b/src/chttpd/test/eunit/chttpd_dbs_info_test.erl index 5b61d8831..6e11f3245 100644 --- a/src/chttpd/test/eunit/chttpd_dbs_info_test.erl +++ b/src/chttpd/test/eunit/chttpd_dbs_info_test.erl @@ -57,7 +57,7 @@ dbs_info_test_() -> foreach, fun setup/0, fun teardown/1, [ - fun should_return_error_for_get_db_info/1, + fun should_return_for_get_db_info/1, fun should_return_dbs_info_for_single_db/1, fun should_return_dbs_info_for_multiple_dbs/1, fun should_return_error_for_exceeded_keys/1, @@ -69,15 +69,14 @@ dbs_info_test_() -> }. -should_return_error_for_get_db_info(Url) -> +should_return_for_get_db_info(Url) -> ?_test(begin {ok, Code, _, ResultBody} = test_request:get(Url ++ "/_dbs_info?" - ++ "keys=[\"db1\"]", [?CONTENT_JSON, ?AUTH]), - {Body} = jiffy:decode(ResultBody), + ++ "start_key=\"db1\"&end_key=\"db1\"", [?CONTENT_JSON, ?AUTH]), + Body = jiffy:decode(ResultBody, [return_maps]), [ - ?assertEqual(<<"method_not_allowed">>, - couch_util:get_value(<<"error">>, Body)), - ?assertEqual(405, Code) + ?assertEqual(200, Code), + ?assertMatch([#{<<"db_name">> := <<"db1">>}], Body) ] end). diff --git a/src/chttpd/test/eunit/chttpd_delayed_test.erl b/src/chttpd/test/eunit/chttpd_delayed_test.erl new file mode 100644 index 000000000..63e6cb0e5 --- /dev/null +++ b/src/chttpd/test/eunit/chttpd_delayed_test.erl @@ -0,0 +1,72 @@ +-module(chttpd_delayed_test). + +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("couch/include/couch_db.hrl"). + +-define(USER, "chttpd_view_test_admin"). +-define(PASS, "pass"). +-define(AUTH, {basic_auth, {?USER, ?PASS}}). +-define(CONTENT_JSON, {"Content-Type", "application/json"}). +-define(DDOC, "{\"_id\": \"_design/bar\", \"views\": {\"baz\": + {\"map\": \"function(doc) {emit(doc._id, doc._id);}\"}}}"). + +-define(FIXTURE_TXT, ?ABS_PATH(?FILE)). +-define(i2l(I), integer_to_list(I)). +-define(TIMEOUT, 60). 
% seconds + +setup() -> + Hashed = couch_passwords:hash_admin_password(?PASS), + ok = config:set("admins", ?USER, ?b2l(Hashed), _Persist=false), + ok = config:set("chttpd", "buffer_response", "true", _Persist=false), + TmpDb = ?tempdb(), + Addr = config:get("chttpd", "bind_address", "127.0.0.1"), + Port = mochiweb_socket_server:get(chttpd, port), + Url = lists:concat(["http://", Addr, ":", Port, "/", ?b2l(TmpDb)]), + create_db(Url), + Url. + +teardown(Url) -> + delete_db(Url), + ok = config:delete("admins", ?USER, _Persist=false). + +create_db(Url) -> + {ok, Status, _, _} = test_request:put(Url, [?CONTENT_JSON, ?AUTH], "{}"), + ?assert(Status =:= 201 orelse Status =:= 202). + + +delete_db(Url) -> + {ok, 200, _, _} = test_request:delete(Url, [?AUTH]). + + +all_test_() -> + { + "chttpd delay tests", + { + setup, + fun chttpd_test_util:start_couch/0, fun chttpd_test_util:stop_couch/1, + { + foreach, + fun setup/0, fun teardown/1, + [ + fun test_buffer_response_all_docs/1, + fun test_buffer_response_changes/1 + ] + } + } + }. + + +test_buffer_response_all_docs(Url) -> + assert_successful_response(Url ++ "/_all_docs"). + + +test_buffer_response_changes(Url) -> + assert_successful_response(Url ++ "/_changes"). + + +assert_successful_response(Url) -> + {timeout, ?TIMEOUT, ?_test(begin + {ok, Code, _Headers, _Body} = test_request:get(Url, [?AUTH]), + ?assertEqual(200, Code) + end)}. + diff --git a/src/chttpd/test/eunit/chttpd_deleted_dbs_test.erl b/src/chttpd/test/eunit/chttpd_deleted_dbs_test.erl new file mode 100644 index 000000000..d6375c048 --- /dev/null +++ b/src/chttpd/test/eunit/chttpd_deleted_dbs_test.erl @@ -0,0 +1,234 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(chttpd_deleted_dbs_test). + +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("couch/include/couch_db.hrl"). + +-define(USER, "chttpd_db_test_admin"). +-define(PASS, "pass"). +-define(AUTH, {basic_auth, {?USER, ?PASS}}). +-define(CONTENT_JSON, {"Content-Type", "application/json"}). + + +setup() -> + Hashed = couch_passwords:hash_admin_password(?PASS), + ok = config:set("admins", ?USER, ?b2l(Hashed), _Persist=false), + Addr = config:get("chttpd", "bind_address", "127.0.0.1"), + Port = mochiweb_socket_server:get(chttpd, port), + lists:concat(["http://", Addr, ":", Port, "/"]). + + +teardown(_Url) -> + ok = config:delete("couchdb", "enable_database_recovery", false), + ok = config:delete("admins", ?USER, _Persist=false). + + +create_db(Url) -> + {ok, Status, _, _} = http(put, Url, ""), + ?assert(Status =:= 201 orelse Status =:= 202). + + +delete_db(Url) -> + {ok, 200, _, _} = http(delete, Url). 
+ + +deleted_dbs_test_() -> + { + "chttpd deleted dbs tests", + { + setup, + fun chttpd_test_util:start_couch/0, + fun chttpd_test_util:stop_couch/1, + { + foreach, + fun setup/0, + fun teardown/1, + [ + fun should_return_error_for_unsupported_method/1, + fun should_list_deleted_dbs/1, + fun should_list_deleted_dbs_info/1, + fun should_undelete_db/1, + fun should_remove_deleted_db/1, + fun should_undelete_db_to_target_db/1, + fun should_not_undelete_db_to_existing_db/1 + ] + } + } + }. + + +should_return_error_for_unsupported_method(Url) -> + ?_test(begin + {ok, Code, _, Body} = http(delete, mk_url(Url)), + + ?assertEqual(405, Code), + ?assertEqual(<<"method_not_allowed">>, get_json(<<"error">>, Body)) + end). + + +should_list_deleted_dbs(Url) -> + ?_test(begin + DbName1 = create_and_delete_db(Url), + DbName2 = create_and_delete_db(Url), + {ok, Code, _, Body} = http(get, mk_url(Url)), + DeletedDbs = get_db_names(Body), + + ?assertEqual(200, Code), + ?assertEqual(true, lists:member(DbName1, DeletedDbs)), + ?assertEqual(true, lists:member(DbName2, DeletedDbs)) + end). + + +should_list_deleted_dbs_info(Url) -> + ?_test(begin + DbName = create_and_delete_db(Url), + {ok, _, _, Body} = http(get, mk_url(Url, DbName)), + [{Props}] = jiffy:decode(Body), + + ?assertEqual(DbName, couch_util:get_value(<<"db_name">>, Props)) + end). + + +should_undelete_db(Url) -> + ?_test(begin + DbName = create_and_delete_db(Url), + {ok, _, _, ResultBody} = http(get, mk_url(Url, DbName)), + [{Props}] = jiffy:decode(ResultBody), + TimeStamp = couch_util:get_value(<<"timestamp">>, Props), + + ErlJSON = {[ + {undelete, {[ + {source, DbName}, + {timestamp, TimeStamp} + ]}} + ]}, + + {ok, Code1, _, _} = http(get, Url ++ DbName), + ?assertEqual(404, Code1), + + {ok, Code2, _, _} = http(post, mk_url(Url), ErlJSON), + ?assertEqual(200, Code2), + + {ok, Code3, _, _} = http(get, Url ++ DbName), + ?assertEqual(200, Code3) + end). + + +should_remove_deleted_db(Url) -> + ?_test(begin + DbName = create_and_delete_db(Url), + {ok, _, _, Body1} = http(get, mk_url(Url, DbName)), + [{Props}] = jiffy:decode(Body1), + TimeStamp = couch_util:get_value(<<"timestamp">>, Props), + + {ok, Code, _, _} = http(delete, mk_url(Url, DbName, TimeStamp)), + ?assertEqual(200, Code), + + {ok, _, _, Body2} = http(get, mk_url(Url, DbName)), + ?assertEqual([], jiffy:decode(Body2)) + end). + + +should_undelete_db_to_target_db(Url) -> + ?_test(begin + DbName = create_and_delete_db(Url), + {ok, _, _, Body} = http(get, mk_url(Url, DbName)), + [{Props}] = jiffy:decode(Body), + TimeStamp = couch_util:get_value(<<"timestamp">>, Props), + + NewDbName = ?tempdb(), + ErlJSON = {[ + {undelete, {[ + {source, DbName}, + {timestamp, TimeStamp}, + {target, NewDbName} + ]}} + ]}, + + {ok, Code1, _, _} = http(get, Url ++ NewDbName), + ?assertEqual(404, Code1), + + {ok, Code2, _, _} = http(post, mk_url(Url), ErlJSON), + ?assertEqual(200, Code2), + + {ok, Code3, _, _} = http(get, Url ++ NewDbName), + ?assertEqual(200, Code3) + end). 
+ + +should_not_undelete_db_to_existing_db(Url) -> + ?_test(begin + DbName = create_and_delete_db(Url), + {ok, _, _, ResultBody} = http(get, mk_url(Url, DbName)), + [{Props}] = jiffy:decode(ResultBody), + TimeStamp = couch_util:get_value(<<"timestamp">>, Props), + + NewDbName = ?tempdb(), + create_db(Url ++ NewDbName), + ErlJSON = {[ + {undelete, {[ + {source, DbName}, + {timestamp, TimeStamp}, + {target, NewDbName} + ]}} + ]}, + {ok, Code2, _, ResultBody2} = http(post, mk_url(Url), ErlJSON), + ?assertEqual(412, Code2), + ?assertEqual(<<"file_exists">>, get_json(<<"error">>, ResultBody2)) + end). + + +create_and_delete_db(BaseUrl) -> + DbName = ?tempdb(), + DbUrl = BaseUrl ++ DbName, + create_db(DbUrl), + ok = config:set("couchdb", "enable_database_recovery", "true", false), + delete_db(DbUrl), + DbName. + + +http(Verb, Url) -> + Headers = [?CONTENT_JSON, ?AUTH], + test_request:Verb(Url, Headers). + + +http(Verb, Url, Body) -> + Headers = [?CONTENT_JSON, ?AUTH], + test_request:Verb(Url, Headers, jiffy:encode(Body)). + + +mk_url(Url) -> + Url ++ "/_deleted_dbs". + + +mk_url(Url, DbName) -> + Url ++ "/_deleted_dbs?key=\"" ++ ?b2l(DbName) ++ "\"". + + +mk_url(Url, DbName, TimeStamp) -> + Url ++ "/_deleted_dbs/" ++ ?b2l(DbName) ++ "?timestamp=\"" ++ + ?b2l(TimeStamp) ++ "\"". + + +get_json(Key, Body) -> + {Props} = jiffy:decode(Body), + couch_util:get_value(Key, Props). + + +get_db_names(Body) -> + RevDbNames = lists:foldl(fun({DbInfo}, Acc) -> + DbName = couch_util:get_value(<<"db_name">>, DbInfo), + [DbName | Acc] + end, [], jiffy:decode(Body)), + lists:reverse(RevDbNames). diff --git a/src/chttpd/test/eunit/chttpd_handlers_tests.erl b/src/chttpd/test/eunit/chttpd_handlers_tests.erl index f3e8f5dcd..649d82e86 100644 --- a/src/chttpd/test/eunit/chttpd_handlers_tests.erl +++ b/src/chttpd/test/eunit/chttpd_handlers_tests.erl @@ -70,7 +70,7 @@ request_replicate(Url, Body) -> Headers = [{"Content-Type", "application/json"}], Handler = {chttpd_misc, handle_replicate_req}, request(post, Url, Headers, Body, Handler, fun(Req) -> - chttpd:send_json(Req, 200, get(post_body)) + chttpd:send_json(Req, 200, Req#httpd.req_body) end). request(Method, Url, Headers, Body, {M, F}, MockFun) -> diff --git a/src/chttpd/test/eunit/chttpd_open_revs_error_test.erl b/src/chttpd/test/eunit/chttpd_open_revs_error_test.erl deleted file mode 100644 index d53d370f8..000000000 --- a/src/chttpd/test/eunit/chttpd_open_revs_error_test.erl +++ /dev/null @@ -1,112 +0,0 @@ -% Licensed under the Apache License, Version 2.0 (the "License"); you may not -% use this file except in compliance with the License. You may obtain a copy of -% the License at -% -% http://www.apache.org/licenses/LICENSE-2.0 -% -% Unless required by applicable law or agreed to in writing, software -% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -% License for the specific language governing permissions and limitations under -% the License. - --module(chttpd_open_revs_error_test). - --include_lib("couch/include/couch_eunit.hrl"). --include_lib("couch/include/couch_db.hrl"). - --define(USER, "chttpd_db_test_admin"). --define(PASS, "pass"). --define(AUTH, {basic_auth, {?USER, ?PASS}}). --define(CONTENT_JSON, {"Content-Type", "application/json"}). --define(CONTENT_MULTI_FORM, {"Content-Type", - "multipart/form-data;boundary=\"bound\""}). 
- -setup() -> - Hashed = couch_passwords:hash_admin_password(?PASS), - ok = config:set("admins", ?USER, ?b2l(Hashed), _Persist=false), - TmpDb = ?tempdb(), - Addr = config:get("chttpd", "bind_address", "127.0.0.1"), - Port = mochiweb_socket_server:get(chttpd, port), - Url = lists:concat(["http://", Addr, ":", Port, "/", ?b2l(TmpDb)]), - mock(fabric), - create_db(Url), - Url. - -teardown(Url) -> - delete_db(Url), - (catch meck:unload(fabric)), - ok = config:delete("admins", ?USER, _Persist=false). - -create_db(Url) -> - {ok, Status, _, _} = test_request:put(Url, [?CONTENT_JSON, ?AUTH], "{}"), - ?assert(Status =:= 201 orelse Status =:= 202). - - -create_doc(Url, Id) -> - test_request:put(Url ++ "/" ++ Id, - [?CONTENT_JSON, ?AUTH], "{\"mr\": \"rockoartischocko\"}"). - -delete_db(Url) -> - {ok, 200, _, _} = test_request:delete(Url, [?AUTH]). - -open_revs_error_test_() -> - { - "open revs error tests", - { - setup, - fun chttpd_test_util:start_couch/0, - fun chttpd_test_util:stop_couch/1, - { - foreach, - fun setup/0, fun teardown/1, - [ - fun should_return_503_error_for_open_revs_get/1, - fun should_return_503_error_for_open_revs_post_form/1 - ] - } - } - }. - -should_return_503_error_for_open_revs_get(Url) -> - {ok, _, _, Body} = create_doc(Url, "testdoc"), - {Json} = ?JSON_DECODE(Body), - Ref = couch_util:get_value(<<"rev">>, Json, undefined), - mock_open_revs({error, all_workers_died}), - {ok, Code, _, _} = test_request:get(Url ++ - "/testdoc?rev=" ++ ?b2l(Ref), [?AUTH]), - ?_assertEqual(503, Code). - -should_return_503_error_for_open_revs_post_form(Url) -> - Port = mochiweb_socket_server:get(chttpd, port), - Host = lists:concat([ "http://127.0.0.1:", Port]), - Referer = {"Referer", Host}, - Body1 = "{\"body\":\"This is a body.\"}", - DocBeg = "--bound\r\nContent-Disposition: form-data; name=\"_doc\"\r\n\r\n", - DocRev = "--bound\r\nContent-Disposition: form-data; name=\"_rev\"\r\n\r\n", - DocRest = "\r\n--bound\r\nContent-Disposition:" - "form-data; name=\"_attachments\"; filename=\"file.txt\"\r\n" - "Content-Type: text/plain\r\n\r\ncontents of file.txt\r\n\r\n" - "--bound--", - Doc1 = lists:concat([DocBeg, Body1, DocRest]), - {ok, _, _, ResultBody} = test_request:post(Url ++ "/" ++ "RevDoc", - [?CONTENT_MULTI_FORM, ?AUTH, Referer], Doc1), - {Json} = ?JSON_DECODE(ResultBody), - Ref = couch_util:get_value(<<"rev">>, Json, undefined), - Doc2 = lists:concat([DocRev, ?b2l(Ref) , DocRest]), - - mock_open_revs({error, all_workers_died}), - {ok, Code, _, ResultBody1} = test_request:post(Url ++ "/" ++ "RevDoc", - [?CONTENT_MULTI_FORM, ?AUTH, Referer], Doc2), - {Json1} = ?JSON_DECODE(ResultBody1), - ErrorMessage = couch_util:get_value(<<"error">>, Json1), - [ - ?_assertEqual(503, Code), - ?_assertEqual(<<"service unvailable">>, ErrorMessage) - ]. - -mock_open_revs(RevsResp) -> - ok = meck:expect(fabric, open_revs, fun(_, _, _, _) -> RevsResp end). - -mock(fabric) -> - ok = meck:new(fabric, [passthrough]). diff --git a/src/chttpd/test/eunit/chttpd_purge_tests.erl b/src/chttpd/test/eunit/chttpd_purge_tests.erl index ab435682a..bc1fce0cd 100644 --- a/src/chttpd/test/eunit/chttpd_purge_tests.erl +++ b/src/chttpd/test/eunit/chttpd_purge_tests.erl @@ -13,6 +13,10 @@ -module(chttpd_purge_tests). +% Remove when purge is implemented +-compile(nowarn_unused_function). + + -include_lib("couch/include/couch_eunit.hrl"). -include_lib("couch/include/couch_db.hrl"). @@ -62,7 +66,7 @@ delete_db(Url) -> {ok, 200, _, _} = test_request:delete(Url, [?AUTH]). 
-purge_test_() -> +purge_test_disabled() -> { "chttpd db tests", { diff --git a/src/chttpd/test/eunit/chttpd_security_tests.erl b/src/chttpd/test/eunit/chttpd_security_tests.erl index 0bea9dbcd..8085f82a0 100644 --- a/src/chttpd/test/eunit/chttpd_security_tests.erl +++ b/src/chttpd/test/eunit/chttpd_security_tests.erl @@ -12,6 +12,9 @@ -module(chttpd_security_tests). +% Remove when purge is implemented +-compile(nowarn_unused_function). + -include_lib("couch/include/couch_eunit.hrl"). -include_lib("couch/include/couch_db.hrl"). @@ -38,14 +41,13 @@ setup() -> ok = config:set("admins", ?USER, ?b2l(Hashed), Persist), UserDb = ?tempdb(), TmpDb = ?tempdb(), - ok = config:set("chttpd_auth", "authentication_db", ?b2l(UserDb), Persist), - Addr = config:get("chttpd", "bind_address", "127.0.0.1"), Port = mochiweb_socket_server:get(chttpd, port), BaseUrl = lists:concat(["http://", Addr, ":", Port, "/"]), - Url = lists:concat([BaseUrl, ?b2l(TmpDb)]), UsersUrl = lists:concat([BaseUrl, ?b2l(UserDb)]), create_db(UsersUrl), + ok = config:set("chttpd_auth", "authentication_db", ?b2l(UserDb), Persist), + Url = lists:concat([BaseUrl, ?b2l(TmpDb)]), create_db(Url), create_design_doc(Url), create_user(UsersUrl,?TEST_MEMBER,?TEST_MEMBER_PASS,[<<?TEST_MEMBER>>]), @@ -56,6 +58,7 @@ setup() -> teardown([Url,UsersUrl]) -> delete_db(Url), delete_db(UsersUrl), + ok = config:delete("chttpd_auth", "authentication_db", _Persist=false), ok = config:delete("admins", ?USER, _Persist=false). create_db(Url) -> @@ -108,15 +111,21 @@ all_test_() -> fun should_disallow_db_member_db_compaction/1, fun should_allow_db_admin_db_compaction/1, fun should_allow_admin_view_compaction/1, - fun should_disallow_anonymous_view_compaction/1, - fun should_allow_admin_db_view_cleanup/1, - fun should_disallow_anonymous_db_view_cleanup/1, - fun should_allow_admin_purge/1, - fun should_disallow_anonymous_purge/1, - fun should_disallow_db_member_purge/1, - fun should_allow_admin_purged_infos_limit/1, - fun should_disallow_anonymous_purged_infos_limit/1, - fun should_disallow_db_member_purged_infos_limit/1 + fun should_disallow_anonymous_view_compaction/1 + + % Re-enable when _view_cleanup is implemented + % + %fun should_allow_admin_db_view_cleanup/1, + %fun should_disallow_anonymous_db_view_cleanup/1, + + % Re-enable when purge is implemented + % + %fun should_allow_admin_purge/1, + %fun should_disallow_anonymous_purge/1, + %fun should_disallow_db_member_purge/1, + %fun should_allow_admin_purged_infos_limit/1, + %fun should_disallow_anonymous_purged_infos_limit/1, + %fun should_disallow_db_member_purged_infos_limit/1 ] } } @@ -337,13 +346,11 @@ should_return_error_for_sec_obj_with_incorrect_roles_and_names( Body = jiffy:encode({SecurityProperties}), {ok, Status, _, RespBody} = test_request:put(SecurityUrl, [?CONTENT_JSON, ?AUTH], Body), - ResultJson = ?JSON_DECODE(RespBody), + ResultJson = couch_util:json_decode(RespBody, [return_maps]), + ExpectReason = <<"names must be a JSON list of strings">>, [ ?_assertEqual(500, Status), - ?_assertEqual({[ - {<<"error">>,<<"error">>}, - {<<"reason">>,<<"no_majority">>} - ]}, ResultJson) + ?_assertMatch(#{<<"reason">> := ExpectReason}, ResultJson) ]. 
should_return_error_for_sec_obj_with_incorrect_roles([Url,_UsersUrl]) -> @@ -356,13 +363,11 @@ should_return_error_for_sec_obj_with_incorrect_roles([Url,_UsersUrl]) -> Body = jiffy:encode({SecurityProperties}), {ok, Status, _, RespBody} = test_request:put(SecurityUrl, [?CONTENT_JSON, ?AUTH], Body), - ResultJson = ?JSON_DECODE(RespBody), + ResultJson = couch_util:json_decode(RespBody, [return_maps]), + ExpectReason = <<"roles must be a JSON list of strings">>, [ ?_assertEqual(500, Status), - ?_assertEqual({[ - {<<"error">>,<<"error">>}, - {<<"reason">>,<<"no_majority">>} - ]}, ResultJson) + ?_assertMatch(#{<<"reason">> := ExpectReason}, ResultJson) ]. should_return_error_for_sec_obj_with_incorrect_names([Url,_UsersUrl]) -> @@ -375,13 +380,11 @@ should_return_error_for_sec_obj_with_incorrect_names([Url,_UsersUrl]) -> Body = jiffy:encode({SecurityProperties}), {ok, Status, _, RespBody} = test_request:put(SecurityUrl, [?CONTENT_JSON, ?AUTH], Body), - ResultJson = ?JSON_DECODE(RespBody), + ResultJson = couch_util:json_decode(RespBody, [return_maps]), + ExpectReason = <<"names must be a JSON list of strings">>, [ ?_assertEqual(500, Status), - ?_assertEqual({[ - {<<"error">>,<<"error">>}, - {<<"reason">>,<<"no_majority">>} - ]}, ResultJson) + ?_assertMatch(#{<<"reason">> := ExpectReason}, ResultJson) ]. should_return_error_for_sec_obj_in_user_db([_,_UsersUrl]) -> diff --git a/src/chttpd/test/eunit/chttpd_session_tests.erl b/src/chttpd/test/eunit/chttpd_session_tests.erl new file mode 100644 index 000000000..a802d9ec2 --- /dev/null +++ b/src/chttpd/test/eunit/chttpd_session_tests.erl @@ -0,0 +1,74 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(chttpd_session_tests). + +-include_lib("couch/include/couch_eunit.hrl"). +-include("chttpd_test.hrl"). + +-define(USER, "chttpd_test_admin"). +-define(PASS, "pass"). + + +setup() -> + ok = config:delete("chttpd_auth", "authentication_db", _Persist=false), + Hashed = couch_passwords:hash_admin_password(?PASS), + ok = config:set("admins", ?USER, binary_to_list(Hashed), _Persist=false), + root_url() ++ "/_session". + + +cleanup(_) -> + ok = config:delete("chttpd_auth", "authentication_db", _Persist=false), + ok = config:delete("admins", ?USER, _Persist=false). + + +session_test_() -> + { + "Session tests", + { + setup, + fun() -> test_util:start_couch([fabric, chttpd]) end, + fun test_util:stop_couch/1, + { + foreach, + fun setup/0, + fun cleanup/1, + [ + ?TDEF_FE(session_authentication_db_absent), + ?TDEF_FE(session_authentication_db_present) + ] + } + } + }. + + +session_authentication_db_absent(Url) -> + ok = config:delete("chttpd_auth", "authentication_db", _Persist=false), + ?assertThrow({not_found, _}, session_authentication_db(Url)). + + +session_authentication_db_present(Url) -> + Name = "_users", + ok = config:set("chttpd_auth", "authentication_db", Name, false), + ?assertEqual(list_to_binary(Name), session_authentication_db(Url)). 
+ + +session_authentication_db(Url) -> + {ok, 200, _, Body} = test_request:get(Url, [{basic_auth, {?USER, ?PASS}}]), + couch_util:get_nested_json_value( + jiffy:decode(Body), [<<"info">>, <<"authentication_db">>]). + + +root_url() -> + Addr = config:get("chttpd", "bind_address", "127.0.0.1"), + Port = mochiweb_socket_server:get(chttpd, port), + lists:concat(["http://", Addr, ":", Port]). diff --git a/src/chttpd/test/eunit/chttpd_stats_tests.erl b/src/chttpd/test/eunit/chttpd_stats_tests.erl new file mode 100644 index 000000000..1742285a1 --- /dev/null +++ b/src/chttpd/test/eunit/chttpd_stats_tests.erl @@ -0,0 +1,77 @@ +-module(chttpd_stats_tests). + +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("couch/include/couch_db.hrl"). + + +start() -> + ok = application:start(config), + ok = application:start(couch_log). + + +stop(_) -> + ok = application:stop(config), + ok = application:stop(couch_log). + + +setup() -> + ok = meck:new(chttpd_stats, [passthrough]). + + +teardown(_) -> + meck:unload(), + ok. + + + +chttpd_stats_test_() -> + { + "chttpd_stats tests", + { + setup, + fun start/0, + fun stop/1, + { + foreach, + fun setup/0, fun teardown/1, + [ + fun test_reset/1, + fun test_no_reset/1 + ] + } + } + }. + + +test_reset(_) -> + ?_test(begin + chttpd_stats:init(undefined), + chttpd_stats:incr_rows(3), + chttpd_stats:incr_rows(), + chttpd_stats:incr_writes(5), + chttpd_stats:incr_writes(), + chttpd_stats:incr_reads(), + chttpd_stats:incr_reads(2), + State1 = get(chttpd_stats), + ?assertMatch({st, 3, 6, 4, _, _, _, _}, State1), + + ok = meck:expect(chttpd_stats, report, fun(_) -> true end), + % force a reset with 0 interval + chttpd_stats:update_interval(0), + % after this is called, the report should happen and rows should + % reset to 0 + chttpd_stats:incr_rows(), + ResetState = get(chttpd_stats), + ?assertMatch({st, 0, 0, 0, _, _, _, _}, ResetState) + end). + + +test_no_reset(_) -> + ?_test(begin + ok = meck:expect(chttpd_stats, report, fun(_) -> false end), + chttpd_stats:init(undefined), + chttpd_stats:update_interval(0), + chttpd_stats:incr_rows(), + State = get(chttpd_stats), + ?assertMatch({st, 0, 0, 1, _, _, _, _}, State) + end). diff --git a/src/chttpd/test/eunit/chttpd_test.hrl b/src/chttpd/test/eunit/chttpd_test.hrl new file mode 100644 index 000000000..6db97ec2b --- /dev/null +++ b/src/chttpd/test/eunit/chttpd_test.hrl @@ -0,0 +1,35 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + + +% Borrowed from fabric2_test.hrl + +% Some test modules do not use with, so squash the unused fun compiler warning +-compile([{nowarn_unused_function, [{with, 1}]}]). + + +-define(TDEF(Name), {atom_to_list(Name), fun Name/1}). +-define(TDEF(Name, Timeout), {atom_to_list(Name), Timeout, fun Name/1}). + +-define(TDEF_FE(Name), fun(Arg) -> {atom_to_list(Name), ?_test(Name(Arg))} end). +-define(TDEF_FE(Name, Timeout), fun(Arg) -> {atom_to_list(Name), {timeout, Timeout, ?_test(Name(Arg))}} end). 
+ + +with(Tests) -> + fun(ArgsTuple) -> + lists:map(fun + ({Name, Fun}) -> + {Name, ?_test(Fun(ArgsTuple))}; + ({Name, Timeout, Fun}) -> + {Name, {timeout, Timeout, ?_test(Fun(ArgsTuple))}} + end, Tests) + end. diff --git a/src/chttpd/test/eunit/chttpd_view_test.erl b/src/chttpd/test/eunit/chttpd_view_test.erl index 4c224bb4e..1744f97a1 100644 --- a/src/chttpd/test/eunit/chttpd_view_test.erl +++ b/src/chttpd/test/eunit/chttpd_view_test.erl @@ -99,7 +99,7 @@ should_succeed_on_view_with_queries_limit_skip(Url) -> {ResultJson} = ?JSON_DECODE(RespBody), ResultJsonBody = couch_util:get_value(<<"results">>, ResultJson), {InnerJson} = lists:nth(1, ResultJsonBody), - ?assertEqual(2, couch_util:get_value(<<"offset">>, InnerJson)), + ?assertEqual(null, couch_util:get_value(<<"offset">>, InnerJson)), ?assertEqual(5, length(couch_util:get_value(<<"rows">>, InnerJson))) end)}. @@ -119,6 +119,6 @@ should_succeed_on_view_with_multiple_queries(Url) -> {InnerJson1} = lists:nth(1, ResultJsonBody), ?assertEqual(2, length(couch_util:get_value(<<"rows">>, InnerJson1))), {InnerJson2} = lists:nth(2, ResultJsonBody), - ?assertEqual(2, couch_util:get_value(<<"offset">>, InnerJson2)), + ?assertEqual(null, couch_util:get_value(<<"offset">>, InnerJson2)), ?assertEqual(5, length(couch_util:get_value(<<"rows">>, InnerJson2))) end)}. diff --git a/src/chttpd/test/exunit/pagination_test.exs b/src/chttpd/test/exunit/pagination_test.exs new file mode 100644 index 000000000..6544017df --- /dev/null +++ b/src/chttpd/test/exunit/pagination_test.exs @@ -0,0 +1,1393 @@ +defmodule Couch.Test.Pagination do + use ExUnit.Case + import Couch.DBTest, only: [retry_until: 1] + alias Couch.DBTest, as: Utils + + defp create_admin(user_name, password) do + hashed = String.to_charlist(:couch_passwords.hash_admin_password(password)) + :config.set('admins', String.to_charlist(user_name), hashed, false) + end + + defp base_url() do + addr = :config.get('chttpd', 'bind_address', '127.0.0.1') + port = :mochiweb_socket_server.get(:chttpd, :port) + "http://#{addr}:#{port}" + end + + setup_all do + test_ctx = + :test_util.start_couch([:chttpd, :couch_jobs, :couch_views, :couch_eval, :couch_js]) + + :ok = create_admin("adm", "pass") + + on_exit(fn -> + :test_util.stop_couch(test_ctx) + end) + + %{ + base_url: base_url(), + user: "adm", + pass: "pass" + } + end + + defp with_session(context) do + session = Couch.login(context.user, context.pass, base_url: context.base_url) + %{session: session} + end + + defp random_db(context) do + db_name = Utils.random_db_name("db") + + on_exit(fn -> + delete_db(context.session, db_name) + end) + + create_db(context.session, db_name) + %{db_name: db_name} + end + + defp with_docs(context) do + assert Map.has_key?(context, :n_docs), "Please define '@describetag n_docs: 10'" + %{docs: create_docs(context.session, context.db_name, 1..context.n_docs)} + end + + defp with_view(context) do + ddoc_id = "simple" + + ddoc = %{ + _id: "_design/#{ddoc_id}", + views: %{ + all: %{ + map: "function(doc) { emit(doc.string, doc) }" + } + } + } + + create_doc(context.session, context.db_name, ddoc) + %{view_name: "all", ddoc_id: ddoc_id} + end + + defp with_same_key_docs(context) do + assert Map.has_key?(context, :n_docs), "Please define '@describetag n_docs: 10'" + + docs = + for id <- 1..context.n_docs do + str_id = docid(id) + %{"_id" => str_id, "integer" => id, "string" => docid(div(id, context.page_size))} + end + + docs = + docs + |> Enum.map(fn doc -> + created_doc = create_doc(context.session, context.db_name, doc) + 
Map.merge(doc, created_doc) + end) + + %{docs: docs} + end + + defp all_docs(context) do + assert Map.has_key?(context, :page_size), "Please define '@describetag page_size: 4'" + + assert Map.has_key?(context, :descending), + "Please define '@describetag descending: false'" + + resp = + Couch.Session.get(context.session, "/#{context.db_name}/_all_docs", + query: %{page_size: context.page_size, descending: context.descending} + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + + %{ + response: resp.body + } + end + + defp paginate_queries(context, opts) do + paginate_queries(context, [], opts) + end + + defp paginate_queries(context, acc, opts) do + {paginate_opts, client_opts} = Keyword.split(opts, [:url, :direction]) + + resp = + Couch.Session.post(context.session, Keyword.get(paginate_opts, :url), client_opts) + + results = resp.body["results"] + view_url = String.replace_suffix(Keyword.get(paginate_opts, :url), "/queries", "") + + opts = + opts + |> Keyword.replace!(:url, view_url) + |> Keyword.delete(:body) + + final = + Enum.map(results, fn result -> + paginate(context, result, [Map.get(result, "rows")], opts) + end) + + final + end + + defp paginate(context, current, acc, opts) do + {paginate_opts, client_opts} = Keyword.split(opts, [:url, :direction]) + direction_key = Keyword.get(paginate_opts, :direction, "next") + + if Map.has_key?(current, direction_key) do + bookmark = current[direction_key] + client_opts = Keyword.replace!(client_opts, :query, %{bookmark: bookmark}) + + resp = + Couch.Session.get(context.session, Keyword.get(paginate_opts, :url), client_opts) + + result = resp.body + paginate(context, result, [Map.get(result, "rows") | acc], opts) + else + Enum.reverse(acc) + end + end + + defp paginate(context) do + if Map.has_key?(context.response, "next") do + bookmark = context.response["next"] + pages = Map.get(context, :pages, [context.response]) + assert length(pages) < div(context.n_docs, context.page_size) + 1 + + resp = + Couch.Session.get(context.session, "/#{context.db_name}/_all_docs", + query: %{bookmark: bookmark} + ) + + context = + Map.merge(context, %{ + pages: [resp.body | pages], + response: resp.body + }) + + paginate(context) + else + context = + Map.update(context, :pages, [], fn acc -> + Enum.reverse(acc) + end) + + context + end + end + + def create_db(session, db_name, opts \\ []) do + retry_until(fn -> + resp = Couch.Session.put(session, "/#{db_name}", opts) + assert resp.status_code in [201, 202], "got error #{inspect(resp.body)}" + assert resp.body == %{"ok" => true} + {:ok, resp} + end) + end + + defp delete_db(session, db_name) do + retry_until(fn -> + resp = Couch.Session.delete(session, "/#{db_name}") + assert resp.status_code in [200, 202, 404], "got error #{inspect(resp.body)}" + {:ok, resp} + end) + end + + defp create_doc(session, db_name, body) do + {:ok, body} = + retry_until(fn -> + resp = Couch.Session.post(session, "/#{db_name}", body: body) + assert resp.status_code in [201, 202], "got error #{inspect(resp.body)}" + assert resp.body["ok"] + {:ok, resp.body} + end) + + Map.delete(body, "ok") + end + + defp create_docs(session, db_name, range) do + docs = make_docs(range) + + docs + |> Enum.map(fn doc -> + created_doc = create_doc(session, db_name, doc) + Map.merge(doc, created_doc) + end) + end + + defp docid(id) do + id |> Integer.to_string() |> String.pad_leading(3, "0") + end + + defp make_docs(id_range) do + max = Enum.max(id_range) + + for id <- id_range do + str_id = docid(id) + %{"_id" => str_id, 
"integer" => id, "string" => docid(max - id)} + end + end + + describe "Legacy API (10 docs)" do + @describetag n_docs: 10 + setup [:with_session, :random_db, :with_docs] + + test ": _all_docs/queries", ctx do + queries = %{ + queries: [%{descending: false}, %{descending: true}] + } + + resp = + Couch.Session.post(ctx.session, "/#{ctx.db_name}/_all_docs/queries", + body: :jiffy.encode(queries) + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + [q1, q2] = resp.body["results"] + assert q1["rows"] == Enum.reverse(q2["rows"]) + end + end + + for descending <- [false, true] do + describe "Legacy API (10 docs) : _all_docs?descending=#{descending}" do + @describetag n_docs: 10 + @describetag descending: descending + setup [:with_session, :random_db, :with_docs] + + test "total_rows matches the length of rows array", ctx do + resp = + Couch.Session.get(ctx.session, "/#{ctx.db_name}/_all_docs", + query: %{descending: ctx.descending} + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + body = resp.body + assert body["total_rows"] == length(body["rows"]) + end + + test "the rows are correctly sorted", ctx do + resp = + Couch.Session.get(ctx.session, "/#{ctx.db_name}/_all_docs", + query: %{descending: ctx.descending} + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + body = resp.body + ids = Enum.map(body["rows"], fn row -> row["id"] end) + + if ctx.descending do + assert Enum.reverse(Enum.sort(ids)) == ids + else + assert Enum.sort(ids) == ids + end + end + + test "start_key is respected", ctx do + head_pos = 2 + tail_pos = ctx.n_docs - head_pos + doc_ids = Enum.map(ctx.docs, fn doc -> doc["id"] end) + + {start_pos, doc_ids} = + if ctx.descending do + {head_pos, Enum.reverse(Enum.drop(Enum.sort(doc_ids), -tail_pos))} + else + {tail_pos, Enum.drop(Enum.sort(doc_ids), tail_pos - 1)} + end + + start_key = ~s("#{docid(start_pos)}") + + resp = + Couch.Session.get(ctx.session, "/#{ctx.db_name}/_all_docs", + query: %{descending: ctx.descending, start_key: start_key} + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + ids = Enum.map(resp.body["rows"], fn row -> row["id"] end) + assert doc_ids == ids + end + + test "end_key is respected", ctx do + head_pos = 2 + tail_pos = ctx.n_docs - head_pos + doc_ids = Enum.map(ctx.docs, fn doc -> doc["id"] end) + + {end_pos, doc_ids} = + if ctx.descending do + {tail_pos, Enum.reverse(Enum.drop(Enum.sort(doc_ids), tail_pos - 1))} + else + {head_pos, Enum.drop(Enum.sort(doc_ids), -tail_pos)} + end + + end_key = ~s("#{docid(end_pos)}") + + resp = + Couch.Session.get(ctx.session, "/#{ctx.db_name}/_all_docs", + query: %{descending: ctx.descending, end_key: end_key} + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + ids = Enum.map(resp.body["rows"], fn row -> row["id"] end) + assert doc_ids == ids + end + + test "range between start_key and end_key works", ctx do + head_pos = 2 + slice_size = 3 + doc_ids = Enum.sort(Enum.map(ctx.docs, fn doc -> doc["id"] end)) + # -1 due to 0 based indexing + # -2 is due to 0 based indexing and inclusive end + slice = Enum.slice(doc_ids, (head_pos - 1)..(head_pos + slice_size - 2)) + + {start_key, end_key, doc_ids} = + if ctx.descending do + reversed = Enum.reverse(slice) + [first | _] = reversed + [last | _] = slice + {~s("#{first}"), ~s("#{last}"), reversed} + else + [first | _] = slice + [last | _] = Enum.reverse(slice) + {~s("#{first}"), ~s("#{last}"), slice} + end + + assert length(doc_ids) == slice_size + + resp = + 
Couch.Session.get(ctx.session, "/#{ctx.db_name}/_all_docs", + query: %{descending: ctx.descending, start_key: start_key, end_key: end_key} + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + ids = Enum.map(resp.body["rows"], fn row -> row["id"] end) + assert doc_ids == ids + end + end + end + + describe "Legacy API (10 docs) : /{db}/_design/{ddoc}/_view" do + @describetag n_docs: 10 + @describetag descending: false + @describetag page_size: 4 + setup [:with_session, :random_db, :with_view, :with_docs] + + test "total_rows matches the length of rows array", ctx do + resp = + Couch.Session.get( + ctx.session, + "/#{ctx.db_name}/_design/#{ctx.ddoc_id}/_view/#{ctx.view_name}", + query: %{descending: ctx.descending} + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + body = resp.body + assert body["total_rows"] == length(body["rows"]) + end + end + + describe "Legacy API (10 docs) : /{db}/_design/{ddoc}/_view/queries" do + @describetag n_docs: 10 + @describetag page_size: 4 + setup [:with_session, :random_db, :with_view, :with_docs] + + test "descending is respected", ctx do + queries = %{ + queries: [%{descending: false}, %{descending: true}] + } + + resp = + Couch.Session.post( + ctx.session, + "/#{ctx.db_name}/_design/#{ctx.ddoc_id}/_view/#{ctx.view_name}/queries", + body: :jiffy.encode(queries) + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + + [q1, q2] = resp.body["results"] + q1 = Enum.map(q1["rows"], fn row -> row["key"] end) + q2 = Enum.map(q2["rows"], fn row -> row["key"] end) + assert q1 == Enum.reverse(q2) + assert q1 == Enum.sort(q1) + end + + test "ensure we paginate starting from first query", ctx do + queries = %{ + queries: [%{descending: false}, %{descending: true}] + } + + resp = + Couch.Session.post( + ctx.session, + "/#{ctx.db_name}/_design/#{ctx.ddoc_id}/_view/#{ctx.view_name}/queries", + query: %{page_size: ctx.page_size}, + body: :jiffy.encode(queries) + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + + [q1, q2] = resp.body["results"] + q1 = Enum.map(q1["rows"], fn row -> row["key"] end) + q2 = Enum.map(q2["rows"], fn row -> row["key"] end) + assert ctx.page_size == length(q1) + assert q2 == [] + end + end + + describe "Pagination API (10 docs)" do + @describetag n_docs: 10 + @describetag page_size: 4 + setup [:with_session, :random_db, :with_docs] + + test ": _all_docs?page_size=4", ctx do + %{session: session, db_name: db_name} = ctx + + resp = + Couch.Session.get(session, "/#{db_name}/_all_docs", + query: %{page_size: ctx.page_size} + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + end + + test ": _all_docs?page_size=4 should respect limit", ctx do + %{session: session, db_name: db_name} = ctx + + resp = + Couch.Session.get(session, "/#{db_name}/_all_docs", + query: %{page_size: ctx.page_size, limit: ctx.page_size - 2} + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + assert length(resp.body["rows"]) == ctx.page_size - 2 + assert not Map.has_key?(resp.body, "next") + + resp = + Couch.Session.get(session, "/#{db_name}/_all_docs", + query: %{page_size: ctx.page_size, limit: ctx.page_size - 1} + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + assert length(resp.body["rows"]) == ctx.page_size - 1 + assert not Map.has_key?(resp.body, "next") + + resp = + Couch.Session.get(session, "/#{db_name}/_all_docs", + query: %{page_size: ctx.page_size, limit: ctx.page_size} + ) + + assert resp.status_code == 
200, "got error #{inspect(resp.body)}" + assert length(resp.body["rows"]) == ctx.page_size + assert not Map.has_key?(resp.body, "next") + + resp = + Couch.Session.get(session, "/#{db_name}/_all_docs", + query: %{page_size: ctx.page_size, limit: ctx.page_size + 1} + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + assert length(resp.body["rows"]) == ctx.page_size + assert Map.has_key?(resp.body, "next") + + resp = + Couch.Session.get(session, "/#{db_name}/_all_docs", + query: %{page_size: ctx.page_size, limit: ctx.page_size + 2} + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + assert length(resp.body["rows"]) == ctx.page_size + assert Map.has_key?(resp.body, "next") + end + + test ": _all_docs/queries should limit number of queries", ctx do + queries = %{ + queries: [%{}, %{}, %{}, %{}, %{}] + } + + resp = + Couch.Session.post(ctx.session, "/#{ctx.db_name}/_all_docs/queries", + query: %{page_size: ctx.page_size}, + body: :jiffy.encode(queries) + ) + + assert resp.status_code == 400 + + assert resp.body["reason"] == + "Provided number of queries is more than given page_size" + end + + test ": _all_docs/queries should forbid `page_size` in queries", ctx do + queries = %{ + queries: [%{page_size: 3}] + } + + resp = + Couch.Session.post(ctx.session, "/#{ctx.db_name}/_all_docs/queries", + query: %{page_size: ctx.page_size}, + body: :jiffy.encode(queries) + ) + + assert resp.status_code == 400 + + assert resp.body["reason"] == + "You cannot specify `page_size` inside the query" + end + + test ": _all_docs should forbid `page_size` and `keys`", ctx do + body = %{ + page_size: 3, + keys: [ + "002", + "004" + ] + } + + resp = + Couch.Session.post(ctx.session, "/#{ctx.db_name}/_all_docs", + body: :jiffy.encode(body) + ) + + assert resp.status_code == 400 + + assert resp.body["reason"] == + "`page_size` is incompatible with `keys`" + end + + test ": _all_docs should limit 'skip' parameter", ctx do + resp = + Couch.Session.get(ctx.session, "/#{ctx.db_name}/_all_docs", + query: %{page_size: ctx.page_size, skip: 3000} + ) + + assert resp.status_code == 400 + + assert resp.body["reason"] == + "`skip` should be an integer in range [0 .. 
2000]" + end + + test ": _all_docs should forbid extra parameters when 'bookmark' is present", ctx do + resp = + Couch.Session.get(ctx.session, "/#{ctx.db_name}/_all_docs", + query: %{page_size: ctx.page_size, skip: 3000, bookmark: ""} + ) + + assert resp.status_code == 400 + + assert resp.body["reason"] == + "Cannot use `bookmark` with other options" + end + end + + for descending <- [false, true] do + for n <- [4, 9] do + describe "Pagination API (10 docs) : _all_docs?page_size=#{n}&descending=#{ + descending + }" do + @describetag n_docs: 10 + @describetag descending: descending + @describetag page_size: n + setup [:with_session, :random_db, :with_docs, :all_docs] + + test "should return 'next' bookmark", ctx do + body = ctx.response + assert Map.has_key?(body, "next") + end + + test "total_rows matches the length of rows array", ctx do + body = ctx.response + assert body["total_rows"] == length(body["rows"]) + end + + test "total_rows matches the requested page_size", ctx do + body = ctx.response + assert body["total_rows"] == ctx.page_size + end + + test "can use 'next' bookmark to get remaining results", ctx do + bookmark = ctx.response["next"] + + resp = + Couch.Session.get(ctx.session, "/#{ctx.db_name}/_all_docs", + query: %{bookmark: bookmark} + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + body = resp.body + assert body["total_rows"] == length(body["rows"]) + assert body["total_rows"] <= ctx.page_size + end + end + + describe "Pagination API (10 docs) : _all_docs?page_size=#{n}&descending=#{ + descending + } : range" do + @describetag n_docs: 10 + @describetag descending: descending + @describetag page_size: n + setup [:with_session, :random_db, :with_docs] + + test "start_key is respected", ctx do + head_pos = 2 + tail_pos = ctx.n_docs - head_pos + doc_ids = Enum.map(ctx.docs, fn doc -> doc["id"] end) + + {start_pos, doc_ids} = + if ctx.descending do + {head_pos, Enum.reverse(Enum.drop(Enum.sort(doc_ids), -tail_pos))} + else + {tail_pos, Enum.drop(Enum.sort(doc_ids), tail_pos - 1)} + end + + start_key = ~s("#{docid(start_pos)}") + + resp = + Couch.Session.get(ctx.session, "/#{ctx.db_name}/_all_docs", + query: %{descending: ctx.descending, start_key: start_key} + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + ids = Enum.map(resp.body["rows"], fn row -> row["id"] end) + assert doc_ids == ids + end + + test "end_key is respected", ctx do + head_pos = 2 + tail_pos = ctx.n_docs - head_pos + doc_ids = Enum.map(ctx.docs, fn doc -> doc["id"] end) + + {end_pos, doc_ids} = + if ctx.descending do + {tail_pos, Enum.reverse(Enum.drop(Enum.sort(doc_ids), tail_pos - 1))} + else + {head_pos, Enum.drop(Enum.sort(doc_ids), -tail_pos)} + end + + end_key = ~s("#{docid(end_pos)}") + + resp = + Couch.Session.get(ctx.session, "/#{ctx.db_name}/_all_docs", + query: %{descending: ctx.descending, end_key: end_key} + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + ids = Enum.map(resp.body["rows"], fn row -> row["id"] end) + assert doc_ids == ids + end + + test "range between start_key and end_key works", ctx do + head_pos = 2 + slice_size = 3 + doc_ids = Enum.sort(Enum.map(ctx.docs, fn doc -> doc["id"] end)) + # -1 due to 0 based indexing + # -2 is due to 0 based indexing and inclusive end + slice = Enum.slice(doc_ids, (head_pos - 1)..(head_pos + slice_size - 2)) + + {start_key, end_key, doc_ids} = + if ctx.descending do + reversed = Enum.reverse(slice) + [first | _] = reversed + [last | _] = slice + {~s("#{first}"), 
~s("#{last}"), reversed} + else + [first | _] = slice + [last | _] = Enum.reverse(slice) + {~s("#{first}"), ~s("#{last}"), slice} + end + + assert length(doc_ids) == slice_size + + resp = + Couch.Session.get(ctx.session, "/#{ctx.db_name}/_all_docs", + query: %{descending: ctx.descending, start_key: start_key, end_key: end_key} + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + ids = Enum.map(resp.body["rows"], fn row -> row["id"] end) + assert doc_ids == ids + end + end + end + end + + for descending <- [false, true] do + for n <- [4, 9] do + describe "Pagination API (10 docs) : _all_docs?page_size=#{n}&descending=#{ + descending + } : pages" do + @describetag n_docs: 10 + @describetag descending: descending + @describetag page_size: n + setup [:with_session, :random_db, :with_docs, :all_docs, :paginate] + + test "final page doesn't include 'next' bookmark", ctx do + assert not Map.has_key?(ctx.response, "next") + assert ctx.response["total_rows"] == rem(ctx.n_docs, ctx.page_size) + end + + test "each but last page has page_size rows", ctx do + pages = Enum.drop(ctx.pages, -1) + + assert Enum.all?(pages, fn resp -> + length(resp["rows"]) == ctx.page_size + end) + end + + test "sum of rows on all pages is equal to number of documents", ctx do + pages = ctx.pages + n = Enum.reduce(pages, 0, fn resp, acc -> acc + length(resp["rows"]) end) + assert n == ctx.n_docs + end + + test "the rows are correctly sorted", ctx do + pages = ctx.pages + + ids = + Enum.reduce(pages, [], fn resp, acc -> + acc ++ Enum.map(resp["rows"], fn row -> row["id"] end) + end) + + if ctx.descending do + assert Enum.reverse(Enum.sort(ids)) == ids + else + assert Enum.sort(ids) == ids + end + end + end + end + end + + for n <- 10..11 do + describe "Pagination API (10 docs) : _all_docs?page_size=#{n}" do + @describetag n_docs: 10 + @describetag descending: false + @describetag page_size: n + setup [:with_session, :random_db, :with_docs, :all_docs] + + test "should not return 'next' bookmark", ctx do + body = ctx.response + assert not Map.has_key?(body, "next") + end + + test "total_rows matches the length of rows array", ctx do + body = ctx.response + assert body["total_rows"] == length(body["rows"]) + end + + test "total_rows less than the requested page_size", ctx do + body = ctx.response + assert body["total_rows"] <= ctx.page_size + end + end + end + + for descending <- [false, true] do + for n <- [4, 9] do + describe "Pagination API (10 docs) : _all_docs/queries?page_size=#{n}&descending=#{ + descending + } : pages" do + @describetag n_docs: 10 + @describetag descending: descending + @describetag page_size: n + + @describetag queries: %{ + queries: [ + %{ + descending: true + }, + %{ + limit: n + 1, + skip: 2 + } + ] + } + + setup [:with_session, :random_db, :with_docs] + + test "one of the results contains 'next' bookmark", ctx do + resp = + Couch.Session.post(ctx.session, "/#{ctx.db_name}/_all_docs/queries", + query: %{page_size: ctx.page_size, descending: ctx.descending}, + body: :jiffy.encode(ctx.queries) + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + results = resp.body["results"] + assert Enum.any?(results, fn result -> Map.has_key?(result, "next") end) + end + + test "each 'next' bookmark is working", ctx do + resp = + Couch.Session.post(ctx.session, "/#{ctx.db_name}/_all_docs/queries", + query: %{page_size: ctx.page_size, descending: ctx.descending}, + body: :jiffy.encode(ctx.queries) + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" 
+ results = resp.body["results"] + + bookmarks = + results + |> Enum.filter(fn result -> Map.has_key?(result, "next") end) + |> Enum.map(fn result -> Map.get(result, "next") end) + + assert [] != bookmarks + + Enum.each(bookmarks, fn bookmark -> + resp = + Couch.Session.get(ctx.session, "/#{ctx.db_name}/_all_docs", + query: %{bookmark: bookmark} + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + assert [] != resp.body["rows"] + end) + + assert Enum.any?(results, fn result -> Map.has_key?(result, "next") end) + end + + test "can post bookmarks to queries", ctx do + resp = + Couch.Session.post(ctx.session, "/#{ctx.db_name}/_all_docs/queries", + query: %{page_size: ctx.page_size, descending: ctx.descending}, + body: :jiffy.encode(ctx.queries) + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + results = resp.body["results"] + + queries = + results + |> Enum.filter(fn result -> Map.has_key?(result, "next") end) + |> Enum.map(fn result -> %{bookmark: Map.get(result, "next")} end) + + resp = + Couch.Session.post(ctx.session, "/#{ctx.db_name}/_all_docs/queries", + body: :jiffy.encode(%{queries: queries}) + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + + Enum.each(resp.body["results"], fn result -> + assert [] != result["rows"] + end) + end + + test "respect request page_size", ctx do + resp = + Couch.Session.post(ctx.session, "/#{ctx.db_name}/_all_docs/queries", + query: %{page_size: ctx.page_size, descending: ctx.descending}, + body: :jiffy.encode(ctx.queries) + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + results = resp.body["results"] + + Enum.each(results ++ resp.body["results"], fn result -> + assert length(result["rows"]) <= ctx.page_size + end) + end + + test "independent page_size in the bookmark", ctx do + resp = + Couch.Session.post(ctx.session, "/#{ctx.db_name}/_all_docs/queries", + query: %{page_size: ctx.page_size, descending: ctx.descending}, + body: :jiffy.encode(ctx.queries) + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + + queries = + resp.body["results"] + |> Enum.filter(fn result -> Map.has_key?(result, "next") end) + |> Enum.map(fn result -> %{bookmark: Map.get(result, "next")} end) + + resp = + Couch.Session.post(ctx.session, "/#{ctx.db_name}/_all_docs/queries", + body: :jiffy.encode(%{queries: queries}) + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + + Enum.each(resp.body["results"], fn result -> + assert length(result["rows"]) > ctx.page_size + end) + end + end + end + end + + for descending <- [false, true] do + for n <- [4, 9] do + describe "Pagination API (10 docs) : /{db}/_design/{ddoc}/_view?page_size=#{n}&descending=#{ + descending + }" do + @describetag n_docs: 10 + @describetag descending: descending + @describetag page_size: n + setup [:with_session, :random_db, :with_view, :with_docs] + + test "should return 'next' bookmark", ctx do + resp = + Couch.Session.get( + ctx.session, + "/#{ctx.db_name}/_design/#{ctx.ddoc_id}/_view/#{ctx.view_name}", + query: %{page_size: ctx.page_size, descending: ctx.descending} + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + assert Map.has_key?(resp.body, "next") + end + + test "first page should not return 'previous' bookmark", ctx do + resp = + Couch.Session.get( + ctx.session, + "/#{ctx.db_name}/_design/#{ctx.ddoc_id}/_view/#{ctx.view_name}", + query: %{page_size: ctx.page_size, descending: ctx.descending} + ) + + assert resp.status_code == 200, 
"got error #{inspect(resp.body)}" + assert not Map.has_key?(resp.body, "previous") + end + + test "total_rows matches the length of rows array", ctx do + resp = + Couch.Session.get( + ctx.session, + "/#{ctx.db_name}/_design/#{ctx.ddoc_id}/_view/#{ctx.view_name}", + query: %{page_size: ctx.page_size, descending: ctx.descending} + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + body = resp.body + assert body["total_rows"] == length(body["rows"]) + end + + test "total_rows matches the requested page_size", ctx do + resp = + Couch.Session.get( + ctx.session, + "/#{ctx.db_name}/_design/#{ctx.ddoc_id}/_view/#{ctx.view_name}", + query: %{page_size: ctx.page_size, descending: ctx.descending} + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + assert resp.body["total_rows"] == ctx.page_size + end + + test "can use 'next' bookmark to get remaining results", ctx do + resp = + Couch.Session.get( + ctx.session, + "/#{ctx.db_name}/_design/#{ctx.ddoc_id}/_view/#{ctx.view_name}", + query: %{page_size: ctx.page_size, descending: ctx.descending} + ) + + bookmark = resp.body["next"] + + resp = + Couch.Session.get( + ctx.session, + "/#{ctx.db_name}/_design/#{ctx.ddoc_id}/_view/#{ctx.view_name}", + query: %{bookmark: bookmark} + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + body = resp.body + assert body["total_rows"] == length(body["rows"]) + assert body["total_rows"] <= ctx.page_size + end + + test "can use 'previous' bookmark", ctx do + resp = + Couch.Session.get( + ctx.session, + "/#{ctx.db_name}/_design/#{ctx.ddoc_id}/_view/#{ctx.view_name}", + query: %{page_size: ctx.page_size, descending: ctx.descending} + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + next_bookmark = resp.body["next"] + + first_page_keys = Enum.map(resp.body["rows"], fn row -> row["key"] end) + + resp = + Couch.Session.get( + ctx.session, + "/#{ctx.db_name}/_design/#{ctx.ddoc_id}/_view/#{ctx.view_name}", + query: %{bookmark: next_bookmark} + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + assert Map.has_key?(resp.body, "previous") + + resp = + Couch.Session.get( + ctx.session, + "/#{ctx.db_name}/_design/#{ctx.ddoc_id}/_view/#{ctx.view_name}", + query: %{bookmark: resp.body["previous"]} + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + keys = Enum.map(resp.body["rows"], fn row -> row["key"] end) + assert first_page_keys == keys + end + end + end + end + + for n <- 10..11 do + describe "Pagination API (10 docs) : /{db}/_design/{ddoc}/_view?page_size=#{n}" do + @describetag n_docs: 10 + @describetag descending: false + @describetag page_size: n + setup [:with_session, :random_db, :with_view, :with_docs] + + test "should not return 'next' bookmark", ctx do + resp = + Couch.Session.get( + ctx.session, + "/#{ctx.db_name}/_design/#{ctx.ddoc_id}/_view/#{ctx.view_name}", + query: %{page_size: ctx.page_size, descending: ctx.descending} + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + assert not Map.has_key?(resp.body, "next") + end + + test "total_rows matches the length of rows array", ctx do + resp = + Couch.Session.get( + ctx.session, + "/#{ctx.db_name}/_design/#{ctx.ddoc_id}/_view/#{ctx.view_name}", + query: %{page_size: ctx.page_size, descending: ctx.descending} + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + body = resp.body + assert body["total_rows"] == length(body["rows"]) + end + + test "total_rows less than the requested 
page_size", ctx do + resp = + Couch.Session.get( + ctx.session, + "/#{ctx.db_name}/_design/#{ctx.ddoc_id}/_view/#{ctx.view_name}", + query: %{page_size: ctx.page_size, descending: ctx.descending} + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + assert resp.body["total_rows"] <= ctx.page_size + end + end + end + + for descending <- [false, true] do + for n <- [4, 9] do + describe "Pagination API (10 docs) : /{db}/_design/{ddoc}/_view/queries?page_size=#{ + n + }&descending=#{descending} : pages" do + @describetag n_docs: 10 + @describetag descending: descending + @describetag page_size: n + + @describetag queries: %{ + queries: [ + %{ + descending: true + }, + %{ + limit: n + 1, + skip: 2 + } + ] + } + setup [:with_session, :random_db, :with_view, :with_docs] + + test "one of the results contains 'next' bookmark", ctx do + resp = + Couch.Session.post( + ctx.session, + "/#{ctx.db_name}/_design/#{ctx.ddoc_id}/_view/#{ctx.view_name}/queries", + query: %{page_size: ctx.page_size, descending: ctx.descending}, + body: :jiffy.encode(ctx.queries) + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + results = resp.body["results"] + assert Enum.any?(results, fn result -> Map.has_key?(result, "next") end) + end + + test "each 'next' bookmark is working", ctx do + resp = + Couch.Session.post( + ctx.session, + "/#{ctx.db_name}/_design/#{ctx.ddoc_id}/_view/#{ctx.view_name}/queries", + query: %{page_size: ctx.page_size, descending: ctx.descending}, + body: :jiffy.encode(ctx.queries) + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + results = resp.body["results"] + + bookmarks = + results + |> Enum.filter(fn result -> Map.has_key?(result, "next") end) + |> Enum.map(fn result -> Map.get(result, "next") end) + + assert [] != bookmarks + + Enum.each(bookmarks, fn bookmark -> + resp = + Couch.Session.get( + ctx.session, + "/#{ctx.db_name}/_design/#{ctx.ddoc_id}/_view/#{ctx.view_name}", + query: %{bookmark: bookmark} + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + assert [] != resp.body["rows"] + end) + + assert Enum.any?(results, fn result -> Map.has_key?(result, "next") end) + end + + test "can post bookmarks to queries", ctx do + resp = + Couch.Session.post( + ctx.session, + "/#{ctx.db_name}/_design/#{ctx.ddoc_id}/_view/#{ctx.view_name}/queries", + query: %{page_size: ctx.page_size, descending: ctx.descending}, + body: :jiffy.encode(ctx.queries) + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + results = resp.body["results"] + + queries = + results + |> Enum.filter(fn result -> Map.has_key?(result, "next") end) + |> Enum.map(fn result -> %{bookmark: Map.get(result, "next")} end) + + resp = + Couch.Session.post( + ctx.session, + "/#{ctx.db_name}/_design/#{ctx.ddoc_id}/_view/#{ctx.view_name}/queries", + body: :jiffy.encode(%{queries: queries}) + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + + Enum.each(resp.body["results"], fn result -> + assert [] != result["rows"] + end) + end + + test "respect request page_size", ctx do + resp = + Couch.Session.post( + ctx.session, + "/#{ctx.db_name}/_design/#{ctx.ddoc_id}/_view/#{ctx.view_name}/queries", + query: %{page_size: ctx.page_size, descending: ctx.descending}, + body: :jiffy.encode(ctx.queries) + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + results = resp.body["results"] + + Enum.each(results ++ resp.body["results"], fn result -> + assert length(result["rows"]) <= ctx.page_size + 
end) + end + + test "independent page_size in the bookmark", ctx do + resp = + Couch.Session.post( + ctx.session, + "/#{ctx.db_name}/_design/#{ctx.ddoc_id}/_view/#{ctx.view_name}/queries", + query: %{page_size: ctx.page_size, descending: ctx.descending}, + body: :jiffy.encode(ctx.queries) + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + + queries = + resp.body["results"] + |> Enum.filter(fn result -> Map.has_key?(result, "next") end) + |> Enum.map(fn result -> %{bookmark: Map.get(result, "next")} end) + + resp = + Couch.Session.post( + ctx.session, + "/#{ctx.db_name}/_design/#{ctx.ddoc_id}/_view/#{ctx.view_name}/queries", + body: :jiffy.encode(%{queries: queries}) + ) + + assert resp.status_code == 200, "got error #{inspect(resp.body)}" + + Enum.each(resp.body["results"], fn result -> + assert length(result["rows"]) > ctx.page_size + end) + end + + test "can retrieve all pages", ctx do + [descending_query, limit_query] = + paginate_queries( + ctx, + url: + "/#{ctx.db_name}/_design/#{ctx.ddoc_id}/_view/#{ctx.view_name}/queries", + query: %{page_size: ctx.page_size, descending: ctx.descending}, + body: :jiffy.encode(ctx.queries) + ) + + results = List.flatten(descending_query) + assert ctx.n_docs == length(results) + expected_key_order = :descending + expected_ids_order = :ascending + + assert expected_key_order == ordering?(results, "key"), + "expecting keys in #{expected_key_order} order, got: #{ + inspect(field(results, "key")) + }" + + assert expected_ids_order == ordering?(results, "id"), + "expecting ids in #{expected_ids_order} order, got: #{ + inspect(field(results, "id")) + }" + + results = List.flatten(limit_query) + [_descendiing_query, query] = ctx.queries[:queries] + + expected_length = + if ctx.n_docs - query.skip > query.limit do + query.limit + else + query.limit - query.skip + end + + assert expected_length == length(results) + + {expected_key_order, expected_ids_order} = + if ctx.descending do + {:descending, :ascending} + else + {:ascending, :descending} + end + + assert expected_key_order == ordering?(results, "key"), + ~s(expecting keys in #{expected_key_order} order, got: #{ + inspect(field(results, "key")) + }) + + assert expected_ids_order == ordering?(results, "id"), + ~s(expecting keys in #{expected_ids_order} order, got: #{ + inspect(field(results, "id")) + }) + + keys = Enum.map(results, &Map.get(&1, "key")) + end + end + end + end + + for descending <- [false, true] do + for n <- [4, 9] do + describe "Pagination API (10 docs) : /{db}/_design/{ddoc}/_view/queries?page_size=#{ + n + }&descending=#{descending} : pages with same key" do + @describetag descending: descending + @describetag n_docs: 10 + @describetag page_size: n + + @describetag queries: %{ + queries: [ + %{ + descending: true + }, + %{ + limit: n + 1, + skip: 2 + } + ] + } + setup [:with_session, :random_db, :with_view, :with_same_key_docs] + + test "handle same key", ctx do + ''' + make sure the results are first sorted by key and then by id + ''' + + [descending_query, limit_query] = + paginate_queries( + ctx, + url: + "/#{ctx.db_name}/_design/#{ctx.ddoc_id}/_view/#{ctx.view_name}/queries", + query: %{page_size: ctx.page_size, descending: ctx.descending}, + body: :jiffy.encode(ctx.queries) + ) + + aggregate = fn pages -> + Enum.reduce(pages, {[], %{}}, fn page, acc -> + Enum.reduce(page, acc, fn row, {keys, in_acc} -> + id = Map.get(row, "id") + key = Map.get(row, "key") + {keys ++ [key], Map.update(in_acc, key, [id], &(&1 ++ [id]))} + end) + end) + end + + {keys, 
aggregated} = aggregate.(descending_query) + + # keys are sorted in reverse order + assert :descending == ordering?(keys), + ~s(expecting keys in descending order, got: #{inspect(keys)}) + + Enum.each(Map.values(aggregated), fn ids -> + # keys are sorted in reverse order by id + assert :descending == ordering?(ids), + ~s(expecting ids in descending order, got: #{inspect(ids)}) + end) + + {keys, aggregated} = aggregate.(limit_query) + + {expected_key_order, expected_ids_order} = + if ctx.descending do + {:descending, :descending} + else + {:ascending, :ascending} + end + + # keys are sorted + assert expected_key_order == ordering?(keys) or :equal == ordering?(keys), + ~s(expecting keys in #{expected_key_order} order, got: #{inspect(keys)}) + + Enum.each(Map.values(aggregated), fn ids -> + # Keys are sorted by id + assert expected_ids_order == ordering?(ids) or :equal == ordering?(ids), + ~s(expecting ids in #{expected_ids_order} order, got: #{inspect(ids)}) + end) + end + end + end + end + + defp ordering?(maps, key) do + ordering?(field(maps, key)) + end + + defp ordering?(elements) do + ascending = Enum.sort(elements) + descending = Enum.reverse(Enum.sort(elements)) + + case {ascending, descending} do + {^elements, ^elements} -> :equal + {^elements, _} -> :ascending + {_, ^descending} -> :descending + _ -> :unordered + end + end + + defp field(maps, key) do + Enum.map(maps, &Map.get(&1, key)) + end +end diff --git a/src/chttpd/test/exunit/test_helper.exs b/src/chttpd/test/exunit/test_helper.exs new file mode 100644 index 000000000..314050085 --- /dev/null +++ b/src/chttpd/test/exunit/test_helper.exs @@ -0,0 +1,2 @@ +ExUnit.configure(formatters: [JUnitFormatter, ExUnit.CLIFormatter]) +ExUnit.start() diff --git a/src/chttpd/test/exunit/tracing_test.exs b/src/chttpd/test/exunit/tracing_test.exs new file mode 100644 index 000000000..f66fb87a2 --- /dev/null +++ b/src/chttpd/test/exunit/tracing_test.exs @@ -0,0 +1,101 @@ +defmodule Couch.Test.OpenTracing do + use Couch.Test.ExUnit.Case + alias Couch.Test.Setup + alias Couch.Test.Setup.Step + alias Couch.Test.Utils + import Couch.DBTest, only: [retry_until: 1] + + defp create_admin(user_name, password) do + hashed = String.to_charlist(:couch_passwords.hash_admin_password(password)) + :config.set('admins', String.to_charlist(user_name), hashed, false) + end + + defp base_url() do + addr = :config.get('chttpd', 'bind_address', '127.0.0.1') + port = :mochiweb_socket_server.get(:chttpd, :port) + "http://#{addr}:#{port}" + end + + setup_all context do + test_ctx = :test_util.start_couch([:chttpd]) + :ok = create_admin("adm", "pass") + + Map.merge(context, %{ + base_url: base_url(), + user: "adm", + pass: "pass" + }) + end + + setup context do + db_name = Utils.random_name("db") + session = Couch.login(context.user, context.pass, base_url: context.base_url) + + on_exit(fn -> + delete_db(session, db_name) + end) + + create_db(session, db_name) + + Map.merge(context, %{ + db_name: db_name, + session: session + }) + end + + def create_db(session, db_name, opts \\ []) do + retry_until(fn -> + resp = Couch.Session.put(session, "/#{db_name}", opts) + assert resp.status_code in [201, 202] + assert resp.body == %{"ok" => true} + {:ok, resp} + end) + end + + def delete_db(session, db_name) do + retry_until(fn -> + resp = Couch.Session.delete(session, "/#{db_name}") + assert resp.status_code in [200, 202, 404] + {:ok, resp} + end) + end + + def create_doc(session, db_name, body) do + retry_until(fn -> + resp = Couch.Session.post(session, "/#{db_name}", body: 
body) + assert resp.status_code in [201, 202] + assert resp.body["ok"] + {:ok, resp} + end) + end + + defp trace_id() do + :couch_util.to_hex(:crypto.strong_rand_bytes(16)) + end + + defp span_id() do + :couch_util.to_hex(:crypto.strong_rand_bytes(8)) + end + + describe "Open Tracing" do + test "should return success with combined b3 header", ctx do + %{session: session, db_name: db_name} = ctx + doc = '{"mr": "rockoartischocko"}' + {:ok, _} = create_doc(session, db_name, doc) + + resp = + retry_until(fn -> + b3 = "#{trace_id()}-#{span_id()}-#{span_id()}" + + response = + Couch.Session.get(session, "/#{db_name}/_all_docs", headers: [b3: b3]) + + assert %HTTPotion.Response{} = response + response + end) + + assert resp.status_code == 200, "Expected 200, got: #{resp.status_code}" + assert length(resp.body["rows"]) == 1 + end + end +end diff --git a/src/couch/.gitignore b/src/couch/.gitignore index e1fa65333..861974adb 100644 --- a/src/couch/.gitignore +++ b/src/couch/.gitignore @@ -19,3 +19,5 @@ test/engines/log/ .rebar/ .eunit + +rebar.config diff --git a/src/couch/include/couch_db.hrl b/src/couch/include/couch_db.hrl index 830b9bcf4..cc1fb5def 100644 --- a/src/couch/include/couch_db.hrl +++ b/src/couch/include/couch_db.hrl @@ -219,3 +219,6 @@ -type sec_props() :: [tuple()]. -type sec_obj() :: {sec_props()}. + +-define(record_to_keyval(Name, Record), + lists:zip(record_info(fields, Name), tl(tuple_to_list(Record)))). diff --git a/src/couch/include/couch_eunit.hrl b/src/couch/include/couch_eunit.hrl index d3611c88b..188524893 100644 --- a/src/couch/include/couch_eunit.hrl +++ b/src/couch/include/couch_eunit.hrl @@ -49,6 +49,11 @@ Suffix = couch_uuids:random(), iolist_to_binary(["eunit-test-db-", Suffix]) end). +-define(tempshard, + fun() -> + Suffix = couch_uuids:random(), + iolist_to_binary(["shards/80000000-ffffffff/eunit-test-db-", Suffix]) + end). 
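% A hypothetical usage sketch (names other than ?tempshard are assumptions): like
% the temp-db macro whose tail appears just above, ?tempshard expands to a fun, so
% a test case would call it to obtain a unique shard-path database name:
%
%     ShardDbName = ?tempshard(),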
-define(docid, fun() -> integer_to_list(couch_util:unique_monotonic_integer()) diff --git a/src/couch/priv/couch_js/1.8.5/help.h b/src/couch/priv/couch_js/1.8.5/help.h index 678651fd3..3a19901f0 100644 --- a/src/couch/priv/couch_js/1.8.5/help.h +++ b/src/couch/priv/couch_js/1.8.5/help.h @@ -16,7 +16,7 @@ #include "config.h" static const char VERSION_TEMPLATE[] = - "%s - %s\n" + "%s - %s (SpiderMonkey 1.8.5)\n" "\n" "Licensed under the Apache License, Version 2.0 (the \"License\"); you may " "not use\n" @@ -46,15 +46,9 @@ static const char USAGE_TEMPLATE[] = "\n" " -h display a short help message and exit\n" " -V display version information and exit\n" - " -H enable %s cURL bindings (only avaiable\n" - " if package was built with cURL available)\n" - " -T enable test suite specific functions (these\n" - " should not be enabled for production systems)\n" " -S SIZE specify that the runtime should allow at\n" " most SIZE bytes of memory to be allocated\n" " default is 64 MiB\n" - " -u FILE path to a .uri file containing the address\n" - " (or addresses) of one or more servers\n" " --eval Enable runtime code evaluation (dangerous!)\n" "\n" "Report bugs at <%s>.\n"; @@ -78,7 +72,6 @@ static const char USAGE_TEMPLATE[] = basename, \ basename, \ PACKAGE_NAME, \ - basename, \ PACKAGE_BUGREPORT) #define DISPLAY_USAGE couch_usage(BASENAME) diff --git a/src/couch/priv/couch_js/1.8.5/http.c b/src/couch/priv/couch_js/1.8.5/http.c deleted file mode 100644 index c4b389659..000000000 --- a/src/couch/priv/couch_js/1.8.5/http.c +++ /dev/null @@ -1,701 +0,0 @@ -// Licensed under the Apache License, Version 2.0 (the "License"); you may not -// use this file except in compliance with the License. You may obtain a copy of -// the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -// License for the specific language governing permissions and limitations under -// the License. - -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <jsapi.h> -#include "config.h" -#include "utf8.h" -#include "util.h" - -// Soft dependency on cURL bindings because they're -// only used when running the JS tests from the -// command line which is rare. 
-#ifndef HAVE_CURL - -void -http_check_enabled() -{ - fprintf(stderr, "HTTP API was disabled at compile time.\n"); - exit(3); -} - - -JSBool -http_ctor(JSContext* cx, JSObject* req) -{ - return JS_FALSE; -} - - -JSBool -http_dtor(JSContext* cx, JSObject* req) -{ - return JS_FALSE; -} - - -JSBool -http_open(JSContext* cx, JSObject* req, jsval mth, jsval url, jsval snc) -{ - return JS_FALSE; -} - - -JSBool -http_set_hdr(JSContext* cx, JSObject* req, jsval name, jsval val) -{ - return JS_FALSE; -} - - -JSBool -http_send(JSContext* cx, JSObject* req, jsval body) -{ - return JS_FALSE; -} - - -int -http_status(JSContext* cx, JSObject* req, jsval body) -{ - return -1; -} - -JSBool -http_uri(JSContext* cx, JSObject* req, couch_args* args, jsval* uri_val) -{ - return JS_FALSE; -} - - -#else -#include <curl/curl.h> -#ifndef XP_WIN -#include <unistd.h> -#endif - - -void -http_check_enabled() -{ - return; -} - - -// Map some of the string function names to things which exist on Windows -#ifdef XP_WIN -#define strcasecmp _strcmpi -#define strncasecmp _strnicmp -#define snprintf _snprintf -#endif - - -typedef struct curl_slist CurlHeaders; - - -typedef struct { - int method; - char* url; - CurlHeaders* req_headers; - jsint last_status; -} HTTPData; - - -char* METHODS[] = {"GET", "HEAD", "POST", "PUT", "DELETE", "COPY", "OPTIONS", NULL}; - - -#define GET 0 -#define HEAD 1 -#define POST 2 -#define PUT 3 -#define DELETE 4 -#define COPY 5 -#define OPTIONS 6 - - -static JSBool -go(JSContext* cx, JSObject* obj, HTTPData* http, char* body, size_t blen); - - -static JSString* -str_from_binary(JSContext* cx, char* data, size_t length); - - -JSBool -http_ctor(JSContext* cx, JSObject* req) -{ - HTTPData* http = NULL; - JSBool ret = JS_FALSE; - - http = (HTTPData*) malloc(sizeof(HTTPData)); - if(!http) - { - JS_ReportError(cx, "Failed to create CouchHTTP instance."); - goto error; - } - - http->method = -1; - http->url = NULL; - http->req_headers = NULL; - http->last_status = -1; - - if(!JS_SetPrivate(cx, req, http)) - { - JS_ReportError(cx, "Failed to set private CouchHTTP data."); - goto error; - } - - ret = JS_TRUE; - goto success; - -error: - if(http) free(http); - -success: - return ret; -} - - -void -http_dtor(JSContext* cx, JSObject* obj) -{ - HTTPData* http = (HTTPData*) JS_GetPrivate(cx, obj); - if(http) { - if(http->url) free(http->url); - if(http->req_headers) curl_slist_free_all(http->req_headers); - free(http); - } -} - - -JSBool -http_open(JSContext* cx, JSObject* req, jsval mth, jsval url, jsval snc) -{ - HTTPData* http = (HTTPData*) JS_GetPrivate(cx, req); - char* method = NULL; - int methid; - JSBool ret = JS_FALSE; - - if(!http) { - JS_ReportError(cx, "Invalid CouchHTTP instance."); - goto done; - } - - if(JSVAL_IS_VOID(mth)) { - JS_ReportError(cx, "You must specify a method."); - goto done; - } - - method = enc_string(cx, mth, NULL); - if(!method) { - JS_ReportError(cx, "Failed to encode method."); - goto done; - } - - for(methid = 0; METHODS[methid] != NULL; methid++) { - if(strcasecmp(METHODS[methid], method) == 0) break; - } - - if(methid > OPTIONS) { - JS_ReportError(cx, "Invalid method specified."); - goto done; - } - - http->method = methid; - - if(JSVAL_IS_VOID(url)) { - JS_ReportError(cx, "You must specify a URL."); - goto done; - } - - if(http->url != NULL) { - free(http->url); - http->url = NULL; - } - - http->url = enc_string(cx, url, NULL); - if(http->url == NULL) { - JS_ReportError(cx, "Failed to encode URL."); - goto done; - } - - if(JSVAL_IS_BOOLEAN(snc) && JSVAL_TO_BOOLEAN(snc)) { 
- JS_ReportError(cx, "Synchronous flag must be false."); - goto done; - } - - if(http->req_headers) { - curl_slist_free_all(http->req_headers); - http->req_headers = NULL; - } - - // Disable Expect: 100-continue - http->req_headers = curl_slist_append(http->req_headers, "Expect:"); - - ret = JS_TRUE; - -done: - if(method) free(method); - return ret; -} - - -JSBool -http_set_hdr(JSContext* cx, JSObject* req, jsval name, jsval val) -{ - HTTPData* http = (HTTPData*) JS_GetPrivate(cx, req); - char* keystr = NULL; - char* valstr = NULL; - char* hdrbuf = NULL; - size_t hdrlen = -1; - JSBool ret = JS_FALSE; - - if(!http) { - JS_ReportError(cx, "Invalid CouchHTTP instance."); - goto done; - } - - if(JSVAL_IS_VOID(name)) - { - JS_ReportError(cx, "You must speciy a header name."); - goto done; - } - - keystr = enc_string(cx, name, NULL); - if(!keystr) - { - JS_ReportError(cx, "Failed to encode header name."); - goto done; - } - - if(JSVAL_IS_VOID(val)) - { - JS_ReportError(cx, "You must specify a header value."); - goto done; - } - - valstr = enc_string(cx, val, NULL); - if(!valstr) - { - JS_ReportError(cx, "Failed to encode header value."); - goto done; - } - - hdrlen = strlen(keystr) + strlen(valstr) + 3; - hdrbuf = (char*) malloc(hdrlen * sizeof(char)); - if(!hdrbuf) { - JS_ReportError(cx, "Failed to allocate header buffer."); - goto done; - } - - snprintf(hdrbuf, hdrlen, "%s: %s", keystr, valstr); - http->req_headers = curl_slist_append(http->req_headers, hdrbuf); - - ret = JS_TRUE; - -done: - if(keystr) free(keystr); - if(valstr) free(valstr); - if(hdrbuf) free(hdrbuf); - return ret; -} - -JSBool -http_send(JSContext* cx, JSObject* req, jsval body) -{ - HTTPData* http = (HTTPData*) JS_GetPrivate(cx, req); - char* bodystr = NULL; - size_t bodylen = 0; - JSBool ret = JS_FALSE; - - if(!http) { - JS_ReportError(cx, "Invalid CouchHTTP instance."); - goto done; - } - - if(!JSVAL_IS_VOID(body)) { - bodystr = enc_string(cx, body, &bodylen); - if(!bodystr) { - JS_ReportError(cx, "Failed to encode body."); - goto done; - } - } - - ret = go(cx, req, http, bodystr, bodylen); - -done: - if(bodystr) free(bodystr); - return ret; -} - -int -http_status(JSContext* cx, JSObject* req) -{ - HTTPData* http = (HTTPData*) JS_GetPrivate(cx, req); - - if(!http) { - JS_ReportError(cx, "Invalid CouchHTTP instance."); - return JS_FALSE; - } - - return http->last_status; -} - -JSBool -http_uri(JSContext* cx, JSObject* req, couch_args* args, jsval* uri_val) -{ - FILE* uri_fp = NULL; - JSString* uri_str; - - // Default is http://localhost:15986/ when no uri file is specified - if (!args->uri_file) { - uri_str = JS_InternString(cx, "http://localhost:15986/"); - *uri_val = STRING_TO_JSVAL(uri_str); - JS_SetReservedSlot(cx, req, 0, *uri_val); - return JS_TRUE; - } - - // Else check to see if the base url is cached in a reserved slot - if (JS_GetReservedSlot(cx, req, 0, uri_val) && !JSVAL_IS_VOID(*uri_val)) { - return JS_TRUE; - } - - // Read the first line of the couch.uri file. 
- if(!((uri_fp = fopen(args->uri_file, "r")) && - (uri_str = couch_readline(cx, uri_fp)))) { - JS_ReportError(cx, "Failed to read couch.uri file."); - goto error; - } - - fclose(uri_fp); - *uri_val = STRING_TO_JSVAL(uri_str); - JS_SetReservedSlot(cx, req, 0, *uri_val); - return JS_TRUE; - -error: - if(uri_fp) fclose(uri_fp); - return JS_FALSE; -} - - -// Curl Helpers - -typedef struct { - HTTPData* http; - JSContext* cx; - JSObject* resp_headers; - char* sendbuf; - size_t sendlen; - size_t sent; - int sent_once; - char* recvbuf; - size_t recvlen; - size_t read; -} CurlState; - -/* - * I really hate doing this but this doesn't have to be - * uber awesome, it just has to work. - */ -CURL* HTTP_HANDLE = NULL; -char ERRBUF[CURL_ERROR_SIZE]; - -static size_t send_body(void *ptr, size_t size, size_t nmem, void *data); -static int seek_body(void *ptr, curl_off_t offset, int origin); -static size_t recv_body(void *ptr, size_t size, size_t nmem, void *data); -static size_t recv_header(void *ptr, size_t size, size_t nmem, void *data); - -static JSBool -go(JSContext* cx, JSObject* obj, HTTPData* http, char* body, size_t bodylen) -{ - CurlState state; - char* referer; - JSString* jsbody; - JSBool ret = JS_FALSE; - jsval tmp; - - state.cx = cx; - state.http = http; - - state.sendbuf = body; - state.sendlen = bodylen; - state.sent = 0; - state.sent_once = 0; - - state.recvbuf = NULL; - state.recvlen = 0; - state.read = 0; - - if(HTTP_HANDLE == NULL) { - HTTP_HANDLE = curl_easy_init(); - curl_easy_setopt(HTTP_HANDLE, CURLOPT_READFUNCTION, send_body); - curl_easy_setopt(HTTP_HANDLE, CURLOPT_SEEKFUNCTION, - (curl_seek_callback) seek_body); - curl_easy_setopt(HTTP_HANDLE, CURLOPT_HEADERFUNCTION, recv_header); - curl_easy_setopt(HTTP_HANDLE, CURLOPT_WRITEFUNCTION, recv_body); - curl_easy_setopt(HTTP_HANDLE, CURLOPT_NOPROGRESS, 1); - curl_easy_setopt(HTTP_HANDLE, CURLOPT_IPRESOLVE, CURL_IPRESOLVE_V4); - curl_easy_setopt(HTTP_HANDLE, CURLOPT_ERRORBUFFER, ERRBUF); - curl_easy_setopt(HTTP_HANDLE, CURLOPT_COOKIEFILE, ""); - curl_easy_setopt(HTTP_HANDLE, CURLOPT_USERAGENT, - "CouchHTTP Client - Relax"); - } - - if(!HTTP_HANDLE) { - JS_ReportError(cx, "Failed to initialize cURL handle."); - goto done; - } - - if(!JS_GetReservedSlot(cx, obj, 0, &tmp)) { - JS_ReportError(cx, "Failed to readreserved slot."); - goto done; - } - - if(!(referer = enc_string(cx, tmp, NULL))) { - JS_ReportError(cx, "Failed to encode referer."); - goto done; - } - curl_easy_setopt(HTTP_HANDLE, CURLOPT_REFERER, referer); - free(referer); - - if(http->method < 0 || http->method > OPTIONS) { - JS_ReportError(cx, "INTERNAL: Unknown method."); - goto done; - } - - curl_easy_setopt(HTTP_HANDLE, CURLOPT_CUSTOMREQUEST, METHODS[http->method]); - curl_easy_setopt(HTTP_HANDLE, CURLOPT_NOBODY, 0); - curl_easy_setopt(HTTP_HANDLE, CURLOPT_FOLLOWLOCATION, 1); - curl_easy_setopt(HTTP_HANDLE, CURLOPT_UPLOAD, 0); - - if(http->method == HEAD) { - curl_easy_setopt(HTTP_HANDLE, CURLOPT_NOBODY, 1); - curl_easy_setopt(HTTP_HANDLE, CURLOPT_FOLLOWLOCATION, 0); - } else if(http->method == POST || http->method == PUT) { - curl_easy_setopt(HTTP_HANDLE, CURLOPT_UPLOAD, 1); - curl_easy_setopt(HTTP_HANDLE, CURLOPT_FOLLOWLOCATION, 0); - } - - if(body && bodylen) { - curl_easy_setopt(HTTP_HANDLE, CURLOPT_INFILESIZE, bodylen); - } else { - curl_easy_setopt(HTTP_HANDLE, CURLOPT_INFILESIZE, 0); - } - - // curl_easy_setopt(HTTP_HANDLE, CURLOPT_VERBOSE, 1); - - curl_easy_setopt(HTTP_HANDLE, CURLOPT_URL, http->url); - curl_easy_setopt(HTTP_HANDLE, CURLOPT_HTTPHEADER, 
http->req_headers); - curl_easy_setopt(HTTP_HANDLE, CURLOPT_READDATA, &state); - curl_easy_setopt(HTTP_HANDLE, CURLOPT_SEEKDATA, &state); - curl_easy_setopt(HTTP_HANDLE, CURLOPT_WRITEHEADER, &state); - curl_easy_setopt(HTTP_HANDLE, CURLOPT_WRITEDATA, &state); - - if(curl_easy_perform(HTTP_HANDLE) != 0) { - JS_ReportError(cx, "Failed to execute HTTP request: %s", ERRBUF); - goto done; - } - - if(!state.resp_headers) { - JS_ReportError(cx, "Failed to recieve HTTP headers."); - goto done; - } - - tmp = OBJECT_TO_JSVAL(state.resp_headers); - if(!JS_DefineProperty( - cx, obj, - "_headers", - tmp, - NULL, NULL, - JSPROP_READONLY - )) { - JS_ReportError(cx, "INTERNAL: Failed to set response headers."); - goto done; - } - - if(state.recvbuf) { - state.recvbuf[state.read] = '\0'; - jsbody = dec_string(cx, state.recvbuf, state.read+1); - if(!jsbody) { - // If we can't decode the body as UTF-8 we forcefully - // convert it to a string by just forcing each byte - // to a jschar. - jsbody = str_from_binary(cx, state.recvbuf, state.read); - if(!jsbody) { - if(!JS_IsExceptionPending(cx)) { - JS_ReportError(cx, "INTERNAL: Failed to decode body."); - } - goto done; - } - } - tmp = STRING_TO_JSVAL(jsbody); - } else { - tmp = JS_GetEmptyStringValue(cx); - } - - if(!JS_DefineProperty( - cx, obj, - "responseText", - tmp, - NULL, NULL, - JSPROP_READONLY - )) { - JS_ReportError(cx, "INTERNAL: Failed to set responseText."); - goto done; - } - - ret = JS_TRUE; - -done: - if(state.recvbuf) JS_free(cx, state.recvbuf); - return ret; -} - -static size_t -send_body(void *ptr, size_t size, size_t nmem, void *data) -{ - CurlState* state = (CurlState*) data; - size_t length = size * nmem; - size_t towrite = state->sendlen - state->sent; - - // Assume this is cURL trying to resend a request that - // failed. - if(towrite == 0 && state->sent_once == 0) { - state->sent_once = 1; - return 0; - } else if(towrite == 0) { - state->sent = 0; - state->sent_once = 0; - towrite = state->sendlen; - } - - if(length < towrite) towrite = length; - - memcpy(ptr, state->sendbuf + state->sent, towrite); - state->sent += towrite; - - return towrite; -} - -static int -seek_body(void* ptr, curl_off_t offset, int origin) -{ - CurlState* state = (CurlState*) ptr; - if(origin != SEEK_SET) return -1; - - state->sent = (size_t) offset; - return (int) state->sent; -} - -static size_t -recv_header(void *ptr, size_t size, size_t nmem, void *data) -{ - CurlState* state = (CurlState*) data; - char code[4]; - char* header = (char*) ptr; - size_t length = size * nmem; - JSString* hdr = NULL; - jsuint hdrlen; - jsval hdrval; - - if(length > 7 && strncasecmp(header, "HTTP/1.", 7) == 0) { - if(length < 12) { - return CURLE_WRITE_ERROR; - } - - memcpy(code, header+9, 3*sizeof(char)); - code[3] = '\0'; - state->http->last_status = atoi(code); - - state->resp_headers = JS_NewArrayObject(state->cx, 0, NULL); - if(!state->resp_headers) { - return CURLE_WRITE_ERROR; - } - - return length; - } - - // We get a notice at the \r\n\r\n after headers. - if(length <= 2) { - return length; - } - - // Append the new header to our array. 
- hdr = dec_string(state->cx, header, length); - if(!hdr) { - return CURLE_WRITE_ERROR; - } - - if(!JS_GetArrayLength(state->cx, state->resp_headers, &hdrlen)) { - return CURLE_WRITE_ERROR; - } - - hdrval = STRING_TO_JSVAL(hdr); - if(!JS_SetElement(state->cx, state->resp_headers, hdrlen, &hdrval)) { - return CURLE_WRITE_ERROR; - } - - return length; -} - -static size_t -recv_body(void *ptr, size_t size, size_t nmem, void *data) -{ - CurlState* state = (CurlState*) data; - size_t length = size * nmem; - char* tmp = NULL; - - if(!state->recvbuf) { - state->recvlen = 4096; - state->read = 0; - state->recvbuf = JS_malloc(state->cx, state->recvlen); - } - - if(!state->recvbuf) { - return CURLE_WRITE_ERROR; - } - - // +1 so we can add '\0' back up in the go function. - while(length+1 > state->recvlen - state->read) state->recvlen *= 2; - tmp = JS_realloc(state->cx, state->recvbuf, state->recvlen); - if(!tmp) return CURLE_WRITE_ERROR; - state->recvbuf = tmp; - - memcpy(state->recvbuf + state->read, ptr, length); - state->read += length; - return length; -} - -JSString* -str_from_binary(JSContext* cx, char* data, size_t length) -{ - jschar* conv = (jschar*) JS_malloc(cx, length * sizeof(jschar)); - JSString* ret = NULL; - size_t i; - - if(!conv) return NULL; - - for(i = 0; i < length; i++) { - conv[i] = (jschar) data[i]; - } - - ret = JS_NewUCString(cx, conv, length); - if(!ret) JS_free(cx, conv); - - return ret; -} - -#endif /* HAVE_CURL */ diff --git a/src/couch/priv/couch_js/1.8.5/http.h b/src/couch/priv/couch_js/1.8.5/http.h deleted file mode 100644 index 63d45bd06..000000000 --- a/src/couch/priv/couch_js/1.8.5/http.h +++ /dev/null @@ -1,27 +0,0 @@ -// Licensed under the Apache License, Version 2.0 (the "License"); you may not -// use this file except in compliance with the License. You may obtain a copy of -// the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -// License for the specific language governing permissions and limitations under -// the License. 
- -#ifndef COUCH_JS_HTTP_H -#define COUCH_JS_HTTP_H - -#include "util.h" - -void http_check_enabled(); -JSBool http_ctor(JSContext* cx, JSObject* req); -void http_dtor(JSContext* cx, JSObject* req); -JSBool http_open(JSContext* cx, JSObject* req, jsval mth, jsval url, jsval snc); -JSBool http_set_hdr(JSContext* cx, JSObject* req, jsval name, jsval val); -JSBool http_send(JSContext* cx, JSObject* req, jsval body); -int http_status(JSContext* cx, JSObject* req); -JSBool http_uri(JSContext* cx, JSObject *req, couch_args* args, jsval* uri); - -#endif diff --git a/src/couch/priv/couch_js/1.8.5/main.c b/src/couch/priv/couch_js/1.8.5/main.c index 986791c90..c8e385cc9 100644 --- a/src/couch/priv/couch_js/1.8.5/main.c +++ b/src/couch/priv/couch_js/1.8.5/main.c @@ -22,7 +22,6 @@ #include <jsapi.h> #include "config.h" -#include "http.h" #include "utf8.h" #include "util.h" @@ -49,105 +48,6 @@ static JSClass global_class = { JSCLASS_NO_OPTIONAL_MEMBERS }; - -static JSBool -req_ctor(JSContext* cx, uintN argc, jsval* vp) -{ - JSBool ret; - JSObject* obj = JS_NewObjectForConstructor(cx, vp); - if(!obj) { - JS_ReportError(cx, "Failed to create CouchHTTP instance.\n"); - return JS_FALSE; - } - ret = http_ctor(cx, obj); - JS_SET_RVAL(cx, vp, OBJECT_TO_JSVAL(obj)); - return ret; -} - - -static void -req_dtor(JSContext* cx, JSObject* obj) -{ - http_dtor(cx, obj); -} - - -static JSBool -req_open(JSContext* cx, uintN argc, jsval* vp) -{ - JSObject* obj = JS_THIS_OBJECT(cx, vp); - jsval* argv = JS_ARGV(cx, vp); - JSBool ret = JS_FALSE; - - if(argc == 2) { - ret = http_open(cx, obj, argv[0], argv[1], JSVAL_FALSE); - } else if(argc == 3) { - ret = http_open(cx, obj, argv[0], argv[1], argv[2]); - } else { - JS_ReportError(cx, "Invalid call to CouchHTTP.open"); - } - - JS_SET_RVAL(cx, vp, JSVAL_VOID); - return ret; -} - - -static JSBool -req_set_hdr(JSContext* cx, uintN argc, jsval* vp) -{ - JSObject* obj = JS_THIS_OBJECT(cx, vp); - jsval* argv = JS_ARGV(cx, vp); - JSBool ret = JS_FALSE; - - if(argc == 2) { - ret = http_set_hdr(cx, obj, argv[0], argv[1]); - } else { - JS_ReportError(cx, "Invalid call to CouchHTTP.set_header"); - } - - JS_SET_RVAL(cx, vp, JSVAL_VOID); - return ret; -} - - -static JSBool -req_send(JSContext* cx, uintN argc, jsval* vp) -{ - JSObject* obj = JS_THIS_OBJECT(cx, vp); - jsval* argv = JS_ARGV(cx, vp); - JSBool ret = JS_FALSE; - - if(argc == 1) { - ret = http_send(cx, obj, argv[0]); - } else { - JS_ReportError(cx, "Invalid call to CouchHTTP.send"); - } - - JS_SET_RVAL(cx, vp, JSVAL_VOID); - return ret; -} - - -static JSBool -req_status(JSContext* cx, JSObject* obj, jsid pid, jsval* vp) -{ - int status = http_status(cx, obj); - if(status < 0) - return JS_FALSE; - - JS_SET_RVAL(cx, vp, INT_TO_JSVAL(status)); - return JS_TRUE; -} - - -static JSBool -base_url(JSContext *cx, JSObject* obj, jsid pid, jsval* vp) -{ - couch_args *args = (couch_args*)JS_GetContextPrivate(cx); - return http_uri(cx, obj, args, &JS_RVAL(cx, vp)); -} - - static JSBool evalcx(JSContext *cx, uintN argc, jsval* vp) { @@ -281,63 +181,6 @@ seal(JSContext* cx, uintN argc, jsval* vp) } -static JSBool -js_sleep(JSContext* cx, uintN argc, jsval* vp) -{ - jsval* argv = JS_ARGV(cx, vp); - int duration = 0; - if(!JS_ConvertArguments(cx, argc, argv, "/i", &duration)) { - return JS_FALSE; - } - -#ifdef XP_WIN - Sleep(duration); -#else - usleep(duration * 1000); -#endif - - return JS_TRUE; -} - - -JSClass CouchHTTPClass = { - "CouchHTTP", - JSCLASS_HAS_PRIVATE - | JSCLASS_CONSTRUCT_PROTOTYPE - | JSCLASS_HAS_RESERVED_SLOTS(2), - 
JS_PropertyStub, - JS_PropertyStub, - JS_PropertyStub, - JS_StrictPropertyStub, - JS_EnumerateStub, - JS_ResolveStub, - JS_ConvertStub, - req_dtor, - JSCLASS_NO_OPTIONAL_MEMBERS -}; - - -JSPropertySpec CouchHTTPProperties[] = { - {"status", 0, JSPROP_READONLY, req_status, NULL}, - {"base_url", 0, JSPROP_READONLY | JSPROP_SHARED, base_url, NULL}, - {0, 0, 0, 0, 0} -}; - - -JSFunctionSpec CouchHTTPFunctions[] = { - JS_FS("_open", req_open, 3, 0), - JS_FS("_setRequestHeader", req_set_hdr, 2, 0), - JS_FS("_send", req_send, 1, 0), - JS_FS_END -}; - - -JSFunctionSpec TestSuiteFunctions[] = { - JS_FS("sleep", js_sleep, 1, 0), - JS_FS_END -}; - - static JSFunctionSpec global_functions[] = { JS_FS("evalcx", evalcx, 0, 0), JS_FS("gc", gc, 0, 0), @@ -376,7 +219,6 @@ main(int argc, const char* argv[]) JSContext* cx = NULL; JSObject* global = NULL; JSCrossCompartmentCall *call = NULL; - JSObject* klass = NULL; JSSCRIPT_TYPE script; JSString* scriptsrc; const jschar* schars; @@ -420,30 +262,6 @@ main(int argc, const char* argv[]) if(couch_load_funcs(cx, global, global_functions) != JS_TRUE) return 1; - if(args->use_http) { - http_check_enabled(); - - klass = JS_InitClass( - cx, global, - NULL, - &CouchHTTPClass, req_ctor, - 0, - CouchHTTPProperties, CouchHTTPFunctions, - NULL, NULL - ); - - if(!klass) - { - fprintf(stderr, "Failed to initialize CouchHTTP class.\n"); - exit(2); - } - } - - if(args->use_test_funs) { - if(couch_load_funcs(cx, global, TestSuiteFunctions) != JS_TRUE) - return 1; - } - for(i = 0 ; args->scripts[i] ; i++) { // Convert script source to jschars. scriptsrc = couch_readfile(cx, args->scripts[i]); diff --git a/src/couch/priv/couch_js/1.8.5/util.c b/src/couch/priv/couch_js/1.8.5/util.c index cf676ea33..5cf94b63a 100644 --- a/src/couch/priv/couch_js/1.8.5/util.c +++ b/src/couch/priv/couch_js/1.8.5/util.c @@ -96,8 +96,6 @@ couch_parse_args(int argc, const char* argv[]) fprintf(stderr, "Invalid stack size.\n"); exit(2); } - } else if(strcmp("-u", argv[i]) == 0) { - args->uri_file = argv[++i]; } else if(strcmp("--eval", argv[i]) == 0) { args->eval = 1; } else if(strcmp("--", argv[i]) == 0) { diff --git a/src/couch/priv/couch_js/1.8.5/util.h b/src/couch/priv/couch_js/1.8.5/util.h index b24d7f76f..9dd290a4c 100644 --- a/src/couch/priv/couch_js/1.8.5/util.h +++ b/src/couch/priv/couch_js/1.8.5/util.h @@ -21,8 +21,6 @@ typedef struct { int use_test_funs; int stack_size; const char** scripts; - const char* uri_file; - JSString* uri; } couch_args; couch_args* couch_parse_args(int argc, const char* argv[]); diff --git a/src/couch/priv/couch_js/60/help.h b/src/couch/priv/couch_js/60/help.h index 678651fd3..826babbba 100644 --- a/src/couch/priv/couch_js/60/help.h +++ b/src/couch/priv/couch_js/60/help.h @@ -16,7 +16,7 @@ #include "config.h" static const char VERSION_TEMPLATE[] = - "%s - %s\n" + "%s - %s (SpiderMonkey 60)\n" "\n" "Licensed under the Apache License, Version 2.0 (the \"License\"); you may " "not use\n" @@ -46,15 +46,9 @@ static const char USAGE_TEMPLATE[] = "\n" " -h display a short help message and exit\n" " -V display version information and exit\n" - " -H enable %s cURL bindings (only avaiable\n" - " if package was built with cURL available)\n" - " -T enable test suite specific functions (these\n" - " should not be enabled for production systems)\n" " -S SIZE specify that the runtime should allow at\n" " most SIZE bytes of memory to be allocated\n" " default is 64 MiB\n" - " -u FILE path to a .uri file containing the address\n" - " (or addresses) of one or more servers\n" " --eval 
Enable runtime code evaluation (dangerous!)\n" "\n" "Report bugs at <%s>.\n"; @@ -78,7 +72,6 @@ static const char USAGE_TEMPLATE[] = basename, \ basename, \ PACKAGE_NAME, \ - basename, \ PACKAGE_BUGREPORT) #define DISPLAY_USAGE couch_usage(BASENAME) diff --git a/src/couch/priv/couch_js/60/http.cpp b/src/couch/priv/couch_js/60/http.cpp deleted file mode 100644 index 9ab47b2f0..000000000 --- a/src/couch/priv/couch_js/60/http.cpp +++ /dev/null @@ -1,707 +0,0 @@ -// Licensed under the Apache License, Version 2.0 (the "License"); you may not -// use this file except in compliance with the License. You may obtain a copy of -// the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -// License for the specific language governing permissions and limitations under -// the License. - -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <jsapi.h> -#include <js/Initialization.h> -#include "config.h" -#include "utf8.h" -#include "util.h" - -// Soft dependency on cURL bindings because they're -// only used when running the JS tests from the -// command line which is rare. -#ifndef HAVE_CURL - -void -http_check_enabled() -{ - fprintf(stderr, "HTTP API was disabled at compile time.\n"); - exit(3); -} - - -bool -http_ctor(JSContext* cx, JSObject* req) -{ - return false; -} - - -void -http_dtor(JSFreeOp* fop, JSObject* req) -{ - return; -} - - -bool -http_open(JSContext* cx, JSObject* req, JS::Value mth, JS::Value url, JS::Value snc) -{ - return false; -} - - -bool -http_set_hdr(JSContext* cx, JSObject* req, JS::Value name, JS::Value val) -{ - return false; -} - - -bool -http_send(JSContext* cx, JSObject* req, JS::Value body) -{ - return false; -} - - -int -http_status(JSContext* cx, JSObject* req) -{ - return -1; -} - -bool -http_uri(JSContext* cx, JSObject* req, couch_args* args, JS::Value* uri_val) -{ - return false; -} - - -#else -#include <curl/curl.h> -#ifndef XP_WIN -#include <unistd.h> -#endif - - -void -http_check_enabled() -{ - return; -} - - -// Map some of the string function names to things which exist on Windows -#ifdef XP_WIN -#define strcasecmp _strcmpi -#define strncasecmp _strnicmp -#define snprintf _snprintf -#endif - - -typedef struct curl_slist CurlHeaders; - - -typedef struct { - int method; - char* url; - CurlHeaders* req_headers; - int16_t last_status; -} HTTPData; - - -const char* METHODS[] = {"GET", "HEAD", "POST", "PUT", "DELETE", "COPY", "OPTIONS", NULL}; - - -#define GET 0 -#define HEAD 1 -#define POST 2 -#define PUT 3 -#define DELETE 4 -#define COPY 5 -#define OPTIONS 6 - - -static bool -go(JSContext* cx, JSObject* obj, HTTPData* http, char* body, size_t blen); - - -static JSString* -str_from_binary(JSContext* cx, char* data, size_t length); - - -bool -http_ctor(JSContext* cx, JSObject* req) -{ - HTTPData* http = NULL; - bool ret = false; - - http = (HTTPData*) malloc(sizeof(HTTPData)); - if(!http) - { - JS_ReportErrorUTF8(cx, "Failed to create CouchHTTP instance."); - goto error; - } - - http->method = -1; - http->url = NULL; - http->req_headers = NULL; - http->last_status = -1; - - JS_SetPrivate(req, http); - - ret = true; - goto success; - -error: - if(http) free(http); - -success: - return ret; -} - - -void -http_dtor(JSFreeOp* fop, JSObject* obj) -{ - HTTPData* http = 
(HTTPData*) JS_GetPrivate(obj); - if(http) { - if(http->url) free(http->url); - if(http->req_headers) curl_slist_free_all(http->req_headers); - free(http); - } -} - - -bool -http_open(JSContext* cx, JSObject* req, JS::Value mth, JS::Value url, JS::Value snc) -{ - HTTPData* http = (HTTPData*) JS_GetPrivate(req); - char* method = NULL; - int methid; - bool ret = false; - - if(!http) { - JS_ReportErrorUTF8(cx, "Invalid CouchHTTP instance."); - goto done; - } - - if(mth.isUndefined()) { - JS_ReportErrorUTF8(cx, "You must specify a method."); - goto done; - } - - method = enc_string(cx, mth, NULL); - if(!method) { - JS_ReportErrorUTF8(cx, "Failed to encode method."); - goto done; - } - - for(methid = 0; METHODS[methid] != NULL; methid++) { - if(strcasecmp(METHODS[methid], method) == 0) break; - } - - if(methid > OPTIONS) { - JS_ReportErrorUTF8(cx, "Invalid method specified."); - goto done; - } - - http->method = methid; - - if(url.isUndefined()) { - JS_ReportErrorUTF8(cx, "You must specify a URL."); - goto done; - } - - if(http->url != NULL) { - free(http->url); - http->url = NULL; - } - - http->url = enc_string(cx, url, NULL); - if(http->url == NULL) { - JS_ReportErrorUTF8(cx, "Failed to encode URL."); - goto done; - } - - if(snc.isBoolean() && snc.isTrue()) { - JS_ReportErrorUTF8(cx, "Synchronous flag must be false."); - goto done; - } - - if(http->req_headers) { - curl_slist_free_all(http->req_headers); - http->req_headers = NULL; - } - - // Disable Expect: 100-continue - http->req_headers = curl_slist_append(http->req_headers, "Expect:"); - - ret = true; - -done: - if(method) free(method); - return ret; -} - - -bool -http_set_hdr(JSContext* cx, JSObject* req, JS::Value name, JS::Value val) -{ - HTTPData* http = (HTTPData*) JS_GetPrivate(req); - char* keystr = NULL; - char* valstr = NULL; - char* hdrbuf = NULL; - size_t hdrlen = -1; - bool ret = false; - - if(!http) { - JS_ReportErrorUTF8(cx, "Invalid CouchHTTP instance."); - goto done; - } - - if(name.isUndefined()) - { - JS_ReportErrorUTF8(cx, "You must speciy a header name."); - goto done; - } - - keystr = enc_string(cx, name, NULL); - if(!keystr) - { - JS_ReportErrorUTF8(cx, "Failed to encode header name."); - goto done; - } - - if(val.isUndefined()) - { - JS_ReportErrorUTF8(cx, "You must specify a header value."); - goto done; - } - - valstr = enc_string(cx, val, NULL); - if(!valstr) - { - JS_ReportErrorUTF8(cx, "Failed to encode header value."); - goto done; - } - - hdrlen = strlen(keystr) + strlen(valstr) + 3; - hdrbuf = (char*) malloc(hdrlen * sizeof(char)); - if(!hdrbuf) { - JS_ReportErrorUTF8(cx, "Failed to allocate header buffer."); - goto done; - } - - snprintf(hdrbuf, hdrlen, "%s: %s", keystr, valstr); - http->req_headers = curl_slist_append(http->req_headers, hdrbuf); - - ret = true; - -done: - if(keystr) free(keystr); - if(valstr) free(valstr); - if(hdrbuf) free(hdrbuf); - return ret; -} - -bool -http_send(JSContext* cx, JSObject* req, JS::Value body) -{ - HTTPData* http = (HTTPData*) JS_GetPrivate(req); - char* bodystr = NULL; - size_t bodylen = 0; - bool ret = false; - - if(!http) { - JS_ReportErrorUTF8(cx, "Invalid CouchHTTP instance."); - goto done; - } - - if(!body.isUndefined()) { - bodystr = enc_string(cx, body, &bodylen); - if(!bodystr) { - JS_ReportErrorUTF8(cx, "Failed to encode body."); - goto done; - } - } - - ret = go(cx, req, http, bodystr, bodylen); - -done: - if(bodystr) free(bodystr); - return ret; -} - -int -http_status(JSContext* cx, JSObject* req) -{ - HTTPData* http = (HTTPData*) JS_GetPrivate(req); - - 
if(!http) { - JS_ReportErrorUTF8(cx, "Invalid CouchHTTP instance."); - return false; - } - - return http->last_status; -} - -bool -http_uri(JSContext* cx, JSObject* req, couch_args* args, JS::Value* uri_val) -{ - FILE* uri_fp = NULL; - JSString* uri_str; - - // Default is http://localhost:15986/ when no uri file is specified - if (!args->uri_file) { - uri_str = JS_NewStringCopyZ(cx, "http://localhost:15986/"); - *uri_val = JS::StringValue(uri_str); - JS_SetReservedSlot(req, 0, *uri_val); - return true; - } - - // Else check to see if the base url is cached in a reserved slot - *uri_val = JS_GetReservedSlot(req, 0); - if (!(*uri_val).isUndefined()) { - return true; - } - - // Read the first line of the couch.uri file. - if(!((uri_fp = fopen(args->uri_file, "r")) && - (uri_str = couch_readline(cx, uri_fp)))) { - JS_ReportErrorUTF8(cx, "Failed to read couch.uri file."); - goto error; - } - - fclose(uri_fp); - *uri_val = JS::StringValue(uri_str); - JS_SetReservedSlot(req, 0, *uri_val); - return true; - -error: - if(uri_fp) fclose(uri_fp); - return false; -} - - -// Curl Helpers - -typedef struct { - HTTPData* http; - JSContext* cx; - JSObject* resp_headers; - char* sendbuf; - size_t sendlen; - size_t sent; - int sent_once; - char* recvbuf; - size_t recvlen; - size_t read; -} CurlState; - -/* - * I really hate doing this but this doesn't have to be - * uber awesome, it just has to work. - */ -CURL* HTTP_HANDLE = NULL; -char ERRBUF[CURL_ERROR_SIZE]; - -static size_t send_body(void *ptr, size_t size, size_t nmem, void *data); -static int seek_body(void *ptr, curl_off_t offset, int origin); -static size_t recv_body(void *ptr, size_t size, size_t nmem, void *data); -static size_t recv_header(void *ptr, size_t size, size_t nmem, void *data); - -static bool -go(JSContext* cx, JSObject* obj, HTTPData* http, char* body, size_t bodylen) -{ - CurlState state; - char* referer; - JSString* jsbody; - bool ret = false; - JS::Value tmp; - JS::RootedObject robj(cx, obj); - JS::RootedValue vobj(cx); - - - state.cx = cx; - state.http = http; - - state.sendbuf = body; - state.sendlen = bodylen; - state.sent = 0; - state.sent_once = 0; - - state.recvbuf = NULL; - state.recvlen = 0; - state.read = 0; - - if(HTTP_HANDLE == NULL) { - HTTP_HANDLE = curl_easy_init(); - curl_easy_setopt(HTTP_HANDLE, CURLOPT_READFUNCTION, send_body); - curl_easy_setopt(HTTP_HANDLE, CURLOPT_SEEKFUNCTION, - (curl_seek_callback) seek_body); - curl_easy_setopt(HTTP_HANDLE, CURLOPT_HEADERFUNCTION, recv_header); - curl_easy_setopt(HTTP_HANDLE, CURLOPT_WRITEFUNCTION, recv_body); - curl_easy_setopt(HTTP_HANDLE, CURLOPT_NOPROGRESS, 1); - curl_easy_setopt(HTTP_HANDLE, CURLOPT_IPRESOLVE, CURL_IPRESOLVE_V4); - curl_easy_setopt(HTTP_HANDLE, CURLOPT_ERRORBUFFER, ERRBUF); - curl_easy_setopt(HTTP_HANDLE, CURLOPT_COOKIEFILE, ""); - curl_easy_setopt(HTTP_HANDLE, CURLOPT_USERAGENT, - "CouchHTTP Client - Relax"); - } - - if(!HTTP_HANDLE) { - JS_ReportErrorUTF8(cx, "Failed to initialize cURL handle."); - if(state.recvbuf) JS_free(cx, state.recvbuf); - return ret; - } - - tmp = JS_GetReservedSlot(obj, 0); - - if(!(referer = enc_string(cx, tmp, NULL))) { - JS_ReportErrorUTF8(cx, "Failed to encode referer."); - if(state.recvbuf) JS_free(cx, state.recvbuf); - return ret; - } - curl_easy_setopt(HTTP_HANDLE, CURLOPT_REFERER, referer); - free(referer); - - if(http->method < 0 || http->method > OPTIONS) { - JS_ReportErrorUTF8(cx, "INTERNAL: Unknown method."); - if(state.recvbuf) JS_free(cx, state.recvbuf); - return ret; - } - - curl_easy_setopt(HTTP_HANDLE, 
CURLOPT_CUSTOMREQUEST, METHODS[http->method]); - curl_easy_setopt(HTTP_HANDLE, CURLOPT_NOBODY, 0); - curl_easy_setopt(HTTP_HANDLE, CURLOPT_FOLLOWLOCATION, 1); - curl_easy_setopt(HTTP_HANDLE, CURLOPT_UPLOAD, 0); - - if(http->method == HEAD) { - curl_easy_setopt(HTTP_HANDLE, CURLOPT_NOBODY, 1); - curl_easy_setopt(HTTP_HANDLE, CURLOPT_FOLLOWLOCATION, 0); - } else if(http->method == POST || http->method == PUT) { - curl_easy_setopt(HTTP_HANDLE, CURLOPT_UPLOAD, 1); - curl_easy_setopt(HTTP_HANDLE, CURLOPT_FOLLOWLOCATION, 0); - } - - if(body && bodylen) { - curl_easy_setopt(HTTP_HANDLE, CURLOPT_INFILESIZE, bodylen); - } else { - curl_easy_setopt(HTTP_HANDLE, CURLOPT_INFILESIZE, 0); - } - - // curl_easy_setopt(HTTP_HANDLE, CURLOPT_VERBOSE, 1); - - curl_easy_setopt(HTTP_HANDLE, CURLOPT_URL, http->url); - curl_easy_setopt(HTTP_HANDLE, CURLOPT_HTTPHEADER, http->req_headers); - curl_easy_setopt(HTTP_HANDLE, CURLOPT_READDATA, &state); - curl_easy_setopt(HTTP_HANDLE, CURLOPT_SEEKDATA, &state); - curl_easy_setopt(HTTP_HANDLE, CURLOPT_WRITEHEADER, &state); - curl_easy_setopt(HTTP_HANDLE, CURLOPT_WRITEDATA, &state); - - if(curl_easy_perform(HTTP_HANDLE) != 0) { - JS_ReportErrorUTF8(cx, "Failed to execute HTTP request: %s", ERRBUF); - if(state.recvbuf) JS_free(cx, state.recvbuf); - return ret; - } - - if(!state.resp_headers) { - JS_ReportErrorUTF8(cx, "Failed to recieve HTTP headers."); - if(state.recvbuf) JS_free(cx, state.recvbuf); - return ret; - } - tmp = JS::ObjectValue(*state.resp_headers); - JS::RootedValue rtmp(cx, tmp); - - if(!JS_DefineProperty( - cx, robj, - "_headers", - rtmp, - JSPROP_READONLY - )) { - JS_ReportErrorUTF8(cx, "INTERNAL: Failed to set response headers."); - if(state.recvbuf) JS_free(cx, state.recvbuf); - return ret;; - } - - if(state.recvbuf) { - state.recvbuf[state.read] = '\0'; - jsbody = dec_string(cx, state.recvbuf, state.read+1); - if(!jsbody) { - // If we can't decode the body as UTF-8 we forcefully - // convert it to a string by just forcing each byte - // to a char16_t. - jsbody = str_from_binary(cx, state.recvbuf, state.read); - if(!jsbody) { - if(!JS_IsExceptionPending(cx)) { - JS_ReportErrorUTF8(cx, "INTERNAL: Failed to decode body."); - } - if(state.recvbuf) JS_free(cx, state.recvbuf); - return ret; - } - } - tmp = JS::StringValue(jsbody); - } else { - tmp = JS_GetEmptyStringValue(cx); - } - - JS::RootedValue rtmp2(cx, tmp); - - if(!JS_DefineProperty( - cx, robj, - "responseText", - rtmp2, - JSPROP_READONLY - )) { - JS_ReportErrorUTF8(cx, "INTERNAL: Failed to set responseText."); - if(state.recvbuf) JS_free(cx, state.recvbuf); - return ret; - } - - ret = true; - if(state.recvbuf) JS_free(cx, state.recvbuf); - return ret; -} - -static size_t -send_body(void *ptr, size_t size, size_t nmem, void *data) -{ - CurlState* state = (CurlState*) data; - size_t length = size * nmem; - size_t towrite = state->sendlen - state->sent; - - // Assume this is cURL trying to resend a request that - // failed. 
- if(towrite == 0 && state->sent_once == 0) { - state->sent_once = 1; - return 0; - } else if(towrite == 0) { - state->sent = 0; - state->sent_once = 0; - towrite = state->sendlen; - } - - if(length < towrite) towrite = length; - - memcpy(ptr, state->sendbuf + state->sent, towrite); - state->sent += towrite; - - return towrite; -} - -static int -seek_body(void* ptr, curl_off_t offset, int origin) -{ - CurlState* state = (CurlState*) ptr; - if(origin != SEEK_SET) return -1; - - state->sent = (size_t) offset; - return (int) state->sent; -} - -static size_t -recv_header(void *ptr, size_t size, size_t nmem, void *data) -{ - CurlState* state = (CurlState*) data; - char code[4]; - char* header = (char*) ptr; - size_t length = size * nmem; - JSString* hdr = NULL; - uint32_t hdrlen; - - if(length > 7 && strncasecmp(header, "HTTP/1.", 7) == 0) { - if(length < 12) { - return CURLE_WRITE_ERROR; - } - - memcpy(code, header+9, 3*sizeof(char)); - code[3] = '\0'; - state->http->last_status = atoi(code); - - state->resp_headers = JS_NewArrayObject(state->cx, 0); - if(!state->resp_headers) { - return CURLE_WRITE_ERROR; - } - - return length; - } - - // We get a notice at the \r\n\r\n after headers. - if(length <= 2) { - return length; - } - - // Append the new header to our array. - hdr = dec_string(state->cx, header, length); - if(!hdr) { - return CURLE_WRITE_ERROR; - } - - JS::RootedObject obj(state->cx, state->resp_headers); - if(!JS_GetArrayLength(state->cx, obj, &hdrlen)) { - return CURLE_WRITE_ERROR; - } - - JS::RootedString hdrval(state->cx, hdr); - if(!JS_SetElement(state->cx, obj, hdrlen, hdrval)) { - return CURLE_WRITE_ERROR; - } - - return length; -} - -static size_t -recv_body(void *ptr, size_t size, size_t nmem, void *data) -{ - CurlState* state = (CurlState*) data; - size_t length = size * nmem; - char* tmp = NULL; - - if(!state->recvbuf) { - state->recvlen = 4096; - state->read = 0; - state->recvbuf = (char *)JS_malloc(state->cx, state->recvlen); - } - - if(!state->recvbuf) { - return CURLE_WRITE_ERROR; - } - - // +1 so we can add '\0' back up in the go function. - size_t oldlen = state->recvlen; - while(length+1 > state->recvlen - state->read) state->recvlen *= 2; - tmp = (char *) JS_realloc(state->cx, state->recvbuf, oldlen, state->recvlen); - if(!tmp) return CURLE_WRITE_ERROR; - state->recvbuf = tmp; - - memcpy(state->recvbuf + state->read, ptr, length); - state->read += length; - return length; -} - -JSString* -str_from_binary(JSContext* cx, char* data, size_t length) -{ - char16_t* conv = (char16_t*) JS_malloc(cx, length * sizeof(char16_t)); - JSString* ret = NULL; - size_t i; - - if(!conv) return NULL; - - for(i = 0; i < length; i++) { - conv[i] = (char16_t) data[i]; - } - - ret = JS_NewUCString(cx, conv, length); - if(!ret) JS_free(cx, conv); - - return ret; -} - -#endif /* HAVE_CURL */ diff --git a/src/couch/priv/couch_js/60/http.h b/src/couch/priv/couch_js/60/http.h deleted file mode 100644 index 797b3c060..000000000 --- a/src/couch/priv/couch_js/60/http.h +++ /dev/null @@ -1,27 +0,0 @@ -// Licensed under the Apache License, Version 2.0 (the "License"); you may not -// use this file except in compliance with the License. You may obtain a copy of -// the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the -// License for the specific language governing permissions and limitations under -// the License. - -#ifndef COUCH_JS_HTTP_H -#define COUCH_JS_HTTP_H - -#include "util.h" - -void http_check_enabled(); -bool http_ctor(JSContext* cx, JSObject* req); -void http_dtor(JSFreeOp* fop, JSObject* req); -bool http_open(JSContext* cx, JSObject* req, JS::Value mth, JS::Value url, JS::Value snc); -bool http_set_hdr(JSContext* cx, JSObject* req, JS::Value name, JS::Value val); -bool http_send(JSContext* cx, JSObject* req, JS::Value body); -int http_status(JSContext* cx, JSObject* req); -bool http_uri(JSContext* cx, JSObject *req, couch_args* args, JS::Value* uri); - -#endif diff --git a/src/couch/priv/couch_js/60/main.cpp b/src/couch/priv/couch_js/60/main.cpp index b6157ed85..5169b05d7 100644 --- a/src/couch/priv/couch_js/60/main.cpp +++ b/src/couch/priv/couch_js/60/main.cpp @@ -27,8 +27,6 @@ #include <js/Wrapper.h> #include "config.h" -#include "http.h" -#include "utf8.h" #include "util.h" static bool enableSharedMemory = true; @@ -54,129 +52,6 @@ static JSClass global_class = { &global_ops }; - -static void -req_dtor(JSFreeOp* fop, JSObject* obj) -{ - http_dtor(fop, obj); -} - -// With JSClass.construct. -static const JSClassOps clsOps = { - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - req_dtor, - nullptr, - nullptr, - nullptr -}; - -static const JSClass CouchHTTPClass = { - "CouchHTTP", /* name */ - JSCLASS_HAS_PRIVATE | JSCLASS_HAS_RESERVED_SLOTS(2), /* flags */ - &clsOps -}; - -static bool -req_ctor(JSContext* cx, unsigned int argc, JS::Value* vp) -{ - bool ret; - JS::CallArgs args = JS::CallArgsFromVp(argc, vp); - JSObject* obj = JS_NewObjectForConstructor(cx, &CouchHTTPClass, args); - if(!obj) { - JS_ReportErrorUTF8(cx, "Failed to create CouchHTTP instance"); - return false; - } - ret = http_ctor(cx, obj); - args.rval().setObject(*obj); - return ret; -} - -static bool -req_open(JSContext* cx, unsigned int argc, JS::Value* vp) -{ - JSObject* obj = JS_THIS_OBJECT(cx, vp); - JS::CallArgs args = JS::CallArgsFromVp(argc, vp); - bool ret = false; - - if(argc == 2) { - ret = http_open(cx, obj, args[0], args[1], JS::BooleanValue(false)); - } else if(argc == 3) { - ret = http_open(cx, obj, args[0], args[1], args[2]); - } else { - JS_ReportErrorUTF8(cx, "Invalid call to CouchHTTP.open"); - } - - args.rval().setUndefined(); - return ret; -} - - -static bool -req_set_hdr(JSContext* cx, unsigned int argc, JS::Value* vp) -{ - JSObject* obj = JS_THIS_OBJECT(cx, vp); - JS::CallArgs args = JS::CallArgsFromVp(argc, vp); - bool ret = false; - - if(argc == 2) { - ret = http_set_hdr(cx, obj, args[0], args[1]); - } else { - JS_ReportErrorUTF8(cx, "Invalid call to CouchHTTP.set_header"); - } - - args.rval().setUndefined(); - return ret; -} - - -static bool -req_send(JSContext* cx, unsigned int argc, JS::Value* vp) -{ - JSObject* obj = JS_THIS_OBJECT(cx, vp); - JS::CallArgs args = JS::CallArgsFromVp(argc, vp); - bool ret = false; - - if(argc == 1) { - ret = http_send(cx, obj, args[0]); - } else { - JS_ReportErrorUTF8(cx, "Invalid call to CouchHTTP.send"); - } - - args.rval().setUndefined(); - return ret; -} - -static bool -req_status(JSContext* cx, unsigned int argc, JS::Value* vp) -{ - JS::CallArgs args = JS::CallArgsFromVp(argc, vp); - JSObject* obj = JS_THIS_OBJECT(cx, vp); - int status = http_status(cx, obj); - - if(status < 0) - return false; - - args.rval().set(JS::Int32Value(status)); - return true; -} - -static bool -base_url(JSContext *cx, unsigned int argc, JS::Value* vp) -{ - 
JS::CallArgs args = JS::CallArgsFromVp(argc, vp); - JSObject* obj = JS_THIS_OBJECT(cx, vp); - couch_args *cargs = (couch_args*)JS_GetContextPrivate(cx); - JS::Value uri_val; - bool rc = http_uri(cx, obj, cargs, &uri_val); - args.rval().set(uri_val); - return rc; -} - static void SetStandardCompartmentOptions(JS::CompartmentOptions& options) { @@ -226,9 +101,15 @@ evalcx(JSContext *cx, unsigned int argc, JS::Value* vp) if (!sandbox) return false; } - JS_BeginRequest(cx); + JSAutoRequest ar(cx); + if (!sandbox) { + sandbox = NewSandbox(cx, false); + if (!sandbox) + return false; + } + js::AutoStableStringChars strChars(cx); if (!strChars.initTwoByte(cx, str)) return false; @@ -237,12 +118,6 @@ evalcx(JSContext *cx, unsigned int argc, JS::Value* vp) size_t srclen = chars.length(); const char16_t* src = chars.begin().get(); - if (!sandbox) { - sandbox = NewSandbox(cx, false); - if (!sandbox) - return false; - } - if(srclen == 0) { args.rval().setObject(*sandbox); } else { @@ -283,7 +158,19 @@ static bool print(JSContext* cx, unsigned int argc, JS::Value* vp) { JS::CallArgs args = JS::CallArgsFromVp(argc, vp); - couch_print(cx, argc, args); + + bool use_stderr = false; + if(argc > 1 && args[1].isTrue()) { + use_stderr = true; + } + + if(!args[0].isString()) { + JS_ReportErrorUTF8(cx, "Unable to print non-string value."); + return false; + } + + couch_print(cx, args[0], use_stderr); + args.rval().setUndefined(); return true; } @@ -335,43 +222,6 @@ seal(JSContext* cx, unsigned int argc, JS::Value* vp) } -static bool -js_sleep(JSContext* cx, unsigned int argc, JS::Value* vp) -{ - JS::CallArgs args = JS::CallArgsFromVp(argc, vp); - - int duration = args[0].toInt32(); - -#ifdef XP_WIN - Sleep(duration); -#else - usleep(duration * 1000); -#endif - - return true; -} - -JSPropertySpec CouchHTTPProperties[] = { - JS_PSG("status", req_status, 0), - JS_PSG("base_url", base_url, 0), - JS_PS_END -}; - - -JSFunctionSpec CouchHTTPFunctions[] = { - JS_FN("_open", req_open, 3, 0), - JS_FN("_setRequestHeader", req_set_hdr, 2, 0), - JS_FN("_send", req_send, 1, 0), - JS_FS_END -}; - - -JSFunctionSpec TestSuiteFunctions[] = { - JS_FN("sleep", js_sleep, 1, 0), - JS_FS_END -}; - - static JSFunctionSpec global_functions[] = { JS_FN("evalcx", evalcx, 0, 0), JS_FN("gc", gc, 0, 0), @@ -386,7 +236,7 @@ static JSFunctionSpec global_functions[] = { static bool csp_allows(JSContext* cx) { - couch_args *args = (couch_args*)JS_GetContextPrivate(cx); + couch_args* args = static_cast<couch_args*>(JS_GetContextPrivate(cx)); if(args->eval) { return true; } else { @@ -405,7 +255,6 @@ int main(int argc, const char* argv[]) { JSContext* cx = NULL; - JSObject* klass = NULL; char* scriptsrc; size_t slen; int i; @@ -443,40 +292,24 @@ main(int argc, const char* argv[]) if(couch_load_funcs(cx, global, global_functions) != true) return 1; - if(args->use_http) { - http_check_enabled(); - - klass = JS_InitClass( - cx, global, - NULL, - &CouchHTTPClass, req_ctor, - 0, - CouchHTTPProperties, CouchHTTPFunctions, - NULL, NULL - ); - - if(!klass) - { - fprintf(stderr, "Failed to initialize CouchHTTP class.\n"); - exit(2); - } - } - - if(args->use_test_funs) { - if(couch_load_funcs(cx, global, TestSuiteFunctions) != true) - return 1; - } - for(i = 0 ; args->scripts[i] ; i++) { slen = couch_readfile(args->scripts[i], &scriptsrc); // Compile and run JS::CompileOptions options(cx); options.setFileAndLine(args->scripts[i], 1); + options.setUTF8(true); JS::RootedScript script(cx); if(!JS_CompileScript(cx, scriptsrc, slen, options, &script)) { - 
fprintf(stderr, "Failed to compile script.\n"); + JS::RootedValue exc(cx); + if(!JS_GetPendingException(cx, &exc)) { + fprintf(stderr, "Failed to compile script.\n"); + } else { + JS::RootedObject exc_obj(cx, &exc.toObject()); + JSErrorReport* report = JS_ErrorFromException(cx, exc_obj); + couch_error(cx, report); + } return 1; } @@ -484,7 +317,14 @@ main(int argc, const char* argv[]) JS::RootedValue result(cx); if(JS_ExecuteScript(cx, script, &result) != true) { - fprintf(stderr, "Failed to execute script.\n"); + JS::RootedValue exc(cx); + if(!JS_GetPendingException(cx, &exc)) { + fprintf(stderr, "Failed to execute script.\n"); + } else { + JS::RootedObject exc_obj(cx, &exc.toObject()); + JSErrorReport* report = JS_ErrorFromException(cx, exc_obj); + couch_error(cx, report); + } return 1; } diff --git a/src/couch/priv/couch_js/60/utf8.cpp b/src/couch/priv/couch_js/60/utf8.cpp deleted file mode 100644 index 38dfa6224..000000000 --- a/src/couch/priv/couch_js/60/utf8.cpp +++ /dev/null @@ -1,301 +0,0 @@ -// Licensed under the Apache License, Version 2.0 (the "License"); you may not -// use this file except in compliance with the License. You may obtain a copy of -// the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -// License for the specific language governing permissions and limitations under -// the License. - -#include <jsapi.h> -#include <js/Initialization.h> -#include <js/Conversions.h> -#include <js/Wrapper.h> -#include "config.h" -#include "util.h" - -static int -enc_char(uint8_t *utf8Buffer, uint32_t ucs4Char) -{ - int utf8Length = 1; - - if (ucs4Char < 0x80) - { - *utf8Buffer = (uint8_t)ucs4Char; - } - else - { - int i; - uint32_t a = ucs4Char >> 11; - utf8Length = 2; - while(a) - { - a >>= 5; - utf8Length++; - } - i = utf8Length; - while(--i) - { - utf8Buffer[i] = (uint8_t)((ucs4Char & 0x3F) | 0x80); - ucs4Char >>= 6; - } - *utf8Buffer = (uint8_t)(0x100 - (1 << (8-utf8Length)) + ucs4Char); - } - - return utf8Length; -} - -static bool -enc_charbuf(const char16_t* src, size_t srclen, char* dst, size_t* dstlenp) -{ - size_t i; - size_t utf8Len; - size_t dstlen = *dstlenp; - size_t origDstlen = dstlen; - char16_t c; - char16_t c2; - uint32_t v; - uint8_t utf8buf[6]; - - if(!dst) - { - dstlen = origDstlen = (size_t) -1; - } - - while(srclen) - { - c = *src++; - srclen--; - - if(c <= 0xD7FF || c >= 0xE000) - { - v = (uint32_t) c; - } - else if(c >= 0xD800 && c <= 0xDBFF) - { - if(srclen < 1) goto buffer_too_small; - c2 = *src++; - srclen--; - if(c2 >= 0xDC00 && c2 <= 0xDFFF) - { - v = (uint32_t) (((c - 0xD800) << 10) + (c2 - 0xDC00) + 0x10000); - } - else - { - // Invalid second half of surrogate pair - v = (uint32_t) 0xFFFD; - // Undo our character advancement - src--; - srclen++; - } - } - else - { - // Invalid first half surrogate pair - v = (uint32_t) 0xFFFD; - } - - if(v < 0x0080) - { - /* no encoding necessary - performance hack */ - if(!dstlen) goto buffer_too_small; - if(dst) *dst++ = (char) v; - utf8Len = 1; - } - else - { - utf8Len = enc_char(utf8buf, v); - if(utf8Len > dstlen) goto buffer_too_small; - if(dst) - { - for (i = 0; i < utf8Len; i++) - { - *dst++ = (char) utf8buf[i]; - } - } - } - dstlen -= utf8Len; - } - - *dstlenp = (origDstlen - dstlen); - return true; - -buffer_too_small: - *dstlenp = (origDstlen - dstlen); - return 
false; -} - -char* -enc_string(JSContext* cx, JS::Value arg, size_t* buflen) -{ - JSString* str = NULL; - const char16_t* src = NULL; - char* bytes = NULL; - size_t srclen = 0; - size_t byteslen = 0; - js::AutoStableStringChars rawChars(cx); - - str = arg.toString(); - if(!str) goto error; - - if (!rawChars.initTwoByte(cx, str)) - return NULL; - - src = rawChars.twoByteRange().begin().get(); - srclen = JS_GetStringLength(str); - - if(!enc_charbuf(src, srclen, NULL, &byteslen)) goto error; - - bytes = (char *)JS_malloc(cx, (byteslen) + 1); - bytes[byteslen] = 0; - - if(!enc_charbuf(src, srclen, bytes, &byteslen)) goto error; - - if(buflen) *buflen = byteslen; - goto success; - -error: - if(bytes != NULL) JS_free(cx, bytes); - bytes = NULL; - -success: - return bytes; -} - -static uint32_t -dec_char(const uint8_t *utf8Buffer, int utf8Length) -{ - uint32_t ucs4Char; - uint32_t minucs4Char; - - /* from Unicode 3.1, non-shortest form is illegal */ - static const uint32_t minucs4Table[] = { - 0x00000080, 0x00000800, 0x0001000, 0x0020000, 0x0400000 - }; - - if (utf8Length == 1) - { - ucs4Char = *utf8Buffer; - } - else - { - ucs4Char = *utf8Buffer++ & ((1<<(7-utf8Length))-1); - minucs4Char = minucs4Table[utf8Length-2]; - while(--utf8Length) - { - ucs4Char = ucs4Char<<6 | (*utf8Buffer++ & 0x3F); - } - if(ucs4Char < minucs4Char || ucs4Char == 0xFFFE || ucs4Char == 0xFFFF) - { - ucs4Char = 0xFFFD; - } - } - - return ucs4Char; -} - -static bool -dec_charbuf(const char *src, size_t srclen, char16_t *dst, size_t *dstlenp) -{ - uint32_t v; - size_t offset = 0; - size_t j; - size_t n; - size_t dstlen = *dstlenp; - size_t origDstlen = dstlen; - - if(!dst) dstlen = origDstlen = (size_t) -1; - - while(srclen) - { - v = (uint8_t) *src; - n = 1; - - if(v & 0x80) - { - while(v & (0x80 >> n)) - { - n++; - } - - if(n > srclen) goto buffer_too_small; - if(n == 1 || n > 6) goto bad_character; - - for(j = 1; j < n; j++) - { - if((src[j] & 0xC0) != 0x80) goto bad_character; - } - - v = dec_char((const uint8_t *) src, n); - if(v >= 0x10000) - { - v -= 0x10000; - - if(v > 0xFFFFF || dstlen < 2) - { - *dstlenp = (origDstlen - dstlen); - return false; - } - - if(dstlen < 2) goto buffer_too_small; - - if(dst) - { - *dst++ = (char16_t)((v >> 10) + 0xD800); - v = (char16_t)((v & 0x3FF) + 0xDC00); - } - dstlen--; - } - } - - if(!dstlen) goto buffer_too_small; - if(dst) *dst++ = (char16_t) v; - - dstlen--; - offset += n; - src += n; - srclen -= n; - } - - *dstlenp = (origDstlen - dstlen); - return true; - -bad_character: - *dstlenp = (origDstlen - dstlen); - return false; - -buffer_too_small: - *dstlenp = (origDstlen - dstlen); - return false; -} - -JSString* -dec_string(JSContext* cx, const char* bytes, size_t byteslen) -{ - JSString* str = NULL; - char16_t* chars = NULL; - size_t charslen; - - if(!dec_charbuf(bytes, byteslen, NULL, &charslen)) goto error; - - chars = (char16_t *)JS_malloc(cx, (charslen + 1) * sizeof(char16_t)); - if(!chars) return NULL; - chars[charslen] = 0; - - if(!dec_charbuf(bytes, byteslen, chars, &charslen)) goto error; - - str = JS_NewUCString(cx, chars, charslen - 1); - if(!str) goto error; - - goto success; - -error: - if(chars != NULL) JS_free(cx, chars); - str = NULL; - -success: - return str; -} diff --git a/src/couch/priv/couch_js/60/utf8.h b/src/couch/priv/couch_js/60/utf8.h deleted file mode 100644 index c8b1f4d82..000000000 --- a/src/couch/priv/couch_js/60/utf8.h +++ /dev/null @@ -1,19 +0,0 @@ -// Licensed under the Apache License, Version 2.0 (the "License"); you may not -// use this file 
except in compliance with the License. You may obtain a copy of -// the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -// License for the specific language governing permissions and limitations under -// the License. - -#ifndef COUCH_JS_UTF_8_H -#define COUCH_JS_UTF_8_H - -char* enc_string(JSContext* cx, JS::Value arg, size_t* buflen); -JSString* dec_string(JSContext* cx, const char* buf, size_t buflen); - -#endif diff --git a/src/couch/priv/couch_js/60/util.cpp b/src/couch/priv/couch_js/60/util.cpp index 92c6cbf4a..3bc58a921 100644 --- a/src/couch/priv/couch_js/60/util.cpp +++ b/src/couch/priv/couch_js/60/util.cpp @@ -13,53 +13,76 @@ #include <stdlib.h> #include <string.h> +#include <sstream> + #include <jsapi.h> #include <js/Initialization.h> +#include <js/CharacterEncoding.h> #include <js/Conversions.h> +#include <mozilla/Unused.h> #include "help.h" #include "util.h" -#include "utf8.h" std::string js_to_string(JSContext* cx, JS::HandleValue val) { + JS::AutoSaveExceptionState exc_state(cx); JS::RootedString sval(cx); sval = val.toString(); JS::UniqueChars chars(JS_EncodeStringToUTF8(cx, sval)); if(!chars) { JS_ClearPendingException(cx); - fprintf(stderr, "Error converting value to string.\n"); - exit(3); + return std::string(); } return chars.get(); } -std::string -js_to_string(JSContext* cx, JSString *str) +bool +js_to_string(JSContext* cx, JS::HandleValue val, std::string& str) { - JS::UniqueChars chars(JS_EncodeString(cx, str)); - if(!chars) { - JS_ClearPendingException(cx); - fprintf(stderr, "Error converting to string.\n"); - exit(3); + if(!val.isString()) { + return false; } - return chars.get(); + if(JS_GetStringLength(val.toString()) == 0) { + str = ""; + return true; + } + + std::string conv = js_to_string(cx, val); + if(!conv.size()) { + return false; + } + + str = conv; + return true; } JSString* -string_to_js(JSContext* cx, const std::string& s) +string_to_js(JSContext* cx, const std::string& raw) { - JSString* ret = JS_NewStringCopyN(cx, s.c_str(), s.size()); - if(ret != nullptr) { - return ret; + JS::UTF8Chars utf8(raw.c_str(), raw.size()); + JS::UniqueTwoByteChars utf16; + size_t len; + + utf16.reset(JS::UTF8CharsToNewTwoByteCharsZ(cx, utf8, &len).get()); + if(!utf16) { + return nullptr; + } + + JSString* ret = JS_NewUCString(cx, utf16.get(), len); + + if(ret) { + // JS_NewUCString took ownership on success. We shift + // the resulting pointer into Unused to silence the + // compiler warning. 
+ mozilla::Unused << utf16.release(); } - fprintf(stderr, "Unable to allocate string object.\n"); - exit(3); + return ret; } size_t @@ -84,21 +107,21 @@ couch_readfile(const char* file, char** outbuf_p) while((nread = fread(fbuf, 1, 16384, fp)) > 0) { if(buf == NULL) { - buf = (char*) malloc(nread + 1); + buf = new char[nread + 1]; if(buf == NULL) { fprintf(stderr, "Out of memory.\n"); exit(3); } memcpy(buf, fbuf, nread); } else { - tmp = (char*) malloc(buflen + nread + 1); + tmp = new char[buflen + nread + 1]; if(tmp == NULL) { fprintf(stderr, "Out of memory.\n"); exit(3); } memcpy(tmp, buf, buflen); memcpy(tmp+buflen, fbuf, nread); - free(buf); + delete buf; buf = tmp; } buflen += nread; @@ -114,12 +137,13 @@ couch_parse_args(int argc, const char* argv[]) couch_args* args; int i = 1; - args = (couch_args*) malloc(sizeof(couch_args)); + args = new couch_args(); if(args == NULL) return NULL; - memset(args, '\0', sizeof(couch_args)); + args->eval = 0; args->stack_size = 64L * 1024L * 1024L; + args->scripts = nullptr; while(i < argc) { if(strcmp("-h", argv[i]) == 0) { @@ -128,18 +152,12 @@ couch_parse_args(int argc, const char* argv[]) } else if(strcmp("-V", argv[i]) == 0) { DISPLAY_VERSION; exit(0); - } else if(strcmp("-H", argv[i]) == 0) { - args->use_http = 1; - } else if(strcmp("-T", argv[i]) == 0) { - args->use_test_funs = 1; } else if(strcmp("-S", argv[i]) == 0) { args->stack_size = atoi(argv[++i]); if(args->stack_size <= 0) { fprintf(stderr, "Invalid stack size.\n"); exit(2); } - } else if(strcmp("-u", argv[i]) == 0) { - args->uri_file = argv[++i]; } else if(strcmp("--eval", argv[i]) == 0) { args->eval = 1; } else if(strcmp("--", argv[i]) == 0) { @@ -193,7 +211,7 @@ couch_readline(JSContext* cx, FILE* fp) size_t oldbyteslen = 256; size_t readlen = 0; - bytes = (char *)JS_malloc(cx, byteslen); + bytes = static_cast<char*>(JS_malloc(cx, byteslen)); if(bytes == NULL) return NULL; while((readlen = couch_fgets(bytes+used, byteslen-used, fp)) > 0) { @@ -207,7 +225,7 @@ couch_readline(JSContext* cx, FILE* fp) // Double our buffer and read more. 
oldbyteslen = byteslen; byteslen *= 2; - tmp = (char *)JS_realloc(cx, bytes, oldbyteslen, byteslen); + tmp = static_cast<char*>(JS_realloc(cx, bytes, oldbyteslen, byteslen)); if(!tmp) { JS_free(cx, bytes); return NULL; @@ -222,8 +240,8 @@ couch_readline(JSContext* cx, FILE* fp) return JS_NewStringCopyZ(cx, nullptr); } - // Shring the buffer to the actual data size - tmp = (char *)JS_realloc(cx, bytes, byteslen, used); + // Shrink the buffer to the actual data size + tmp = static_cast<char*>(JS_realloc(cx, bytes, byteslen, used)); if(!tmp) { JS_free(cx, bytes); return NULL; @@ -238,22 +256,16 @@ couch_readline(JSContext* cx, FILE* fp) void -couch_print(JSContext* cx, unsigned int argc, JS::CallArgs argv) +couch_print(JSContext* cx, JS::HandleValue obj, bool use_stderr) { - uint8_t* bytes = nullptr; - FILE *stream = stdout; + FILE* stream = stdout; - if (argc) { - if (argc > 1 && argv[1].isTrue()) { - stream = stderr; - } - JSString* str = JS::ToString(cx, argv.get(0)); - bytes = reinterpret_cast<uint8_t*>(JS_EncodeString(cx, str)); - fprintf(stream, "%s", bytes); - JS_free(cx, bytes); + if(use_stderr) { + stream = stderr; } - fputc('\n', stream); + std::string val = js_to_string(cx, obj); + fprintf(stream, "%s\n", val.c_str()); fflush(stream); } @@ -261,51 +273,63 @@ couch_print(JSContext* cx, unsigned int argc, JS::CallArgs argv) void couch_error(JSContext* cx, JSErrorReport* report) { - JS::RootedValue v(cx), stack(cx), replace(cx); - char* bytes; - JSObject* regexp; - - if(!report || !JSREPORT_IS_WARNING(report->flags)) - { - fprintf(stderr, "%s\n", report->message().c_str()); - - // Print a stack trace, if available. - if (JSREPORT_IS_EXCEPTION(report->flags) && - JS_GetPendingException(cx, &v)) - { - // Clear the exception before an JS method calls or the result is - // infinite, recursive error report generation. - JS_ClearPendingException(cx); - - // Use JS regexp to indent the stack trace. - // If the regexp can't be created, don't JS_ReportErrorUTF8 since it is - // probably not productive to wind up here again. - JS::RootedObject vobj(cx, v.toObjectOrNull()); - - if(JS_GetProperty(cx, vobj, "stack", &stack) && - (regexp = JS_NewRegExpObject( - cx, "^(?=.)", 6, JSREG_GLOB | JSREG_MULTILINE))) - { - // Set up the arguments to ``String.replace()`` - JS::AutoValueVector re_args(cx); - JS::RootedValue arg0(cx, JS::ObjectValue(*regexp)); - auto arg1 = JS::StringValue(string_to_js(cx, "\t")); - - if (re_args.append(arg0) && re_args.append(arg1)) { - // Perform the replacement - JS::RootedObject sobj(cx, stack.toObjectOrNull()); - if(JS_GetProperty(cx, sobj, "replace", &replace) && - JS_CallFunctionValue(cx, sobj, replace, re_args, &v)) - { - // Print the result - bytes = enc_string(cx, v, NULL); - fprintf(stderr, "Stacktrace:\n%s", bytes); - JS_free(cx, bytes); - } - } - } + if(!report) { + return; + } + + if(JSREPORT_IS_WARNING(report->flags)) { + return; + } + + std::ostringstream msg; + msg << "error: " << report->message().c_str(); + + mozilla::Maybe<JSAutoCompartment> ac; + JS::RootedValue exc(cx); + JS::RootedObject exc_obj(cx); + JS::RootedObject stack_obj(cx); + JS::RootedString stack_str(cx); + JS::RootedValue stack_val(cx); + + if(!JS_GetPendingException(cx, &exc)) { + goto done; + } + + // Clear the exception before an JS method calls or the result is + // infinite, recursive error report generation. 
+ JS_ClearPendingException(cx); + + exc_obj.set(exc.toObjectOrNull()); + stack_obj.set(JS::ExceptionStackOrNull(exc_obj)); + + if(!stack_obj) { + // Compilation errors don't have a stack + + msg << " at "; + + if(report->filename) { + msg << report->filename; + } else { + msg << "<unknown>"; + } + + if(report->lineno) { + msg << ':' << report->lineno << ':' << report->column; } + + goto done; + } + + if(!JS::BuildStackString(cx, stack_obj, &stack_str, 2)) { + goto done; } + + stack_val.set(JS::StringValue(stack_str)); + msg << std::endl << std::endl << js_to_string(cx, stack_val).c_str(); + +done: + msg << std::endl; + fprintf(stderr, "%s", msg.str().c_str()); } diff --git a/src/couch/priv/couch_js/60/util.h b/src/couch/priv/couch_js/60/util.h index 407e3e602..35882a614 100644 --- a/src/couch/priv/couch_js/60/util.h +++ b/src/couch/priv/couch_js/60/util.h @@ -17,23 +17,19 @@ typedef struct { int eval; - int use_http; - int use_test_funs; int stack_size; const char** scripts; - const char* uri_file; - JSString* uri; } couch_args; std::string js_to_string(JSContext* cx, JS::HandleValue val); -std::string js_to_string(JSContext* cx, JSString *str); +bool js_to_string(JSContext* cx, JS::HandleValue val, std::string& str); JSString* string_to_js(JSContext* cx, const std::string& s); couch_args* couch_parse_args(int argc, const char* argv[]); int couch_fgets(char* buf, int size, FILE* fp); JSString* couch_readline(JSContext* cx, FILE* fp); size_t couch_readfile(const char* file, char** outbuf_p); -void couch_print(JSContext* cx, unsigned int argc, JS::CallArgs argv); +void couch_print(JSContext* cx, JS::HandleValue str, bool use_stderr); void couch_error(JSContext* cx, JSErrorReport* report); void couch_oom(JSContext* cx, void* data); bool couch_load_funcs(JSContext* cx, JS::HandleObject obj, JSFunctionSpec* funcs); diff --git a/src/couch/priv/couch_js/68/help.h b/src/couch/priv/couch_js/68/help.h new file mode 100644 index 000000000..7c7550cc2 --- /dev/null +++ b/src/couch/priv/couch_js/68/help.h @@ -0,0 +1,79 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); you may not +// use this file except in compliance with the License. You may obtain a copy of +// the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations under +// the License. + +#ifndef COUCHJS_HELP_H +#define COUCHJS_HELP_H + +#include "config.h" + +static const char VERSION_TEMPLATE[] = + "%s - %s (SpiderMonkey 68)\n" + "\n" + "Licensed under the Apache License, Version 2.0 (the \"License\"); you may " + "not use\n" + "this file except in compliance with the License. You may obtain a copy of" + "the\n" + "License at\n" + "\n" + " http://www.apache.org/licenses/LICENSE-2.0\n" + "\n" + "Unless required by applicable law or agreed to in writing, software " + "distributed\n" + "under the License is distributed on an \"AS IS\" BASIS, WITHOUT " + "WARRANTIES OR\n" + "CONDITIONS OF ANY KIND, either express or implied. 
See the License " + "for the\n" + "specific language governing permissions and limitations under the " + "License.\n"; + +static const char USAGE_TEMPLATE[] = + "Usage: %s [FILE]\n" + "\n" + "The %s command runs the %s JavaScript interpreter.\n" + "\n" + "The exit status is 0 for success or 1 for failure.\n" + "\n" + "Options:\n" + "\n" + " -h display a short help message and exit\n" + " -V display version information and exit\n" + " -S SIZE specify that the runtime should allow at\n" + " most SIZE bytes of memory to be allocated\n" + " default is 64 MiB\n" + " --eval Enable runtime code evaluation (dangerous!)\n" + "\n" + "Report bugs at <%s>.\n"; + +#define BASENAME COUCHJS_NAME + +#define couch_version(basename) \ + fprintf( \ + stdout, \ + VERSION_TEMPLATE, \ + basename, \ + PACKAGE_STRING) + +#define DISPLAY_VERSION couch_version(BASENAME) + + +#define couch_usage(basename) \ + fprintf( \ + stdout, \ + USAGE_TEMPLATE, \ + basename, \ + basename, \ + PACKAGE_NAME, \ + PACKAGE_BUGREPORT) + +#define DISPLAY_USAGE couch_usage(BASENAME) + +#endif // Included help.h diff --git a/src/couch/priv/couch_js/68/main.cpp b/src/couch/priv/couch_js/68/main.cpp new file mode 100644 index 000000000..bb62d16ca --- /dev/null +++ b/src/couch/priv/couch_js/68/main.cpp @@ -0,0 +1,337 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); you may not +// use this file except in compliance with the License. You may obtain a copy of +// the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations under +// the License. + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +#ifdef XP_WIN +#define NOMINMAX +#include <windows.h> +#else +#include <unistd.h> +#endif + +#include <jsapi.h> +#include <js/CompilationAndEvaluation.h> +#include <js/Conversions.h> +#include <js/Initialization.h> +#include <js/SourceText.h> +#include <js/Warnings.h> +#include <js/Wrapper.h> + +#include "config.h" +#include "util.h" + +static bool enableSharedMemory = true; + +static JSClassOps global_ops = { + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + JS_GlobalObjectTraceHook +}; + +/* The class of the global object. 
*/ +static JSClass global_class = { + "global", + JSCLASS_GLOBAL_FLAGS, + &global_ops +}; + +static JSObject* +NewSandbox(JSContext* cx, bool lazy) +{ + JS::RealmOptions options; + options.creationOptions().setSharedMemoryAndAtomicsEnabled(enableSharedMemory); + options.creationOptions().setNewCompartmentAndZone(); + JS::RootedObject obj(cx, JS_NewGlobalObject(cx, &global_class, nullptr, + JS::DontFireOnNewGlobalHook, options)); + if (!obj) + return nullptr; + + { + JSAutoRealm ac(cx, obj); + if (!lazy && !JS::InitRealmStandardClasses(cx)) + return nullptr; + + JS::RootedValue value(cx, JS::BooleanValue(lazy)); + if (!JS_DefineProperty(cx, obj, "lazy", value, JSPROP_PERMANENT | JSPROP_READONLY)) + return nullptr; + + JS_FireOnNewGlobalObject(cx, obj); + } + + if (!JS_WrapObject(cx, &obj)) + return nullptr; + return obj; +} + +static bool +evalcx(JSContext *cx, unsigned int argc, JS::Value* vp) +{ + JS::CallArgs args = JS::CallArgsFromVp(argc, vp); + bool ret = false; + + JS::RootedString str(cx, args[0].toString()); + if (!str) + return false; + + JS::RootedObject sandbox(cx); + if (args.hasDefined(1)) { + sandbox = JS::ToObject(cx, args[1]); + if (!sandbox) + return false; + } + + if (!sandbox) { + sandbox = NewSandbox(cx, false); + if (!sandbox) + return false; + } + + JS::AutoStableStringChars strChars(cx); + if (!strChars.initTwoByte(cx, str)) + return false; + + mozilla::Range<const char16_t> chars = strChars.twoByteRange(); + JS::SourceText<char16_t> srcBuf; + if (!srcBuf.init(cx, chars.begin().get(), chars.length(), + JS::SourceOwnership::Borrowed)) { + return false; + } + + if(srcBuf.length() == 0) { + args.rval().setObject(*sandbox); + } else { + mozilla::Maybe<JSAutoRealm> ar; + unsigned flags; + JSObject* unwrapped = UncheckedUnwrap(sandbox, true, &flags); + if (flags & js::Wrapper::CROSS_COMPARTMENT) { + sandbox = unwrapped; + ar.emplace(cx, sandbox); + } + + JS::CompileOptions opts(cx); + JS::RootedValue rval(cx); + opts.setFileAndLine("<unknown>", 1); + + if (!JS::Evaluate(cx, opts, srcBuf, args.rval())) { + return false; + } + } + ret = true; + if (!JS_WrapValue(cx, args.rval())) + return false; + + return ret; +} + + +static bool +gc(JSContext* cx, unsigned int argc, JS::Value* vp) +{ + JS::CallArgs args = JS::CallArgsFromVp(argc, vp); + JS_GC(cx); + args.rval().setUndefined(); + return true; +} + + +static bool +print(JSContext* cx, unsigned int argc, JS::Value* vp) +{ + JS::CallArgs args = JS::CallArgsFromVp(argc, vp); + + bool use_stderr = false; + if(argc > 1 && args[1].isTrue()) { + use_stderr = true; + } + + if(!args[0].isString()) { + JS_ReportErrorUTF8(cx, "Unable to print non-string value."); + return false; + } + + couch_print(cx, args[0], use_stderr); + + args.rval().setUndefined(); + return true; +} + + +static bool +quit(JSContext* cx, unsigned int argc, JS::Value* vp) +{ + JS::CallArgs args = JS::CallArgsFromVp(argc, vp); + + int exit_code = args[0].toInt32();; + exit(exit_code); +} + + +static bool +readline(JSContext* cx, unsigned int argc, JS::Value* vp) +{ + JSString* line; + JS::CallArgs args = JS::CallArgsFromVp(argc, vp); + + /* GC Occasionally */ + JS_MaybeGC(cx); + + line = couch_readline(cx, stdin); + if(line == NULL) return false; + + // return with JSString* instead of JSValue in the past + args.rval().setString(line); + return true; +} + + +static bool +seal(JSContext* cx, unsigned int argc, JS::Value* vp) +{ + JS::CallArgs args = JS::CallArgsFromVp(argc, vp); + JS::RootedObject target(cx); + target = JS::ToObject(cx, args[0]); + if (!target) { + 
args.rval().setUndefined(); + return true; + } + bool deep = false; + deep = args[1].toBoolean(); + bool ret = deep ? JS_DeepFreezeObject(cx, target) : JS_FreezeObject(cx, target); + args.rval().setUndefined(); + return ret; +} + + +static JSFunctionSpec global_functions[] = { + JS_FN("evalcx", evalcx, 0, 0), + JS_FN("gc", gc, 0, 0), + JS_FN("print", print, 0, 0), + JS_FN("quit", quit, 0, 0), + JS_FN("readline", readline, 0, 0), + JS_FN("seal", seal, 0, 0), + JS_FS_END +}; + + +static bool +csp_allows(JSContext* cx, JS::HandleValue code) +{ + couch_args* args = static_cast<couch_args*>(JS_GetContextPrivate(cx)); + if(args->eval) { + return true; + } else { + return false; + } +} + + +static JSSecurityCallbacks security_callbacks = { + csp_allows, + nullptr +}; + + +int +main(int argc, const char* argv[]) +{ + JSContext* cx = NULL; + int i; + + couch_args* args = couch_parse_args(argc, argv); + + JS_Init(); + cx = JS_NewContext(args->stack_size, 8L * 1024L); + if(cx == NULL) + return 1; + + JS_SetGlobalJitCompilerOption(cx, JSJITCOMPILER_BASELINE_ENABLE, 0); + JS_SetGlobalJitCompilerOption(cx, JSJITCOMPILER_ION_ENABLE, 0); + + if (!JS::InitSelfHostedCode(cx)) + return 1; + + JS::SetWarningReporter(cx, couch_error); + JS::SetOutOfMemoryCallback(cx, couch_oom, NULL); + JS_SetContextPrivate(cx, args); + JS_SetSecurityCallbacks(cx, &security_callbacks); + + JS::RealmOptions options; + JS::RootedObject global(cx, JS_NewGlobalObject(cx, &global_class, nullptr, + JS::FireOnNewGlobalHook, options)); + if (!global) + return 1; + + JSAutoRealm ar(cx, global); + + if(!JS::InitRealmStandardClasses(cx)) + return 1; + + if(couch_load_funcs(cx, global, global_functions) != true) + return 1; + + for(i = 0 ; args->scripts[i] ; i++) { + const char* filename = args->scripts[i]; + + // Compile and run + JS::CompileOptions options(cx); + options.setFileAndLine(filename, 1); + JS::RootedScript script(cx); + FILE* fp; + + fp = fopen(args->scripts[i], "r"); + if(fp == NULL) { + fprintf(stderr, "Failed to read file: %s\n", filename); + return 3; + } + script = JS::CompileUtf8File(cx, options, fp); + fclose(fp); + if (!script) { + JS::RootedValue exc(cx); + if(!JS_GetPendingException(cx, &exc)) { + fprintf(stderr, "Failed to compile file: %s\n", filename); + } else { + JS::RootedObject exc_obj(cx, &exc.toObject()); + JSErrorReport* report = JS_ErrorFromException(cx, exc_obj); + couch_error(cx, report); + } + return 1; + } + + JS::RootedValue result(cx); + if(JS_ExecuteScript(cx, script, &result) != true) { + JS::RootedValue exc(cx); + if(!JS_GetPendingException(cx, &exc)) { + fprintf(stderr, "Failed to execute script.\n"); + } else { + JS::RootedObject exc_obj(cx, &exc.toObject()); + JSErrorReport* report = JS_ErrorFromException(cx, exc_obj); + couch_error(cx, report); + } + } + + // Give the GC a chance to run. + JS_MaybeGC(cx); + } + + return 0; +} diff --git a/src/couch/priv/couch_js/68/util.cpp b/src/couch/priv/couch_js/68/util.cpp new file mode 100644 index 000000000..6e6105df5 --- /dev/null +++ b/src/couch/priv/couch_js/68/util.cpp @@ -0,0 +1,348 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); you may not +// use this file except in compliance with the License. You may obtain a copy of +// the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the +// License for the specific language governing permissions and limitations under +// the License. + +#include <stdlib.h> +#include <string.h> + +#include <sstream> + +#include <jsapi.h> +#include <jsfriendapi.h> +#include <js/CharacterEncoding.h> +#include <js/Conversions.h> +#include <js/Initialization.h> +#include <js/MemoryFunctions.h> +#include <js/RegExp.h> + +#include "help.h" +#include "util.h" + +std::string +js_to_string(JSContext* cx, JS::HandleValue val) +{ + JS::AutoSaveExceptionState exc_state(cx); + JS::RootedString sval(cx); + sval = val.toString(); + + JS::UniqueChars chars(JS_EncodeStringToUTF8(cx, sval)); + if(!chars) { + JS_ClearPendingException(cx); + return std::string(); + } + + return chars.get(); +} + +bool +js_to_string(JSContext* cx, JS::HandleValue val, std::string& str) +{ + if(!val.isString()) { + return false; + } + + if(JS_GetStringLength(val.toString()) == 0) { + str = ""; + return true; + } + + std::string conv = js_to_string(cx, val); + if(!conv.size()) { + return false; + } + + str = conv; + return true; +} + +JSString* +string_to_js(JSContext* cx, const std::string& raw) +{ + JS::UTF8Chars utf8(raw.c_str(), raw.size()); + JS::UniqueTwoByteChars utf16; + size_t len; + + utf16.reset(JS::UTF8CharsToNewTwoByteCharsZ(cx, utf8, &len, js::MallocArena).get()); + if(!utf16) { + return nullptr; + } + + return JS_NewUCString(cx, std::move(utf16), len); +} + +size_t +couch_readfile(const char* file, char** outbuf_p) +{ + FILE* fp; + char fbuf[16384]; + char *buf = NULL; + char* tmp; + size_t nread = 0; + size_t buflen = 0; + + if(strcmp(file, "-") == 0) { + fp = stdin; + } else { + fp = fopen(file, "r"); + if(fp == NULL) { + fprintf(stderr, "Failed to read file: %s\n", file); + exit(3); + } + } + + while((nread = fread(fbuf, 1, 16384, fp)) > 0) { + if(buf == NULL) { + buf = new char[nread + 1]; + if(buf == NULL) { + fprintf(stderr, "Out of memory.\n"); + exit(3); + } + memcpy(buf, fbuf, nread); + } else { + tmp = new char[buflen + nread + 1]; + if(tmp == NULL) { + fprintf(stderr, "Out of memory.\n"); + exit(3); + } + memcpy(tmp, buf, buflen); + memcpy(tmp+buflen, fbuf, nread); + delete buf; + buf = tmp; + } + buflen += nread; + buf[buflen] = '\0'; + } + *outbuf_p = buf; + return buflen ; +} + +couch_args* +couch_parse_args(int argc, const char* argv[]) +{ + couch_args* args; + int i = 1; + + args = new couch_args(); + if(args == NULL) + return NULL; + + args->eval = 0; + args->stack_size = 64L * 1024L * 1024L; + args->scripts = nullptr; + + while(i < argc) { + if(strcmp("-h", argv[i]) == 0) { + DISPLAY_USAGE; + exit(0); + } else if(strcmp("-V", argv[i]) == 0) { + DISPLAY_VERSION; + exit(0); + } else if(strcmp("-S", argv[i]) == 0) { + args->stack_size = atoi(argv[++i]); + if(args->stack_size <= 0) { + fprintf(stderr, "Invalid stack size.\n"); + exit(2); + } + } else if(strcmp("--eval", argv[i]) == 0) { + args->eval = 1; + } else if(strcmp("--", argv[i]) == 0) { + i++; + break; + } else { + break; + } + i++; + } + + if(i >= argc) { + DISPLAY_USAGE; + exit(3); + } + args->scripts = argv + i; + + return args; +} + + +int +couch_fgets(char* buf, int size, FILE* fp) +{ + int n, i, c; + + if(size <= 0) return -1; + n = size - 1; + + for(i = 0; i < n && (c = getc(fp)) != EOF; i++) { + buf[i] = c; + if(c == '\n') { + i++; + break; + } + } + + buf[i] = '\0'; + return i; +} + + +JSString* +couch_readline(JSContext* cx, FILE* fp) +{ + JSString* str; + char* bytes = NULL; + char* tmp = NULL; + size_t used = 0; + size_t byteslen = 256; + size_t oldbyteslen = 256; + 
size_t readlen = 0; + + bytes = static_cast<char*>(JS_malloc(cx, byteslen)); + if(bytes == NULL) return NULL; + + while((readlen = couch_fgets(bytes+used, byteslen-used, fp)) > 0) { + used += readlen; + + if(bytes[used-1] == '\n') { + bytes[used-1] = '\0'; + break; + } + + // Double our buffer and read more. + oldbyteslen = byteslen; + byteslen *= 2; + tmp = static_cast<char*>(JS_realloc(cx, bytes, oldbyteslen, byteslen)); + if(!tmp) { + JS_free(cx, bytes); + return NULL; + } + + bytes = tmp; + } + + // Treat empty strings specially + if(used == 0) { + JS_free(cx, bytes); + return JS_NewStringCopyZ(cx, nullptr); + } + + // Shrink the buffer to the actual data size + tmp = static_cast<char*>(JS_realloc(cx, bytes, byteslen, used)); + if(!tmp) { + JS_free(cx, bytes); + return NULL; + } + bytes = tmp; + byteslen = used; + + str = string_to_js(cx, std::string(tmp)); + JS_free(cx, bytes); + return str; +} + + +void +couch_print(JSContext* cx, JS::HandleValue obj, bool use_stderr) +{ + FILE *stream = stdout; + + if (use_stderr) { + stream = stderr; + } + std::string val = js_to_string(cx, obj); + fprintf(stream, "%s\n", val.c_str()); + fflush(stream); +} + + +void +couch_error(JSContext* cx, JSErrorReport* report) +{ + if(!report) { + return; + } + + if(JSREPORT_IS_WARNING(report->flags)) { + return; + } + + std::ostringstream msg; + msg << "error: " << report->message().c_str(); + + mozilla::Maybe<JSAutoRealm> ar; + JS::RootedValue exc(cx); + JS::RootedObject exc_obj(cx); + JS::RootedObject stack_obj(cx); + JS::RootedString stack_str(cx); + JS::RootedValue stack_val(cx); + JSPrincipals* principals = GetRealmPrincipals(js::GetContextRealm(cx)); + + if(!JS_GetPendingException(cx, &exc)) { + goto done; + } + + // Clear the exception before an JS method calls or the result is + // infinite, recursive error report generation. + JS_ClearPendingException(cx); + + exc_obj.set(exc.toObjectOrNull()); + stack_obj.set(JS::ExceptionStackOrNull(exc_obj)); + + if(!stack_obj) { + // Compilation errors don't have a stack + + msg << " at "; + + if(report->filename) { + msg << report->filename; + } else { + msg << "<unknown>"; + } + + if(report->lineno) { + msg << ':' << report->lineno << ':' << report->column; + } + + goto done; + } + + if(!JS::BuildStackString(cx, principals, stack_obj, &stack_str, 2)) { + goto done; + } + + stack_val.set(JS::StringValue(stack_str)); + msg << std::endl << std::endl << js_to_string(cx, stack_val).c_str(); + +done: + msg << std::endl; + fprintf(stderr, "%s", msg.str().c_str()); +} + + +void +couch_oom(JSContext* cx, void* data) +{ + fprintf(stderr, "out of memory\n"); + exit(1); +} + + +bool +couch_load_funcs(JSContext* cx, JS::HandleObject obj, JSFunctionSpec* funcs) +{ + JSFunctionSpec* f; + for(f = funcs; f->name; f++) { + if(!JS_DefineFunction(cx, obj, f->name.string(), f->call.op, f->nargs, f->flags)) { + fprintf(stderr, "Failed to create function: %s\n", f->name.string()); + return false; + } + } + return true; +} diff --git a/src/couch/priv/couch_js/68/util.h b/src/couch/priv/couch_js/68/util.h new file mode 100644 index 000000000..bd7843eb9 --- /dev/null +++ b/src/couch/priv/couch_js/68/util.h @@ -0,0 +1,41 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); you may not +// use this file except in compliance with the License. 
You may obtain a copy of +// the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations under +// the License. + +#ifndef COUCHJS_UTIL_H +#define COUCHJS_UTIL_H + +#include <jsapi.h> + +typedef struct { + int eval; + int use_http; + int use_test_funs; + int stack_size; + const char** scripts; + const char* uri_file; + JSString* uri; +} couch_args; + +std::string js_to_string(JSContext* cx, JS::HandleValue val); +bool js_to_string(JSContext* cx, JS::HandleValue val, std::string& str); +JSString* string_to_js(JSContext* cx, const std::string& s); + +couch_args* couch_parse_args(int argc, const char* argv[]); +int couch_fgets(char* buf, int size, FILE* fp); +JSString* couch_readline(JSContext* cx, FILE* fp); +size_t couch_readfile(const char* file, char** outbuf_p); +void couch_print(JSContext* cx, JS::HandleValue str, bool use_stderr); +void couch_error(JSContext* cx, JSErrorReport* report); +void couch_oom(JSContext* cx, void* data); +bool couch_load_funcs(JSContext* cx, JS::HandleObject obj, JSFunctionSpec* funcs); + +#endif // Included util.h diff --git a/src/couch/priv/icu_driver/couch_icu_driver.c b/src/couch/priv/icu_driver/couch_icu_driver.c index 4d9bb982d..ffccf2e9d 100644 --- a/src/couch/priv/icu_driver/couch_icu_driver.c +++ b/src/couch/priv/icu_driver/couch_icu_driver.c @@ -30,6 +30,8 @@ specific language governing permissions and limitations under the License. #include <string.h> /* for memcpy */ #endif +#define BUFFER_SIZE 1024 + typedef struct { ErlDrvPort port; @@ -54,6 +56,8 @@ static ErlDrvData couch_drv_start(ErlDrvPort port, char *buff) UErrorCode status = U_ZERO_ERROR; couch_drv_data* pData = (couch_drv_data*)driver_alloc(sizeof(couch_drv_data)); + set_port_control_flags(port, PORT_CONTROL_FLAG_BINARY); + if (pData == NULL) return ERL_DRV_ERROR_GENERAL; @@ -84,14 +88,17 @@ ErlDrvSSizeT return_control_result(void* pLocalResult, int localLen, char **ppRetBuf, ErlDrvSizeT returnLen) { + ErlDrvBinary* buf = NULL; + if (*ppRetBuf == NULL || localLen > returnLen) { - *ppRetBuf = (char*)driver_alloc_binary(localLen); - if(*ppRetBuf == NULL) { - return -1; - } + buf = driver_alloc_binary(localLen); + memcpy(buf->orig_bytes, pLocalResult, localLen); + *ppRetBuf = (char*) buf; + return localLen; + } else { + memcpy(*ppRetBuf, pLocalResult, localLen); + return localLen; } - memcpy(*ppRetBuf, pLocalResult, localLen); - return localLen; } static ErlDrvSSizeT @@ -147,6 +154,61 @@ couch_drv_control(ErlDrvData drv_data, unsigned int command, return return_control_result(&response, sizeof(response), rbuf, rlen); } + case 2: /* GET_SORT_KEY: */ + { + + UChar source[BUFFER_SIZE]; + UChar* sourcePtr = source; + int32_t sourceLen = BUFFER_SIZE; + + uint8_t sortKey[BUFFER_SIZE]; + uint8_t* sortKeyPtr = sortKey; + int32_t sortKeyLen = BUFFER_SIZE; + + int32_t inputLen; + + UErrorCode status = U_ZERO_ERROR; + ErlDrvSSizeT res; + + /* first 32bits are the length */ + memcpy(&inputLen, pBuf, sizeof(inputLen)); + pBuf += sizeof(inputLen); + + u_strFromUTF8(sourcePtr, BUFFER_SIZE, &sourceLen, pBuf, inputLen, &status); + + if (sourceLen >= BUFFER_SIZE) { + /* reset status or next u_strFromUTF8 call will auto-fail */ + status = U_ZERO_ERROR; + sourcePtr = (UChar*) malloc(sourceLen * 
sizeof(UChar)); + u_strFromUTF8(sourcePtr, sourceLen, NULL, pBuf, inputLen, &status); + if (U_FAILURE(status)) { + rbuf = NULL; + return 0; + } + } else if (U_FAILURE(status)) { + rbuf = NULL; + return 0; + } + + sortKeyLen = ucol_getSortKey(pData->coll, sourcePtr, sourceLen, sortKeyPtr, BUFFER_SIZE); + + if (sortKeyLen > BUFFER_SIZE) { + sortKeyPtr = (uint8_t*) malloc(sortKeyLen); + ucol_getSortKey(pData->coll, sourcePtr, sourceLen, sortKeyPtr, sortKeyLen); + } + + res = return_control_result(sortKeyPtr, sortKeyLen, rbuf, rlen); + + if (sourcePtr != source) { + free(sourcePtr); + } + + if (sortKeyPtr != sortKey) { + free(sortKeyPtr); + } + + return res; + } default: return -1; diff --git a/src/couch/priv/stats_descriptions.cfg b/src/couch/priv/stats_descriptions.cfg index 7c8fd94cb..fd6468ffa 100644 --- a/src/couch/priv/stats_descriptions.cfg +++ b/src/couch/priv/stats_descriptions.cfg @@ -230,6 +230,10 @@ {type, counter}, {desc, <<"number of HTTP 409 Conflict responses">>} ]}. +{[couchdb, httpd_status_codes, 410], [ + {type, counter}, + {desc, <<"number of HTTP 410 Gone responses">>} +]}. {[couchdb, httpd_status_codes, 412], [ {type, counter}, {desc, <<"number of HTTP 412 Precondition Failed responses">>} diff --git a/src/couch/rebar.config.script b/src/couch/rebar.config.script index 91e24d99e..59bd40fbb 100644 --- a/src/couch/rebar.config.script +++ b/src/couch/rebar.config.script @@ -22,7 +22,7 @@ CopyIfDifferent = fun(Path, Contents) -> false -> file:write_file(Path, Contents) end -end, +end. CouchJSName = case os:type() of @@ -30,21 +30,21 @@ CouchJSName = case os:type() of "couchjs.exe"; _ -> "couchjs" -end, -CouchJSPath = filename:join(["priv", CouchJSName]), +end. +CouchJSPath = filename:join(["priv", CouchJSName]). Version = case os:getenv("COUCHDB_VERSION") of false -> string:strip(os:cmd("git describe --always"), right, $\n); Version0 -> string:strip(Version0, right) -end, +end. GitSha = case os:getenv("COUCHDB_GIT_SHA") of false -> - ""; % release builds won’t get a fallback + ""; % release builds won't get a fallback GitSha0 -> string:strip(GitSha0, right) -end, +end. CouchConfig = case filelib:is_file(os:getenv("COUCHDB_CONFIG")) of true -> @@ -59,6 +59,8 @@ SMVsn = case lists:keyfind(spidermonkey_version, 1, CouchConfig) of "1.8.5"; {_, "60"} -> "60"; + {_, "68"} -> + "68"; undefined -> "1.8.5"; {_, Unsupported} -> @@ -74,28 +76,28 @@ ConfigH = [ {"JSSCRIPT_TYPE", "JSObject*"}, {"COUCHJS_NAME", "\"" ++ CouchJSName++ "\""}, {"PACKAGE", "\"apache-couchdb\""}, - {"PACKAGE_BUGREPORT", "\"https://issues.apache.org/jira/browse/COUCHDB\""}, + {"PACKAGE_BUGREPORT", "\"https://github.com/apache/couchdb/issues\""}, {"PACKAGE_NAME", "\"Apache CouchDB\""}, {"PACKAGE_STRING", "\"Apache CouchDB " ++ Version ++ "\""}, {"PACKAGE_VERSION", "\"" ++ Version ++ "\""} -], +]. -CouchJSConfig = "priv/couch_js/" ++ SMVsn ++ "/config.h", -ConfigSrc = [["#define ", K, " ", V, $\n] || {K, V} <- ConfigH], -ConfigBin = iolist_to_binary(ConfigSrc), -ok = CopyIfDifferent(CouchJSConfig, ConfigBin), +CouchJSConfig = "priv/couch_js/" ++ SMVsn ++ "/config.h". +ConfigSrc = [["#define ", K, " ", V, $\n] || {K, V} <- ConfigH]. +ConfigBin = iolist_to_binary(ConfigSrc). +ok = CopyIfDifferent(CouchJSConfig, ConfigBin). MD5Config = case lists:keyfind(erlang_md5, 1, CouchConfig) of {erlang_md5, true} -> [{d, 'ERLANG_MD5', true}]; _ -> [] -end, +end. ProperConfig = case code:lib_dir(proper) of {error, bad_name} -> []; _ -> [{d, 'WITH_PROPER'}] -end, +end. 
{JS_CFLAGS, JS_LDFLAGS} = case os:type() of {win32, _} when SMVsn == "1.8.5" -> @@ -120,65 +122,48 @@ end, }; {unix, _} when SMVsn == "60" -> { - "-DXP_UNIX -I/usr/include/mozjs-60 -I/usr/local/include/mozjs-60 -std=c++14", - "-L/usr/local/lib -std=c++14 -lmozjs-60 -lm" + "-DXP_UNIX -I/usr/include/mozjs-60 -I/usr/local/include/mozjs-60 -std=c++14 -Wno-invalid-offsetof", + "-L/usr/local/lib -std=c++14 -lmozjs-60 -lm -lstdc++" + }; + {unix, _} when SMVsn == "68" -> + { + "-DXP_UNIX -I/usr/include/mozjs-68 -I/usr/local/include/mozjs-68 -std=c++14 -Wno-invalid-offsetof", + "-L/usr/local/lib -std=c++14 -lmozjs-68 -lm -lstdc++" } end. -{CURL_CFLAGS, CURL_LDFLAGS} = case lists:keyfind(with_curl, 1, CouchConfig) of - {with_curl, true} -> - case os:type() of - {win32, _} -> - { - "/DHAVE_CURL", - "/DHAVE_CURL libcurl.lib" - }; - {unix, freebsd} -> - { - "-DHAVE_CURL -I/usr/local/include", - "-DHAVE_CURL -lcurl" - }; - _ -> - { - "-DHAVE_CURL", - "-DHAVE_CURL -lcurl" - } - end; - _ -> - {"", ""} -end, - CouchJSSrc = case SMVsn of "1.8.5" -> ["priv/couch_js/1.8.5/*.c"]; - "60" -> ["priv/couch_js/60/*.cpp"] + "60" -> ["priv/couch_js/60/*.cpp"]; + "68" -> ["priv/couch_js/68/*.cpp"] end. CouchJSEnv = case SMVsn of "1.8.5" -> [ - {"CFLAGS", JS_CFLAGS ++ " " ++ CURL_CFLAGS}, - {"LDFLAGS", JS_LDFLAGS ++ " " ++ CURL_LDFLAGS} + {"CFLAGS", JS_CFLAGS}, + {"LDFLAGS", JS_LDFLAGS} ]; - "60" -> + _ -> [ - {"CXXFLAGS", JS_CFLAGS ++ " " ++ CURL_CFLAGS}, - {"LDFLAGS", JS_LDFLAGS ++ " " ++ CURL_LDFLAGS} + {"CXXFLAGS", JS_CFLAGS}, + {"LDFLAGS", JS_LDFLAGS} ] -end, +end. -IcuPath = "priv/couch_icu_driver.so", -IcuSrc = ["priv/icu_driver/*.c"], +IcuPath = "priv/couch_icu_driver.so". +IcuSrc = ["priv/icu_driver/*.c"]. IcuEnv = [{"DRV_CFLAGS", "$DRV_CFLAGS -DPIC -O2 -fno-common"}, - {"DRV_LDFLAGS", "$DRV_LDFLAGS -lm -licuuc -licudata -licui18n -lpthread"}], + {"DRV_LDFLAGS", "$DRV_LDFLAGS -lm -licuuc -licudata -licui18n -lpthread"}]. IcuDarwinEnv = [{"CFLAGS", "-DXP_UNIX -I/usr/local/opt/icu4c/include"}, - {"LDFLAGS", "-L/usr/local/opt/icu4c/lib"}], + {"LDFLAGS", "-L/usr/local/opt/icu4c/lib"}]. IcuBsdEnv = [{"CFLAGS", "-DXP_UNIX -I/usr/local/include"}, - {"LDFLAGS", "-L/usr/local/lib"}], + {"LDFLAGS", "-L/usr/local/lib"}]. IcuWinEnv = [{"CFLAGS", "$DRV_CFLAGS /DXP_WIN"}, - {"LDFLAGS", "icuin.lib icudt.lib icuuc.lib"}], + {"LDFLAGS", "icuin.lib icudt.lib icuuc.lib"}]. -ComparePath = "priv/couch_ejson_compare.so", -CompareSrc = ["priv/couch_ejson_compare/*.c"], +ComparePath = "priv/couch_ejson_compare.so". +CompareSrc = ["priv/couch_ejson_compare/*.c"]. BaseSpecs = [ %% couchjs @@ -193,17 +178,17 @@ BaseSpecs = [ {"linux", ComparePath, CompareSrc, [{env, IcuEnv}]}, {"bsd", ComparePath, CompareSrc, [{env, IcuEnv ++ IcuBsdEnv}]}, {"win32", ComparePath, CompareSrc, [{env, IcuWinEnv}]} -], +]. SpawnSpec = [ {"priv/couchspawnkillable", ["priv/spawnkillable/*.c"]} -], +]. %% hack required until switch to enc/rebar3 PortEnvOverrides = [ {"win32", "EXE_LINK_CXX_TEMPLATE", "$LINKER $PORT_IN_FILES $LDFLAGS $EXE_LDFLAGS /OUT:$PORT_OUT_FILE"} -], +]. PortSpecs = case os:type() of {win32, _} -> @@ -213,10 +198,10 @@ PortSpecs = case os:type() of ok = CopyIfDifferent("priv/couchspawnkillable", CSK), os:cmd("chmod +x priv/couchspawnkillable"), BaseSpecs -end, +end. PlatformDefines = [ {platform_define, "win32", 'WINDOWS'} -], +]. AddConfig = [ {port_specs, PortSpecs}, {erl_opts, PlatformDefines ++ [ @@ -229,5 +214,10 @@ AddConfig = [ ]. 
lists:foldl(fun({K, V}, CfgAcc) -> - lists:keystore(K, 1, CfgAcc, {K, V}) + case lists:keyfind(K, 1, CfgAcc) of + {K, Existent} when is_list(Existent) andalso is_list(V) -> + lists:keystore(K, 1, CfgAcc, {K, Existent ++ V}); + false -> + lists:keystore(K, 1, CfgAcc, {K, V}) + end end, CONFIG, AddConfig). diff --git a/src/couch/src/couch.app.src b/src/couch/src/couch.app.src index 12ec29e12..6116c79ba 100644 --- a/src/couch/src/couch.app.src +++ b/src/couch/src/couch.app.src @@ -33,7 +33,6 @@ sasl, inets, ssl, - os_mon, % Upstream deps ibrowse, diff --git a/src/couch/src/couch.erl b/src/couch/src/couch.erl index 60a8b6626..1c912ac2a 100644 --- a/src/couch/src/couch.erl +++ b/src/couch/src/couch.erl @@ -23,7 +23,6 @@ deps() -> [ sasl, inets, - os_mon, crypto, public_key, ssl, diff --git a/src/couch/src/couch_att.erl b/src/couch/src/couch_att.erl index a24de21d6..b4c95e933 100644 --- a/src/couch/src/couch_att.erl +++ b/src/couch/src/couch_att.erl @@ -27,9 +27,10 @@ ]). -export([ + external_size/1, size_info/1, to_disk_term/1, - from_disk_term/2 + from_disk_term/3 ]). -export([ @@ -38,7 +39,8 @@ ]). -export([ - flush/2, + flush/3, + read_data/1, foldl/3, range_foldl/5, foldl_decode/3, @@ -46,11 +48,6 @@ ]). -export([ - upgrade/1, - downgrade/1 -]). - --export([ max_attachment_size/0, validate_attachment_size/3 ]). @@ -58,137 +55,59 @@ -compile(nowarn_deprecated_type). -export_type([att/0]). --include_lib("couch/include/couch_db.hrl"). - - -%% Legacy attachment record. This is going to be phased out by the new proplist -%% based structure. It's needed for now to allow code to perform lazy upgrades -%% while the patch is rolled out to the cluster. Attachments passed as records -%% will remain so until they are required to be represented as property lists. -%% Once this has been widely deployed, this record will be removed entirely and -%% property lists will be the main format. --record(att, { - name :: binary(), - type :: binary(), - att_len :: non_neg_integer(), - - %% length of the attachment in its identity form - %% (that is, without a content encoding applied to it) - %% differs from att_len when encoding /= identity - disk_len :: non_neg_integer(), - - md5 = <<>> :: binary(), - revpos = 0 :: non_neg_integer(), - data :: stub | follows | binary() | {any(), any()} | - {follows, pid(), reference()} | fun(() -> binary()), - - %% Encoding of the attachment - %% currently supported values are: - %% identity, gzip - %% additional values to support in the future: - %% deflate, compress - encoding = identity :: identity | gzip -}). - - -%% Extensible Attachment Type -%% -%% The following types describe the known properties for attachment fields -%% encoded as property lists to allow easier upgrades. Values not in this list -%% should be accepted at runtime but should be treated as opaque data as might -%% be used by upgraded code. If you plan on operating on new data, please add -%% an entry here as documentation. - - -%% The name of the attachment is also used as the mime-part name for file -%% downloads. These must be unique per document. --type name_prop() :: {name, binary()}. - - -%% The mime type of the attachment. This does affect compression of certain -%% attachments if the type is found to be configured as a compressable type. -%% This is commonly reserved for text/* types but could include other custom -%% cases as well. See definition and use of couch_util:compressable_att_type/1. --type type_prop() :: {type, binary()}. 
- - -%% The attachment length is similar to disk-length but ignores additional -%% encoding that may have occurred. --type att_len_prop() :: {att_len, non_neg_integer()}. - - -%% The size of the attachment as stored in a disk stream. --type disk_len_prop() :: {disk_len, non_neg_integer()}. - - -%% This is a digest of the original attachment data as uploaded by the client. -%% it's useful for checking validity of contents against other attachment data -%% as well as quick digest computation of the enclosing document. --type md5_prop() :: {md5, binary()}. +-include_lib("couch/include/couch_db.hrl"). --type revpos_prop() :: {revpos, 0}. +-define(CURRENT_ATT_FORMAT, 0). -%% This field is currently overloaded with just about everything. The -%% {any(), any()} type is just there until I have time to check the actual -%% values expected. Over time this should be split into more than one property -%% to allow simpler handling. --type data_prop() :: { - data, stub | follows | binary() | {any(), any()} | - {follows, pid(), reference()} | fun(() -> binary()) -}. +-type prop_name() :: + name | + type | + att_len | + disk_len | + md5 | + revpos | + data | + encoding. -%% We will occasionally compress our data. See type_prop() for more information -%% on when this happens. --type encoding_prop() :: {encoding, identity | gzip}. +-type data_prop_type() :: + {loc, #{}, binary(), binary()} | + stub | + follows | + binary() | + {follows, pid(), reference()} | + fun(() -> binary()). --type attachment() :: [ - name_prop() | type_prop() | - att_len_prop() | disk_len_prop() | - md5_prop() | revpos_prop() | - data_prop() | encoding_prop() -]. - --type disk_att_v1() :: { - Name :: binary(), - Type :: binary(), - Sp :: any(), - AttLen :: non_neg_integer(), - RevPos :: non_neg_integer(), - Md5 :: binary() -}. --type disk_att_v2() :: { - Name :: binary(), - Type :: binary(), - Sp :: any(), - AttLen :: non_neg_integer(), - DiskLen :: non_neg_integer(), - RevPos :: non_neg_integer(), - Md5 :: binary(), - Enc :: identity | gzip +-type att() :: #{ + name := binary(), + type := binary(), + att_len := non_neg_integer() | undefined, + disk_len := non_neg_integer() | undefined, + md5 := binary() | undefined, + revpos := non_neg_integer(), + data := data_prop_type(), + encoding := identity | gzip | undefined }. --type disk_att_v3() :: {Base :: tuple(), Extended :: list()}. - --type disk_att() :: disk_att_v1() | disk_att_v2() | disk_att_v3(). - --type att() :: #att{} | attachment() | disk_att(). new() -> - %% We construct a record by default for compatability. This will be - %% upgraded on demand. A subtle effect this has on all attachments - %% constructed via new is that it will pick up the proper defaults - %% from the #att record definition given above. Newer properties do - %% not support special default values and will all be treated as - %% undefined. - #att{}. + #{ + name => <<>>, + type => <<>>, + att_len => undefined, + disk_len => undefined, + md5 => undefined, + revpos => 0, + data => undefined, + encoding => undefined + }. --spec new([{atom(), any()}]) -> att(). +-spec new([{prop_name(), any()}]) -> att(). new(Props) -> store(Props, new()). @@ -197,71 +116,28 @@ new(Props) -> (atom(), att()) -> any(). 
fetch(Fields, Att) when is_list(Fields) -> [fetch(Field, Att) || Field <- Fields]; -fetch(Field, Att) when is_list(Att) -> - case lists:keyfind(Field, 1, Att) of - {Field, Value} -> Value; - false -> undefined - end; -fetch(name, #att{name = Name}) -> - Name; -fetch(type, #att{type = Type}) -> - Type; -fetch(att_len, #att{att_len = AttLen}) -> - AttLen; -fetch(disk_len, #att{disk_len = DiskLen}) -> - DiskLen; -fetch(md5, #att{md5 = Digest}) -> - Digest; -fetch(revpos, #att{revpos = RevPos}) -> - RevPos; -fetch(data, #att{data = Data}) -> - Data; -fetch(encoding, #att{encoding = Encoding}) -> - Encoding; -fetch(_, _) -> - undefined. +fetch(Field, Att) -> + maps:get(Field, Att). -spec store([{atom(), any()}], att()) -> att(). store(Props, Att0) -> lists:foldl(fun({Field, Value}, Att) -> - store(Field, Value, Att) + maps:update(Field, Value, Att) end, Att0, Props). --spec store(atom(), any(), att()) -> att(). -store(Field, undefined, Att) when is_list(Att) -> - lists:keydelete(Field, 1, Att); -store(Field, Value, Att) when is_list(Att) -> - lists:keystore(Field, 1, Att, {Field, Value}); -store(name, Name, Att) -> - Att#att{name = Name}; -store(type, Type, Att) -> - Att#att{type = Type}; -store(att_len, AttLen, Att) -> - Att#att{att_len = AttLen}; -store(disk_len, DiskLen, Att) -> - Att#att{disk_len = DiskLen}; -store(md5, Digest, Att) -> - Att#att{md5 = Digest}; -store(revpos, RevPos, Att) -> - Att#att{revpos = RevPos}; -store(data, Data, Att) -> - Att#att{data = Data}; -store(encoding, Encoding, Att) -> - Att#att{encoding = Encoding}; store(Field, Value, Att) -> - store(Field, Value, upgrade(Att)). + maps:update(Field, Value, Att). -spec transform(atom(), fun(), att()) -> att(). transform(Field, Fun, Att) -> - NewValue = Fun(fetch(Field, Att)), - store(Field, NewValue, Att). + maps:update_with(Field, Fun, Att). -is_stub(Att) -> - stub == fetch(data, Att). +is_stub(#{data := stub}) -> true; +is_stub(#{}) -> false. %% merge_stubs takes all stub attachments and replaces them with on disk @@ -275,8 +151,7 @@ merge_stubs(MemAtts, DiskAtts) -> merge_stubs(MemAtts, OnDisk, []). -%% restore spec when R14 support is dropped -%% -spec merge_stubs([att()], dict:dict(), [att()]) -> [att()]. +-spec merge_stubs([att()], dict:dict(), [att()]) -> [att()]. merge_stubs([Att | Rest], OnDisk, Merged) -> case fetch(data, Att) of stub -> @@ -304,18 +179,26 @@ merge_stubs([], _, Merged) -> {ok, lists:reverse(Merged)}. +external_size(Att) -> + NameSize = size(fetch(name, Att)), + TypeSize = case fetch(type, Att) of + undefined -> 0; + Type -> size(Type) + end, + AttSize = fetch(att_len, Att), + Md5Size = case fetch(md5, Att) of + undefined -> 0; + Md5 -> size(Md5) + end, + NameSize + TypeSize + AttSize + Md5Size. + + size_info([]) -> {ok, []}; size_info(Atts) -> Info = lists:map(fun(Att) -> - AttLen = fetch(att_len, Att), - case fetch(data, Att) of - {stream, StreamEngine} -> - {ok, SPos} = couch_stream:to_disk_term(StreamEngine), - {SPos, AttLen}; - {_, SPos} -> - {SPos, AttLen} - end + [{loc, _Db, _DocId, AttId}, AttLen] = fetch([data, att_len], Att), + {AttId, AttLen} end, Atts), {ok, lists:usort(Info)}. @@ -324,89 +207,41 @@ size_info(Atts) -> %% old format when possible. This should help make the attachment lazy upgrade %% as safe as possible, avoiding the need for complicated disk versioning %% schemes. 
-to_disk_term(#att{} = Att) -> - {stream, StreamEngine} = fetch(data, Att), - {ok, Sp} = couch_stream:to_disk_term(StreamEngine), - { +to_disk_term(Att) -> + {loc, #{}, _DocId, AttId} = fetch(data, Att), + {?CURRENT_ATT_FORMAT, { fetch(name, Att), fetch(type, Att), - Sp, + AttId, fetch(att_len, Att), fetch(disk_len, Att), fetch(revpos, Att), fetch(md5, Att), fetch(encoding, Att) - }; -to_disk_term(Att) -> - BaseProps = [name, type, data, att_len, disk_len, revpos, md5, encoding], - {Extended, Base} = lists:foldl( - fun - (data, {Props, Values}) -> - case lists:keytake(data, 1, Props) of - {value, {_, {stream, StreamEngine}}, Other} -> - {ok, Sp} = couch_stream:to_disk_term(StreamEngine), - {Other, [Sp | Values]}; - {value, {_, Value}, Other} -> - {Other, [Value | Values]}; - false -> - {Props, [undefined | Values]} - end; - (Key, {Props, Values}) -> - case lists:keytake(Key, 1, Props) of - {value, {_, Value}, Other} -> {Other, [Value | Values]}; - false -> {Props, [undefined | Values]} - end - end, - {Att, []}, - BaseProps - ), - {list_to_tuple(lists:reverse(Base)), Extended}. - - -%% The new disk term format is a simple wrapper around the legacy format. Base -%% properties will remain in a tuple while the new fields and possibly data from -%% future extensions will be stored in a list of atom/value pairs. While this is -%% slightly less efficient, future work should be able to make use of -%% compression to remove these sorts of common bits (block level compression -%% with something like a shared dictionary that is checkpointed every now and -%% then). -from_disk_term(StreamSrc, {Base, Extended}) - when is_tuple(Base), is_list(Extended) -> - store(Extended, from_disk_term(StreamSrc, Base)); -from_disk_term(StreamSrc, {Name,Type,Sp,AttLen,DiskLen,RevPos,Md5,Enc}) -> - {ok, Stream} = open_stream(StreamSrc, Sp), - #att{ - name=Name, - type=Type, - att_len=AttLen, - disk_len=DiskLen, - md5=Md5, - revpos=RevPos, - data={stream, Stream}, - encoding=upgrade_encoding(Enc) - }; -from_disk_term(StreamSrc, {Name,Type,Sp,AttLen,RevPos,Md5}) -> - {ok, Stream} = open_stream(StreamSrc, Sp), - #att{ - name=Name, - type=Type, - att_len=AttLen, - disk_len=AttLen, - md5=Md5, - revpos=RevPos, - data={stream, Stream} - }; -from_disk_term(StreamSrc, {Name,{Type,Sp,AttLen}}) -> - {ok, Stream} = open_stream(StreamSrc, Sp), - #att{ - name=Name, - type=Type, - att_len=AttLen, - disk_len=AttLen, - md5= <<>>, - revpos=0, - data={stream, Stream} - }. + }}. + + +from_disk_term(#{} = Db, DocId, {?CURRENT_ATT_FORMAT, Props}) -> + { + Name, + Type, + AttId, + AttLen, + DiskLen, + RevPos, + Md5, + Encoding + } = Props, + new([ + {name, Name}, + {type, Type}, + {data, {loc, Db#{tx := undefined}, DocId, AttId}}, + {att_len, AttLen}, + {disk_len, DiskLen}, + {revpos, RevPos}, + {md5, Md5}, + {encoding, Encoding} + ]). %% from_json reads in embedded JSON attachments and creates usable attachment @@ -433,8 +268,12 @@ stub_from_json(Att, Props) -> %% json object. See merge_stubs/3 for the stub check. RevPos = couch_util:get_value(<<"revpos">>, Props), store([ - {md5, Digest}, {revpos, RevPos}, {data, stub}, {disk_len, DiskLen}, - {att_len, EncodedLen}, {encoding, Encoding} + {data, stub}, + {disk_len, DiskLen}, + {att_len, EncodedLen}, + {revpos, RevPos}, + {md5, Digest}, + {encoding, Encoding} ], Att). 
@@ -443,8 +282,12 @@ follow_from_json(Att, Props) -> Digest = digest_from_json(Props), RevPos = couch_util:get_value(<<"revpos">>, Props, 0), store([ - {md5, Digest}, {revpos, RevPos}, {data, follows}, {disk_len, DiskLen}, - {att_len, EncodedLen}, {encoding, Encoding} + {data, follows}, + {disk_len, DiskLen}, + {att_len, EncodedLen}, + {revpos, RevPos}, + {md5, Digest}, + {encoding, Encoding} ], Att). @@ -455,8 +298,10 @@ inline_from_json(Att, Props) -> Length = size(Data), RevPos = couch_util:get_value(<<"revpos">>, Props, 0), store([ - {data, Data}, {revpos, RevPos}, {disk_len, Length}, - {att_len, Length} + {data, Data}, + {disk_len, Length}, + {att_len, Length}, + {revpos, RevPos} ], Att) catch _:_ -> @@ -466,7 +311,6 @@ inline_from_json(Att, Props) -> end. - encoded_lengths_from_json(Props) -> Len = couch_util:get_value(<<"length">>, Props), case couch_util:get_value(<<"encoding">>, Props) of @@ -488,9 +332,16 @@ digest_from_json(Props) -> to_json(Att, OutputData, DataToFollow, ShowEncoding) -> - [Name, Data, DiskLen, AttLen, Enc, Type, RevPos, Md5] = fetch( - [name, data, disk_len, att_len, encoding, type, revpos, md5], Att - ), + #{ + name := Name, + type := Type, + data := Data, + disk_len := DiskLen, + att_len := AttLen, + revpos := RevPos, + md5 := Md5, + encoding := Encoding + } = Att, Props = [ {<<"content_type">>, Type}, {<<"revpos">>, RevPos} @@ -505,71 +356,87 @@ to_json(Att, OutputData, DataToFollow, ShowEncoding) -> DataToFollow -> [{<<"length">>, DiskLen}, {<<"follows">>, true}]; true -> - AttData = case Enc of + AttData = case Encoding of gzip -> zlib:gunzip(to_binary(Att)); identity -> to_binary(Att) end, [{<<"data">>, base64:encode(AttData)}] end, EncodingProps = if - ShowEncoding andalso Enc /= identity -> + ShowEncoding andalso Encoding /= identity -> [ - {<<"encoding">>, couch_util:to_binary(Enc)}, + {<<"encoding">>, couch_util:to_binary(Encoding)}, {<<"encoded_length">>, AttLen} ]; true -> [] end, - HeadersProp = case fetch(headers, Att) of - undefined -> []; - Headers -> [{<<"headers">>, Headers}] + {Name, {Props ++ DigestProp ++ DataProps ++ EncodingProps}}. + + +flush(Db, DocId, Att1) -> + Data0 = fetch(data, Att1), + case {Data0, Db} of + {{follows, _, _}, #{tx := Tx}} when Tx =/= undefined -> + error(follows_cannot_be_used_in_a_transaction); + {_, #{}} -> + ok + end, + Att2 = read_data(Data0, Att1), + [ + Data, + AttLen, + DiskLen, + ReqMd5, + Encoding + ] = fetch([data, att_len, disk_len, md5, encoding], Att2), + + % Eventually, we'll check if we can compress this + % attachment here and do so if possible. + + % If we were sent a gzip'ed attachment with no + % length data, we have to set it here. + Att3 = case DiskLen of + undefined when AttLen /= undefined -> + store(disk_len, AttLen, Att2); + undefined when is_binary(Data) -> + store(disk_len, size(Data), Att2); + _ -> + Att2 end, - {Name, {Props ++ DigestProp ++ DataProps ++ EncodingProps ++ HeadersProp}}. + % If no encoding has been set, default to + % identity + Att4 = case Encoding of + undefined -> store(encoding, identity, Att3); + _ -> Att3 + end, -flush(Db, Att) -> - flush_data(Db, fetch(data, Att), Att). + case Data of + {loc, _, _, _} -> + % Already flushed + Att1; + _ when is_binary(Data) -> + DataMd5 = couch_hash:md5_hash(Data), + if ReqMd5 == undefined -> ok; true -> + couch_util:check_md5(DataMd5, ReqMd5) + end, + Att5 = store(md5, DataMd5, Att4), + Att6 = maybe_compress(Att5), + fabric2_db:write_attachment(Db, DocId, Att6) + end. 
-flush_data(Db, Data, Att) when is_binary(Data) -> - couch_db:with_stream(Db, Att, fun(OutputStream) -> - couch_stream:write(OutputStream, Data) - end); -flush_data(Db, Fun, Att) when is_function(Fun) -> - AttName = fetch(name, Att), - MaxAttSize = max_attachment_size(), - case fetch(att_len, Att) of - undefined -> - couch_db:with_stream(Db, Att, fun(OutputStream) -> - % Fun(MaxChunkSize, WriterFun) must call WriterFun - % once for each chunk of the attachment, - Fun(4096, - % WriterFun({Length, Binary}, State) - % WriterFun({0, _Footers}, State) - % Called with Length == 0 on the last time. - % WriterFun returns NewState. - fun({0, Footers}, _Total) -> - F = mochiweb_headers:from_binary(Footers), - case mochiweb_headers:get_value("Content-MD5", F) of - undefined -> - ok; - Md5 -> - {md5, base64:decode(Md5)} - end; - ({Length, Chunk}, Total0) -> - Total = Total0 + Length, - validate_attachment_size(AttName, Total, MaxAttSize), - couch_stream:write(OutputStream, Chunk), - Total - end, 0) - end); - AttLen -> - validate_attachment_size(AttName, AttLen, MaxAttSize), - couch_db:with_stream(Db, Att, fun(OutputStream) -> - write_streamed_attachment(OutputStream, Fun, AttLen) - end) - end; -flush_data(Db, {follows, Parser, Ref}, Att) -> +read_data(Att) -> + Data = fetch(data, Att), + read_data(Data, Att). + + +read_data({loc, #{}, _DocId, _AttId}, Att) -> + % Attachment already written to fdb + Att; + +read_data({follows, Parser, Ref}, Att) -> ParserRef = erlang:monitor(process, Parser), Fun = fun() -> Parser ! {get_bytes, Ref, self()}, @@ -583,41 +450,75 @@ flush_data(Db, {follows, Parser, Ref}, Att) -> end end, try - flush_data(Db, Fun, store(data, Fun, Att)) + read_data(Fun, store(data, Fun, Att)) after erlang:demonitor(ParserRef, [flush]) end; -flush_data(Db, {stream, StreamEngine}, Att) -> - case couch_db:is_active_stream(Db, StreamEngine) of - true -> - % Already written - Att; - false -> - NewAtt = couch_db:with_stream(Db, Att, fun(OutputStream) -> - couch_stream:copy(StreamEngine, OutputStream) - end), - InMd5 = fetch(md5, Att), - OutMd5 = fetch(md5, NewAtt), - couch_util:check_md5(OutMd5, InMd5), - NewAtt + +read_data(Data, Att) when is_binary(Data) -> + case fetch(att_len, Att) of + undefined -> store(att_len, size(Data), Att); + Int when is_integer(Int) -> Att + end; + +read_data(Fun, Att) when is_function(Fun) -> + [AttName, AttLen, InMd5] = fetch([name, att_len, md5], Att), + MaxAttSize = max_attachment_size(), + case AttLen of + undefined -> + % Fun(MaxChunkSize, WriterFun) must call WriterFun + % once for each chunk of the attachment, + WriterFun = fun + ({0, Footers}, {Len, Acc}) -> + F = mochiweb_headers:from_binary(Footers), + Md5 = case mochiweb_headers:get_value("Content-MD5", F) of + undefined -> undefined; + Value -> base64:decode(Value) + end, + Props0 = [ + {data, iolist_to_binary(lists:reverse(Acc))}, + {att_len, Len} + ], + Props1 = if InMd5 /= md5_in_footer -> Props0; true -> + [{md5, Md5} | Props0] + end, + store(Props1, Att); + ({ChunkLen, Chunk}, {Len, Acc}) -> + NewLen = Len + ChunkLen, + validate_attachment_size(AttName, NewLen, MaxAttSize), + {NewLen, [Chunk | Acc]} + end, + Fun(8192, WriterFun, {0, []}); + AttLen -> + validate_attachment_size(AttName, AttLen, MaxAttSize), + read_streamed_attachment(Att, Fun, AttLen, []) end. 
-write_streamed_attachment(_Stream, _F, 0) -> - ok; -write_streamed_attachment(_Stream, _F, LenLeft) when LenLeft < 0 -> +read_streamed_attachment(Att, _F, 0, Acc) -> + Bin = iolist_to_binary(lists:reverse(Acc)), + store([ + {data, Bin}, + {att_len, size(Bin)} + ], Att); + +read_streamed_attachment(_Att, _F, LenLeft, _Acc) when LenLeft < 0 -> throw({bad_request, <<"attachment longer than expected">>}); -write_streamed_attachment(Stream, F, LenLeft) when LenLeft > 0 -> - Bin = try read_next_chunk(F, LenLeft) + +read_streamed_attachment(Att, F, LenLeft, Acc) when LenLeft > 0 -> + Bin = try + read_next_chunk(F, LenLeft) catch {mp_parser_died, normal} -> throw({bad_request, <<"attachment shorter than expected">>}) end, - ok = couch_stream:write(Stream, Bin), - write_streamed_attachment(Stream, F, LenLeft - iolist_size(Bin)). + Size = iolist_size(Bin), + read_streamed_attachment(Att, F, LenLeft - Size, [Bin | Acc]). + read_next_chunk(F, _) when is_function(F, 0) -> F(); + read_next_chunk(F, LenLeft) when is_function(F, 1) -> F(lists:min([LenLeft, 16#2000])). @@ -626,14 +527,17 @@ foldl(Att, Fun, Acc) -> foldl(fetch(data, Att), Att, Fun, Acc). +foldl({loc, Db, DocId, AttId}, _Att, Fun, Acc) -> + Bin = fabric2_db:read_attachment(Db#{tx := undefined}, DocId, AttId), + Fun(Bin, Acc); + foldl(Bin, _Att, Fun, Acc) when is_binary(Bin) -> Fun(Bin, Acc); -foldl({stream, StreamEngine}, Att, Fun, Acc) -> - Md5 = fetch(md5, Att), - couch_stream:foldl(StreamEngine, Md5, Fun, Acc); + foldl(DataFun, Att, Fun, Acc) when is_function(DataFun) -> Len = fetch(att_len, Att), fold_streamed_data(DataFun, Len, Fun, Acc); + foldl({follows, Parser, Ref}, Att, Fun, Acc) -> ParserRef = erlang:monitor(process, Parser), DataFun = fun() -> @@ -654,18 +558,40 @@ foldl({follows, Parser, Ref}, Att, Fun, Acc) -> end. +range_foldl(Bin1, From, To, Fun, Acc) when is_binary(Bin1) -> + ReadLen = To - From, + Bin2 = case Bin1 of + _ when size(Bin1) < From -> <<>>; + <<_:From/binary, B2/binary>> -> B2 + end, + Bin3 = case Bin2 of + _ when size(Bin2) < ReadLen -> Bin2; + <<B3:ReadLen/binary, _/binary>> -> B3 + end, + Fun(Bin3, Acc); + range_foldl(Att, From, To, Fun, Acc) -> - {stream, StreamEngine} = fetch(data, Att), - couch_stream:range_foldl(StreamEngine, From, To, Fun, Acc). + {loc, Db, DocId, AttId} = fetch(data, Att), + Bin = fabric2_db:read_attachment(Db, DocId, AttId), + range_foldl(Bin, From, To, Fun, Acc). foldl_decode(Att, Fun, Acc) -> - case fetch([data, encoding], Att) of - [{stream, StreamEngine}, Enc] -> - couch_stream:foldl_decode( - StreamEngine, fetch(md5, Att), Enc, Fun, Acc); - [Fun2, identity] -> - fold_streamed_data(Fun2, fetch(att_len, Att), Fun, Acc) + [Encoding, Data] = fetch([encoding, data], Att), + case {Encoding, Data} of + {gzip, {loc, Db, DocId, AttId}} -> + NoTxDb = Db#{tx := undefined}, + Bin = fabric2_db:read_attachment(NoTxDb, DocId, AttId), + foldl_decode(store(data, Bin, Att), Fun, Acc); + {gzip, _} when is_binary(Data) -> + Z = zlib:open(), + ok = zlib:inflateInit(Z, 16 + 15), + Inflated = iolist_to_binary(zlib:inflate(Z, Data)), + ok = zlib:inflateEnd(Z), + ok = zlib:close(Z), + foldl(Inflated, Att, Fun, Acc); + _ -> + foldl(Att, Fun, Acc) end. 
@@ -677,10 +603,9 @@ to_binary(Bin, _Att) when is_binary(Bin) -> Bin; to_binary(Iolist, _Att) when is_list(Iolist) -> iolist_to_binary(Iolist); -to_binary({stream, _StreamEngine}, Att) -> - iolist_to_binary( - lists:reverse(foldl(Att, fun(Bin,Acc) -> [Bin|Acc] end, [])) - ); +to_binary({loc, Db, DocId, AttId}, _Att) -> + NoTxDb = Db#{tx := undefined}, + fabric2_db:read_attachment(NoTxDb, DocId, AttId); to_binary(DataFun, Att) when is_function(DataFun)-> Len = fetch(att_len, Att), iolist_to_binary( @@ -695,46 +620,60 @@ to_binary(DataFun, Att) when is_function(DataFun)-> fold_streamed_data(_RcvFun, 0, _Fun, Acc) -> Acc; + fold_streamed_data(RcvFun, LenLeft, Fun, Acc) when LenLeft > 0-> Bin = RcvFun(), ResultAcc = Fun(Bin, Acc), fold_streamed_data(RcvFun, LenLeft - size(Bin), Fun, ResultAcc). -%% Upgrade an attachment record to a property list on demand. This is a one-way -%% operation as downgrading potentially truncates fields with important data. --spec upgrade(#att{}) -> attachment(). -upgrade(#att{} = Att) -> - Map = lists:zip( - record_info(fields, att), - lists:seq(2, record_info(size, att)) - ), - %% Don't store undefined elements since that is default - [{F, element(I, Att)} || {F, I} <- Map, element(I, Att) /= undefined]; -upgrade(Att) -> - Att. +maybe_compress(Att) -> + [Encoding, Type] = fetch([encoding, type], Att), + IsCompressible = is_compressible(Type), + CompLevel = config:get_integer("attachments", "compression_level", 0), + case Encoding of + identity when IsCompressible, CompLevel >= 1, CompLevel =< 9 -> + compress(Att, CompLevel); + _ -> + Att + end. -%% Downgrade is exposed for interactive convenience. In practice, unless done -%% manually, upgrades are always one-way. -downgrade(#att{} = Att) -> - Att; -downgrade(Att) -> - #att{ - name = fetch(name, Att), - type = fetch(type, Att), - att_len = fetch(att_len, Att), - disk_len = fetch(disk_len, Att), - md5 = fetch(md5, Att), - revpos = fetch(revpos, Att), - data = fetch(data, Att), - encoding = fetch(encoding, Att) - }. +compress(Att, Level) -> + Data = fetch(data, Att), + + Z = zlib:open(), + % 15 = ?MAX_WBITS (defined in the zlib module) + % the 16 + ?MAX_WBITS formula was obtained by inspecting zlib:gzip/1 + ok = zlib:deflateInit(Z, Level, deflated, 16 + 15, 8, default), + CompData = iolist_to_binary(zlib:deflate(Z, Data, finish)), + ok = zlib:deflateEnd(Z), + ok = zlib:close(Z), + + store([ + {att_len, size(CompData)}, + {md5, couch_hash:md5_hash(CompData)}, + {data, CompData}, + {encoding, gzip} + ], Att). -upgrade_encoding(true) -> gzip; -upgrade_encoding(false) -> identity; -upgrade_encoding(Encoding) -> Encoding. +is_compressible(Type) when is_binary(Type) -> + is_compressible(binary_to_list(Type)); +is_compressible(Type) -> + TypeExpList = re:split( + config:get("attachments", "compressible_types", ""), + "\\s*,\\s*", + [{return, list}] + ), + lists:any( + fun(TypeExp) -> + Regexp = ["^\\s*", re:replace(TypeExp, "\\*", ".*"), + "(?:\\s*;.*?)?\\s*", $$], + re:run(Type, Regexp, [caseless]) =/= nomatch + end, + [T || T <- TypeExpList, T /= []] + ). max_attachment_size() -> @@ -753,204 +692,190 @@ validate_attachment_size(_AttName, _AttSize, _MAxAttSize) -> ok. -open_stream(StreamSrc, Data) -> - case couch_db:is_db(StreamSrc) of - true -> - couch_db:open_read_stream(StreamSrc, Data); - false -> - case is_function(StreamSrc, 1) of - true -> - StreamSrc(Data); - false -> - erlang:error({invalid_stream_source, StreamSrc}) - end - end. - - --ifdef(TEST). --include_lib("eunit/include/eunit.hrl"). - -% Eww... 
--include("couch_bt_engine.hrl"). - -%% Test utilities - - -empty_att() -> new(). - - -upgraded_empty_att() -> - new([{headers, undefined}]). - - -%% Test groups - - -attachment_upgrade_test_() -> - {"Lazy record upgrade tests", [ - {"Existing record fields don't upgrade", - {with, empty_att(), [fun test_non_upgrading_fields/1]} - }, - {"New fields upgrade", - {with, empty_att(), [fun test_upgrading_fields/1]} - } - ]}. - - -attachment_defaults_test_() -> - {"Attachment defaults tests", [ - {"Records retain old default values", [ - {with, empty_att(), [fun test_legacy_defaults/1]} - ]}, - {"Upgraded records inherit defaults", [ - {with, upgraded_empty_att(), [fun test_legacy_defaults/1]} - ]}, - {"Undefined entries are elided on upgrade", [ - {with, upgraded_empty_att(), [fun test_elided_entries/1]} - ]} - ]}. - -attachment_field_api_test_() -> - {"Basic attachment field api", [ - fun test_construction/0, - fun test_store_and_fetch/0, - fun test_transform/0 - ]}. - - -attachment_disk_term_test_() -> - BaseAttachment = new([ - {name, <<"empty">>}, - {type, <<"application/octet-stream">>}, - {att_len, 0}, - {disk_len, 0}, - {md5, <<212,29,140,217,143,0,178,4,233,128,9,152,236,248,66,126>>}, - {revpos, 4}, - {data, {stream, {couch_bt_engine_stream, {fake_fd, fake_sp}}}}, - {encoding, identity} - ]), - BaseDiskTerm = { - <<"empty">>, - <<"application/octet-stream">>, - fake_sp, - 0, 0, 4, - <<212,29,140,217,143,0,178,4,233,128,9,152,236,248,66,126>>, - identity - }, - Headers = [{<<"X-Foo">>, <<"bar">>}], - ExtendedAttachment = store(headers, Headers, BaseAttachment), - ExtendedDiskTerm = {BaseDiskTerm, [{headers, Headers}]}, - FakeDb = test_util:fake_db([{engine, {couch_bt_engine, #st{fd=fake_fd}}}]), - {"Disk term tests", [ - ?_assertEqual(BaseDiskTerm, to_disk_term(BaseAttachment)), - ?_assertEqual(BaseAttachment, from_disk_term(FakeDb, BaseDiskTerm)), - ?_assertEqual(ExtendedDiskTerm, to_disk_term(ExtendedAttachment)), - ?_assertEqual(ExtendedAttachment, from_disk_term(FakeDb, ExtendedDiskTerm)) - ]}. - - -attachment_json_term_test_() -> - Props = [ - {<<"content_type">>, <<"application/json">>}, - {<<"digest">>, <<"md5-QCNtWUNXV0UzJnEjMk92YUk1JA==">>}, - {<<"length">>, 14}, - {<<"revpos">>, 1} - ], - PropsInline = [{<<"data">>, <<"eyJhbnN3ZXIiOiA0Mn0=">>}] ++ Props, - InvalidProps = [{<<"data">>, <<"!Base64Encoded$">>}] ++ Props, - Att = couch_att:new([ - {name, <<"attachment.json">>}, - {type, <<"application/json">>} - ]), - ResultStub = couch_att:new([ - {name, <<"attachment.json">>}, - {type, <<"application/json">>}, - {att_len, 14}, - {disk_len, 14}, - {md5, <<"@#mYCWWE3&q#2OvaI5$">>}, - {revpos, 1}, - {data, stub}, - {encoding, identity} - ]), - ResultFollows = ResultStub#att{data = follows}, - ResultInline = ResultStub#att{md5 = <<>>, data = <<"{\"answer\": 42}">>}, - {"JSON term tests", [ - ?_assertEqual(ResultStub, stub_from_json(Att, Props)), - ?_assertEqual(ResultFollows, follow_from_json(Att, Props)), - ?_assertEqual(ResultInline, inline_from_json(Att, PropsInline)), - ?_assertThrow({bad_request, _}, inline_from_json(Att, Props)), - ?_assertThrow({bad_request, _}, inline_from_json(Att, InvalidProps)) - ]}. - - -attachment_stub_merge_test_() -> - %% Stub merging needs to demonstrate revpos matching, skipping, and missing - %% attachment errors. - {"Attachment stub merging tests", []}. 
- - -%% Test generators - - -test_non_upgrading_fields(Attachment) -> - Pairs = [ - {name, "cat.gif"}, - {type, "text/very-very-plain"}, - {att_len, 1024}, - {disk_len, 42}, - {md5, <<"md5-hashhashhash">>}, - {revpos, 4}, - {data, stub}, - {encoding, gzip} - ], - lists:foreach( - fun({Field, Value}) -> - ?assertMatch(#att{}, Attachment), - Updated = store(Field, Value, Attachment), - ?assertMatch(#att{}, Updated) - end, - Pairs). - - -test_upgrading_fields(Attachment) -> - ?assertMatch(#att{}, Attachment), - UpdatedHeaders = store(headers, [{<<"Ans">>, <<"42">>}], Attachment), - ?assertMatch(X when is_list(X), UpdatedHeaders), - UpdatedHeadersUndefined = store(headers, undefined, Attachment), - ?assertMatch(X when is_list(X), UpdatedHeadersUndefined). - - -test_legacy_defaults(Attachment) -> - ?assertEqual(<<>>, fetch(md5, Attachment)), - ?assertEqual(0, fetch(revpos, Attachment)), - ?assertEqual(identity, fetch(encoding, Attachment)). - - -test_elided_entries(Attachment) -> - ?assertNot(lists:keymember(name, 1, Attachment)), - ?assertNot(lists:keymember(type, 1, Attachment)), - ?assertNot(lists:keymember(att_len, 1, Attachment)), - ?assertNot(lists:keymember(disk_len, 1, Attachment)), - ?assertNot(lists:keymember(data, 1, Attachment)). - - -test_construction() -> - ?assert(new() == new()), - Initialized = new([{name, <<"foo.bar">>}, {type, <<"application/qux">>}]), - ?assertEqual(<<"foo.bar">>, fetch(name, Initialized)), - ?assertEqual(<<"application/qux">>, fetch(type, Initialized)). - - -test_store_and_fetch() -> - Attachment = empty_att(), - ?assertEqual(<<"abc">>, fetch(name, store(name, <<"abc">>, Attachment))), - ?assertEqual(42, fetch(ans, store(ans, 42, Attachment))). - - -test_transform() -> - Attachment = new([{counter, 0}]), - Transformed = transform(counter, fun(Count) -> Count + 1 end, Attachment), - ?assertEqual(1, fetch(counter, Transformed)). - - --endif. +%% -ifdef(TEST). +%% -include_lib("eunit/include/eunit.hrl"). +%% +%% % Eww... +%% -include("couch_bt_engine.hrl"). +%% +%% %% Test utilities +%% +%% +%% empty_att() -> new(). +%% +%% +%% upgraded_empty_att() -> +%% new([{headers, undefined}]). +%% +%% +%% %% Test groups +%% +%% +%% attachment_upgrade_test_() -> +%% {"Lazy record upgrade tests", [ +%% {"Existing record fields don't upgrade", +%% {with, empty_att(), [fun test_non_upgrading_fields/1]} +%% }, +%% {"New fields upgrade", +%% {with, empty_att(), [fun test_upgrading_fields/1]} +%% } +%% ]}. +%% +%% +%% attachment_defaults_test_() -> +%% {"Attachment defaults tests", [ +%% {"Records retain old default values", [ +%% {with, empty_att(), [fun test_legacy_defaults/1]} +%% ]}, +%% {"Upgraded records inherit defaults", [ +%% {with, upgraded_empty_att(), [fun test_legacy_defaults/1]} +%% ]}, +%% {"Undefined entries are elided on upgrade", [ +%% {with, upgraded_empty_att(), [fun test_elided_entries/1]} +%% ]} +%% ]}. +%% +%% attachment_field_api_test_() -> +%% {"Basic attachment field api", [ +%% fun test_construction/0, +%% fun test_store_and_fetch/0, +%% fun test_transform/0 +%% ]}. 
+%% +%% +%% attachment_disk_term_test_() -> +%% BaseAttachment = new([ +%% {name, <<"empty">>}, +%% {type, <<"application/octet-stream">>}, +%% {att_len, 0}, +%% {disk_len, 0}, +%% {md5, <<212,29,140,217,143,0,178,4,233,128,9,152,236,248,66,126>>}, +%% {revpos, 4}, +%% {data, {stream, {couch_bt_engine_stream, {fake_fd, fake_sp}}}}, +%% {encoding, identity} +%% ]), +%% BaseDiskTerm = { +%% <<"empty">>, +%% <<"application/octet-stream">>, +%% fake_sp, +%% 0, 0, 4, +%% <<212,29,140,217,143,0,178,4,233,128,9,152,236,248,66,126>>, +%% identity +%% }, +%% Headers = [{<<"X-Foo">>, <<"bar">>}], +%% ExtendedAttachment = store(headers, Headers, BaseAttachment), +%% ExtendedDiskTerm = {BaseDiskTerm, [{headers, Headers}]}, +%% FakeDb = test_util:fake_db([{engine, {couch_bt_engine, #st{fd=fake_fd}}}]), +%% {"Disk term tests", [ +%% ?_assertEqual(BaseDiskTerm, to_disk_term(BaseAttachment)), +%% ?_assertEqual(BaseAttachment, from_disk_term(FakeDb, BaseDiskTerm)), +%% ?_assertEqual(ExtendedDiskTerm, to_disk_term(ExtendedAttachment)), +%% ?_assertEqual(ExtendedAttachment, from_disk_term(FakeDb, ExtendedDiskTerm)) +%% ]}. +%% +%% +%% attachment_json_term_test_() -> +%% Props = [ +%% {<<"content_type">>, <<"application/json">>}, +%% {<<"digest">>, <<"md5-QCNtWUNXV0UzJnEjMk92YUk1JA==">>}, +%% {<<"length">>, 14}, +%% {<<"revpos">>, 1} +%% ], +%% PropsInline = [{<<"data">>, <<"eyJhbnN3ZXIiOiA0Mn0=">>}] ++ Props, +%% InvalidProps = [{<<"data">>, <<"!Base64Encoded$">>}] ++ Props, +%% Att = couch_att:new([ +%% {name, <<"attachment.json">>}, +%% {type, <<"application/json">>} +%% ]), +%% ResultStub = couch_att:new([ +%% {name, <<"attachment.json">>}, +%% {type, <<"application/json">>}, +%% {att_len, 14}, +%% {disk_len, 14}, +%% {md5, <<"@#mYCWWE3&q#2OvaI5$">>}, +%% {revpos, 1}, +%% {data, stub}, +%% {encoding, identity} +%% ]), +%% ResultFollows = ResultStub#att{data = follows}, +%% ResultInline = ResultStub#att{md5 = <<>>, data = <<"{\"answer\": 42}">>}, +%% {"JSON term tests", [ +%% ?_assertEqual(ResultStub, stub_from_json(Att, Props)), +%% ?_assertEqual(ResultFollows, follow_from_json(Att, Props)), +%% ?_assertEqual(ResultInline, inline_from_json(Att, PropsInline)), +%% ?_assertThrow({bad_request, _}, inline_from_json(Att, Props)), +%% ?_assertThrow({bad_request, _}, inline_from_json(Att, InvalidProps)) +%% ]}. +%% +%% +%% attachment_stub_merge_test_() -> +%% %% Stub merging needs to demonstrate revpos matching, skipping, and missing +%% %% attachment errors. +%% {"Attachment stub merging tests", []}. +%% +%% +%% %% Test generators +%% +%% +%% test_non_upgrading_fields(Attachment) -> +%% Pairs = [ +%% {name, "cat.gif"}, +%% {type, "text/very-very-plain"}, +%% {att_len, 1024}, +%% {disk_len, 42}, +%% {md5, <<"md5-hashhashhash">>}, +%% {revpos, 4}, +%% {data, stub}, +%% {encoding, gzip} +%% ], +%% lists:foreach( +%% fun({Field, Value}) -> +%% ?assertMatch(#att{}, Attachment), +%% Updated = store(Field, Value, Attachment), +%% ?assertMatch(#att{}, Updated) +%% end, +%% Pairs). +%% +%% +%% test_upgrading_fields(Attachment) -> +%% ?assertMatch(#att{}, Attachment), +%% UpdatedHeaders = store(headers, [{<<"Ans">>, <<"42">>}], Attachment), +%% ?assertMatch(X when is_list(X), UpdatedHeaders), +%% UpdatedHeadersUndefined = store(headers, undefined, Attachment), +%% ?assertMatch(X when is_list(X), UpdatedHeadersUndefined). +%% +%% +%% test_legacy_defaults(Attachment) -> +%% ?assertEqual(<<>>, fetch(md5, Attachment)), +%% ?assertEqual(0, fetch(revpos, Attachment)), +%% ?assertEqual(identity, fetch(encoding, Attachment)). 
+%% +%% +%% test_elided_entries(Attachment) -> +%% ?assertNot(lists:keymember(name, 1, Attachment)), +%% ?assertNot(lists:keymember(type, 1, Attachment)), +%% ?assertNot(lists:keymember(att_len, 1, Attachment)), +%% ?assertNot(lists:keymember(disk_len, 1, Attachment)), +%% ?assertNot(lists:keymember(data, 1, Attachment)). +%% +%% +%% test_construction() -> +%% ?assert(new() == new()), +%% Initialized = new([{name, <<"foo.bar">>}, {type, <<"application/qux">>}]), +%% ?assertEqual(<<"foo.bar">>, fetch(name, Initialized)), +%% ?assertEqual(<<"application/qux">>, fetch(type, Initialized)). +%% +%% +%% test_store_and_fetch() -> +%% Attachment = empty_att(), +%% ?assertEqual(<<"abc">>, fetch(name, store(name, <<"abc">>, Attachment))), +%% ?assertEqual(42, fetch(ans, store(ans, 42, Attachment))). +%% +%% +%% test_transform() -> +%% Attachment = new([{counter, 0}]), +%% Transformed = transform(counter, fun(Count) -> Count + 1 end, Attachment), +%% ?assertEqual(1, fetch(counter, Transformed)). +%% +%% +%% -endif. diff --git a/src/couch/src/couch_db.erl b/src/couch/src/couch_db.erl index e1d726dc9..60d2bb796 100644 --- a/src/couch/src/couch_db.erl +++ b/src/couch/src/couch_db.erl @@ -37,6 +37,7 @@ get_committed_update_seq/1, get_compacted_seq/1, get_compactor_pid/1, + get_compactor_pid_sync/1, get_db_info/1, get_partition_info/2, get_del_doc_count/1, @@ -212,6 +213,8 @@ is_db(_) -> is_system_db(#db{options = Options}) -> lists:member(sys_db, Options). +is_clustered(#{}) -> + true; is_clustered(#db{main_pid = nil}) -> true; is_clustered(#db{}) -> @@ -572,6 +575,14 @@ get_compacted_seq(#db{}=Db) -> get_compactor_pid(#db{compactor_pid = Pid}) -> Pid. +get_compactor_pid_sync(#db{main_pid=Pid}=Db) -> + case gen_server:call(Pid, compactor_pid, infinity) of + CPid when is_pid(CPid) -> + CPid; + _ -> + nil + end. + get_db_info(Db) -> #db{ name = Name, diff --git a/src/couch/src/couch_doc.erl b/src/couch/src/couch_doc.erl index 33ad14f0b..7224921d4 100644 --- a/src/couch/src/couch_doc.erl +++ b/src/couch/src/couch_doc.erl @@ -379,6 +379,17 @@ rev_info({#doc{} = Doc, {Pos, [RevId | _]}}) -> body_sp = undefined, seq = undefined, rev = {Pos, RevId} + }; +rev_info({#{} = RevInfo, {Pos, [RevId | _]}}) -> + #{ + deleted := Deleted, + sequence := Sequence + } = RevInfo, + #rev_info{ + deleted = Deleted, + body_sp = undefined, + seq = fabric2_fdb:vs_to_seq(Sequence), + rev = {Pos, RevId} }. is_deleted(#full_doc_info{rev_tree=Tree}) -> diff --git a/src/couch/src/couch_httpd.erl b/src/couch/src/couch_httpd.erl index ef90d6b2a..8f7fedd5e 100644 --- a/src/couch/src/couch_httpd.erl +++ b/src/couch/src/couch_httpd.erl @@ -931,6 +931,8 @@ error_info({error, {illegal_database_name, Name}}) -> {400, <<"illegal_database_name">>, Message}; error_info({missing_stub, Reason}) -> {412, <<"missing_stub">>, Reason}; +error_info({misconfigured_server, Reason}) -> + {500, <<"misconfigured_server">>, couch_util:to_binary(Reason)}; error_info({Error, Reason}) -> {500, couch_util:to_binary(Error), couch_util:to_binary(Reason)}; error_info(Error) -> diff --git a/src/couch/src/couch_httpd_auth.erl b/src/couch/src/couch_httpd_auth.erl index 5e4450301..e81cf040e 100644 --- a/src/couch/src/couch_httpd_auth.erl +++ b/src/couch/src/couch_httpd_auth.erl @@ -31,6 +31,8 @@ -export([cookie_auth_cookie/4, cookie_scheme/1]). -export([maybe_value/3]). +-export([jwt_authentication_handler/1]). + -import(couch_httpd, [header_value/2, send_json/2,send_json/4, send_method_not_allowed/2]). 
-compile({no_auto_import,[integer_to_binary/1, integer_to_binary/2]}). @@ -186,6 +188,42 @@ proxy_auth_user(Req) -> end end. +jwt_authentication_handler(Req) -> + case header_value(Req, "Authorization") of + "Bearer " ++ Jwt -> + RequiredClaims = get_configured_claims(), + case jwtf:decode(?l2b(Jwt), [alg | RequiredClaims], fun jwtf_keystore:get/2) of + {ok, {Claims}} -> + case lists:keyfind(<<"sub">>, 1, Claims) of + false -> throw({unauthorized, <<"Token missing sub claim.">>}); + {_, User} -> Req#httpd{user_ctx=#user_ctx{ + name = User, + roles = couch_util:get_value(?l2b(config:get("jwt_auth", "roles_claim_name", "_couchdb.roles")), Claims, []) + }} + end; + {error, Reason} -> + throw(Reason) + end; + _ -> Req + end. + +get_configured_claims() -> + Claims = config:get("jwt_auth", "required_claims", ""), + Re = "((?<key1>[a-z]+)|{(?<key2>[a-z]+)\s*,\s*\"(?<val>[^\"]+)\"})", + case re:run(Claims, Re, [global, {capture, [key1, key2, val], binary}]) of + nomatch when Claims /= "" -> + couch_log:error("[jwt_auth] required_claims is set to an invalid value.", []), + throw({misconfigured_server, <<"JWT is not configured correctly">>}); + nomatch -> + []; + {match, Matches} -> + lists:map(fun to_claim/1, Matches) + end. + +to_claim([Key, <<>>, <<>>]) -> + binary_to_atom(Key, latin1); +to_claim([<<>>, Key, Value]) -> + {binary_to_atom(Key, latin1), Value}. cookie_authentication_handler(Req) -> cookie_authentication_handler(Req, couch_auth_cache). @@ -355,17 +393,19 @@ handle_session_req(#httpd{method='GET', user_ctx=UserCtx}=Req, _AuthModule) -> {roles, UserCtx#user_ctx.roles} ]}}, {info, {[ - {authentication_db, ?l2b(config:get("couch_httpd_auth", "authentication_db"))}, {authentication_handlers, [ N || {N, _Fun} <- Req#httpd.authentication_handlers]} ] ++ maybe_value(authenticated, UserCtx#user_ctx.handler, fun(Handler) -> Handler + end) ++ maybe_value(authentication_db, config:get("chttpd_auth", "authentication_db"), fun(Val) -> + ?l2b(Val) end)}} ]}) end; % logout by deleting the session handle_session_req(#httpd{method='DELETE'}=Req, _AuthModule) -> - Cookie = mochiweb_cookies:cookie("AuthSession", "", [{path, "/"}] ++ cookie_scheme(Req)), + Cookie = mochiweb_cookies:cookie("AuthSession", "", [{path, "/"}] ++ + cookie_domain() ++ cookie_scheme(Req)), {Code, Headers} = case couch_httpd:qs_value(Req, "next", nil) of nil -> {200, [Cookie]}; diff --git a/src/couch/src/couch_lru.erl b/src/couch/src/couch_lru.erl index 6ad7c65cd..a3057136f 100644 --- a/src/couch/src/couch_lru.erl +++ b/src/couch/src/couch_lru.erl @@ -11,13 +11,16 @@ % the License. -module(couch_lru). --export([new/0, insert/2, update/2, close/1]). +-export([new/0, sizes/1, insert/2, update/2, close/1]). -include("couch_server_int.hrl"). new() -> {gb_trees:empty(), dict:new()}. +sizes({Tree, Dict}) -> + {gb_trees:size(Tree), dict:size(Dict)}. + insert(DbName, {Tree0, Dict0}) -> Lru = couch_util:unique_monotonic_integer(), {gb_trees:insert(Lru, DbName, Tree0), dict:store(DbName, Lru, Dict0)}. diff --git a/src/couch/src/couch_multidb_changes.erl b/src/couch/src/couch_multidb_changes.erl index e2bbda3e3..09278656e 100644 --- a/src/couch/src/couch_multidb_changes.erl +++ b/src/couch/src/couch_multidb_changes.erl @@ -24,7 +24,8 @@ handle_call/3, handle_info/2, handle_cast/2, - code_change/3 + code_change/3, + format_status/2 ]). -export([ @@ -174,6 +175,17 @@ code_change(_OldVsn, State, _Extra) -> {ok, State}. 
+format_status(_Opt, [_PDict, State]) -> + #state{ + pids=Pids + } = State, + Scrubbed = State#state{ + pids={length, length(Pids)} + }, + [{data, [{"State", + ?record_to_keyval(state, Scrubbed) + }]}]. + % Private functions -spec register_with_event_server(pid()) -> reference(). diff --git a/src/couch/src/couch_native_process.erl b/src/couch/src/couch_native_process.erl index eee8b2860..0a228d4c5 100644 --- a/src/couch/src/couch_native_process.erl +++ b/src/couch/src/couch_native_process.erl @@ -42,7 +42,7 @@ -vsn(1). -export([start_link/0,init/1,terminate/2,handle_call/3,handle_cast/2,code_change/3, - handle_info/2]). + handle_info/2,format_status/2]). -export([set_timeout/2, prompt/2]). -define(STATE, native_proc_state). @@ -125,6 +125,21 @@ handle_info({'EXIT',_,Reason}, State) -> terminate(_Reason, _State) -> ok. code_change(_OldVersion, State, _Extra) -> {ok, State}. +format_status(_Opt, [_PDict, State]) -> + #evstate{ + ddocs = DDocs, + funs = Functions, + query_config = QueryConfig + } = State, + Scrubbed = State#evstate{ + ddocs = {dict_size, dict:size(DDocs)}, + funs = {length, length(Functions)}, + query_config = {length, length(QueryConfig)} + }, + [{data, [{"State", + ?record_to_keyval(evstate, Scrubbed) + }]}]. + run(#evstate{list_pid=Pid}=State, [<<"list_row">>, Row]) when is_pid(Pid) -> Pid ! {self(), list_row, Row}, receive diff --git a/src/couch/src/couch_proc_manager.erl b/src/couch/src/couch_proc_manager.erl index 0daef3ee9..b83d78882 100644 --- a/src/couch/src/couch_proc_manager.erl +++ b/src/couch/src/couch_proc_manager.erl @@ -31,7 +31,8 @@ handle_call/3, handle_cast/2, handle_info/2, - code_change/3 + code_change/3, + format_status/2 ]). -export([ @@ -109,7 +110,6 @@ init([]) -> ets:new(?SERVERS, [public, named_table, set]), ets:insert(?SERVERS, get_servers_from_env("COUCHDB_QUERY_SERVER_")), ets:insert(?SERVERS, get_servers_from_env("COUCHDB_NATIVE_QUERY_SERVER_")), - ets:insert(?SERVERS, [{"QUERY", {mango_native_proc, start_link, []}}]), maybe_configure_erlang_native_servers(), {ok, #state{ @@ -269,6 +269,19 @@ handle_info(_Msg, State) -> code_change(_OldVsn, #state{}=State, _Extra) -> {ok, State}. + +format_status(_Opt, [_PDict, State]) -> + #state{ + counts=Counts + } = State, + Scrubbed = State#state{ + counts={dict_size, dict:size(Counts)} + }, + [{data, [{"State", + ?record_to_keyval(state, Scrubbed) + }]}]. 
+ + handle_config_terminate(_, stop, _) -> ok; handle_config_terminate(_Server, _Reason, _State) -> diff --git a/src/couch/src/couch_query_servers.erl b/src/couch/src/couch_query_servers.erl index c6d255f17..8eb07abbf 100644 --- a/src/couch/src/couch_query_servers.erl +++ b/src/couch/src/couch_query_servers.erl @@ -111,6 +111,8 @@ rereduce(Lang, RedSrcs, ReducedValues) -> reduce(_Lang, [], _KVs) -> {ok, []}; +reduce(Lang, [<<"_", _/binary>>] = RedSrcs, KVs) -> + builtin_reduce(reduce, RedSrcs, KVs, []); reduce(Lang, RedSrcs, KVs) -> {OsRedSrcs, BuiltinReds} = lists:partition(fun (<<"_", _/binary>>) -> false; @@ -171,7 +173,10 @@ builtin_reduce(_Re, [], _KVs, Acc) -> {ok, lists:reverse(Acc)}; builtin_reduce(Re, [<<"_sum",_/binary>>|BuiltinReds], KVs, Acc) -> Sum = builtin_sum_rows(KVs, 0), - Red = check_sum_overflow(?term_size(KVs), ?term_size(Sum), Sum), + Red = case is_number(Sum) of + true -> Sum; + false -> check_sum_overflow(?term_size(KVs), ?term_size(Sum), Sum) + end, builtin_reduce(Re, BuiltinReds, KVs, [Red|Acc]); builtin_reduce(reduce, [<<"_count",_/binary>>|BuiltinReds], KVs, Acc) -> Count = length(KVs), @@ -519,7 +524,7 @@ with_ddoc_proc(#doc{id=DDocId,revs={Start, [DiskRev|_]}}=DDoc, Fun) -> proc_prompt(Proc, Args) -> case proc_prompt_raw(Proc, Args) of {json, Json} -> - ?JSON_DECODE(Json); + raw_to_ejson({json, Json}); EJson -> EJson end. @@ -528,10 +533,76 @@ proc_prompt_raw(#proc{prompt_fun = {Mod, Func}} = Proc, Args) -> apply(Mod, Func, [Proc#proc.pid, Args]). raw_to_ejson({json, Json}) -> - ?JSON_DECODE(Json); + try + ?JSON_DECODE(Json) + catch throw:{invalid_json, {_, invalid_string}} -> + Forced = try + force_utf8(Json) + catch _:_ -> + Json + end, + ?JSON_DECODE(Forced) + end; raw_to_ejson(EJson) -> EJson. +force_utf8(Bin) -> + case binary:match(Bin, <<"\\u">>) of + {Start, 2} -> + <<Prefix:Start/binary, Rest1/binary>> = Bin, + {Insert, Rest3} = case check_uescape(Rest1) of + {ok, Skip} -> + <<Skipped:Skip/binary, Rest2/binary>> = Rest1, + {Skipped, Rest2}; + {error, Skip} -> + <<_:Skip/binary, Rest2/binary>> = Rest1, + {<<16#EF, 16#BF, 16#BD>>, Rest2} + end, + RestForced = force_utf8(Rest3), + <<Prefix/binary, Insert/binary, RestForced/binary>>; + nomatch -> + Bin + end. + +check_uescape(Data) -> + case extract_uescape(Data) of + {Hi, Rest} when Hi >= 16#D800, Hi < 16#DC00 -> + case extract_uescape(Rest) of + {Lo, _} when Lo >= 16#DC00, Lo =< 16#DFFF -> + % A low surrogate pair + UTF16 = << + Hi:16/big-unsigned-integer, + Lo:16/big-unsigned-integer + >>, + try + [_] = xmerl_ucs:from_utf16be(UTF16), + {ok, 12} + catch _:_ -> + {error, 6} + end; + {_, _} -> + % Found a uescape that's not a low half + {error, 6}; + false -> + % No hex escape found + {error, 6} + end; + {Hi, _} when Hi >= 16#DC00, Hi =< 16#DFFF -> + % Found a low surrogate half without a high half + {error, 6}; + {_, _} -> + % Found a uescape we don't care about + {ok, 6}; + false -> + % Incomplete uescape which we don't care about + {ok, 2} + end. + +extract_uescape(<<"\\u", Code:4/binary, Rest/binary>>) -> + {binary_to_integer(Code, 16), Rest}; +extract_uescape(_) -> + false. + proc_stop(Proc) -> {Mod, Func} = Proc#proc.stop_fun, apply(Mod, Func, [Proc#proc.pid]). @@ -680,4 +751,41 @@ test_reduce(Reducer, KVs) -> {ok, Finalized} = finalize(Reducer, Reduced), Finalized. 
+force_utf8_test() -> + % "\uDCA5\uD83D" + Ok = [ + <<"foo">>, + <<"\\u00A0">>, + <<"\\u0032">>, + <<"\\uD83D\\uDCA5">>, + <<"foo\\uD83D\\uDCA5bar">>, + % Truncated but we doesn't break replacements + <<"\\u0FA">> + ], + lists:foreach(fun(Case) -> + ?assertEqual(Case, force_utf8(Case)) + end, Ok), + + NotOk = [ + <<"\\uDCA5">>, + <<"\\uD83D">>, + <<"fo\\uDCA5bar">>, + <<"foo\\uD83Dbar">>, + <<"\\uDCA5\\uD83D">>, + <<"\\uD83Df\\uDCA5">>, + <<"\\uDCA5\\u00A0">>, + <<"\\uD83D\\u00A0">> + ], + ToJSON = fun(Bin) -> <<34, Bin/binary, 34>> end, + lists:foreach(fun(Case) -> + try + ?assertNotEqual(Case, force_utf8(Case)), + ?assertThrow(_, ?JSON_DECODE(ToJSON(Case))), + ?assertMatch(<<_/binary>>, ?JSON_DECODE(ToJSON(force_utf8(Case)))) + catch + T:R -> + io:format(standard_error, "~p~n~p~n", [T, R]) + end + end, NotOk). + -endif. diff --git a/src/couch/src/couch_server.erl b/src/couch/src/couch_server.erl index 909e23898..42eab738c 100644 --- a/src/couch/src/couch_server.erl +++ b/src/couch/src/couch_server.erl @@ -18,7 +18,7 @@ -export([open/2,create/2,delete/2,get_version/0,get_version/1,get_git_sha/0,get_uuid/0]). -export([all_databases/0, all_databases/2]). -export([init/1, handle_call/3,sup_start_link/0]). --export([handle_cast/2,code_change/3,handle_info/2,terminate/2]). +-export([handle_cast/2,code_change/3,handle_info/2,terminate/2,format_status/2]). -export([dev_start/0,is_admin/2,has_admins/0,get_stats/0]). -export([close_lru/0]). -export([close_db_if_idle/1]). @@ -237,15 +237,19 @@ init([]) -> couch_util:set_mqd_off_heap(?MODULE), couch_util:set_process_priority(?MODULE, high), - % Mark pluggable storage engines as a supported feature - config:enable_feature('pluggable-storage-engines'), - - % Mark partitioned databases as a supported feature - config:enable_feature(partitioned), - % Mark being able to receive documents with an _access property as a supported feature config:enable_feature('access-ready'), + % Mark if fips is enabled + case + erlang:function_exported(crypto, info_fips, 0) andalso + crypto:info_fips() == enabled of + true -> + config:enable_feature('fips'); + false -> + ok + end, + % read config and register for configuration changes % just stop if one of the config settings change. couch_server_sup @@ -294,6 +298,10 @@ terminate(Reason, Srv) -> end, nil, couch_dbs), ok. +format_status(_Opt, [_PDict, Srv]) -> + Scrubbed = Srv#server{lru=couch_lru:sizes(Srv#server.lru)}, + [{data, [{"State", ?record_to_keyval(server, Scrubbed)}]}]. + handle_config_change("couchdb", "database_dir", _, _, _) -> exit(whereis(couch_server), config_change), remove_handler; @@ -381,10 +389,13 @@ maybe_close_lru_db(#server{lru=Lru}=Server) -> end. open_async(Server, From, DbName, Options) -> + NoLRUServer = Server#server{ + lru = redacted + }, Parent = self(), T0 = os:timestamp(), Opener = spawn_link(fun() -> - Res = open_async_int(Server, DbName, Options), + Res = open_async_int(NoLRUServer, DbName, Options), IsSuccess = case Res of {ok, _} -> true; _ -> false diff --git a/src/couch/src/couch_stream.erl b/src/couch/src/couch_stream.erl index 2ab46d7e7..d8b7e0ffe 100644 --- a/src/couch/src/couch_stream.erl +++ b/src/couch/src/couch_stream.erl @@ -36,7 +36,8 @@ handle_call/3, handle_cast/2, handle_info/2, - code_change/3 + code_change/3, + format_status/2 ]). @@ -294,6 +295,19 @@ handle_info(_Info, State) -> {noreply, State}. 
+format_status(_Opt, [_PDict, Stream]) -> + #stream{ + written_pointers=Pointers, + buffer_list = Buffer + } = Stream, + Scrubbed = Stream#stream{ + written_pointers={length, length(Pointers)}, + buffer_list = {length, length(Buffer)} + }, + [{data, [{"State", + ?record_to_keyval(stream, Scrubbed) + }]}]. + do_seek({Engine, EngineState}, Offset) -> {ok, NewState} = Engine:seek(EngineState, Offset), {Engine, NewState}. diff --git a/src/couch/src/couch_util.erl b/src/couch/src/couch_util.erl index a785e2e44..8d643398c 100644 --- a/src/couch/src/couch_util.erl +++ b/src/couch/src/couch_util.erl @@ -14,14 +14,14 @@ -export([priv_dir/0, normpath/1, fold_files/5]). -export([should_flush/0, should_flush/1, to_existing_atom/1]). --export([rand32/0, implode/2, collate/2, collate/3]). +-export([rand32/0, implode/2, collate/2, collate/3, get_sort_key/1]). -export([abs_pathname/1,abs_pathname/2, trim/1, drop_dot_couch_ext/1]). -export([encodeBase64Url/1, decodeBase64Url/1]). -export([validate_utf8/1, to_hex/1, parse_term/1, dict_find/3]). -export([get_nested_json_value/2, json_user_ctx/1]). -export([proplist_apply_field/2, json_apply_field/2]). -export([to_binary/1, to_integer/1, to_list/1, url_encode/1]). --export([json_encode/1, json_decode/1]). +-export([json_encode/1, json_decode/1, json_decode/2]). -export([verify/2,simple_call/2,shutdown_sync/1]). -export([get_value/2, get_value/3]). -export([reorder_results/2]). @@ -31,6 +31,7 @@ -export([with_db/2]). -export([rfc1123_date/0, rfc1123_date/1]). -export([integer_to_boolean/1, boolean_to_integer/1]). +-export([validate_positive_int/1]). -export([find_in_binary/2]). -export([callback_exists/3, validate_callback_exists/3]). -export([with_proc/4]). @@ -47,15 +48,17 @@ -define(FLUSH_MAX_MEM, 10000000). -define(BLACKLIST_CONFIG_SECTIONS, [ - <<"daemons">>, - <<"external">>, - <<"httpd_design_handlers">>, - <<"httpd_db_handlers">>, - <<"httpd_global_handlers">>, - <<"native_query_servers">>, - <<"os_daemons">>, - <<"query_servers">>, - <<"feature_flags">> + <<"^daemons$">>, + <<"^external$">>, + <<"^httpd_design_handlers$">>, + <<"^httpd_db_handlers$">>, + <<"^httpd_global_handlers$">>, + <<"^native_query_servers$">>, + <<"^os_daemons$">>, + <<"^query_servers$">>, + <<"^feature_flags$">>, + <<"^tracing\..*$">>, + <<"^tracing$">> ]). @@ -407,11 +410,20 @@ collate(A, B, Options) when is_binary(A), is_binary(B) -> SizeA = byte_size(A), SizeB = byte_size(B), Bin = <<SizeA:32/native, A/binary, SizeB:32/native, B/binary>>, - [Result] = erlang:port_control(drv_port(), Operation, Bin), + <<Result>> = erlang:port_control(drv_port(), Operation, Bin), % Result is 0 for lt, 1 for eq and 2 for gt. Subtract 1 to return the % expected typical -1, 0, 1 Result - 1. +get_sort_key(Str) when is_binary(Str) -> + Operation = 2, % get_sort_key + Size = byte_size(Str), + Bin = <<Size:32/native, Str/binary>>, + case erlang:port_control(drv_port(), Operation, Bin) of + <<>> -> error; + Res -> Res + end. + should_flush() -> should_flush(?FLUSH_MAX_MEM). @@ -498,8 +510,11 @@ json_encode(V) -> jiffy:encode(V, [force_utf8]). json_decode(V) -> + json_decode(V, []). + +json_decode(V, Opts) -> try - jiffy:decode(V, [dedupe_keys]) + jiffy:decode(V, [dedupe_keys | Opts]) catch error:Error -> throw({invalid_json, Error}) @@ -621,6 +636,17 @@ boolean_to_integer(false) -> 0. 
+validate_positive_int(N) when is_list(N) -> + try + I = list_to_integer(N), + validate_positive_int(I) + catch error:badarg -> + false + end; +validate_positive_int(N) when is_integer(N), N > 0 -> true; +validate_positive_int(_) -> false. + + find_in_binary(_B, <<>>) -> not_found; @@ -753,10 +779,13 @@ unique_monotonic_integer() -> check_config_blacklist(Section) -> - case lists:member(Section, ?BLACKLIST_CONFIG_SECTIONS) of - true -> - Msg = <<"Config section blacklisted for modification over HTTP API.">>, - throw({forbidden, Msg}); - _ -> - ok - end. + lists:foreach(fun(RegExp) -> + case re:run(Section, RegExp) of + nomatch -> + ok; + _ -> + Msg = <<"Config section blacklisted for modification over HTTP API.">>, + throw({forbidden, Msg}) + end + end, ?BLACKLIST_CONFIG_SECTIONS), + ok. diff --git a/src/couch/src/couch_work_queue.erl b/src/couch/src/couch_work_queue.erl index 5d747de82..01271bb35 100644 --- a/src/couch/src/couch_work_queue.erl +++ b/src/couch/src/couch_work_queue.erl @@ -21,7 +21,7 @@ % gen_server callbacks -export([init/1, terminate/2]). --export([handle_call/3, handle_cast/2, code_change/3, handle_info/2]). +-export([handle_call/3, handle_cast/2, code_change/3, handle_info/2, format_status/2]). -record(q, { queue = queue:new(), @@ -49,7 +49,7 @@ queue(Wq, Item) -> dequeue(Wq) -> dequeue(Wq, all). - + dequeue(Wq, MaxItems) -> try gen_server:call(Wq, {dequeue, MaxItems}, infinity) @@ -76,7 +76,7 @@ size(Wq) -> close(Wq) -> gen_server:cast(Wq, close). - + init(Options) -> Q = #q{ @@ -90,7 +90,7 @@ init(Options) -> terminate(_Reason, #q{work_waiters=Workers}) -> lists:foreach(fun({W, _}) -> gen_server:reply(W, closed) end, Workers). - + handle_call({queue, Item, Size}, From, #q{work_waiters = []} = Q0) -> Q = Q0#q{size = Q0#q.size + Size, items = Q0#q.items + 1, @@ -172,7 +172,7 @@ dequeue_items(NumItems, Size, Queue, Blocked, DequeuedAcc) -> end, dequeue_items( NumItems - 1, Size - ItemSize, Queue2, Blocked2, [Item | DequeuedAcc]). - + handle_cast(close, #q{items = 0} = Q) -> {stop, normal, Q}; @@ -186,3 +186,18 @@ code_change(_OldVsn, State, _Extra) -> handle_info(X, Q) -> {stop, X, Q}. + +format_status(_Opt, [_PDict, Queue]) -> + #q{ + queue = Q, + blocked = Blocked, + work_waiters = Waiters + } = Queue, + Scrubbed = Queue#q{ + queue = {queue_length, queue:len(Q)}, + blocked = {length, length(Blocked)}, + work_waiters = {length, length(Waiters)} + }, + [{data, [{"State", + ?record_to_keyval(q, Scrubbed) + }]}]. diff --git a/src/couch/test/eunit/couch_js_tests.erl b/src/couch/test/eunit/couch_js_tests.erl index cd6452cf9..693cd9772 100644 --- a/src/couch/test/eunit/couch_js_tests.erl +++ b/src/couch/test/eunit/couch_js_tests.erl @@ -14,17 +14,6 @@ -include_lib("eunit/include/eunit.hrl"). --define(FUNC, << - "var state = [];\n" - "function(doc) {\n" - " var val = \"0123456789ABCDEF\";\n" - " for(var i = 0; i < 165535; i++) {\n" - " state.push([val, val]);\n" - " }\n" - "}\n" ->>). - - couch_js_test_() -> { "Test couchjs", @@ -33,15 +22,141 @@ couch_js_test_() -> fun test_util:start_couch/0, fun test_util:stop_couch/1, [ + fun should_create_sandbox/0, + fun should_roundtrip_utf8/0, + fun should_roundtrip_modified_utf8/0, + fun should_replace_broken_utf16/0, + fun should_allow_js_string_mutations/0, {timeout, 60000, fun should_exit_on_oom/0} ] } }. +should_create_sandbox() -> + % Try and detect whether we can see out of the + % sandbox or not. 
+ Src = << + "function(doc) {\n" + " try {\n" + " emit(false, typeof(Couch.compile_function));\n" + " } catch (e) {\n" + " emit(true, e.message);\n" + " }\n" + "}\n" + >>, + Proc = couch_query_servers:get_os_process(<<"javascript">>), + true = couch_query_servers:proc_prompt(Proc, [<<"add_fun">>, Src]), + Result = couch_query_servers:proc_prompt(Proc, [<<"map_doc">>, <<"{}">>]), + ?assertEqual([[[true, <<"Couch is not defined">>]]], Result). + + +should_roundtrip_utf8() -> + % Try round tripping UTF-8 both directions through + % couchjs. These tests use hex encoded values of + % Ä (C384) and Ü (C39C) so as to avoid odd editor/Erlang encoding + % strangeness. + Src = << + "function(doc) {\n" + " emit(doc.value, \"", 16#C3, 16#9C, "\");\n" + "}\n" + >>, + Proc = couch_query_servers:get_os_process(<<"javascript">>), + true = couch_query_servers:proc_prompt(Proc, [<<"add_fun">>, Src]), + Doc = {[ + {<<"value">>, <<16#C3, 16#84>>} + ]}, + Result = couch_query_servers:proc_prompt(Proc, [<<"map_doc">>, Doc]), + ?assertEqual([[[<<16#C3, 16#84>>, <<16#C3, 16#9C>>]]], Result). + + +should_roundtrip_modified_utf8() -> + % Mimicing the test case from the mailing list + Src = << + "function(doc) {\n" + " emit(doc.value.toLowerCase(), \"", 16#C3, 16#9C, "\");\n" + "}\n" + >>, + Proc = couch_query_servers:get_os_process(<<"javascript">>), + true = couch_query_servers:proc_prompt(Proc, [<<"add_fun">>, Src]), + Doc = {[ + {<<"value">>, <<16#C3, 16#84>>} + ]}, + Result = couch_query_servers:proc_prompt(Proc, [<<"map_doc">>, Doc]), + ?assertEqual([[[<<16#C3, 16#A4>>, <<16#C3, 16#9C>>]]], Result). + + +should_replace_broken_utf16() -> + % This test reverse the surrogate pair of + % the Boom emoji U+1F4A5 + Src = << + "function(doc) {\n" + " emit(doc.value.split(\"\").reverse().join(\"\"), 1);\n" + "}\n" + >>, + Proc = couch_query_servers:get_os_process(<<"javascript">>), + true = couch_query_servers:proc_prompt(Proc, [<<"add_fun">>, Src]), + Doc = {[ + {<<"value">>, list_to_binary(xmerl_ucs:to_utf8([16#1F4A5]))} + ]}, + Result = couch_query_servers:proc_prompt(Proc, [<<"map_doc">>, Doc]), + % Invalid UTF-8 gets replaced with the 16#FFFD replacement + % marker + Markers = list_to_binary(xmerl_ucs:to_utf8([16#FFFD, 16#FFFD])), + ?assertEqual([[[Markers, 1]]], Result). 
+ + +should_allow_js_string_mutations() -> + % This binary corresponds to this string: мама мыла раму + % Which I'm told translates to: "mom was washing the frame" + MomWashedTheFrame = << + 16#D0, 16#BC, 16#D0, 16#B0, 16#D0, 16#BC, 16#D0, 16#B0, 16#20, + 16#D0, 16#BC, 16#D1, 16#8B, 16#D0, 16#BB, 16#D0, 16#B0, 16#20, + 16#D1, 16#80, 16#D0, 16#B0, 16#D0, 16#BC, 16#D1, 16#83 + >>, + Mom = <<16#D0, 16#BC, 16#D0, 16#B0, 16#D0, 16#BC, 16#D0, 16#B0>>, + Washed = <<16#D0, 16#BC, 16#D1, 16#8B, 16#D0, 16#BB, 16#D0, 16#B0>>, + Src1 = << + "function(doc) {\n" + " emit(\"length\", doc.value.length);\n" + "}\n" + >>, + Src2 = << + "function(doc) {\n" + " emit(\"substring\", doc.value.substring(5, 9));\n" + "}\n" + >>, + Src3 = << + "function(doc) {\n" + " emit(\"slice\", doc.value.slice(0, 4));\n" + "}\n" + >>, + Proc = couch_query_servers:get_os_process(<<"javascript">>), + true = couch_query_servers:proc_prompt(Proc, [<<"add_fun">>, Src1]), + true = couch_query_servers:proc_prompt(Proc, [<<"add_fun">>, Src2]), + true = couch_query_servers:proc_prompt(Proc, [<<"add_fun">>, Src3]), + Doc = {[{<<"value">>, MomWashedTheFrame}]}, + Result = couch_query_servers:proc_prompt(Proc, [<<"map_doc">>, Doc]), + Expect = [ + [[<<"length">>, 14]], + [[<<"substring">>, Washed]], + [[<<"slice">>, Mom]] + ], + ?assertEqual(Expect, Result). + + should_exit_on_oom() -> + Src = << + "var state = [];\n" + "function(doc) {\n" + " var val = \"0123456789ABCDEF\";\n" + " for(var i = 0; i < 165535; i++) {\n" + " state.push([val, val]);\n" + " }\n" + "}\n" + >>, Proc = couch_query_servers:get_os_process(<<"javascript">>), - true = couch_query_servers:proc_prompt(Proc, [<<"add_fun">>, ?FUNC]), + true = couch_query_servers:proc_prompt(Proc, [<<"add_fun">>, Src]), trigger_oom(Proc). trigger_oom(Proc) -> diff --git a/src/couch/test/eunit/couch_util_tests.erl b/src/couch/test/eunit/couch_util_tests.erl index 3e145c4f6..e37691110 100644 --- a/src/couch/test/eunit/couch_util_tests.erl +++ b/src/couch/test/eunit/couch_util_tests.erl @@ -14,6 +14,12 @@ -include_lib("couch/include/couch_eunit.hrl"). +% For generating poisson distributed string lengths +% in the random unicode generation. This shoots +% for lengths centered around 24 characters. To +% change, replace this value with math:exp(-Length). +-define(POISSON_LIMIT, 3.775134544279098e-11). +-define(RANDOM_TEST_SIZE, 10000). setup() -> %% We cannot start driver from here since it becomes bounded to eunit @@ -168,3 +174,144 @@ to_hex_test_() -> ?_assertEqual("", couch_util:to_hex(<<>>)), ?_assertEqual("010203faff", couch_util:to_hex(<<1, 2, 3, 250, 255>>)) ]. + +json_decode_test_() -> + [ + ?_assertEqual({[]}, couch_util:json_decode(<<"{}">>)), + ?_assertEqual({[]}, couch_util:json_decode(<<"{}">>, [])), + ?_assertEqual(#{}, couch_util:json_decode(<<"{}">>, [return_maps])) + ]. + +sort_key_test_() -> + { + "Sort Key tests", + [ + { + foreach, + fun setup/0, fun teardown/1, + [ + fun test_get_sort_key/1, + fun test_get_sort_key_jiffy_string/1, + fun test_get_sort_key_fails_on_bad_input/1, + fun test_get_sort_key_longer_than_buffer/1, + fun test_sort_key_collation/1, + fun test_sort_key_list_sort/1 + ] + } + ] + }. 
+ +test_get_sort_key(_) -> + Strs = [ + <<"">>, + <<"foo">>, + <<"bar">>, + <<"Bar">>, + <<"baz">>, + <<"BAZ">>, + <<"quaz">>, + <<"1234fdsa">>, + <<"1234">>, + <<"pizza">> + ], + Pairs = [{S1, S2} || S1 <- Strs, S2 <- Strs], + lists:map(fun({S1, S2}) -> + S1K = couch_util:get_sort_key(S1), + S2K = couch_util:get_sort_key(S2), + SortRes = sort_keys(S1K, S2K), + Comment = list_to_binary(io_lib:format("strcmp(~p, ~p)", [S1, S2])), + CollRes = couch_util:collate(S1, S2), + {Comment, ?_assertEqual(SortRes, CollRes)} + end, Pairs). + +test_get_sort_key_jiffy_string(_) -> + %% jiffy:decode does not null terminate strings + %% so we use it here to test unterminated strings + {[{S1,S2}]} = jiffy:decode(<<"{\"foo\": \"bar\"}">>), + S1K = couch_util:get_sort_key(S1), + S2K = couch_util:get_sort_key(S2), + SortRes = sort_keys(S1K, S2K), + CollRes = couch_util:collate(S1, S2), + ?_assertEqual(SortRes, CollRes). + +test_get_sort_key_fails_on_bad_input(_) -> + %% generated with crypto:strong_rand_bytes + %% contains invalid character, should error + S = <<209,98,222,144,60,163,72,134,206,157>>, + Res = couch_util:get_sort_key(S), + ?_assertEqual(error, Res). + +test_get_sort_key_longer_than_buffer(_) -> + %% stack allocated buffer is 1024 units + %% test resize logic with strings > 1024 char + Extra = list_to_binary(["a" || _ <- lists:seq(1, 1200)]), + ?_assert(is_binary(Extra)). + +test_sort_key_collation(_) -> + ?_test(begin + lists:foreach(fun(_) -> + K1 = random_unicode_binary(), + SK1 = couch_util:get_sort_key(K1), + + K2 = random_unicode_binary(), + SK2 = couch_util:get_sort_key(K2), + + % Probably kinda silly but whatevs + ?assertEqual(couch_util:collate(K1, K1), sort_keys(SK1, SK1)), + ?assertEqual(couch_util:collate(K2, K2), sort_keys(SK2, SK2)), + + ?assertEqual(couch_util:collate(K1, K2), sort_keys(SK1, SK2)), + ?assertEqual(couch_util:collate(K2, K1), sort_keys(SK2, SK1)) + end, lists:seq(1, ?RANDOM_TEST_SIZE)) + end). + +test_sort_key_list_sort(_) -> + ?_test(begin + RandomKeys = lists:map(fun(_) -> + random_unicode_binary() + end, lists:seq(1, ?RANDOM_TEST_SIZE)), + + CollationSorted = lists:sort(fun(A, B) -> + couch_util:collate(A, B) =< 0 + end, RandomKeys), + + SortKeys = lists:map(fun(K) -> + {couch_util:get_sort_key(K), K} + end, RandomKeys), + {_, SortKeySorted} = lists:unzip(lists:sort(SortKeys)), + + ?assertEqual(CollationSorted, SortKeySorted) + end). + +sort_keys(S1, S2) -> + case S1 < S2 of + true -> + -1; + false -> case S1 =:= S2 of + true -> + 0; + false -> + 1 + end + end. + +random_unicode_binary() -> + Size = poisson_length(0, rand:uniform()), + Chars = [random_unicode_char() || _ <- lists:seq(1, Size)], + <<_/binary>> = unicode:characters_to_binary(Chars). + +poisson_length(N, Acc) when Acc > ?POISSON_LIMIT -> + poisson_length(N + 1, Acc * rand:uniform()); +poisson_length(N, _) -> + N. + +random_unicode_char() -> + BaseChar = rand:uniform(16#FFFD + 1) - 1, + case BaseChar of + BC when BC >= 16#D800, BC =< 16#DFFF -> + % This range is reserved for surrogate pair + % encodings. + random_unicode_char(); + BC -> + BC + end. 
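The `?POISSON_LIMIT` define near the top of this test module is derived from the target mean string length, as its comment notes. A hypothetical recalculation for a different mean would look like this:

```erlang
% Hypothetical: for a mean length of roughly 32 characters, the new limit
% would be the value of math:exp(-32) (about 1.27e-14).
NewPoissonLimit = math:exp(-32).
```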
diff --git a/src/couch/test/eunit/couchdb_cookie_domain_tests.erl b/src/couch/test/eunit/couchdb_cookie_domain_tests.erl index e66ab31e6..c46352f35 100755 --- a/src/couch/test/eunit/couchdb_cookie_domain_tests.erl +++ b/src/couch/test/eunit/couchdb_cookie_domain_tests.erl @@ -43,7 +43,8 @@ cookie_test_() -> fun({ok, Url, ContentType, Payload, _}) -> [ should_set_cookie_domain(Url, ContentType, Payload), - should_not_set_cookie_domain(Url, ContentType, Payload) + should_not_set_cookie_domain(Url, ContentType, Payload), + should_delete_cookie_domain(Url, ContentType, Payload) ] end } @@ -67,3 +68,13 @@ should_not_set_cookie_domain(Url, ContentType, Payload) -> Cookie = proplists:get_value("Set-Cookie", Headers), ?assertEqual(0, string:str(Cookie, "; Domain=")) end). + +should_delete_cookie_domain(Url, ContentType, Payload) -> + ?_test(begin + ok = config:set("couch_httpd_auth", "cookie_domain", + "example.com", false), + {ok, Code, Headers, _} = test_request:delete(Url, ContentType, Payload), + ?assertEqual(200, Code), + Cookie = proplists:get_value("Set-Cookie", Headers), + ?assert(string:str(Cookie, "; Domain=example.com") > 0) + end). diff --git a/src/couch_epi/test/eunit/couch_epi_tests.erl b/src/couch_epi/test/eunit/couch_epi_tests.erl index 12d8610c1..23b9e6103 100644 --- a/src/couch_epi/test/eunit/couch_epi_tests.erl +++ b/src/couch_epi/test/eunit/couch_epi_tests.erl @@ -162,7 +162,8 @@ start_epi(Plugins) -> Module end, Plugins), application:set_env(couch_epi, plugins, PluginsModules), - application:start(couch_epi). + {ok, _} = application:ensure_all_started(couch_epi), + ok. setup(data_file) -> error_logger:tty(false), diff --git a/src/couch_eval/README.md b/src/couch_eval/README.md new file mode 100644 index 000000000..048a165fb --- /dev/null +++ b/src/couch_eval/README.md @@ -0,0 +1,5 @@ +couch_eval +===== + +An initial abstraction layer for evaluating user-provided code. So far +this is only used by `couch_views` to provide map function support. Currently this is implemented in `couch_js` by reusing the existing `couchjs` mechanics. diff --git a/src/couch_replicator/src/couch_replicator_scheduler.hrl b/src/couch_eval/rebar.config index 5203b0caa..362c8785e 100644 --- a/src/couch_replicator/src/couch_replicator_scheduler.hrl +++ b/src/couch_eval/rebar.config @@ -10,6 +10,5 @@ % License for the specific language governing permissions and limitations under % the License. - --type job_id() :: term(). --type job_args() :: term(). +{cover_enabled, true}. +{cover_print_enabled, true}. diff --git a/src/couch_eval/src/couch_eval.app.src b/src/couch_eval/src/couch_eval.app.src new file mode 100644 index 000000000..87193d806 --- /dev/null +++ b/src/couch_eval/src/couch_eval.app.src @@ -0,0 +1,23 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +{application, couch_eval, [ + {description, "An OTP application"}, + {vsn, git}, + {registered, []}, + {applications, [ + kernel, + stdlib, + couch_log, + config + ]} + ]}.
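The `couch_eval` module added next is a thin dispatcher over per-language implementations, so a rough usage sketch may help. Everything below is illustrative: the database, design document, signature, and map function are placeholders, and the sketch assumes a `[couch_eval.languages]` config entry maps `javascript` to an implementing module such as `couch_js`, which is what `get_api_mod/1` looks up.

```erlang
% Illustrative sketch only: acquire a map context, map some docs, release it.
MapFuns = [<<"function(doc) { emit(doc._id, 1); }">>],
{ok, Ctx} = couch_eval:acquire_map_context(
    <<"mydb">>, <<"_design/example">>, <<"javascript">>, <<"sig">>, [], MapFuns),
try
    Docs = [],  % normally a list of #doc{} records
    {ok, _Results} = couch_eval:map_docs(Ctx, Docs)
after
    ok = couch_eval:release_map_context(Ctx)
end.
```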
diff --git a/src/couch_eval/src/couch_eval.erl b/src/couch_eval/src/couch_eval.erl new file mode 100644 index 000000000..3541a5b94 --- /dev/null +++ b/src/couch_eval/src/couch_eval.erl @@ -0,0 +1,100 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + + +-module(couch_eval). + + +-export([ + acquire_map_context/6, + release_map_context/1, + map_docs/2 +]). + + +-include_lib("couch/include/couch_db.hrl"). + + +-type db_name() :: binary(). +-type doc_id() :: binary(). +-type ddoc_id() :: binary(). +-type language() :: binary(). +-type sig() :: binary(). +-type lib() :: any(). +-type map_fun() :: binary(). +-type map_funs() :: [map_fun()]. +-type result() :: {doc_id(), [[{any(), any()}]]}. +-type api_mod() :: atom(). +-type context() :: {api_mod(), any()}. + +-type context_opts() :: #{ + db_name := db_name(), + ddoc_id => ddoc_id(), + language => language(), + sig => sig(), + lib => lib(), + map_funs => map_funs(), + api_mod => api_mod() +}. + + +-callback acquire_map_context(context_opts()) -> {ok, any()} | {error, any()}. +-callback release_map_context(context()) -> ok | {error, any()}. +-callback map_docs(context(), [doc()]) -> {ok, [result()]} | {error, any()}. + + +-spec acquire_map_context( + db_name(), + ddoc_id(), + language(), + sig(), + lib(), + map_funs() + ) -> {ok, context()} | {error, any()}. +acquire_map_context(DbName, DDocId, Language, Sig, Lib, MapFuns) -> + ApiMod = get_api_mod(Language), + CtxOpts = #{ + db_name => DbName, + ddoc_id => DDocId, + language => Language, + sig => Sig, + lib => Lib, + map_funs => MapFuns + }, + {ok, Ctx} = ApiMod:acquire_map_context(CtxOpts), + {ok, {ApiMod, Ctx}}. + + +-spec release_map_context(context()) -> ok | {error, any()}. +release_map_context(nil) -> + ok; + +release_map_context({ApiMod, Ctx}) -> + ApiMod:release_map_context(Ctx). + + +-spec map_docs(context(), [doc()]) -> {ok, result()} | {error, any()}. +map_docs({ApiMod, Ctx}, Docs) -> + ApiMod:map_docs(Ctx, Docs). + + +get_api_mod(Language) when is_binary(Language) -> + try + LangStr = binary_to_list(Language), + ModStr = config:get("couch_eval.languages", LangStr), + if ModStr /= undefined -> ok; true -> + erlang:error({unknown_eval_api_language, Language}) + end, + list_to_existing_atom(ModStr) + catch error:badarg -> + erlang:error({invalid_eval_api_mod, Language}) + end. diff --git a/src/couch_expiring_cache/README.md b/src/couch_expiring_cache/README.md new file mode 100644 index 000000000..2ab1699db --- /dev/null +++ b/src/couch_expiring_cache/README.md @@ -0,0 +1,71 @@ +# Couch Expiring Cache + +This is a library for creating an FDB backed key value cache, where +each entry has a `stale` and `expires` time associated with it. Once +the current time exceeds the `expires` time, the entry is +automatically removed. The `stale` time can be used to indicate that a +refresh is necessary, while still returning a non-expired value. It is +potentially useful for implementing e.g. caches to external systems of +record, such as OAuth 2. 
+ +The data model is based on this [FDB forum discussion]( +https://forums.foundationdb.org/t/designing-key-value-expiration-in-fdb/156). + +``` +(?EXPIRING_CACHE, Name, ?PK, Key) := (Val, StaleTS, ExpireTS) +(?EXPIRING_CACHE, Name, ?EXP, ExpireTS, Key) := () +``` +where `Name` is a unique namespace for a particular use case. N.B. +that it's possible for cache data to remain indefinitely in FDB when a +`Name` is changed or retired with unexpired entries. For such cases, +we provide `couch_expiring_cache_fdb:clear_all/1` to manually clean +up those entries. + +## Example + +Typical usage for this library is to create a separate behaviour +module for each `Name`, which internally starts a uniquely named +`couch_expiring_cache_server` to handle expiration and removal of +entries for that `Name`. For example, to cache authorization decisions +from an external source, one could implement a module like the +following: + +```erlang +-module(auth_fdb_decision_cache). + +-behaviour(couch_expiring_cache_server). + +-export([ + start_link/0 +]). + + +-define(CACHE_NAME, <<"auth-decision">>). + + +start_link() -> + Opts = #{ + cache_name => ?CACHE_NAME, + period => 1000, % clear expired entries every second + batch_size => 500, % clear at most 500 entries each period + max_jitter => 10 + }, + couch_expiring_cache_server:start_link(?MODULE, Opts). +``` + +## Modules + +* `couch_expiring_cache`: The API module; it contains functions for + inserting and looking up cache entries, which are simply + pass-throughs to `couch_expiring_cache_fdb`. + +* `couch_expiring_cache_fdb`: The module which interacts with FDB; in + addition to insertion and lookup functions, it also contains a + function to clear an expired range, which is called periodically + from instances of `couch_expiring_cache_server`. + +* `couch_expiring_cache_server`: An "abstract" gen_server; a specific + behaviour of this module should be created for each `Name`, which + can override the default expiration parameters. It periodically + removes expired cache entries using configurable parameters for + period, jitter, and batch size. diff --git a/src/couch_expiring_cache/include/couch_expiring_cache.hrl b/src/couch_expiring_cache/include/couch_expiring_cache.hrl new file mode 100644 index 000000000..78e6a8552 --- /dev/null +++ b/src/couch_expiring_cache/include/couch_expiring_cache.hrl @@ -0,0 +1,17 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-define(TIME_UNIT, millisecond). + +-type millisecond() :: non_neg_integer(). + +-type jtx() :: map() | undefined | tuple(). % copied from couch_jobs.hrl diff --git a/src/couch_expiring_cache/rebar.config b/src/couch_expiring_cache/rebar.config new file mode 100644 index 000000000..362c8785e --- /dev/null +++ b/src/couch_expiring_cache/rebar.config @@ -0,0 +1,14 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License.
You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +{cover_enabled, true}. +{cover_print_enabled, true}. diff --git a/src/couch_expiring_cache/src/couch_expiring_cache.app.src b/src/couch_expiring_cache/src/couch_expiring_cache.app.src new file mode 100644 index 000000000..27d58ee0e --- /dev/null +++ b/src/couch_expiring_cache/src/couch_expiring_cache.app.src @@ -0,0 +1,27 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +{application, couch_expiring_cache, [ + {description, "CouchDB Expiring Cache"}, + {vsn, git}, + {registered, []}, + {applications, [ + kernel, + stdlib, + erlfdb, + config, + couch_log, + couch_stats, + couch_jobs, + fabric + ]} +]}. diff --git a/src/couch_expiring_cache/src/couch_expiring_cache.erl b/src/couch_expiring_cache/src/couch_expiring_cache.erl new file mode 100644 index 000000000..b26556e98 --- /dev/null +++ b/src/couch_expiring_cache/src/couch_expiring_cache.erl @@ -0,0 +1,56 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_expiring_cache). + +-export([ + insert/5, + insert/6, + lookup/2, + lookup/3 +]). + + +-include_lib("couch_expiring_cache/include/couch_expiring_cache.hrl"). + + +-spec insert(Name :: binary(), Key :: binary(), Value :: binary(), + StaleTS :: ?TIME_UNIT(), ExpiresTS :: ?TIME_UNIT()) -> ok. +insert(Name, Key, Value, StaleTS, ExpiresTS) + when is_binary(Name), is_binary(Key), is_binary(Value), + is_integer(StaleTS), is_integer(ExpiresTS) -> + insert(undefined, Name, Key, Value, StaleTS, ExpiresTS). + + +-spec insert(Tx :: jtx(), Name :: binary(), Key :: binary(), Value :: binary(), + StaleTS :: ?TIME_UNIT(), ExpiresTS :: ?TIME_UNIT()) -> ok. +insert(Tx, Name, Key, Value, StaleTS, ExpiresTS) + when is_binary(Name), is_binary(Key), is_binary(Value), + is_integer(StaleTS), is_integer(ExpiresTS) -> + couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(Tx), fun(JTx) -> + couch_expiring_cache_fdb:insert( + JTx, Name, Key, Value, StaleTS, ExpiresTS) + end). + + +-spec lookup(Name :: binary(), Key :: binary()) -> + not_found | {fresh, Val :: binary()} | {stale, Val :: binary()} | expired. +lookup(Name, Key) when is_binary(Name), is_binary(Key) -> + lookup(undefined, Name, Key). 
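A minimal usage sketch of the two calls above; the cache name, key, value, and timings are illustrative only.

```erlang
% Cache <<"v">> under <<"k">>: fresh for 5 seconds, expired after 30 seconds.
Now = erlang:system_time(millisecond),
ok = couch_expiring_cache:insert(<<"my-cache">>, <<"k">>, <<"v">>,
    Now + 5000, Now + 30000),
case couch_expiring_cache:lookup(<<"my-cache">>, <<"k">>) of
    {fresh, Val} -> Val;        % before StaleTS
    {stale, Val} -> Val;        % still usable, but due for a refresh
    expired -> needs_refresh;   % past ExpiresTS, not yet cleaned up
    not_found -> needs_refresh
end.
```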
+ + +-spec lookup(Tx :: jtx(), Name :: binary(), Key :: binary()) -> + not_found | {fresh, Val :: binary()} | {stale, Val :: binary()} | expired. +lookup(Tx, Name, Key) when is_binary(Name), is_binary(Key) -> + couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(Tx), fun(JTx) -> + couch_expiring_cache_fdb:lookup(JTx, Name, Key) + end). diff --git a/src/couch_expiring_cache/src/couch_expiring_cache_fdb.erl b/src/couch_expiring_cache/src/couch_expiring_cache_fdb.erl new file mode 100644 index 000000000..7c4ad8f6f --- /dev/null +++ b/src/couch_expiring_cache/src/couch_expiring_cache_fdb.erl @@ -0,0 +1,155 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_expiring_cache_fdb). + +-export([ + get_range_to/3, + insert/6, + lookup/3, + clear_all/1, + clear_range_to/3 +]). + + +-define(PK, 1). +-define(EXP, 2). + + +-include_lib("fabric/include/fabric2.hrl"). +-include_lib("couch_expiring_cache/include/couch_expiring_cache.hrl"). + + +% Data model +% see: https://forums.foundationdb.org/t/designing-key-value-expiration-in-fdb/156 +% +% (?EXPIRING_CACHE, Name, ?PK, Key) := (Val, StaleTS, ExpiresTS) +% (?EXPIRING_CACHE, Name, ?EXP, ExpiresTS, Key) := () + + +-spec insert(JTx :: jtx(), Name :: binary(), Key :: binary(), Value :: binary(), + StaleTS :: ?TIME_UNIT, ExpiresTS :: ?TIME_UNIT) -> ok. +insert(#{jtx := true} = JTx, Name, Key, Val, StaleTS, ExpiresTS) -> + #{tx := Tx, layer_prefix := LayerPrefix} = couch_jobs_fdb:get_jtx(JTx), + PK = primary_key(Name, Key, LayerPrefix), + case get_val(Tx, PK) of + not_found -> + ok; + {_OldVal, _OldStaleTS, OldExpiresTS} -> + % Clean up current expiry key for this primary key. No + % need to clean up the existing primary key since it will + % be overwritten below. + OldXK = expiry_key(OldExpiresTS, Name, Key, LayerPrefix), + ok = erlfdb:clear(Tx, OldXK) + end, + PV = erlfdb_tuple:pack({Val, StaleTS, ExpiresTS}), + ok = erlfdb:set(Tx, PK, PV), + XK = expiry_key(ExpiresTS, Name, Key, LayerPrefix), + XV = erlfdb_tuple:pack({}), + ok = erlfdb:set(Tx, XK, XV). + + +-spec lookup(JTx :: jtx(), Name :: binary(), Key :: binary()) -> + not_found | {fresh, Val :: binary()} | {stale, Val :: binary()} | expired. +lookup(#{jtx := true} = JTx, Name, Key) -> + #{tx := Tx, layer_prefix := LayerPrefix} = couch_jobs_fdb:get_jtx(JTx), + PK = primary_key(Name, Key, LayerPrefix), + case get_val(Tx, PK) of + not_found -> + not_found; + {Val, StaleTS, ExpiresTS} -> + Now = erlang:system_time(?TIME_UNIT), + if + Now < StaleTS -> {fresh, Val}; + Now < ExpiresTS -> {stale, Val}; + true -> expired + end + end. + + +-spec clear_all(Name :: binary()) -> + ok. +clear_all(Name) -> + fabric2_fdb:transactional(fun(Tx) -> + LayerPrefix = fabric2_fdb:get_dir(Tx), + NamePrefix = erlfdb_tuple:pack({?EXPIRING_CACHE, Name}, LayerPrefix), + erlfdb:clear_range_startswith(Tx, NamePrefix) + end). + + +-spec clear_range_to(Name :: binary(), EndTS :: ?TIME_UNIT, + Limit :: non_neg_integer()) -> + OldestTS :: ?TIME_UNIT. 
+clear_range_to(Name, EndTS, Limit) when Limit > 0 -> + fold_range(Name, EndTS, Limit, + fun(Tx, PK, XK, _Key, ExpiresTS, Acc) -> + ok = erlfdb:clear(Tx, PK), + ok = erlfdb:clear(Tx, XK), + oldest_ts(ExpiresTS, Acc) + end, 0). + + +-spec get_range_to(Name :: binary(), EndTS :: ?TIME_UNIT, + Limit :: non_neg_integer()) -> + [{Key :: binary(), Val :: binary()}]. +get_range_to(Name, EndTS, Limit) when Limit > 0 -> + fold_range(Name, EndTS, Limit, + fun(Tx, PK, _XK, Key, _ExpiresTS, Acc) -> + case get_val(Tx, PK) of + not_found -> + couch_log:error("~p:entry missing Key: ~p", [?MODULE, Key]), + Acc; + Val -> + [{Key, Val} | Acc] + end + end, []). + + +%% Private + + +fold_range(Name, EndTS, Limit, Fun, Acc0) when Limit > 0 -> + fabric2_fdb:transactional(fun(Tx) -> + {LayerPrefix, ExpiresPrefix} = prefixes(Tx, Name), + fabric2_fdb:fold_range({tx, Tx}, ExpiresPrefix, fun({XK, _XV}, Acc) -> + {ExpiresTS, Key} = erlfdb_tuple:unpack(XK, ExpiresPrefix), + PK = primary_key(Name, Key, LayerPrefix), + Fun(Tx, PK, XK, Key, ExpiresTS, Acc) + end, Acc0, [{end_key, EndTS}, {limit, Limit}]) + end). + + +oldest_ts(TS, 0) -> TS; % handle initial Acc = 0 case +oldest_ts(TS, OldestTS) -> min(TS, OldestTS). + + +primary_key(Name, Key, Prefix) -> + erlfdb_tuple:pack({?EXPIRING_CACHE, Name, ?PK, Key}, Prefix). + + +expiry_key(ExpiresTS, Name, Key, Prefix) -> + erlfdb_tuple:pack({?EXPIRING_CACHE, Name, ?EXP, ExpiresTS, Key}, Prefix). + + +prefixes(Tx, Name) -> + Layer = fabric2_fdb:get_dir(Tx), + Expires = erlfdb_tuple:pack({?EXPIRING_CACHE, Name, ?EXP}, Layer), + {Layer, Expires}. + + +get_val(Tx, PK) -> + case erlfdb:wait(erlfdb:get(Tx, PK)) of + not_found -> + not_found; + Bin when is_binary(Bin) -> + erlfdb_tuple:unpack(Bin) + end. diff --git a/src/couch_expiring_cache/src/couch_expiring_cache_server.erl b/src/couch_expiring_cache/src/couch_expiring_cache_server.erl new file mode 100644 index 000000000..74c432e25 --- /dev/null +++ b/src/couch_expiring_cache/src/couch_expiring_cache_server.erl @@ -0,0 +1,123 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_expiring_cache_server). + +-behaviour(gen_server). + +-callback start_link() -> {ok, pid()} | ignore | {error, term()}. + +-export([ + now_ts/0, + start_link/2 +]). + +-export([ + init/1, + terminate/2, + handle_call/3, + handle_cast/2, + handle_info/2, + code_change/3 +]). + + +-define(DEFAULT_BATCH_SIZE, 1000). +-define(DEFAULT_PERIOD_MSEC, 5000). +-define(DEFAULT_MAX_JITTER_MSEC, 1000). + + +-include_lib("couch_expiring_cache/include/couch_expiring_cache.hrl"). + + +start_link(Name, Opts) when is_atom(Name) -> + gen_server:start_link({local, Name}, ?MODULE, Opts#{name => Name}, []). 
+ + +init(Opts) -> + DefaultCacheName = atom_to_binary(maps:get(name, Opts), utf8), + Period = maps:get(period, Opts, ?DEFAULT_PERIOD_MSEC), + MaxJitter = maps:get(max_jitter, Opts, ?DEFAULT_MAX_JITTER_MSEC), + {ok, #{ + cache_name => maps:get(cache_name, Opts, DefaultCacheName), + batch_size => maps:get(batch_size, Opts, ?DEFAULT_BATCH_SIZE), + period => Period, + max_jitter => MaxJitter, + timer_ref => schedule_remove_expired(Period, MaxJitter), + oldest_ts => 0, + elapsed => 0, + largest_elapsed => 0, + lag => 0}}. + + +terminate(_, _) -> + ok. + + +handle_call(Msg, _From, St) -> + {stop, {bad_call, Msg}, {bad_call, Msg}, St}. + + +handle_cast(Msg, St) -> + {stop, {bad_cast, Msg}, St}. + + +handle_info(remove_expired, St) -> + #{ + cache_name := Name, + batch_size := BatchSize, + period := Period, + max_jitter := MaxJitter, + oldest_ts := OldestTS0, + largest_elapsed := LargestElapsed + } = St, + + NowTS = now_ts(), + OldestTS = max(OldestTS0, + couch_expiring_cache_fdb:clear_range_to(Name, NowTS, BatchSize)), + Elapsed = now_ts() - NowTS, + + {noreply, St#{ + timer_ref := schedule_remove_expired(Period, MaxJitter), + oldest_ts := OldestTS, + elapsed := Elapsed, + largest_elapsed := max(Elapsed, LargestElapsed), + lag := NowTS - OldestTS}}; + + +handle_info({Ref, ready}, St) when is_reference(Ref) -> + % Prevent crashing server and application + LogMsg = "~p : spurious erlfdb future ready message ~p", + couch_log:error(LogMsg, [?MODULE, Ref]), + {noreply, St}; + + +handle_info(Msg, St) -> + {stop, {bad_info, Msg}, St}. + + +code_change(_OldVsn, St, _Extra) -> + {ok, St}. + + +now_ts() -> + {Mega, Sec, Micro} = os:timestamp(), + ((Mega * 1000000) + Sec) * 1000 + Micro div 1000. + + +%% Private + + +schedule_remove_expired(Timeout, MaxJitter) -> + Jitter = max(Timeout div 2, MaxJitter), + Wait = Timeout + rand:uniform(max(1, Jitter)), + erlang:send_after(Wait, self(), remove_expired). diff --git a/src/couch_expiring_cache/test/couch_expiring_cache_tests.erl b/src/couch_expiring_cache/test/couch_expiring_cache_tests.erl new file mode 100644 index 000000000..0780b8847 --- /dev/null +++ b/src/couch_expiring_cache/test/couch_expiring_cache_tests.erl @@ -0,0 +1,147 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_expiring_cache_tests). + + +-include_lib("couch/include/couch_eunit.hrl"). + +-include_lib("couch_expiring_cache/include/couch_expiring_cache.hrl"). + + +-define(CACHE_NAME, atom_to_binary(?MODULE, utf8)). + +-define(FOREVER, 576460752303423488). % max int 64 bit + + +couch_expiring_cache_basic_test_() -> + { + "Test expiring cache basics", + { + setup, + fun setup_couch/0, fun teardown_couch/1, + { + foreach, + fun setup/0, fun teardown/1, + [ + fun simple_lifecycle/1 + ] + } + } + }. + + +setup_couch() -> + test_util:start_couch([fabric, couch_jobs]). + + +teardown_couch(Ctx) -> + test_util:stop_couch(Ctx). 
+ + +setup() -> + Opts = #{ + cache_name => ?CACHE_NAME, + period => 10, + max_jitter => 0}, + {ok, Pid} = couch_expiring_cache_server:start_link(?MODULE, Opts), + true = unlink(Pid), + #{pid => Pid}. + + +teardown(#{pid := Pid}) -> + exit(Pid, kill). + + +simple_lifecycle(_) -> + % The entire test is racing against FDB being faster than timeout seconds + {timeout, 20, ?_test(begin + Start = couch_expiring_cache_server:now_ts(), + % Race Alert! + % We're betting on FDB returning a lookup faster than these: + Stale = 500, + Expires = 1000, + Timeout = 5000, + Interval = 5, + + StaleTS = Start + Stale, + ExpiresTS = Start + Expires, + Name = ?CACHE_NAME, + Key = <<"key">>, + Val = <<"val">>, + + ?assertEqual(ok, couch_expiring_cache_fdb:clear_all(Name)), + ?assertEqual(not_found, couch_expiring_cache:lookup(Name, Key)), + ?assertEqual([], entries(Name)), + ?assertEqual(ok, couch_expiring_cache:insert(Name, Key, Val, + StaleTS, ExpiresTS)), + ok = attempt_fresh_and_stale_lookups(Name, Key, Timeout, Interval), + + % Refresh the existing key with updated timestamps + Refresh = couch_expiring_cache_server:now_ts(), + ?assertEqual(ok, couch_expiring_cache:insert(Name, Key, Val, + Refresh + Stale, Refresh + Expires)), + ok = attempt_fresh_and_stale_lookups(Name, Key, Timeout, Interval), + ?assertEqual(1, length(entries(Name))), + % These last 2 are also races, betting on FDB to be reasonably + % fast on the home stretch + ok = wait_lookup(Name, Key, expired, Timeout, Interval), + ok = wait_lookup(Name, Key, not_found, Timeout, Interval), + ?assertEqual([], entries(Name)) + end)}. + + +% In this race we're betting on FDB to take less than `Stale` and then +% `Expired` milliseconds to respond +attempt_fresh_and_stale_lookups(Name, Key, Timeout, Interval) -> + case couch_expiring_cache:lookup(Name, Key) of + {fresh, Val} -> + % We won that race, let's bet on another! + ok = wait_lookup(Name, Key, {stale, Val}, Timeout, Interval); + _ -> + % Unlucky! But don't fail the test just yet... + ok + end. + + +entries(Name) -> + couch_expiring_cache_fdb:get_range_to(Name, ?FOREVER, _Limit=100). + + +% This lookup races against Timeout +wait_lookup(Name, Key, Expect, Timeout, Interval) -> + wait(fun() -> + case couch_expiring_cache:lookup(Name, Key) of + Expect -> ok; + _ -> wait + end + end, Timeout, Interval). + + +wait(Fun, Timeout, Delay) -> + Now = couch_expiring_cache_server:now_ts(), + wait(Fun, Timeout, Delay, Now, Now). + + +wait(_Fun, Timeout, _Delay, Started, Prev) when Prev - Started > Timeout -> + timeout; + +wait(Fun, Timeout, Delay, Started, _Prev) -> + case Fun() of + wait -> + % http://erlang.org/doc/man/timer.html#sleep-1 + ok = timer:sleep(Delay), % always millisecond + wait(Fun, Timeout, Delay, Started, + couch_expiring_cache_server:now_ts()); + Else -> + Else + end. diff --git a/src/couch_index/src/couch_index.erl b/src/couch_index/src/couch_index.erl index cfe0d9e4f..09bd48c61 100644 --- a/src/couch_index/src/couch_index.erl +++ b/src/couch_index/src/couch_index.erl @@ -23,7 +23,7 @@ -export([compact/1, compact/2, get_compactor_pid/1]). %% gen_server callbacks --export([init/1, terminate/2, code_change/3]). +-export([init/1, terminate/2, code_change/3, format_status/2]). -export([handle_call/3, handle_cast/2, handle_info/2]). @@ -375,6 +375,23 @@ handle_info({'DOWN', _, _, _Pid, _}, #st{mod=Mod, idx_state=IdxState}=State) -> code_change(_OldVsn, State, _Extra) -> {ok, State}. 
+format_status(Opt, [PDict, State]) -> + #st{ + mod = Mod, + waiters = Waiters, + idx_state = IdxState + } = State, + Scrubbed = State#st{waiters = {length, length(Waiters)}}, + IdxSafeState = case erlang:function_exported(Mod, format_status, 2) of + true -> + Mod:format_status(Opt, [PDict, IdxState]); + false -> + [] + end, + [{data, [{"State", + ?record_to_keyval(st, Scrubbed) ++ IdxSafeState + }]}]. + maybe_restart_updater(#st{waiters=[]}) -> ok; maybe_restart_updater(#st{idx_state=IdxState}=State) -> diff --git a/src/couch_index/src/couch_index_server.erl b/src/couch_index/src/couch_index_server.erl index 49d1e61b7..6bebff2d8 100644 --- a/src/couch_index/src/couch_index_server.erl +++ b/src/couch_index/src/couch_index_server.erl @@ -243,9 +243,15 @@ reset_indexes(DbName, Root) -> end, dict:new(), ets:lookup(?BY_DB, DbName)), Fun = fun({Sig, DDocIds}) -> [{_, Pid}] = ets:lookup(?BY_SIG, {DbName, Sig}), - MRef = erlang:monitor(process, Pid), + unlink(Pid), gen_server:cast(Pid, delete), - receive {'DOWN', MRef, _, _, _} -> ok end, + receive + {'EXIT', Pid, _} -> + ok + after + 0 -> + ok + end, rem_from_ets(DbName, Sig, DDocIds, Pid) end, lists:foreach(Fun, dict:to_list(SigDDocIds)), @@ -278,7 +284,11 @@ handle_db_event(<<"shards/", _/binary>> = DbName, {ddoc_updated, DDocResult = couch_util:with_db(DbName, fun(Db) -> couch_db:open_doc(Db, DDocId, [ejson_body, ?ADMIN_CTX]) end), - DbShards = [mem3:name(Sh) || Sh <- mem3:local_shards(mem3:dbname(DbName))], + LocalShards = try mem3:local_shards(mem3:dbname(DbName)) + catch error:database_does_not_exist -> + [] + end, + DbShards = [mem3:name(Sh) || Sh <- LocalShards], lists:foreach(fun(DbShard) -> lists:foreach(fun({_DbShard, {_DDocId, Sig}}) -> % check if there are other ddocs with the same Sig for the same db diff --git a/src/couch_jobs/.gitignore b/src/couch_jobs/.gitignore new file mode 100644 index 000000000..6ef4c5212 --- /dev/null +++ b/src/couch_jobs/.gitignore @@ -0,0 +1,4 @@ +*.beam +.eunit +ebin/couch_jobs.app +.DS_Store
\ No newline at end of file diff --git a/src/couch_jobs/README.md b/src/couch_jobs/README.md new file mode 100644 index 000000000..bc45d323c --- /dev/null +++ b/src/couch_jobs/README.md @@ -0,0 +1,62 @@ +CouchDB Jobs Application +======================== + +Run background jobs in CouchDB + +Design (RFC) discussion: https://github.com/apache/couchdb-documentation/pull/409/files + +This is a description of some of the modules: + + * `couch_jobs`: The main API module. It contains functions for creating, + accepting, executing, and monitoring jobs. A common pattern in this module + is to get a jobs transaction object (named `JTx` throughout the code), then + start a transaction and call a bunch of functions from `couch_jobs_fdb` in + that transaction. + + * `couch_jobs_fdb`: This is a layer that talks to FDB. There is a lot of tuple + packing and unpacking, reading ranges and also managing transaction objects. + + * `couch_jobs_pending`: This module implements the pending jobs queue. These + functions could all go in `couch_jobs_fdb` but the implementation was fairly + self-contained, with its own private helper functions, so it made sense to + move it to a separate module. + + * `couch_jobs_activity_monitor`: Here is where the "activity monitor" + functionality is implemented. That's done with a `gen_server` instance + running for each type. This `gen_server` periodically checks if there are + inactive jobs for its type, and if there are, it re-enqueues them. If the + timeout value changes, then it skips the pending check until the new + timeout expires. + + * `couch_jobs_activity_monitor_sup`: This is a simple one-for-one supervisor + to spawn `couch_jobs_activity_monitor` instances for each type. + + * `couch_jobs_type_monitor`: This is a helper process meant to be + `spawn_link`-ed from a parent `gen_server`. It then monitors activity for a + particular job type. If any jobs of that type have an update it notifies the + parent process. + + * `couch_jobs_notifier`: This module is responsible for subscriptions. Just like + with the activity monitor, there is a `gen_server` instance running per + type. It uses a linked `couch_jobs_type_monitor` process to wait for + any job updates. When an update notification arrives, it can efficiently + find out if any active jobs have been updated by reading the `(?JOBS, + ?ACTIVITY, Type, Sequence)` range. That should account for the bulk of + changes. The jobs that are not active anymore are queried individually. + Subscriptions are managed in an ordered set ETS table. + + * `couch_jobs_notifier_sup`: A simple one-for-one supervisor to spawn + `couch_jobs_notifier` processes for each type. + + * `couch_jobs_server`: This is a `gen_server` which keeps track of job + types. It then starts or stops activity monitors and notifiers for each + type. To do that it queries the `(?JOBS, ?ACTIVITY_TIMEOUT)` range periodically. + + * `couch_jobs_sup`: This is the main application supervisor. The restart + strategy is `rest_for_one`, meaning that when a child restarts, the + siblings started after it restart as well. One interesting entry there is the first + child, which is used just to create an ETS table used by `couch_jobs_fdb` to + cache transaction objects (`JTx` mentioned above). That child calls + `init_cache/0`, where it creates the ETS table and then returns `ignore` so it + doesn't actually spawn a process. The ETS table will be owned by the + supervisor process.
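To tie the modules above together, here is a rough sketch of a producer/worker flow through the `couch_jobs` API module that follows; the type name, job id, data, and timeout are placeholders, and error handling is omitted.

```erlang
% Illustrative only. A type timeout must be registered before activity
% monitoring works for that type (see set_type_timeout/2 below).
ok = couch_jobs:set_type_timeout(<<"example-type">>, 30),

% Producer: enqueue a job. Passing undefined runs it in its own transaction.
ok = couch_jobs:add(undefined, <<"example-type">>, <<"job-1">>, #{}),

% Worker: block until a pending job of this type is accepted, then run it.
{ok, Job, _Data} = couch_jobs:accept(<<"example-type">>),
{ok, Job1} = couch_jobs:update(undefined, Job, #{<<"progress">> => 50}),
ok = couch_jobs:finish(undefined, Job1, #{<<"result">> => <<"done">>}).
```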
diff --git a/src/couch_jobs/rebar.config b/src/couch_jobs/rebar.config new file mode 100644 index 000000000..362c8785e --- /dev/null +++ b/src/couch_jobs/rebar.config @@ -0,0 +1,14 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +{cover_enabled, true}. +{cover_print_enabled, true}. diff --git a/src/couch_jobs/src/couch_jobs.app.src b/src/couch_jobs/src/couch_jobs.app.src new file mode 100644 index 000000000..8ded14c6f --- /dev/null +++ b/src/couch_jobs/src/couch_jobs.app.src @@ -0,0 +1,31 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +{application, couch_jobs, [ + {description, "CouchDB Jobs"}, + {vsn, git}, + {mod, {couch_jobs_app, []}}, + {registered, [ + couch_jobs_sup, + couch_jobs_activity_monitor_sup, + couch_jobs_notifier_sup, + couch_jobs_server + ]}, + {applications, [ + kernel, + stdlib, + erlfdb, + couch_log, + config, + fabric + ]} +]}. diff --git a/src/couch_jobs/src/couch_jobs.erl b/src/couch_jobs/src/couch_jobs.erl new file mode 100644 index 000000000..6c40f5dff --- /dev/null +++ b/src/couch_jobs/src/couch_jobs.erl @@ -0,0 +1,453 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_jobs). + +-export([ + % Job creation + add/4, + add/5, + remove/3, + + % Job monitoring + get_types/1, + get_job_data/3, + get_job_state/3, + get_active_jobs_ids/2, + fold_jobs/4, + pending_count/2, + pending_count/3, + + % Job processing + accept/1, + accept/2, + finish/2, + finish/3, + resubmit/2, + resubmit/3, + resubmit/4, + is_resubmitted/1, + update/2, + update/3, + + % Subscriptions + subscribe/2, + subscribe/3, + unsubscribe/1, + wait/2, + wait/3, + + % Type timeouts + set_type_timeout/2, + clear_type_timeout/1, + get_type_timeout/1 +]). + + +-include("couch_jobs.hrl"). + + +-define(MIN_ACCEPT_WAIT_MSEC, 100). + + +%% Job Creation API + +-spec add(jtx(), job_type(), job_id(), job_data()) -> ok | {error, any()}. +add(Tx, Type, JobId, JobData) -> + add(Tx, Type, JobId, JobData, 0). + + +-spec add(jtx(), job_type(), job_id(), job_data(), scheduled_time()) -> + ok | {error, any()}. 
+add(Tx, Type, JobId, JobData, ScheduledTime) when is_binary(JobId), + is_map(JobData), is_integer(ScheduledTime) -> + couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(Tx), fun(JTx) -> + case couch_jobs_fdb:add(JTx, Type, JobId, JobData, ScheduledTime) of + {ok, _, _, _} -> ok; + {error, Error} -> {error, Error} + end + end). + + +-spec remove(jtx(), job_type(), job_id()) -> ok | {error, any()}. +remove(Tx, Type, JobId) when is_binary(JobId) -> + couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(Tx), fun(JTx) -> + couch_jobs_fdb:remove(JTx, job(Type, JobId)) + end). + + +-spec get_types(jtx()) -> [job_type()] | {error, any()}. +get_types(Tx) -> + couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(Tx), fun(JTx) -> + couch_jobs_fdb:get_types(JTx) + end). + + +-spec get_job_data(jtx(), job_type(), job_id()) -> {ok, job_data()} | {error, + any()}. +get_job_data(Tx, Type, JobId) when is_binary(JobId) -> + couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(Tx), fun(JTx) -> + case couch_jobs_fdb:get_job_state_and_data(JTx, job(Type, JobId)) of + {ok, _Seq, _State, Data} -> + {ok, couch_jobs_fdb:decode_data(Data)}; + {error, Error} -> + {error, Error} + end + end). + + +-spec get_job_state(jtx(), job_type(), job_id()) -> {ok, job_state()} | {error, + any()}. +get_job_state(Tx, Type, JobId) when is_binary(JobId) -> + couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(Tx), fun(JTx) -> + case couch_jobs_fdb:get_job_state_and_data(JTx, job(Type, JobId)) of + {ok, _Seq, State, _Data} -> + {ok, State}; + {error, Error} -> + {error, Error} + end + end). + + +-spec get_active_jobs_ids(jtx(), job_type()) -> [job_id()] | {error, + any()}. +get_active_jobs_ids(Tx, Type) -> + couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(Tx), fun(JTx) -> + Since = couch_jobs_fdb:get_active_since(JTx, Type, + {versionstamp, 0, 0}), + maps:keys(Since) + end). + + +-spec fold_jobs(jtx(), job_type(), fun(), any()) -> any(). +fold_jobs(Tx, Type, Fun, UserAcc) when is_function(Fun, 5) -> + couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(Tx), fun(JTx) -> + maps:fold(fun(JobId, {_Seq, JobState, DataEnc}, Acc) -> + Data = couch_jobs_fdb:decode_data(DataEnc), + Fun(JTx, JobId, JobState, Data, Acc) + end, UserAcc, couch_jobs_fdb:get_jobs(JTx, Type)) + end). + + +-spec pending_count(jtx(), job_type()) -> integer(). +pending_count(Tx, Type) -> + pending_count(Tx, Type, #{}). + + +-spec pending_count(jtx(), job_type(), #{}) -> integer(). +pending_count(Tx, Type, Opts) -> + MaxSTime = maps:get(max_sched_time, Opts, ?UNDEFINED_MAX_SCHEDULED_TIME), + Limit = maps:get(limit, Opts, 1024), + couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(Tx), fun(JTx) -> + couch_jobs_pending:pending_count(JTx, Type, MaxSTime, Limit) + end). + + +%% Job processor API + +-spec accept(job_type()) -> {ok, job(), job_data()} | {error, any()}. +accept(Type) -> + accept(Type, #{}). + + +-spec accept(job_type(), job_accept_opts()) -> {ok, job()} | {error, any()}. +accept(Type, #{} = Opts) -> + NoSched = maps:get(no_schedule, Opts, false), + MaxSchedTimeDefault = case NoSched of + true -> 0; + false -> ?UNDEFINED_MAX_SCHEDULED_TIME + end, + MaxSchedTime = maps:get(max_sched_time, Opts, MaxSchedTimeDefault), + Timeout = maps:get(timeout, Opts, infinity), + case NoSched andalso MaxSchedTime =/= 0 of + true -> + {error, no_schedule_require_0_max_sched_time}; + false -> + accept_loop(Type, NoSched, MaxSchedTime, Timeout) + end. + + +-spec finish(jtx(), job()) -> ok | {error, any()}. +finish(Tx, Job) -> + finish(Tx, Job, undefined). + + +-spec finish(jtx(), job(), job_data()) -> ok | {error, any()}. 
+finish(Tx, #{jlock := <<_/binary>>} = Job, JobData) -> + couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(Tx), fun(JTx) -> + couch_jobs_fdb:finish(JTx, Job, JobData) + end). + + +-spec resubmit(jtx(), job()) -> {ok, job()} | {error, any()}. +resubmit(Tx, Job) -> + resubmit(Tx, Job, ?UNDEFINED_MAX_SCHEDULED_TIME). + + +-spec resubmit(jtx(), job(), scheduled_time()) -> {ok, job()} | {error, any()}. +resubmit(Tx, #{jlock := <<_/binary>>} = Job, SchedTime) -> + couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(Tx), fun(JTx) -> + couch_jobs_fdb:resubmit(JTx, Job, SchedTime) + end). + + +-spec resubmit(jtx(), job(), scheduled_time(), job_data()) -> {ok, job()} | {error, any()}. +resubmit(Tx, #{jlock := <<_/binary>>} = Job, SchedTime, Data) -> + couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(Tx), fun(JTx) -> + couch_jobs_fdb:resubmit(JTx, Job, SchedTime, Data) + end). + + +-spec is_resubmitted(job()) -> true | false. +is_resubmitted(#{job := true} = Job) -> + maps:get(resubmit, Job, false). + + +-spec update(jtx(), job()) -> {ok, job()} | {error, any()}. +update(Tx, Job) -> + update(Tx, Job, undefined). + + +-spec update(jtx(), job(), job_data()) -> {ok, job()} | {error, any()}. +update(Tx, #{jlock := <<_/binary>>} = Job, JobData) -> + couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(Tx), fun(JTx) -> + couch_jobs_fdb:update(JTx, Job, JobData) + end). + + +%% Subscription API + +% Receive events as messages. Wait for them using `wait/2,3` +% functions. +% + +-spec subscribe(job_type(), job_id()) -> {ok, job_subscription(), job_state(), + job_data()} | {ok, finished, job_data()} | {error, any()}. +subscribe(Type, JobId) -> + subscribe(undefined, Type, JobId). + + +-spec subscribe(jtx(), job_type(), job_id()) -> {ok, job_subscription(), + job_state(), job_data()} | {ok, finished, job_data()} | {error, any()}. +subscribe(Tx, Type, JobId) -> + StateData = couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(Tx), fun(JTx) -> + Job = #{job => true, type => Type, id => JobId}, + couch_jobs_fdb:get_job_state_and_data(JTx, Job) + end), + case StateData of + {ok, _Seq, finished, Data} -> + {ok, finished, couch_jobs_fdb:decode_data(Data)}; + {ok, Seq, State, Data} -> + case couch_jobs_notifier:subscribe(Type, JobId, State, Seq) of + {ok, SubRef} -> + Data1 = couch_jobs_fdb:decode_data(Data), + {ok, SubRef, State, Data1}; + {error, Error} -> + {error, Error} + end; + {error, Error} -> + {error, Error} + end. + + +% Unsubscribe from getting notifications based on a particular subscription. +% Each subscription should be followed by its own unsubscription call. However, +% subscriber processes are also monitored and auto-unsubscribed if they exit. +% If subscribing process is exiting, calling this function is optional. +% +-spec unsubscribe(job_subscription()) -> ok. +unsubscribe({Server, Ref}) when is_pid(Server), is_reference(Ref) -> + try + couch_jobs_notifier:unsubscribe(Server, Ref) + after + flush_notifications(Ref) + end. + + +% Wait to receive job state updates +% +-spec wait(job_subscription() | [job_subscription()], timeout()) -> + {job_type(), job_id(), job_state(), job_data()} | timeout. +wait({_, Ref}, Timeout) -> + receive + {?COUCH_JOBS_EVENT, Ref, Type, Id, State, Data} -> + {Type, Id, State, couch_jobs_fdb:decode_data(Data)} + after + Timeout -> timeout + end; + +wait(Subs, Timeout) when is_list(Subs) -> + {Result, ResendQ} = wait_any(Subs, Timeout, []), + lists:foreach(fun(Msg) -> self() ! Msg end, ResendQ), + Result. 
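For completeness, a small sketch of the subscription flow built from the functions in this section; the type and job id are placeholders, and the 30 second wait is arbitrary.

```erlang
% Illustrative only: wait for a job to reach the finished state.
case couch_jobs:subscribe(<<"example-type">>, <<"job-1">>) of
    {ok, finished, Data} ->
        {ok, Data};
    {ok, SubRef, _State, _Data0} ->
        Result = case couch_jobs:wait(SubRef, finished, 30000) of
            {_Type, _Id, finished, Data} -> {ok, Data};
            timeout -> timeout
        end,
        ok = couch_jobs:unsubscribe(SubRef),
        Result;
    {error, Error} ->
        {error, Error}
end.
```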
+ + +-spec wait(job_subscription() | [job_subscription()], job_state(), timeout()) + -> {job_type(), job_id(), job_state(), job_data()} | timeout. +wait({_, Ref} = Sub, State, Timeout) when is_atom(State) -> + receive + {?COUCH_JOBS_EVENT, Ref, Type, Id, MsgState, Data0} -> + case MsgState =:= State of + true -> + Data = couch_jobs_fdb:decode_data(Data0), + {Type, Id, State, Data}; + false -> + wait(Sub, State, Timeout) + end + after + Timeout -> timeout + end; + +wait(Subs, State, Timeout) when is_list(Subs), + is_atom(State) -> + {Result, ResendQ} = wait_any(Subs, State, Timeout, []), + lists:foreach(fun(Msg) -> self() ! Msg end, ResendQ), + Result. + + +%% Job type timeout API + +% These functions manipulate the activity timeout for each job type. + +-spec set_type_timeout(job_type(), timeout()) -> ok. +set_type_timeout(Type, Timeout) -> + couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(), fun(JTx) -> + couch_jobs_fdb:set_type_timeout(JTx, Type, Timeout) + end), + ok = couch_jobs_server:force_check_types(). + + +-spec clear_type_timeout(job_type()) -> ok. +clear_type_timeout(Type) -> + couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(), fun(JTx) -> + couch_jobs_fdb:clear_type_timeout(JTx, Type) + end). + + +-spec get_type_timeout(job_type()) -> timeout(). +get_type_timeout(Type) -> + couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(), fun(JTx) -> + couch_jobs_fdb:get_type_timeout(JTx, Type) + end). + + +%% Private utilities + +accept_loop(Type, NoSched, MaxSchedTime, Timeout) -> + TxFun = fun(JTx) -> + couch_jobs_fdb:accept(JTx, Type, MaxSchedTime, NoSched) + end, + AcceptResult = try + couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(), TxFun) + catch + error:{timeout, _} -> + retry; + error:{erlfdb_error, Err} when Err =:= 1020 orelse Err =:= 1031 -> + retry + end, + case AcceptResult of + {ok, Job, Data} -> + {ok, Job, Data}; + retry -> + accept_loop(Type, NoSched, MaxSchedTime, Timeout); + {not_found, PendingWatch} -> + case wait_pending(PendingWatch, MaxSchedTime, Timeout, NoSched) of + {error, not_found} -> + {error, not_found}; + retry -> + accept_loop(Type, NoSched, MaxSchedTime, Timeout); + ok -> + accept_loop(Type, NoSched, MaxSchedTime, Timeout) + end + end. + + +job(Type, JobId) -> + #{job => true, type => Type, id => JobId}. + + +wait_pending(PendingWatch, _MaxSTime, _UserTimeout = 0, _NoSched) -> + erlfdb:cancel(PendingWatch, [flush]), + {error, not_found}; + +wait_pending(PendingWatch, MaxSTime, UserTimeout, NoSched) -> + NowMSec = erlang:system_time(millisecond), + Timeout0 = max(?MIN_ACCEPT_WAIT_MSEC, MaxSTime * 1000 - NowMSec), + Timeout = min(limit_timeout(Timeout0, NoSched), UserTimeout), + try + erlfdb:wait(PendingWatch, [{timeout, Timeout}]), + ok + catch + error:{erlfdb_error, ?FUTURE_VERSION} -> + erlfdb:cancel(PendingWatch, [flush]), + retry; + error:{timeout, _} -> + erlfdb:cancel(PendingWatch, [flush]), + {error, not_found} + end. + + +wait_any(Subs, Timeout0, ResendQ) when is_list(Subs) -> + Timeout = limit_timeout(Timeout0, false), + receive + {?COUCH_JOBS_EVENT, Ref, Type, Id, State, Data0} = Msg -> + case lists:keyfind(Ref, 2, Subs) of + false -> + wait_any(Subs, Timeout, [Msg | ResendQ]); + {_, Ref} -> + Data = couch_jobs_fdb:decode_data(Data0), + {{Type, Id, State, Data}, ResendQ} + end + after + Timeout -> {timeout, ResendQ} + end. 
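+% For orientation, a minimal worker loop built on the accept/finish API in
+% this module (a hedged sketch; the type, timeout and data values are
+% illustrative):
+%
+%   worker(Type) ->
+%       case couch_jobs:accept(Type, #{timeout => 30000}) of
+%           {ok, Job, Data} ->
+%               ok = couch_jobs:finish(undefined, Job, Data#{<<"done">> => true}),
+%               worker(Type);
+%           {error, not_found} ->
+%               worker(Type)
+%       end.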
+ + +wait_any(Subs, State, Timeout0, ResendQ) when + is_list(Subs) -> + Timeout = limit_timeout(Timeout0, false), + receive + {?COUCH_JOBS_EVENT, Ref, Type, Id, MsgState, Data0} = Msg -> + case lists:keyfind(Ref, 2, Subs) of + false -> + wait_any(Subs, Timeout, [Msg | ResendQ]); + {_, Ref} -> + case MsgState =:= State of + true -> + Data = couch_jobs_fdb:decode_data(Data0), + {{Type, Id, State, Data}, ResendQ}; + false -> + wait_any(Subs, Timeout, ResendQ) + end + end + after + Timeout -> {timeout, ResendQ} + end. + + +limit_timeout(_Timeout, true) -> + infinity; + +limit_timeout(Timeout, false) when is_integer(Timeout), Timeout < 16#FFFFFFFF -> + Timeout; + +limit_timeout(_Timeout, false) -> + infinity. + + +flush_notifications(Ref) -> + receive + {?COUCH_JOBS_EVENT, Ref, _, _, _} -> + flush_notifications(Ref) + after + 0 -> ok + end. diff --git a/src/couch_jobs/src/couch_jobs.hrl b/src/couch_jobs/src/couch_jobs.hrl new file mode 100644 index 000000000..bb561b136 --- /dev/null +++ b/src/couch_jobs/src/couch_jobs.hrl @@ -0,0 +1,52 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-include_lib("fabric/include/fabric2.hrl"). + + +% Job map/json field definitions +% +-define(OPT_PRIORITY, <<"priority">>). +-define(OPT_DATA, <<"data">>). +-define(OPT_CANCEL, <<"cancel">>). +-define(OPT_RESUBMIT, <<"resubmit">>). + +% These might be in a fabric public hrl eventually +% +-define(UNSET_VS, {versionstamp, 16#FFFFFFFFFFFFFFFF, 16#FFFF}). + +% Data model definitions +% +-define(JOBS, 51). % coordinate with fabric2.hrl +-define(DATA, 1). +-define(PENDING, 2). +-define(WATCHES_PENDING, 3). +-define(WATCHES_ACTIVITY, 4). +-define(ACTIVITY_TIMEOUT, 5). +-define(ACTIVITY, 6). + + +-define(COUCH_JOBS_MD_VERSION, <<"couch_jobs_md_version">>). +-define(COUCH_JOBS_EVENT, '$couch_jobs_event'). +-define(COUCH_JOBS_CURRENT, '$couch_jobs_current'). +-define(UNDEFINED_MAX_SCHEDULED_TIME, 1 bsl 36). + + +-type jtx() :: map() | undefined | tuple(). +-type job_id() :: binary(). +-type job_type() :: tuple() | binary() | non_neg_integer(). +-type job() :: map(). +-type job_data() :: map() | undefined. +-type job_accept_opts() :: map(). +-type scheduled_time() :: non_neg_integer() | undefined. +-type job_state() :: running | pending | finished. +-type job_subscription() :: {pid(), reference()}. diff --git a/src/couch_jobs/src/couch_jobs_activity_monitor.erl b/src/couch_jobs/src/couch_jobs_activity_monitor.erl new file mode 100644 index 000000000..9802f5798 --- /dev/null +++ b/src/couch_jobs/src/couch_jobs_activity_monitor.erl @@ -0,0 +1,140 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_jobs_activity_monitor). + +-behaviour(gen_server). + + +-export([ + start_link/1 +]). + +-export([ + init/1, + terminate/2, + handle_call/3, + handle_cast/2, + handle_info/2, + code_change/3 +]). + +-record(st, { + jtx, + type, + tref, + timeout = 0, + vs = not_found +}). + + +-define(MAX_JITTER_DEFAULT, 10000). +-define(MISSING_TIMEOUT_CHECK, 5000). + + +start_link(Type) -> + gen_server:start_link(?MODULE, [Type], []). + + +%% gen_server callbacks + +init([Type]) -> + St = #st{jtx = couch_jobs_fdb:get_jtx(), type = Type}, + {ok, schedule_check(St)}. + + +terminate(_, _St) -> + ok. + + +handle_call(Msg, _From, St) -> + {stop, {bad_call, Msg}, {bad_call, Msg}, St}. + + +handle_cast(Msg, St) -> + {stop, {bad_cast, Msg}, St}. + + +handle_info(check_activity, St) -> + St1 = try + check_activity(St) + catch + error:{erlfdb_error, Err} when Err =:= 1020 orelse Err =:= 1031 -> + LogMsg = "~p : type:~p got ~p error, possibly from overload", + couch_log:error(LogMsg, [?MODULE, St#st.type, Err]), + St + end, + St2 = schedule_check(St1), + {noreply, St2}; + +handle_info({Ref, ready}, St) when is_reference(Ref) -> + % Don't crash out couch_jobs_server and the whole application would need to + % eventually do proper cleanup in erlfdb:wait timeout code. + LogMsg = "~p : spurious erlfdb future ready message ~p", + couch_log:error(LogMsg, [?MODULE, Ref]), + {noreply, St}; + +handle_info(Msg, St) -> + {stop, {bad_info, Msg}, St}. + + +code_change(_OldVsn, St, _Extra) -> + {ok, St}. + + +% Private helper functions + +check_activity(#st{jtx = JTx, type = Type, vs = not_found} = St) -> + NewVS = couch_jobs_fdb:tx(JTx, fun(JTx1) -> + couch_jobs_fdb:get_activity_vs(JTx1, Type) + end), + St#st{vs = NewVS}; + +check_activity(#st{jtx = JTx, type = Type, vs = VS} = St) -> + NewVS = couch_jobs_fdb:tx(JTx, fun(JTx1) -> + NewVS = couch_jobs_fdb:get_activity_vs(JTx1, Type), + JobIds = couch_jobs_fdb:get_inactive_since(JTx1, Type, VS), + couch_jobs_fdb:re_enqueue_inactive(JTx1, Type, JobIds), + NewVS + end), + St#st{vs = NewVS}. + + +get_timeout_msec(JTx, Type) -> + TimeoutVal = couch_jobs_fdb:tx(JTx, fun(JTx1) -> + couch_jobs_fdb:get_type_timeout(JTx1, Type) + end), + case TimeoutVal of + not_found -> not_found; + ValSeconds -> timer:seconds(ValSeconds) + end. + + +schedule_check(#st{jtx = JTx, type = Type, timeout = OldTimeout} = St) -> + % Reset versionstamp if timeout changed. + St1 = case get_timeout_msec(JTx, Type) of + not_found -> + St#st{vs = not_found, timeout = ?MISSING_TIMEOUT_CHECK}; + OldTimeout -> + St; + NewTimeout -> + St#st{vs = not_found, timeout = NewTimeout} + end, + #st{timeout = Timeout} = St1, + MaxJitter = min(Timeout div 2, get_max_jitter_msec()), + Wait = Timeout + rand:uniform(max(1, MaxJitter)), + St1#st{tref = erlang:send_after(Wait, self(), check_activity)}. + + +get_max_jitter_msec()-> + config:get_integer("couch_jobs", "activity_monitor_max_jitter_msec", + ?MAX_JITTER_DEFAULT). diff --git a/src/couch_jobs/src/couch_jobs_activity_monitor_sup.erl b/src/couch_jobs/src/couch_jobs_activity_monitor_sup.erl new file mode 100644 index 000000000..b11161a24 --- /dev/null +++ b/src/couch_jobs/src/couch_jobs_activity_monitor_sup.erl @@ -0,0 +1,64 @@ +% +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. 
You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_jobs_activity_monitor_sup). + + +-behaviour(supervisor). + + +-export([ + start_link/0, + + start_monitor/1, + stop_monitor/1, + get_child_pids/0 +]). + +-export([ + init/1 +]). + + +start_link() -> + supervisor:start_link({local, ?MODULE}, ?MODULE, []). + + +start_monitor(Type) -> + supervisor:start_child(?MODULE, [Type]). + + +stop_monitor(Pid) -> + supervisor:terminate_child(?MODULE, Pid). + + +get_child_pids() -> + lists:map(fun({_Id, Pid, _Type, _Mod}) -> + Pid + end, supervisor:which_children(?MODULE)). + + +init(_) -> + Flags = #{ + strategy => simple_one_for_one, + intensity => 10, + period => 3 + }, + Children = [ + #{ + id => couch_jobs_monitor, + restart => temporary, + start => {couch_jobs_activity_monitor, start_link, []} + } + ], + {ok, {Flags, Children}}. diff --git a/src/couch_jobs/src/couch_jobs_app.erl b/src/couch_jobs/src/couch_jobs_app.erl new file mode 100644 index 000000000..720b94891 --- /dev/null +++ b/src/couch_jobs/src/couch_jobs_app.erl @@ -0,0 +1,26 @@ +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_jobs_app). + + +-behaviour(application). + + +-export([ + start/2, + stop/1 +]). + + +start(_Type, []) -> + couch_jobs_sup:start_link(). + + +stop([]) -> + ok. diff --git a/src/couch_jobs/src/couch_jobs_fdb.erl b/src/couch_jobs/src/couch_jobs_fdb.erl new file mode 100644 index 000000000..27131ec86 --- /dev/null +++ b/src/couch_jobs/src/couch_jobs_fdb.erl @@ -0,0 +1,725 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_jobs_fdb). + + +-export([ + add/5, + remove/2, + get_job_state_and_data/2, + get_jobs/2, + get_jobs/3, + + accept/4, + finish/3, + resubmit/3, + resubmit/4, + update/3, + + set_type_timeout/3, + clear_type_timeout/2, + get_type_timeout/2, + get_types/1, + + get_activity_vs/2, + get_activity_vs_and_watch/2, + get_active_since/3, + get_inactive_since/3, + re_enqueue_inactive/3, + + init_cache/0, + + encode_data/1, + decode_data/1, + + get_jtx/0, + get_jtx/1, + tx/2, + + get_job/2, + get_jobs/0, + + bump_metadata_version/0, + bump_metadata_version/1 +]). + + +-include("couch_jobs.hrl"). + + +-record(jv, { + seq, + jlock, + stime, + resubmit, + data +}). + + +-define(JOBS_ETS_KEY, jobs). +-define(MD_TIMESTAMP_ETS_KEY, md_timestamp). +-define(MD_VERSION_MAX_AGE_SEC, 10). 
+-define(PENDING_SEQ, 0). + + +% Data model +% +% (?JOBS, ?DATA, Type, JobId) = (Sequence, Lock, SchedTime, Resubmit, JobData) +% (?JOBS, ?PENDING, Type, ScheduledTime, JobId) = "" +% (?JOBS, ?WATCHES_PENDING, Type) = Counter +% (?JOBS, ?WATCHES_ACTIVITY, Type) = Sequence +% (?JOBS, ?ACTIVITY_TIMEOUT, Type) = ActivityTimeout +% (?JOBS, ?ACTIVITY, Type, Sequence) = JobId +% +% In the ?DATA row Sequence can have these values: +% 0 - when the job is pending +% null - when the job is finished +% Versionstamp - when the job is running + + +% Job creation API + +add(#{jtx := true} = JTx0, Type, JobId, Data, STime) -> + #{tx := Tx} = JTx = get_jtx(JTx0), + Job = #{job => true, type => Type, id => JobId}, + case get_type_timeout(JTx, Type) of + not_found -> + {error, no_type_timeout}; + Int when is_integer(Int) -> + Key = job_key(JTx, Job), + case erlfdb:wait(erlfdb:get(Tx, Key)) of + <<_/binary>> -> + {ok, Job1} = resubmit(JTx, Job, STime, Data), + #{seq := Seq, state := State, data := Data1} = Job1, + {ok, State, Seq, Data1}; + not_found -> + try + maybe_enqueue(JTx, Type, JobId, STime, true, Data), + {ok, pending, ?PENDING_SEQ, Data} + catch + error:{json_encoding_error, Error} -> + {error, {json_encoding_error, Error}} + end + end + end. + + +remove(#{jtx := true} = JTx0, #{job := true} = Job) -> + #{tx := Tx} = JTx = get_jtx(JTx0), + #{type := Type, id := JobId} = Job, + Key = job_key(JTx, Job), + case get_job_val(Tx, Key) of + #jv{stime = STime, seq = Seq} -> + couch_jobs_pending:remove(JTx, Type, JobId, STime), + clear_activity(JTx, Type, Seq), + erlfdb:clear(Tx, Key), + update_watch(JTx, Type), + ok; + not_found -> + {error, not_found} + end. + + +get_job_state_and_data(#{jtx := true} = JTx, #{job := true} = Job) -> + case get_job_val(get_jtx(JTx), Job) of + #jv{seq = Seq, jlock = JLock, data = Data} -> + {ok, Seq, job_state(JLock, Seq), Data}; + not_found -> + {error, not_found} + end. + + +get_jobs(JTx, Type) -> + get_jobs(JTx, Type, fun(_) -> true end). + + +get_jobs(#{jtx := true} = JTx, Type, Filter) when is_function(Filter, 1) -> + #{tx := Tx, jobs_path := Jobs} = get_jtx(JTx), + Prefix = erlfdb_tuple:pack({?DATA, Type}, Jobs), + Opts = [{streaming_mode, want_all}], + Result = erlfdb:wait(erlfdb:get_range_startswith(Tx, Prefix, Opts)), + lists:foldl(fun({K, V}, #{} = Acc) -> + {JobId} = erlfdb_tuple:unpack(K, Prefix), + case Filter(JobId) of + true -> + {Seq, JLock, _, _, Data} = erlfdb_tuple:unpack(V), + Acc#{JobId => {Seq, job_state(JLock, Seq), Data}}; + false -> + Acc + end + end, #{}, Result). + + +% Job processor API + +accept(#{jtx := true} = JTx0, Type, MaxSTime, NoSched) + when is_integer(MaxSTime), is_boolean(NoSched) -> + #{jtx := true, tx := Tx} = JTx = get_jtx(JTx0), + case couch_jobs_pending:dequeue(JTx, Type, MaxSTime, NoSched) of + {not_found, PendingWatch} -> + {not_found, PendingWatch}; + {ok, JobId} -> + JLock = fabric2_util:uuid(), + Key = job_key(JTx, Type, JobId), + JV0 = get_job_val(Tx, Key), + #jv{jlock = null, data = Data} = JV0, + JV = JV0#jv{seq = ?UNSET_VS, jlock = JLock, resubmit = false}, + set_job_val(Tx, Key, JV), + update_activity(JTx, Type, JobId, null, Data), + Job = #{ + job => true, + type => Type, + id => JobId, + jlock => JLock + }, + {ok, Job, decode_data(Data)} + end. 
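+% For orientation, a (hypothetical) pending job row under the data model
+% described at the top of this module, as it would be packed with
+% erlfdb_tuple:
+%
+%   Key = erlfdb_tuple:pack({?DATA, <<"my_type">>, <<"j1">>}, Jobs),
+%   Val = erlfdb_tuple:pack({?PENDING_SEQ, null, 0, false, EncodedData}),
+%
+% i.e. Sequence = 0 (pending), no lock, scheduled time 0, resubmit = false,
+% and the JSON-encoded job data.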
+ + +finish(#{jtx := true} = JTx0, #{jlock := <<_/binary>>} = Job, Data) when + is_map(Data) orelse Data =:= undefined -> + #{tx := Tx} = JTx = get_jtx(JTx0), + #{type := Type, jlock := JLock, id := JobId} = Job, + case get_job_or_halt(Tx, job_key(JTx, Job), JLock) of + #jv{seq = Seq, stime = STime, resubmit = Resubmit, data = OldData} -> + NewData = case Data =:= undefined of + true -> OldData; + false -> Data + end, + try maybe_enqueue(JTx, Type, JobId, STime, Resubmit, NewData) of + ok -> + clear_activity(JTx, Type, Seq), + update_watch(JTx, Type) + catch + error:{json_encoding_error, Error} -> + {error, {json_encoding_error, Error}} + end; + halt -> + {error, halt} + end. + +resubmit(JTx0, Job, NewSTime) -> + resubmit(JTx0, Job, NewSTime, undefined). + + +resubmit(#{jtx := true} = JTx0, #{job := true} = Job, NewSTime, NewData) -> + #{tx := Tx} = JTx = get_jtx(JTx0), + #{type := Type, id := JobId} = Job, + Key = job_key(JTx, Job), + case get_job_val(Tx, Key) of + #jv{seq = Seq, jlock = JLock, stime = OldSTime, data = Data} = JV -> + STime = case NewSTime =:= undefined of + true -> OldSTime; + false -> NewSTime + end, + case job_state(JLock, Seq) of + finished -> + ok = maybe_enqueue(JTx, Type, JobId, STime, true, NewData), + NewData1 = update_job_data(Data, NewData), + Job1 = Job#{ + seq => ?PENDING_SEQ, + state => pending, + data => NewData1 + }, + {ok, Job1}; + pending when STime == OldSTime -> + % If pending and scheduled time doesn't change avoid generating + % un-necessary writes by removing and re-adding the jobs into the + % pending queue. + Job1 = Job#{ + stime => STime, + seq => ?PENDING_SEQ, + state => pending, + data => Data + }, + {ok, Job1}; + pending -> + JV1 = JV#jv{seq = ?PENDING_SEQ, stime = STime, data = NewData}, + set_job_val(Tx, Key, JV1), + couch_jobs_pending:remove(JTx, Type, JobId, OldSTime), + couch_jobs_pending:enqueue(JTx, Type, STime, JobId), + NewData1 = update_job_data(Data, NewData), + Job1 = Job#{ + stime => STime, + seq => ?PENDING_SEQ, + state => pending, + data => NewData1 + }, + {ok, Job1}; + running -> + JV1 = JV#jv{stime = STime, resubmit = true}, + set_job_val(Tx, Key, JV1), + {ok, Job#{resubmit => true, stime => STime, + state => running, seq => Seq, data => Data}} + end; + not_found -> + {error, not_found} + end. + + +update(#{jtx := true} = JTx0, #{jlock := <<_/binary>>} = Job, Data0) when + is_map(Data0) orelse Data0 =:= undefined -> + #{tx := Tx} = JTx = get_jtx(JTx0), + #{jlock := JLock, type := Type, id := JobId} = Job, + Key = job_key(JTx, Job), + case get_job_or_halt(Tx, Key, JLock) of + #jv{seq = Seq, stime = STime, resubmit = Resubmit} = JV0 -> + Data = case Data0 =:= undefined of + true -> JV0#jv.data; + false -> Data0 + end, + JV = JV0#jv{seq = ?UNSET_VS, data = Data}, + try set_job_val(Tx, Key, JV) of + ok -> + update_activity(JTx, Type, JobId, Seq, Data), + {ok, Job#{resubmit => Resubmit, stime => STime}} + catch + error:{json_encoding_error, Error} -> + {error, {json_encoding_error, Error}} + end; + halt -> + {error, halt} + end. + + +% Type and activity monitoring API + +set_type_timeout(#{jtx := true} = JTx, Type, Timeout) -> + #{tx := Tx, jobs_path := Jobs} = get_jtx(JTx), + Key = erlfdb_tuple:pack({?ACTIVITY_TIMEOUT, Type}, Jobs), + Val = erlfdb_tuple:pack({Timeout}), + erlfdb:set(Tx, Key, Val). + + +clear_type_timeout(#{jtx := true} = JTx, Type) -> + #{tx := Tx, jobs_path := Jobs} = get_jtx(JTx), + Key = erlfdb_tuple:pack({?ACTIVITY_TIMEOUT, Type}, Jobs), + erlfdb:clear(Tx, Key). 
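+% Note: get_type_timeout/2 below reads the timeout with a snapshot get
+% (erlfdb:get_ss/2), so transactions that merely read the type timeout do
+% not register a read conflict against concurrent timeout updates.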
+ + +get_type_timeout(#{jtx := true} = JTx, Type) -> + #{tx := Tx, jobs_path := Jobs} = get_jtx(JTx), + Key = erlfdb_tuple:pack({?ACTIVITY_TIMEOUT, Type}, Jobs), + case erlfdb:wait(erlfdb:get_ss(Tx, Key)) of + not_found -> + not_found; + Val -> + {Timeout} = erlfdb_tuple:unpack(Val), + Timeout + end. + + +get_types(#{jtx := true} = JTx) -> + #{tx := Tx, jobs_path := Jobs} = get_jtx(JTx), + Prefix = erlfdb_tuple:pack({?ACTIVITY_TIMEOUT}, Jobs), + Opts = [{streaming_mode, want_all}], + Result = erlfdb:wait(erlfdb:get_range_startswith(Tx, Prefix, Opts)), + lists:map(fun({K, _V}) -> + {Type} = erlfdb_tuple:unpack(K, Prefix), + Type + end, Result). + + +get_activity_vs(#{jtx := true} = JTx, Type) -> + #{tx := Tx, jobs_path := Jobs} = get_jtx(JTx), + Key = erlfdb_tuple:pack({?WATCHES_ACTIVITY, Type}, Jobs), + case erlfdb:wait(erlfdb:get(Tx, Key)) of + not_found -> + not_found; + Val -> + {VS} = erlfdb_tuple:unpack(Val), + VS + end. + + +get_activity_vs_and_watch(#{jtx := true} = JTx, Type) -> + #{tx := Tx, jobs_path := Jobs} = get_jtx(JTx), + Key = erlfdb_tuple:pack({?WATCHES_ACTIVITY, Type}, Jobs), + Future = erlfdb:get(Tx, Key), + Watch = erlfdb:watch(Tx, Key), + case erlfdb:wait(Future) of + not_found -> + {not_found, Watch}; + Val -> + {VS} = erlfdb_tuple:unpack(Val), + {VS, Watch} + end. + + +get_active_since(#{jtx := true} = JTx, Type, Versionstamp) -> + #{tx := Tx, jobs_path := Jobs} = get_jtx(JTx), + Prefix = erlfdb_tuple:pack({?ACTIVITY}, Jobs), + StartKey = erlfdb_tuple:pack({Type, Versionstamp}, Prefix), + StartKeySel = erlfdb_key:first_greater_or_equal(StartKey), + {_, EndKey} = erlfdb_tuple:range({Type}, Prefix), + Opts = [{streaming_mode, want_all}], + Future = erlfdb:get_range(Tx, StartKeySel, EndKey, Opts), + maps:from_list(lists:map(fun({_K, V}) -> + erlfdb_tuple:unpack(V) + end, erlfdb:wait(Future))). + + +get_inactive_since(#{jtx := true} = JTx, Type, Versionstamp) -> + #{tx := Tx, jobs_path := Jobs} = get_jtx(JTx), + Prefix = erlfdb_tuple:pack({?ACTIVITY}, Jobs), + {StartKey, _} = erlfdb_tuple:range({Type}, Prefix), + EndKey = erlfdb_tuple:pack({Type, Versionstamp}, Prefix), + EndKeySel = erlfdb_key:first_greater_than(EndKey), + Opts = [{streaming_mode, want_all}], + Future = erlfdb:get_range(Tx, StartKey, EndKeySel, Opts), + lists:map(fun({_K, V}) -> + {JobId, _} = erlfdb_tuple:unpack(V), + JobId + end, erlfdb:wait(Future)). + + +re_enqueue_inactive(#{jtx := true} = JTx, Type, JobIds) when is_list(JobIds) -> + #{tx := Tx} = get_jtx(JTx), + lists:foreach(fun(JobId) -> + case get_job_val(Tx, job_key(JTx, Type, JobId)) of + #jv{seq = Seq, stime = STime, data = Data} -> + clear_activity(JTx, Type, Seq), + maybe_enqueue(JTx, Type, JobId, STime, true, Data); + not_found -> + ok + end + end, JobIds), + case length(JobIds) > 0 of + true -> update_watch(JTx, Type); + false -> ok + end. + + +% Cache initialization API. Called from the supervisor just to create the ETS +% table. It returns `ignore` to tell supervisor it won't actually start any +% process, which is what we want here. +% +init_cache() -> + ConcurrencyOpts = [{read_concurrency, true}, {write_concurrency, true}], + ets:new(?MODULE, [public, named_table] ++ ConcurrencyOpts), + ignore. + + +% Functions to encode / decode JobData +% +encode_data(#{} = JobData) -> + try + iolist_to_binary(jiffy:encode(JobData, [force_utf8])) + catch + throw:{error, Error} -> + % legacy clause since new versions of jiffy raise error instead + error({json_encoding_error, Error}); + error:Error -> + error({json_encoding_error, Error}) + end. 
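+% Round-trip sketch for the JSON helpers (encode_data/1 above and
+% decode_data/1 below); the values are illustrative:
+%
+%   Bin = couch_jobs_fdb:encode_data(#{<<"x">> => 1}),
+%   #{<<"x">> := 1} = couch_jobs_fdb:decode_data(Bin).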
+ + +decode_data(not_found) -> + not_found; + +decode_data(#{} = JobData) -> + JobData; + +decode_data(<<_/binary>> = JobData) -> + jiffy:decode(JobData, [dedupe_keys, return_maps]). + + +% Cached job transaction object. This object wraps a transaction, caches the +% directory lookup path, and the metadata version. The function can be used +% from inside or outside the transaction. When used from a transaction it will +% verify if the metadata was changed, and will refresh automatically. +% +get_jtx() -> + get_jtx(undefined). + + +get_jtx(#{tx := Tx} = _TxDb) -> + get_jtx(Tx); + +get_jtx(undefined = _Tx) -> + case ets:lookup(?MODULE, ?JOBS_ETS_KEY) of + [{_, #{} = JTx}] -> + JTx; + [] -> + JTx = update_jtx_cache(init_jtx(undefined)), + JTx#{tx := undefined} + end; + +get_jtx({erlfdb_transaction, _} = Tx) -> + case ets:lookup(?MODULE, ?JOBS_ETS_KEY) of + [{_, #{} = JTx}] -> + ensure_current(JTx#{tx := Tx}); + [] -> + update_jtx_cache(init_jtx(Tx)) + end. + + +% Transaction processing to be used with couch jobs' specific transaction +% contexts +% +tx(#{jtx := true} = JTx, Fun) when is_function(Fun, 1) -> + fabric2_fdb:transactional(JTx, Fun). + + +% Debug and testing API + +get_job(Type, JobId) -> + fabric2_fdb:transactional(fun(Tx) -> + JTx = init_jtx(Tx), + case get_job_val(Tx, job_key(JTx, Type, JobId)) of + #jv{seq = Seq, jlock = JLock} = JV -> + #{ + job => true, + type => Type, + id => JobId, + seq => Seq, + jlock => JLock, + stime => JV#jv.stime, + resubmit => JV#jv.resubmit, + data => decode_data(JV#jv.data), + state => job_state(JLock, Seq) + }; + not_found -> + not_found + end + end). + + +get_jobs() -> + fabric2_fdb:transactional(fun(Tx) -> + #{jobs_path := Jobs} = init_jtx(Tx), + Prefix = erlfdb_tuple:pack({?DATA}, Jobs), + Opts = [{streaming_mode, want_all}], + Result = erlfdb:wait(erlfdb:get_range_startswith(Tx, Prefix, Opts)), + lists:map(fun({K, V}) -> + {Type, JobId} = erlfdb_tuple:unpack(K, Prefix), + {Seq, JLock, _, _, Data} = erlfdb_tuple:unpack(V), + JobState = job_state(JLock, Seq), + {Type, JobId, JobState, decode_data(Data)} + end, Result) + end). + + +% Call this function if the top level "couchdb" FDB directory layer +% changes. +% +bump_metadata_version() -> + fabric2_fdb:transactional(fun(Tx) -> + bump_metadata_version(Tx) + end). + + +bump_metadata_version(Tx) -> + erlfdb:set_versionstamped_value(Tx, ?COUCH_JOBS_MD_VERSION, <<0:112>>). + + +% Private helper functions + +maybe_enqueue(#{jtx := true} = JTx, Type, JobId, STime, Resubmit, Data) -> + #{tx := Tx} = JTx, + Key = job_key(JTx, Type, JobId), + JV = #jv{ + seq = null, + jlock = null, + stime = STime, + resubmit = false, + data = Data + }, + case Resubmit of + true -> + set_job_val(Tx, Key, JV#jv{seq = ?PENDING_SEQ}), + couch_jobs_pending:enqueue(JTx, Type, STime, JobId); + false -> + set_job_val(Tx, Key, JV) + end, + ok. + + +job_key(#{jtx := true, jobs_path := Jobs}, Type, JobId) -> + erlfdb_tuple:pack({?DATA, Type, JobId}, Jobs). + + +job_key(JTx, #{type := Type, id := JobId}) -> + job_key(JTx, Type, JobId). + + +get_job_val(#{jtx := true, tx := Tx} = JTx, #{job := true} = Job) -> + get_job_val(Tx, job_key(JTx, Job)); + +get_job_val(Tx = {erlfdb_transaction, _}, Key) -> + case erlfdb:wait(erlfdb:get(Tx, Key)) of + <<_/binary>> = Val -> + {Seq, JLock, STime, Resubmit, Data} = erlfdb_tuple:unpack(Val), + #jv{ + seq = Seq, + jlock = JLock, + stime = STime, + resubmit = Resubmit, + data = Data + }; + not_found -> + not_found + end. 
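+% get_job_val/2 above and set_job_val/3 below translate between the #jv{}
+% record and the packed value {Seq, JLock, STime, Resubmit, Data} stored in
+% the ?DATA row.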
+ + +set_job_val(Tx = {erlfdb_transaction, _}, Key, #jv{} = JV) -> + #jv{ + seq = Seq, + jlock = JLock, + stime = STime, + resubmit = Resubmit, + data = Data0 + } = JV, + Data = case Data0 of + #{} -> encode_data(Data0); + <<_/binary>> -> Data0 + end, + case Seq of + ?UNSET_VS -> + Val = erlfdb_tuple:pack_vs({Seq, JLock, STime, Resubmit, Data}), + erlfdb:set_versionstamped_value(Tx, Key, Val); + _Other -> + Val = erlfdb_tuple:pack({Seq, JLock, STime, Resubmit, Data}), + erlfdb:set(Tx, Key, Val) + end, + ok. + + +get_job_or_halt(Tx, Key, JLock) -> + case get_job_val(Tx, Key) of + #jv{jlock = CurJLock} when CurJLock =/= JLock -> + halt; + #jv{} = Res -> + Res; + not_found -> + halt + end. + + +update_activity(#{jtx := true} = JTx, Type, JobId, Seq, Data0) -> + #{tx := Tx, jobs_path := Jobs} = JTx, + case Seq =/= null of + true -> clear_activity(JTx, Type, Seq); + false -> ok + end, + Key = erlfdb_tuple:pack_vs({?ACTIVITY, Type, ?UNSET_VS}, Jobs), + Data = case Data0 of + #{} -> encode_data(Data0); + <<_/binary>> -> Data0 + end, + Val = erlfdb_tuple:pack({JobId, Data}), + erlfdb:set_versionstamped_key(Tx, Key, Val), + update_watch(JTx, Type). + + +clear_activity(#{jtx := true} = JTx, Type, Seq) -> + #{tx := Tx, jobs_path := Jobs} = JTx, + Key = erlfdb_tuple:pack({?ACTIVITY, Type, Seq}, Jobs), + erlfdb:clear(Tx, Key). + + +update_watch(#{jtx := true} = JTx, Type) -> + #{tx := Tx, jobs_path := Jobs} = JTx, + Key = erlfdb_tuple:pack({?WATCHES_ACTIVITY, Type}, Jobs), + Val = erlfdb_tuple:pack_vs({?UNSET_VS}), + erlfdb:set_versionstamped_value(Tx, Key, Val), + ok. + + +job_state(JLock, Seq) -> + case {JLock, Seq} of + {null, null} -> finished; + {JLock, _} when JLock =/= null -> running; + {null, Seq} when Seq =/= null -> pending + end. + + +% This a transaction context object similar to the Db = #{} one from +% fabric2_fdb. It's is used to cache the jobs path directory (to avoid extra +% lookups on every operation) and to check for metadata changes (in case +% directory changes). +% +init_jtx(undefined) -> + fabric2_fdb:transactional(fun(Tx) -> init_jtx(Tx) end); + +init_jtx({erlfdb_transaction, _} = Tx) -> + LayerPrefix = fabric2_fdb:get_dir(Tx), + Jobs = erlfdb_tuple:pack({?JOBS}, LayerPrefix), + % layer_prefix, md_version and tx here match db map fields in fabric2_fdb + % but we also assert that this is a job transaction using the jtx => true + % field + #{ + jtx => true, + tx => Tx, + layer_prefix => LayerPrefix, + jobs_path => Jobs, + md_version => get_metadata_version(Tx) + }. + + +ensure_current(#{jtx := true, tx := Tx} = JTx) -> + case get(?COUCH_JOBS_CURRENT) of + Tx -> + JTx; + _ -> + JTx1 = update_current(JTx), + put(?COUCH_JOBS_CURRENT, Tx), + JTx1 + end. + + +get_metadata_version({erlfdb_transaction, _} = Tx) -> + erlfdb:wait(erlfdb:get_ss(Tx, ?COUCH_JOBS_MD_VERSION)). + + +update_current(#{tx := Tx, md_version := Version} = JTx) -> + case get_md_version_age(Version) of + Age when Age =< ?MD_VERSION_MAX_AGE_SEC -> + % Looked it up not too long ago. Avoid looking it up to frequently + JTx; + _ -> + case get_metadata_version(Tx) of + Version -> + update_md_version_timestamp(Version), + JTx; + _NewVersion -> + update_jtx_cache(init_jtx(Tx)) + end + end. + + +update_jtx_cache(#{jtx := true, md_version := Version} = JTx) -> + CachedJTx = JTx#{tx := undefined}, + ets:insert(?MODULE, {?JOBS_ETS_KEY, CachedJTx}), + update_md_version_timestamp(Version), + JTx. 
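+% Cache invalidation hinges on the ?COUCH_JOBS_MD_VERSION key: when the top
+% level directory changes, bump_metadata_version/0 writes a new versionstamp
+% there. A cached jtx that has not re-checked the version within
+% ?MD_VERSION_MAX_AGE_SEC seconds re-reads it on its next transactional use
+% and is rebuilt via init_jtx/1 if the version moved (see update_current/1
+% above).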
+ + +get_md_version_age(Version) -> + Timestamp = case ets:lookup(?MODULE, ?MD_TIMESTAMP_ETS_KEY) of + [{_, Version, Ts}] -> Ts; + _ -> 0 + end, + erlang:system_time(second) - Timestamp. + + +update_md_version_timestamp(Version) -> + Ts = erlang:system_time(second), + ets:insert(?MODULE, {?MD_TIMESTAMP_ETS_KEY, Version, Ts}). + + +update_job_data(Data, undefined) -> + Data; + +update_job_data(_Data, NewData) -> + NewData. diff --git a/src/couch_jobs/src/couch_jobs_notifier.erl b/src/couch_jobs/src/couch_jobs_notifier.erl new file mode 100644 index 000000000..99581cb79 --- /dev/null +++ b/src/couch_jobs/src/couch_jobs_notifier.erl @@ -0,0 +1,314 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_jobs_notifier). + +-behaviour(gen_server). + + +-export([ + start_link/1, + subscribe/4, + unsubscribe/2 +]). + +-export([ + init/1, + terminate/2, + handle_call/3, + handle_cast/2, + handle_info/2, + code_change/3, + format_status/2 +]). + + +-include("couch_jobs.hrl"). + + +-define(TYPE_MONITOR_HOLDOFF_DEFAULT, 50). +-define(TYPE_MONITOR_TIMEOUT_DEFAULT, "infinity"). +-define(GET_JOBS_RANGE_RATIO, 0.5). + + +-record(st, { + jtx, + type, + monitor_pid, + subs, % #{JobId => #{Ref => {Pid, State, Seq}}} + pidmap, % #{{Jobid, Pid} => Ref} + refmap % #{Ref => JobId} +}). + + +start_link(Type) -> + gen_server:start_link(?MODULE, [Type], []). + + +subscribe(Type, JobId, State, Seq) -> + case couch_jobs_server:get_notifier_server(Type) of + {ok, Server} -> + CallArgs = {subscribe, JobId, State, Seq, self()}, + Ref = gen_server:call(Server, CallArgs, infinity), + {ok, {Server, Ref}}; + {error, Error} -> + {error, Error} + end. + + +unsubscribe(Server, Ref) when is_reference(Ref) -> + gen_server:call(Server, {unsubscribe, Ref, self()}, infinity). + + +init([Type]) -> + JTx = couch_jobs_fdb:get_jtx(), + St = #st{ + jtx = JTx, + type = Type, + subs = #{}, + pidmap = #{}, + refmap = #{} + }, + VS = get_type_vs(St), + HoldOff = get_holdoff(), + Timeout = get_timeout(), + Pid = couch_jobs_type_monitor:start(Type, VS, HoldOff, Timeout), + {ok, St#st{monitor_pid = Pid}}. + + +terminate(_, _St) -> + ok. + + +handle_call({subscribe, JobId, State, Seq, Pid}, _From, #st{} = St) -> + #st{pidmap = PidMap, refmap = RefMap} = St, + case maps:get({JobId, Pid}, PidMap, not_found) of + not_found -> + Ref = erlang:monitor(process, Pid), + St1 = update_sub(JobId, Ref, Pid, State, Seq, St), + St2 = St1#st{pidmap = PidMap#{{JobId, Pid} => Ref}}, + St3 = St2#st{refmap = RefMap#{Ref => JobId}}, + {reply, Ref, St3}; + Ref when is_reference(Ref) -> + St1 = update_sub(JobId, Ref, Pid, State, Seq, St), + {reply, Ref, St1} + end; + +handle_call({unsubscribe, Ref, Pid}, _From, #st{} = St) -> + {reply, ok, unsubscribe_int(Ref, Pid, St)}; + +handle_call(Msg, _From, St) -> + {stop, {bad_call, Msg}, {bad_call, Msg}, St}. + + +handle_cast(Msg, St) -> + {stop, {bad_cast, Msg}, St}. 
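+% The three maps in #st{} are kept in sync by update_sub/6, remove_sub/3 and
+% unsubscribe_int/3,4 below. For a single subscriber Pid watching JobId the
+% shapes are roughly (illustrative):
+%
+%   subs   = #{JobId => #{Ref => {Pid, State, Seq}}},
+%   pidmap = #{{JobId, Pid} => Ref},
+%   refmap = #{Ref => JobId}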
+ + +handle_info({type_updated, VS}, St) -> + VSMax = flush_type_updated_messages(VS), + {noreply, try_notify_subscribers(VSMax, St)}; + +handle_info({Ref, ready}, St) when is_reference(Ref) -> + % Don't crash out couch_jobs_server and the whole application would need to + % eventually do proper cleanup in erlfdb:wait timeout code. + LogMsg = "~p : spurious erlfdb future ready message ~p", + couch_log:error(LogMsg, [?MODULE, Ref]), + {noreply, St}; + +handle_info({'DOWN', Ref, process, Pid, _}, #st{} = St) -> + {noreply, unsubscribe_int(Ref, Pid, St)}; + +handle_info(Msg, St) -> + {stop, {bad_info, Msg}, St}. + + +code_change(_OldVsn, St, _Extra) -> + {ok, St}. + + +format_status(_Opt, [_PDict, State]) -> + #st{ + jtx=JTx, + type=Type, + monitor_pid=MonitorPid, + subs=Subs, + pidmap=PidMap, + refmap=RefMap + } = State, + [{data, [{"State", [ + {jtx, JTx}, + {type, Type}, + {monitor_pid, MonitorPid}, + {subs, {map_size, maps:size(Subs)}}, + {pidmap, {map_size, maps:size(PidMap)}}, + {refmap, {map_size, maps:size(RefMap)}} + ]}]}]. + + +update_subs(JobId, Refs, #st{subs = Subs} = St) when map_size(Refs) =:= 0 -> + St#st{subs = maps:remove(JobId, Subs)}; + +update_subs(JobId, Refs, #st{subs = Subs} = St) when map_size(Refs) > 0 -> + St#st{subs = Subs#{JobId => Refs}}. + + +update_sub(JobId, Ref, Pid, State, Seq, #st{subs = Subs} = St) -> + Refs = maps:get(JobId, Subs, #{}), + update_subs(JobId, Refs#{Ref => {Pid, State, Seq}}, St). + + +remove_sub(JobId, Ref, #st{subs = Subs} = St) -> + case maps:get(JobId, Subs, not_found) of + not_found -> St; + #{} = Refs -> update_subs(JobId, maps:remove(Ref, Refs), St) + end. + + +unsubscribe_int(Id, Ref, Pid, #st{pidmap = PidMap, refmap = RefMap} = St) -> + St1 = remove_sub(Id, Ref, St), + erlang:demonitor(Ref, [flush]), + St1#st{ + pidmap = maps:remove({Id, Pid}, PidMap), + refmap = maps:remove(Ref, RefMap) + }. + + +unsubscribe_int(Ref, Pid, #st{refmap = RefMap} = St) -> + case maps:get(Ref, RefMap, not_found) of + not_found -> St; + Id -> unsubscribe_int(Id, Ref, Pid, St) + end. + + +flush_type_updated_messages(VSMax) -> + receive + {type_updated, VS} -> + flush_type_updated_messages(max(VS, VSMax)) + after + 0 -> VSMax + end. + + +get_jobs(#st{jtx = JTx, type = Type}, InactiveIdMap, Ratio) + when Ratio >= ?GET_JOBS_RANGE_RATIO -> + Filter = fun(JobId) -> maps:is_key(JobId, InactiveIdMap) end, + JobMap = couch_jobs_fdb:tx(JTx, fun(JTx1) -> + couch_jobs_fdb:get_jobs(JTx1, Type, Filter) + end), + maps:map(fun(JobId, _) -> + case maps:is_key(JobId, JobMap) of + true -> maps:get(JobId, JobMap); + false -> {null, not_found, not_found} + end + end, InactiveIdMap); + +get_jobs(#st{jtx = JTx, type = Type}, InactiveIdMap, _) -> + couch_jobs_fdb:tx(JTx, fun(JTx1) -> + maps:map(fun(JobId, _) -> + Job = #{job => true, type => Type, id => JobId}, + case couch_jobs_fdb:get_job_state_and_data(JTx1, Job) of + {ok, Seq, State, Data} -> + {Seq, State, Data}; + {error, not_found} -> + {null, not_found, not_found} + end + end, InactiveIdMap) + end). + + +get_type_vs(#st{jtx = JTx, type = Type}) -> + couch_jobs_fdb:tx(JTx, fun(JTx1) -> + couch_jobs_fdb:get_activity_vs(JTx1, Type) + end). + + +% "Active since" is the set of jobs that have been active (running) +% and updated at least once since the given versionstamp. These are relatively +% cheap to find as it's just a range read in the ?ACTIVITY subspace. 
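+% The helper below narrows that range read to currently subscribed job ids
+% and returns a map of the shape #{JobId => {VS, running, Data}}.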
+% +get_active_since(#st{} = _St, not_found) -> + #{}; + +get_active_since(#st{jtx = JTx, type = Type, subs = Subs}, VS) -> + AllUpdated = couch_jobs_fdb:tx(JTx, fun(JTx1) -> + couch_jobs_fdb:get_active_since(JTx1, Type, VS) + end), + maps:map(fun(_JobId, Data) -> + {VS, running, Data} + end, maps:with(maps:keys(Subs), AllUpdated)). + + +try_notify_subscribers(ActiveVS, #st{} = St) -> + try + notify_subscribers(ActiveVS, St) + catch + error:{timeout, _} -> try_notify_subscribers(ActiveVS, St); + error:{erlfdb_error, 1031} -> try_notify_subscribers(ActiveVS, St) + end. + + +notify_subscribers(_, #st{subs = Subs} = St) when map_size(Subs) =:= 0 -> + St; + +notify_subscribers(ActiveVS, #st{} = St1) -> + % First gather the easy (cheap) active jobs. Then with those out of way + % inspect each job to get its state. + Active = get_active_since(St1, ActiveVS), + St2 = notify_job_ids(Active, St1), + ActiveIds = maps:keys(Active), + Subs = St2#st.subs, + InactiveIdMap = maps:without(ActiveIds, Subs), + InactiveRatio = maps:size(InactiveIdMap) / maps:size(Subs), + Inactive = get_jobs(St2, InactiveIdMap, InactiveRatio), + notify_job_ids(Inactive, St2). + + +notify_job_ids(#{} = Jobs, #st{type = Type} = St0) -> + maps:fold(fun(Id, {VS, State, Data}, #st{} = StAcc) -> + DoUnsub = lists:member(State, [finished, not_found]), + maps:fold(fun + (_Ref, {_Pid, running, OldVS}, St) when State =:= running, + OldVS >= VS -> + St; + (Ref, {Pid, running, OldVS}, St) when State =:= running, + OldVS < VS -> + % For running state send updates even if state doesn't change + notify(Pid, Ref, Type, Id, State, Data), + update_sub(Id, Ref, Pid, running, VS, St); + (_Ref, {_Pid, OldState, _VS}, St) when OldState =:= State -> + St; + (Ref, {Pid, _State, _VS}, St) -> + notify(Pid, Ref, Type, Id, State, Data), + case DoUnsub of + true -> unsubscribe_int(Id, Ref, Pid, St); + false -> update_sub(Id, Ref, Pid, State, VS, St) + end + end, StAcc, maps:get(Id, StAcc#st.subs, #{})) + end, St0, Jobs). + + +notify(Pid, Ref, Type, Id, State, Data) -> + Pid ! {?COUCH_JOBS_EVENT, Ref, Type, Id, State, Data}. + + +get_holdoff() -> + config:get_integer("couch_jobs", "type_monitor_holdoff_msec", + ?TYPE_MONITOR_HOLDOFF_DEFAULT). + + +get_timeout() -> + Default = ?TYPE_MONITOR_TIMEOUT_DEFAULT, + case config:get("couch_jobs", "type_monitor_timeout_msec", Default) of + "infinity" -> infinity; + Milliseconds -> list_to_integer(Milliseconds) + end. diff --git a/src/couch_jobs/src/couch_jobs_notifier_sup.erl b/src/couch_jobs/src/couch_jobs_notifier_sup.erl new file mode 100644 index 000000000..81d93493b --- /dev/null +++ b/src/couch_jobs/src/couch_jobs_notifier_sup.erl @@ -0,0 +1,64 @@ +% +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_jobs_notifier_sup). + + +-behaviour(supervisor). + + +-export([ + start_link/0, + + start_notifier/1, + stop_notifier/1, + get_child_pids/0 +]). + +-export([ + init/1 +]). + + +start_link() -> + supervisor:start_link({local, ?MODULE}, ?MODULE, []). 
+ + +start_notifier(Type) -> + supervisor:start_child(?MODULE, [Type]). + + +stop_notifier(Pid) -> + supervisor:terminate_child(?MODULE, Pid). + + +get_child_pids() -> + lists:map(fun({_Id, Pid, _Type, _Mod}) -> + Pid + end, supervisor:which_children(?MODULE)). + + +init(_) -> + Flags = #{ + strategy => simple_one_for_one, + intensity => 10, + period => 3 + }, + Children = [ + #{ + id => couch_jobs_notifier, + restart => temporary, + start => {couch_jobs_notifier, start_link, []} + } + ], + {ok, {Flags, Children}}. diff --git a/src/couch_jobs/src/couch_jobs_pending.erl b/src/couch_jobs/src/couch_jobs_pending.erl new file mode 100644 index 000000000..a85f2fc5c --- /dev/null +++ b/src/couch_jobs/src/couch_jobs_pending.erl @@ -0,0 +1,163 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_jobs_pending). + + +-export([ + enqueue/4, + dequeue/4, + remove/4, + pending_count/4 +]). + + +-include("couch_jobs.hrl"). + + +-define(RANGE_LIMIT, 1024). + + +enqueue(#{jtx := true} = JTx, Type, STime, JobId) -> + #{tx := Tx, jobs_path := Jobs} = JTx, + Key = erlfdb_tuple:pack({?PENDING, Type, STime, JobId}, Jobs), + erlfdb:set(Tx, Key, <<>>), + WatchKey = erlfdb_tuple:pack({?WATCHES_PENDING, Type}, Jobs), + erlfdb:add(Tx, WatchKey, 1), + ok. + + +dequeue(#{jtx := true} = JTx, Type, _, true) -> + #{tx := Tx, jobs_path := Jobs} = JTx, + Prefix = erlfdb_tuple:pack({?PENDING, Type, 0}, Jobs), + case get_random_item(Tx, Prefix) of + {error, not_found} -> + {not_found, get_pending_watch(JTx, Type)}; + {ok, PendingKey} -> + erlfdb:clear(Tx, PendingKey), + {JobId} = erlfdb_tuple:unpack(PendingKey, Prefix), + {ok, JobId} + end; + +dequeue(#{jtx := true} = JTx, Type, MaxSTime, _) -> + #{tx := Tx, jobs_path := Jobs} = JTx, + {StartKeySel, EndKeySel} = get_range_selectors(JTx, Type, MaxSTime), + case clear_random_key_from_range(Tx, StartKeySel, EndKeySel) of + {error, not_found} -> + {not_found, get_pending_watch(JTx, Type)}; + {ok, PendingKey} -> + Prefix = erlfdb_tuple:pack({?PENDING, Type}, Jobs), + {_, JobId} = erlfdb_tuple:unpack(PendingKey, Prefix), + {ok, JobId} + end. + + +remove(#{jtx := true} = JTx, Type, JobId, STime) -> + #{tx := Tx, jobs_path := Jobs} = JTx, + Key = erlfdb_tuple:pack({?PENDING, Type, STime, JobId}, Jobs), + erlfdb:clear(Tx, Key). + + +pending_count(#{jtx := true} = JTx, Type, MaxSTime, Limit) -> + #{tx := Tx} = JTx, + Opts = [ + {limit, Limit}, + {snapshot, true}, + {streaming_mode, want_all} + ], + {StartSel, EndSel} = get_range_selectors(JTx, Type, MaxSTime), + FoldFun = fun(_Row, Cnt) -> Cnt + 1 end, + erlfdb:fold_range(Tx, StartSel, EndSel, FoldFun, 0, Opts). + + +%% Private functions + +% Get pending key selectors, taking into account max scheduled time value. 
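+% The selectors below span from just past the {?PENDING, Type} prefix up to
+% {?PENDING, Type, MaxSTime, 16#FF}, so in practice every pending job
+% scheduled at or before MaxSTime is a dequeue candidate.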
+get_range_selectors(#{jtx := true} = JTx, Type, MaxSTime) -> + #{jobs_path := Jobs} = JTx, + Prefix = erlfdb_tuple:pack({?PENDING, Type}, Jobs), + StartKeySel = erlfdb_key:first_greater_than(Prefix), + End = erlfdb_tuple:pack({MaxSTime, <<16#FF>>}, Prefix), + EndKeySel = erlfdb_key:first_greater_or_equal(End), + {StartKeySel, EndKeySel}. + + +% Pick a random item from the range without reading the keys in first. However +% the constraint it that IDs should looks like random UUIDs +get_random_item(Tx, Prefix) -> + Id = fabric2_util:uuid(), + Snapshot = erlfdb:snapshot(Tx), + % Try to be fair and switch evently between trying ids before or after the + % randomly generated one. Otherwise, trying before first, will leave a lot + % of <<"fff...">> IDs in the queue for too long and trying "after" first + % will leave a lot of <"00...">> ones waiting. + case rand:uniform() > 0.5 of + true -> + case get_after(Snapshot, Prefix, Id) of + {error, not_found} -> get_before(Snapshot, Prefix, Id); + {ok, Key} -> {ok, Key} + end; + false -> + case get_before(Snapshot, Prefix, Id) of + {error, not_found} -> get_after(Snapshot, Prefix, Id); + {ok, Key} -> {ok, Key} + end + end. + + +get_before(Snapshot, Prefix, Id) -> + KSel = erlfdb_key:last_less_or_equal(erlfdb_tuple:pack({Id}, Prefix)), + PrefixSize = byte_size(Prefix), + case erlfdb:wait(erlfdb:get_key(Snapshot, KSel)) of + <<Prefix:PrefixSize/binary, _/binary>> = Key -> {ok, Key}; + _ -> {error, not_found} + end. + + +get_after(Snapshot, Prefix, Id) -> + KSel = erlfdb_key:first_greater_or_equal(erlfdb_tuple:pack({Id}, Prefix)), + PrefixSize = byte_size(Prefix), + case erlfdb:wait(erlfdb:get_key(Snapshot, KSel)) of + <<Prefix:PrefixSize/binary, _/binary>> = Key -> {ok, Key}; + _ -> {error, not_found} + end. + + +% Pick a random key from the range snapshot. Then radomly pick a key to clear. +% Before clearing, ensure there is a read conflict on the key in in case other +% workers have picked the same key. +% +clear_random_key_from_range(Tx, Start, End) -> + Opts = [ + {limit, ?RANGE_LIMIT}, + {snapshot, true} + ], + case erlfdb:wait(erlfdb:get_range(Tx, Start, End, Opts)) of + [] -> + {error, not_found}; + [{Key, _}] -> + erlfdb:add_read_conflict_key(Tx, Key), + erlfdb:clear(Tx, Key), + {ok, Key}; + [{_, _} | _] = KVs -> + Index = rand:uniform(length(KVs)), + {Key, _} = lists:nth(Index, KVs), + erlfdb:add_read_conflict_key(Tx, Key), + erlfdb:clear(Tx, Key), + {ok, Key} + end. + + +get_pending_watch(#{jtx := true} = JTx, Type) -> + #{tx := Tx, jobs_path := Jobs} = couch_jobs_fdb:get_jtx(JTx), + Key = erlfdb_tuple:pack({?WATCHES_PENDING, Type}, Jobs), + erlfdb:watch(Tx, Key). diff --git a/src/couch_jobs/src/couch_jobs_server.erl b/src/couch_jobs/src/couch_jobs_server.erl new file mode 100644 index 000000000..2e03c7dcf --- /dev/null +++ b/src/couch_jobs/src/couch_jobs_server.erl @@ -0,0 +1,193 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_jobs_server). + +-behaviour(gen_server). 
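+
+% couch_jobs_server periodically scans the set of job types which have an
+% activity timeout configured and keeps exactly one activity monitor and one
+% notifier running per type (see check_types/0 below). The scan period and
+% its jitter are configurable in the "couch_jobs" config section.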
+ + +-export([ + start_link/0, + get_notifier_server/1, + force_check_types/0 +]). + +-export([ + init/1, + terminate/2, + handle_call/3, + handle_cast/2, + handle_info/2, + code_change/3 +]). + + +-define(TYPE_CHECK_PERIOD_DEFAULT, 15000). +-define(MAX_JITTER_DEFAULT, 5000). + + +start_link() -> + gen_server:start_link({local, ?MODULE}, ?MODULE, nil, []). + + +get_notifier_server(Type) -> + case get_type_pid_refs(Type) of + {{_, _}, {NotifierPid, _}} -> + {ok, NotifierPid}; + not_found -> + force_check_types(), + case get_type_pid_refs(Type) of + {{_, _}, {NotifierPid, _}} -> + {ok, NotifierPid}; + not_found -> + {error, not_found} + end + end. + + +force_check_types() -> + gen_server:call(?MODULE, check_types, infinity). + + +init(_) -> + % If couch_jobs_server is after the notifiers and activity supervisor. If + % it restart, there could be some stale notifier or activity monitors. Kill + % those as later on we'd start new ones anyway. + reset_monitors(), + reset_notifiers(), + ets:new(?MODULE, [protected, named_table]), + check_types(), + schedule_check(), + {ok, nil}. + + +terminate(_, _St) -> + ok. + + +handle_call(check_types, _From, St) -> + check_types(), + {reply, ok, St}; + +handle_call(Msg, _From, St) -> + {stop, {bad_call, Msg}, {bad_call, Msg}, St}. + + +handle_cast(Msg, St) -> + {stop, {bad_cast, Msg}, St}. + + +handle_info(check_types, St) -> + check_types(), + schedule_check(), + {noreply, St}; + +handle_info({'DOWN', _Ref, process, Pid, Reason}, St) -> + LogMsg = "~p : process ~p exited with ~p", + couch_log:error(LogMsg, [?MODULE, Pid, Reason]), + {stop, {unexpected_process_exit, Pid, Reason}, St}; + +handle_info({Ref, ready}, St) when is_reference(Ref) -> + % Don't crash out couch_jobs_server and the whole application would need to + % eventually do proper cleanup in erlfdb:wait timeout code. + LogMsg = "~p : spurious erlfdb future ready message ~p", + couch_log:error(LogMsg, [?MODULE, Ref]), + {noreply, St}; + +handle_info(Msg, St) -> + {stop, {bad_info, Msg}, St}. + + +code_change(_OldVsn, St, _Extra) -> + {ok, St}. + + +check_types() -> + FdbTypes = fdb_types(), + EtsTypes = ets_types(), + ToStart = FdbTypes -- EtsTypes, + ToStop = EtsTypes -- FdbTypes, + lists:foreach(fun(Type) -> start_monitors(Type) end, ToStart), + lists:foreach(fun(Type) -> stop_monitors(Type) end, ToStop). + + +start_monitors(Type) -> + MonPidRef = case couch_jobs_activity_monitor_sup:start_monitor(Type) of + {ok, Pid1} -> {Pid1, monitor(process, Pid1)}; + {error, Error1} -> error({failed_to_start_monitor, Type, Error1}) + end, + NotifierPidRef = case couch_jobs_notifier_sup:start_notifier(Type) of + {ok, Pid2} -> {Pid2, monitor(process, Pid2)}; + {error, Error2} -> error({failed_to_start_notifier, Type, Error2}) + end, + ets:insert_new(?MODULE, {Type, MonPidRef, NotifierPidRef}). + + +stop_monitors(Type) -> + {{MonPid, MonRef}, {NotifierPid, NotifierRef}} = get_type_pid_refs(Type), + ok = couch_jobs_activity_monitor_sup:stop_monitor(MonPid), + demonitor(MonRef, [flush]), + ok = couch_jobs_notifier_sup:stop_notifier(NotifierPid), + demonitor(NotifierRef, [flush]), + ets:delete(?MODULE, Type). + + +reset_monitors() -> + lists:foreach(fun(Pid) -> + couch_jobs_activity_monitor_sup:stop_monitor(Pid) + end, couch_jobs_activity_monitor_sup:get_child_pids()). + + +reset_notifiers() -> + lists:foreach(fun(Pid) -> + couch_jobs_notifier_sup:stop_notifier(Pid) + end, couch_jobs_notifier_sup:get_child_pids()). 
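+% The protected ETS table owned by this server maps each running type to its
+% monitor and notifier processes; an entry looks roughly like this (pids and
+% refs are illustrative):
+%
+%   {Type, {MonitorPid, MonitorRef}, {NotifierPid, NotifierRef}}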
+ + +get_type_pid_refs(Type) -> + case ets:lookup(?MODULE, Type) of + [{_, MonPidRef, NotifierPidRef}] -> {MonPidRef, NotifierPidRef}; + [] -> not_found + end. + + +ets_types() -> + lists:flatten(ets:match(?MODULE, {'$1', '_', '_'})). + + +fdb_types() -> + try + couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(), fun(JTx) -> + couch_jobs_fdb:get_types(JTx) + end) + catch + error:{timeout, _} -> + couch_log:warning("~p : Timed out connecting to FDB", [?MODULE]), + [] + end. + + +schedule_check() -> + Timeout = get_period_msec(), + MaxJitter = max(Timeout div 2, get_max_jitter_msec()), + Wait = Timeout + rand:uniform(max(1, MaxJitter)), + erlang:send_after(Wait, self(), check_types). + + +get_period_msec() -> + config:get_integer("couch_jobs", "type_check_period_msec", + ?TYPE_CHECK_PERIOD_DEFAULT). + + +get_max_jitter_msec() -> + config:get_integer("couch_jobs", "type_check_max_jitter_msec", + ?MAX_JITTER_DEFAULT). diff --git a/src/couch_jobs/src/couch_jobs_sup.erl b/src/couch_jobs/src/couch_jobs_sup.erl new file mode 100644 index 000000000..d79023777 --- /dev/null +++ b/src/couch_jobs/src/couch_jobs_sup.erl @@ -0,0 +1,66 @@ +% +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_jobs_sup). + + +-behaviour(supervisor). + + +-export([ + start_link/0 +]). + +-export([ + init/1 +]). + + +start_link() -> + supervisor:start_link({local, ?MODULE}, ?MODULE, []). + + +init([]) -> + Flags = #{ + strategy => rest_for_one, + intensity => 3, + period => 10 + }, + Children = [ + #{ + id => couch_jobs_fdb, + restart => transient, + start => {couch_jobs_fdb, init_cache, []} + }, + #{ + id => couch_jobs_activity_monitor_sup, + restart => permanent, + shutdown => brutal_kill, + type => supervisor, + start => {couch_jobs_activity_monitor_sup, start_link, []} + }, + #{ + id => couch_jobs_notifier_sup, + restart => permanent, + shutdown => brutal_kill, + type => supervisor, + start => {couch_jobs_notifier_sup, start_link, []} + }, + #{ + id => couch_jobs_server, + restart => permanent, + shutdown => brutal_kill, + start => {couch_jobs_server, start_link, []} + } + ], + {ok, {Flags, Children}}. diff --git a/src/couch_jobs/src/couch_jobs_type_monitor.erl b/src/couch_jobs/src/couch_jobs_type_monitor.erl new file mode 100644 index 000000000..04ad60acc --- /dev/null +++ b/src/couch_jobs/src/couch_jobs_type_monitor.erl @@ -0,0 +1,84 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_jobs_type_monitor). + + +-export([ + start/4 +]). + + +-include("couch_jobs.hrl"). 
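+
+% A plain (non gen_server) process, spawn_linked from couch_jobs_notifier,
+% that watches the per-type activity versionstamp and sends
+% {type_updated, VS} to its parent whenever the versionstamp changes, rate
+% limited by the holdoff interval.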
+ + +-record(st, { + jtx, + type, + vs, + parent, + timestamp, + holdoff, + timeout +}). + + +start(Type, VS, HoldOff, Timeout) -> + Parent = self(), + spawn_link(fun() -> + loop(#st{ + jtx = couch_jobs_fdb:get_jtx(), + type = Type, + vs = VS, + parent = Parent, + timestamp = 0, + holdoff = HoldOff, + timeout = Timeout + }) + end). + + +loop(#st{vs = VS, timeout = Timeout} = St) -> + {St1, Watch} = case get_vs_and_watch(St) of + {VS1, W} when VS1 =/= VS -> {notify(St#st{vs = VS1}), W}; + {VS, W} -> {St, W} + end, + try + erlfdb:wait(Watch, [{timeout, Timeout}]) + catch + error:{erlfdb_error, ?FUTURE_VERSION} -> + erlfdb:cancel(Watch, [flush]), + ok; + error:{timeout, _} -> + erlfdb:cancel(Watch, [flush]), + ok + end, + loop(St1). + + +notify(#st{} = St) -> + #st{holdoff = HoldOff, parent = Pid, timestamp = Ts, vs = VS} = St, + Now = erlang:system_time(millisecond), + case Now - Ts of + Dt when Dt < HoldOff -> + timer:sleep(max(HoldOff - Dt, 0)); + _ -> + ok + end, + Pid ! {type_updated, VS}, + St#st{timestamp = Now}. + + +get_vs_and_watch(#st{jtx = JTx, type = Type}) -> + couch_jobs_fdb:tx(JTx, fun(JTx1) -> + couch_jobs_fdb:get_activity_vs_and_watch(JTx1, Type) + end). diff --git a/src/couch_jobs/test/couch_jobs_tests.erl b/src/couch_jobs/test/couch_jobs_tests.erl new file mode 100644 index 000000000..11572a4b9 --- /dev/null +++ b/src/couch_jobs/test/couch_jobs_tests.erl @@ -0,0 +1,762 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_jobs_tests). + + +-include_lib("couch/include/couch_db.hrl"). +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("eunit/include/eunit.hrl"). + + +% Job creation API can take an undefined Tx object +% in that case it will start its own transaction +-define(TX, undefined). + + +couch_jobs_basic_test_() -> + { + "Test couch jobs basics", + { + setup, + fun setup_couch/0, fun teardown_couch/1, + { + foreach, + fun setup/0, fun teardown/1, + [ + fun add_remove_pending/1, + fun add_remove_errors/1, + fun add_with_the_same_scheduled_time/1, + fun get_job_data_and_state/1, + fun resubmit_as_job_creator/1, + fun type_timeouts_and_server/1, + fun dead_notifier_restarts_jobs_server/1, + fun bad_messages_restart_couch_jobs_server/1, + fun bad_messages_restart_notifier/1, + fun bad_messages_restart_activity_monitor/1, + fun basic_accept_and_finish/1, + fun accept_blocking/1, + fun job_processor_update/1, + fun resubmit_enqueues_job/1, + fun resubmit_finished_updates_job_data/1, + fun resubmit_running_does_not_update_job_data/1, + fun resubmit_custom_schedtime/1, + fun add_pending_updates_job_data/1, + fun add_finished_updates_job_data/1, + fun add_running_does_not_update_job_data/1, + fun accept_max_schedtime/1, + fun accept_no_schedule/1, + fun subscribe/1, + fun remove_when_subscribed_and_pending/1, + fun remove_when_subscribed_and_running/1, + fun subscribe_wait_multiple/1, + fun enqueue_inactive/1, + fun remove_running_job/1, + fun check_get_jobs/1, + fun use_fabric_transaction_object/1, + fun metadata_version_bump/1 + ] + } + } + }. 
+ + +setup_couch() -> + test_util:start_couch([fabric]). + + +teardown_couch(Ctx) -> + test_util:stop_couch(Ctx), + meck:unload(). + + +setup() -> + application:start(couch_jobs), + clear_jobs(), + T1 = {<<"t1">>, 1024}, % a complex type should work + T2 = 42, % a number should work as well + T1Timeout = 2, + T2Timeout = 3, + couch_jobs:set_type_timeout(T1, T1Timeout), + couch_jobs:set_type_timeout(T2, T2Timeout), + #{ + t1 => T1, + t2 => T2, + t1_timeout => T1Timeout, + j1 => <<"j1">>, + j2 => <<"j2">>, + dbname => ?tempdb() + }. + + +teardown(#{dbname := DbName}) -> + clear_jobs(), + application:stop(couch_jobs), + AllDbs = fabric2_db:list_dbs(), + case lists:member(DbName, AllDbs) of + true -> ok = fabric2_db:delete(DbName, []); + false -> ok + end, + meck:unload(). + + +clear_jobs() -> + couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(), fun(JTx) -> + #{jobs_path := Jobs, tx := Tx} = JTx, + erlfdb:clear_range_startswith(Tx, Jobs) + end). + + +restart_app() -> + application:stop(couch_jobs), + application:start(couch_jobs), + couch_jobs_server:force_check_types(). + + +get_job(Type, JobId) -> + couch_jobs_fdb:get_job(Type, JobId). + + +add_remove_pending(#{t1 := T1, j1 := J1, t2 := T2, j2 := J2}) -> + ?_test(begin + ?assertEqual(ok, couch_jobs:add(?TX, T1, J1, #{})), + ?assertMatch(#{state := pending, data := #{}}, get_job(T1, J1)), + ?assertEqual(ok, couch_jobs:remove(?TX, T1, J1)), + % Data and numeric type should work as well. Also do it in a + % transaction + Data = #{<<"x">> => 42}, + ?assertEqual(ok, fabric2_fdb:transactional(fun(Tx) -> + couch_jobs:add(Tx, T2, J2, Data) + end)), + ?assertMatch(#{state := pending, data := Data}, get_job(T2, J2)), + ?assertEqual(ok, couch_jobs:remove(?TX, T2, J2)) + end). + + +get_job_data_and_state(#{t1 := T, j1 := J}) -> + ?_test(begin + Data = #{<<"x">> => 42}, + ok = couch_jobs:add(?TX, T, J, Data), + ?assertEqual({ok, Data}, couch_jobs:get_job_data(?TX, T, J)), + ?assertEqual({ok, pending}, couch_jobs:get_job_state(?TX, T, J)), + ?assertEqual(ok, couch_jobs:remove(?TX, T, J)), + ?assertEqual({error, not_found}, couch_jobs:get_job_data(?TX, T, J)), + ?assertEqual({error, not_found}, couch_jobs:get_job_state(?TX, T, J)) + end). + + +add_remove_errors(#{t1 := T, j1 := J}) -> + ?_test(begin + ?assertEqual({error, not_found}, couch_jobs:remove(?TX, 999, <<"x">>)), + ?assertMatch({error, {json_encoding_error, _}}, couch_jobs:add(?TX, T, + J, #{1 => 2})), + ?assertEqual({error, no_type_timeout}, couch_jobs:add(?TX, <<"x">>, J, + #{})), + ?assertEqual(ok, couch_jobs:add(?TX, T, J, #{})), + ?assertEqual(ok, couch_jobs:add(?TX, T, J, #{})), + ?assertEqual(ok, couch_jobs:remove(?TX, T, J)) + end). + + +add_with_the_same_scheduled_time(#{t1 := T, j1 := J}) -> + ?_test(begin + ?assertEqual(ok, couch_jobs:add(?TX, T, J, #{})), + fabric2_fdb:transactional(fun(Tx) -> + ?assertEqual(ok, couch_jobs:add(Tx, T, J, #{})), + ?assert(erlfdb:is_read_only(Tx)) + end) + end). 
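For orientation, the add/accept/finish lifecycle these tests drive end to end looks roughly like the following sketch (function name and data are illustrative; basic_accept_and_finish/1 below exercises the same flow):

    run_one_job(Type, JobId) ->
        ok = couch_jobs:add(undefined, Type, JobId, #{<<"x">> => 1}),
        {ok, Job, Data} = couch_jobs:accept(Type),
        ok = couch_jobs:finish(undefined, Job, Data#{<<"done">> => true}).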
+ + +resubmit_as_job_creator(#{t1 := T, j1 := J}) -> + ?_test(begin + Data = #{<<"x">> => 42}, + ok = couch_jobs:add(?TX, T, J, Data, 15), + + % Job was pending, doesn't get resubmitted + ok = couch_jobs:add(?TX, T, J, Data, 16), + ?assertMatch(#{state := pending, stime := 16}, get_job(T, J)), + + {ok, Job1, Data} = couch_jobs:accept(T), + + % If is running, it gets flagged to be resubmitted + ok = couch_jobs:add(?TX, T, J, Data, 17), + ?assertMatch(#{state := running, stime := 17}, get_job(T, J)), + ?assertEqual(true, couch_jobs:is_resubmitted(get_job(T, J))), + + ?assertEqual(ok, couch_jobs:finish(?TX, Job1)), + % It should be pending according to the resubmit flag + ?assertMatch(#{state := pending, stime := 17}, get_job(T, J)), + + % A finished job will be re-enqueued + {ok, Job2, _} = couch_jobs:accept(T), + ?assertEqual(ok, couch_jobs:finish(?TX, Job2)), + ?assertMatch(#{state := finished, stime := 17}, get_job(T, J)), + ok = couch_jobs:add(?TX, T, J, Data, 18), + ?assertMatch(#{state := pending, stime := 18}, get_job(T, J)) + end). + + +type_timeouts_and_server(#{t1 := T, t1_timeout := T1Timeout}) -> + {timeout, 15, ?_test(begin + + WaitForActivityMonitors = fun(N) -> + test_util:wait(fun() -> + Pids = couch_jobs_activity_monitor_sup:get_child_pids(), + case length(Pids) == N of + true -> ok; + false -> wait + end + end) + end, + + WaitForNotifiers = fun(N) -> + test_util:wait(fun() -> + Pids = couch_jobs_notifier_sup:get_child_pids(), + case length(Pids) == N of + true -> ok; + false -> wait + end + end) + end, + + couch_jobs_server:force_check_types(), + + ?assertEqual(T1Timeout, couch_jobs:get_type_timeout(T)), + + WaitForActivityMonitors(2), + ?assertEqual(2, + length(couch_jobs_activity_monitor_sup:get_child_pids())), + + WaitForNotifiers(2), + ?assertEqual(2, length(couch_jobs_notifier_sup:get_child_pids())), + + ?assertMatch({ok, _}, couch_jobs_server:get_notifier_server(T)), + + ?assertEqual(ok, couch_jobs:set_type_timeout(<<"t3">>, 8)), + couch_jobs_server:force_check_types(), + + WaitForActivityMonitors(3), + ?assertEqual(3, + length(couch_jobs_activity_monitor_sup:get_child_pids())), + + WaitForNotifiers(3), + ?assertEqual(3, length(couch_jobs_notifier_sup:get_child_pids())), + + ?assertEqual(ok, couch_jobs:clear_type_timeout(<<"t3">>)), + couch_jobs_server:force_check_types(), + + WaitForActivityMonitors(2), + ?assertEqual(2, + length(couch_jobs_activity_monitor_sup:get_child_pids())), + + WaitForNotifiers(2), + ?assertEqual(2, + length(couch_jobs_notifier_sup:get_child_pids())), + + ?assertMatch({error, _}, + couch_jobs_server:get_notifier_server(<<"t3">>)), + + ?assertEqual(not_found, couch_jobs:get_type_timeout(<<"t3">>)) + end)}. + + +dead_notifier_restarts_jobs_server(#{}) -> + ?_test(begin + couch_jobs_server:force_check_types(), + + ServerPid = whereis(couch_jobs_server), + Ref = monitor(process, ServerPid), + + [Notifier1, _Notifier2] = couch_jobs_notifier_sup:get_child_pids(), + exit(Notifier1, kill), + + % Killing a notifier should kill the server as well + receive {'DOWN', Ref, _, _, _} -> ok end + end). 
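A condensed sketch of the type registration calls used in setup/0 and type_timeouts_and_server/1 above (the type name and timeout value are examples):

    register_type_example() ->
        ok = couch_jobs:set_type_timeout(<<"my_type">>, 6),
        couch_jobs_server:force_check_types(),
        6 = couch_jobs:get_type_timeout(<<"my_type">>),
        ok = couch_jobs:clear_type_timeout(<<"my_type">>),
        couch_jobs_server:force_check_types(),
        not_found = couch_jobs:get_type_timeout(<<"my_type">>).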
+ + +bad_messages_restart_couch_jobs_server(#{}) -> + ?_test(begin + % couch_jobs_server dies on bad cast + ServerPid1 = whereis(couch_jobs_server), + Ref1 = monitor(process, ServerPid1), + gen_server:cast(ServerPid1, bad_cast), + receive {'DOWN', Ref1, _, _, _} -> ok end, + + restart_app(), + + % couch_jobs_server dies on bad call + ServerPid2 = whereis(couch_jobs_server), + Ref2 = monitor(process, ServerPid2), + catch gen_server:call(ServerPid2, bad_call), + receive {'DOWN', Ref2, _, _, _} -> ok end, + + restart_app(), + + % couch_jobs_server dies on bad info + ServerPid3 = whereis(couch_jobs_server), + Ref3 = monitor(process, ServerPid3), + ServerPid3 ! a_random_message, + receive {'DOWN', Ref3, _, _, _} -> ok end, + + restart_app() + end). + + +bad_messages_restart_notifier(#{}) -> + ?_test(begin + couch_jobs_server:force_check_types(), + + % bad cast kills the activity monitor + [AMon1, _] = couch_jobs_notifier_sup:get_child_pids(), + Ref1 = monitor(process, AMon1), + gen_server:cast(AMon1, bad_cast), + receive {'DOWN', Ref1, _, _, _} -> ok end, + + restart_app(), + + % bad calls restart activity monitor + [AMon2, _] = couch_jobs_notifier_sup:get_child_pids(), + Ref2 = monitor(process, AMon2), + catch gen_server:call(AMon2, bad_call), + receive {'DOWN', Ref2, _, _, _} -> ok end, + + restart_app(), + + % bad info message kills activity monitor + [AMon3, _] = couch_jobs_notifier_sup:get_child_pids(), + Ref3 = monitor(process, AMon3), + AMon3 ! a_bad_message, + receive {'DOWN', Ref3, _, _, _} -> ok end, + + + restart_app() + end). + + +bad_messages_restart_activity_monitor(#{}) -> + ?_test(begin + couch_jobs_server:force_check_types(), + + % bad cast kills the activity monitor + [AMon1, _] = couch_jobs_activity_monitor_sup:get_child_pids(), + Ref1 = monitor(process, AMon1), + gen_server:cast(AMon1, bad_cast), + receive {'DOWN', Ref1, _, _, _} -> ok end, + + restart_app(), + + % bad calls restart activity monitor + [AMon2, _] = couch_jobs_activity_monitor_sup:get_child_pids(), + Ref2 = monitor(process, AMon2), + catch gen_server:call(AMon2, bad_call), + receive {'DOWN', Ref2, _, _, _} -> ok end, + + restart_app(), + + % bad info message kills activity monitor + [AMon3, _] = couch_jobs_activity_monitor_sup:get_child_pids(), + Ref3 = monitor(process, AMon3), + AMon3 ! a_bad_message, + receive {'DOWN', Ref3, _, _, _} -> ok end, + + restart_app() + end). + + +basic_accept_and_finish(#{t1 := T, j1 := J}) -> + ?_test(begin + ok = couch_jobs:add(?TX, T, J, #{}), + {ok, Job, #{}} = couch_jobs:accept(T), + ?assertMatch(#{state := running}, get_job(T, J)), + % check json validation for bad data in finish + ?assertMatch({error, {json_encoding_error, _}}, + fabric2_fdb:transactional(fun(Tx) -> + couch_jobs:finish(Tx, Job, #{1 => 1}) + end)), + Data = #{<<"x">> => 42}, + ?assertEqual(ok, fabric2_fdb:transactional(fun(Tx) -> + couch_jobs:finish(Tx, Job, Data) + end)), + ?assertMatch(#{state := finished, data := Data}, get_job(T, J)) + end). + + +accept_blocking(#{t1 := T, j1 := J1, j2 := J2}) -> + ?_test(begin + Accept = fun() -> exit(couch_jobs:accept(T)) end, + WaitAccept = fun(Ref) -> + receive + {'DOWN', Ref, _, _, Res} -> Res + after + 500 -> timeout + end + end, + {_, Ref1} = spawn_monitor(Accept), + ok = couch_jobs:add(?TX, T, J1, #{}), + ?assertMatch({ok, #{id := J1}, #{}}, WaitAccept(Ref1)), + {_, Ref2} = spawn_monitor(Accept), + ?assertEqual(timeout, WaitAccept(Ref2)), + ok = couch_jobs:add(?TX, T, J2, #{}), + ?assertMatch({ok, #{id := J2}, #{}}, WaitAccept(Ref2)) + end). 
+ + +job_processor_update(#{t1 := T, j1 := J}) -> + ?_test(begin + ok = couch_jobs:add(?TX, T, J, #{}), + {ok, Job, #{}} = couch_jobs:accept(T), + + % Use proper transactions in a few places here instead of passing in + % ?TX This is mostly to increase code coverage + + ?assertMatch({ok, #{job := true}}, fabric2_fdb:transactional(fun(Tx) -> + couch_jobs:update(Tx, Job, #{<<"x">> => 1}) + end)), + + ?assertMatch(#{data := #{<<"x">> := 1}, state := running}, + get_job(T, J)), + + ?assertMatch({ok, #{job := true}}, fabric2_fdb:transactional(fun(Tx) -> + couch_jobs:update(Tx, Job) + end)), + + ?assertMatch(#{data := #{<<"x">> := 1}, state := running}, + get_job(T, J)), + + ?assertMatch({ok, #{job := true}}, fabric2_fdb:transactional(fun(Tx) -> + couch_jobs:update(Tx, Job, #{<<"x">> => 2}) + end)), + + % check json validation for bad data in update + ?assertMatch({error, {json_encoding_error, _}}, + fabric2_fdb:transactional(fun(Tx) -> + couch_jobs:update(Tx, Job, #{1 => 1}) + end)), + + ?assertMatch(#{data := #{<<"x">> := 2}, state := running}, + get_job(T, J)), + + % Finish may update the data as well + ?assertEqual(ok, couch_jobs:finish(?TX, Job, #{<<"x">> => 3})), + ?assertMatch(#{data := #{<<"x">> := 3}, state := finished}, + get_job(T, J)) + end). + + +resubmit_enqueues_job(#{t1 := T, j1 := J}) -> + ?_test(begin + ok = couch_jobs:add(?TX, T, J, #{}), + {ok, Job1, #{}} = couch_jobs:accept(T), + ?assertMatch({ok, _}, couch_jobs:resubmit(?TX, Job1, 6)), + ?assertEqual(ok, couch_jobs:finish(?TX, Job1)), + ?assertMatch(#{state := pending, stime := 6}, get_job(T, J)), + {ok, Job2, #{}} = couch_jobs:accept(T), + ?assertEqual(ok, couch_jobs:finish(?TX, Job2)), + ?assertMatch(#{state := finished}, get_job(T, J)) + end). + + +resubmit_finished_updates_job_data(#{t1 := T, j1 := J}) -> + ?_test(begin + Data1 = #{<<"test">> => 1}, + Data2 = #{<<"test">> => 2}, + ok = couch_jobs:add(?TX, T, J, Data1), + {ok, Job1, #{}} = couch_jobs:accept(T), + ?assertEqual(ok, couch_jobs:finish(?TX, Job1)), + ?assertMatch({ok, _}, couch_jobs:resubmit(?TX, Job1, 6, Data2)), + ?assertMatch({ok, _, Data2}, couch_jobs:accept(T)) + end). + + +resubmit_running_does_not_update_job_data(#{t1 := T, j1 := J}) -> + ?_test(begin + Data1 = #{<<"test">> => 1}, + Data2 = #{<<"test">> => 2}, + ok = couch_jobs:add(?TX, T, J, Data1), + {ok, Job1, #{}} = couch_jobs:accept(T), + ?assertMatch({ok, _}, couch_jobs:resubmit(?TX, Job1, 6, Data2)), + ?assertEqual(ok, couch_jobs:finish(?TX, Job1)), + ?assertMatch({ok, _, Data1}, couch_jobs:accept(T)) + end). + + +resubmit_custom_schedtime(#{t1 := T, j1 := J}) -> + ?_test(begin + ?assertEqual(ok, couch_jobs:add(?TX, T, J, #{}, 7)), + {ok, Job, #{}} = couch_jobs:accept(T), + ?assertMatch({ok, _}, couch_jobs:resubmit(?TX, Job, 9)), + ?assertEqual(ok, couch_jobs:finish(?TX, Job)), + ?assertMatch(#{stime := 9, state := pending}, get_job(T, J)) + end). + + +add_pending_updates_job_data(#{t1 := T, j1 := J}) -> + ?_test(begin + Data1 = #{<<"test">> => 1}, + Data2 = #{<<"test">> => 2}, + ok = couch_jobs:add(?TX, T, J, Data1), + ?assertEqual(ok, couch_jobs:add(?TX, T, J, Data2, 6)), + ?assertMatch({ok, _, Data2}, couch_jobs:accept(T)) + end). 
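A sketch of resubmitting an accepted job with a custom scheduled time, mirroring resubmit_custom_schedtime/1 above (type, job id and times are illustrative); after finish/2 the job is pending again with the new stime:

    resubmit_example(Type, JobId) ->
        ok = couch_jobs:add(undefined, Type, JobId, #{}, 7),
        {ok, Job, _Data} = couch_jobs:accept(Type),
        {ok, _} = couch_jobs:resubmit(undefined, Job, 9),
        ok = couch_jobs:finish(undefined, Job).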
+ + +add_finished_updates_job_data(#{t1 := T, j1 := J}) -> + ?_test(begin + Data1 = #{<<"test">> => 1}, + Data2 = #{<<"test">> => 2}, + ok = couch_jobs:add(?TX, T, J, Data1), + {ok, Job1, #{}} = couch_jobs:accept(T), + ?assertEqual(ok, couch_jobs:finish(?TX, Job1)), + ?assertEqual(ok, couch_jobs:add(?TX, T, J, Data2, 6)), + ?assertMatch({ok, _, Data2}, couch_jobs:accept(T)) + end). + + +add_running_does_not_update_job_data(#{t1 := T, j1 := J}) -> + ?_test(begin + Data1 = #{<<"test">> => 1}, + Data2 = #{<<"test">> => 2}, + ok = couch_jobs:add(?TX, T, J, Data1), + {ok, Job1, #{}} = couch_jobs:accept(T), + ?assertEqual(ok, couch_jobs:add(?TX, T, J, Data2, 6)), + ?assertEqual(ok, couch_jobs:finish(?TX, Job1)), + ?assertMatch({ok, _, Data1}, couch_jobs:accept(T)) + end). + + +accept_max_schedtime(#{t1 := T, j1 := J1, j2 := J2}) -> + ?_test(begin + ok = couch_jobs:add(?TX, T, J1, #{}, 5000), + ok = couch_jobs:add(?TX, T, J2, #{}, 3000), + ?assertEqual({error, not_found}, couch_jobs:accept(T, + #{max_sched_time => 1000})), + ?assertMatch({ok, #{id := J2}, _}, couch_jobs:accept(T, + #{max_sched_time => 3000})), + ?assertMatch({ok, #{id := J1}, _}, couch_jobs:accept(T, + #{max_sched_time => 9000})) + end). + + +accept_no_schedule(#{t1 := T}) -> + ?_test(begin + JobCount = 25, + Jobs = [fabric2_util:uuid() || _ <- lists:seq(1, JobCount)], + [couch_jobs:add(?TX, T, J, #{}) || J <- Jobs], + InvalidOpts = #{no_schedule => true, max_sched_time => 1}, + ?assertMatch({error, _}, couch_jobs:accept(T, InvalidOpts)), + AcceptOpts = #{no_schedule => true}, + Accepted = [begin + {ok, #{id := J}, _} = couch_jobs:accept(T, AcceptOpts), + J + end || _ <- lists:seq(1, JobCount)], + ?assertEqual(lists:sort(Jobs), lists:sort(Accepted)) + end). + + +subscribe(#{t1 := T, j1 := J}) -> + ?_test(begin + ok = couch_jobs:add(?TX, T, J, #{<<"z">> => 1}), + + ?assertEqual({error, not_found}, couch_jobs:subscribe(<<"xyz">>, J)), + ?assertEqual({error, not_found}, couch_jobs:subscribe(T, <<"j5">>)), + + SubRes0 = couch_jobs:subscribe(T, J), + ?assertMatch({ok, {_, _}, pending, #{<<"z">> := 1}}, SubRes0), + {ok, SubId0, pending, _} = SubRes0, + + SubRes1 = couch_jobs:subscribe(T, J), + ?assertEqual(SubRes0, SubRes1), + + ?assertEqual(ok, couch_jobs:unsubscribe(SubId0)), + + SubRes = couch_jobs:subscribe(T, J), + ?assertMatch({ok, {_, _}, pending, #{<<"z">> := 1}}, SubRes), + {ok, SubId, pending, _} = SubRes, + + {ok, Job, _} = couch_jobs:accept(T), + ?assertMatch({T, J, running, #{<<"z">> := 1}}, + couch_jobs:wait(SubId, 5000)), + + % Make sure we get intermediate `running` updates + ?assertMatch({ok, _}, couch_jobs:update(?TX, Job, #{<<"z">> => 2})), + ?assertMatch({T, J, running, #{<<"z">> := 2}}, + couch_jobs:wait(SubId, 5000)), + + ?assertEqual(ok, couch_jobs:finish(?TX, Job, #{<<"z">> => 3})), + ?assertMatch({T, J, finished, #{<<"z">> := 3}}, + couch_jobs:wait(SubId, finished, 5000)), + + ?assertEqual(timeout, couch_jobs:wait(SubId, 50)), + + ?assertEqual({ok, finished, #{<<"z">> => 3}}, + couch_jobs:subscribe(T, J)), + + ?assertEqual(ok, couch_jobs:remove(?TX, T, J)), + ?assertEqual({error, not_found}, couch_jobs:subscribe(T, J)) + end). + + +remove_when_subscribed_and_pending(#{t1 := T, j1 := J}) -> + ?_test(begin + ok = couch_jobs:add(?TX, T, J, #{<<"x">> => 1}), + {ok, SId, pending, _} = couch_jobs:subscribe(T, J), + + couch_jobs:remove(?TX, T, J), + + ?assertMatch({T, J, not_found, not_found}, couch_jobs:wait(SId, 5000)), + ?assertEqual(timeout, couch_jobs:wait(SId, 50)) + end). 
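A hedged sketch of the subscription API driven by subscribe/1 above: wait/2 returns either {Type, JobId, State, Data} or timeout, and subscriptions are released with unsubscribe/1 (the function name is illustrative):

    watch_job(Type, JobId) ->
        {ok, SubId, _State, _Data} = couch_jobs:subscribe(Type, JobId),
        Update = case couch_jobs:wait(SubId, 5000) of
            {Type, JobId, JobState, JobData} -> {JobState, JobData};
            timeout -> timeout
        end,
        ok = couch_jobs:unsubscribe(SubId),
        Update.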
+ + +remove_when_subscribed_and_running(#{t1 := T, j1 := J}) -> + ?_test(begin + ok = couch_jobs:add(?TX, T, J, #{<<"z">> => 2}), + {ok, SId, pending, _} = couch_jobs:subscribe(T, J), + {ok, #{}, _} = couch_jobs:accept(T), + ?assertMatch({_, _, running, _}, couch_jobs:wait(SId, 5000)), + + couch_jobs:remove(?TX, T, J), + + ?assertMatch({T, J, not_found, not_found}, couch_jobs:wait(SId, 5000)), + ?assertEqual(timeout, couch_jobs:wait(SId, 50)) + end). + + +subscribe_wait_multiple(#{t1 := T, j1 := J1, j2 := J2}) -> + ?_test(begin + ok = couch_jobs:add(?TX, T, J1, #{}), + ok = couch_jobs:add(?TX, T, J2, #{}), + + {ok, S1, pending, #{}} = couch_jobs:subscribe(T, J1), + {ok, S2, pending, #{}} = couch_jobs:subscribe(T, J2), + + Subs = [S1, S2], + + % Accept one job. Only one running update is expected. PJob1 and PJob2 + % do not necessarily correspond got Job1 and Job2, they could be + % accepted as Job2 and Job1 respectively. + {ok, PJob1, _} = couch_jobs:accept(T), + ?assertMatch({_, _, running, _}, couch_jobs:wait(Subs, 5000)), + ?assertMatch(timeout, couch_jobs:wait(Subs, 50)), + + % Accept another job. Expect another update. + {ok, PJob2, _} = couch_jobs:accept(T), + ?assertMatch({_, _, running, _}, couch_jobs:wait(Subs, 5000)), + ?assertMatch(timeout, couch_jobs:wait(Subs, 50)), + + ?assertMatch({ok, _}, couch_jobs:update(?TX, PJob1, #{<<"q">> => 5})), + ?assertMatch({ok, _}, couch_jobs:update(?TX, PJob2, #{<<"r">> => 6})), + + % Each job was updated once, expect two running updates. + ?assertMatch({_, _, running, _}, couch_jobs:wait(Subs, 5000)), + ?assertMatch({_, _, running, _}, couch_jobs:wait(Subs, 5000)), + + % Finish one job. Expect one finished update only. + ?assertEqual(ok, couch_jobs:finish(?TX, PJob1)), + + ?assertMatch({_, _, finished, #{<<"q">> := 5}}, + couch_jobs:wait(Subs, finished, 5000)), + ?assertMatch(timeout, couch_jobs:wait(Subs, finished, 50)), + + % Finish another job. However, unsubscribe should flush the + % the message and we should not get it. + ?assertEqual(ok, couch_jobs:finish(?TX, PJob2)), + ?assertEqual(ok, couch_jobs:unsubscribe(S1)), + ?assertEqual(ok, couch_jobs:unsubscribe(S2)), + ?assertMatch(timeout, couch_jobs:wait(Subs, finished, 50)) + end). + + +enqueue_inactive(#{t1 := T, j1 := J, t1_timeout := Timeout}) -> + {timeout, 10, ?_test(begin + couch_jobs_server:force_check_types(), + + ok = couch_jobs:add(?TX, T, J, #{<<"y">> => 1}), + {ok, Job, _} = couch_jobs:accept(T), + + {ok, SubId, running, #{<<"y">> := 1}} = couch_jobs:subscribe(T, J), + Wait = 3 * Timeout * 1000, + ?assertEqual({T, J, pending, #{<<"y">> => 1}}, + couch_jobs:wait(SubId, pending, Wait)), + ?assertMatch(#{state := pending}, get_job(T, J)), + + % After job was re-enqueued, old job processor can't update it anymore + ?assertEqual({error, halt}, couch_jobs:update(?TX, Job)), + ?assertEqual({error, halt}, couch_jobs:finish(?TX, Job)) + end)}. + + +remove_running_job(#{t1 := T, j1 := J}) -> + ?_test(begin + ok = couch_jobs:add(?TX, T, J, #{}), + {ok, Job, _} = couch_jobs:accept(T), + ?assertEqual(ok, couch_jobs:remove(?TX, T, J)), + ?assertEqual({error, not_found}, couch_jobs:remove(?TX, T, J)), + ?assertEqual({error, halt}, couch_jobs:update(?TX, Job)), + ?assertEqual({error, halt}, couch_jobs:finish(?TX, Job)) + end). 
+ + +check_get_jobs(#{t1 := T1, j1 := J1, t2 := T2, j2 := J2}) -> + ?_test(begin + ok = couch_jobs:add(?TX, T1, J1, #{}), + ok = couch_jobs:add(?TX, T2, J2, #{}), + ?assertMatch([ + {T2, J2, pending, #{}}, + {T1, J1, pending, #{}} + ], lists:sort(couch_jobs_fdb:get_jobs())), + {ok, _, _} = couch_jobs:accept(T1), + ?assertMatch([ + {T2, J2, pending, #{}}, + {T1, J1, running, #{}} + ], lists:sort(couch_jobs_fdb:get_jobs())) + end). + + +use_fabric_transaction_object(#{t1 := T1, j1 := J1, dbname := DbName}) -> + ?_test(begin + {ok, Db} = fabric2_db:create(DbName, []), + ?assertEqual(ok, couch_jobs:add(Db, T1, J1, #{})), + ?assertMatch(#{state := pending, data := #{}}, get_job(T1, J1)), + {ok, Job, _} = couch_jobs:accept(T1), + ?assertEqual(ok, fabric2_fdb:transactional(Db, fun(Db1) -> + {ok, #{}} = couch_jobs:get_job_data(Db1, T1, J1), + Doc1 = #doc{id = <<"1">>, body = {[]}}, + {ok, {_, _}} = fabric2_db:update_doc(Db1, Doc1), + Doc2 = #doc{id = <<"2">>, body = {[]}}, + {ok, {_, _}} = fabric2_db:update_doc(Db1, Doc2), + couch_jobs:finish(Db1, Job, #{<<"d">> => 1}) + end)), + ok = couch_jobs:remove(#{tx => undefined}, T1, J1), + ok = fabric2_db:delete(DbName, []) + end). + + +metadata_version_bump(_) -> + ?_test(begin + JTx1 = couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(), fun(Tx) -> Tx end), + ?assertMatch(#{md_version := not_found}, JTx1), + + ets:delete_all_objects(couch_jobs_fdb), + couch_jobs_fdb:bump_metadata_version(), + JTx2 = couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(), fun(Tx) -> Tx end), + ?assertMatch(#{md_version := Bin} when is_binary(Bin), JTx2), + + ets:delete_all_objects(couch_jobs_fdb), + couch_jobs_fdb:bump_metadata_version(), + JTx3 = couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(), fun(Tx) -> Tx end), + OldMdv = maps:get(md_version, JTx2), + NewMdv = maps:get(md_version, JTx3), + ?assert(NewMdv > OldMdv) + end). diff --git a/src/couch_js/README.md b/src/couch_js/README.md new file mode 100644 index 000000000..4084b7d8e --- /dev/null +++ b/src/couch_js/README.md @@ -0,0 +1,6 @@ +couch_js +=== + +This application is just an isolation of most of the code required for running couchjs. + +For the time being I'm not moving the implementation of couchjs due to the specifics of the build system configuration. Once we go to remove the `couch` application we'll have to revisit that approach.
\ No newline at end of file diff --git a/src/couch_js/src/couch_js.app.src b/src/couch_js/src/couch_js.app.src new file mode 100644 index 000000000..44efd6d7d --- /dev/null +++ b/src/couch_js/src/couch_js.app.src @@ -0,0 +1,27 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +{application, couch_js, [ + {description, "An OTP application"}, + {vsn, git}, + {registered, [ + couch_js_proc_manager + ]}, + {mod, {couch_js_app, []}}, + {applications, [ + kernel, + stdlib, + config, + couch_log, + ioq + ]} + ]}. diff --git a/src/couch_js/src/couch_js.erl b/src/couch_js/src/couch_js.erl new file mode 100644 index 000000000..1bc0f1927 --- /dev/null +++ b/src/couch_js/src/couch_js.erl @@ -0,0 +1,51 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + + +-module(couch_js). +-behavior(couch_eval). + + +-export([ + acquire_map_context/1, + release_map_context/1, + map_docs/2 +]). + + +-include_lib("couch/include/couch_db.hrl"). + + +-define(JS, <<"javascript">>). + + +acquire_map_context(Opts) -> + #{ + map_funs := MapFuns, + lib := Lib + } = Opts, + couch_js_query_servers:start_doc_map(?JS, MapFuns, Lib). + + +release_map_context(Proc) -> + couch_js_query_servers:stop_doc_map(Proc). + + +map_docs(Proc, Docs) -> + {ok, lists:map(fun(Doc) -> + {ok, RawResults} = couch_js_query_servers:map_doc_raw(Proc, Doc), + Results = couch_js_query_servers:raw_to_ejson(RawResults), + Tupled = lists:map(fun(ViewResult) -> + lists:map(fun([K, V]) -> {K, V} end, ViewResult) + end, Results), + {Doc#doc.id, Tupled} + end, Docs)}. diff --git a/src/couch_js/src/couch_js_app.erl b/src/couch_js/src/couch_js_app.erl new file mode 100644 index 000000000..b28f5852e --- /dev/null +++ b/src/couch_js/src/couch_js_app.erl @@ -0,0 +1,31 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + + +-module(couch_js_app). + + +-behaviour(application). + + +-export([ + start/2, + stop/1 +]). + + +start(_StartType, _StartArgs) -> + couch_js_sup:start_link(). + + +stop(_State) -> + ok.
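A hedged sketch of driving the couch_eval callbacks defined in couch_js above, assuming the couch_js application is running and couch_db.hrl is included for the #doc{} record; the map function source and document are illustrative:

    map_one_doc() ->
        {ok, Proc} = couch_js:acquire_map_context(#{
            map_funs => [<<"function(doc) { emit(doc._id, 1); }">>],
            lib => {[]}
        }),
        Doc = #doc{id = <<"example">>, body = {[]}},
        {ok, [{<<"example">>, Results}]} = couch_js:map_docs(Proc, [Doc]),
        couch_js:release_map_context(Proc),
        Results.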
\ No newline at end of file diff --git a/src/couch_js/src/couch_js_io_logger.erl b/src/couch_js/src/couch_js_io_logger.erl new file mode 100644 index 000000000..5a1695c01 --- /dev/null +++ b/src/couch_js/src/couch_js_io_logger.erl @@ -0,0 +1,107 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_js_io_logger). + +-export([ + start/1, + log_output/1, + log_input/1, + stop_noerror/0, + stop_error/1 +]). + + +start(undefined) -> + ok; +start(Dir) -> + case filelib:is_dir(Dir) of + true -> + Name = log_name(), + Path = Dir ++ "/" ++ Name, + OPath = Path ++ ".out.log_", + IPath = Path ++ ".in.log_", + {ok, OFd} = file:open(OPath, [read, write, raw]), + {ok, IFd} = file:open(IPath, [read, write, raw]), + ok = file:delete(OPath), + ok = file:delete(IPath), + put(logger_path, Path), + put(logger_out_fd, OFd), + put(logger_in_fd, IFd), + ok; + false -> + ok + end. + + +stop_noerror() -> + case get(logger_path) of + undefined -> + ok; + _Path -> + close_logs() + end. + + +stop_error(Err) -> + case get(logger_path) of + undefined -> + ok; + Path -> + save_error_logs(Path, Err), + close_logs() + end. + + +log_output(Data) -> + log(get(logger_out_fd), Data). + + +log_input(Data) -> + log(get(logger_in_fd), Data). + + +unix_time() -> + {Mega, Sec, USec} = os:timestamp(), + UnixTs = (Mega * 1000000 + Sec) * 1000000 + USec, + integer_to_list(UnixTs). + + +log_name() -> + Ts = unix_time(), + Pid0 = erlang:pid_to_list(self()), + Pid1 = string:strip(Pid0, left, $<), + Pid2 = string:strip(Pid1, right, $>), + lists:flatten(io_lib:format("~s_~s", [Ts, Pid2])). + + +close_logs() -> + file:close(get(logger_out_fd)), + file:close(get(logger_in_fd)). + + +save_error_logs(Path, Err) -> + Otp = erlang:system_info(otp_release), + Msg = io_lib:format("Error: ~p~nNode: ~p~nOTP: ~p~n", [Err, node(), Otp]), + file:write_file(Path ++ ".meta", Msg), + IFd = get(logger_out_fd), + OFd = get(logger_in_fd), + file:position(IFd, 0), + file:position(OFd, 0), + file:copy(IFd, Path ++ ".out.log"), + file:copy(OFd, Path ++ ".in.log"). + + +log(undefined, _Data) -> + ok; +log(Fd, Data) -> + ok = file:write(Fd, [Data, io_lib:nl()]). diff --git a/src/couch_js/src/couch_js_native_process.erl b/src/couch_js/src/couch_js_native_process.erl new file mode 100644 index 000000000..d5ed3f94f --- /dev/null +++ b/src/couch_js/src/couch_js_native_process.erl @@ -0,0 +1,468 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); +% you may not use this file except in compliance with the License. +% +% You may obtain a copy of the License at +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, +% software distributed under the License is distributed on an +% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +% either express or implied. +% +% See the License for the specific language governing permissions +% and limitations under the License. 
+% +% This file drew much inspiration from erlview, which was written by and +% copyright Michael McDaniel [http://autosys.us], and is also under APL 2.0 +% +% +% This module provides the smallest possible native view-server. +% With this module in-place, you can add the following to your couch INI files: +% [native_query_servers] +% erlang={couch_native_process, start_link, []} +% +% Which will then allow following example map function to be used: +% +% fun({Doc}) -> +% % Below, we emit a single record - the _id as key, null as value +% DocId = couch_util:get_value(<<"_id">>, Doc, null), +% Emit(DocId, null) +% end. +% +% which should be roughly the same as the javascript: +% emit(doc._id, null); +% +% This module exposes enough functions such that a native erlang server can +% act as a fully-fleged view server, but no 'helper' functions specifically +% for simplifying your erlang view code. It is expected other third-party +% extensions will evolve which offer useful layers on top of this view server +% to help simplify your view code. +-module(couch_js_native_process). +-behaviour(gen_server). +-vsn(1). + +-export([start_link/0,init/1,terminate/2,handle_call/3,handle_cast/2,code_change/3, + handle_info/2,format_status/2]). +-export([set_timeout/2, prompt/2]). + +-define(STATE, native_proc_state). +-record(evstate, { + ddocs, + funs = [], + query_config = [], + list_pid = nil, + timeout = 5000, + idle = 5000 +}). + +-include_lib("couch/include/couch_db.hrl"). + +start_link() -> + gen_server:start_link(?MODULE, [], []). + +% this is a bit messy, see also couch_query_servers handle_info +% stop(_Pid) -> +% ok. + +set_timeout(Pid, TimeOut) -> + gen_server:call(Pid, {set_timeout, TimeOut}). + +prompt(Pid, Data) when is_list(Data) -> + gen_server:call(Pid, {prompt, Data}). + +% gen_server callbacks +init([]) -> + V = config:get("query_server_config", "os_process_idle_limit", "300"), + Idle = list_to_integer(V) * 1000, + {ok, #evstate{ddocs=dict:new(), idle=Idle}, Idle}. + +handle_call({set_timeout, TimeOut}, _From, State) -> + {reply, ok, State#evstate{timeout=TimeOut}, State#evstate.idle}; + +handle_call({prompt, Data}, _From, State) -> + couch_log:debug("Prompt native qs: ~s",[?JSON_ENCODE(Data)]), + {NewState, Resp} = try run(State, to_binary(Data)) of + {S, R} -> {S, R} + catch + throw:{error, Why} -> + {State, [<<"error">>, Why, Why]} + end, + + Idle = State#evstate.idle, + case Resp of + {error, Reason} -> + Msg = io_lib:format("couch native server error: ~p", [Reason]), + Error = [<<"error">>, <<"native_query_server">>, list_to_binary(Msg)], + {reply, Error, NewState, Idle}; + [<<"error">> | Rest] -> + % Msg = io_lib:format("couch native server error: ~p", [Rest]), + % TODO: markh? (jan) + {reply, [<<"error">> | Rest], NewState, Idle}; + [<<"fatal">> | Rest] -> + % Msg = io_lib:format("couch native server error: ~p", [Rest]), + % TODO: markh? (jan) + {stop, fatal, [<<"error">> | Rest], NewState}; + Resp -> + {reply, Resp, NewState, Idle} + end. + +handle_cast(garbage_collect, State) -> + erlang:garbage_collect(), + {noreply, State, State#evstate.idle}; +handle_cast(stop, State) -> + {stop, normal, State}; +handle_cast(_Msg, State) -> + {noreply, State, State#evstate.idle}. + +handle_info(timeout, State) -> + gen_server:cast(couch_js_proc_manager, {os_proc_idle, self()}), + erlang:garbage_collect(), + {noreply, State, State#evstate.idle}; +handle_info({'EXIT',_,normal}, State) -> + {noreply, State, State#evstate.idle}; +handle_info({'EXIT',_,Reason}, State) -> + {stop, Reason, State}. 
+terminate(_Reason, _State) -> ok. +code_change(_OldVersion, State, _Extra) -> {ok, State}. + +format_status(_Opt, [_PDict, State]) -> + #evstate{ + ddocs = DDocs, + funs = Funs, + query_config = Config + } = State, + Scrubbed = State#evstate{ + ddocs = {dict_size, dict:size(DDocs)}, + funs = {length, length(Funs)}, + query_config = {length, length(Config)} + }, + [{data, [{"State", + ?record_to_keyval(evstate, Scrubbed) + }]}]. + + +run(#evstate{list_pid=Pid}=State, [<<"list_row">>, Row]) when is_pid(Pid) -> + Pid ! {self(), list_row, Row}, + receive + {Pid, chunks, Data} -> + {State, [<<"chunks">>, Data]}; + {Pid, list_end, Data} -> + receive + {'EXIT', Pid, normal} -> ok + after State#evstate.timeout -> + throw({timeout, list_cleanup}) + end, + process_flag(trap_exit, erlang:get(do_trap)), + {State#evstate{list_pid=nil}, [<<"end">>, Data]} + after State#evstate.timeout -> + throw({timeout, list_row}) + end; +run(#evstate{list_pid=Pid}=State, [<<"list_end">>]) when is_pid(Pid) -> + Pid ! {self(), list_end}, + Resp = + receive + {Pid, list_end, Data} -> + receive + {'EXIT', Pid, normal} -> ok + after State#evstate.timeout -> + throw({timeout, list_cleanup}) + end, + [<<"end">>, Data] + after State#evstate.timeout -> + throw({timeout, list_end}) + end, + process_flag(trap_exit, erlang:get(do_trap)), + {State#evstate{list_pid=nil}, Resp}; +run(#evstate{list_pid=Pid}=State, _Command) when is_pid(Pid) -> + {State, [<<"error">>, list_error, list_error]}; +run(#evstate{ddocs=DDocs}, [<<"reset">>]) -> + {#evstate{ddocs=DDocs}, true}; +run(#evstate{ddocs=DDocs, idle=Idle}, [<<"reset">>, QueryConfig]) -> + NewState = #evstate{ + ddocs = DDocs, + query_config = QueryConfig, + idle = Idle + }, + {NewState, true}; +run(#evstate{funs=Funs}=State, [<<"add_fun">> , BinFunc]) -> + FunInfo = makefun(State, BinFunc), + {State#evstate{funs=Funs ++ [FunInfo]}, true}; +run(State, [<<"map_doc">> , Doc]) -> + Resp = lists:map(fun({Sig, Fun}) -> + erlang:put(Sig, []), + Fun(Doc), + lists:reverse(erlang:get(Sig)) + end, State#evstate.funs), + {State, Resp}; +run(State, [<<"reduce">>, Funs, KVs]) -> + {Keys, Vals} = + lists:foldl(fun([K, V], {KAcc, VAcc}) -> + {[K | KAcc], [V | VAcc]} + end, {[], []}, KVs), + Keys2 = lists:reverse(Keys), + Vals2 = lists:reverse(Vals), + {State, catch reduce(State, Funs, Keys2, Vals2, false)}; +run(State, [<<"rereduce">>, Funs, Vals]) -> + {State, catch reduce(State, Funs, null, Vals, true)}; +run(#evstate{ddocs=DDocs}=State, [<<"ddoc">>, <<"new">>, DDocId, DDoc]) -> + DDocs2 = store_ddoc(DDocs, DDocId, DDoc), + {State#evstate{ddocs=DDocs2}, true}; +run(#evstate{ddocs=DDocs}=State, [<<"ddoc">>, DDocId | Rest]) -> + DDoc = load_ddoc(DDocs, DDocId), + ddoc(State, DDoc, Rest); +run(_, Unknown) -> + couch_log:error("Native Process: Unknown command: ~p~n", [Unknown]), + throw({error, unknown_command}). + +ddoc(State, {DDoc}, [FunPath, Args]) -> + % load fun from the FunPath + BFun = lists:foldl(fun + (Key, {Props}) when is_list(Props) -> + couch_util:get_value(Key, Props, nil); + (_Key, Fun) when is_binary(Fun) -> + Fun; + (_Key, nil) -> + throw({error, not_found}); + (_Key, _Fun) -> + throw({error, malformed_ddoc}) + end, {DDoc}, FunPath), + ddoc(State, makefun(State, BFun, {DDoc}), FunPath, Args). 
+ +ddoc(State, {_, Fun}, [<<"validate_doc_update">>], Args) -> + {State, (catch apply(Fun, Args))}; +ddoc(State, {_, Fun}, [<<"rewrites">>], Args) -> + {State, (catch apply(Fun, Args))}; +ddoc(State, {_, Fun}, [<<"filters">>|_], [Docs, Req]) -> + FilterFunWrapper = fun(Doc) -> + case catch Fun(Doc, Req) of + true -> true; + false -> false; + {'EXIT', Error} -> couch_log:error("~p", [Error]) + end + end, + Resp = lists:map(FilterFunWrapper, Docs), + {State, [true, Resp]}; +ddoc(State, {_, Fun}, [<<"views">>|_], [Docs]) -> + MapFunWrapper = fun(Doc) -> + case catch Fun(Doc) of + undefined -> true; + ok -> false; + false -> false; + [_|_] -> true; + {'EXIT', Error} -> couch_log:error("~p", [Error]) + end + end, + Resp = lists:map(MapFunWrapper, Docs), + {State, [true, Resp]}; +ddoc(State, {_, Fun}, [<<"shows">>|_], Args) -> + Resp = case (catch apply(Fun, Args)) of + FunResp when is_list(FunResp) -> + FunResp; + {FunResp} -> + [<<"resp">>, {FunResp}]; + FunResp -> + FunResp + end, + {State, Resp}; +ddoc(State, {_, Fun}, [<<"updates">>|_], Args) -> + Resp = case (catch apply(Fun, Args)) of + [JsonDoc, JsonResp] -> + [<<"up">>, JsonDoc, JsonResp] + end, + {State, Resp}; +ddoc(State, {Sig, Fun}, [<<"lists">>|_], Args) -> + Self = self(), + SpawnFun = fun() -> + LastChunk = (catch apply(Fun, Args)), + case start_list_resp(Self, Sig) of + started -> + receive + {Self, list_row, _Row} -> ignore; + {Self, list_end} -> ignore + after State#evstate.timeout -> + throw({timeout, list_cleanup_pid}) + end; + _ -> + ok + end, + LastChunks = + case erlang:get(Sig) of + undefined -> [LastChunk]; + OtherChunks -> [LastChunk | OtherChunks] + end, + Self ! {self(), list_end, lists:reverse(LastChunks)} + end, + erlang:put(do_trap, process_flag(trap_exit, true)), + Pid = spawn_link(SpawnFun), + Resp = + receive + {Pid, start, Chunks, JsonResp} -> + [<<"start">>, Chunks, JsonResp] + after State#evstate.timeout -> + throw({timeout, list_start}) + end, + {State#evstate{list_pid=Pid}, Resp}. + +store_ddoc(DDocs, DDocId, DDoc) -> + dict:store(DDocId, DDoc, DDocs). +load_ddoc(DDocs, DDocId) -> + try dict:fetch(DDocId, DDocs) of + {DDoc} -> {DDoc} + catch + _:_Else -> throw({error, ?l2b(io_lib:format("Native Query Server missing DDoc with Id: ~s",[DDocId]))}) + end. + +bindings(State, Sig) -> + bindings(State, Sig, nil). +bindings(State, Sig, DDoc) -> + Self = self(), + + Log = fun(Msg) -> + couch_log:info(Msg, []) + end, + + Emit = fun(Id, Value) -> + Curr = erlang:get(Sig), + erlang:put(Sig, [[Id, Value] | Curr]) + end, + + Start = fun(Headers) -> + erlang:put(list_headers, Headers) + end, + + Send = fun(Chunk) -> + Curr = + case erlang:get(Sig) of + undefined -> []; + Else -> Else + end, + erlang:put(Sig, [Chunk | Curr]) + end, + + GetRow = fun() -> + case start_list_resp(Self, Sig) of + started -> + ok; + _ -> + Chunks = + case erlang:get(Sig) of + undefined -> []; + CurrChunks -> CurrChunks + end, + Self ! {self(), chunks, lists:reverse(Chunks)} + end, + erlang:put(Sig, []), + receive + {Self, list_row, Row} -> Row; + {Self, list_end} -> nil + after State#evstate.timeout -> + throw({timeout, list_pid_getrow}) + end + end, + + FoldRows = fun(Fun, Acc) -> foldrows(GetRow, Fun, Acc) end, + + Bindings = [ + {'Log', Log}, + {'Emit', Emit}, + {'Start', Start}, + {'Send', Send}, + {'GetRow', GetRow}, + {'FoldRows', FoldRows} + ], + case DDoc of + {_Props} -> + Bindings ++ [{'DDoc', DDoc}]; + _Else -> Bindings + end. 
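The [<<"reduce">>, Funs, KVs] command handled earlier in this module evaluates Erlang reduce funs of arity 3, receiving the keys, the values, and the re-reduce flag. An illustrative count-style reducer in the same spirit as the map example in the module header:

    fun(_Keys, Values, false) ->
            length(Values);
       (_Keys, Values, true) ->
            lists:sum(Values)
    end.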
+ +% thanks to erlview, via: +% http://erlang.org/pipermail/erlang-questions/2003-November/010544.html +makefun(State, Source) -> + Sig = couch_hash:md5_hash(Source), + BindFuns = bindings(State, Sig), + {Sig, makefun(State, Source, BindFuns)}. +makefun(State, Source, {DDoc}) -> + Sig = couch_hash:md5_hash(lists:flatten([Source, term_to_binary(DDoc)])), + BindFuns = bindings(State, Sig, {DDoc}), + {Sig, makefun(State, Source, BindFuns)}; +makefun(_State, Source, BindFuns) when is_list(BindFuns) -> + FunStr = binary_to_list(Source), + {ok, Tokens, _} = erl_scan:string(FunStr), + Form = case (catch erl_parse:parse_exprs(Tokens)) of + {ok, [ParsedForm]} -> + ParsedForm; + {error, {LineNum, _Mod, [Mesg, Params]}}=Error -> + couch_log:error("Syntax error on line: ~p~n~s~p~n", + [LineNum, Mesg, Params]), + throw(Error) + end, + Bindings = lists:foldl(fun({Name, Fun}, Acc) -> + erl_eval:add_binding(Name, Fun, Acc) + end, erl_eval:new_bindings(), BindFuns), + {value, Fun, _} = erl_eval:expr(Form, Bindings), + Fun. + +reduce(State, BinFuns, Keys, Vals, ReReduce) -> + Funs = case is_list(BinFuns) of + true -> + lists:map(fun(BF) -> makefun(State, BF) end, BinFuns); + _ -> + [makefun(State, BinFuns)] + end, + Reds = lists:map(fun({_Sig, Fun}) -> + Fun(Keys, Vals, ReReduce) + end, Funs), + [true, Reds]. + +foldrows(GetRow, ProcRow, Acc) -> + case GetRow() of + nil -> + {ok, Acc}; + Row -> + case (catch ProcRow(Row, Acc)) of + {ok, Acc2} -> + foldrows(GetRow, ProcRow, Acc2); + {stop, Acc2} -> + {ok, Acc2} + end + end. + +start_list_resp(Self, Sig) -> + case erlang:get(list_started) of + undefined -> + Headers = + case erlang:get(list_headers) of + undefined -> {[{<<"headers">>, {[]}}]}; + CurrHdrs -> CurrHdrs + end, + Chunks = + case erlang:get(Sig) of + undefined -> []; + CurrChunks -> CurrChunks + end, + Self ! {self(), start, lists:reverse(Chunks), Headers}, + erlang:put(list_started, true), + erlang:put(Sig, []), + started; + _ -> + ok + end. + +to_binary({Data}) -> + Pred = fun({Key, Value}) -> + {to_binary(Key), to_binary(Value)} + end, + {lists:map(Pred, Data)}; +to_binary(Data) when is_list(Data) -> + [to_binary(D) || D <- Data]; +to_binary(null) -> + null; +to_binary(true) -> + true; +to_binary(false) -> + false; +to_binary(Data) when is_atom(Data) -> + list_to_binary(atom_to_list(Data)); +to_binary(Data) -> + Data. diff --git a/src/couch_js/src/couch_js_os_process.erl b/src/couch_js/src/couch_js_os_process.erl new file mode 100644 index 000000000..a453d1ab2 --- /dev/null +++ b/src/couch_js/src/couch_js_os_process.erl @@ -0,0 +1,265 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_js_os_process). +-behaviour(gen_server). +-vsn(1). + +-export([start_link/1, start_link/2, start_link/3, stop/1]). +-export([set_timeout/2, prompt/2, killer/1]). +-export([send/2, writeline/2, readline/1, writejson/2, readjson/1]). +-export([init/1, terminate/2, handle_call/3, handle_cast/2, handle_info/2, code_change/3]). + +-include_lib("couch/include/couch_db.hrl"). 
+ +-define(PORT_OPTIONS, [stream, {line, 4096}, binary, exit_status, hide]). + +-record(os_proc, + {command, + port, + writer, + reader, + timeout=5000, + idle + }). + +start_link(Command) -> + start_link(Command, []). +start_link(Command, Options) -> + start_link(Command, Options, ?PORT_OPTIONS). +start_link(Command, Options, PortOptions) -> + gen_server:start_link(?MODULE, [Command, Options, PortOptions], []). + +stop(Pid) -> + gen_server:cast(Pid, stop). + +% Read/Write API +set_timeout(Pid, TimeOut) when is_integer(TimeOut) -> + ok = gen_server:call(Pid, {set_timeout, TimeOut}, infinity). + +% Used by couch_event_os_process.erl +send(Pid, Data) -> + gen_server:cast(Pid, {send, Data}). + +prompt(Pid, Data) -> + case ioq:call(Pid, {prompt, Data}, erlang:get(io_priority)) of + {ok, Result} -> + Result; + Error -> + couch_log:error("OS Process Error ~p :: ~p",[Pid,Error]), + throw(Error) + end. + +% Utility functions for reading and writing +% in custom functions +writeline(OsProc, Data) when is_record(OsProc, os_proc) -> + Res = port_command(OsProc#os_proc.port, [Data, $\n]), + couch_js_io_logger:log_output(Data), + Res. + +readline(#os_proc{} = OsProc) -> + Res = readline(OsProc, []), + couch_js_io_logger:log_input(Res), + Res. +readline(#os_proc{port = Port} = OsProc, Acc) -> + receive + {Port, {data, {noeol, Data}}} when is_binary(Acc) -> + readline(OsProc, <<Acc/binary,Data/binary>>); + {Port, {data, {noeol, Data}}} when is_binary(Data) -> + readline(OsProc, Data); + {Port, {data, {noeol, Data}}} -> + readline(OsProc, [Data|Acc]); + {Port, {data, {eol, <<Data/binary>>}}} when is_binary(Acc) -> + [<<Acc/binary,Data/binary>>]; + {Port, {data, {eol, Data}}} when is_binary(Data) -> + [Data]; + {Port, {data, {eol, Data}}} -> + lists:reverse(Acc, Data); + {Port, Err} -> + catch port_close(Port), + throw({os_process_error, Err}) + after OsProc#os_proc.timeout -> + catch port_close(Port), + throw({os_process_error, "OS process timed out."}) + end. + +% Standard JSON functions +writejson(OsProc, Data) when is_record(OsProc, os_proc) -> + JsonData = ?JSON_ENCODE(Data), + couch_log:debug("OS Process ~p Input :: ~s", + [OsProc#os_proc.port, JsonData]), + true = writeline(OsProc, JsonData). + +readjson(OsProc) when is_record(OsProc, os_proc) -> + Line = iolist_to_binary(readline(OsProc)), + couch_log:debug("OS Process ~p Output :: ~s", [OsProc#os_proc.port, Line]), + try + % Don't actually parse the whole JSON. Just try to see if it's + % a command or a doc map/reduce/filter/show/list/update output. + % If it's a command then parse the whole JSON and execute the + % command, otherwise return the raw JSON line to the caller. + pick_command(Line) + catch + throw:abort -> + {json, Line}; + throw:{cmd, _Cmd} -> + case ?JSON_DECODE(Line) of + [<<"log">>, Msg] when is_binary(Msg) -> + % we got a message to log. Log it and continue + couch_log:info("OS Process ~p Log :: ~s", + [OsProc#os_proc.port, Msg]), + readjson(OsProc); + [<<"error">>, Id, Reason] -> + throw({error, {couch_util:to_existing_atom(Id),Reason}}); + [<<"fatal">>, Id, Reason] -> + couch_log:info("OS Process ~p Fatal Error :: ~s ~p", + [OsProc#os_proc.port, Id, Reason]), + throw({couch_util:to_existing_atom(Id),Reason}); + _Result -> + {json, Line} + end + end. + +pick_command(Line) -> + json_stream_parse:events(Line, fun pick_command0/1). + +pick_command0(array_start) -> + fun pick_command1/1; +pick_command0(_) -> + throw(abort). 
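As the comment in readjson/1 above explains, the reader avoids parsing every line as JSON; pick_command/1 only peeks at the first array element to decide whether the line is an out-of-band command. The shapes it distinguishes, with illustrative payloads:

    %% ["log", "message"]        - logged via couch_log, then the reader loops
    %% ["error", "id", "reason"] - thrown as {error, {id, Reason}}
    %% ["fatal", "id", "reason"] - logged, then thrown as {id, Reason}
    %% anything else             - returned untouched as {json, Line}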
+ +pick_command1(<<"log">> = Cmd) -> + throw({cmd, Cmd}); +pick_command1(<<"error">> = Cmd) -> + throw({cmd, Cmd}); +pick_command1(<<"fatal">> = Cmd) -> + throw({cmd, Cmd}); +pick_command1(_) -> + throw(abort). + + +% gen_server API +init([Command, Options, PortOptions]) -> + couch_js_io_logger:start(os:getenv("COUCHDB_IO_LOG_DIR")), + PrivDir = couch_util:priv_dir(), + Spawnkiller = "\"" ++ filename:join(PrivDir, "couchspawnkillable") ++ "\"", + V = config:get("query_server_config", "os_process_idle_limit", "300"), + IdleLimit = list_to_integer(V) * 1000, + BaseProc = #os_proc{ + command=Command, + port=open_port({spawn, Spawnkiller ++ " " ++ Command}, PortOptions), + writer=fun ?MODULE:writejson/2, + reader=fun ?MODULE:readjson/1, + idle=IdleLimit + }, + KillCmd = iolist_to_binary(readline(BaseProc)), + Pid = self(), + couch_log:debug("OS Process Start :: ~p", [BaseProc#os_proc.port]), + spawn(fun() -> + % this ensure the real os process is killed when this process dies. + erlang:monitor(process, Pid), + killer(?b2l(KillCmd)) + end), + OsProc = + lists:foldl(fun(Opt, Proc) -> + case Opt of + {writer, Writer} when is_function(Writer) -> + Proc#os_proc{writer=Writer}; + {reader, Reader} when is_function(Reader) -> + Proc#os_proc{reader=Reader}; + {timeout, TimeOut} when is_integer(TimeOut) -> + Proc#os_proc{timeout=TimeOut} + end + end, BaseProc, Options), + {ok, OsProc, IdleLimit}. + +terminate(Reason, #os_proc{port=Port}) -> + catch port_close(Port), + case Reason of + normal -> + couch_js_io_logger:stop_noerror(); + Error -> + couch_js_io_logger:stop_error(Error) + end, + ok. + +handle_call({set_timeout, TimeOut}, _From, #os_proc{idle=Idle}=OsProc) -> + {reply, ok, OsProc#os_proc{timeout=TimeOut}, Idle}; +handle_call({prompt, Data}, _From, #os_proc{idle=Idle}=OsProc) -> + #os_proc{writer=Writer, reader=Reader} = OsProc, + try + Writer(OsProc, Data), + {reply, {ok, Reader(OsProc)}, OsProc, Idle} + catch + throw:{error, OsError} -> + {reply, OsError, OsProc, Idle}; + throw:{fatal, OsError} -> + {stop, normal, OsError, OsProc}; + throw:OtherError -> + {stop, normal, OtherError, OsProc} + after + garbage_collect() + end. + +handle_cast({send, Data}, #os_proc{writer=Writer, idle=Idle}=OsProc) -> + try + Writer(OsProc, Data), + {noreply, OsProc, Idle} + catch + throw:OsError -> + couch_log:error("Failed sending data: ~p -> ~p", [Data, OsError]), + {stop, normal, OsProc} + end; +handle_cast(garbage_collect, #os_proc{idle=Idle}=OsProc) -> + erlang:garbage_collect(), + {noreply, OsProc, Idle}; +handle_cast(stop, OsProc) -> + {stop, normal, OsProc}; +handle_cast(Msg, #os_proc{idle=Idle}=OsProc) -> + couch_log:debug("OS Proc: Unknown cast: ~p", [Msg]), + {noreply, OsProc, Idle}. + +handle_info(timeout, #os_proc{idle=Idle}=OsProc) -> + gen_server:cast(couch_js_proc_manager, {os_proc_idle, self()}), + erlang:garbage_collect(), + {noreply, OsProc, Idle}; +handle_info({Port, {exit_status, 0}}, #os_proc{port=Port}=OsProc) -> + couch_log:info("OS Process terminated normally", []), + {stop, normal, OsProc}; +handle_info({Port, {exit_status, Status}}, #os_proc{port=Port}=OsProc) -> + couch_log:error("OS Process died with status: ~p", [Status]), + {stop, {exit_status, Status}, OsProc}; +handle_info(Msg, #os_proc{idle=Idle}=OsProc) -> + couch_log:debug("OS Proc: Unknown info: ~p", [Msg]), + {noreply, OsProc, Idle}. 
+ +code_change(_, {os_proc, Cmd, Port, W, R, Timeout} , _) -> + V = config:get("query_server_config","os_process_idle_limit","300"), + State = #os_proc{ + command = Cmd, + port = Port, + writer = W, + reader = R, + timeout = Timeout, + idle = list_to_integer(V) * 1000 + }, + {ok, State}; +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +killer(KillCmd) -> + receive _ -> + os:cmd(KillCmd) + after 1000 -> + ?MODULE:killer(KillCmd) + end. + diff --git a/src/couch_js/src/couch_js_proc_manager.erl b/src/couch_js/src/couch_js_proc_manager.erl new file mode 100644 index 000000000..db5c492f5 --- /dev/null +++ b/src/couch_js/src/couch_js_proc_manager.erl @@ -0,0 +1,615 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_js_proc_manager). +-behaviour(gen_server). +-behaviour(config_listener). +-vsn(1). + +-export([ + start_link/0, + get_proc_count/0, + get_stale_proc_count/0, + new_proc/1, + reload/0, + terminate_stale_procs/0 +]). + +-export([ + init/1, + terminate/2, + handle_call/3, + handle_cast/2, + handle_info/2, + code_change/3, + format_status/2 +]). + +-export([ + handle_config_change/5, + handle_config_terminate/3 +]). + +-include_lib("couch/include/couch_db.hrl"). + +-define(PROCS, couch_js_proc_manager_procs). +-define(WAITERS, couch_js_proc_manager_waiters). +-define(OPENING, couch_js_proc_manager_opening). +-define(SERVERS, couch_js_proc_manager_servers). +-define(RELISTEN_DELAY, 5000). + +-record(state, { + config, + counts, + threshold_ts, + hard_limit, + soft_limit +}). + +-type docid() :: iodata(). +-type revision() :: {integer(), binary()}. + +-record(client, { + timestamp :: os:timestamp() | '_', + from :: undefined | {pid(), reference()} | '_', + lang :: binary() | '_', + ddoc :: #doc{} | '_', + ddoc_key :: undefined | {DDocId :: docid(), Rev :: revision()} | '_' +}). + +-record(proc_int, { + pid, + lang, + client, + ddoc_keys = [], + prompt_fun, + set_timeout_fun, + stop_fun, + t0 = os:timestamp() +}). + + +start_link() -> + gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). + + +get_proc_count() -> + gen_server:call(?MODULE, get_proc_count). + + +get_stale_proc_count() -> + gen_server:call(?MODULE, get_stale_proc_count). + + +reload() -> + gen_server:call(?MODULE, set_threshold_ts). + + +terminate_stale_procs() -> + gen_server:call(?MODULE, terminate_stale_procs). 
+ + +init([]) -> + process_flag(trap_exit, true), + ok = config:listen_for_changes(?MODULE, undefined), + + TableOpts = [public, named_table, ordered_set], + ets:new(?PROCS, TableOpts ++ [{keypos, #proc_int.pid}]), + ets:new(?WAITERS, TableOpts ++ [{keypos, #client.timestamp}]), + ets:new(?OPENING, [public, named_table, set]), + ets:new(?SERVERS, [public, named_table, set]), + ets:insert(?SERVERS, get_servers_from_env("COUCHDB_QUERY_SERVER_")), + ets:insert(?SERVERS, get_servers_from_env("COUCHDB_NATIVE_QUERY_SERVER_")), + maybe_configure_erlang_native_servers(), + + {ok, #state{ + config = get_proc_config(), + counts = dict:new(), + threshold_ts = os:timestamp(), + hard_limit = get_hard_limit(), + soft_limit = get_soft_limit() + }}. + + +terminate(_Reason, _State) -> + ets:foldl(fun(#proc_int{pid=P}, _) -> + couch_util:shutdown_sync(P) + end, 0, ?PROCS), + ok. + + +handle_call(get_proc_count, _From, State) -> + NumProcs = ets:info(?PROCS, size), + NumOpening = ets:info(?OPENING, size), + {reply, NumProcs + NumOpening, State}; + +handle_call(get_stale_proc_count, _From, State) -> + #state{threshold_ts = T0} = State, + MatchSpec = [{#proc_int{t0='$1', _='_'}, [{'<', '$1', {T0}}], [true]}], + {reply, ets:select_count(?PROCS, MatchSpec), State}; + +handle_call({get_proc, #doc{body={Props}}=DDoc, DDocKey}, From, State) -> + LangStr = couch_util:get_value(<<"language">>, Props, <<"javascript">>), + Lang = couch_util:to_binary(LangStr), + Client = #client{from=From, lang=Lang, ddoc=DDoc, ddoc_key=DDocKey}, + add_waiting_client(Client), + {noreply, flush_waiters(State, Lang)}; + +handle_call({get_proc, LangStr}, From, State) -> + Lang = couch_util:to_binary(LangStr), + Client = #client{from=From, lang=Lang}, + add_waiting_client(Client), + {noreply, flush_waiters(State, Lang)}; + +handle_call({ret_proc, #proc{client=Ref} = Proc}, _From, State) -> + erlang:demonitor(Ref, [flush]), + NewState = case ets:lookup(?PROCS, Proc#proc.pid) of + [#proc_int{}=ProcInt] -> + return_proc(State, ProcInt); + [] -> + % Proc must've died and we already + % cleared it out of the table in + % the handle_info clause. + State + end, + {reply, true, NewState}; + +handle_call(set_threshold_ts, _From, State) -> + FoldFun = fun + (#proc_int{client = undefined} = Proc, StateAcc) -> + remove_proc(StateAcc, Proc); + (_, StateAcc) -> + StateAcc + end, + NewState = ets:foldl(FoldFun, State, ?PROCS), + {reply, ok, NewState#state{threshold_ts = os:timestamp()}}; + +handle_call(terminate_stale_procs, _From, #state{threshold_ts = Ts1} = State) -> + FoldFun = fun + (#proc_int{client = undefined, t0 = Ts2} = Proc, StateAcc) -> + case Ts1 > Ts2 of + true -> + remove_proc(StateAcc, Proc); + false -> + StateAcc + end; + (_, StateAcc) -> + StateAcc + end, + NewState = ets:foldl(FoldFun, State, ?PROCS), + {reply, ok, NewState}; + +handle_call(_Call, _From, State) -> + {reply, ignored, State}. 
+ + +handle_cast({os_proc_idle, Pid}, #state{counts=Counts}=State) -> + NewState = case ets:lookup(?PROCS, Pid) of + [#proc_int{client=undefined, lang=Lang}=Proc] -> + case dict:find(Lang, Counts) of + {ok, Count} when Count >= State#state.soft_limit -> + couch_log:info("Closing idle OS Process: ~p", [Pid]), + remove_proc(State, Proc); + {ok, _} -> + State + end; + _ -> + State + end, + {noreply, NewState}; + +handle_cast(reload_config, State) -> + NewState = State#state{ + config = get_proc_config(), + hard_limit = get_hard_limit(), + soft_limit = get_soft_limit() + }, + maybe_configure_erlang_native_servers(), + {noreply, flush_waiters(NewState)}; + +handle_cast(_Msg, State) -> + {noreply, State}. + + +handle_info(shutdown, State) -> + {stop, shutdown, State}; + +handle_info({'EXIT', Pid, {spawn_ok, Proc0, {ClientPid,_} = From}}, State) -> + ets:delete(?OPENING, Pid), + link(Proc0#proc_int.pid), + Proc = assign_proc(ClientPid, Proc0), + gen_server:reply(From, {ok, Proc, State#state.config}), + {noreply, State}; + +handle_info({'EXIT', Pid, spawn_error}, State) -> + [{Pid, #client{lang=Lang}}] = ets:lookup(?OPENING, Pid), + ets:delete(?OPENING, Pid), + NewState = State#state{ + counts = dict:update_counter(Lang, -1, State#state.counts) + }, + {noreply, flush_waiters(NewState, Lang)}; + +handle_info({'EXIT', Pid, Reason}, State) -> + couch_log:info("~p ~p died ~p", [?MODULE, Pid, Reason]), + case ets:lookup(?PROCS, Pid) of + [#proc_int{} = Proc] -> + NewState = remove_proc(State, Proc), + {noreply, flush_waiters(NewState, Proc#proc_int.lang)}; + [] -> + {noreply, State} + end; + +handle_info({'DOWN', Ref, _, _, _Reason}, State0) -> + case ets:match_object(?PROCS, #proc_int{client=Ref, _='_'}) of + [#proc_int{} = Proc] -> + {noreply, return_proc(State0, Proc)}; + [] -> + {noreply, State0} + end; + + +handle_info(restart_config_listener, State) -> + ok = config:listen_for_changes(?MODULE, nil), + {noreply, State}; + +handle_info(_Msg, State) -> + {noreply, State}. + + +code_change(_OldVsn, #state{}=State, _Extra) -> + {ok, State}. + + +format_status(_Opt, [_PDict, State]) -> + #state{ + counts = Counts + } = State, + Scrubbed = State#state{ + counts = {dict_size, dict:size(Counts)} + }, + [{data, [{"State", + ?record_to_keyval(state, Scrubbed) + }]}]. + + +handle_config_terminate(_, stop, _) -> + ok; +handle_config_terminate(_Server, _Reason, _State) -> + gen_server:cast(?MODULE, reload_config), + erlang:send_after(?RELISTEN_DELAY, whereis(?MODULE), restart_config_listener). + +handle_config_change("native_query_servers", _, _, _, _) -> + gen_server:cast(?MODULE, reload_config), + {ok, undefined}; +handle_config_change("query_server_config", _, _, _, _) -> + gen_server:cast(?MODULE, reload_config), + {ok, undefined}; +handle_config_change(_, _, _, _, _) -> + {ok, undefined}. + + +find_proc(#client{lang = Lang, ddoc_key = undefined}) -> + Pred = fun(_) -> + true + end, + find_proc(Lang, Pred); +find_proc(#client{lang = Lang, ddoc = DDoc, ddoc_key = DDocKey} = Client) -> + Pred = fun(#proc_int{ddoc_keys = DDocKeys}) -> + lists:member(DDocKey, DDocKeys) + end, + case find_proc(Lang, Pred) of + not_found -> + case find_proc(Client#client{ddoc_key=undefined}) of + {ok, Proc} -> + teach_ddoc(DDoc, DDocKey, Proc); + Else -> + Else + end; + Else -> + Else + end. + +find_proc(Lang, Fun) -> + try iter_procs(Lang, Fun) + catch error:Reason -> + StackTrace = erlang:get_stacktrace(), + couch_log:error("~p ~p ~p", [?MODULE, Reason, StackTrace]), + {error, Reason} + end. 
+ + +iter_procs(Lang, Fun) when is_binary(Lang) -> + Pattern = #proc_int{lang=Lang, client=undefined, _='_'}, + MSpec = [{Pattern, [], ['$_']}], + case ets:select_reverse(?PROCS, MSpec, 25) of + '$end_of_table' -> + not_found; + Continuation -> + iter_procs_int(Continuation, Fun) + end. + + +iter_procs_int({[], Continuation0}, Fun) -> + case ets:select_reverse(Continuation0) of + '$end_of_table' -> + not_found; + Continuation1 -> + iter_procs_int(Continuation1, Fun) + end; +iter_procs_int({[Proc | Rest], Continuation}, Fun) -> + case Fun(Proc) of + true -> + {ok, Proc}; + false -> + iter_procs_int({Rest, Continuation}, Fun) + end. + + +spawn_proc(State, Client) -> + Pid = spawn_link(?MODULE, new_proc, [Client]), + ets:insert(?OPENING, {Pid, Client}), + Counts = State#state.counts, + Lang = Client#client.lang, + State#state{ + counts = dict:update_counter(Lang, 1, Counts) + }. + + +new_proc(#client{ddoc=undefined, ddoc_key=undefined}=Client) -> + #client{from=From, lang=Lang} = Client, + Resp = try + case new_proc_int(From, Lang) of + {ok, Proc} -> + {spawn_ok, Proc, From}; + Error -> + gen_server:reply(From, {error, Error}), + spawn_error + end + catch _:_ -> + spawn_error + end, + exit(Resp); + +new_proc(Client) -> + #client{from=From, lang=Lang, ddoc=DDoc, ddoc_key=DDocKey} = Client, + Resp = try + case new_proc_int(From, Lang) of + {ok, NewProc} -> + {ok, Proc} = teach_ddoc(DDoc, DDocKey, NewProc), + {spawn_ok, Proc, From}; + Error -> + gen_server:reply(From, {error, Error}), + spawn_error + end + catch _:_ -> + spawn_error + end, + exit(Resp). + +split_string_if_longer(String, Pos) -> + case length(String) > Pos of + true -> lists:split(Pos, String); + false -> false + end. + +split_by_char(String, Char) -> + %% 17.5 doesn't have string:split + %% the function doesn't handle errors + %% it is designed to be used only in specific context + Pos = string:chr(String, Char), + {Key, [_Eq | Value]} = lists:split(Pos - 1, String), + {Key, Value}. + +get_servers_from_env(Spec) -> + SpecLen = length(Spec), + % loop over os:getenv(), match SPEC_ + lists:filtermap(fun(EnvStr) -> + case split_string_if_longer(EnvStr, SpecLen) of + {Spec, Rest} -> + {true, split_by_char(Rest, $=)}; + _ -> + false + end + end, os:getenv()). + +get_query_server(LangStr) -> + case ets:lookup(?SERVERS, string:to_upper(LangStr)) of + [{_, Command}] -> Command; + _ -> undefined + end. + +native_query_server_enabled() -> + % 1. [native_query_server] enable_erlang_query_server = true | false + % 2. if [native_query_server] erlang == {couch_native_process, start_link, []} -> pretend true as well + NativeEnabled = config:get_boolean("native_query_servers", "enable_erlang_query_server", false), + NativeLegacyConfig = config:get("native_query_servers", "erlang", ""), + NativeLegacyEnabled = NativeLegacyConfig =:= "{couch_native_process, start_link, []}", + NativeEnabled orelse NativeLegacyEnabled. + +maybe_configure_erlang_native_servers() -> + case native_query_server_enabled() of + true -> + ets:insert(?SERVERS, [ + {"ERLANG", {couch_js_native_process, start_link, []}}]); + _Else -> + ok + end. + +new_proc_int(From, Lang) when is_binary(Lang) -> + LangStr = binary_to_list(Lang), + case get_query_server(LangStr) of + undefined -> + gen_server:reply(From, {unknown_query_language, Lang}); + {M, F, A} -> + {ok, Pid} = apply(M, F, A), + make_proc(Pid, Lang, M); + Command -> + {ok, Pid} = couch_js_os_process:start_link(Command), + make_proc(Pid, Lang, couch_js_os_process) + end. 
+ + +teach_ddoc(DDoc, {DDocId, _Rev}=DDocKey, #proc_int{ddoc_keys=Keys}=Proc) -> + % send ddoc over the wire + % we only share the rev with the client we know to update code + % but it only keeps the latest copy, per each ddoc, around. + true = couch_js_query_servers:proc_prompt( + export_proc(Proc), + [<<"ddoc">>, <<"new">>, DDocId, couch_doc:to_json_obj(DDoc, [])]), + % we should remove any other ddocs keys for this docid + % because the query server overwrites without the rev + Keys2 = [{D,R} || {D,R} <- Keys, D /= DDocId], + % add ddoc to the proc + {ok, Proc#proc_int{ddoc_keys=[DDocKey|Keys2]}}. + + +make_proc(Pid, Lang, Mod) when is_binary(Lang) -> + Proc = #proc_int{ + lang = Lang, + pid = Pid, + prompt_fun = {Mod, prompt}, + set_timeout_fun = {Mod, set_timeout}, + stop_fun = {Mod, stop} + }, + unlink(Pid), + {ok, Proc}. + + +assign_proc(Pid, #proc_int{client=undefined}=Proc0) when is_pid(Pid) -> + Proc = Proc0#proc_int{client = erlang:monitor(process, Pid)}, + ets:insert(?PROCS, Proc), + export_proc(Proc); +assign_proc(#client{}=Client, #proc_int{client=undefined}=Proc) -> + {Pid, _} = Client#client.from, + assign_proc(Pid, Proc). + + +return_proc(#state{} = State, #proc_int{} = ProcInt) -> + #proc_int{pid = Pid, lang = Lang} = ProcInt, + NewState = case is_process_alive(Pid) of true -> + case ProcInt#proc_int.t0 < State#state.threshold_ts of + true -> + remove_proc(State, ProcInt); + false -> + gen_server:cast(Pid, garbage_collect), + true = ets:update_element(?PROCS, Pid, [ + {#proc_int.client, undefined} + ]), + State + end; + false -> + remove_proc(State, ProcInt) + end, + flush_waiters(NewState, Lang). + + +remove_proc(State, #proc_int{}=Proc) -> + ets:delete(?PROCS, Proc#proc_int.pid), + case is_process_alive(Proc#proc_int.pid) of true -> + unlink(Proc#proc_int.pid), + gen_server:cast(Proc#proc_int.pid, stop); + false -> + ok + end, + Counts = State#state.counts, + Lang = Proc#proc_int.lang, + State#state{ + counts = dict:update_counter(Lang, -1, Counts) + }. + + +-spec export_proc(#proc_int{}) -> #proc{}. +export_proc(#proc_int{} = ProcInt) -> + ProcIntList = tuple_to_list(ProcInt), + ProcLen = record_info(size, proc), + [_ | Data] = lists:sublist(ProcIntList, ProcLen), + list_to_tuple([proc | Data]). + + +flush_waiters(State) -> + dict:fold(fun(Lang, Count, StateAcc) -> + case Count < State#state.hard_limit of + true -> + flush_waiters(StateAcc, Lang); + false -> + StateAcc + end + end, State, State#state.counts). + + +flush_waiters(State, Lang) -> + CanSpawn = can_spawn(State, Lang), + case get_waiting_client(Lang) of + #client{from = From} = Client -> + case find_proc(Client) of + {ok, ProcInt} -> + Proc = assign_proc(Client, ProcInt), + gen_server:reply(From, {ok, Proc, State#state.config}), + remove_waiting_client(Client), + flush_waiters(State, Lang); + {error, Error} -> + gen_server:reply(From, {error, Error}), + remove_waiting_client(Client), + flush_waiters(State, Lang); + not_found when CanSpawn -> + NewState = spawn_proc(State, Client), + remove_waiting_client(Client), + flush_waiters(NewState, Lang); + not_found -> + State + end; + undefined -> + State + end. + + +add_waiting_client(Client) -> + ets:insert(?WAITERS, Client#client{timestamp=os:timestamp()}). + +-spec get_waiting_client(Lang :: binary()) -> undefined | #client{}. +get_waiting_client(Lang) -> + case ets:match_object(?WAITERS, #client{lang=Lang, _='_'}, 1) of + '$end_of_table' -> + undefined; + {[#client{}=Client], _} -> + Client + end. 
+ + +remove_waiting_client(#client{timestamp = Timestamp}) -> + ets:delete(?WAITERS, Timestamp). + + +can_spawn(#state{hard_limit = HardLimit, counts = Counts}, Lang) -> + case dict:find(Lang, Counts) of + {ok, Count} -> Count < HardLimit; + error -> true + end. + + +get_proc_config() -> + Limit = config:get("query_server_config", "reduce_limit", "true"), + Timeout = config:get("couchdb", "os_process_timeout", "5000"), + {[ + {<<"reduce_limit">>, list_to_atom(Limit)}, + {<<"timeout">>, list_to_integer(Timeout)} + ]}. + + +get_hard_limit() -> + LimStr = config:get("query_server_config", "os_process_limit", "100"), + list_to_integer(LimStr). + + +get_soft_limit() -> + LimStr = config:get("query_server_config", "os_process_soft_limit", "100"), + list_to_integer(LimStr). diff --git a/src/couch_js/src/couch_js_query_servers.erl b/src/couch_js/src/couch_js_query_servers.erl new file mode 100644 index 000000000..12dc864ea --- /dev/null +++ b/src/couch_js/src/couch_js_query_servers.erl @@ -0,0 +1,683 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_js_query_servers). + +-export([try_compile/4]). +-export([start_doc_map/3, map_doc_raw/2, stop_doc_map/1, raw_to_ejson/1]). +-export([reduce/3, rereduce/3,validate_doc_update/5]). +-export([filter_docs/5]). +-export([filter_view/3]). +-export([finalize/2]). +-export([rewrite/3]). + +-export([with_ddoc_proc/2, proc_prompt/2, ddoc_prompt/3, ddoc_proc_prompt/3, json_doc/1]). + +% For 210-os-proc-pool.t +-export([get_os_process/1, get_ddoc_process/2, ret_os_process/1]). + +-include_lib("couch/include/couch_db.hrl"). + +-define(SUMERROR, <<"The _sum function requires that map values be numbers, " + "arrays of numbers, or objects. Objects cannot be mixed with other " + "data structures. Objects can be arbitrarily nested, provided that the values " + "for all fields are themselves numbers, arrays of numbers, or objects.">>). + +-define(STATERROR, <<"The _stats function requires that map values be numbers " + "or arrays of numbers, not '~p'">>). + + +try_compile(Proc, FunctionType, FunctionName, FunctionSource) -> + try + proc_prompt(Proc, [<<"add_fun">>, FunctionSource]), + ok + catch + {compilation_error, E} -> + Fmt = "Compilation of the ~s function in the '~s' view failed: ~s", + Msg = io_lib:format(Fmt, [FunctionType, FunctionName, E]), + throw({compilation_error, Msg}); + {os_process_error, {exit_status, ExitStatus}} -> + Fmt = "Compilation of the ~s function in the '~s' view failed with exit status: ~p", + Msg = io_lib:format(Fmt, [FunctionType, FunctionName, ExitStatus]), + throw({compilation_error, Msg}) + end. + +start_doc_map(Lang, Functions, Lib) -> + Proc = get_os_process(Lang), + case Lib of + {[]} -> ok; + Lib -> + true = proc_prompt(Proc, [<<"add_lib">>, Lib]) + end, + lists:foreach(fun(FunctionSource) -> + true = proc_prompt(Proc, [<<"add_fun">>, FunctionSource]) + end, Functions), + {ok, Proc}. + +map_doc_raw(Proc, Doc) -> + Json = couch_doc:to_json_obj(Doc, []), + {ok, proc_prompt_raw(Proc, [<<"map_doc">>, Json])}. 
+ + +stop_doc_map(nil) -> + ok; +stop_doc_map(Proc) -> + ok = ret_os_process(Proc). + +group_reductions_results([]) -> + []; +group_reductions_results(List) -> + {Heads, Tails} = lists:foldl( + fun([H|T], {HAcc,TAcc}) -> + {[H|HAcc], [T|TAcc]} + end, {[], []}, List), + case Tails of + [[]|_] -> % no tails left + [Heads]; + _ -> + [Heads | group_reductions_results(Tails)] + end. + +finalize(<<"_approx_count_distinct",_/binary>>, Reduction) -> + true = hyper:is_hyper(Reduction), + {ok, round(hyper:card(Reduction))}; +finalize(<<"_stats",_/binary>>, Unpacked) -> + {ok, pack_stats(Unpacked)}; +finalize(_RedSrc, Reduction) -> + {ok, Reduction}. + +rereduce(_Lang, [], _ReducedValues) -> + {ok, []}; +rereduce(Lang, RedSrcs, ReducedValues) -> + Grouped = group_reductions_results(ReducedValues), + Results = lists:zipwith( + fun + (<<"_", _/binary>> = FunSrc, Values) -> + {ok, [Result]} = builtin_reduce(rereduce, [FunSrc], [[[], V] || V <- Values], []), + Result; + (FunSrc, Values) -> + os_rereduce(Lang, [FunSrc], Values) + end, RedSrcs, Grouped), + {ok, Results}. + +reduce(_Lang, [], _KVs) -> + {ok, []}; +reduce(Lang, RedSrcs, KVs) -> + {OsRedSrcs, BuiltinReds} = lists:partition(fun + (<<"_", _/binary>>) -> false; + (_OsFun) -> true + end, RedSrcs), + {ok, OsResults} = os_reduce(Lang, OsRedSrcs, KVs), + {ok, BuiltinResults} = builtin_reduce(reduce, BuiltinReds, KVs, []), + recombine_reduce_results(RedSrcs, OsResults, BuiltinResults, []). + + +recombine_reduce_results([], [], [], Acc) -> + {ok, lists:reverse(Acc)}; +recombine_reduce_results([<<"_", _/binary>>|RedSrcs], OsResults, [BRes|BuiltinResults], Acc) -> + recombine_reduce_results(RedSrcs, OsResults, BuiltinResults, [BRes|Acc]); +recombine_reduce_results([_OsFun|RedSrcs], [OsR|OsResults], BuiltinResults, Acc) -> + recombine_reduce_results(RedSrcs, OsResults, BuiltinResults, [OsR|Acc]). + +os_reduce(_Lang, [], _KVs) -> + {ok, []}; +os_reduce(Lang, OsRedSrcs, KVs) -> + Proc = get_os_process(Lang), + OsResults = try proc_prompt(Proc, [<<"reduce">>, OsRedSrcs, KVs]) of + [true, Reductions] -> Reductions + catch + throw:{reduce_overflow_error, Msg} -> + [{[{reduce_overflow_error, Msg}]} || _ <- OsRedSrcs] + after + ok = ret_os_process(Proc) + end, + {ok, OsResults}. + +os_rereduce(Lang, OsRedSrcs, KVs) -> + case get_overflow_error(KVs) of + undefined -> + Proc = get_os_process(Lang), + try proc_prompt(Proc, [<<"rereduce">>, OsRedSrcs, KVs]) of + [true, [Reduction]] -> Reduction + catch + throw:{reduce_overflow_error, Msg} -> + {[{reduce_overflow_error, Msg}]} + after + ok = ret_os_process(Proc) + end; + Error -> + Error + end. + + +get_overflow_error([]) -> + undefined; +get_overflow_error([{[{reduce_overflow_error, _}]} = Error | _]) -> + Error; +get_overflow_error([_ | Rest]) -> + get_overflow_error(Rest). 
+ + +builtin_reduce(_Re, [], _KVs, Acc) -> + {ok, lists:reverse(Acc)}; +builtin_reduce(Re, [<<"_sum",_/binary>>|BuiltinReds], KVs, Acc) -> + Sum = builtin_sum_rows(KVs, 0), + Red = check_sum_overflow(?term_size(KVs), ?term_size(Sum), Sum), + builtin_reduce(Re, BuiltinReds, KVs, [Red|Acc]); +builtin_reduce(reduce, [<<"_count",_/binary>>|BuiltinReds], KVs, Acc) -> + Count = length(KVs), + builtin_reduce(reduce, BuiltinReds, KVs, [Count|Acc]); +builtin_reduce(rereduce, [<<"_count",_/binary>>|BuiltinReds], KVs, Acc) -> + Count = builtin_sum_rows(KVs, 0), + builtin_reduce(rereduce, BuiltinReds, KVs, [Count|Acc]); +builtin_reduce(Re, [<<"_stats",_/binary>>|BuiltinReds], KVs, Acc) -> + Stats = builtin_stats(Re, KVs), + builtin_reduce(Re, BuiltinReds, KVs, [Stats|Acc]); +builtin_reduce(Re, [<<"_approx_count_distinct",_/binary>>|BuiltinReds], KVs, Acc) -> + Distinct = approx_count_distinct(Re, KVs), + builtin_reduce(Re, BuiltinReds, KVs, [Distinct|Acc]). + + +builtin_sum_rows([], Acc) -> + Acc; +builtin_sum_rows([[_Key, Value] | RestKVs], Acc) -> + try sum_values(Value, Acc) of + NewAcc -> + builtin_sum_rows(RestKVs, NewAcc) + catch + throw:{builtin_reduce_error, Obj} -> + Obj; + throw:{invalid_value, Reason, Cause} -> + {[{<<"error">>, <<"builtin_reduce_error">>}, + {<<"reason">>, Reason}, {<<"caused_by">>, Cause}]} + end. + + +sum_values(Value, Acc) when is_number(Value), is_number(Acc) -> + Acc + Value; +sum_values(Value, Acc) when is_list(Value), is_list(Acc) -> + sum_arrays(Acc, Value); +sum_values(Value, Acc) when is_number(Value), is_list(Acc) -> + sum_arrays(Acc, [Value]); +sum_values(Value, Acc) when is_list(Value), is_number(Acc) -> + sum_arrays([Acc], Value); +sum_values({Props}, Acc) -> + case lists:keyfind(<<"error">>, 1, Props) of + {<<"error">>, <<"builtin_reduce_error">>} -> + throw({builtin_reduce_error, {Props}}); + false -> + ok + end, + case Acc of + 0 -> + {Props}; + {AccProps} -> + {sum_objects(lists:sort(Props), lists:sort(AccProps))} + end; +sum_values(Else, _Acc) -> + throw_sum_error(Else). + +sum_objects([{K1, V1} | Rest1], [{K1, V2} | Rest2]) -> + [{K1, sum_values(V1, V2)} | sum_objects(Rest1, Rest2)]; +sum_objects([{K1, V1} | Rest1], [{K2, V2} | Rest2]) when K1 < K2 -> + [{K1, V1} | sum_objects(Rest1, [{K2, V2} | Rest2])]; +sum_objects([{K1, V1} | Rest1], [{K2, V2} | Rest2]) when K1 > K2 -> + [{K2, V2} | sum_objects([{K1, V1} | Rest1], Rest2)]; +sum_objects([], Rest) -> + Rest; +sum_objects(Rest, []) -> + Rest. + +sum_arrays([], []) -> + []; +sum_arrays([_|_]=Xs, []) -> + Xs; +sum_arrays([], [_|_]=Ys) -> + Ys; +sum_arrays([X|Xs], [Y|Ys]) when is_number(X), is_number(Y) -> + [X+Y | sum_arrays(Xs,Ys)]; +sum_arrays(Else, _) -> + throw_sum_error(Else). + +check_sum_overflow(InSize, OutSize, Sum) -> + Overflowed = OutSize > 4906 andalso OutSize * 2 > InSize, + case config:get("query_server_config", "reduce_limit", "true") of + "true" when Overflowed -> + Msg = log_sum_overflow(InSize, OutSize), + {[ + {<<"error">>, <<"builtin_reduce_error">>}, + {<<"reason">>, Msg} + ]}; + "log" when Overflowed -> + log_sum_overflow(InSize, OutSize), + Sum; + _ -> + Sum + end. + +log_sum_overflow(InSize, OutSize) -> + Fmt = "Reduce output must shrink more rapidly: " + "input size: ~b " + "output size: ~b", + Msg = iolist_to_binary(io_lib:format(Fmt, [InSize, OutSize])), + couch_log:error(Msg, []), + Msg. 
+ +builtin_stats(_, []) -> + {0, 0, 0, 0, 0}; +builtin_stats(_, [[_,First]|Rest]) -> + lists:foldl(fun([_Key, Value], Acc) -> + stat_values(Value, Acc) + end, build_initial_accumulator(First), Rest). + +stat_values(Value, Acc) when is_list(Value), is_list(Acc) -> + lists:zipwith(fun stat_values/2, Value, Acc); +stat_values({PreRed}, Acc) when is_list(PreRed) -> + stat_values(unpack_stats({PreRed}), Acc); +stat_values(Value, Acc) when is_number(Value) -> + stat_values({Value, 1, Value, Value, Value*Value}, Acc); +stat_values(Value, Acc) when is_number(Acc) -> + stat_values(Value, {Acc, 1, Acc, Acc, Acc*Acc}); +stat_values(Value, Acc) when is_tuple(Value), is_tuple(Acc) -> + {Sum0, Cnt0, Min0, Max0, Sqr0} = Value, + {Sum1, Cnt1, Min1, Max1, Sqr1} = Acc, + { + Sum0 + Sum1, + Cnt0 + Cnt1, + erlang:min(Min0, Min1), + erlang:max(Max0, Max1), + Sqr0 + Sqr1 + }; +stat_values(Else, _Acc) -> + throw_stat_error(Else). + +build_initial_accumulator(L) when is_list(L) -> + [build_initial_accumulator(X) || X <- L]; +build_initial_accumulator(X) when is_number(X) -> + {X, 1, X, X, X*X}; +build_initial_accumulator({_, _, _, _, _} = AlreadyUnpacked) -> + AlreadyUnpacked; +build_initial_accumulator({Props}) -> + unpack_stats({Props}); +build_initial_accumulator(Else) -> + Msg = io_lib:format("non-numeric _stats input: ~w", [Else]), + throw({invalid_value, iolist_to_binary(Msg)}). + +unpack_stats({PreRed}) when is_list(PreRed) -> + { + get_number(<<"sum">>, PreRed), + get_number(<<"count">>, PreRed), + get_number(<<"min">>, PreRed), + get_number(<<"max">>, PreRed), + get_number(<<"sumsqr">>, PreRed) + }. + + +pack_stats({Sum, Cnt, Min, Max, Sqr}) -> + {[{<<"sum">>,Sum}, {<<"count">>,Cnt}, {<<"min">>,Min}, {<<"max">>,Max}, {<<"sumsqr">>,Sqr}]}; +pack_stats({Packed}) -> + % Legacy code path before we had the finalize operation + {Packed}; +pack_stats(Stats) when is_list(Stats) -> + lists:map(fun pack_stats/1, Stats). + +get_number(Key, Props) -> + case couch_util:get_value(Key, Props) of + X when is_number(X) -> + X; + undefined when is_binary(Key) -> + get_number(binary_to_atom(Key, latin1), Props); + undefined -> + Msg = io_lib:format("user _stats input missing required field ~s (~p)", + [Key, Props]), + throw({invalid_value, iolist_to_binary(Msg)}); + Else -> + Msg = io_lib:format("non-numeric _stats input received for ~s: ~w", + [Key, Else]), + throw({invalid_value, iolist_to_binary(Msg)}) + end. + +% TODO allow customization of precision in the ddoc. +approx_count_distinct(reduce, KVs) -> + lists:foldl(fun([[Key, _Id], _Value], Filter) -> + hyper:insert(term_to_binary(Key), Filter) + end, hyper:new(11), KVs); +approx_count_distinct(rereduce, Reds) -> + hyper:union([Filter || [_, Filter] <- Reds]). + +% use the function stored in ddoc.validate_doc_update to test an update. +-spec validate_doc_update(DDoc, EditDoc, DiskDoc, Ctx, SecObj) -> ok when + DDoc :: ddoc(), + EditDoc :: doc(), + DiskDoc :: doc() | nil, + Ctx :: user_ctx(), + SecObj :: sec_obj(). 
+ +validate_doc_update(DDoc, EditDoc, DiskDoc, Ctx, SecObj) -> + JsonEditDoc = couch_doc:to_json_obj(EditDoc, [revs]), + JsonDiskDoc = json_doc(DiskDoc), + Resp = ddoc_prompt( + DDoc, + [<<"validate_doc_update">>], + [JsonEditDoc, JsonDiskDoc, Ctx, SecObj] + ), + if Resp == 1 -> ok; true -> + couch_stats:increment_counter([couchdb, query_server, vdu_rejects], 1) + end, + case Resp of + RespCode when RespCode =:= 1; RespCode =:= ok; RespCode =:= true -> + ok; + {[{<<"forbidden">>, Message}]} -> + throw({forbidden, Message}); + {[{<<"unauthorized">>, Message}]} -> + throw({unauthorized, Message}); + {[{_, Message}]} -> + throw({unknown_error, Message}); + Message when is_binary(Message) -> + throw({unknown_error, Message}) + end. + + +rewrite(Req, Db, DDoc) -> + Fields = [F || F <- chttpd_external:json_req_obj_fields(), + F =/= <<"info">>, F =/= <<"form">>, + F =/= <<"uuid">>, F =/= <<"id">>], + JsonReq = chttpd_external:json_req_obj(Req, Db, null, Fields), + case ddoc_prompt(DDoc, [<<"rewrites">>], [JsonReq]) of + {[{<<"forbidden">>, Message}]} -> + throw({forbidden, Message}); + {[{<<"unauthorized">>, Message}]} -> + throw({unauthorized, Message}); + [<<"no_dispatch_rule">>] -> + undefined; + [<<"ok">>, {V}=Rewrite] when is_list(V) -> + ok = validate_rewrite_response(Rewrite), + Rewrite; + [<<"ok">>, _] -> + throw_rewrite_error(<<"bad rewrite">>); + V -> + couch_log:error("bad rewrite return ~p", [V]), + throw({unknown_error, V}) + end. + +validate_rewrite_response({Fields}) when is_list(Fields) -> + validate_rewrite_response_fields(Fields). + +validate_rewrite_response_fields([{Key, Value} | Rest]) -> + validate_rewrite_response_field(Key, Value), + validate_rewrite_response_fields(Rest); +validate_rewrite_response_fields([]) -> + ok. + +validate_rewrite_response_field(<<"method">>, Method) when is_binary(Method) -> + ok; +validate_rewrite_response_field(<<"method">>, _) -> + throw_rewrite_error(<<"bad method">>); +validate_rewrite_response_field(<<"path">>, Path) when is_binary(Path) -> + ok; +validate_rewrite_response_field(<<"path">>, _) -> + throw_rewrite_error(<<"bad path">>); +validate_rewrite_response_field(<<"body">>, Body) when is_binary(Body) -> + ok; +validate_rewrite_response_field(<<"body">>, _) -> + throw_rewrite_error(<<"bad body">>); +validate_rewrite_response_field(<<"headers">>, {Props}=Headers) when is_list(Props) -> + validate_object_fields(Headers); +validate_rewrite_response_field(<<"headers">>, _) -> + throw_rewrite_error(<<"bad headers">>); +validate_rewrite_response_field(<<"query">>, {Props}=Query) when is_list(Props) -> + validate_object_fields(Query); +validate_rewrite_response_field(<<"query">>, _) -> + throw_rewrite_error(<<"bad query">>); +validate_rewrite_response_field(<<"code">>, Code) when is_integer(Code) andalso Code >= 200 andalso Code < 600 -> + ok; +validate_rewrite_response_field(<<"code">>, _) -> + throw_rewrite_error(<<"bad code">>); +validate_rewrite_response_field(K, V) -> + couch_log:debug("unknown rewrite field ~p=~p", [K, V]), + ok. + +validate_object_fields({Props}) when is_list(Props) -> + lists:foreach(fun + ({Key, Value}) when is_binary(Key) andalso is_binary(Value) -> + ok; + ({Key, Value}) -> + Reason = io_lib:format( + "object key/value must be strings ~p=~p", [Key, Value]), + throw_rewrite_error(Reason); + (Value) -> + throw_rewrite_error(io_lib:format("bad value ~p", [Value])) + end, Props). 
+ + +throw_rewrite_error(Reason) when is_list(Reason)-> + throw_rewrite_error(iolist_to_binary(Reason)); +throw_rewrite_error(Reason) when is_binary(Reason) -> + throw({rewrite_error, Reason}). + + +json_doc_options() -> + json_doc_options([]). + +json_doc_options(Options) -> + Limit = config:get_integer("query_server_config", "revs_limit", 20), + [{revs, Limit} | Options]. + +json_doc(Doc) -> + json_doc(Doc, json_doc_options()). + +json_doc(nil, _) -> + null; +json_doc(Doc, Options) -> + couch_doc:to_json_obj(Doc, Options). + +filter_view(DDoc, VName, Docs) -> + Options = json_doc_options(), + JsonDocs = [json_doc(Doc, Options) || Doc <- Docs], + [true, Passes] = ddoc_prompt(DDoc, [<<"views">>, VName, <<"map">>], [JsonDocs]), + {ok, Passes}. + +filter_docs(Req, Db, DDoc, FName, Docs) -> + JsonReq = case Req of + {json_req, JsonObj} -> + JsonObj; + #httpd{} = HttpReq -> + couch_httpd_external:json_req_obj(HttpReq, Db) + end, + Options = json_doc_options(), + JsonDocs = [json_doc(Doc, Options) || Doc <- Docs], + [true, Passes] = ddoc_prompt(DDoc, [<<"filters">>, FName], + [JsonDocs, JsonReq]), + {ok, Passes}. + +ddoc_proc_prompt({Proc, DDocId}, FunPath, Args) -> + proc_prompt(Proc, [<<"ddoc">>, DDocId, FunPath, Args]). + +ddoc_prompt(DDoc, FunPath, Args) -> + with_ddoc_proc(DDoc, fun({Proc, DDocId}) -> + proc_prompt(Proc, [<<"ddoc">>, DDocId, FunPath, Args]) + end). + +with_ddoc_proc(#doc{id=DDocId,revs={Start, [DiskRev|_]}}=DDoc, Fun) -> + Rev = couch_doc:rev_to_str({Start, DiskRev}), + DDocKey = {DDocId, Rev}, + Proc = get_ddoc_process(DDoc, DDocKey), + try Fun({Proc, DDocId}) + after + ok = ret_os_process(Proc) + end. + +proc_prompt(Proc, Args) -> + case proc_prompt_raw(Proc, Args) of + {json, Json} -> + ?JSON_DECODE(Json); + EJson -> + EJson + end. + +proc_prompt_raw(#proc{prompt_fun = {Mod, Func}} = Proc, Args) -> + apply(Mod, Func, [Proc#proc.pid, Args]). + +raw_to_ejson({json, Json}) -> + ?JSON_DECODE(Json); +raw_to_ejson(EJson) -> + EJson. + +proc_stop(Proc) -> + {Mod, Func} = Proc#proc.stop_fun, + apply(Mod, Func, [Proc#proc.pid]). + +proc_set_timeout(Proc, Timeout) -> + {Mod, Func} = Proc#proc.set_timeout_fun, + apply(Mod, Func, [Proc#proc.pid, Timeout]). + +get_os_process_timeout() -> + list_to_integer(config:get("couchdb", "os_process_timeout", "5000")). + +get_ddoc_process(#doc{} = DDoc, DDocKey) -> + % remove this case statement + case gen_server:call(couch_js_proc_manager, {get_proc, DDoc, DDocKey}, get_os_process_timeout()) of + {ok, Proc, {QueryConfig}} -> + % process knows the ddoc + case (catch proc_prompt(Proc, [<<"reset">>, {QueryConfig}])) of + true -> + proc_set_timeout(Proc, couch_util:get_value(<<"timeout">>, QueryConfig)), + Proc; + _ -> + catch proc_stop(Proc), + get_ddoc_process(DDoc, DDocKey) + end; + Error -> + throw(Error) + end. + +get_os_process(Lang) -> + case gen_server:call(couch_js_proc_manager, {get_proc, Lang}, get_os_process_timeout()) of + {ok, Proc, {QueryConfig}} -> + case (catch proc_prompt(Proc, [<<"reset">>, {QueryConfig}])) of + true -> + proc_set_timeout(Proc, couch_util:get_value(<<"timeout">>, QueryConfig)), + Proc; + _ -> + catch proc_stop(Proc), + get_os_process(Lang) + end; + Error -> + throw(Error) + end. + +ret_os_process(Proc) -> + true = gen_server:call(couch_js_proc_manager, {ret_proc, Proc}, infinity), + catch unlink(Proc#proc.pid), + ok. + +throw_sum_error(Else) -> + throw({invalid_value, ?SUMERROR, Else}). + +throw_stat_error(Else) -> + throw({invalid_value, iolist_to_binary(io_lib:format(?STATERROR, [Else]))}). + + +-ifdef(TEST). 
+-include_lib("eunit/include/eunit.hrl"). + +builtin_sum_rows_negative_test() -> + A = [{[{<<"a">>, 1}]}, {[{<<"a">>, 2}]}, {[{<<"a">>, 3}]}], + E = {[{<<"error">>, <<"builtin_reduce_error">>}]}, + ?assertEqual(E, builtin_sum_rows([["K", E]], [])), + % The below case is where the value is invalid, but no error because + % it's only one document. + ?assertEqual(A, builtin_sum_rows([["K", A]], [])), + {Result} = builtin_sum_rows([["K", A]], [1, 2, 3]), + ?assertEqual({<<"error">>, <<"builtin_reduce_error">>}, + lists:keyfind(<<"error">>, 1, Result)). + +sum_values_test() -> + ?assertEqual(3, sum_values(1, 2)), + ?assertEqual([2,4,6], sum_values(1, [1,4,6])), + ?assertEqual([3,5,7], sum_values([3,2,4], [0,3,3])), + X = {[{<<"a">>,1}, {<<"b">>,[1,2]}, {<<"c">>, {[{<<"d">>,3}]}}, + {<<"g">>,1}]}, + Y = {[{<<"a">>,2}, {<<"b">>,3}, {<<"c">>, {[{<<"e">>, 5}]}}, + {<<"f">>,1}, {<<"g">>,1}]}, + Z = {[{<<"a">>,3}, {<<"b">>,[4,2]}, {<<"c">>, {[{<<"d">>,3},{<<"e">>,5}]}}, + {<<"f">>,1}, {<<"g">>,2}]}, + ?assertEqual(Z, sum_values(X, Y)), + ?assertEqual(Z, sum_values(Y, X)). + +sum_values_negative_test() -> + % invalid value + A = [{[{<<"a">>, 1}]}, {[{<<"a">>, 2}]}, {[{<<"a">>, 3}]}], + B = ["error 1", "error 2"], + C = [<<"error 3">>, <<"error 4">>], + KV = {[{<<"error">>, <<"builtin_reduce_error">>}, + {<<"reason">>, ?SUMERROR}, {<<"caused_by">>, <<"some cause">>}]}, + ?assertThrow({invalid_value, _, _}, sum_values(A, [1, 2, 3])), + ?assertThrow({invalid_value, _, _}, sum_values(A, 0)), + ?assertThrow({invalid_value, _, _}, sum_values(B, [1, 2])), + ?assertThrow({invalid_value, _, _}, sum_values(C, [0])), + ?assertThrow({builtin_reduce_error, KV}, sum_values(KV, [0])). + +stat_values_test() -> + ?assertEqual({1, 2, 0, 1, 1}, stat_values(1, 0)), + ?assertEqual({11, 2, 1, 10, 101}, stat_values(1, 10)), + ?assertEqual([{9, 2, 2, 7, 53}, + {14, 2, 3, 11, 130}, + {18, 2, 5, 13, 194} + ], stat_values([2,3,5], [7,11,13])). + +reduce_stats_test() -> + ?assertEqual([ + {[{<<"sum">>,2},{<<"count">>,1},{<<"min">>,2},{<<"max">>,2},{<<"sumsqr">>,4}]} + ], test_reduce(<<"_stats">>, [[[null, key], 2]])), + + ?assertEqual([[ + {[{<<"sum">>,1},{<<"count">>,1},{<<"min">>,1},{<<"max">>,1},{<<"sumsqr">>,1}]}, + {[{<<"sum">>,2},{<<"count">>,1},{<<"min">>,2},{<<"max">>,2},{<<"sumsqr">>,4}]} + ]], test_reduce(<<"_stats">>, [[[null, key],[1,2]]])), + + ?assertEqual( + {[{<<"sum">>,2},{<<"count">>,1},{<<"min">>,2},{<<"max">>,2},{<<"sumsqr">>,4}]} + , element(2, finalize(<<"_stats">>, {2, 1, 2, 2, 4}))), + + ?assertEqual([ + {[{<<"sum">>,1},{<<"count">>,1},{<<"min">>,1},{<<"max">>,1},{<<"sumsqr">>,1}]}, + {[{<<"sum">>,2},{<<"count">>,1},{<<"min">>,2},{<<"max">>,2},{<<"sumsqr">>,4}]} + ], element(2, finalize(<<"_stats">>, [ + {1, 1, 1, 1, 1}, + {2, 1, 2, 2, 4} + ]))), + + ?assertEqual([ + {[{<<"sum">>,1},{<<"count">>,1},{<<"min">>,1},{<<"max">>,1},{<<"sumsqr">>,1}]}, + {[{<<"sum">>,2},{<<"count">>,1},{<<"min">>,2},{<<"max">>,2},{<<"sumsqr">>,4}]} + ], element(2, finalize(<<"_stats">>, [ + {1, 1, 1, 1, 1}, + {[{<<"sum">>,2},{<<"count">>,1},{<<"min">>,2},{<<"max">>,2},{<<"sumsqr">>,4}]} + ]))), + + ?assertEqual([ + {[{<<"sum">>,1},{<<"count">>,1},{<<"min">>,1},{<<"max">>,1},{<<"sumsqr">>,1}]}, + {[{<<"sum">>,2},{<<"count">>,1},{<<"min">>,2},{<<"max">>,2},{<<"sumsqr">>,4}]} + ], element(2, finalize(<<"_stats">>, [ + {[{<<"sum">>,1},{<<"count">>,1},{<<"min">>,1},{<<"max">>,1},{<<"sumsqr">>,1}]}, + {2, 1, 2, 2, 4} + ]))), + ok. 
+ +test_reduce(Reducer, KVs) -> + ?assertMatch({ok, _}, reduce(<<"javascript">>, [Reducer], KVs)), + {ok, Reduced} = reduce(<<"javascript">>, [Reducer], KVs), + {ok, Finalized} = finalize(Reducer, Reduced), + Finalized. + +-endif. diff --git a/src/couch_js/src/couch_js_sup.erl b/src/couch_js/src/couch_js_sup.erl new file mode 100644 index 000000000..e87546127 --- /dev/null +++ b/src/couch_js/src/couch_js_sup.erl @@ -0,0 +1,45 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + + +-module(couch_js_sup). +-behaviour(supervisor). + + +-export([ + start_link/0 +]). + +-export([ + init/1 +]). + + +start_link() -> + supervisor:start_link({local, ?MODULE}, ?MODULE, []). + + +init([]) -> + Flags = #{ + strategy => one_for_one, + intensity => 50, + period => 3600 + }, + Children = [ + #{ + id => couch_js_proc_manager, + restart => permanent, + shutdown => brutal_kill, + start => {couch_js_proc_manager, start_link, []} + } + ], + {ok, {Flags, Children}}. diff --git a/src/couch_js/test/couch_js_proc_manager_tests.erl b/src/couch_js/test/couch_js_proc_manager_tests.erl new file mode 100644 index 000000000..f138dd651 --- /dev/null +++ b/src/couch_js/test/couch_js_proc_manager_tests.erl @@ -0,0 +1,373 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_js_proc_manager_tests). + +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("couch/include/couch_db.hrl"). + + +-define(TDEF(A), {atom_to_list(A), fun A/0}). + +-define(NUM_PROCS, 3). +-define(TIMEOUT, 1000). + +-define(TIMEOUT_ERROR(Msg), erlang:error({assertion_failed, [ + {module, ?MODULE}, + {line, ?LINE}, + {reason, Msg} + ]})). + + +start() -> + ok = application:set_env(config, ini_files, ?CONFIG_CHAIN), + {ok, Started} = application:ensure_all_started(couch_js), + config:set("native_query_servers", "enable_erlang_query_server", "true", false), + config:set("query_server_config", "os_process_limit", "3", false), + config:set("query_server_config", "os_process_soft_limit", "2", false), + config:set("query_server_config", "os_process_idle_limit", "1", false), + ok = config_wait("os_process_idle_limit", "1"), + Started. + + +stop(Apps) -> + lists:foreach(fun(App) -> + ok = application:stop(App) + end, lists:reverse(Apps)). 
+ + +couch_js_proc_manager_test_() -> + { + "couch_js_proc_manger tests", + { + setup, + fun start/0, + fun stop/1, + [ + ?TDEF(should_block_new_proc_on_full_pool), + ?TDEF(should_free_slot_on_proc_unexpected_exit), + ?TDEF(should_reuse_known_proc), + ?TDEF(should_process_waiting_queue_as_fifo), + ?TDEF(should_reduce_pool_on_idle_os_procs) + ] + } + }. + + +should_block_new_proc_on_full_pool() -> + ok = couch_js_proc_manager:reload(), + + Clients = [ + spawn_client(), + spawn_client(), + spawn_client() + ], + + lists:foreach(fun(Client) -> + ?assertEqual(ok, ping_client(Client)) + end, Clients), + + % Make sure everyone got a different proc + Procs = [get_client_proc(Client) || Client <- Clients], + ?assertEqual(lists:sort(Procs), lists:usort(Procs)), + + % This client will be stuck waiting for someone + % to give up their proc. + Client4 = spawn_client(), + ?assert(is_client_waiting(Client4)), + + Client1 = hd(Clients), + Proc1 = hd(Procs), + + ?assertEqual(ok, stop_client(Client1)), + ?assertEqual(ok, ping_client(Client4)), + + Proc4 = get_client_proc(Client4), + + ?assertEqual(Proc1#proc.pid, Proc4#proc.pid), + ?assertNotEqual(Proc1#proc.client, Proc4#proc.client), + + lists:map(fun(C) -> + ?assertEqual(ok, stop_client(C)) + end, [Client4 | tl(Clients)]). + + +should_free_slot_on_proc_unexpected_exit() -> + ok = couch_js_proc_manager:reload(), + + Clients = [ + spawn_client(), + spawn_client(), + spawn_client() + ], + + lists:foreach(fun(Client) -> + ?assertEqual(ok, ping_client(Client)) + end, Clients), + + Procs1 = [get_client_proc(Client) || Client <- Clients], + ProcClients1 = [Proc#proc.client || Proc <- Procs1], + ?assertEqual(lists:sort(Procs1), lists:usort(Procs1)), + ?assertEqual(lists:sort(ProcClients1), lists:usort(ProcClients1)), + + Client1 = hd(Clients), + Proc1 = hd(Procs1), + ?assertEqual(ok, kill_client(Client1)), + + Client4 = spawn_client(), + ?assertEqual(ok, ping_client(Client4)), + Proc4 = get_client_proc(Client4), + + ?assertEqual(Proc1#proc.pid, Proc4#proc.pid), + ?assertNotEqual(Proc1#proc.client, Proc4#proc.client), + + Procs2 = [Proc4 | tl(Procs1)], + ProcClients2 = [Proc4#proc.client | tl(ProcClients1)], + ?assertEqual(lists:sort(Procs2), lists:usort(Procs2)), + ?assertEqual(lists:sort(ProcClients2), lists:usort(ProcClients2)), + + lists:map(fun(C) -> + ?assertEqual(ok, stop_client(C)) + end, [Client4 | tl(Clients)]). + + +should_reuse_known_proc() -> + ok = couch_js_proc_manager:reload(), + + Clients = [ + spawn_client(<<"ddoc1">>), + spawn_client(<<"ddoc2">>) + ], + + lists:foreach(fun(Client) -> + ?assertEqual(ok, ping_client(Client)) + end, Clients), + + Procs = [get_client_proc(Client) || Client <- Clients], + ?assertEqual(lists:sort(Procs), lists:usort(Procs)), + + lists:foreach(fun(Client) -> + ?assertEqual(ok, stop_client(Client)) + end, Clients), + + lists:foreach(fun(Proc) -> + ?assert(is_process_alive(Proc#proc.pid)) + end, Procs), + + Client = spawn_client(<<"ddoc1">>), + ?assertEqual(ok, ping_client(Client)), + + OldProc = hd(Procs), + NewProc = get_client_proc(Client), + + ?assertEqual(OldProc#proc.pid, NewProc#proc.pid), + ?assertNotEqual(OldProc#proc.client, NewProc#proc.client), + ?assertEqual(ok, stop_client(Client)). 
+ + +should_process_waiting_queue_as_fifo() -> + Clients = [ + spawn_client(<<"ddoc1">>), + spawn_client(<<"ddoc2">>), + spawn_client(<<"ddoc3">>), + spawn_client(<<"ddoc4">>), + spawn_client(<<"ddoc5">>), + spawn_client(<<"ddoc6">>) + ], + + lists:foldl(fun(Client, Pos) -> + case Pos =< ?NUM_PROCS of + true -> + ?assertEqual(ok, ping_client(Client)); + false -> + ?assert(is_client_waiting(Client)) + end, + Pos + 1 + end, 1, Clients), + + LastClients = lists:foldl(fun(_Iteration, ClientAcc) -> + FirstClient = hd(ClientAcc), + FirstProc = get_client_proc(FirstClient), + ?assertEqual(ok, stop_client(FirstClient)), + + RestClients = tl(ClientAcc), + + lists:foldl(fun(Client, Pos) -> + case Pos =< ?NUM_PROCS of + true -> + ?assertEqual(ok, ping_client(Client)); + false -> + ?assert(is_client_waiting(Client)) + end, + if Pos /= ?NUM_PROCS -> ok; true -> + BubbleProc = get_client_proc(Client), + ?assertEqual(FirstProc#proc.pid, BubbleProc#proc.pid), + ?assertNotEqual(FirstProc#proc.client, BubbleProc#proc.client) + end, + Pos + 1 + end, 1, RestClients), + + RestClients + end, Clients, lists:seq(1, 3)), + + lists:foreach(fun(Client) -> + ?assertEqual(ok, stop_client(Client)) + end, LastClients). + + +should_reduce_pool_on_idle_os_procs() -> + Clients = [ + spawn_client(<<"ddoc1">>), + spawn_client(<<"ddoc2">>), + spawn_client(<<"ddoc3">>) + ], + + lists:foreach(fun(Client) -> + ?assertEqual(ok, ping_client(Client)) + end, Clients), + + ?assertEqual(3, couch_js_proc_manager:get_proc_count()), + + lists:foreach(fun(Client) -> + ?assertEqual(ok, stop_client(Client)) + end, Clients), + + ?assertEqual(3, couch_js_proc_manager:get_proc_count()), + + timer:sleep(1200), + + ?assertEqual(1, couch_js_proc_manager:get_proc_count()). + + +spawn_client() -> + Parent = self(), + Ref = make_ref(), + {Pid, _} = spawn_monitor(fun() -> + Parent ! {self(), initialized}, + Proc = couch_js_query_servers:get_os_process(<<"erlang">>), + loop(Parent, Ref, Proc) + end), + receive + {Pid, initialized} -> + ok + after ?TIMEOUT -> + ?TIMEOUT_ERROR("Error creating client.") + end, + {Pid, Ref}. + + +spawn_client(DDocId) -> + Parent = self(), + Ref = make_ref(), + {Pid, _} = spawn_monitor(fun() -> + DDocKey = {DDocId, <<"1-abcdefgh">>}, + DDoc = #doc{body={[{<<"language">>, <<"erlang">>}]}}, + Parent ! {self(), initialized}, + Proc = couch_js_query_servers:get_ddoc_process(DDoc, DDocKey), + loop(Parent, Ref, Proc) + end), + receive + {Pid, initialized} -> + ok + after ?TIMEOUT -> + ?TIMEOUT_ERROR("Error creating ddoc client.") + end, + {Pid, Ref}. + + +loop(Parent, Ref, Proc) -> + receive + ping -> + Parent ! {pong, Ref}, + loop(Parent, Ref, Proc); + get_proc -> + Parent ! {proc, Ref, Proc}, + loop(Parent, Ref, Proc); + stop -> + couch_js_query_servers:ret_os_process(Proc), + Parent ! {stop, Ref}; + die -> + Parent ! {die, Ref}, + exit(some_error) + end. + + +ping_client({Pid, Ref}) -> + Pid ! ping, + receive + {pong, Ref} -> + ok + after ?TIMEOUT -> + ?TIMEOUT_ERROR("Timeout pinging client") + end. + + +is_client_waiting({Pid, _Ref}) -> + {status, Status} = process_info(Pid, status), + {current_function, {M, F, A}} = process_info(Pid, current_function), + Status == waiting andalso {M, F, A} == {gen, do_call, 4}. + + +get_client_proc({Pid, Ref}) -> + Pid ! get_proc, + receive + {proc, Ref, Proc} -> Proc + after ?TIMEOUT -> + ?TIMEOUT_ERROR("Timeout getting proc from client") + end. + + +stop_client({Pid, Ref}) -> + Pid ! 
stop, + receive + {stop, Ref} -> + ok + after ?TIMEOUT -> + ?TIMEOUT_ERROR("Timeout stopping client") + end, + receive + {'DOWN', _, _, Pid, _} -> + ok + after ?TIMEOUT -> + ?TIMEOUT_ERROR("Timeout waiting for stopped client 'DOWN'") + end. + + +kill_client({Pid, Ref}) -> + Pid ! die, + receive + {die, Ref} -> + ok + after ?TIMEOUT -> + ?TIMEOUT_ERROR("Timeout killing client") + end, + receive + {'DOWN', _, _, Pid, _} -> + ok + after ?TIMEOUT -> + ?TIMEOUT_ERROR("Timeout waiting for killed client 'DOWN'") + end. + + +config_wait(Key, Value) -> + config_wait(Key, Value, 0). + +config_wait(Key, Value, Count) -> + case config:get("query_server_config", Key) of + Value -> + ok; + _ when Count > 10 -> + ?TIMEOUT_ERROR("Error waiting for config changes."); + _ -> + timer:sleep(10), + config_wait(Key, Value, Count + 1) + end. diff --git a/src/couch_js/test/couch_js_query_servers_tests.erl b/src/couch_js/test/couch_js_query_servers_tests.erl new file mode 100644 index 000000000..bc4ecc72f --- /dev/null +++ b/src/couch_js/test/couch_js_query_servers_tests.erl @@ -0,0 +1,96 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_js_query_servers_tests). + +-include_lib("couch/include/couch_eunit.hrl"). + + +setup() -> + meck:new([config, couch_log]). + + +teardown(_) -> + meck:unload(). + + +sum_overflow_test_() -> + { + "Test overflow detection in the _sum reduce function", + { + setup, + fun setup/0, + fun teardown/1, + [ + fun should_return_error_on_overflow/0, + fun should_return_object_on_log/0, + fun should_return_object_on_false/0 + ] + } + }. + + +should_return_error_on_overflow() -> + setup_reduce_limit_mock("true"), + + KVs = gen_sum_kvs(), + {ok, [Result]} = couch_query_servers:reduce(<<"foo">>, [<<"_sum">>], KVs), + ?assertMatch({[{<<"error">>, <<"builtin_reduce_error">>} | _]}, Result), + + check_reduce_limit_mock(). + + +should_return_object_on_log() -> + setup_reduce_limit_mock("log"), + + KVs = gen_sum_kvs(), + {ok, [Result]} = couch_query_servers:reduce(<<"foo">>, [<<"_sum">>], KVs), + ?assertMatch({[_ | _]}, Result), + Keys = [K || {K, _} <- element(1, Result)], + ?assert(not lists:member(<<"error">>, Keys)), + + check_reduce_limit_mock(). + + +should_return_object_on_false() -> + setup_reduce_limit_mock("false"), + + KVs = gen_sum_kvs(), + {ok, [Result]} = couch_query_servers:reduce(<<"foo">>, [<<"_sum">>], KVs), + ?assertMatch({[_ | _]}, Result), + Keys = [K || {K, _} <- element(1, Result)], + ?assert(not lists:member(<<"error">>, Keys)), + + ?assert(meck:called(config, get, '_')), + ?assertNot(meck:called(couch_log, error, '_')). + + +gen_sum_kvs() -> + lists:map(fun(I) -> + Props = lists:map(fun(_) -> + K = couch_util:encodeBase64Url(crypto:strong_rand_bytes(16)), + {K, 1} + end, lists:seq(1, 20)), + [I, {Props}] + end, lists:seq(1, 10)). + + +setup_reduce_limit_mock(Value) -> + ConfigArgs = ["query_server_config", "reduce_limit", "true"], + meck:reset([config, couch_log]), + meck:expect(config, get, ConfigArgs, Value), + meck:expect(couch_log, error, ['_', '_'], ok). 
+ + +check_reduce_limit_mock() -> + ?assert(meck:called(config, get, '_')), + ?assert(meck:called(couch_log, error, '_')). diff --git a/src/couch_log/src/couch_log_config.erl b/src/couch_log/src/couch_log_config.erl index 766d068a4..ab076cc69 100644 --- a/src/couch_log/src/couch_log_config.erl +++ b/src/couch_log/src/couch_log_config.erl @@ -49,7 +49,8 @@ entries() -> [ {level, "level", "info"}, {level_int, "level", "info"}, - {max_message_size, "max_message_size", "16000"} + {max_message_size, "max_message_size", "16000"}, + {strip_last_msg, "strip_last_msg", "true"} ]. @@ -97,4 +98,10 @@ transform(max_message_size, SizeStr) -> Size -> Size catch _:_ -> 16000 - end.
\ No newline at end of file + end; + +transform(strip_last_msg, "false") -> + false; + +transform(strip_last_msg, _) -> + true. diff --git a/src/couch_log/src/couch_log_config_dyn.erl b/src/couch_log/src/couch_log_config_dyn.erl index f7541f61f..b39dcf2f5 100644 --- a/src/couch_log/src/couch_log_config_dyn.erl +++ b/src/couch_log/src/couch_log_config_dyn.erl @@ -25,4 +25,5 @@ get(level) -> info; get(level_int) -> 2; -get(max_message_size) -> 16000. +get(max_message_size) -> 16000; +get(strip_last_msg) -> true. diff --git a/src/couch_log/src/couch_log_formatter.erl b/src/couch_log/src/couch_log_formatter.erl index 4d81f184f..26997a8a6 100644 --- a/src/couch_log/src/couch_log_formatter.erl +++ b/src/couch_log/src/couch_log_formatter.erl @@ -68,7 +68,13 @@ format(Event) -> do_format({error, _GL, {Pid, "** Generic server " ++ _, Args}}) -> %% gen_server terminate - [Name, LastMsg, State, Reason | Extra] = Args, + [Name, LastMsg0, State, Reason | Extra] = Args, + LastMsg = case couch_log_config:get(strip_last_msg) of + true -> + redacted; + false -> + LastMsg0 + end, MsgFmt = "gen_server ~w terminated with reason: ~s~n" ++ " last msg: ~p~n state: ~p~n extra: ~p", MsgArgs = [Name, format_reason(Reason), LastMsg, State, Extra], @@ -76,7 +82,13 @@ do_format({error, _GL, {Pid, "** Generic server " ++ _, Args}}) -> do_format({error, _GL, {Pid, "** State machine " ++ _, Args}}) -> %% gen_fsm terminate - [Name, LastMsg, StateName, State, Reason | Extra] = Args, + [Name, LastMsg0, StateName, State, Reason | Extra] = Args, + LastMsg = case couch_log_config:get(strip_last_msg) of + true -> + redacted; + false -> + LastMsg0 + end, MsgFmt = "gen_fsm ~w in state ~w terminated with reason: ~s~n" ++ " last msg: ~p~n state: ~p~n extra: ~p", MsgArgs = [Name, StateName, format_reason(Reason), LastMsg, State, Extra], @@ -84,7 +96,13 @@ do_format({error, _GL, {Pid, "** State machine " ++ _, Args}}) -> do_format({error, _GL, {Pid, "** gen_event handler" ++ _, Args}}) -> %% gen_event handler terminate - [ID, Name, LastMsg, State, Reason] = Args, + [ID, Name, LastMsg0, State, Reason] = Args, + LastMsg = case couch_log_config:get(strip_last_msg) of + true -> + redacted; + false -> + LastMsg0 + end, MsgFmt = "gen_event ~w installed in ~w terminated with reason: ~s~n" ++ " last msg: ~p~n state: ~p", MsgArgs = [ID, Name, format_reason(Reason), LastMsg, State], diff --git a/src/couch_log/src/couch_log_sup.erl b/src/couch_log/src/couch_log_sup.erl index 6219a36e9..fc1ac7812 100644 --- a/src/couch_log/src/couch_log_sup.erl +++ b/src/couch_log/src/couch_log_sup.erl @@ -63,6 +63,8 @@ handle_config_change("log", Key, _, _, S) -> couch_log_config:reconfigure(); "max_message_size" -> couch_log_config:reconfigure(); + "strip_last_msg" -> + couch_log_config:reconfigure(); _ -> % Someone may have changed the config for % the writer so we need to re-initialize. diff --git a/src/couch_log/test/eunit/couch_log_config_test.erl b/src/couch_log/test/eunit/couch_log_config_test.erl index c4677f37f..a4c4bcff2 100644 --- a/src/couch_log/test/eunit/couch_log_config_test.erl +++ b/src/couch_log/test/eunit/couch_log_config_test.erl @@ -25,7 +25,9 @@ couch_log_config_test_() -> fun check_level/0, fun check_max_message_size/0, fun check_bad_level/0, - fun check_bad_max_message_size/0 + fun check_bad_max_message_size/0, + fun check_strip_last_msg/0, + fun check_bad_strip_last_msg/0 ] }. @@ -108,3 +110,36 @@ check_bad_max_message_size() -> couch_log_test_util:wait_for_config(), ?assertEqual(16000, couch_log_config:get(max_message_size)) end). 
+ + +check_strip_last_msg() -> + % Default is true + ?assertEqual(true, couch_log_config:get(strip_last_msg)), + + couch_log_test_util:with_config_listener(fun() -> + config:set("log", "strip_last_msg", "false"), + couch_log_test_util:wait_for_config(), + ?assertEqual(false, couch_log_config:get(strip_last_msg)), + + config:delete("log", "strip_last_msg"), + couch_log_test_util:wait_for_config(), + ?assertEqual(true, couch_log_config:get(strip_last_msg)) + end). + +check_bad_strip_last_msg() -> + % Default is true + ?assertEqual(true, couch_log_config:get(strip_last_msg)), + + couch_log_test_util:with_config_listener(fun() -> + config:set("log", "strip_last_msg", "false"), + couch_log_test_util:wait_for_config(), + ?assertEqual(false, couch_log_config:get(strip_last_msg)), + + config:set("log", "strip_last_msg", "this is not a boolean"), + couch_log_test_util:wait_for_config(), + ?assertEqual(true, couch_log_config:get(strip_last_msg)), + + config:delete("log", "strip_last_msg"), + couch_log_test_util:wait_for_config(), + ?assertEqual(true, couch_log_config:get(strip_last_msg)) + end). diff --git a/src/couch_log/test/eunit/couch_log_formatter_test.erl b/src/couch_log/test/eunit/couch_log_formatter_test.erl index 795efcf29..24de346c6 100644 --- a/src/couch_log/test/eunit/couch_log_formatter_test.erl +++ b/src/couch_log/test/eunit/couch_log_formatter_test.erl @@ -81,7 +81,7 @@ gen_server_error_test() -> do_matches(do_format(Event), [ "gen_server a_gen_server terminated", "with reason: some_reason", - "last msg: {foo,bar}", + "last msg: redacted", "state: server_state", "extra: \\[\\]" ]). @@ -108,7 +108,7 @@ gen_server_error_with_extra_args_test() -> do_matches(do_format(Event), [ "gen_server a_gen_server terminated", "with reason: some_reason", - "last msg: {foo,bar}", + "last msg: redacted", "state: server_state", "extra: \\[sad,args\\]" ]). @@ -135,7 +135,7 @@ gen_fsm_error_test() -> do_matches(do_format(Event), [ "gen_fsm a_gen_fsm in state state_name", "with reason: barf", - "last msg: {ohai,there}", + "last msg: redacted", "state: curr_state", "extra: \\[\\]" ]). @@ -162,7 +162,7 @@ gen_fsm_error_with_extra_args_test() -> do_matches(do_format(Event), [ "gen_fsm a_gen_fsm in state state_name", "with reason: barf", - "last msg: {ohai,there}", + "last msg: redacted", "state: curr_state", "extra: \\[sad,args\\]" ]). @@ -195,7 +195,7 @@ gen_event_error_test() -> do_matches(do_format(Event), [ "gen_event handler_id installed in a_gen_event", "reason: barf", - "last msg: {ohai,there}", + "last msg: redacted", "state: curr_state" ]). @@ -850,6 +850,110 @@ coverage_test() -> }) ). +gen_server_error_with_last_msg_test() -> + Pid = self(), + Event = { + error, + erlang:group_leader(), + { + Pid, + "** Generic server and some stuff", + [a_gen_server, {foo, bar}, server_state, some_reason] + } + }, + ?assertMatch( + #log_entry{ + level = error, + pid = Pid + }, + do_format(Event) + ), + with_last(fun() -> + do_matches(do_format(Event), [ + "gen_server a_gen_server terminated", + "with reason: some_reason", + "last msg: {foo,bar}", + "state: server_state", + "extra: \\[\\]" + ]) + end). 
+ +gen_event_error_with_last_msg_test() -> + Pid = self(), + Event = { + error, + erlang:group_leader(), + { + Pid, + "** gen_event handler did a thing", + [ + handler_id, + a_gen_event, + {ohai,there}, + curr_state, + barf + ] + } + }, + ?assertMatch( + #log_entry{ + level = error, + pid = Pid + }, + do_format(Event) + ), + with_last(fun() -> + do_matches(do_format(Event), [ + "gen_event handler_id installed in a_gen_event", + "reason: barf", + "last msg: {ohai,there}", + "state: curr_state" + ]) + end). + + +gen_fsm_error_with_last_msg_test() -> + Pid = self(), + Event = { + error, + erlang:group_leader(), + { + Pid, + "** State machine did a thing", + [a_gen_fsm, {ohai,there}, state_name, curr_state, barf] + } + }, + ?assertMatch( + #log_entry{ + level = error, + pid = Pid + }, + do_format(Event) + ), + with_last(fun() -> + do_matches(do_format(Event), [ + "gen_fsm a_gen_fsm in state state_name", + "with reason: barf", + "last msg: {ohai,there}", + "state: curr_state", + "extra: \\[\\]" + ]) + end). + + +with_last(Fun) -> + meck:new(couch_log_config_dyn, [passthrough]), + try + meck:expect(couch_log_config_dyn, get, fun(Case) -> + case Case of + strip_last_msg -> false; + Case -> meck:passthrough([Case]) + end + end), + Fun() + after + meck:unload(couch_log_config_dyn) + end. do_format(Event) -> E = couch_log_formatter:format(Event), diff --git a/src/couch_mrview/include/couch_mrview.hrl b/src/couch_mrview/include/couch_mrview.hrl index bb0ab0b46..e0f80df81 100644 --- a/src/couch_mrview/include/couch_mrview.hrl +++ b/src/couch_mrview/include/couch_mrview.hrl @@ -81,7 +81,9 @@ conflicts, callback, sorted = true, - extra = [] + extra = [], + page_size = undefined, + bookmark=nil }). -record(vacc, { @@ -95,7 +97,9 @@ bufsize = 0, threshold = 1490, row_sent = false, - meta_sent = false + meta_sent = false, + paginated = false, + meta = #{} }). -record(lacc, { diff --git a/src/couch_mrview/src/couch_mrview.erl b/src/couch_mrview/src/couch_mrview.erl index 1cdc91809..880dfa725 100644 --- a/src/couch_mrview/src/couch_mrview.erl +++ b/src/couch_mrview/src/couch_mrview.erl @@ -170,8 +170,18 @@ join([H|[]], _, Acc) -> join([H|T], Sep, Acc) -> join(T, Sep, [Sep, H | Acc]). +validate(#{} = Db, DDoc) -> + DbName = fabric2_db:name(Db), + IsPartitioned = fabric2_db:is_partitioned(Db), + validate(DbName, IsPartitioned, DDoc); -validate(Db, DDoc) -> +validate(Db, DDoc) -> + DbName = couch_db:name(Db), + IsPartitioned = couch_db:is_partitioned(Db), + validate(DbName, IsPartitioned, DDoc). 
+ + +validate(DbName, _IsDbPartitioned, DDoc) -> ok = validate_ddoc_fields(DDoc#doc.body), GetName = fun (#mrview{map_names = [Name | _]}) -> Name; @@ -198,18 +208,8 @@ validate(Db, DDoc) -> end, {ok, #mrst{ language = Lang, - views = Views, - partitioned = Partitioned - }} = couch_mrview_util:ddoc_to_mrst(couch_db:name(Db), DDoc), - - case {couch_db:is_partitioned(Db), Partitioned} of - {false, true} -> - throw({invalid_design_doc, - <<"partitioned option cannot be true in a " - "non-partitioned database.">>}); - {_, _} -> - ok - end, + views = Views + }} = couch_mrview_util:ddoc_to_mrst(DbName, DDoc), try Views =/= [] andalso couch_query_servers:get_os_process(Lang) of false -> diff --git a/src/couch_mrview/src/couch_mrview_http.erl b/src/couch_mrview/src/couch_mrview_http.erl index 3cf8833d7..e1ba9d656 100644 --- a/src/couch_mrview/src/couch_mrview_http.erl +++ b/src/couch_mrview/src/couch_mrview_http.erl @@ -35,6 +35,8 @@ parse_params/3, parse_params/4, view_cb/2, + row_to_obj/1, + row_to_obj/2, row_to_json/1, row_to_json/2, check_view_etag/3 @@ -413,11 +415,19 @@ prepend_val(#vacc{prepend=Prepend}) -> row_to_json(Row) -> + ?JSON_ENCODE(row_to_obj(Row)). + + +row_to_json(Kind, Row) -> + ?JSON_ENCODE(row_to_obj(Kind, Row)). + + +row_to_obj(Row) -> Id = couch_util:get_value(id, Row), - row_to_json(Id, Row). + row_to_obj(Id, Row). -row_to_json(error, Row) -> +row_to_obj(error, Row) -> % Special case for _all_docs request with KEYS to % match prior behavior. Key = couch_util:get_value(key, Row), @@ -426,9 +436,8 @@ row_to_json(error, Row) -> ReasonProp = if Reason == undefined -> []; true -> [{reason, Reason}] end, - Obj = {[{key, Key}, {error, Val}] ++ ReasonProp}, - ?JSON_ENCODE(Obj); -row_to_json(Id0, Row) -> + {[{key, Key}, {error, Val}] ++ ReasonProp}; +row_to_obj(Id0, Row) -> Id = case Id0 of undefined -> []; Id0 -> [{id, Id0}] @@ -439,8 +448,7 @@ row_to_json(Id0, Row) -> undefined -> []; Doc0 -> [{doc, Doc0}] end, - Obj = {Id ++ [{key, Key}, {value, Val}] ++ Doc}, - ?JSON_ENCODE(Obj). + {Id ++ [{key, Key}, {value, Val}] ++ Doc}. parse_params(#httpd{}=Req, Keys) -> @@ -523,6 +531,8 @@ parse_param(Key, Val, Args, IsDecoded) -> Args#mrargs{end_key_docid=couch_util:to_binary(Val)}; "limit" -> Args#mrargs{limit=parse_pos_int(Val)}; + "page_size" -> + Args#mrargs{page_size=parse_pos_int(Val)}; "stale" when Val == "ok" orelse Val == <<"ok">> -> Args#mrargs{stable=true, update=false}; "stale" when Val == "update_after" orelse Val == <<"update_after">> -> diff --git a/src/couch_mrview/src/couch_mrview_index.erl b/src/couch_mrview/src/couch_mrview_index.erl index c96d87173..8e844e80c 100644 --- a/src/couch_mrview/src/couch_mrview_index.erl +++ b/src/couch_mrview/src/couch_mrview_index.erl @@ -20,6 +20,7 @@ -export([index_file_exists/1]). -export([update_local_purge_doc/2, verify_index_exists/2]). -export([ensure_local_purge_docs/2]). +-export([format_status/2]). -include_lib("couch/include/couch_db.hrl"). -include_lib("couch_mrview/include/couch_mrview.hrl"). 
@@ -133,6 +134,12 @@ open(Db, State0) -> NewSt = couch_mrview_util:reset_index(Db, Fd, State), ensure_local_purge_doc(Db, NewSt), {ok, NewSt}; + {ok, Else} -> + couch_log:error("~s has a bad header: got ~p", + [IndexFName, Else]), + NewSt = couch_mrview_util:reset_index(Db, Fd, State), + ensure_local_purge_doc(Db, NewSt), + {ok, NewSt}; no_valid_header -> NewSt = couch_mrview_util:reset_index(Db, Fd, State), ensure_local_purge_doc(Db, NewSt), @@ -252,16 +259,7 @@ set_partitioned(Db, State) -> DbPartitioned = couch_db:is_partitioned(Db), ViewPartitioned = couch_util:get_value( <<"partitioned">>, DesignOpts, DbPartitioned), - IsPartitioned = case {DbPartitioned, ViewPartitioned} of - {true, true} -> - true; - {true, false} -> - false; - {false, false} -> - false; - _ -> - throw({bad_request, <<"invalid partition option">>}) - end, + IsPartitioned = DbPartitioned andalso ViewPartitioned, State#mrst{partitioned = IsPartitioned}. @@ -318,3 +316,14 @@ update_local_purge_doc(Db, State, PSeq) -> BaseDoc end, couch_db:update_doc(Db, Doc, []). + +format_status(_Opt, [_PDict, State]) -> + Scrubbed = State#mrst{ + lib = nil, + views = nil, + id_btree = nil, + doc_acc = nil, + doc_queue = nil, + write_queue = nil + }, + ?record_to_keyval(mrst, Scrubbed). diff --git a/src/couch_mrview/src/couch_mrview_util.erl b/src/couch_mrview/src/couch_mrview_util.erl index e971720c9..fe6e6bd60 100644 --- a/src/couch_mrview/src/couch_mrview_util.erl +++ b/src/couch_mrview/src/couch_mrview_util.erl @@ -397,7 +397,7 @@ fold_reduce({NthRed, Lang, View}, Fun, Acc, Options) -> validate_args(Db, DDoc, Args0) -> - {ok, State} = couch_mrview_index:init(Db, DDoc), + {ok, State} = couch_mrview_util:ddoc_to_mrst(fabric2_db:name(Db), DDoc), Args1 = apply_limit(State#mrst.partitioned, Args0), validate_args(State, Args1). @@ -425,9 +425,12 @@ validate_args(#mrst{} = State, Args0) -> apply_limit(ViewPartitioned, Args) -> - LimitType = case ViewPartitioned of - true -> "partition_query_limit"; - false -> "query_limit" + Options = Args#mrargs.extra, + IgnorePQLimit = lists:keyfind(ignore_partition_query_limit, 1, Options), + LimitType = case {ViewPartitioned, IgnorePQLimit} of + {true, false} -> "partition_query_limit"; + {true, _} -> "query_limit"; + {false, _} -> "query_limit" end, MaxLimit = config:get_integer("query_server_config", diff --git a/src/couch_peruser/src/couch_peruser.erl b/src/couch_peruser/src/couch_peruser.erl index 886fb4f6e..4c06e8f27 100644 --- a/src/couch_peruser/src/couch_peruser.erl +++ b/src/couch_peruser/src/couch_peruser.erl @@ -19,7 +19,7 @@ % gen_server callbacks -export([start_link/0, init/1, handle_call/3, handle_cast/2, handle_info/2, - terminate/2, code_change/3]). + terminate/2, code_change/3, format_status/2]). -export([init_changes_handler/1, changes_handler/3]). @@ -410,3 +410,14 @@ terminate(_Reason, _State) -> code_change(_OldVsn, State, _Extra) -> {ok, State}. + + format_status(_Opt, [_PDict, State]) -> + #state{ + states = States + } = State, + Scrubbed = State#state{ + states = {length, length(States)} + }, + [{data, [{"State", + ?record_to_keyval(state, Scrubbed) + }]}].
\ No newline at end of file diff --git a/src/couch_replicator/README.md b/src/couch_replicator/README.md index 6a2a5cfdd..4eced760f 100644 --- a/src/couch_replicator/README.md +++ b/src/couch_replicator/README.md @@ -3,41 +3,67 @@ Developer Oriented Replicator Description This description of scheduling replicator's functionality is mainly geared to CouchDB developers. It dives a bit into the internal and explains how -everything is connected together. +everything is connected together. A higher-level overview is available in the +[RFC](https://github.com/apache/couchdb-documentation/pull/581). This +documentation assumes the audience is familiar with that description as well as +with the [Couch Jobs +RFC](https://github.com/apache/couchdb-documentation/blob/master/rfcs/007-background-jobs.md) +and with the [Node Types +RFC](https://github.com/apache/couchdb-documentation/blob/master/rfcs/013-node-types.md). A natural place to start is the top application supervisor: -`couch_replicator_sup`. It's a `rest_for_one` restart strategy supervisor, -so if a child process terminates, the rest of the children in the hierarchy -following it are also terminated. This structure implies a useful constraint -- -children lower in the list can safely call their siblings which are higher in -the list. - -A description of each child: - - * `couch_replication_event`: Starts a gen_event publication bus to handle some - replication related events. This used for example, to publish cluster - membership changes by the `couch_replicator_clustering` process. But is - also used in replication tests to monitor for replication events. - Notification is performed via the `couch_replicator_notifier:notify/1` - function. It's the first (left-most) child because - `couch_replicator_clustering` uses it. - - * `couch_replicator_clustering`: This module maintains cluster membership - information for the replication application and provides functions to check - ownership of replication jobs. A cluster membership change is published via - the `gen_event` event server named `couch_replication_event` as previously - covered. Published events are `{cluster, stable}` when cluster membership - has stabilized, that it, no node membership changes in a given period, and - `{cluster, unstable}` which indicates there was a recent change to the - cluster membership and now it's considered unstable. Listeners for cluster - membership change include `couch_replicator_doc_processor` and - `couch_replicator_db_changes`. When doc processor gets an `{cluster, - stable}` event it will remove all the replication jobs not belonging to the - current node. When `couch_replicator_db_changes` gets a `{cluster, - stable}` event, it will restart the `couch_multidb_changes` process it - controls, which will launch an new scan of all the replicator databases. - - * `couch_replicator_connection`: Maintains a global replication connection +`couch_replicator_sup`. The set of children in the supervisor is split into +`frontend` and `backend`. The `frontend` set is started on nodes which have the +`api_frontend` node type label set to `true`, and `backend` ones are started on +nodes which have the `replication` label set to `true`. The same node could +have both of them set to `true`, and it can act as both a replication frontend and +backend node. However, it is not guaranteed that jobs which are created by the +frontend part will necessarily run on the backend on the same node.
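To make the frontend/backend split concrete, here is a minimal sketch, not taken from this patch, of how a supervisor can assemble its child list from node type labels. The `node_type_enabled/1` helper and the two child-list functions are hypothetical stand-ins for whatever `couch_replicator_sup` actually uses:

```
-module(node_split_sup_example).
-behaviour(supervisor).
-export([start_link/0, init/1]).

start_link() ->
    supervisor:start_link({local, ?MODULE}, ?MODULE, []).

init([]) ->
    %% Hypothetical lookup of the `api_frontend` and `replication` labels
    %% described above.
    Frontend = case node_type_enabled(api_frontend) of
        true -> frontend_children();
        false -> []
    end,
    Backend = case node_type_enabled(replication) of
        true -> backend_children();
        false -> []
    end,
    {ok, {{one_for_one, 3, 10}, Frontend ++ Backend}}.

%% Placeholders: the real supervisor returns proper child specs here.
frontend_children() -> [].
backend_children() -> [].

%% Placeholder: assume both labels are enabled on this node.
node_type_enabled(_Label) -> true.
```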
+ + +Frontend Description +-- + +The "frontend" consists of the parts which handle HTTP requests and monitor +`_replicator` databases for changes and then create `couch_jobs` replication +job records. Some of the modules involved in this are: + + * `couch_replicator` : Contains the main API "entry" point into the + `couch_replicator` application. The `replicate/2` function creates transient + replication jobs. `after_db_create/2`, `after_db_delete/2`, + `after_doc_write/6` functions are called from `couch_epi` callbacks to + create replication jobs from `_replicator` db events. Eventually they all + call `couch_replicator_jobs:add_job/3` to create a `couch_jobs` replication + job. Before the job is created, either the HTTP request body or the + `_replicator` doc body is parsed into a `Rep` map object. An important + property of this object is that it can be serialized to JSON and + deserialized from JSON. This object is saved in the `?REP` field of the + replication `couch_jobs` job data. Besides creating replication jobs, + `couch_replicator` is also responsible for handling `_scheduler/jobs` and + `_scheduler/docs` monitoring API responses. That happens in the `jobs/0`, + `job/1`, `docs/2` and `doc/2` functions. + +Backend Description +-- + +The "backend" consists of parts which run replication jobs, update their state, +and handle rescheduling on intermittent errors. All the job activity on these +nodes is ultimately driven from `couch_jobs` acceptors which wait in +`couch_jobs:accept/2` for replication jobs. + + * `couch_replicator_job_server` : A singleton process in charge of + spawning and keeping track of `couch_replicator_job` processes. It ensures + there is a limited number of replication jobs running on each node. It + periodically accepts new jobs and stops the oldest running ones in order + to give other pending jobs a chance to run. It runs this logic in the + `reschedule/1` function. That function is called with a frequency defined by + the `interval_sec` configuration setting. The other parameters which determine + how jobs start and stop are `max_jobs` and `max_churn`. The node will try to + limit running up to `max_jobs` jobs on average with periodic spikes of up to + `max_jobs + max_churn` jobs at a time, and it will try not to start more than + `max_churn` jobs during each rescheduling cycle. + + * `couch_replicator_connection`: Maintains a global replication connection pool. It allows reusing connections across replication tasks. The main interface is `acquire/1` and `release/1`. The general idea is once a connection is established, it is kept around for
- - The main API function is `add_job/1`. Its argument is an instance of the - `#rep{}` record, which could be the result of a document update from a - `_replicator` db or the result of a POST to `_replicate` endpoint. - - Each job internally is represented by the `#job{}` record. It contains the - original `#rep{}` but also, maintains an event history. The history is a - sequence of past events for each job. These are timestamped and ordered - such that the most recent event is at the head. History length is limited - based on the `replicator.max_history` configuration value. The default is - 20 entries. History events types are: - - * `added` : job was just added to the scheduler. This is the first event. - * `started` : job was started. This was an attempt to run the job. - * `stopped` : job was stopped by the scheduler. - * `crashed` : job has crashed (instead of stopping cleanly). - - The core of the scheduling algorithm is the `reschedule/1` function. This - function is called every `replicator.interval` milliseconds (default is - 60000 i.e. a minute). During each call the scheduler will try to stop some - jobs, start some new ones and will also try to keep the maximum number of - jobs running less than `replicator.max_jobs` (default 500). So the - functions does these operations (actual code paste): - - ``` - Running = running_job_count(), - Pending = pending_job_count(), - stop_excess_jobs(State, Running), - start_pending_jobs(State, Running, Pending), - rotate_jobs(State, Running, Pending), - update_running_jobs_stats(State#state.stats_pid) - ``` - - `Running` is the total number of currently running jobs. `Pending` is the - total number of jobs waiting to be run. `stop_excess_jobs` will stop any - exceeding the `replicator.max_jobs` configured limit. This code takes - effect if user reduces the `max_jobs` configuration value. - `start_pending_jobs` will start any jobs if there is more room available. - This will take effect on startup or when user increases the `max_jobs` - configuration value. `rotate_jobs` is where all the action happens. The - scheduler picks `replicator.max_churn` running jobs to stop and then picks - the same number of pending jobs to start. The default value of `max_churn` - is 20. So by default every minute, 20 running jobs are stopped, and 20 new - pending jobs are started. - - Before moving on it is worth pointing out that scheduler treats continuous - and non-continuous replications differently. Normal (non-continuous) - replications once started will be allowed to run to completion. That - behavior is to preserve their semantics of replicating a snapshot of the - source database to the target. For example if new documents are added to - the source after the replication are started, those updates should not show - up on the target database. Stopping and restarting a normal replication - would violate that constraint. The only exception to the rule is the user - explicitly reduces `replicator.max_jobs` configuration value. Even then - scheduler will first attempt to stop as many continuous jobs as possible - and only if it has no choice left will it stop normal jobs. - - Keeping that in mind and going back to the scheduling algorithm, the next - interesting part is how the scheduler picks which jobs to stop and which - ones to start: - - * Stopping: When picking jobs to stop the scheduler will pick longest - running continuous jobs first. The sorting callback function to get the - longest running jobs is unsurprisingly called `longest_running/2`. 
To - pick the longest running jobs it looks at the most recent `started` - event. After it gets a sorted list by longest running, it simply picks - first few depending on the value of `max_churn` using `lists:sublist/2`. - Then those jobs are stopped. - - * Starting: When starting the scheduler will pick the jobs which have been - waiting the longest. Surprisingly, in this case it also looks at the - `started` timestamp and picks the jobs which have the oldest `started` - timestamp. If there are 3 jobs, A[started=10], B[started=7], - C[started=9], then B will be picked first, then C then A. This ensures - that jobs are not starved, which is a classic scheduling pitfall. - - In the code, the list of pending jobs is picked slightly differently than - how the list of running jobs is picked. `pending_jobs/1` uses `ets:foldl` - to iterate over all the pending jobs. As it iterates it tries to keep only - up to `max_churn` oldest items in the accumulator. The reason this is done - is that there could be a very large number of pending jobs and loading them - all in a list (making a copy from ETS) and then sorting it can be quite - expensive performance-wise. The tricky part of the iteration is happening - in `pending_maybe_replace/2`. A `gb_sets` ordered set is used to keep top-N - longest waiting jobs so far. The code has a comment with a helpful example - on how this algorithm works. - - The last part is how the scheduler treats jobs which keep crashing. If a - job is started but then crashes then that job is considered unhealthy. The - main idea is to penalize such jobs such that they are forced to wait an - exponentially larger amount of time with each consecutive crash. A central - part to this algorithm is determining what forms a sequence of consecutive - crashes. If a job starts then quickly crashes, and after its next start it - crashes again, then that would become a sequence of 2 consecutive crashes. - The penalty then would be calculated by `backoff_micros/1` function where - the consecutive crash count would end up as the exponent. However for - practical concerns there is also maximum penalty specified and that's the - equivalent of 10 consecutive crashes. Timewise it ends up being about 8 - hours. That means even a job which keep crashing will still get a chance to - retry once in 8 hours. - - There is subtlety when calculating consecutive crashes and that is deciding - when the sequence stops. That is, figuring out when a job becomes healthy - again. The scheduler considers a job healthy again if it started and hasn't - crashed in a while. The "in a while" part is a configuration parameter - `replicator.health_threshold` defaulting to 2 minutes. This means if job - has been crashing, for example 5 times in a row, but then on the 6th - attempt it started and ran for more than 2 minutes then it is considered - healthy again. The next time it crashes its sequence of consecutive crashes - will restart at 1. - - * `couch_replicator_scheduler_sup`: This module is a supervisor for running - replication tasks. The most interesting thing about it is perhaps that it is - not used to restart children. The scheduler handles restarts and error - handling backoffs. - - * `couch_replicator_doc_processor`: The doc processor component is in charge - of processing replication document updates, turning them into replication - jobs and adding those jobs to the scheduler. 
Unfortunately the only reason - there is even a `couch_replicator_doc_processor` gen_server, instead of - replication documents being turned to jobs and inserted into the scheduler - directly, is because of one corner case -- filtered replications using - custom (JavaScript mostly) filters. More about this later. It is better to - start with how updates flow through the doc processor: - - Document updates come via the `db_change/3` callback from - `couch_multidb_changes`, then go to the `process_change/2` function. - - In `process_change/2` a few decisions are made regarding how to proceed. The - first is "ownership" check. That is a check if the replication document - belongs on the current node. If not, then it is ignored. In a cluster, in - general there would be N copies of a document change and we only want to run - the replication once. Another check is to see if the update has arrived - during a time when the cluster is considered "unstable". If so, it is - ignored, because soon enough a rescan will be launched and all the documents - will be reprocessed anyway. Another noteworthy thing in `process_change/2` - is handling of upgrades from the previous version of the replicator when - transient states were written to the documents. Two such states were - `triggered` and `error`. Both of those states are removed from the document - then then update proceeds in the regular fashion. `failed` documents are - also ignored here. `failed` is a terminal state which indicates the document - was somehow unsuitable to become a replication job (it was malformed or a - duplicate). Otherwise the state update proceeds to `process_updated/2`. - - `process_updated/2` is where replication document updates are parsed and - translated to `#rep{}` records. The interesting part here is that the - replication ID isn't calculated yet. Unsurprisingly the parsing function - used is called `parse_rep_doc_without_id/1`. Also note that up until now - everything is still running in the context of the `db_change/3` callback. - After replication filter type is determined the update gets passed to the - `couch_replicator_doc_processor` gen_server. - - The `couch_replicator_doc_processor` gen_server's main role is to try to - calculate replication IDs for each `#rep{}` record passed to it, then add - that as a scheduler job. As noted before, `#rep{}` records parsed up until - this point lack a replication ID. The reason is replication ID calculation - includes a hash of the filter code. And because user defined replication - filters live in the source DB, which most likely involves a remote network - fetch there is a possibility of blocking and a need to handle various - network failures and retries. Because of that `replication_doc_processor` - dispatches all of that blocking and retrying to a separate `worker` process - (`couch_replicator_doc_processor_worker` module). - - `couch_replicator_doc_processor_worker` is where replication IDs are - calculated for each individual doc update. There are two separate modules - which contain utilities related to replication ID calculation: - `couch_replicator_ids` and `couch_replicator_filters`. The first one - contains ID calculation algorithms and the second one knows how to parse and - fetch user filters from a remote source DB. One interesting thing about the - worker is that it is time-bounded and is guaranteed to not be stuck forever. 
- That's why it spawns an extra process with `spawn_monitor`, just so it can - do an `after` clause in receive and bound the maximum time this worker will - take. - - A doc processor worker will either succeed or fail but never block for too - long. Success and failure are returned as exit values. Those are handled in - the `worker_returned/3` doc processor clauses. The most common pattern is - that a worker is spawned to add a replication job, it does so and returns a - `{ok, ReplicationID}` value in `worker_returned`. - - In case of a filtered replication with custom user code there are two case to - consider: - - 1. Filter fetching code has failed. In that case worker returns an error. - But because the error could be a transient network error, another - worker is started to try again. It could fail and return an error - again, then another one is started and so on. However each consecutive - worker will do an exponential backoff, not unlike the scheduler code. - `error_backoff/1` is where the backoff period is calculated. - Consecutive errors are held in the `errcnt` field in the ETS table. - - 2. Fetching filter code succeeds, replication ID is calculated and job is - added to the scheduler. However, because this is a filtered replication - the source database could get an updated filter. Which means - replication ID could change again. So the worker is spawned to - periodically check the filter and see if it changed. In other words doc - processor will do the work of checking for filtered replications, get - an updated filter and will then refresh the replication job (remove the - old one and add a new one with a different ID). The filter checking - interval is determined by the `filter_backoff` function. An unusual - thing about that function is it calculates the period based on the size - of the ETS table. The idea there is for a few replications in a - cluster, it's ok to check filter changes often. But when there are lots - of replications running, having each one checking their filter often is - not a good idea. + failures are happening the code ensures the ETS tables are empty in order + to have a lower impact on a running system. - * `couch_replicator_db_changes`: This process specializes and configures - `couch_multidb_changes` so that it looks for `_replicator` suffixed shards - and makes sure to restart it when node membership changes. diff --git a/src/couch_replicator/include/couch_replicator_api_wrap.hrl b/src/couch_replicator/include/couch_replicator_api_wrap.hrl index 0f8213c51..6212ab412 100644 --- a/src/couch_replicator/include/couch_replicator_api_wrap.hrl +++ b/src/couch_replicator/include/couch_replicator_api_wrap.hrl @@ -14,7 +14,7 @@ -record(httpdb, { url, - auth_props = [], + auth_props = #{}, headers = [ {"Accept", "application/json"}, {"User-Agent", "CouchDB-Replicator/" ++ couch_server:get_version()} diff --git a/src/couch_replicator/priv/stats_descriptions.cfg b/src/couch_replicator/priv/stats_descriptions.cfg index d9efb91dc..1bb151c1c 100644 --- a/src/couch_replicator/priv/stats_descriptions.cfg +++ b/src/couch_replicator/priv/stats_descriptions.cfg @@ -54,14 +54,6 @@ {type, counter}, {desc, <<"number of replicator workers started">>} ]}. -{[couch_replicator, cluster_is_stable], [ - {type, gauge}, - {desc, <<"1 if cluster is stable, 0 if unstable">>} -]}. -{[couch_replicator, db_scans], [ - {type, counter}, - {desc, <<"number of times replicator db scans have been started">>} -]}. 
{[couch_replicator, docs, dbs_created], [ {type, counter}, {desc, <<"number of db shard creations seen by replicator doc processor">>} @@ -70,10 +62,6 @@ {type, counter}, {desc, <<"number of db shard deletions seen by replicator doc processor">>} ]}. -{[couch_replicator, docs, dbs_found], [ - {type, counter}, - {desc, <<"number of db shard found by replicator doc processor">>} -]}. {[couch_replicator, docs, db_changes], [ {type, counter}, {desc, <<"number of db changes processed by replicator doc processor">>} @@ -88,43 +76,43 @@ ]}. {[couch_replicator, jobs, adds], [ {type, counter}, - {desc, <<"number of jobs added to replicator scheduler">>} + {desc, <<"number of jobs added to replicator">>} ]}. -{[couch_replicator, jobs, duplicate_adds], [ +{[couch_replicator, jobs, removes], [ {type, counter}, - {desc, <<"number of duplicate jobs added to replicator scheduler">>} + {desc, <<"number of jobs removed from replicator">>} ]}. -{[couch_replicator, jobs, removes], [ +{[couch_replicator, jobs, accepts], [ {type, counter}, - {desc, <<"number of jobs removed from replicator scheduler">>} + {desc, <<"number of job acceptors started">>} ]}. {[couch_replicator, jobs, starts], [ {type, counter}, - {desc, <<"number of jobs started by replicator scheduler">>} + {desc, <<"number of jobs started by replicator">>} ]}. {[couch_replicator, jobs, stops], [ {type, counter}, - {desc, <<"number of jobs stopped by replicator scheduler">>} + {desc, <<"number of jobs stopped by replicator">>} ]}. {[couch_replicator, jobs, crashes], [ {type, counter}, - {desc, <<"number of job crashed noticed by replicator scheduler">>} + {desc, <<"number of job crashed noticed by replicator">>} ]}. {[couch_replicator, jobs, running], [ {type, gauge}, - {desc, <<"replicator scheduler running jobs">>} + {desc, <<"replicator running jobs">>} ]}. -{[couch_replicator, jobs, pending], [ +{[couch_replicator, jobs, accepting], [ {type, gauge}, - {desc, <<"replicator scheduler pending jobs">>} + {desc, <<"replicator acceptors count">>} ]}. -{[couch_replicator, jobs, crashed], [ - {type, gauge}, - {desc, <<"replicator scheduler crashed jobs">>} +{[couch_replicator, jobs, reschedules], [ + {type, counter}, + {desc, <<"replicator reschedule cycles counter">>} ]}. -{[couch_replicator, jobs, total], [ +{[couch_replicator, jobs, pending], [ {type, gauge}, - {desc, <<"total number of replicator scheduler jobs">>} + {desc, <<"replicator pending count">>} ]}. {[couch_replicator, connection, acquires], [ {type, counter}, diff --git a/src/couch_replicator/src/couch_replicator.app.src b/src/couch_replicator/src/couch_replicator.app.src index 2e0e191d3..81789f155 100644 --- a/src/couch_replicator/src/couch_replicator.app.src +++ b/src/couch_replicator/src/couch_replicator.app.src @@ -18,20 +18,15 @@ couch_replicator_sup, couch_replicator_rate_limiter, couch_replicator_connection, - couch_replication, % couch_replication_event gen_event - couch_replicator_clustering, - couch_replicator_scheduler, - couch_replicator_scheduler_sup, - couch_replicator_doc_processor + couch_replicator_job_server ]}, {applications, [ kernel, stdlib, couch_log, - mem3, config, couch, - couch_event, - couch_stats + couch_stats, + couch_jobs ]} ]}. 
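The renamed job metrics above are ordinary `couch_stats` counters and gauges, bumped the same way as the `docs` counters used elsewhere in this diff. For example, incrementing the new `accepts` counter would look like this (an illustrative call, not a line from this patch):

```
couch_stats:increment_counter([couch_replicator, jobs, accepts]).
```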
diff --git a/src/couch_replicator/src/couch_replicator.erl b/src/couch_replicator/src/couch_replicator.erl index b38f31b59..a690d37c3 100644 --- a/src/couch_replicator/src/couch_replicator.erl +++ b/src/couch_replicator/src/couch_replicator.erl @@ -14,285 +14,513 @@ -export([ replicate/2, - replication_states/0, + + jobs/0, job/1, - doc/3, - active_doc/2, - info_from_doc/2, - restart_job/1 + docs/2, + doc/2, + + after_db_create/2, + after_db_delete/2, + after_doc_write/6, + + ensure_rep_db_exists/0, + + rescan_jobs/0, + rescan_jobs/1, + reenqueue_jobs/0, + reenqueue_jobs/1, + remove_jobs/0, + get_job_ids/0 ]). + +-include_lib("ibrowse/include/ibrowse.hrl"). -include_lib("couch/include/couch_db.hrl"). -include("couch_replicator.hrl"). --include_lib("couch_replicator/include/couch_replicator_api_wrap.hrl"). --include_lib("couch_mrview/include/couch_mrview.hrl"). --include_lib("mem3/include/mem3.hrl"). - --define(DESIGN_DOC_CREATION_DELAY_MSEC, 1000). --define(REPLICATION_STATES, [ - initializing, % Just added to scheduler - error, % Could not be turned into a replication job - running, % Scheduled and running - pending, % Scheduled and waiting to run - crashing, % Scheduled but crashing, backed off by the scheduler - completed, % Non-continuous (normal) completed replication - failed % Terminal failure, will not be retried anymore -]). - --import(couch_util, [ - get_value/2, - get_value/3 -]). -spec replicate({[_]}, any()) -> {ok, {continuous, binary()}} | - {ok, {[_]}} | + {ok, #{}} | {ok, {cancelled, binary()}} | {error, any()} | no_return(). -replicate(PostBody, Ctx) -> - {ok, Rep0} = couch_replicator_utils:parse_rep_doc(PostBody, Ctx), - Rep = Rep0#rep{start_time = os:timestamp()}, - #rep{id = RepId, options = Options, user_ctx = UserCtx} = Rep, - case get_value(cancel, Options, false) of - true -> - CancelRepId = case get_value(id, Options, nil) of - nil -> - RepId; - RepId2 -> - RepId2 - end, - case check_authorization(CancelRepId, UserCtx) of - ok -> - cancel_replication(CancelRepId); - not_found -> - {error, not_found} - end; - false -> - check_authorization(RepId, UserCtx), - {ok, Listener} = rep_result_listener(RepId), - Result = do_replication_loop(Rep), - couch_replicator_notifier:stop(Listener), - Result +replicate(Body, #user_ctx{name = User} = UserCtx) -> + {ok, Id, Rep} = couch_replicator_parse:parse_transient_rep(Body, User), + #{?OPTIONS := Options} = Rep, + JobId = case couch_replicator_jobs:get_job_id(undefined, Id) of + {ok, JobId0} -> JobId0; + {error, not_found} -> Id + end, + case maps:get(<<"cancel">>, Options, false) of + true -> + case check_authorization(JobId, UserCtx) of + ok -> cancel_replication(JobId); + not_found -> {error, not_found} + end; + false -> + check_authorization(JobId, UserCtx), + ok = start_transient_job(JobId, Rep), + case maps:get(<<"continuous">>, Options, false) of + true -> + case couch_replicator_jobs:wait_running(JobId) of + {ok, #{?STATE := ?ST_RUNNING} = JobData} -> + {ok, {continuous, maps:get(?REP_ID, JobData)}}; + {ok, #{?STATE := ?ST_FAILED} = JobData} -> + {error, maps:get(?STATE_INFO, JobData)}; + {error, Error} -> + {error, Error} + end; + false -> + case couch_replicator_jobs:wait_result(JobId) of + {ok, #{?STATE := ?ST_COMPLETED} = JobData} -> + {ok, maps:get(?CHECKPOINT_HISTORY, JobData)}; + {ok, #{?STATE := ?ST_FAILED} = JobData} -> + {error, maps:get(?STATE_INFO, JobData)}; + {error, Error} -> + {error, Error} + end + end end. 
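For orientation, a hedged sketch of how the new `replicate/2` above is invoked for a transient continuous replication. The endpoint URLs and credentials are invented, and the full set of accepted body fields is defined by `couch_replicator_parse`, which is not part of this hunk:

```
%% #user_ctx{} comes from couch/include/couch_db.hrl; field names follow the
%% usual replication document shape.
Body = {[
    {<<"source">>, <<"http://adm:pass@localhost:5984/db_a">>},
    {<<"target">>, <<"http://adm:pass@localhost:5984/db_b">>},
    {<<"continuous">>, true}
]},
UserCtx = #user_ctx{name = <<"adm">>, roles = [<<"_admin">>]},
%% On success the {continuous, RepId} shape mirrors the spec above.
{ok, {continuous, _RepId}} = couch_replicator:replicate(Body, UserCtx).
```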
--spec do_replication_loop(#rep{}) -> - {ok, {continuous, binary()}} | {ok, tuple()} | {error, any()}. -do_replication_loop(#rep{id = {BaseId, Ext} = Id, options = Options} = Rep) -> - ok = couch_replicator_scheduler:add_job(Rep), - case get_value(continuous, Options, false) of - true -> - {ok, {continuous, ?l2b(BaseId ++ Ext)}}; - false -> - wait_for_result(Id) +jobs() -> + FoldFun = fun(_JTx, _JobId, CouchJobsState, JobData, Acc) -> + case CouchJobsState of + pending -> [job_ejson(JobData) | Acc]; + running -> [job_ejson(JobData) | Acc]; + finished -> Acc + end + end, + couch_replicator_jobs:fold_jobs(undefined, FoldFun, []). + + +job(Id0) when is_binary(Id0) -> + Id1 = couch_replicator_ids:convert(Id0), + JobId = case couch_replicator_jobs:get_job_id(undefined, Id1) of + {ok, JobId0} -> JobId0; + {error, not_found} -> Id1 + end, + case couch_replicator_jobs:get_job_data(undefined, JobId) of + {ok, #{} = JobData} -> {ok, job_ejson(JobData)}; + {error, not_found} -> {error, not_found} end. --spec rep_result_listener(rep_id()) -> {ok, pid()}. -rep_result_listener(RepId) -> - ReplyTo = self(), - {ok, _Listener} = couch_replicator_notifier:start_link( - fun({_, RepId2, _} = Ev) when RepId2 =:= RepId -> - ReplyTo ! Ev; - (_) -> - ok - end). +docs(#{} = Db, States) when is_list(States) -> + DbName = fabric2_db:name(Db), + FoldFun = fun(_JTx, _JobId, _, JobData, Acc) -> + case JobData of + #{?DB_NAME := DbName, ?STATE := State} -> + case {States, lists:member(State, States)} of + {[], _} -> [doc_ejson(JobData) | Acc]; + {[_ | _], true} -> [doc_ejson(JobData) | Acc]; + {[_ | _], false} -> Acc + end; + #{} -> + Acc + end + end, + couch_replicator_jobs:fold_jobs(undefined, FoldFun, []). --spec wait_for_result(rep_id()) -> - {ok, {[_]}} | {error, any()}. -wait_for_result(RepId) -> - receive - {finished, RepId, RepResult} -> - {ok, RepResult}; - {error, RepId, Reason} -> - {error, Reason} +doc(#{} = Db, DocId) when is_binary(DocId) -> + DbUUID = fabric2_db:get_uuid(Db), + JobId = couch_replicator_ids:job_id(DbUUID, DocId), + case couch_replicator_jobs:get_job_data(undefined, JobId) of + {ok, #{} = JobData} -> {ok, doc_ejson(JobData)}; + {error, not_found} -> {error, not_found} end. --spec cancel_replication(rep_id()) -> - {ok, {cancelled, binary()}} | {error, not_found}. -cancel_replication({BasedId, Extension} = RepId) -> - FullRepId = BasedId ++ Extension, - couch_log:notice("Canceling replication '~s' ...", [FullRepId]), - case couch_replicator_scheduler:rep_state(RepId) of - #rep{} -> - ok = couch_replicator_scheduler:remove_job(RepId), - couch_log:notice("Replication '~s' cancelled", [FullRepId]), - {ok, {cancelled, ?l2b(FullRepId)}}; - nil -> - couch_log:notice("Replication '~s' not found", [FullRepId]), - {error, not_found} - end. +after_db_create(DbName, DbUUID) when ?IS_REP_DB(DbName)-> + couch_stats:increment_counter([couch_replicator, docs, dbs_created]), + try fabric2_db:open(DbName, [{uuid, DbUUID}, ?ADMIN_CTX]) of + {ok, Db} -> + fabric2_fdb:transactional(Db, fun(TxDb) -> + ok = add_jobs_from_db(TxDb) + end) + catch + error:database_does_not_exist -> + ok + end; +after_db_create(_DbName, _DbUUID) -> + ok. --spec replication_states() -> [atom()]. -replication_states() -> - ?REPLICATION_STATES. 
+after_db_delete(DbName, DbUUID) when ?IS_REP_DB(DbName) -> + couch_stats:increment_counter([couch_replicator, docs, dbs_deleted]), + FoldFun = fun(JTx, JobId, _, JobData, ok) -> + case JobData of + #{?DB_UUID := DbUUID} -> + ok = couch_replicator_jobs:remove_job(JTx, JobId); + #{} -> + ok + end + end, + couch_replicator_jobs:fold_jobs(undefined, FoldFun, ok); + +after_db_delete(_DbName, _DbUUID) -> + ok. + + +after_doc_write(#{name := DbName} = Db, #doc{} = Doc, _NewWinner, _OldWinner, + _NewRevId, _Seq) when ?IS_REP_DB(DbName) -> + couch_stats:increment_counter([couch_replicator, docs, db_changes]), + {Props} = Doc#doc.body, + case couch_util:get_value(?REPLICATION_STATE, Props) of + ?ST_COMPLETED -> ok; + ?ST_FAILED -> ok; + _ -> process_change(Db, Doc) + end; + +after_doc_write(_Db, _Doc, _NewWinner, _OldWinner, _NewRevId, _Seq) -> + ok. + + +% This is called from supervisor, must return ignore. +-spec ensure_rep_db_exists() -> ignore. +ensure_rep_db_exists() -> + couch_replicator_jobs:set_timeout(), + case config:get_boolean("replicator", "create_replicator_db", false) of + true -> + UserCtx = #user_ctx{roles=[<<"_admin">>, <<"_replicator">>]}, + Opts = [{user_ctx, UserCtx}, sys_db], + case fabric2_db:create(?REP_DB_NAME, Opts) of + {error, file_exists} -> ok; + {ok, _Db} -> ok + end; + false -> + ok + end, + ignore. --spec strip_url_creds(binary() | {[_]}) -> binary(). -strip_url_creds(Endpoint) -> - try - couch_replicator_docs:parse_rep_db(Endpoint, [], []) of - #httpdb{url = Url} -> - iolist_to_binary(couch_util:url_strip_password(Url)) + +% Testing and debug functions + +rescan_jobs() -> + rescan_jobs(?REP_DB_NAME). + + +rescan_jobs(DbName) when is_binary(DbName), ?IS_REP_DB(DbName) -> + try fabric2_db:open(DbName, [?ADMIN_CTX]) of + {ok, Db} -> + after_db_create(DbName, fabric2_db:get_uuid(Db)) catch - throw:{error, local_endpoints_not_supported} -> - Endpoint + error:database_does_not_exist -> + database_does_not_exist end. --spec job(binary()) -> {ok, {[_]}} | {error, not_found}. -job(JobId0) when is_binary(JobId0) -> - JobId = couch_replicator_ids:convert(JobId0), - {Res, _Bad} = rpc:multicall(couch_replicator_scheduler, job, [JobId]), - case [JobInfo || {ok, JobInfo} <- Res] of - [JobInfo| _] -> - {ok, JobInfo}; - [] -> - {error, not_found} - end. +reenqueue_jobs() -> + reenqueue_jobs(?REP_DB_NAME). --spec restart_job(binary() | list() | rep_id()) -> - {ok, {[_]}} | {error, not_found}. -restart_job(JobId0) -> - JobId = couch_replicator_ids:convert(JobId0), - {Res, _} = rpc:multicall(couch_replicator_scheduler, restart_job, [JobId]), - case [JobInfo || {ok, JobInfo} <- Res] of - [JobInfo| _] -> - {ok, JobInfo}; - [] -> - {error, not_found} +reenqueue_jobs(DbName) when is_binary(DbName), ?IS_REP_DB(DbName) -> + try fabric2_db:open(DbName, [?ADMIN_CTX]) of + {ok, Db} -> + DbUUID = fabric2_db:get_uuid(Db), + ok = after_db_delete(DbName, DbUUID), + ok = after_db_create(DbName, DbUUID) + catch + error:database_does_not_exist -> + database_does_not_exist end. --spec active_doc(binary(), binary()) -> {ok, {[_]}} | {error, not_found}. 
-active_doc(DbName, DocId) -> - try - Shards = mem3:shards(DbName), - Live = [node() | nodes()], - Nodes = lists:usort([N || #shard{node=N} <- Shards, - lists:member(N, Live)]), - Owner = mem3:owner(DbName, DocId, Nodes), - case active_doc_rpc(DbName, DocId, [Owner]) of - {ok, DocInfo} -> - {ok, DocInfo}; +remove_jobs() -> + % If we clear a large number of jobs, make sure to use batching so we don't + % take too long when using individual transactions, and don't time out when + % using a single transaction + FoldFun = fun + (_, JobId, _, _, Acc) when length(Acc) > 250 -> + couch_replicator_jobs:remove_jobs(undefined, [JobId | Acc]); + (_, JobId, _, _, Acc) -> + [JobId | Acc] + end, + Acc = couch_replicator_jobs:fold_jobs(undefined, FoldFun, []), + [] = couch_replicator_jobs:remove_jobs(undefined, Acc), + ok. + + +get_job_ids() -> + couch_replicator_jobs:get_job_ids(undefined). + + +% Private functions + +-spec start_transient_job(binary(), #{}) -> ok. +start_transient_job(JobId, #{} = Rep) -> + JobData = couch_replicator_jobs:new_job(Rep, null, null, null, + ?ST_PENDING, null, null), + couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(), fun(JTx) -> + case couch_replicator_jobs:get_job_data(JTx, JobId) of + {ok, #{?REP := OldRep, ?STATE := State}} -> + SameRep = couch_replicator_utils:compare_reps(Rep, OldRep), + Active = State =:= ?ST_PENDING orelse State =:= ?ST_RUNNING, + case SameRep andalso Active of + true -> + % If a job with the same parameters is running we don't + % stop it and just ignore the request. This is mainly for + % compatibility where users are able to idempotently + % POST the same job without it being stopped and + % restarted. + ok; + false -> + couch_replicator_jobs:add_job(JTx, JobId, JobData) + end; {error, not_found} -> - active_doc_rpc(DbName, DocId, Nodes -- [Owner]) + ok = couch_replicator_jobs:add_job(JTx, JobId, JobData) end - catch - % Might be a local database - error:database_does_not_exist -> - active_doc_rpc(DbName, DocId, [node()]) - end. + end). --spec active_doc_rpc(binary(), binary(), [node()]) -> - {ok, {[_]}} | {error, not_found}. -active_doc_rpc(_DbName, _DocId, []) -> - {error, not_found}; -active_doc_rpc(DbName, DocId, [Node]) when Node =:= node() -> - couch_replicator_doc_processor:doc(DbName, DocId); -active_doc_rpc(DbName, DocId, Nodes) -> - {Res, _Bad} = rpc:multicall(Nodes, couch_replicator_doc_processor, doc, - [DbName, DocId]), - case [DocInfo || {ok, DocInfo} <- Res] of - [DocInfo | _] -> - {ok, DocInfo}; - [] -> - {error, not_found} - end. +-spec cancel_replication(job_id()) -> + {ok, {cancelled, binary()}} | {error, not_found}. +cancel_replication(JobId) when is_binary(JobId) -> + couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(), fun(JTx) -> + Id = case couch_replicator_jobs:get_job_data(JTx, JobId) of + {ok, #{?REP_ID := RepId}} when is_binary(RepId) -> + RepId; + _ -> + JobId + end, + couch_log:notice("Canceling replication '~s'", [Id]), + case couch_replicator_jobs:remove_job(JTx, JobId) of + {error, not_found} -> + {error, not_found}; + ok -> + {ok, {cancelled, Id}} + end + end). --spec doc(binary(), binary(), any()) -> {ok, {[_]}} | {error, not_found}. -doc(RepDb, DocId, UserCtx) -> - case active_doc(RepDb, DocId) of - {ok, DocInfo} -> - {ok, DocInfo}; - {error, not_found} -> - doc_from_db(RepDb, DocId, UserCtx) - end.
+process_change(_Db, #doc{id = <<?DESIGN_DOC_PREFIX, _/binary>>}) -> + ok; +process_change(#{} = Db, #doc{deleted = true} = Doc) -> + DbUUID = fabric2_db:get_uuid(Db), + JobId = couch_replicator_ids:job_id(DbUUID, Doc#doc.id), + couch_replicator_jobs:remove_job(undefined, JobId); --spec doc_from_db(binary(), binary(), any()) -> {ok, {[_]}} | {error, not_found}. -doc_from_db(RepDb, DocId, UserCtx) -> - case fabric:open_doc(RepDb, DocId, [UserCtx, ejson_body]) of - {ok, Doc} -> - {ok, info_from_doc(RepDb, couch_doc:to_json_obj(Doc, []))}; - {not_found, _Reason} -> - {error, not_found} - end. +process_change(#{} = Db, #doc{deleted = false} = Doc) -> + #doc{id = DocId, body = {Props} = Body} = Doc, + DbName = fabric2_db:name(Db), + DbUUID = fabric2_db:get_uuid(Db), + {Rep, DocState, Error} = try + Rep0 = couch_replicator_parse:parse_rep_doc(Body), + DocState0 = couch_util:get_value(?REPLICATION_STATE, Props, null), + {Rep0, DocState0, null} + catch + throw:{bad_rep_doc, Reason} -> + {null, null, couch_replicator_utils:rep_error_to_binary(Reason)} + end, + JobId = couch_replicator_ids:job_id(DbUUID, DocId), + JobData = case Rep of + null -> + couch_replicator_jobs:new_job(Rep, DbName, DbUUID, DocId, + ?ST_FAILED, Error, null); + #{} -> + couch_replicator_jobs:new_job(Rep, DbName, DbUUID, DocId, + ?ST_PENDING, null, DocState) + end, + LogMsg = "~p : replication doc update db:~s doc:~s job_id:~s doc_state:~s", + couch_log:notice(LogMsg, [?MODULE, DbName, DocId, JobId, DocState]), + + couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(Db), fun(JTx) -> + case couch_replicator_jobs:get_job_data(JTx, JobId) of + {ok, #{?REP := null, ?STATE_INFO := Error}} when Rep =:= null -> + % Same error as before occurred, don't bother updating the job + ok; + {ok, #{?REP := null}} when Rep =:= null -> + % New error so the job is updated + couch_replicator_jobs:add_job(JTx, JobId, JobData); + {ok, #{?REP := OldRep, ?STATE := State}} when is_map(Rep) -> + SameRep = couch_replicator_utils:compare_reps(Rep, OldRep), + Active = State =:= ?ST_PENDING orelse State =:= ?ST_RUNNING, + case SameRep andalso Active of + true -> + % Document was changed but none of the parameters + % relevant for the replication job have changed, so + % make it a no-op + ok; + false -> + couch_replicator_jobs:add_job(JTx, JobId, JobData) + end; + {error, not_found} -> + couch_replicator_jobs:add_job(JTx, JobId, JobData) + end --spec info_from_doc(binary(), {[_]}) -> {[_]}. -info_from_doc(RepDb, {Props}) -> - DocId = get_value(<<"_id">>, Props), - Source = get_value(<<"source">>, Props), - Target = get_value(<<"target">>, Props), - State0 = state_atom(get_value(<<"_replication_state">>, Props, null)), - StateTime = get_value(<<"_replication_state_time">>, Props, null), - {State1, StateInfo, ErrorCount, StartTime} = case State0 of - completed -> - {InfoP} = get_value(<<"_replication_stats">>, Props, {[]}), - case lists:keytake(<<"start_time">>, 1, InfoP) of - {value, {_, Time}, InfoP1} -> - {State0, {InfoP1}, 0, Time}; - false -> - case lists:keytake(start_time, 1, InfoP) of - {value, {_, Time}, InfoP1} -> - {State0, {InfoP1}, 0, Time}; - false -> - {State0, {InfoP}, 0, null} - end - end; - failed -> - Info = get_value(<<"_replication_state_reason">>, Props, nil), - EJsonInfo = couch_replicator_utils:ejson_state_info(Info), - {State0, EJsonInfo, 1, StateTime}; - _OtherState -> - {null, null, 0, null} + end). + + +-spec add_jobs_from_db(#{}) -> ok.
+add_jobs_from_db(#{} = TxDb) -> + FoldFun = fun + ({meta, _Meta}, ok) -> + {ok, ok}; + (complete, ok) -> + {ok, ok}; + ({row, Row}, ok) -> + Db = TxDb#{tx := undefined}, + ok = process_change(Db, get_doc(TxDb, Row)), + {ok, ok} + end, + Opts = [{restart_tx, true}], + {ok, ok} = fabric2_db:fold_docs(TxDb, FoldFun, ok, Opts), + ok. + + +-spec get_doc(#{}, list()) -> #doc{}. +get_doc(TxDb, Row) -> + {_, DocId} = lists:keyfind(id, 1, Row), + {ok, #doc{deleted = false} = Doc} = fabric2_db:open_doc(TxDb, DocId, []), + Doc. + + +doc_ejson(#{} = JobData) -> + #{ + ?REP := Rep, + ?REP_ID := RepId, + ?DB_NAME := DbName, + ?DOC_ID := DocId, + ?STATE := State, + ?STATE_INFO := Info0, + ?ERROR_COUNT := ErrorCount, + ?LAST_UPDATED := LastUpdatedSec, + ?REP_NODE := Node, + ?REP_PID := Pid, + ?REP_STATS := Stats + } = JobData, + + #{ + ?SOURCE := #{<<"url">> := Source, <<"proxy_url">> := SourceProxy}, + ?TARGET := #{<<"url">> := Target, <<"proxy_url">> := TargetProxy}, + ?START_TIME := StartSec + } = Rep, + + LastUpdatedISO8601 = couch_replicator_utils:iso8601(LastUpdatedSec), + StartISO8601 = couch_replicator_utils:iso8601(StartSec), + + Info = case State of + ?ST_RUNNING -> Stats; + ?ST_PENDING -> Stats; + _Other -> Info0 end, - {[ - {doc_id, DocId}, - {database, RepDb}, - {id, null}, - {source, strip_url_creds(Source)}, - {target, strip_url_creds(Target)}, - {state, State1}, - {error_count, ErrorCount}, - {info, StateInfo}, - {start_time, StartTime}, - {last_updated, StateTime} - ]}. - - -state_atom(<<"triggered">>) -> - triggered; % This handles a legacy case were document wasn't converted yet -state_atom(State) when is_binary(State) -> - erlang:binary_to_existing_atom(State, utf8); -state_atom(State) when is_atom(State) -> - State. + + #{ + <<"id">> => RepId, + <<"database">> => DbName, + <<"doc_id">> => DocId, + <<"source">> => ejson_url(Source), + <<"target">> => ejson_url(Target), + <<"source_proxy">> => ejson_url(SourceProxy), + <<"target_proxy">> => ejson_url(TargetProxy), + <<"state">> => State, + <<"info">> => Info, + <<"error_count">> => ErrorCount, + <<"last_updated">> => LastUpdatedISO8601, + <<"start_time">> => StartISO8601, + <<"node">> => Node, + <<"pid">> => Pid + }. + + +job_ejson(#{} = JobData) -> + #{ + ?REP := Rep, + ?REP_ID := RepId, + ?DB_NAME := DbName, + ?DOC_ID := DocId, + ?STATE := State, + ?STATE_INFO := Info0, + ?JOB_HISTORY := History, + ?REP_STATS := Stats, + ?REP_NODE := Node, + ?REP_PID := Pid + } = JobData, + + #{ + ?SOURCE := #{<<"url">> := Source}, + ?TARGET := #{<<"url">> := Target}, + ?REP_USER := User, + ?START_TIME := StartSec + } = Rep, + + StartISO8601 = couch_replicator_utils:iso8601(StartSec), + + History1 = lists:map(fun(#{?HIST_TIMESTAMP := Ts} = Evt) -> + Evt#{?HIST_TIMESTAMP := couch_replicator_utils:iso8601(Ts)} + end, History), + + Info = case State of + ?ST_RUNNING -> Stats; + ?ST_PENDING -> Stats; + _Other -> Info0 + end, + + #{ + <<"id">> => RepId, + <<"database">> => DbName, + <<"doc_id">> => DocId, + <<"source">> => ejson_url(Source), + <<"target">> => ejson_url(Target), + <<"state">> => State, + <<"info">> => Info, + <<"user">> => User, + <<"history">> => History1, + <<"start_time">> => StartISO8601, + <<"node">> => Node, + <<"pid">> => Pid + }. + + +ejson_url(Url) when is_binary(Url) -> + strip_url_creds(Url); + +ejson_url(null) -> + null. + + +-spec strip_url_creds(binary()) -> binary() | null. 
+strip_url_creds(Url) -> + try + case ibrowse_lib:parse_url(binary_to_list(Url)) of + #url{} -> ok; + {error, Error} -> error(Error) + end, + iolist_to_binary(couch_util:url_strip_password(Url)) + catch + error:_ -> + % Avoid exposing any part of the URL in case there is a password in + % the malformed endpoint URL + null + end. -spec check_authorization(rep_id(), #user_ctx{}) -> ok | not_found. -check_authorization(RepId, #user_ctx{name = Name} = Ctx) -> - case couch_replicator_scheduler:rep_state(RepId) of - #rep{user_ctx = #user_ctx{name = Name}} -> - ok; - #rep{} -> - couch_httpd:verify_is_server_admin(Ctx); - nil -> - not_found +check_authorization(JobId, #user_ctx{} = Ctx) when is_binary(JobId) -> + #user_ctx{name = Name} = Ctx, + case couch_replicator_jobs:get_job_data(undefined, JobId) of + {error, not_found} -> + not_found; + {ok, #{?DB_NAME := DbName}} when is_binary(DbName) -> + throw({unauthorized, <<"Persistent replication collision">>}); + {ok, #{?REP := #{?REP_USER := Name}}} -> + ok; + {ok, #{}} -> + couch_httpd:verify_is_server_admin(Ctx) end. -ifdef(TEST). -include_lib("eunit/include/eunit.hrl"). +-include_lib("fabric/test/fabric2_test.hrl"). + authorization_test_() -> { @@ -300,52 +528,42 @@ authorization_test_() -> fun () -> ok end, fun (_) -> meck:unload() end, [ - t_admin_is_always_authorized(), - t_username_must_match(), - t_replication_not_found() + ?TDEF_FE(t_admin_is_always_authorized), + ?TDEF_FE(t_username_must_match), + ?TDEF_FE(t_replication_not_found) ] }. -t_admin_is_always_authorized() -> - ?_test(begin - expect_rep_user_ctx(<<"someuser">>, <<"_admin">>), - UserCtx = #user_ctx{name = <<"adm">>, roles = [<<"_admin">>]}, - ?assertEqual(ok, check_authorization(<<"RepId">>, UserCtx)) - end). +t_admin_is_always_authorized(_) -> + expect_job_data({ok, #{?REP => #{?REP_USER => <<"someuser">>}}}), + UserCtx = #user_ctx{name = <<"adm">>, roles = [<<"_admin">>]}, + ?assertEqual(ok, check_authorization(<<"RepId">>, UserCtx)). -t_username_must_match() -> - ?_test(begin - expect_rep_user_ctx(<<"user">>, <<"somerole">>), - UserCtx1 = #user_ctx{name = <<"user">>, roles = [<<"somerole">>]}, - ?assertEqual(ok, check_authorization(<<"RepId">>, UserCtx1)), - UserCtx2 = #user_ctx{name = <<"other">>, roles = [<<"somerole">>]}, - ?assertThrow({unauthorized, _}, check_authorization(<<"RepId">>, - UserCtx2)) - end). +t_username_must_match(_) -> + expect_job_data({ok, #{?REP => #{?REP_USER => <<"user1">>}}}), + UserCtx1 = #user_ctx{name = <<"user1">>, roles = [<<"somerole">>]}, + ?assertEqual(ok, check_authorization(<<"RepId">>, UserCtx1)), + UserCtx2 = #user_ctx{name = <<"other">>, roles = [<<"somerole">>]}, + ?assertThrow({unauthorized, _}, check_authorization(<<"RepId">>, + UserCtx2)). -t_replication_not_found() -> - ?_test(begin - meck:expect(couch_replicator_scheduler, rep_state, 1, nil), - UserCtx1 = #user_ctx{name = <<"user">>, roles = [<<"somerole">>]}, - ?assertEqual(not_found, check_authorization(<<"RepId">>, UserCtx1)), - UserCtx2 = #user_ctx{name = <<"adm">>, roles = [<<"_admin">>]}, - ?assertEqual(not_found, check_authorization(<<"RepId">>, UserCtx2)) - end). +t_replication_not_found(_) -> + expect_job_data({error, not_found}), + UserCtx1 = #user_ctx{name = <<"user">>, roles = [<<"somerole">>]}, + ?assertEqual(not_found, check_authorization(<<"RepId">>, UserCtx1)), + UserCtx2 = #user_ctx{name = <<"adm">>, roles = [<<"_admin">>]}, + ?assertEqual(not_found, check_authorization(<<"RepId">>, UserCtx2)). 
-expect_rep_user_ctx(Name, Role) -> - meck:expect(couch_replicator_scheduler, rep_state, - fun(_Id) -> - UserCtx = #user_ctx{name = Name, roles = [Role]}, - #rep{user_ctx = UserCtx} - end). +expect_job_data(JobDataRes) -> + meck:expect(couch_replicator_jobs, get_job_data, 2, JobDataRes). strip_url_creds_test_() -> - { + { setup, fun() -> meck:expect(config, get, fun(_, _, Default) -> Default end) @@ -353,40 +571,39 @@ strip_url_creds_test_() -> fun(_) -> meck:unload() end, - [ - t_strip_http_basic_creds(), - t_strip_http_props_creds(), - t_strip_local_db_creds() - ] + with([ + ?TDEF(t_strip_http_basic_creds), + ?TDEF(t_strip_url_creds_errors) + ]) }. -t_strip_local_db_creds() -> - ?_test(?assertEqual(<<"localdb">>, strip_url_creds(<<"localdb">>))). +t_strip_http_basic_creds(_) -> + Url1 = <<"http://adm:pass@host/db/">>, + ?assertEqual(<<"http://adm:*****@host/db/">>, strip_url_creds(Url1)), + Url2 = <<"https://adm:pass@host/db/">>, + ?assertEqual(<<"https://adm:*****@host/db/">>, strip_url_creds(Url2)), + Url3 = <<"http://adm:pass@host:80/db/">>, + ?assertEqual(<<"http://adm:*****@host:80/db/">>, strip_url_creds(Url3)), + Url4 = <<"http://adm:pass@host/db?a=b&c=d">>, + ?assertEqual(<<"http://adm:*****@host/db?a=b&c=d">>, + strip_url_creds(Url4)). + + +t_strip_url_creds_errors(_) -> + Bad1 = <<"http://adm:pass/bad">>, + ?assertEqual(null, strip_url_creds(Bad1)), + Bad2 = <<"more garbage">>, + ?assertEqual(null, strip_url_creds(Bad2)), + Bad3 = <<"http://a:b:c">>, + ?assertEqual(null, strip_url_creds(Bad3)), + Bad4 = <<"http://adm:pass:pass/bad">>, + ?assertEqual(null, strip_url_creds(Bad4)), + ?assertEqual(null, strip_url_creds(null)), + ?assertEqual(null, strip_url_creds(42)), + ?assertEqual(null, strip_url_creds([<<"a">>, <<"b">>])), + Bad5 = <<"http://adm:pass/bad">>, + ?assertEqual(null, strip_url_creds(Bad5)). -t_strip_http_basic_creds() -> - ?_test(begin - Url1 = <<"http://adm:pass@host/db">>, - ?assertEqual(<<"http://adm:*****@host/db/">>, strip_url_creds(Url1)), - Url2 = <<"https://adm:pass@host/db">>, - ?assertEqual(<<"https://adm:*****@host/db/">>, strip_url_creds(Url2)), - Url3 = <<"http://adm:pass@host:80/db">>, - ?assertEqual(<<"http://adm:*****@host:80/db/">>, strip_url_creds(Url3)), - Url4 = <<"http://adm:pass@host/db?a=b&c=d">>, - ?assertEqual(<<"http://adm:*****@host/db?a=b&c=d">>, - strip_url_creds(Url4)) - end). - - -t_strip_http_props_creds() -> - ?_test(begin - Props1 = {[{<<"url">>, <<"http://adm:pass@host/db">>}]}, - ?assertEqual(<<"http://adm:*****@host/db/">>, strip_url_creds(Props1)), - Props2 = {[ {<<"url">>, <<"http://host/db">>}, - {<<"headers">>, {[{<<"Authorization">>, <<"Basic pa55">>}]}} - ]}, - ?assertEqual(<<"http://host/db/">>, strip_url_creds(Props2)) - end). - -endif. diff --git a/src/couch_replicator/src/couch_replicator.hrl b/src/couch_replicator/src/couch_replicator.hrl index 2a5b7c8c8..28a86d91b 100644 --- a/src/couch_replicator/src/couch_replicator.hrl +++ b/src/couch_replicator/src/couch_replicator.hrl @@ -12,32 +12,80 @@ -define(REP_ID_VERSION, 4). --record(rep, { - id :: rep_id() | '_' | 'undefined', - source :: any() | '_', - target :: any() | '_', - options :: [_] | '_', - user_ctx :: any() | '_', - type = db :: atom() | '_', - view = nil :: any() | '_', - doc_id :: any() | '_', - db_name = null :: null | binary() | '_', - start_time = {0, 0, 0} :: erlang:timestamp() | '_', - stats = couch_replicator_stats:new() :: orddict:orddict() | '_' -}). - --type rep_id() :: {string(), string()}. 
+% Some fields from the replication doc +-define(SOURCE, <<"source">>). +-define(TARGET, <<"target">>). +-define(CREATE_TARGET, <<"create_target">>). +-define(DOC_IDS, <<"doc_ids">>). +-define(SELECTOR, <<"selector">>). +-define(FILTER, <<"filter">>). +-define(QUERY_PARAMS, <<"query_params">>). +-define(URL, <<"url">>). +-define(AUTH, <<"auth">>). +-define(HEADERS, <<"headers">>). +-define(PROXY, <<"proxy">>). +-define(SOURCE_PROXY, <<"source_proxy">>). +-define(TARGET_PROXY, <<"target_proxy">>). + +-define(REPLICATION_STATE, <<"_replication_state">>). +-define(REPLICATION_STATS, <<"_replication_stats">>). +-define(REPLICATION_ID, <<"_replication_id">>). +-define(REPLICATION_STATE_TIME, <<"_replication_state_time">>). +-define(REPLICATION_STATE_REASON, <<"_replication_state_reason">>). + +% Replication states +-define(ST_ERROR, <<"error">>). +-define(ST_COMPLETED, <<"completed">>). +-define(ST_RUNNING, <<"running">>). +-define(ST_FAILED, <<"failed">>). +-define(ST_PENDING, <<"pending">>). +-define(ST_CRASHING, <<"crashing">>). + +% Some fields from a rep object +-define(REP_ID, <<"rep_id">>). +-define(BASE_ID, <<"base_id">>). +-define(DB_NAME, <<"db_name">>). +-define(DB_UUID, <<"db_uuid">>). +-define(DOC_ID, <<"doc_id">>). +-define(REP_USER, <<"rep_user">>). +-define(START_TIME, <<"start_time">>). +-define(OPTIONS, <<"options">>). + +% Fields for couch job data objects +-define(REP, <<"rep">>). +-define(REP_PARSE_ERROR, <<"rep_parse_error">>). +-define(REP_STATS, <<"rep_stats">>). +-define(STATE, <<"state">>). +-define(STATE_INFO, <<"state_info">>). +-define(DOC_STATE, <<"doc_state">>). +-define(ERROR_COUNT, <<"error_count">>). +-define(LAST_UPDATED, <<"last_updated">>). +-define(LAST_START, <<"last_start">>). +-define(LAST_ERROR, <<"last_error">>). +-define(JOB_HISTORY, <<"job_history">>). +-define(CHECKPOINT_HISTORY, <<"checkpoint_history">>). +-define(REP_NODE, <<"node">>). +-define(REP_PID, <<"pid">>). + +% Job history tags +-define(HIST_TYPE, <<"type">>). +-define(HIST_TIMESTAMP, <<"timestamp">>). +-define(HIST_REASON, <<"reason">>). +-define(HIST_ADDED, <<"added">>). +-define(HIST_STARTED, <<"started">>). +-define(HIST_STOPPED, <<"stopped">>). +-define(HIST_PENDING, <<"pending">>). +-define(HIST_CRASHED, <<"crashed">>). + +-define(REP_DB_NAME, <<"_replicator">>). + +% Can be used as a guard +-define(IS_REP_DB(X), (X =:= ?REP_DB_NAME orelse + binary_part(X, {byte_size(X), -12}) =:= <<"/_replicator">>)). + + +-type rep_id() :: binary(). +-type job_id() :: binary(). +-type user_name() :: binary() | null. -type db_doc_id() :: {binary(), binary() | '_'}. -type seconds() :: non_neg_integer(). --type rep_start_result() :: - {ok, rep_id()} | - ignore | - {temporary_error, binary()} | - {permanent_failure, binary()}. - - --record(doc_worker_result, { - id :: db_doc_id(), - wref :: reference(), - result :: rep_start_result() -}). diff --git a/src/couch_replicator/src/couch_replicator_api_wrap.erl b/src/couch_replicator/src/couch_replicator_api_wrap.erl index a21de4242..1df8ee0c7 100644 --- a/src/couch_replicator/src/couch_replicator_api_wrap.erl +++ b/src/couch_replicator/src/couch_replicator_api_wrap.erl @@ -28,7 +28,6 @@ db_close/1, get_db_info/1, get_pending_count/2, - get_view_info/3, update_doc/3, update_doc/4, update_docs/3, @@ -39,39 +38,28 @@ open_doc_revs/6, changes_since/5, db_uri/1, - normalize_db/1 + db_from_json/1 ]). --import(couch_replicator_httpc, [ - send_req/3 - ]). - --import(couch_util, [ - encode_doc_id/1, - get_value/2, - get_value/3 - ]). 
-define(MAX_WAIT, 5 * 60 * 1000). -define(MAX_URL_LEN, 7000). -define(MIN_URL_LEN, 200). -db_uri(#httpdb{url = Url}) -> +db_uri(#{<<"url">> := Url}) -> couch_util:url_strip_password(Url); -db_uri(DbName) when is_binary(DbName) -> - ?b2l(DbName); +db_uri(#httpdb{url = Url}) -> + couch_util:url_strip_password(Url). -db_uri(Db) -> - db_uri(couch_db:name(Db)). +db_open(#{} = Db) -> + db_open(Db, false, #{}). -db_open(Db) -> - db_open(Db, false, []). -db_open(#httpdb{} = Db1, Create, CreateParams) -> - {ok, Db} = couch_replicator_httpc:setup(Db1), +db_open(#{} = Db0, Create, #{} = CreateParams) when is_boolean(Create) -> + {ok, Db} = couch_replicator_httpc:setup(db_from_json(Db0)), try case Create of false -> @@ -149,14 +137,6 @@ get_pending_count(#httpdb{} = Db, Seq) -> {ok, couch_util:get_value(<<"pending">>, Props, null)} end). -get_view_info(#httpdb{} = Db, DDocId, ViewName) -> - Path = io_lib:format("~s/_view/~s/_info", [DDocId, ViewName]), - send_req(Db, [{path, Path}], - fun(200, _, {Props}) -> - {VInfo} = couch_util:get_value(<<"view_index">>, Props, {[]}), - {ok, VInfo} - end). - ensure_full_commit(#httpdb{} = Db) -> send_req( @@ -434,9 +414,9 @@ changes_since(#httpdb{headers = Headers1, timeout = InactiveTimeout} = HttpDb, {undefined, undefined} -> QArgs1 = maybe_add_changes_filter_q_args(BaseQArgs, Options), {QArgs1, get, [], Headers1}; - {undefined, _} when is_tuple(Selector) -> + {undefined, #{}} -> Headers2 = [{"Content-Type", "application/json"} | Headers1], - JsonSelector = ?JSON_ENCODE({[{<<"selector">>, Selector}]}), + JsonSelector = ?JSON_ENCODE(#{<<"selector">> => Selector}), {[{"filter", "_selector"} | BaseQArgs], post, JsonSelector, Headers2}; {_, undefined} when is_list(DocIds) -> Headers2 = [{"Content-Type", "application/json"} | Headers1], @@ -496,7 +476,8 @@ maybe_add_changes_filter_q_args(BaseQS, Options) -> ViewFields0 = [atom_to_list(F) || F <- record_info(fields, mrargs)], ViewFields = ["key" | ViewFields0], - {Params} = get_value(query_params, Options, {[]}), + ParamsMap = #{} = get_value(query_params, Options, #{}), + Params = maps:to_list(ParamsMap), [{"filter", ?b2l(FilterName)} | lists:foldl( fun({K, V}, QSAcc) -> Ks = couch_util:to_list(K), @@ -546,7 +527,7 @@ options_to_query_args(HttpDb, Path, Options0) -> length("GET " ++ FullUrl ++ " HTTP/1.1\r\n") + length("&atts_since=") + 6, % +6 = % encoded [ and ] PAs, MaxLen, []), - [{"atts_since", ?JSON_ENCODE(RevList)} | QueryArgs1] + [{"atts_since", ?b2l(iolist_to_binary(?JSON_ENCODE(RevList)))} | QueryArgs1] end. @@ -787,7 +768,7 @@ json_to_doc_info({Props}) -> RevsInfo0 = lists:map( fun({Change}) -> Rev = couch_doc:parse_rev(get_value(<<"rev">>, Change)), - Del = couch_replicator_utils:is_deleted(Change), + Del = get_value(<<"deleted">>, Change, false), #rev_info{rev=Rev, deleted=Del} end, Changes), @@ -895,52 +876,95 @@ header_value(Key, Headers, Default) -> end. -% Normalize an #httpdb{} or #db{} record such that it can be used for -% comparisons. This means remove things like pids and also sort options / props. -normalize_db(#httpdb{} = HttpDb) -> +maybe_append_create_query_params(Db, Params) when map_size(Params) == 0 -> + Db; + +maybe_append_create_query_params(Db, #{} = Params) -> + ParamList = maps:to_list(Params), + NewUrl = Db#httpdb.url ++ "?" ++ mochiweb_util:urlencode(ParamList), + Db#httpdb{url = NewUrl}. 
+ + +db_from_json(#{} = DbMap) -> + #{ + <<"url">> := Url, + <<"auth_props">> := Auth, + <<"headers">> := Headers0, + <<"ibrowse_options">> := IBrowseOptions0, + <<"timeout">> := Timeout, + <<"http_connections">> := HttpConnections, + <<"retries">> := Retries, + <<"proxy_url">> := ProxyUrl0 + } = DbMap, + Headers = maps:fold(fun(K, V, Acc) -> + [{binary_to_list(K), binary_to_list(V)} | Acc] + end, [], Headers0), + IBrowseOptions = maps:fold(fun + (<<"socket_options">>, #{} = SockOpts, Acc) -> + SockOptsKVs = maps:fold(fun sock_opts_fold/3, [], SockOpts), + [{socket_options, SockOptsKVs} | Acc]; + (<<"ssl_options">>, #{} = SslOpts, Acc) -> + SslOptsKVs = maps:fold(fun ssl_opts_fold/3, [], SslOpts), + [{ssl_options, SslOptsKVs} | Acc]; + (K, V, Acc) when is_binary(V) -> + [{binary_to_atom(K, utf8), binary_to_list(V)} | Acc]; + (K, V, Acc) -> + [{binary_to_atom(K, utf8), V} | Acc] + end, [], IBrowseOptions0), + ProxyUrl = case ProxyUrl0 of + null -> undefined; + V when is_binary(V) -> binary_to_list(V) + end, #httpdb{ - url = HttpDb#httpdb.url, - auth_props = lists:sort(HttpDb#httpdb.auth_props), - headers = lists:keysort(1, HttpDb#httpdb.headers), - timeout = HttpDb#httpdb.timeout, - ibrowse_options = lists:keysort(1, HttpDb#httpdb.ibrowse_options), - retries = HttpDb#httpdb.retries, - http_connections = HttpDb#httpdb.http_connections - }; + url = binary_to_list(Url), + auth_props = Auth, + headers = Headers, + ibrowse_options = IBrowseOptions, + timeout = Timeout, + http_connections = HttpConnections, + retries = Retries, + proxy_url = ProxyUrl + }. -normalize_db(<<DbName/binary>>) -> - DbName. +send_req(#httpdb{} = HttpDb, Opts, Callback) when is_function(Callback) -> + couch_replicator_httpc:send_req(HttpDb, Opts, Callback). -maybe_append_create_query_params(Db, []) -> - Db; -maybe_append_create_query_params(Db, CreateParams) -> - NewUrl = Db#httpdb.url ++ "?" ++ mochiweb_util:urlencode(CreateParams), - Db#httpdb{url = NewUrl}. +get_value(K, Props) -> + couch_util:get_value(K, Props). + + +get_value(K, Props, Default) -> + couch_util:get_value(K, Props, Default). --ifdef(TEST). +encode_doc_id(DocId) -> + couch_util:encode_doc_id(DocId). --include_lib("eunit/include/eunit.hrl"). -normalize_http_db_test() -> - HttpDb = #httpdb{ - url = "http://host/db", - auth_props = [{"key", "val"}], - headers = [{"k2","v2"}, {"k1","v1"}], - timeout = 30000, - ibrowse_options = [{k2, v2}, {k1, v1}], - retries = 10, - http_connections = 20 - }, - Expected = HttpDb#httpdb{ - headers = [{"k1","v1"}, {"k2","v2"}], - ibrowse_options = [{k1, v1}, {k2, v2}] - }, - ?assertEqual(Expected, normalize_db(HttpDb)), - ?assertEqual(<<"local">>, normalize_db(<<"local">>)). +% See couch_replicator_docs:ssl_params/1 for ssl parsed options +% and http://erlang.org/doc/man/ssl.html#type-server_option +% all latest SSL server options +% +ssl_opts_fold(K, V, Acc) when is_boolean(V); is_integer(V) -> + [{binary_to_atom(K, utf8), V} | Acc]; + +ssl_opts_fold(K, null, Acc) -> + [{binary_to_atom(K, utf8), undefined} | Acc]; + +ssl_opts_fold(<<"verify">>, V, Acc) -> + [{verify, binary_to_atom(V, utf8)} | Acc]; +ssl_opts_fold(K, V, Acc) when is_list(V) -> + [{binary_to_atom(K, utf8), binary_to_list(V)} | Acc]. + + +% See ?VALID_SOCK_OPTS in couch_replicator_docs for accepted socket options +% +sock_opts_fold(K, V, Acc) when is_binary(V) -> + [{binary_to_atom(K, utf8), binary_to_atom(V, utf8)} | Acc]; --endif. +sock_opts_fold(K, V, Acc) when is_boolean(V); is_integer(V) -> + [{binary_to_atom(K, utf8), V} | Acc]. 
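% --- Illustrative sketch (editor's addition, not part of the patch) ---------
% Example of the JSON-map to #httpdb{} conversion performed by db_from_json/1
% above. The endpoint values are made up; the point is the type mapping:
% binaries become lists, ibrowse/ssl/socket option names become atoms, and a
% JSON null proxy_url becomes the atom 'undefined'.

DbMap = #{
    <<"url">> => <<"http://srchost:5984/src">>,
    <<"auth_props">> => #{},
    <<"headers">> => #{<<"Accept">> => <<"application/json">>},
    <<"ibrowse_options">> => #{
        <<"socket_options">> => #{<<"keepalive">> => true},
        <<"ssl_options">> => #{<<"verify">> => <<"verify_peer">>, <<"depth">> => 3}
    },
    <<"timeout">> => 30000,
    <<"http_connections">> => 20,
    <<"retries">> => 5,
    <<"proxy_url">> => null
},

#httpdb{
    url = "http://srchost:5984/src",
    headers = [{"Accept", "application/json"}],
    ibrowse_options = _IBrowseOpts,  % [{socket_options, [{keepalive, true}]},
                                     %  {ssl_options, [{verify, verify_peer},
                                     %                 {depth, 3}]}] in some order
    timeout = 30000,
    http_connections = 20,
    retries = 5,
    proxy_url = undefined
} = couch_replicator_api_wrap:db_from_json(DbMap).
% -----------------------------------------------------------------------------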
diff --git a/src/couch_replicator/src/couch_replicator_auth_session.erl b/src/couch_replicator/src/couch_replicator_auth_session.erl index 30f499a33..a59c770b4 100644 --- a/src/couch_replicator/src/couch_replicator_auth_session.erl +++ b/src/couch_replicator/src/couch_replicator_auth_session.erl @@ -187,7 +187,7 @@ format_status(_Opt, [_PDict, State]) -> [ {epoch, State#state.epoch}, {user, State#state.user}, - {session_url, State#state.session_url}, + {session_url, couch_util:url_strip_password(State#state.session_url)}, {refresh_tstamp, State#state.refresh_tstamp} ]. diff --git a/src/couch_replicator/src/couch_replicator_changes_reader.erl b/src/couch_replicator/src/couch_replicator_changes_reader.erl index 2e4df5365..6adf1af5e 100644 --- a/src/couch_replicator/src/couch_replicator_changes_reader.erl +++ b/src/couch_replicator/src/couch_replicator_changes_reader.erl @@ -22,11 +22,8 @@ -include_lib("couch_replicator/include/couch_replicator_api_wrap.hrl"). -include("couch_replicator.hrl"). --import(couch_util, [ - get_value/2 -]). -start_link(StartSeq, #httpdb{} = Db, ChangesQueue, Options) -> +start_link(StartSeq, #httpdb{} = Db, ChangesQueue, #{} = Options) -> Parent = self(), {ok, spawn_link(fun() -> put(last_seq, StartSeq), @@ -41,12 +38,12 @@ start_link(StartSeq, Db, ChangesQueue, Options) -> end)}. read_changes(Parent, StartSeq, Db, ChangesQueue, Options) -> - Continuous = couch_util:get_value(continuous, Options), + Continuous = maps:get(<<"continuous">>, Options, false), try couch_replicator_api_wrap:changes_since(Db, all_docs, StartSeq, fun(Item) -> process_change(Item, {Parent, Db, ChangesQueue, Continuous}) - end, Options), + end, couch_replicator_utils:proplist_options(Options)), couch_work_queue:close(ChangesQueue) catch throw:recurse -> diff --git a/src/couch_replicator/src/couch_replicator_clustering.erl b/src/couch_replicator/src/couch_replicator_clustering.erl deleted file mode 100644 index 18de1e825..000000000 --- a/src/couch_replicator/src/couch_replicator_clustering.erl +++ /dev/null @@ -1,279 +0,0 @@ -% Licensed under the Apache License, Version 2.0 (the "License"); you may not -% use this file except in compliance with the License. You may obtain a copy of -% the License at -% -% http://www.apache.org/licenses/LICENSE-2.0 -% -% Unless required by applicable law or agreed to in writing, software -% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -% License for the specific language governing permissions and limitations under -% the License. - - -% Maintain cluster membership and stability notifications for replications. -% On changes to cluster membership, broadcast events to `replication` gen_event. -% Listeners will get `{cluster, stable}` or `{cluster, unstable}` events. -% -% Cluster stability is defined as "there have been no nodes added or removed in -% last `QuietPeriod` seconds". QuietPeriod value is configurable. To ensure a -% speedier startup, during initialization there is a shorter StartupPeriod -% in effect (also configurable). -% -% This module is also in charge of calculating ownership of replications based -% on where their _replicator db documents shards live. - - --module(couch_replicator_clustering). - --behaviour(gen_server). --behaviour(config_listener). --behaviour(mem3_cluster). - --export([ - start_link/0 -]). - --export([ - init/1, - terminate/2, - handle_call/3, - handle_info/2, - handle_cast/2, - code_change/3 -]). 
- --export([ - owner/2, - is_stable/0, - link_cluster_event_listener/3 -]). - -% config_listener callbacks --export([ - handle_config_change/5, - handle_config_terminate/3 -]). - -% mem3_cluster callbacks --export([ - cluster_stable/1, - cluster_unstable/1 -]). - --include_lib("couch/include/couch_db.hrl"). --include_lib("mem3/include/mem3.hrl"). - --define(DEFAULT_QUIET_PERIOD, 60). % seconds --define(DEFAULT_START_PERIOD, 5). % seconds --define(RELISTEN_DELAY, 5000). - --record(state, { - mem3_cluster_pid :: pid(), - cluster_stable :: boolean() -}). - - --spec start_link() -> {ok, pid()} | ignore | {error, term()}. -start_link() -> - gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). - - -% owner/2 function computes ownership for a {DbName, DocId} tuple -% `unstable` if cluster is considered to be unstable i.e. it has changed -% recently, or returns node() which of the owner. -% --spec owner(Dbname :: binary(), DocId :: binary()) -> node() | unstable. -owner(<<"shards/", _/binary>> = DbName, DocId) -> - case is_stable() of - false -> - unstable; - true -> - owner_int(DbName, DocId) - end; -owner(_DbName, _DocId) -> - node(). - - --spec is_stable() -> true | false. -is_stable() -> - gen_server:call(?MODULE, is_stable). - - --spec link_cluster_event_listener(atom(), atom(), list()) -> pid(). -link_cluster_event_listener(Mod, Fun, Args) - when is_atom(Mod), is_atom(Fun), is_list(Args) -> - CallbackFun = - fun(Event = {cluster, _}) -> erlang:apply(Mod, Fun, Args ++ [Event]); - (_) -> ok - end, - {ok, Pid} = couch_replicator_notifier:start_link(CallbackFun), - Pid. - - -% Mem3 cluster callbacks - -cluster_unstable(Server) -> - ok = gen_server:call(Server, set_unstable), - couch_replicator_notifier:notify({cluster, unstable}), - couch_stats:update_gauge([couch_replicator, cluster_is_stable], 0), - couch_log:notice("~s : cluster unstable", [?MODULE]), - Server. - -cluster_stable(Server) -> - ok = gen_server:call(Server, set_stable), - couch_replicator_notifier:notify({cluster, stable}), - couch_stats:update_gauge([couch_replicator, cluster_is_stable], 1), - couch_log:notice("~s : cluster stable", [?MODULE]), - Server. - - -% gen_server callbacks - -init([]) -> - ok = config:listen_for_changes(?MODULE, nil), - Period = abs(config:get_integer("replicator", "cluster_quiet_period", - ?DEFAULT_QUIET_PERIOD)), - StartPeriod = abs(config:get_integer("replicator", "cluster_start_period", - ?DEFAULT_START_PERIOD)), - couch_stats:update_gauge([couch_replicator, cluster_is_stable], 0), - {ok, Mem3Cluster} = mem3_cluster:start_link(?MODULE, self(), StartPeriod, - Period), - {ok, #state{mem3_cluster_pid = Mem3Cluster, cluster_stable = false}}. - - -terminate(_Reason, _State) -> - ok. - - -handle_call(is_stable, _From, #state{cluster_stable = IsStable} = State) -> - {reply, IsStable, State}; - -handle_call(set_stable, _From, State) -> - {reply, ok, State#state{cluster_stable = true}}; - -handle_call(set_unstable, _From, State) -> - {reply, ok, State#state{cluster_stable = false}}. - - -handle_cast({set_period, Period}, #state{mem3_cluster_pid = Pid} = State) -> - ok = mem3_cluster:set_period(Pid, Period), - {noreply, State}. - - -handle_info(restart_config_listener, State) -> - ok = config:listen_for_changes(?MODULE, nil), - {noreply, State}. - - -code_change(_OldVsn, State, _Extra) -> - {ok, State}. 
- - -%% Internal functions - - -handle_config_change("replicator", "cluster_quiet_period", V, _, S) -> - ok = gen_server:cast(?MODULE, {set_period, list_to_integer(V)}), - {ok, S}; -handle_config_change(_, _, _, _, S) -> - {ok, S}. - - -handle_config_terminate(_, stop, _) -> ok; -handle_config_terminate(_S, _R, _St) -> - Pid = whereis(?MODULE), - erlang:send_after(?RELISTEN_DELAY, Pid, restart_config_listener). - - --spec owner_int(binary(), binary()) -> node(). -owner_int(ShardName, DocId) -> - DbName = mem3:dbname(ShardName), - Live = [node() | nodes()], - Shards = mem3:shards(DbName, DocId), - Nodes = [N || #shard{node=N} <- Shards, lists:member(N, Live)], - mem3:owner(DbName, DocId, Nodes). - - - --ifdef(TEST). - --include_lib("eunit/include/eunit.hrl"). - - -replicator_clustering_test_() -> - { - setup, - fun setup_all/0, - fun teardown_all/1, - { - foreach, - fun setup/0, - fun teardown/1, - [ - t_stable_callback(), - t_unstable_callback() - ] - } - }. - - -t_stable_callback() -> - ?_test(begin - ?assertEqual(false, is_stable()), - cluster_stable(whereis(?MODULE)), - ?assertEqual(true, is_stable()) - end). - - -t_unstable_callback() -> - ?_test(begin - cluster_stable(whereis(?MODULE)), - ?assertEqual(true, is_stable()), - cluster_unstable(whereis(?MODULE)), - ?assertEqual(false, is_stable()) - end). - - -setup_all() -> - meck:expect(couch_log, notice, 2, ok), - meck:expect(config, get, fun(_, _, Default) -> Default end), - meck:expect(config, listen_for_changes, 2, ok), - meck:expect(couch_stats, update_gauge, 2, ok), - meck:expect(couch_replicator_notifier, notify, 1, ok). - - -teardown_all(_) -> - meck:unload(). - - -setup() -> - meck:reset([ - config, - couch_log, - couch_stats, - couch_replicator_notifier - ]), - stop_clustering_process(), - {ok, Pid} = start_link(), - Pid. - - -teardown(Pid) -> - stop_clustering_process(Pid). - - -stop_clustering_process() -> - stop_clustering_process(whereis(?MODULE)). - - -stop_clustering_process(undefined) -> - ok; - -stop_clustering_process(Pid) when is_pid(Pid) -> - Ref = erlang:monitor(process, Pid), - unlink(Pid), - exit(Pid, kill), - receive {'DOWN', Ref, _, _, _} -> ok end. - --endif. diff --git a/src/couch_replicator/src/couch_replicator_db_changes.erl b/src/couch_replicator/src/couch_replicator_db_changes.erl deleted file mode 100644 index 92b0222c4..000000000 --- a/src/couch_replicator/src/couch_replicator_db_changes.erl +++ /dev/null @@ -1,108 +0,0 @@ -% Licensed under the Apache License, Version 2.0 (the "License"); you may not -% use this file except in compliance with the License. You may obtain a copy of -% the License at -% -% http://www.apache.org/licenses/LICENSE-2.0 -% -% Unless required by applicable law or agreed to in writing, software -% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -% License for the specific language governing permissions and limitations under -% the License. - --module(couch_replicator_db_changes). - --behaviour(gen_server). - --export([ - start_link/0 -]). - --export([ - init/1, - terminate/2, - handle_call/3, - handle_info/2, - handle_cast/2, - code_change/3 -]). - --export([ - notify_cluster_event/2 -]). - --record(state, { - event_listener :: pid(), - mdb_changes :: pid() | nil -}). - - --spec notify_cluster_event(pid(), {cluster, any()}) -> ok. -notify_cluster_event(Server, {cluster, _} = Event) -> - gen_server:cast(Server, Event). 
- - --spec start_link() -> - {ok, pid()} | ignore | {error, any()}. -start_link() -> - gen_server:start_link(?MODULE, [], []). - - -init([]) -> - EvtPid = couch_replicator_clustering:link_cluster_event_listener(?MODULE, - notify_cluster_event, [self()]), - State = #state{event_listener = EvtPid, mdb_changes = nil}, - case couch_replicator_clustering:is_stable() of - true -> - {ok, restart_mdb_changes(State)}; - false -> - {ok, State} - end. - - -terminate(_Reason, _State) -> - ok. - - -handle_call(_Msg, _From, State) -> - {reply, {error, invalid_call}, State}. - - -handle_cast({cluster, unstable}, State) -> - {noreply, stop_mdb_changes(State)}; - -handle_cast({cluster, stable}, State) -> - {noreply, restart_mdb_changes(State)}. - - -handle_info(_Msg, State) -> - {noreply, State}. - - -code_change(_OldVsn, State, _Extra) -> - {ok, State}. - - --spec restart_mdb_changes(#state{}) -> #state{}. -restart_mdb_changes(#state{mdb_changes = nil} = State) -> - Suffix = <<"_replicator">>, - CallbackMod = couch_replicator_doc_processor, - Options = [skip_ddocs], - {ok, Pid} = couch_multidb_changes:start_link(Suffix, CallbackMod, nil, - Options), - couch_stats:increment_counter([couch_replicator, db_scans]), - couch_log:notice("Started replicator db changes listener ~p", [Pid]), - State#state{mdb_changes = Pid}; - -restart_mdb_changes(#state{mdb_changes = _Pid} = State) -> - restart_mdb_changes(stop_mdb_changes(State)). - - --spec stop_mdb_changes(#state{}) -> #state{}. -stop_mdb_changes(#state{mdb_changes = nil} = State) -> - State; -stop_mdb_changes(#state{mdb_changes = Pid} = State) -> - couch_log:notice("Stopping replicator db changes listener ~p", [Pid]), - unlink(Pid), - exit(Pid, kill), - State#state{mdb_changes = nil}. diff --git a/src/couch_replicator/src/couch_replicator_doc_processor.erl b/src/couch_replicator/src/couch_replicator_doc_processor.erl deleted file mode 100644 index 6778d537d..000000000 --- a/src/couch_replicator/src/couch_replicator_doc_processor.erl +++ /dev/null @@ -1,962 +0,0 @@ -% Licensed under the Apache License, Version 2.0 (the "License"); you may not -% use this file except in compliance with the License. You may obtain a copy of -% the License at -% -% http://www.apache.org/licenses/LICENSE-2.0 -% -% Unless required by applicable law or agreed to in writing, software -% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -% License for the specific language governing permissions and limitations under -% the License. - --module(couch_replicator_doc_processor). - --behaviour(gen_server). --behaviour(couch_multidb_changes). - --export([ - start_link/0 -]). - --export([ - init/1, - terminate/2, - handle_call/3, - handle_info/2, - handle_cast/2, - code_change/3 -]). - --export([ - db_created/2, - db_deleted/2, - db_found/2, - db_change/3 -]). - --export([ - docs/1, - doc/2, - doc_lookup/3, - update_docs/0, - get_worker_ref/1, - notify_cluster_event/2 -]). - --include_lib("couch/include/couch_db.hrl"). --include("couch_replicator.hrl"). --include_lib("mem3/include/mem3.hrl"). - --import(couch_replicator_utils, [ - get_json_value/2, - get_json_value/3 -]). - --define(DEFAULT_UPDATE_DOCS, false). --define(ERROR_MAX_BACKOFF_EXPONENT, 12). % ~ 1 day on average --define(TS_DAY_SEC, 86400). --define(INITIAL_BACKOFF_EXPONENT, 64). --define(MIN_FILTER_DELAY_SEC, 60). - --type filter_type() :: nil | view | user | docids | mango. --type repstate() :: initializing | error | scheduled. 
- - --record(rdoc, { - id :: db_doc_id() | '_' | {any(), '_'}, - state :: repstate() | '_', - rep :: #rep{} | nil | '_', - rid :: rep_id() | nil | '_', - filter :: filter_type() | '_', - info :: binary() | nil | '_', - errcnt :: non_neg_integer() | '_', - worker :: reference() | nil | '_', - last_updated :: erlang:timestamp() | '_' -}). - - -% couch_multidb_changes API callbacks - -db_created(DbName, Server) -> - couch_stats:increment_counter([couch_replicator, docs, dbs_created]), - couch_replicator_docs:ensure_rep_ddoc_exists(DbName), - Server. - - -db_deleted(DbName, Server) -> - couch_stats:increment_counter([couch_replicator, docs, dbs_deleted]), - ok = gen_server:call(?MODULE, {clean_up_replications, DbName}, infinity), - Server. - - -db_found(DbName, Server) -> - couch_stats:increment_counter([couch_replicator, docs, dbs_found]), - couch_replicator_docs:ensure_rep_ddoc_exists(DbName), - Server. - - -db_change(DbName, {ChangeProps} = Change, Server) -> - couch_stats:increment_counter([couch_replicator, docs, db_changes]), - try - ok = process_change(DbName, Change) - catch - exit:{Error, {gen_server, call, [?MODULE, _, _]}} -> - ErrMsg = "~p exited ~p while processing change from db ~p", - couch_log:error(ErrMsg, [?MODULE, Error, DbName]); - _Tag:Error -> - {RepProps} = get_json_value(doc, ChangeProps), - DocId = get_json_value(<<"_id">>, RepProps), - couch_replicator_docs:update_failed(DbName, DocId, Error) - end, - Server. - - --spec get_worker_ref(db_doc_id()) -> reference() | nil. -get_worker_ref({DbName, DocId}) when is_binary(DbName), is_binary(DocId) -> - case ets:lookup(?MODULE, {DbName, DocId}) of - [#rdoc{worker = WRef}] when is_reference(WRef) -> - WRef; - [#rdoc{worker = nil}] -> - nil; - [] -> - nil - end. - - -% Cluster membership change notification callback --spec notify_cluster_event(pid(), {cluster, any()}) -> ok. -notify_cluster_event(Server, {cluster, _} = Event) -> - gen_server:cast(Server, Event). - - -process_change(DbName, {Change}) -> - {RepProps} = JsonRepDoc = get_json_value(doc, Change), - DocId = get_json_value(<<"_id">>, RepProps), - Owner = couch_replicator_clustering:owner(DbName, DocId), - Id = {DbName, DocId}, - case {Owner, get_json_value(deleted, Change, false)} of - {_, true} -> - ok = gen_server:call(?MODULE, {removed, Id}, infinity); - {unstable, false} -> - couch_log:notice("Not starting '~s' as cluster is unstable", [DocId]); - {ThisNode, false} when ThisNode =:= node() -> - case get_json_value(<<"_replication_state">>, RepProps) of - undefined -> - ok = process_updated(Id, JsonRepDoc); - <<"triggered">> -> - maybe_remove_state_fields(DbName, DocId), - ok = process_updated(Id, JsonRepDoc); - <<"completed">> -> - ok = gen_server:call(?MODULE, {completed, Id}, infinity); - <<"error">> -> - % Handle replications started from older versions of replicator - % which wrote transient errors to replication docs - maybe_remove_state_fields(DbName, DocId), - ok = process_updated(Id, JsonRepDoc); - <<"failed">> -> - ok - end; - {Owner, false} -> - ok - end, - ok. - - -maybe_remove_state_fields(DbName, DocId) -> - case update_docs() of - true -> - ok; - false -> - couch_replicator_docs:remove_state_fields(DbName, DocId) - end. - - -process_updated({DbName, _DocId} = Id, JsonRepDoc) -> - % Parsing replication doc (but not calculating the id) could throw an - % exception which would indicate this document is malformed. This exception - % should propagate to db_change function and will be recorded as permanent - % failure in the document. 
User will have to update the documet to fix the - % problem. - Rep0 = couch_replicator_docs:parse_rep_doc_without_id(JsonRepDoc), - Rep = Rep0#rep{db_name = DbName, start_time = os:timestamp()}, - Filter = case couch_replicator_filters:parse(Rep#rep.options) of - {ok, nil} -> - nil; - {ok, {user, _FName, _QP}} -> - user; - {ok, {view, _FName, _QP}} -> - view; - {ok, {docids, _DocIds}} -> - docids; - {ok, {mango, _Selector}} -> - mango; - {error, FilterError} -> - throw(FilterError) - end, - gen_server:call(?MODULE, {updated, Id, Rep, Filter}, infinity). - - -% Doc processor gen_server API and callbacks - -start_link() -> - gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). - - -init([]) -> - ?MODULE = ets:new(?MODULE, [named_table, {keypos, #rdoc.id}, - {read_concurrency, true}, {write_concurrency, true}]), - couch_replicator_clustering:link_cluster_event_listener(?MODULE, - notify_cluster_event, [self()]), - {ok, nil}. - - -terminate(_Reason, _State) -> - ok. - - -handle_call({updated, Id, Rep, Filter}, _From, State) -> - ok = updated_doc(Id, Rep, Filter), - {reply, ok, State}; - -handle_call({removed, Id}, _From, State) -> - ok = removed_doc(Id), - {reply, ok, State}; - -handle_call({completed, Id}, _From, State) -> - true = ets:delete(?MODULE, Id), - {reply, ok, State}; - -handle_call({clean_up_replications, DbName}, _From, State) -> - ok = removed_db(DbName), - {reply, ok, State}. - -handle_cast({cluster, unstable}, State) -> - % Ignoring unstable state transition - {noreply, State}; - -handle_cast({cluster, stable}, State) -> - % Membership changed recheck all the replication document ownership - nil = ets:foldl(fun cluster_membership_foldl/2, nil, ?MODULE), - {noreply, State}; - -handle_cast(Msg, State) -> - {stop, {error, unexpected_message, Msg}, State}. - - -handle_info({'DOWN', _, _, _, #doc_worker_result{id = Id, wref = Ref, - result = Res}}, State) -> - ok = worker_returned(Ref, Id, Res), - {noreply, State}; - -handle_info(_Msg, State) -> - {noreply, State}. - - -code_change(_OldVsn, State, _Extra) -> - {ok, State}. - - -% Doc processor gen_server private helper functions - -% Handle doc update -- add to ets, then start a worker to try to turn it into -% a replication job. In most cases it will succeed quickly but for filtered -% replications or if there are duplicates, it could take longer -% (theoretically indefinitely) until a replication could be started. Before -% adding replication job, make sure to delete all old jobs associated with -% same document. --spec updated_doc(db_doc_id(), #rep{}, filter_type()) -> ok. -updated_doc(Id, Rep, Filter) -> - NormCurRep = couch_replicator_utils:normalize_rep(current_rep(Id)), - NormNewRep = couch_replicator_utils:normalize_rep(Rep), - case NormCurRep == NormNewRep of - false -> - removed_doc(Id), - Row = #rdoc{ - id = Id, - state = initializing, - rep = Rep, - rid = nil, - filter = Filter, - info = nil, - errcnt = 0, - worker = nil, - last_updated = os:timestamp() - }, - true = ets:insert(?MODULE, Row), - ok = maybe_start_worker(Id); - true -> - ok - end. - - -% Return current #rep{} record if any. If replication hasn't been submitted -% to the scheduler yet, #rep{} record will be in the document processor's -% ETS table, otherwise query scheduler for the #rep{} record. --spec current_rep({binary(), binary()}) -> #rep{} | nil. 
-current_rep({DbName, DocId}) when is_binary(DbName), is_binary(DocId) -> - case ets:lookup(?MODULE, {DbName, DocId}) of - [] -> - nil; - [#rdoc{state = scheduled, rep = nil, rid = JobId}] -> - % When replication is scheduled, #rep{} record which can be quite - % large compared to other bits in #rdoc is removed in order to avoid - % having to keep 2 copies of it. So have to fetch it from the - % scheduler. - couch_replicator_scheduler:rep_state(JobId); - [#rdoc{rep = Rep}] -> - Rep - end. - - --spec worker_returned(reference(), db_doc_id(), rep_start_result()) -> ok. -worker_returned(Ref, Id, {ok, RepId}) -> - case ets:lookup(?MODULE, Id) of - [#rdoc{worker = Ref} = Row] -> - Row0 = Row#rdoc{ - state = scheduled, - errcnt = 0, - worker = nil, - last_updated = os:timestamp() - }, - NewRow = case Row0 of - #rdoc{rid = RepId, filter = user} -> - % Filtered replication id didn't change. - Row0; - #rdoc{rid = nil, filter = user} -> - % Calculated new replication id for a filtered replication. Make - % sure to schedule another check as filter code could change. - % Replication starts could have been failing, so also clear - % error count. - Row0#rdoc{rid = RepId}; - #rdoc{rid = OldRepId, filter = user} -> - % Replication id of existing replication job with filter has - % changed. Remove old replication job from scheduler and - % schedule check to check for future changes. - ok = couch_replicator_scheduler:remove_job(OldRepId), - Msg = io_lib:format("Replication id changed: ~p -> ~p", [ - OldRepId, RepId]), - Row0#rdoc{rid = RepId, info = couch_util:to_binary(Msg)}; - #rdoc{rid = nil} -> - % Calculated new replication id for non-filtered replication. - % Remove replication doc body, after this we won't need it - % anymore. - Row0#rdoc{rep=nil, rid=RepId, info=nil} - end, - true = ets:insert(?MODULE, NewRow), - ok = maybe_update_doc_triggered(Row#rdoc.rep, RepId), - ok = maybe_start_worker(Id); - _ -> - ok % doc could have been deleted, ignore - end, - ok; - -worker_returned(_Ref, _Id, ignore) -> - ok; - -worker_returned(Ref, Id, {temporary_error, Reason}) -> - case ets:lookup(?MODULE, Id) of - [#rdoc{worker = Ref, errcnt = ErrCnt} = Row] -> - NewRow = Row#rdoc{ - rid = nil, - state = error, - info = Reason, - errcnt = ErrCnt + 1, - worker = nil, - last_updated = os:timestamp() - }, - true = ets:insert(?MODULE, NewRow), - ok = maybe_update_doc_error(NewRow#rdoc.rep, Reason), - ok = maybe_start_worker(Id); - _ -> - ok % doc could have been deleted, ignore - end, - ok; - -worker_returned(Ref, Id, {permanent_failure, _Reason}) -> - case ets:lookup(?MODULE, Id) of - [#rdoc{worker = Ref}] -> - true = ets:delete(?MODULE, Id); - _ -> - ok % doc could have been deleted, ignore - end, - ok. - - --spec maybe_update_doc_error(#rep{}, any()) -> ok. -maybe_update_doc_error(Rep, Reason) -> - case update_docs() of - true -> - couch_replicator_docs:update_error(Rep, Reason); - false -> - ok - end. - - --spec maybe_update_doc_triggered(#rep{}, rep_id()) -> ok. -maybe_update_doc_triggered(Rep, RepId) -> - case update_docs() of - true -> - couch_replicator_docs:update_triggered(Rep, RepId); - false -> - ok - end. - - --spec error_backoff(non_neg_integer()) -> seconds(). -error_backoff(ErrCnt) -> - Exp = min(ErrCnt, ?ERROR_MAX_BACKOFF_EXPONENT), - % ErrCnt is the exponent here. The reason 64 is used is to start at - % 64 (about a minute) max range. Then first backoff would be 30 sec - % on average. Then 1 minute and so on. - couch_rand:uniform(?INITIAL_BACKOFF_EXPONENT bsl Exp). 
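% --- Illustrative note (editor's addition, describing the removed code) -----
% The error_backoff/1 function above (part of the module being deleted in this
% diff) drew a uniform wait from a window that doubles with every consecutive
% error and stops growing after ?ERROR_MAX_BACKOFF_EXPONENT (12) doublings:
%
%   error_backoff(0)  -> couch_rand:uniform(64)         % ~30 s on average
%   error_backoff(1)  -> couch_rand:uniform(128)        % ~1 min on average
%   error_backoff(2)  -> couch_rand:uniform(256)        % ~2 min on average
%   error_backoff(12) -> couch_rand:uniform(64 bsl 12)  % capped window; roughly
%                                                       % a day-scale wait on
%                                                       % average, per the
%                                                       % comment above
% -----------------------------------------------------------------------------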
- - --spec filter_backoff() -> seconds(). -filter_backoff() -> - Total = ets:info(?MODULE, size), - % This value scaled by the number of replications. If the are a lot of them - % wait is longer, but not more than a day (?TS_DAY_SEC). If there are just - % few, wait is shorter, starting at about 30 seconds. `2 *` is used since - % the expected wait would then be 0.5 * Range so it is easier to see the - % average wait. `1 +` is used because couch_rand:uniform only - % accepts >= 1 values and crashes otherwise. - Range = 1 + min(2 * (Total / 10), ?TS_DAY_SEC), - ?MIN_FILTER_DELAY_SEC + couch_rand:uniform(round(Range)). - - -% Document removed from db -- clear ets table and remove all scheduled jobs --spec removed_doc(db_doc_id()) -> ok. -removed_doc({DbName, DocId} = Id) -> - ets:delete(?MODULE, Id), - RepIds = couch_replicator_scheduler:find_jobs_by_doc(DbName, DocId), - lists:foreach(fun couch_replicator_scheduler:remove_job/1, RepIds). - - -% Whole db shard is gone -- remove all its ets rows and stop jobs --spec removed_db(binary()) -> ok. -removed_db(DbName) -> - EtsPat = #rdoc{id = {DbName, '_'}, _ = '_'}, - ets:match_delete(?MODULE, EtsPat), - RepIds = couch_replicator_scheduler:find_jobs_by_dbname(DbName), - lists:foreach(fun couch_replicator_scheduler:remove_job/1, RepIds). - - -% Spawn a worker process which will attempt to calculate a replication id, then -% start a replication. Returns a process monitor reference. The worker is -% guaranteed to exit with rep_start_result() type only. --spec maybe_start_worker(db_doc_id()) -> ok. -maybe_start_worker(Id) -> - case ets:lookup(?MODULE, Id) of - [] -> - ok; - [#rdoc{state = scheduled, filter = Filter}] when Filter =/= user -> - ok; - [#rdoc{rep = Rep} = Doc] -> - % For any replication with a user created filter function, periodically - % (every `filter_backoff/0` seconds) to try to see if the user filter - % has changed by using a worker to check for changes. When the worker - % returns check if replication ID has changed. If it hasn't keep - % checking (spawn another worker and so on). If it has stop the job - % with the old ID and continue checking. - Wait = get_worker_wait(Doc), - Ref = make_ref(), - true = ets:insert(?MODULE, Doc#rdoc{worker = Ref}), - couch_replicator_doc_processor_worker:spawn_worker(Id, Rep, Wait, Ref), - ok - end. - - --spec get_worker_wait(#rdoc{}) -> seconds(). -get_worker_wait(#rdoc{state = scheduled, filter = user}) -> - filter_backoff(); -get_worker_wait(#rdoc{state = error, errcnt = ErrCnt}) -> - error_backoff(ErrCnt); -get_worker_wait(#rdoc{state = initializing}) -> - 0. - - --spec update_docs() -> boolean(). -update_docs() -> - config:get_boolean("replicator", "update_docs", ?DEFAULT_UPDATE_DOCS). - - -% _scheduler/docs HTTP endpoint helpers - --spec docs([atom()]) -> [{[_]}] | []. -docs(States) -> - HealthThreshold = couch_replicator_scheduler:health_threshold(), - ets:foldl(fun(RDoc, Acc) -> - case ejson_doc(RDoc, HealthThreshold) of - nil -> - Acc; % Could have been deleted if job just completed - {Props} = EJson -> - {state, DocState} = lists:keyfind(state, 1, Props), - case ejson_doc_state_filter(DocState, States) of - true -> - [EJson | Acc]; - false -> - Acc - end - end - end, [], ?MODULE). - - --spec doc(binary(), binary()) -> {ok, {[_]}} | {error, not_found}. 
-doc(Db, DocId) -> - HealthThreshold = couch_replicator_scheduler:health_threshold(), - Res = (catch ets:foldl(fun(RDoc, nil) -> - {Shard, RDocId} = RDoc#rdoc.id, - case {mem3:dbname(Shard), RDocId} of - {Db, DocId} -> - throw({found, ejson_doc(RDoc, HealthThreshold)}); - {_OtherDb, _OtherDocId} -> - nil - end - end, nil, ?MODULE)), - case Res of - {found, DocInfo} -> - {ok, DocInfo}; - nil -> - {error, not_found} - end. - - --spec doc_lookup(binary(), binary(), integer()) -> - {ok, {[_]}} | {error, not_found}. -doc_lookup(Db, DocId, HealthThreshold) -> - case ets:lookup(?MODULE, {Db, DocId}) of - [#rdoc{} = RDoc] -> - {ok, ejson_doc(RDoc, HealthThreshold)}; - [] -> - {error, not_found} - end. - - --spec ejson_rep_id(rep_id() | nil) -> binary() | null. -ejson_rep_id(nil) -> - null; -ejson_rep_id({BaseId, Ext}) -> - iolist_to_binary([BaseId, Ext]). - - --spec ejson_doc(#rdoc{}, non_neg_integer()) -> {[_]} | nil. -ejson_doc(#rdoc{state = scheduled} = RDoc, HealthThreshold) -> - #rdoc{id = {DbName, DocId}, rid = RepId} = RDoc, - JobProps = couch_replicator_scheduler:job_summary(RepId, HealthThreshold), - case JobProps of - nil -> - nil; - [{_, _} | _] -> - {[ - {doc_id, DocId}, - {database, DbName}, - {id, ejson_rep_id(RepId)}, - {node, node()} | JobProps - ]} - end; - -ejson_doc(#rdoc{state = RepState} = RDoc, _HealthThreshold) -> - #rdoc{ - id = {DbName, DocId}, - info = StateInfo, - rid = RepId, - errcnt = ErrorCount, - last_updated = StateTime, - rep = Rep - } = RDoc, - {[ - {doc_id, DocId}, - {database, DbName}, - {id, ejson_rep_id(RepId)}, - {state, RepState}, - {info, couch_replicator_utils:ejson_state_info(StateInfo)}, - {error_count, ErrorCount}, - {node, node()}, - {last_updated, couch_replicator_utils:iso8601(StateTime)}, - {start_time, couch_replicator_utils:iso8601(Rep#rep.start_time)} - ]}. - - --spec ejson_doc_state_filter(atom(), [atom()]) -> boolean(). -ejson_doc_state_filter(_DocState, []) -> - true; -ejson_doc_state_filter(State, States) when is_list(States), is_atom(State) -> - lists:member(State, States). - - --spec cluster_membership_foldl(#rdoc{}, nil) -> nil. -cluster_membership_foldl(#rdoc{id = {DbName, DocId} = Id, rid = RepId}, nil) -> - case couch_replicator_clustering:owner(DbName, DocId) of - unstable -> - nil; - ThisNode when ThisNode =:= node() -> - nil; - OtherNode -> - Msg = "Replication doc ~p:~p with id ~p usurped by node ~p", - couch_log:notice(Msg, [DbName, DocId, RepId, OtherNode]), - removed_doc(Id), - nil - end. - - --ifdef(TEST). - --include_lib("eunit/include/eunit.hrl"). - --define(DB, <<"db">>). --define(EXIT_DB, <<"exit_db">>). --define(DOC1, <<"doc1">>). --define(DOC2, <<"doc2">>). --define(R1, {"1", ""}). --define(R2, {"2", ""}). - - -doc_processor_test_() -> - { - setup, - fun setup_all/0, - fun teardown_all/1, - { - foreach, - fun setup/0, - fun teardown/1, - [ - t_bad_change(), - t_regular_change(), - t_change_with_doc_processor_crash(), - t_change_with_existing_job(), - t_deleted_change(), - t_triggered_change(), - t_completed_change(), - t_active_replication_completed(), - t_error_change(), - t_failed_change(), - t_change_for_different_node(), - t_change_when_cluster_unstable(), - t_ejson_docs(), - t_cluster_membership_foldl() - ] - } - }. - - -% Can't parse replication doc, so should write failure state to document. -t_bad_change() -> - ?_test(begin - ?assertEqual(acc, db_change(?DB, bad_change(), acc)), - ?assert(updated_doc_with_failed_state()) - end). - - -% Regular change, parse to a #rep{} and then add job. 
-t_regular_change() -> - ?_test(begin - mock_existing_jobs_lookup([]), - ?assertEqual(ok, process_change(?DB, change())), - ?assert(ets:member(?MODULE, {?DB, ?DOC1})), - ?assert(started_worker({?DB, ?DOC1})) - end). - - -% Handle cases where doc processor exits or crashes while processing a change -t_change_with_doc_processor_crash() -> - ?_test(begin - mock_existing_jobs_lookup([]), - ?assertEqual(acc, db_change(?EXIT_DB, change(), acc)), - ?assert(failed_state_not_updated()) - end). - - -% Regular change, parse to a #rep{} and then add job but there is already -% a running job with same Id found. -t_change_with_existing_job() -> - ?_test(begin - mock_existing_jobs_lookup([test_rep(?R2)]), - ?assertEqual(ok, process_change(?DB, change())), - ?assert(ets:member(?MODULE, {?DB, ?DOC1})), - ?assert(started_worker({?DB, ?DOC1})) - end). - - -% Change is a deletion, and job is running, so remove job. -t_deleted_change() -> - ?_test(begin - mock_existing_jobs_lookup([test_rep(?R2)]), - ?assertEqual(ok, process_change(?DB, deleted_change())), - ?assert(removed_job(?R2)) - end). - - -% Change is in `triggered` state. Remove legacy state and add job. -t_triggered_change() -> - ?_test(begin - mock_existing_jobs_lookup([]), - ?assertEqual(ok, process_change(?DB, change(<<"triggered">>))), - ?assert(removed_state_fields()), - ?assert(ets:member(?MODULE, {?DB, ?DOC1})), - ?assert(started_worker({?DB, ?DOC1})) - end). - - -% Change is in `completed` state, so skip over it. -t_completed_change() -> - ?_test(begin - ?assertEqual(ok, process_change(?DB, change(<<"completed">>))), - ?assert(did_not_remove_state_fields()), - ?assertNot(ets:member(?MODULE, {?DB, ?DOC1})), - ?assert(did_not_spawn_worker()) - end). - - -% Completed change comes for what used to be an active job. In this case -% remove entry from doc_processor's ets (because there is no linkage or -% callback mechanism for scheduler to tell doc_processsor a replication just -% completed). -t_active_replication_completed() -> - ?_test(begin - mock_existing_jobs_lookup([]), - ?assertEqual(ok, process_change(?DB, change())), - ?assert(ets:member(?MODULE, {?DB, ?DOC1})), - ?assertEqual(ok, process_change(?DB, change(<<"completed">>))), - ?assert(did_not_remove_state_fields()), - ?assertNot(ets:member(?MODULE, {?DB, ?DOC1})) - end). - - -% Change is in `error` state. Remove legacy state and retry -% running the job. This state was used for transient erorrs which are not -% written to the document anymore. -t_error_change() -> - ?_test(begin - mock_existing_jobs_lookup([]), - ?assertEqual(ok, process_change(?DB, change(<<"error">>))), - ?assert(removed_state_fields()), - ?assert(ets:member(?MODULE, {?DB, ?DOC1})), - ?assert(started_worker({?DB, ?DOC1})) - end). - - -% Change is in `failed` state. This is a terminal state and it will not -% be tried again, so skip over it. -t_failed_change() -> - ?_test(begin - ?assertEqual(ok, process_change(?DB, change(<<"failed">>))), - ?assert(did_not_remove_state_fields()), - ?assertNot(ets:member(?MODULE, {?DB, ?DOC1})), - ?assert(did_not_spawn_worker()) - end). - - -% Normal change, but according to cluster ownership algorithm, replication -% belongs to a different node, so this node should skip it. -t_change_for_different_node() -> - ?_test(begin - meck:expect(couch_replicator_clustering, owner, 2, different_node), - ?assertEqual(ok, process_change(?DB, change())), - ?assert(did_not_spawn_worker()) - end). - - -% Change handled when cluster is unstable (nodes are added or removed), so -% job is not added. 
A rescan will be triggered soon and change will be -% evaluated again. -t_change_when_cluster_unstable() -> - ?_test(begin - meck:expect(couch_replicator_clustering, owner, 2, unstable), - ?assertEqual(ok, process_change(?DB, change())), - ?assert(did_not_spawn_worker()) - end). - - -% Check if docs/0 function produces expected ejson after adding a job -t_ejson_docs() -> - ?_test(begin - mock_existing_jobs_lookup([]), - ?assertEqual(ok, process_change(?DB, change())), - ?assert(ets:member(?MODULE, {?DB, ?DOC1})), - EJsonDocs = docs([]), - ?assertMatch([{[_|_]}], EJsonDocs), - [{DocProps}] = EJsonDocs, - {value, StateTime, DocProps1} = lists:keytake(last_updated, 1, - DocProps), - ?assertMatch({last_updated, BinVal1} when is_binary(BinVal1), - StateTime), - {value, StartTime, DocProps2} = lists:keytake(start_time, 1, DocProps1), - ?assertMatch({start_time, BinVal2} when is_binary(BinVal2), StartTime), - ExpectedProps = [ - {database, ?DB}, - {doc_id, ?DOC1}, - {error_count, 0}, - {id, null}, - {info, null}, - {node, node()}, - {state, initializing} - ], - ?assertEqual(ExpectedProps, lists:usort(DocProps2)) - end). - - -% Check that when cluster membership changes records from doc processor and job -% scheduler get removed -t_cluster_membership_foldl() -> - ?_test(begin - mock_existing_jobs_lookup([test_rep(?R1)]), - ?assertEqual(ok, process_change(?DB, change())), - meck:expect(couch_replicator_clustering, owner, 2, different_node), - ?assert(ets:member(?MODULE, {?DB, ?DOC1})), - gen_server:cast(?MODULE, {cluster, stable}), - meck:wait(2, couch_replicator_scheduler, find_jobs_by_doc, 2, 5000), - ?assertNot(ets:member(?MODULE, {?DB, ?DOC1})), - ?assert(removed_job(?R1)) - end). - - -get_worker_ref_test_() -> - { - setup, - fun() -> - ets:new(?MODULE, [named_table, public, {keypos, #rdoc.id}]) - end, - fun(_) -> ets:delete(?MODULE) end, - ?_test(begin - Id = {<<"db">>, <<"doc">>}, - ?assertEqual(nil, get_worker_ref(Id)), - ets:insert(?MODULE, #rdoc{id = Id, worker = nil}), - ?assertEqual(nil, get_worker_ref(Id)), - Ref = make_ref(), - ets:insert(?MODULE, #rdoc{id = Id, worker = Ref}), - ?assertEqual(Ref, get_worker_ref(Id)) - end) - }. - - -% Test helper functions - - -setup_all() -> - meck:expect(couch_log, info, 2, ok), - meck:expect(couch_log, notice, 2, ok), - meck:expect(couch_log, warning, 2, ok), - meck:expect(couch_log, error, 2, ok), - meck:expect(config, get, fun(_, _, Default) -> Default end), - meck:expect(config, listen_for_changes, 2, ok), - meck:expect(couch_replicator_clustering, owner, 2, node()), - meck:expect(couch_replicator_clustering, link_cluster_event_listener, 3, - ok), - meck:expect(couch_replicator_doc_processor_worker, spawn_worker, fun - ({?EXIT_DB, _}, _, _, _) -> exit(kapow); - (_, _, _, _) -> pid - end), - meck:expect(couch_replicator_scheduler, remove_job, 1, ok), - meck:expect(couch_replicator_docs, remove_state_fields, 2, ok), - meck:expect(couch_replicator_docs, update_failed, 3, ok). - - -teardown_all(_) -> - meck:unload(). - - -setup() -> - meck:reset([ - config, - couch_log, - couch_replicator_clustering, - couch_replicator_doc_processor_worker, - couch_replicator_docs, - couch_replicator_scheduler - ]), - % Set this expectation back to the default for - % each test since some tests change it - meck:expect(couch_replicator_clustering, owner, 2, node()), - {ok, Pid} = start_link(), - unlink(Pid), - Pid. - - -teardown(Pid) -> - exit(Pid, kill). - - -removed_state_fields() -> - meck:called(couch_replicator_docs, remove_state_fields, [?DB, ?DOC1]). 
- - -started_worker(_Id) -> - 1 == meck:num_calls(couch_replicator_doc_processor_worker, spawn_worker, 4). - - -removed_job(Id) -> - meck:called(couch_replicator_scheduler, remove_job, [test_rep(Id)]). - - -did_not_remove_state_fields() -> - 0 == meck:num_calls(couch_replicator_docs, remove_state_fields, '_'). - - -did_not_spawn_worker() -> - 0 == meck:num_calls(couch_replicator_doc_processor_worker, spawn_worker, - '_'). - -updated_doc_with_failed_state() -> - 1 == meck:num_calls(couch_replicator_docs, update_failed, '_'). - -failed_state_not_updated() -> - 0 == meck:num_calls(couch_replicator_docs, update_failed, '_'). - -mock_existing_jobs_lookup(ExistingJobs) -> - meck:expect(couch_replicator_scheduler, find_jobs_by_doc, fun - (?EXIT_DB, ?DOC1) -> []; - (?DB, ?DOC1) -> ExistingJobs - end). - - -test_rep(Id) -> - #rep{id = Id, start_time = {0, 0, 0}}. - - -change() -> - {[ - {<<"id">>, ?DOC1}, - {doc, {[ - {<<"_id">>, ?DOC1}, - {<<"source">>, <<"http://srchost.local/src">>}, - {<<"target">>, <<"http://tgthost.local/tgt">>} - ]}} - ]}. - - -change(State) -> - {[ - {<<"id">>, ?DOC1}, - {doc, {[ - {<<"_id">>, ?DOC1}, - {<<"source">>, <<"http://srchost.local/src">>}, - {<<"target">>, <<"http://tgthost.local/tgt">>}, - {<<"_replication_state">>, State} - ]}} - ]}. - - -deleted_change() -> - {[ - {<<"id">>, ?DOC1}, - {<<"deleted">>, true}, - {doc, {[ - {<<"_id">>, ?DOC1}, - {<<"source">>, <<"http://srchost.local/src">>}, - {<<"target">>, <<"http://tgthost.local/tgt">>} - ]}} - ]}. - - -bad_change() -> - {[ - {<<"id">>, ?DOC2}, - {doc, {[ - {<<"_id">>, ?DOC2}, - {<<"source">>, <<"src">>} - ]}} - ]}. - --endif. diff --git a/src/couch_replicator/src/couch_replicator_doc_processor_worker.erl b/src/couch_replicator/src/couch_replicator_doc_processor_worker.erl deleted file mode 100644 index a4c829323..000000000 --- a/src/couch_replicator/src/couch_replicator_doc_processor_worker.erl +++ /dev/null @@ -1,284 +0,0 @@ -% Licensed under the Apache License, Version 2.0 (the "License"); you may not -% use this file except in compliance with the License. You may obtain a copy of -% the License at -% -% http://www.apache.org/licenses/LICENSE-2.0 -% -% Unless required by applicable law or agreed to in writing, software -% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -% License for the specific language governing permissions and limitations under -% the License. - --module(couch_replicator_doc_processor_worker). - --export([ - spawn_worker/4 -]). - --include("couch_replicator.hrl"). - --import(couch_replicator_utils, [ - pp_rep_id/1 -]). - -% 61 seconds here because request usually have 10, 15, 30 second -% timeouts set. We'd want the worker to get a chance to make a few -% requests (maybe one failing one and a retry) and then fail with its -% own error (timeout, network error), which would be more specific and -% informative, before it simply gets killed because of the timeout -% here. That is, if all fails and the worker is actually blocked then -% 61 sec is a safety net to brutally kill the worker so doesn't end up -% hung forever. --define(WORKER_TIMEOUT_MSEC, 61000). - - -% Spawn a worker which attempts to calculate replication id then add a -% replication job to scheduler. This function create a monitor to the worker -% a worker will then exit with the #doc_worker_result{} record within -% ?WORKER_TIMEOUT_MSEC timeout period.A timeout is considered a -%`temporary_error`. 
Result will be sent as the `Reason` in the {'DOWN',...} -% message. --spec spawn_worker(db_doc_id(), #rep{}, seconds(), reference()) -> pid(). -spawn_worker(Id, Rep, WaitSec, WRef) -> - {Pid, _Ref} = spawn_monitor(fun() -> - worker_fun(Id, Rep, WaitSec, WRef) - end), - Pid. - - -% Private functions - --spec worker_fun(db_doc_id(), #rep{}, seconds(), reference()) -> no_return(). -worker_fun(Id, Rep, WaitSec, WRef) -> - timer:sleep(WaitSec * 1000), - Fun = fun() -> - try maybe_start_replication(Id, Rep, WRef) of - Res -> - exit(Res) - catch - throw:{filter_fetch_error, Reason} -> - exit({temporary_error, Reason}); - _Tag:Reason -> - exit({temporary_error, Reason}) - end - end, - {Pid, Ref} = spawn_monitor(Fun), - receive - {'DOWN', Ref, _, Pid, Result} -> - exit(#doc_worker_result{id = Id, wref = WRef, result = Result}) - after ?WORKER_TIMEOUT_MSEC -> - erlang:demonitor(Ref, [flush]), - exit(Pid, kill), - {DbName, DocId} = Id, - TimeoutSec = round(?WORKER_TIMEOUT_MSEC / 1000), - Msg = io_lib:format("Replication for db ~p doc ~p failed to start due " - "to timeout after ~B seconds", [DbName, DocId, TimeoutSec]), - Result = {temporary_error, couch_util:to_binary(Msg)}, - exit(#doc_worker_result{id = Id, wref = WRef, result = Result}) - end. - - -% Try to start a replication. Used by a worker. This function should return -% rep_start_result(), also throws {filter_fetch_error, Reason} if cannot fetch -% filter.It can also block for an indeterminate amount of time while fetching -% filter. -maybe_start_replication(Id, RepWithoutId, WRef) -> - Rep = couch_replicator_docs:update_rep_id(RepWithoutId), - case maybe_add_job_to_scheduler(Id, Rep, WRef) of - ignore -> - ignore; - {ok, RepId} -> - {ok, RepId}; - {temporary_error, Reason} -> - {temporary_error, Reason}; - {permanent_failure, Reason} -> - {DbName, DocId} = Id, - couch_replicator_docs:update_failed(DbName, DocId, Reason), - {permanent_failure, Reason} - end. - - --spec maybe_add_job_to_scheduler(db_doc_id(), #rep{}, reference()) -> - rep_start_result(). -maybe_add_job_to_scheduler({DbName, DocId}, Rep, WRef) -> - RepId = Rep#rep.id, - case couch_replicator_scheduler:rep_state(RepId) of - nil -> - % Before adding a job check that this worker is still the current - % worker. This is to handle a race condition where a worker which was - % sleeping and then checking a replication filter may inadvertently - % re-add a replication which was already deleted. - case couch_replicator_doc_processor:get_worker_ref({DbName, DocId}) of - WRef -> - ok = couch_replicator_scheduler:add_job(Rep), - {ok, RepId}; - _NilOrOtherWRef -> - ignore - end; - #rep{doc_id = DocId} -> - {ok, RepId}; - #rep{doc_id = null} -> - Msg = io_lib:format("Replication `~s` specified by document `~s`" - " already running as a transient replication, started via" - " `_replicate` API endpoint", [pp_rep_id(RepId), DocId]), - {temporary_error, couch_util:to_binary(Msg)}; - #rep{db_name = OtherDb, doc_id = OtherDocId} -> - Msg = io_lib:format("Replication `~s` specified by document `~s`" - " already started, triggered by document `~s` from db `~s`", - [pp_rep_id(RepId), DocId, OtherDocId, mem3:dbname(OtherDb)]), - {permanent_failure, couch_util:to_binary(Msg)} - end. - - --ifdef(TEST). - --include_lib("eunit/include/eunit.hrl"). - --define(DB, <<"db">>). --define(DOC1, <<"doc1">>). --define(R1, {"ad08e05057046eabe898a2572bbfb573", ""}). 
- - -doc_processor_worker_test_() -> - { - foreach, - fun setup/0, - fun teardown/1, - [ - t_should_add_job(), - t_already_running_same_docid(), - t_already_running_transient(), - t_already_running_other_db_other_doc(), - t_spawn_worker(), - t_ignore_if_doc_deleted(), - t_ignore_if_worker_ref_does_not_match() - ] - }. - - -% Replication is already running, with same doc id. Ignore change. -t_should_add_job() -> - ?_test(begin - Id = {?DB, ?DOC1}, - Rep = couch_replicator_docs:parse_rep_doc_without_id(change()), - ?assertEqual({ok, ?R1}, maybe_start_replication(Id, Rep, nil)), - ?assert(added_job()) - end). - - -% Replication is already running, with same doc id. Ignore change. -t_already_running_same_docid() -> - ?_test(begin - Id = {?DB, ?DOC1}, - mock_already_running(?DB, ?DOC1), - Rep = couch_replicator_docs:parse_rep_doc_without_id(change()), - ?assertEqual({ok, ?R1}, maybe_start_replication(Id, Rep, nil)), - ?assert(did_not_add_job()) - end). - - -% There is a transient replication with same replication id running. Ignore. -t_already_running_transient() -> - ?_test(begin - Id = {?DB, ?DOC1}, - mock_already_running(null, null), - Rep = couch_replicator_docs:parse_rep_doc_without_id(change()), - ?assertMatch({temporary_error, _}, maybe_start_replication(Id, Rep, - nil)), - ?assert(did_not_add_job()) - end). - - -% There is a duplicate replication potentially from a different db and doc. -% Write permanent failure to doc. -t_already_running_other_db_other_doc() -> - ?_test(begin - Id = {?DB, ?DOC1}, - mock_already_running(<<"otherdb">>, <<"otherdoc">>), - Rep = couch_replicator_docs:parse_rep_doc_without_id(change()), - ?assertMatch({permanent_failure, _}, maybe_start_replication(Id, Rep, - nil)), - ?assert(did_not_add_job()), - 1 == meck:num_calls(couch_replicator_docs, update_failed, '_') - end). - - -% Should spawn worker -t_spawn_worker() -> - ?_test(begin - Id = {?DB, ?DOC1}, - Rep = couch_replicator_docs:parse_rep_doc_without_id(change()), - WRef = make_ref(), - meck:expect(couch_replicator_doc_processor, get_worker_ref, 1, WRef), - Pid = spawn_worker(Id, Rep, 0, WRef), - Res = receive {'DOWN', _Ref, process, Pid, Reason} -> Reason - after 1000 -> timeout end, - Expect = #doc_worker_result{id = Id, wref = WRef, result = {ok, ?R1}}, - ?assertEqual(Expect, Res), - ?assert(added_job()) - end). - - -% Should not add job if by the time worker got to fetching the filter -% and getting a replication id, replication doc was deleted -t_ignore_if_doc_deleted() -> - ?_test(begin - Id = {?DB, ?DOC1}, - Rep = couch_replicator_docs:parse_rep_doc_without_id(change()), - meck:expect(couch_replicator_doc_processor, get_worker_ref, 1, nil), - ?assertEqual(ignore, maybe_start_replication(Id, Rep, make_ref())), - ?assertNot(added_job()) - end). - - -% Should not add job if by the time worker got to fetchign the filter -% and building a replication id, another worker was spawned. -t_ignore_if_worker_ref_does_not_match() -> - ?_test(begin - Id = {?DB, ?DOC1}, - Rep = couch_replicator_docs:parse_rep_doc_without_id(change()), - meck:expect(couch_replicator_doc_processor, get_worker_ref, 1, - make_ref()), - ?assertEqual(ignore, maybe_start_replication(Id, Rep, make_ref())), - ?assertNot(added_job()) - end). 
- - -% Test helper functions - -setup() -> - meck:expect(couch_replicator_scheduler, add_job, 1, ok), - meck:expect(config, get, fun(_, _, Default) -> Default end), - meck:expect(couch_server, get_uuid, 0, this_is_snek), - meck:expect(couch_replicator_docs, update_failed, 3, ok), - meck:expect(couch_replicator_scheduler, rep_state, 1, nil), - meck:expect(couch_replicator_doc_processor, get_worker_ref, 1, nil), - ok. - - -teardown(_) -> - meck:unload(). - - -mock_already_running(DbName, DocId) -> - meck:expect(couch_replicator_scheduler, rep_state, - fun(RepId) -> #rep{id = RepId, doc_id = DocId, db_name = DbName} end). - - -added_job() -> - 1 == meck:num_calls(couch_replicator_scheduler, add_job, '_'). - - -did_not_add_job() -> - 0 == meck:num_calls(couch_replicator_scheduler, add_job, '_'). - - -change() -> - {[ - {<<"_id">>, ?DOC1}, - {<<"source">>, <<"http://srchost.local/src">>}, - {<<"target">>, <<"http://tgthost.local/tgt">>} - ]}. - --endif. diff --git a/src/couch_replicator/src/couch_replicator_docs.erl b/src/couch_replicator/src/couch_replicator_docs.erl index 619063222..f84d1299a 100644 --- a/src/couch_replicator/src/couch_replicator_docs.erl +++ b/src/couch_replicator/src/couch_replicator_docs.erl @@ -13,306 +13,142 @@ -module(couch_replicator_docs). -export([ - parse_rep_doc/1, - parse_rep_doc/2, - parse_rep_db/3, - parse_rep_doc_without_id/1, - parse_rep_doc_without_id/2, + remove_state_fields/3, + update_completed/4, + update_failed/4, before_doc_update/3, - after_doc_read/2, - ensure_rep_ddoc_exists/1, - ensure_cluster_rep_ddoc_exists/1, - remove_state_fields/2, - update_doc_completed/3, - update_failed/3, - update_rep_id/1, - update_triggered/2, - update_error/2 + after_doc_read/2 ]). -include_lib("couch/include/couch_db.hrl"). --include_lib("ibrowse/include/ibrowse.hrl"). --include_lib("mem3/include/mem3.hrl"). -include_lib("couch_replicator/include/couch_replicator_api_wrap.hrl"). -include("couch_replicator.hrl"). --include("couch_replicator_js_functions.hrl"). - --import(couch_util, [ - get_value/2, - get_value/3, - to_binary/1 -]). - --import(couch_replicator_utils, [ - get_json_value/2, - get_json_value/3 -]). --define(REP_DB_NAME, <<"_replicator">>). --define(REP_DESIGN_DOC, <<"_design/_replicator">>). -define(OWNER, <<"owner">>). -define(CTX, {user_ctx, #user_ctx{roles=[<<"_admin">>, <<"_replicator">>]}}). -define(replace(L, K, V), lists:keystore(K, 1, L, {K, V})). -remove_state_fields(DbName, DocId) -> - update_rep_doc(DbName, DocId, [ - {<<"_replication_state">>, undefined}, - {<<"_replication_state_time">>, undefined}, - {<<"_replication_state_reason">>, undefined}, - {<<"_replication_id">>, undefined}, - {<<"_replication_stats">>, undefined}]). +remove_state_fields(null, null, null) -> + ok; +remove_state_fields(DbName, DbUUID, DocId) -> + update_rep_doc(DbName, DbUUID, DocId, [ + {?REPLICATION_STATE, undefined}, + {?REPLICATION_STATE_TIME, undefined}, + {?REPLICATION_STATE_REASON, undefined}, + {?REPLICATION_ID, undefined}, + {?REPLICATION_STATS, undefined} + ]), + ok. --spec update_doc_completed(binary(), binary(), [_]) -> any(). -update_doc_completed(DbName, DocId, Stats) -> - update_rep_doc(DbName, DocId, [ - {<<"_replication_state">>, <<"completed">>}, - {<<"_replication_state_reason">>, undefined}, - {<<"_replication_stats">>, {Stats}}]), - couch_stats:increment_counter([couch_replicator, docs, - completed_state_updates]). +-spec update_completed(binary(), binary(), binary(), [_]) -> ok. 
+update_completed(null, null, _, _) -> + ok; --spec update_failed(binary(), binary(), any()) -> any(). -update_failed(DbName, DocId, Error) -> - Reason = error_reason(Error), - couch_log:error("Error processing replication doc `~s` from `~s`: ~s", - [DocId, DbName, Reason]), - update_rep_doc(DbName, DocId, [ - {<<"_replication_state">>, <<"failed">>}, - {<<"_replication_stats">>, undefined}, - {<<"_replication_state_reason">>, Reason}]), +update_completed(DbName, DbUUID, DocId, #{} = Stats0) -> + Stats = {maps:to_list(Stats0)}, + update_rep_doc(DbName, DbUUID, DocId, [ + {?REPLICATION_STATE, ?ST_COMPLETED}, + {?REPLICATION_STATE_REASON, undefined}, + {?REPLICATION_STATS, Stats}]), couch_stats:increment_counter([couch_replicator, docs, - failed_state_updates]). - - --spec update_triggered(#rep{}, rep_id()) -> ok. -update_triggered(Rep, {Base, Ext}) -> - #rep{ - db_name = DbName, - doc_id = DocId - } = Rep, - update_rep_doc(DbName, DocId, [ - {<<"_replication_state">>, <<"triggered">>}, - {<<"_replication_state_reason">>, undefined}, - {<<"_replication_id">>, iolist_to_binary([Base, Ext])}, - {<<"_replication_stats">>, undefined}]), + completed_state_updates + ]), ok. --spec update_error(#rep{}, any()) -> ok. -update_error(#rep{db_name = DbName, doc_id = DocId, id = RepId}, Error) -> - Reason = error_reason(Error), - BinRepId = case RepId of - {Base, Ext} -> - iolist_to_binary([Base, Ext]); - _Other -> - null - end, - update_rep_doc(DbName, DocId, [ - {<<"_replication_state">>, <<"error">>}, - {<<"_replication_state_reason">>, Reason}, - {<<"_replication_stats">>, undefined}, - {<<"_replication_id">>, BinRepId}]), - ok. - +-spec update_failed(binary(), binary(), binary(), any()) -> ok. +update_failed(null, null, null, _) -> + ok; --spec ensure_rep_ddoc_exists(binary()) -> ok. -ensure_rep_ddoc_exists(RepDb) -> - case mem3:belongs(RepDb, ?REP_DESIGN_DOC) of - true -> - ensure_rep_ddoc_exists(RepDb, ?REP_DESIGN_DOC); - false -> - ok - end. - - --spec ensure_rep_ddoc_exists(binary(), binary()) -> ok. -ensure_rep_ddoc_exists(RepDb, DDocId) -> - case open_rep_doc(RepDb, DDocId) of - {not_found, no_db_file} -> - %% database was deleted. - ok; - {not_found, _Reason} -> - DocProps = replication_design_doc_props(DDocId), - DDoc = couch_doc:from_json_obj({DocProps}), - couch_log:notice("creating replicator ddoc ~p", [RepDb]), - {ok, _Rev} = save_rep_doc(RepDb, DDoc); - {ok, Doc} -> - Latest = replication_design_doc_props(DDocId), - {Props0} = couch_doc:to_json_obj(Doc, []), - {value, {_, Rev}, Props} = lists:keytake(<<"_rev">>, 1, Props0), - case compare_ejson({Props}, {Latest}) of - true -> - ok; - false -> - LatestWithRev = [{<<"_rev">>, Rev} | Latest], - DDoc = couch_doc:from_json_obj({LatestWithRev}), - couch_log:notice("updating replicator ddoc ~p", [RepDb]), - try - {ok, _} = save_rep_doc(RepDb, DDoc) - catch - throw:conflict -> - %% ignore, we'll retry next time - ok - end - end - end, +update_failed(DbName, DbUUID, DocId, Error) -> + Reason = error_reason(Error), + couch_log:error("Error processing replication doc `~s` from `~s`: ~s", + [DocId, DbName, Reason]), + update_rep_doc(DbName, DbUUID, DocId, [ + {?REPLICATION_STATE, ?ST_FAILED}, + {?REPLICATION_STATS, undefined}, + {?REPLICATION_STATE_REASON, Reason} + ]), + couch_stats:increment_counter([couch_replicator, docs, + failed_state_updates]), ok. --spec ensure_cluster_rep_ddoc_exists(binary()) -> ok. 
-ensure_cluster_rep_ddoc_exists(RepDb) -> - DDocId = ?REP_DESIGN_DOC, - [#shard{name = DbShard} | _] = mem3:shards(RepDb, DDocId), - ensure_rep_ddoc_exists(DbShard, DDocId). - - --spec compare_ejson({[_]}, {[_]}) -> boolean(). -compare_ejson(EJson1, EJson2) -> - EjsonSorted1 = couch_replicator_filters:ejsort(EJson1), - EjsonSorted2 = couch_replicator_filters:ejsort(EJson2), - EjsonSorted1 == EjsonSorted2. - - --spec replication_design_doc_props(binary()) -> [_]. -replication_design_doc_props(DDocId) -> - [ - {<<"_id">>, DDocId}, - {<<"language">>, <<"javascript">>}, - {<<"validate_doc_update">>, ?REP_DB_DOC_VALIDATE_FUN} - ]. - +-spec before_doc_update(#doc{}, Db::any(), couch_db:update_type()) -> #doc{}. +before_doc_update(#doc{id = <<?DESIGN_DOC_PREFIX, _/binary>>} = Doc, _, _) -> + Doc; +before_doc_update(#doc{body = {Body}} = Doc, Db, _UpdateType) -> + #user_ctx{roles = Roles, name = Name} = fabric2_db:get_user_ctx(Db), + IsReplicator = lists:member(<<"_replicator">>, Roles), -% Note: parse_rep_doc can handle filtered replications. During parsing of the -% replication doc it will make possibly remote http requests to the source -% database. If failure or parsing of filter docs fails, parse_doc throws a -% {filter_fetch_error, Error} excation. This exception should be considered -% transient in respect to the contents of the document itself, since it depends -% on netowrk availability of the source db and other factors. --spec parse_rep_doc({[_]}) -> #rep{}. -parse_rep_doc(RepDoc) -> - {ok, Rep} = try - parse_rep_doc(RepDoc, rep_user_ctx(RepDoc)) - catch - throw:{error, Reason} -> - throw({bad_rep_doc, Reason}); - throw:{filter_fetch_error, Reason} -> - throw({filter_fetch_error, Reason}); - Tag:Err -> - throw({bad_rep_doc, to_binary({Tag, Err})}) + Doc1 = case IsReplicator of true -> Doc; false -> + case couch_util:get_value(?OWNER, Body) of + undefined -> + Doc#doc{body = {?replace(Body, ?OWNER, Name)}}; + Name -> + Doc; + Other -> + case (catch fabric2_db:check_is_admin(Db)) of + ok when Other =:= null -> + Doc#doc{body = {?replace(Body, ?OWNER, Name)}}; + ok -> + Doc; + _ -> + throw({forbidden, <<"Can't update replication", + "documents from other users.">>}) + end + end end, - Rep. - --spec parse_rep_doc_without_id({[_]}) -> #rep{}. -parse_rep_doc_without_id(RepDoc) -> - {ok, Rep} = try - parse_rep_doc_without_id(RepDoc, rep_user_ctx(RepDoc)) - catch - throw:{error, Reason} -> - throw({bad_rep_doc, Reason}); - Tag:Err -> - throw({bad_rep_doc, to_binary({Tag, Err})}) + Deleted = Doc1#doc.deleted, + IsFailed = couch_util:get_value(?REPLICATION_STATE, Body) == ?ST_FAILED, + case IsReplicator orelse Deleted orelse IsFailed of true -> ok; false -> + try + couch_replicator_parse:parse_rep_doc(Doc1#doc.body) + catch + throw:{bad_rep_doc, Error} -> + throw({forbidden, Error}) + end end, - Rep. - - --spec parse_rep_doc({[_]}, #user_ctx{}) -> {ok, #rep{}}. -parse_rep_doc(Doc, UserCtx) -> - {ok, Rep} = parse_rep_doc_without_id(Doc, UserCtx), - Cancel = get_value(cancel, Rep#rep.options, false), - Id = get_value(id, Rep#rep.options, nil), - case {Cancel, Id} of - {true, nil} -> - % Cancel request with no id, must parse id out of body contents - {ok, update_rep_id(Rep)}; - {true, Id} -> - % Cancel request with an id specified, so do not parse id from body - {ok, Rep}; - {false, _Id} -> - % Not a cancel request, regular replication doc - {ok, update_rep_id(Rep)} - end. - - --spec parse_rep_doc_without_id({[_]}, #user_ctx{}) -> {ok, #rep{}}. 
-parse_rep_doc_without_id({Props}, UserCtx) -> - {SrcProxy, TgtProxy} = parse_proxy_settings(Props), - Opts = make_options(Props), - case get_value(cancel, Opts, false) andalso - (get_value(id, Opts, nil) =/= nil) of - true -> - {ok, #rep{options = Opts, user_ctx = UserCtx}}; - false -> - Source = parse_rep_db(get_value(<<"source">>, Props), SrcProxy, Opts), - Target = parse_rep_db(get_value(<<"target">>, Props), TgtProxy, Opts), - {Type, View} = case couch_replicator_filters:view_type(Props, Opts) of - {error, Error} -> - throw({bad_request, Error}); - Result -> - Result - end, - Rep = #rep{ - source = Source, - target = Target, - options = Opts, - user_ctx = UserCtx, - type = Type, - view = View, - doc_id = get_value(<<"_id">>, Props, null) - }, - % Check if can parse filter code, if not throw exception - case couch_replicator_filters:parse(Opts) of - {error, FilterError} -> - throw({error, FilterError}); - {ok, _Filter} -> - ok - end, - {ok, Rep} - end. + Doc1. -parse_proxy_settings(Props) when is_list(Props) -> - Proxy = get_value(<<"proxy">>, Props, <<>>), - SrcProxy = get_value(<<"source_proxy">>, Props, <<>>), - TgtProxy = get_value(<<"target_proxy">>, Props, <<>>), - - case Proxy =/= <<>> of - true when SrcProxy =/= <<>> -> - Error = "`proxy` is mutually exclusive with `source_proxy`", - throw({bad_request, Error}); - true when TgtProxy =/= <<>> -> - Error = "`proxy` is mutually exclusive with `target_proxy`", - throw({bad_request, Error}); - true -> - {Proxy, Proxy}; - false -> - {SrcProxy, TgtProxy} +-spec after_doc_read(#doc{}, Db::any()) -> #doc{}. +after_doc_read(#doc{id = <<?DESIGN_DOC_PREFIX, _/binary>>} = Doc, _Db) -> + Doc; +after_doc_read(#doc{body = {Body}} = Doc, Db) -> + #user_ctx{name = Name} = fabric2_db:get_user_ctx(Db), + case (catch fabric2_db:check_is_admin(Db)) of ok -> Doc; _ -> + case couch_util:get_value(?OWNER, Body) of Name -> Doc; _ -> + Source0 = couch_util:get_value(<<"source">>, Body), + Target0 = couch_util:get_value(<<"target">>, Body), + Source = strip_credentials(Source0), + Target = strip_credentials(Target0), + NewBody0 = ?replace(Body, <<"source">>, Source), + NewBody = ?replace(NewBody0, <<"target">>, Target), + #doc{revs = {Pos, [_ | Revs]}} = Doc, + NewDoc = Doc#doc{body = {NewBody}, revs = {Pos - 1, Revs}}, + fabric2_db:new_revid(Db, NewDoc) + end end. -% Update a #rep{} record with a replication_id. Calculating the id might involve -% fetching a filter from the source db, and so it could fail intermetently. -% In case of a failure to fetch the filter this function will throw a -% `{filter_fetch_error, Reason} exception. -update_rep_id(Rep) -> - RepId = couch_replicator_ids:replication_id(Rep), - Rep#rep{id = RepId}. +update_rep_doc(RepDbName, RepDbUUID, RepDocId, KVs) -> + update_rep_doc(RepDbName, RepDbUUID, RepDocId, KVs, 1). -update_rep_doc(RepDbName, RepDocId, KVs) -> - update_rep_doc(RepDbName, RepDocId, KVs, 1). - - -update_rep_doc(RepDbName, RepDocId, KVs, Wait) when is_binary(RepDocId) -> +update_rep_doc(RepDbName, RepDbUUID, RepDocId, KVs, Wait) + when is_binary(RepDocId) -> try - case open_rep_doc(RepDbName, RepDocId) of + case open_rep_doc(RepDbName, RepDbUUID, RepDocId) of {ok, LastRepDoc} -> - update_rep_doc(RepDbName, LastRepDoc, KVs, Wait * 2); + update_rep_doc(RepDbName, RepDbUUID, LastRepDoc, KVs, + Wait * 2); _ -> ok end @@ -321,25 +157,25 @@ update_rep_doc(RepDbName, RepDocId, KVs, Wait) when is_binary(RepDocId) -> Msg = "Conflict when updating replication doc `~s`. 
Retrying.", couch_log:error(Msg, [RepDocId]), ok = timer:sleep(couch_rand:uniform(erlang:min(128, Wait)) * 100), - update_rep_doc(RepDbName, RepDocId, KVs, Wait * 2) + update_rep_doc(RepDbName, RepDbUUID, RepDocId, KVs, Wait * 2) end; -update_rep_doc(RepDbName, #doc{body = {RepDocBody}} = RepDoc, KVs, _Try) -> +update_rep_doc(RepDbName, RepDbUUID, #doc{body = {RepDocBody}} = RepDoc, KVs, _Try) -> NewRepDocBody = lists:foldl( - fun({K, undefined}, Body) -> + fun({K, undefined}, Body) when is_binary(K) -> lists:keydelete(K, 1, Body); - ({<<"_replication_state">> = K, State} = KV, Body) -> - case get_json_value(K, Body) of + ({?REPLICATION_STATE = K, State} = KV, Body) when is_binary(K) -> + case couch_util:get_value(K, Body) of State -> Body; _ -> Body1 = lists:keystore(K, 1, Body, KV), - Timestamp = couch_replicator_utils:iso8601(os:timestamp()), + Timestamp = couch_replicator_utils:iso8601(), lists:keystore( - <<"_replication_state_time">>, 1, Body1, - {<<"_replication_state_time">>, Timestamp}) + ?REPLICATION_STATE_TIME, 1, Body1, + {?REPLICATION_STATE_TIME, Timestamp}) end; - ({K, _V} = KV, Body) -> + ({K, _V} = KV, Body) when is_binary(K) -> lists:keystore(K, 1, Body, KV) end, RepDocBody, KVs), @@ -349,331 +185,37 @@ update_rep_doc(RepDbName, #doc{body = {RepDocBody}} = RepDoc, KVs, _Try) -> _ -> % Might not succeed - when the replication doc is deleted right % before this update (not an error, ignore). - save_rep_doc(RepDbName, RepDoc#doc{body = {NewRepDocBody}}) + save_rep_doc(RepDbName, RepDbUUID, RepDoc#doc{body = {NewRepDocBody}}) end. -open_rep_doc(DbName, DocId) -> - case couch_db:open_int(DbName, [?CTX, sys_db]) of - {ok, Db} -> - try - couch_db:open_doc(Db, DocId, [ejson_body]) - after - couch_db:close(Db) - end; - Else -> - Else +open_rep_doc(DbName, DbUUID, DocId) when is_binary(DbName), is_binary(DbUUID), + is_binary(DocId) -> + try + case fabric2_db:open(DbName, [?CTX, sys_db, {uuid, DbUUID}]) of + {ok, Db} -> fabric2_db:open_doc(Db, DocId, [ejson_body]); + Else -> Else + end + catch + error:database_does_not_exist -> + {not_found, database_does_not_exist} end. -save_rep_doc(DbName, Doc) -> - {ok, Db} = couch_db:open_int(DbName, [?CTX, sys_db]), +save_rep_doc(DbName, DbUUID, Doc) when is_binary(DbName), is_binary(DbUUID) -> try - couch_db:update_doc(Db, Doc, []) + {ok, Db} = fabric2_db:open(DbName, [?CTX, sys_db, {uuid, DbUUID}]), + fabric2_db:update_doc(Db, Doc, []) catch + error:database_does_not_exist -> + {not_found, database_does_not_exist}; % User can accidently write a VDU which prevents _replicator from % updating replication documents. Avoid crashing replicator and thus % preventing all other replication jobs on the node from running. throw:{forbidden, Reason} -> - Msg = "~p VDU function preventing doc update to ~s ~s ~p", + Msg = "~p VDU or BDU function preventing doc update to ~s ~s ~p", couch_log:error(Msg, [?MODULE, DbName, Doc#doc.id, Reason]), {ok, forbidden} - after - couch_db:close(Db) - end. - - --spec rep_user_ctx({[_]}) -> #user_ctx{}. -rep_user_ctx({RepDoc}) -> - case get_json_value(<<"user_ctx">>, RepDoc) of - undefined -> - #user_ctx{}; - {UserCtx} -> - #user_ctx{ - name = get_json_value(<<"name">>, UserCtx, null), - roles = get_json_value(<<"roles">>, UserCtx, []) - } - end. - - --spec parse_rep_db({[_]} | binary(), binary(), [_]) -> #httpd{} | binary(). 
-parse_rep_db({Props}, Proxy, Options) -> - ProxyParams = parse_proxy_params(Proxy), - ProxyURL = case ProxyParams of - [] -> undefined; - _ -> binary_to_list(Proxy) - end, - Url = maybe_add_trailing_slash(get_value(<<"url">>, Props)), - {AuthProps} = get_value(<<"auth">>, Props, {[]}), - {BinHeaders} = get_value(<<"headers">>, Props, {[]}), - Headers = lists:ukeysort(1, [{?b2l(K), ?b2l(V)} || {K, V} <- BinHeaders]), - DefaultHeaders = (#httpdb{})#httpdb.headers, - #httpdb{ - url = Url, - auth_props = AuthProps, - headers = lists:ukeymerge(1, Headers, DefaultHeaders), - ibrowse_options = lists:keysort(1, - [{socket_options, get_value(socket_options, Options)} | - ProxyParams ++ ssl_params(Url)]), - timeout = get_value(connection_timeout, Options), - http_connections = get_value(http_connections, Options), - retries = get_value(retries, Options), - proxy_url = ProxyURL - }; - -parse_rep_db(<<"http://", _/binary>> = Url, Proxy, Options) -> - parse_rep_db({[{<<"url">>, Url}]}, Proxy, Options); - -parse_rep_db(<<"https://", _/binary>> = Url, Proxy, Options) -> - parse_rep_db({[{<<"url">>, Url}]}, Proxy, Options); - -parse_rep_db(<<_/binary>>, _Proxy, _Options) -> - throw({error, local_endpoints_not_supported}); - -parse_rep_db(undefined, _Proxy, _Options) -> - throw({error, <<"Missing replicator database">>}). - - --spec maybe_add_trailing_slash(binary() | list()) -> list(). -maybe_add_trailing_slash(Url) when is_binary(Url) -> - maybe_add_trailing_slash(?b2l(Url)); -maybe_add_trailing_slash(Url) -> - case lists:member($?, Url) of - true -> - Url; % skip if there are query params - false -> - case lists:last(Url) of - $/ -> - Url; - _ -> - Url ++ "/" - end - end. - - --spec make_options([_]) -> [_]. -make_options(Props) -> - Options0 = lists:ukeysort(1, convert_options(Props)), - Options = check_options(Options0), - DefWorkers = config:get("replicator", "worker_processes", "4"), - DefBatchSize = config:get("replicator", "worker_batch_size", "500"), - DefConns = config:get("replicator", "http_connections", "20"), - DefTimeout = config:get("replicator", "connection_timeout", "30000"), - DefRetries = config:get("replicator", "retries_per_request", "5"), - UseCheckpoints = config:get("replicator", "use_checkpoints", "true"), - DefCheckpointInterval = config:get("replicator", "checkpoint_interval", - "30000"), - {ok, DefSocketOptions} = couch_util:parse_term( - config:get("replicator", "socket_options", - "[{keepalive, true}, {nodelay, false}]")), - lists:ukeymerge(1, Options, lists:keysort(1, [ - {connection_timeout, list_to_integer(DefTimeout)}, - {retries, list_to_integer(DefRetries)}, - {http_connections, list_to_integer(DefConns)}, - {socket_options, DefSocketOptions}, - {worker_batch_size, list_to_integer(DefBatchSize)}, - {worker_processes, list_to_integer(DefWorkers)}, - {use_checkpoints, list_to_existing_atom(UseCheckpoints)}, - {checkpoint_interval, list_to_integer(DefCheckpointInterval)} - ])). - - --spec convert_options([_]) -> [_]. 
-convert_options([])-> - []; -convert_options([{<<"cancel">>, V} | _R]) when not is_boolean(V)-> - throw({bad_request, <<"parameter `cancel` must be a boolean">>}); -convert_options([{<<"cancel">>, V} | R]) -> - [{cancel, V} | convert_options(R)]; -convert_options([{IdOpt, V} | R]) when IdOpt =:= <<"_local_id">>; - IdOpt =:= <<"replication_id">>; IdOpt =:= <<"id">> -> - [{id, couch_replicator_ids:convert(V)} | convert_options(R)]; -convert_options([{<<"create_target">>, V} | _R]) when not is_boolean(V)-> - throw({bad_request, <<"parameter `create_target` must be a boolean">>}); -convert_options([{<<"create_target">>, V} | R]) -> - [{create_target, V} | convert_options(R)]; -convert_options([{<<"create_target_params">>, V} | _R]) when not is_tuple(V) -> - throw({bad_request, - <<"parameter `create_target_params` must be a JSON object">>}); -convert_options([{<<"create_target_params">>, V} | R]) -> - [{create_target_params, V} | convert_options(R)]; -convert_options([{<<"continuous">>, V} | _R]) when not is_boolean(V)-> - throw({bad_request, <<"parameter `continuous` must be a boolean">>}); -convert_options([{<<"continuous">>, V} | R]) -> - [{continuous, V} | convert_options(R)]; -convert_options([{<<"filter">>, V} | R]) -> - [{filter, V} | convert_options(R)]; -convert_options([{<<"query_params">>, V} | R]) -> - [{query_params, V} | convert_options(R)]; -convert_options([{<<"doc_ids">>, null} | R]) -> - convert_options(R); -convert_options([{<<"doc_ids">>, V} | _R]) when not is_list(V) -> - throw({bad_request, <<"parameter `doc_ids` must be an array">>}); -convert_options([{<<"doc_ids">>, V} | R]) -> - % Ensure same behaviour as old replicator: accept a list of percent - % encoded doc IDs. - DocIds = lists:usort([?l2b(couch_httpd:unquote(Id)) || Id <- V]), - [{doc_ids, DocIds} | convert_options(R)]; -convert_options([{<<"selector">>, V} | _R]) when not is_tuple(V) -> - throw({bad_request, <<"parameter `selector` must be a JSON object">>}); -convert_options([{<<"selector">>, V} | R]) -> - [{selector, V} | convert_options(R)]; -convert_options([{<<"worker_processes">>, V} | R]) -> - [{worker_processes, couch_util:to_integer(V)} | convert_options(R)]; -convert_options([{<<"worker_batch_size">>, V} | R]) -> - [{worker_batch_size, couch_util:to_integer(V)} | convert_options(R)]; -convert_options([{<<"http_connections">>, V} | R]) -> - [{http_connections, couch_util:to_integer(V)} | convert_options(R)]; -convert_options([{<<"connection_timeout">>, V} | R]) -> - [{connection_timeout, couch_util:to_integer(V)} | convert_options(R)]; -convert_options([{<<"retries_per_request">>, V} | R]) -> - [{retries, couch_util:to_integer(V)} | convert_options(R)]; -convert_options([{<<"socket_options">>, V} | R]) -> - {ok, SocketOptions} = couch_util:parse_term(V), - [{socket_options, SocketOptions} | convert_options(R)]; -convert_options([{<<"since_seq">>, V} | R]) -> - [{since_seq, V} | convert_options(R)]; -convert_options([{<<"use_checkpoints">>, V} | R]) -> - [{use_checkpoints, V} | convert_options(R)]; -convert_options([{<<"checkpoint_interval">>, V} | R]) -> - [{checkpoint_interval, couch_util:to_integer(V)} | convert_options(R)]; -convert_options([_ | R]) -> % skip unknown option - convert_options(R). - - --spec check_options([_]) -> [_]. 
-check_options(Options) -> - DocIds = lists:keyfind(doc_ids, 1, Options), - Filter = lists:keyfind(filter, 1, Options), - Selector = lists:keyfind(selector, 1, Options), - case {DocIds, Filter, Selector} of - {false, false, false} -> Options; - {false, false, _} -> Options; - {false, _, false} -> Options; - {_, false, false} -> Options; - _ -> - throw({bad_request, - "`doc_ids`,`filter`,`selector` are mutually exclusive"}) - end. - - --spec parse_proxy_params(binary() | [_]) -> [_]. -parse_proxy_params(ProxyUrl) when is_binary(ProxyUrl) -> - parse_proxy_params(?b2l(ProxyUrl)); -parse_proxy_params([]) -> - []; -parse_proxy_params(ProxyUrl) -> - #url{ - host = Host, - port = Port, - username = User, - password = Passwd, - protocol = Protocol - } = ibrowse_lib:parse_url(ProxyUrl), - [ - {proxy_protocol, Protocol}, - {proxy_host, Host}, - {proxy_port, Port} - ] ++ case is_list(User) andalso is_list(Passwd) of - false -> - []; - true -> - [{proxy_user, User}, {proxy_password, Passwd}] - end. - - --spec ssl_params([_]) -> [_]. -ssl_params(Url) -> - case ibrowse_lib:parse_url(Url) of - #url{protocol = https} -> - Depth = list_to_integer( - config:get("replicator", "ssl_certificate_max_depth", "3") - ), - VerifyCerts = config:get("replicator", "verify_ssl_certificates"), - CertFile = config:get("replicator", "cert_file", undefined), - KeyFile = config:get("replicator", "key_file", undefined), - Password = config:get("replicator", "password", undefined), - SslOpts = [{depth, Depth} | ssl_verify_options(VerifyCerts =:= "true")], - SslOpts1 = case CertFile /= undefined andalso KeyFile /= undefined of - true -> - case Password of - undefined -> - [{certfile, CertFile}, {keyfile, KeyFile}] ++ SslOpts; - _ -> - [{certfile, CertFile}, {keyfile, KeyFile}, - {password, Password}] ++ SslOpts - end; - false -> SslOpts - end, - [{is_ssl, true}, {ssl_options, SslOpts1}]; - #url{protocol = http} -> - [] - end. - - --spec ssl_verify_options(true | false) -> [_]. -ssl_verify_options(true) -> - CAFile = config:get("replicator", "ssl_trusted_certificates_file"), - [{verify, verify_peer}, {cacertfile, CAFile}]; -ssl_verify_options(false) -> - [{verify, verify_none}]. - - --spec before_doc_update(#doc{}, Db::any(), couch_db:update_type()) -> #doc{}. -before_doc_update(#doc{id = <<?DESIGN_DOC_PREFIX, _/binary>>} = Doc, _Db, _UpdateType) -> - Doc; -before_doc_update(#doc{body = {Body}} = Doc, Db, _UpdateType) -> - #user_ctx{ - roles = Roles, - name = Name - } = couch_db:get_user_ctx(Db), - case lists:member(<<"_replicator">>, Roles) of - true -> - Doc; - false -> - case couch_util:get_value(?OWNER, Body) of - undefined -> - Doc#doc{body = {?replace(Body, ?OWNER, Name)}}; - Name -> - Doc; - Other -> - case (catch couch_db:check_is_admin(Db)) of - ok when Other =:= null -> - Doc#doc{body = {?replace(Body, ?OWNER, Name)}}; - ok -> - Doc; - _ -> - throw({forbidden, <<"Can't update replication documents", - " from other users.">>}) - end - end - end. - - --spec after_doc_read(#doc{}, Db::any()) -> #doc{}. 
-after_doc_read(#doc{id = <<?DESIGN_DOC_PREFIX, _/binary>>} = Doc, _Db) -> - Doc; -after_doc_read(#doc{body = {Body}} = Doc, Db) -> - #user_ctx{name = Name} = couch_db:get_user_ctx(Db), - case (catch couch_db:check_is_admin(Db)) of - ok -> - Doc; - _ -> - case couch_util:get_value(?OWNER, Body) of - Name -> - Doc; - _Other -> - Source = strip_credentials(couch_util:get_value(<<"source">>, -Body)), - Target = strip_credentials(couch_util:get_value(<<"target">>, -Body)), - NewBody0 = ?replace(Body, <<"source">>, Source), - NewBody = ?replace(NewBody0, <<"target">>, Target), - #doc{revs = {Pos, [_ | Revs]}} = Doc, - NewDoc = Doc#doc{body = {NewBody}, revs = {Pos - 1, Revs}}, - NewRevId = couch_db:new_revid(NewDoc), - NewDoc#doc{revs = {Pos, [NewRevId | Revs]}} - end end. @@ -698,164 +240,14 @@ strip_credentials({Props0}) -> error_reason({shutdown, Error}) -> error_reason(Error); error_reason({bad_rep_doc, Reason}) -> - to_binary(Reason); + couch_util:to_binary(Reason); +error_reason(#{<<"error">> := Error, <<"reason">> := Reason}) + when is_binary(Error), is_binary(Reason) -> + couch_util:to_binary(io_list:format("~s: ~s", [Error, Reason])); error_reason({error, {Error, Reason}}) - when is_atom(Error), is_binary(Reason) -> - to_binary(io_lib:format("~s: ~s", [Error, Reason])); + when is_atom(Error), is_binary(Reason) -> + couch_util:to_binary(io_lib:format("~s: ~s", [Error, Reason])); error_reason({error, Reason}) -> - to_binary(Reason); + couch_util:to_binary(Reason); error_reason(Reason) -> - to_binary(Reason). - - --ifdef(TEST). - - --include_lib("couch/include/couch_eunit.hrl"). - - -check_options_pass_values_test() -> - ?assertEqual(check_options([]), []), - ?assertEqual(check_options([baz, {other, fiz}]), [baz, {other, fiz}]), - ?assertEqual(check_options([{doc_ids, x}]), [{doc_ids, x}]), - ?assertEqual(check_options([{filter, x}]), [{filter, x}]), - ?assertEqual(check_options([{selector, x}]), [{selector, x}]). - - -check_options_fail_values_test() -> - ?assertThrow({bad_request, _}, - check_options([{doc_ids, x}, {filter, y}])), - ?assertThrow({bad_request, _}, - check_options([{doc_ids, x}, {selector, y}])), - ?assertThrow({bad_request, _}, - check_options([{filter, x}, {selector, y}])), - ?assertThrow({bad_request, _}, - check_options([{doc_ids, x}, {selector, y}, {filter, z}])). - - -check_convert_options_pass_test() -> - ?assertEqual([], convert_options([])), - ?assertEqual([], convert_options([{<<"random">>, 42}])), - ?assertEqual([{cancel, true}], - convert_options([{<<"cancel">>, true}])), - ?assertEqual([{create_target, true}], - convert_options([{<<"create_target">>, true}])), - ?assertEqual([{continuous, true}], - convert_options([{<<"continuous">>, true}])), - ?assertEqual([{doc_ids, [<<"id">>]}], - convert_options([{<<"doc_ids">>, [<<"id">>]}])), - ?assertEqual([{selector, {key, value}}], - convert_options([{<<"selector">>, {key, value}}])). - - -check_convert_options_fail_test() -> - ?assertThrow({bad_request, _}, - convert_options([{<<"cancel">>, <<"true">>}])), - ?assertThrow({bad_request, _}, - convert_options([{<<"create_target">>, <<"true">>}])), - ?assertThrow({bad_request, _}, - convert_options([{<<"continuous">>, <<"true">>}])), - ?assertThrow({bad_request, _}, - convert_options([{<<"doc_ids">>, not_a_list}])), - ?assertThrow({bad_request, _}, - convert_options([{<<"selector">>, [{key, value}]}])). 
- -check_strip_credentials_test() -> - [?assertEqual(Expected, strip_credentials(Body)) || {Expected, Body} <- [ - { - undefined, - undefined - }, - { - <<"https://remote_server/database">>, - <<"https://foo:bar@remote_server/database">> - }, - { - {[{<<"_id">>, <<"foo">>}]}, - {[{<<"_id">>, <<"foo">>}, {<<"headers">>, <<"bar">>}]} - }, - { - {[{<<"_id">>, <<"foo">>}, {<<"other">>, <<"bar">>}]}, - {[{<<"_id">>, <<"foo">>}, {<<"other">>, <<"bar">>}]} - }, - { - {[{<<"_id">>, <<"foo">>}]}, - {[{<<"_id">>, <<"foo">>}, {<<"headers">>, <<"baz">>}]} - }, - { - {[{<<"_id">>, <<"foo">>}]}, - {[{<<"_id">>, <<"foo">>}, {<<"auth">>, <<"pluginsecret">>}]} - } - ]]. - - -setup() -> - DbName = ?tempdb(), - {ok, Db} = couch_db:create(DbName, [?ADMIN_CTX]), - ok = couch_db:close(Db), - create_vdu(DbName), - DbName. - - -teardown(DbName) when is_binary(DbName) -> - couch_server:delete(DbName, [?ADMIN_CTX]), - ok. - - -create_vdu(DbName) -> - couch_util:with_db(DbName, fun(Db) -> - VduFun = <<"function(newdoc, olddoc, userctx) {throw({'forbidden':'fail'})}">>, - Doc = #doc{ - id = <<"_design/vdu">>, - body = {[{<<"validate_doc_update">>, VduFun}]} - }, - {ok, _} = couch_db:update_docs(Db, [Doc]) - end). - - -update_replicator_doc_with_bad_vdu_test_() -> - { - setup, - fun test_util:start_couch/0, - fun test_util:stop_couch/1, - { - foreach, fun setup/0, fun teardown/1, - [ - fun t_vdu_does_not_crash_on_save/1 - ] - } - }. - - -t_vdu_does_not_crash_on_save(DbName) -> - ?_test(begin - Doc = #doc{id = <<"some_id">>, body = {[{<<"foo">>, 42}]}}, - ?assertEqual({ok, forbidden}, save_rep_doc(DbName, Doc)) - end). - - -local_replication_endpoint_error_test_() -> - { - foreach, - fun () -> meck:expect(config, get, - fun(_, _, Default) -> Default end) - end, - fun (_) -> meck:unload() end, - [ - t_error_on_local_endpoint() - ] - }. - - -t_error_on_local_endpoint() -> - ?_test(begin - RepDoc = {[ - {<<"_id">>, <<"someid">>}, - {<<"source">>, <<"localdb">>}, - {<<"target">>, <<"http://somehost.local/tgt">>} - ]}, - Expect = local_endpoints_not_supported, - ?assertThrow({bad_rep_doc, Expect}, parse_rep_doc_without_id(RepDoc)) - end). - --endif. + couch_util:to_binary(Reason). diff --git a/src/couch_replicator/src/couch_replicator_epi.erl b/src/couch_replicator/src/couch_replicator_epi.erl new file mode 100644 index 000000000..9fb1790b5 --- /dev/null +++ b/src/couch_replicator/src/couch_replicator_epi.erl @@ -0,0 +1,58 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + + +-module(couch_replicator_epi). + + +-behaviour(couch_epi_plugin). + + +-export([ + app/0, + providers/0, + services/0, + data_subscriptions/0, + data_providers/0, + processes/0, + notify/3 +]). + + +app() -> + couch_replicator. + + +providers() -> + [ + {fabric2_db, couch_replicator_fabric2_plugin} + ]. + + +services() -> + []. + + +data_subscriptions() -> + []. + + +data_providers() -> + []. + + +processes() -> + []. + + +notify(_Key, _Old, _New) -> + ok. 
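Editor's note: the new couch_replicator_epi module above only declares plugin wiring; it names the owning app, points the fabric2_db service at couch_replicator_fabric2_plugin, and leaves every other hook empty. Below is a minimal hedged sketch of what those declarations pin down, written as an eunit check; the test module and function names are hypothetical and are not part of this change.

-module(couch_replicator_epi_wiring_tests).
-include_lib("eunit/include/eunit.hrl").

% The plugin belongs to the couch_replicator app, provides exactly one
% fabric2_db provider, and its notify/3 callback is a no-op.
epi_wiring_test() ->
    ?assertEqual(couch_replicator, couch_replicator_epi:app()),
    ?assertEqual([{fabric2_db, couch_replicator_fabric2_plugin}],
        couch_replicator_epi:providers()),
    ?assertEqual([], couch_replicator_epi:services()),
    ?assertEqual(ok, couch_replicator_epi:notify(some_key, old, new)).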
diff --git a/src/couch_replicator/src/couch_replicator_fabric.erl b/src/couch_replicator/src/couch_replicator_fabric.erl deleted file mode 100644 index 1650105b5..000000000 --- a/src/couch_replicator/src/couch_replicator_fabric.erl +++ /dev/null @@ -1,155 +0,0 @@ -% Licensed under the Apache License, Version 2.0 (the "License"); you may not -% use this file except in compliance with the License. You may obtain a copy of -% the License at -% -% http://www.apache.org/licenses/LICENSE-2.0 -% -% Unless required by applicable law or agreed to in writing, software -% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -% License for the specific language governing permissions and limitations under -% the License. - --module(couch_replicator_fabric). - --export([ - docs/5 -]). - --include_lib("fabric/include/fabric.hrl"). --include_lib("mem3/include/mem3.hrl"). --include_lib("couch/include/couch_db.hrl"). --include_lib("couch_mrview/include/couch_mrview.hrl"). - -docs(DbName, Options, QueryArgs, Callback, Acc) -> - Shards = mem3:shards(DbName), - Workers0 = fabric_util:submit_jobs( - Shards, couch_replicator_fabric_rpc, docs, [Options, QueryArgs]), - RexiMon = fabric_util:create_monitors(Workers0), - try - case fabric_streams:start(Workers0, #shard.ref) of - {ok, Workers} -> - try - docs_int(DbName, Workers, QueryArgs, Callback, Acc) - after - fabric_streams:cleanup(Workers) - end; - {timeout, NewState} -> - DefunctWorkers = fabric_util:remove_done_workers( - NewState#stream_acc.workers, waiting - ), - fabric_util:log_timeout( - DefunctWorkers, - "replicator docs" - ), - Callback({error, timeout}, Acc); - {error, Error} -> - Callback({error, Error}, Acc) - end - after - rexi_monitor:stop(RexiMon) - end. - - -docs_int(DbName, Workers, QueryArgs, Callback, Acc0) -> - #mrargs{limit = Limit, skip = Skip} = QueryArgs, - State = #collector{ - db_name = DbName, - query_args = QueryArgs, - callback = Callback, - counters = fabric_dict:init(Workers, 0), - skip = Skip, - limit = Limit, - user_acc = Acc0, - update_seq = nil - }, - case rexi_utils:recv(Workers, #shard.ref, fun handle_message/3, - State, infinity, 5000) of - {ok, NewState} -> - {ok, NewState#collector.user_acc}; - {timeout, NewState} -> - Callback({error, timeout}, NewState#collector.user_acc); - {error, Resp} -> - {ok, Resp} - end. - -handle_message({rexi_DOWN, _, {_, NodeRef}, _}, _, State) -> - fabric_view:check_down_shards(State, NodeRef); - -handle_message({rexi_EXIT, Reason}, Worker, State) -> - fabric_view:handle_worker_exit(State, Worker, Reason); - -handle_message({meta, Meta0}, {Worker, From}, State) -> - Tot = couch_util:get_value(total, Meta0, 0), - Off = couch_util:get_value(offset, Meta0, 0), - #collector{ - callback = Callback, - counters = Counters0, - total_rows = Total0, - offset = Offset0, - user_acc = AccIn - } = State, - % Assert that we don't have other messages from this - % worker when the total_and_offset message arrives. 
- 0 = fabric_dict:lookup_element(Worker, Counters0), - rexi:stream_ack(From), - Counters1 = fabric_dict:update_counter(Worker, 1, Counters0), - Total = Total0 + Tot, - Offset = Offset0 + Off, - case fabric_dict:any(0, Counters1) of - true -> - {ok, State#collector{ - counters = Counters1, - total_rows = Total, - offset = Offset - }}; - false -> - FinalOffset = erlang:min(Total, Offset+State#collector.skip), - Meta = [{total, Total}, {offset, FinalOffset}], - {Go, Acc} = Callback({meta, Meta}, AccIn), - {Go, State#collector{ - counters = fabric_dict:decrement_all(Counters1), - total_rows = Total, - offset = FinalOffset, - user_acc = Acc - }} - end; - -handle_message(#view_row{id = Id, doc = Doc} = Row0, {Worker, From}, State) -> - #collector{query_args = Args, counters = Counters0, rows = Rows0} = State, - case maybe_fetch_and_filter_doc(Id, Doc, State) of - {[_ | _]} = NewDoc -> - Row = Row0#view_row{doc = NewDoc}, - Dir = Args#mrargs.direction, - Rows = merge_row(Dir, Row#view_row{worker={Worker, From}}, Rows0), - Counters1 = fabric_dict:update_counter(Worker, 1, Counters0), - State1 = State#collector{rows=Rows, counters=Counters1}, - fabric_view:maybe_send_row(State1); - skip -> - rexi:stream_ack(From), - {ok, State} - end; - -handle_message(complete, Worker, State) -> - Counters = fabric_dict:update_counter(Worker, 1, State#collector.counters), - fabric_view:maybe_send_row(State#collector{counters = Counters}). - - -merge_row(fwd, Row, Rows) -> - lists:keymerge(#view_row.id, [Row], Rows); -merge_row(rev, Row, Rows) -> - lists:rkeymerge(#view_row.id, [Row], Rows). - - -maybe_fetch_and_filter_doc(Id, undecided, State) -> - #collector{db_name = DbName, query_args = #mrargs{extra = Extra}} = State, - FilterStates = proplists:get_value(filter_states, Extra), - case couch_replicator:active_doc(DbName, Id) of - {ok, {Props} = DocInfo} -> - DocState = couch_util:get_value(state, Props), - couch_replicator_utils:filter_state(DocState, FilterStates, DocInfo); - {error, not_found} -> - skip % could have been deleted - end; -maybe_fetch_and_filter_doc(_Id, Doc, _State) -> - Doc. diff --git a/src/couch_replicator/src/couch_replicator_fabric2_plugin.erl b/src/couch_replicator/src/couch_replicator_fabric2_plugin.erl new file mode 100644 index 000000000..7bf614512 --- /dev/null +++ b/src/couch_replicator/src/couch_replicator_fabric2_plugin.erl @@ -0,0 +1,36 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + + +-module(couch_replicator_fabric2_plugin). + + +-export([ + after_db_create/2, + after_db_delete/2, + after_doc_write/6 +]). + + +after_db_create(DbName, DbUUID) -> + couch_replicator:after_db_create(DbName, DbUUID), + [DbName, DbUUID]. + + +after_db_delete(DbName, DbUUID) -> + couch_replicator:after_db_delete(DbName, DbUUID), + [DbName, DbUUID]. + + +after_doc_write(Db, Doc, Winner, OldWinner, RevId, Seq)-> + couch_replicator:after_doc_write(Db, Doc, Winner, OldWinner, RevId, Seq), + [Db, Doc, Winner, OldWinner, RevId, Seq]. 
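Editor's note: each callback in the new couch_replicator_fabric2_plugin above forwards to couch_replicator and then returns its own argument list, presumably so the next provider in the chain receives the same arguments unchanged. A hedged sketch of how that forwarding could be exercised, reusing the meck style of the test helpers earlier in this diff; the test module name and argument values are made up, and meck:expect/4 is assumed to create the mock implicitly, as the setup/0 helper above relies on.

-module(couch_replicator_fabric2_plugin_sketch_tests).
-include_lib("eunit/include/eunit.hrl").

after_db_create_forwards_test() ->
    % Stub out couch_replicator so no real replication work happens.
    meck:expect(couch_replicator, after_db_create, 2, ok),
    try
        % The plugin echoes its arguments back as a list...
        ?assertEqual([<<"db">>, <<"uuid">>],
            couch_replicator_fabric2_plugin:after_db_create(<<"db">>, <<"uuid">>)),
        % ...and the call was forwarded to couch_replicator exactly once.
        ?assertEqual(1, meck:num_calls(couch_replicator, after_db_create, '_'))
    after
        meck:unload()
    end.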
diff --git a/src/couch_replicator/src/couch_replicator_fabric_rpc.erl b/src/couch_replicator/src/couch_replicator_fabric_rpc.erl deleted file mode 100644 index d67f87548..000000000 --- a/src/couch_replicator/src/couch_replicator_fabric_rpc.erl +++ /dev/null @@ -1,97 +0,0 @@ -% Licensed under the Apache License, Version 2.0 (the "License"); you may not -% use this file except in compliance with the License. You may obtain a copy of -% the License at -% -% http://www.apache.org/licenses/LICENSE-2.0 -% -% Unless required by applicable law or agreed to in writing, software -% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -% License for the specific language governing permissions and limitations under -% the License. - --module(couch_replicator_fabric_rpc). - --export([ - docs/3 -]). - --include_lib("fabric/include/fabric.hrl"). --include_lib("couch/include/couch_db.hrl"). --include_lib("couch_mrview/include/couch_mrview.hrl"). - - -docs(DbName, Options, Args0) -> - set_io_priority(DbName, Options), - #mrargs{skip = Skip, limit = Limit, extra = Extra} = Args0, - FilterStates = proplists:get_value(filter_states, Extra), - Args = Args0#mrargs{skip = 0, limit = Skip + Limit}, - HealthThreshold = couch_replicator_scheduler:health_threshold(), - {ok, Db} = couch_db:open_int(DbName, Options), - Acc = {DbName, FilterStates, HealthThreshold}, - couch_mrview:query_all_docs(Db, Args, fun docs_cb/2, Acc). - - -docs_cb({meta, Meta}, Acc) -> - ok = rexi:stream2({meta, Meta}), - {ok, Acc}; -docs_cb({row, Row}, {DbName, States, HealthThreshold} = Acc) -> - Id = couch_util:get_value(id, Row), - Doc = couch_util:get_value(doc, Row), - ViewRow = #view_row{ - id = Id, - key = couch_util:get_value(key, Row), - value = couch_util:get_value(value, Row) - }, - case rep_doc_state(DbName, Id, Doc, States, HealthThreshold) of - skip -> - ok; - Other -> - ok = rexi:stream2(ViewRow#view_row{doc = Other}) - end, - {ok, Acc}; -docs_cb(complete, Acc) -> - ok = rexi:stream_last(complete), - {ok, Acc}. - - -set_io_priority(DbName, Options) -> - case lists:keyfind(io_priority, 1, Options) of - {io_priority, Pri} -> - erlang:put(io_priority, Pri); - false -> - erlang:put(io_priority, {interactive, DbName}) - end. - - -%% Get the state of the replication document. If it is found and has a terminal -%% state then it can be filtered and either included in the results or skipped. -%% If it is not in a terminal state, look it up in the local doc processor ETS -%% table. If it is there then filter by state. If it is not found there either -%% then mark it as `undecided` and let the coordinator try to fetch it. The -%% The idea is to do as much work as possible locally and leave the minimum -%% amount of work for the coordinator. -rep_doc_state(_Shard, <<"_design/", _/binary>>, _, _, _) -> - skip; -rep_doc_state(Shard, Id, {[_ | _]} = Doc, States, HealthThreshold) -> - DbName = mem3:dbname(Shard), - DocInfo = couch_replicator:info_from_doc(DbName, Doc), - case get_doc_state(DocInfo) of - null -> - % Fetch from local doc processor. If there, filter by state. - % If not there, mark as undecided. Let coordinator figure it out. 
- case couch_replicator_doc_processor:doc_lookup(Shard, Id, - HealthThreshold) of - {ok, EtsInfo} -> - State = get_doc_state(EtsInfo), - couch_replicator_utils:filter_state(State, States, EtsInfo); - {error, not_found} -> - undecided - end; - OtherState when is_atom(OtherState) -> - couch_replicator_utils:filter_state(OtherState, States, DocInfo) - end. - - -get_doc_state({Props})-> - couch_util:get_value(state, Props). diff --git a/src/couch_replicator/src/couch_replicator_filters.erl b/src/couch_replicator/src/couch_replicator_filters.erl index c8980001a..1cadce1dd 100644 --- a/src/couch_replicator/src/couch_replicator_filters.erl +++ b/src/couch_replicator/src/couch_replicator_filters.erl @@ -20,6 +20,7 @@ ]). -include_lib("couch/include/couch_db.hrl"). +-include("couch_replicator.hrl"). % Parse the filter from replication options proplist. @@ -27,17 +28,17 @@ % For `user` filter, i.e. filters specified as user code % in source database, this code doesn't fetch the filter % code, but only returns the name of the filter. --spec parse([_]) -> +-spec parse(#{}) -> {ok, nil} | {ok, {view, binary(), {[_]}}} | {ok, {user, {binary(), binary()}, {[_]}}} | {ok, {docids, [_]}} | {ok, {mango, {[_]}}} | {error, binary()}. -parse(Options) -> - Filter = couch_util:get_value(filter, Options), - DocIds = couch_util:get_value(doc_ids, Options), - Selector = couch_util:get_value(selector, Options), +parse(#{} = Options) -> + Filter = maps:get(<<"filter">>, Options, undefined), + DocIds = maps:get(<<"doc_ids">>, Options, undefined), + Selector = maps:get(<<"selector">>, Options, undefined), case {Filter, DocIds, Selector} of {undefined, undefined, undefined} -> {ok, nil}; @@ -53,7 +54,10 @@ parse(Options) -> {undefined, _, undefined} -> {ok, {docids, DocIds}}; {undefined, undefined, _} -> - {ok, {mango, ejsort(mango_selector:normalize(Selector))}}; + % Translate it to proplist as normalize doesn't know how + % to handle maps + Selector1 = ?JSON_DECODE(?JSON_ENCODE(Selector)), + {ok, {mango, ejsort(mango_selector:normalize(Selector1))}}; _ -> Err = "`selector`, `filter` and `doc_ids` are mutually exclusive", {error, list_to_binary(Err)} @@ -88,22 +92,24 @@ fetch(DDocName, FilterName, Source) -> % Get replication type and view (if any) from replication document props --spec view_type([_], [_]) -> - {view, {binary(), binary()}} | {db, nil} | {error, binary()}. -view_type(Props, Options) -> - case couch_util:get_value(<<"filter">>, Props) of - <<"_view">> -> - {QP} = couch_util:get_value(query_params, Options, {[]}), - ViewParam = couch_util:get_value(<<"view">>, QP), - case re:split(ViewParam, <<"/">>) of - [DName, ViewName] -> - {view, {<< "_design/", DName/binary >>, ViewName}}; - _ -> - {error, <<"Invalid `view` parameter.">>} - end; +-spec view_type(#{}, #{}) -> + {binary(), #{}} | {error, binary()}. +view_type(#{?FILTER := <<"_view">>}, #{} = Options) -> + QP = maps:get(<<"query_params">>, Options, #{}), + ViewParam = maps:get(<<"view">>, QP, <<>>), + case re:split(ViewParam, <<"/">>) of + [DName, ViewName] -> + DDocMap = #{ + <<"ddoc">> => <<"_design/",DName/binary>>, + <<"view">> => ViewName + }, + {<<"view">>, DDocMap}; _ -> - {db, nil} - end. + {error, <<"Invalid `view` parameter.">>} + end; + +view_type(#{}, #{}) -> + {<<"db">>, #{}}. 
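Editor's note: the reworked view_type/2 above takes the replication doc and options as maps and returns a {Type, Info} pair of a binary and a map, instead of the old tuple forms. A hedged usage sketch follows, assuming ?FILTER expands to the <<"filter">> document field; it is written as an eunit-style snippet and is not part of this change.

view_type_sketch_test() ->
    Doc = #{<<"filter">> => <<"_view">>},
    Opts = #{<<"query_params">> => #{<<"view">> => <<"ddocname/viewname">>}},
    ?assertEqual(
        {<<"view">>, #{<<"ddoc">> => <<"_design/ddocname">>,
                       <<"view">> => <<"viewname">>}},
        couch_replicator_filters:view_type(Doc, Opts)),
    % Anything without a `_view` filter is a plain database replication.
    ?assertEqual({<<"db">>, #{}}, couch_replicator_filters:view_type(#{}, #{})).

A `view` query parameter that does not split into exactly a design doc name and a view name yields {error, <<"Invalid `view` parameter.">>} instead.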
% Private functions @@ -144,16 +150,16 @@ fetch_internal(DDocName, FilterName, Source) -> couch_replicator_api_wrap:db_uri(Source), couch_util:to_binary(CodeError)] ), - throw({fetch_error, CodeErrorMsg}) + throw({fetch_error, iolist_to_binary(CodeErrorMsg)}) end after couch_replicator_api_wrap:db_close(Db) end. --spec query_params([_]) -> {[_]}. -query_params(Options)-> - couch_util:get_value(query_params, Options, {[]}). +-spec query_params(#{}) -> #{}. +query_params(#{} = Options)-> + maps:get(<<"query_params">>, Options, #{}). parse_user_filter(Filter) -> diff --git a/src/couch_replicator/src/couch_replicator_httpc.erl b/src/couch_replicator/src/couch_replicator_httpc.erl index 4dce319dc..f11d1895d 100644 --- a/src/couch_replicator/src/couch_replicator_httpc.erl +++ b/src/couch_replicator/src/couch_replicator_httpc.erl @@ -327,7 +327,7 @@ total_error_time_exceeded(#httpdb{first_error_timestamp = nil}) -> false; total_error_time_exceeded(#httpdb{first_error_timestamp = ErrorTimestamp}) -> - HealthThresholdSec = couch_replicator_scheduler:health_threshold(), + HealthThresholdSec = couch_replicator_job:health_threshold(), % Theshold value is halved because in the calling code the next step % is a doubling. Not halving here could mean sleeping too long and % exceeding the health threshold. diff --git a/src/couch_replicator/src/couch_replicator_httpc_pool.erl b/src/couch_replicator/src/couch_replicator_httpc_pool.erl index 90234a6a0..c63a5efa6 100644 --- a/src/couch_replicator/src/couch_replicator_httpc_pool.erl +++ b/src/couch_replicator/src/couch_replicator_httpc_pool.erl @@ -20,7 +20,7 @@ % gen_server API -export([init/1, handle_call/3, handle_info/2, handle_cast/2]). --export([code_change/3, terminate/2]). +-export([code_change/3, terminate/2, format_status/2]). -include_lib("couch/include/couch_db.hrl"). @@ -145,6 +145,18 @@ code_change(_OldVsn, #state{}=State, _Extra) -> terminate(_Reason, _State) -> ok. +format_status(_Opt, [_PDict, State]) -> + #state{ + url = Url, + proxy_url = ProxyURL, + limit = Limit + } = State, + {[ + {url, couch_util:url_strip_password(Url)}, + {proxy_url, couch_util:url_strip_password(ProxyURL)}, + {limit, Limit} + ]}. + monitor_client(Callers, Worker, {ClientPid, _}) -> [{Worker, erlang:monitor(process, ClientPid)} | Callers]. diff --git a/src/couch_replicator/src/couch_replicator_httpd.erl b/src/couch_replicator/src/couch_replicator_httpd.erl index abd9f7fd0..196fcf203 100644 --- a/src/couch_replicator/src/couch_replicator_httpd.erl +++ b/src/couch_replicator/src/couch_replicator_httpd.erl @@ -12,9 +12,6 @@ -module(couch_replicator_httpd). --include_lib("couch/include/couch_db.hrl"). --include_lib("couch_mrview/include/couch_mrview.hrl"). - -export([ handle_req/1, handle_scheduler_req/1 @@ -26,48 +23,40 @@ send_method_not_allowed/2 ]). --import(couch_util, [ - to_binary/1 -]). + +-include_lib("couch/include/couch_db.hrl"). +-include("couch_replicator.hrl"). -define(DEFAULT_TASK_LIMIT, 100). --define(REPDB, <<"_replicator">>). -% This is a macro so it can be used as a guard --define(ISREPDB(X), X =:= ?REPDB orelse binary_part(X, {byte_size(X), -12}) - =:= <<"/_replicator">>). 
handle_scheduler_req(#httpd{method='GET', path_parts=[_,<<"jobs">>]}=Req) -> - Limit = couch_replicator_httpd_util:parse_int_param(Req, "limit", + Limit = couch_replicator_utils:parse_int_param(Req, "limit", ?DEFAULT_TASK_LIMIT, 0, infinity), - Skip = couch_replicator_httpd_util:parse_int_param(Req, "skip", 0, 0, + Skip = couch_replicator_utils:parse_int_param(Req, "skip", 0, 0, infinity), - {Replies, _BadNodes} = rpc:multicall(couch_replicator_scheduler, jobs, []), - Flatlist = lists:concat(Replies), - % couch_replicator_scheduler:job_ejson/1 guarantees {id, Id} to be the - % the first item in the list - Sorted = lists:sort(fun({[{id,A}|_]},{[{id,B}|_]}) -> A =< B end, Flatlist), - Total = length(Sorted), + Jobs1 = couch_replicator:jobs(), + Total = length(Jobs1), Offset = min(Skip, Total), - Sublist = lists:sublist(Sorted, Offset+1, Limit), - Sublist1 = [couch_replicator_httpd_util:update_db_name(Task) - || Task <- Sublist], - send_json(Req, {[{total_rows, Total}, {offset, Offset}, {jobs, Sublist1}]}); + Jobs2 = lists:sublist(Jobs1, Offset + 1, Limit), + send_json(Req, #{ + <<"total_rows">> => Total, + <<"offset">> => Offset, + <<"jobs">> => Jobs2 + }); handle_scheduler_req(#httpd{method='GET', path_parts=[_,<<"jobs">>,JobId]}=Req) -> case couch_replicator:job(JobId) of - {ok, JobInfo} -> - send_json(Req, couch_replicator_httpd_util:update_db_name(JobInfo)); - {error, not_found} -> - throw(not_found) + {ok, JobInfo} -> send_json(Req, JobInfo); + {error, not_found} -> throw(not_found) end; handle_scheduler_req(#httpd{method='GET', path_parts=[_,<<"docs">>]}=Req) -> - handle_scheduler_docs(?REPDB, Req); + handle_scheduler_docs(?REP_DB_NAME, Req); handle_scheduler_req(#httpd{method='GET', path_parts=[_,<<"docs">>,Db]}=Req) - when ?ISREPDB(Db) -> + when ?IS_REP_DB(Db) -> handle_scheduler_docs(Db, Req); handle_scheduler_req(#httpd{method='GET', path_parts=[_,<<"docs">>,Db,DocId]} - = Req) when ?ISREPDB(Db) -> + = Req) when ?IS_REP_DB(Db) -> handle_scheduler_doc(Db, DocId, Req); % Allow users to pass in unencoded _replicator database names (/ are not % escaped). This is possible here because _replicator is not a valid document @@ -82,77 +71,80 @@ handle_scheduler_req(#httpd{method='GET', path_parts=[_,<<"docs">>|Unquoted]} {error, invalid} -> throw(bad_request) end; -handle_scheduler_req(#httpd{method='GET'} = Req) -> - send_json(Req, 404, {[{error, <<"not found">>}]}); +handle_scheduler_req(#httpd{method='GET'} = _Req) -> + throw(not_found); handle_scheduler_req(Req) -> send_method_not_allowed(Req, "GET,HEAD"). handle_req(#httpd{method = 'POST', user_ctx = UserCtx} = Req) -> couch_httpd:validate_ctype(Req, "application/json"), - RepDoc = {Props} = couch_httpd:json_body_obj(Req), - couch_replicator_httpd_util:validate_rep_props(Props), + RepDoc = couch_httpd:json_body_obj(Req), case couch_replicator:replicate(RepDoc, UserCtx) of - {error, {Error, Reason}} -> - send_json( - Req, 500, - {[{error, to_binary(Error)}, {reason, to_binary(Reason)}]}); - {error, not_found} -> - % Tried to cancel a replication that didn't exist. 
- send_json(Req, 404, {[{error, <<"not found">>}]}); - {error, Reason} -> - send_json(Req, 500, {[{error, to_binary(Reason)}]}); - {ok, {cancelled, RepId}} -> - send_json(Req, 200, {[{ok, true}, {<<"_local_id">>, RepId}]}); - {ok, {continuous, RepId}} -> - send_json(Req, 202, {[{ok, true}, {<<"_local_id">>, RepId}]}); - {ok, {HistoryResults}} -> - send_json(Req, {[{ok, true} | HistoryResults]}) - end; + {error, {Error, Reason}} -> + send_json(Req, 500, #{ + <<"error">> => couch_util:to_binary(Error), + <<"reason">> => couch_util:to_binary(Reason) + }); + {error, not_found} -> + throw(not_found); + {error, Reason} -> + send_json(Req, 500, #{<<"error">> => couch_util:to_binary(Reason)}); + {ok, {cancelled, JobId}} -> + send_json(Req, 200, #{<<"ok">> => true, <<"_local_id">> => JobId}); + {ok, {continuous, JobId}} -> + send_json(Req, 202, #{<<"ok">> => true, <<"_local_id">> => JobId}); + {ok, #{} = CheckpointHistory} -> + Res = maps:merge(#{<<"ok">> => true}, CheckpointHistory), + send_json(Req, Res) + end; handle_req(Req) -> send_method_not_allowed(Req, "POST"). -handle_scheduler_docs(Db, Req) when is_binary(Db) -> - VArgs0 = couch_mrview_http:parse_params(Req, undefined), - StatesQs = chttpd:qs_value(Req, "states"), - States = couch_replicator_httpd_util:parse_replication_state_filter(StatesQs), - VArgs1 = VArgs0#mrargs{ - view_type = map, - include_docs = true, - reduce = false, - extra = [{filter_states, States}] - }, - VArgs2 = couch_mrview_util:validate_args(VArgs1), - Opts = [{user_ctx, Req#httpd.user_ctx}], - Max = chttpd:chunked_response_buffer_size(), - Acc = couch_replicator_httpd_util:docs_acc_new(Req, Db, Max), - Cb = fun couch_replicator_httpd_util:docs_cb/2, - {ok, RAcc} = couch_replicator_fabric:docs(Db, Opts, VArgs2, Cb, Acc), - {ok, couch_replicator_httpd_util:docs_acc_response(RAcc)}. - - -handle_scheduler_doc(Db, DocId, Req) when is_binary(Db), is_binary(DocId) -> - UserCtx = Req#httpd.user_ctx, - case couch_replicator:doc(Db, DocId, UserCtx#user_ctx.roles) of - {ok, DocInfo} -> - send_json(Req, couch_replicator_httpd_util:update_db_name(DocInfo)); - {error, not_found} -> +handle_scheduler_docs(DbName, #httpd{user_ctx = UserCtx} = Req) -> + try fabric2_db:open(DbName, [{user_ctx, UserCtx}]) of + {ok, Db} -> + ok = fabric2_db:check_is_member(Db), + StatesQs = chttpd:qs_value(Req, "states"), + States = couch_replicator_utils:parse_replication_states(StatesQs), + Docs = couch_replicator:docs(Db, States), + send_json(Req, #{ + <<"total_rows">> => length(Docs), + <<"offset">> => 0, + <<"docs">> => Docs + }) + catch + error:database_does_not_exist -> throw(not_found) end. +handle_scheduler_doc(DbName, DocId, #httpd{user_ctx = UserCtx} = Req) -> + try fabric2_db:open(DbName, [{user_ctx, UserCtx}]) of + {ok, Db} -> + ok = fabric2_db:check_is_member(Db), + case couch_replicator:doc(Db, DocId) of + {ok, DocInfo} -> send_json(Req, DocInfo); + {error, not_found} -> throw(not_found) + end + catch + error:database_does_not_exist -> + throw(not_found) + end. 
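Editor's note: handle_req/1 above (the POST handler) translates each couch_replicator:replicate/2 result into a status code and a JSON map. A condensed hedged sketch of that mapping as a standalone function; the function name is hypothetical, the real handler calls send_json/2,3 directly, and the final success clause is sent without an explicit code, i.e. as 200.

% {Code, BodyMap} pairs mirroring the clauses of handle_req/1 above.
replicate_result_to_response({error, {Error, Reason}}) ->
    {500, #{<<"error">> => couch_util:to_binary(Error),
            <<"reason">> => couch_util:to_binary(Reason)}};
replicate_result_to_response({error, not_found}) ->
    throw(not_found);
replicate_result_to_response({error, Reason}) ->
    {500, #{<<"error">> => couch_util:to_binary(Reason)}};
replicate_result_to_response({ok, {cancelled, JobId}}) ->
    {200, #{<<"ok">> => true, <<"_local_id">> => JobId}};
replicate_result_to_response({ok, {continuous, JobId}}) ->
    {202, #{<<"ok">> => true, <<"_local_id">> => JobId}};
replicate_result_to_response({ok, #{} = CheckpointHistory}) ->
    {200, maps:merge(#{<<"ok">> => true}, CheckpointHistory)}.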
+ + parse_unquoted_docs_path([_, _ | _] = Unquoted) -> - DbAndAfter = lists:dropwhile(fun(E) -> E =/= ?REPDB end, Unquoted), - BeforeRDb = lists:takewhile(fun(E) -> E =/= ?REPDB end, Unquoted), + DbAndAfter = lists:dropwhile(fun(E) -> E =/= ?REP_DB_NAME end, Unquoted), + BeforeRDb = lists:takewhile(fun(E) -> E =/= ?REP_DB_NAME end, Unquoted), case DbAndAfter of [] -> {error, invalid}; - [?REPDB] -> - {db_only, filename:join(BeforeRDb ++ [?REPDB])}; - [?REPDB, DocId] -> - {db_and_doc, filename:join(BeforeRDb ++ [?REPDB]), DocId} + [?REP_DB_NAME] -> + {db_only, filename:join(BeforeRDb ++ [?REP_DB_NAME])}; + [?REP_DB_NAME, DocId] -> + {db_and_doc, filename:join(BeforeRDb ++ [?REP_DB_NAME]), DocId} end. @@ -163,10 +155,13 @@ parse_unquoted_docs_path([_, _ | _] = Unquoted) -> unquoted_scheduler_docs_path_test_() -> [?_assertEqual(Res, parse_unquoted_docs_path(Path)) || {Res, Path} <- [ {{error, invalid}, [<<"a">>,<< "b">>]}, - {{db_only, <<"a/_replicator">>}, [<<"a">>, ?REPDB]}, - {{db_only, <<"a/b/_replicator">>}, [<<"a">>, <<"b">>, ?REPDB]}, - {{db_and_doc, <<"_replicator">>, <<"x">>}, [?REPDB, <<"x">>]}, - {{db_and_doc, <<"a/_replicator">>, <<"x">>}, [<<"a">>, ?REPDB, <<"x">>]}, + {{db_only, <<"a/_replicator">>}, [<<"a">>, ?REP_DB_NAME]}, + {{db_only, <<"a/b/_replicator">>}, [<<"a">>, <<"b">>, + ?REP_DB_NAME]}, + {{db_and_doc, <<"_replicator">>, <<"x">>}, + [?REP_DB_NAME, <<"x">>]}, + {{db_and_doc, <<"a/_replicator">>, <<"x">>}, [<<"a">>, + ?REP_DB_NAME, <<"x">>]}, {{error, invalid}, [<<"a/_replicator">>,<<"x">>]} ]]. diff --git a/src/couch_replicator/src/couch_replicator_httpd_util.erl b/src/couch_replicator/src/couch_replicator_httpd_util.erl deleted file mode 100644 index 624eddd2f..000000000 --- a/src/couch_replicator/src/couch_replicator_httpd_util.erl +++ /dev/null @@ -1,201 +0,0 @@ -% Licensed under the Apache License, Version 2.0 (the "License"); you may not -% use this file except in compliance with the License. You may obtain a copy of -% the License at -% -% http://www.apache.org/licenses/LICENSE-2.0 -% -% Unless required by applicable law or agreed to in writing, software -% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -% License for the specific language governing permissions and limitations under -% the License. - --module(couch_replicator_httpd_util). - --include_lib("couch/include/couch_db.hrl"). --include_lib("couch_mrview/include/couch_mrview.hrl"). - --export([ - validate_rep_props/1, - parse_int_param/5, - parse_replication_state_filter/1, - update_db_name/1, - docs_acc_new/3, - docs_acc_response/1, - docs_cb/2 -]). - --import(couch_httpd, [ - send_json/2, - send_json/3, - send_method_not_allowed/2 -]). - --import(couch_util, [ - to_binary/1 -]). 
- - -parse_replication_state_filter(undefined) -> - []; % This is the default (wildcard) filter -parse_replication_state_filter(States) when is_list(States) -> - AllStates = couch_replicator:replication_states(), - StrStates = [string:to_lower(S) || S <- string:tokens(States, ",")], - AtomStates = try - [list_to_existing_atom(S) || S <- StrStates] - catch error:badarg -> - Msg1 = io_lib:format("States must be one or more of ~w", [AllStates]), - throw({query_parse_error, ?l2b(Msg1)}) - end, - AllSet = sets:from_list(AllStates), - StatesSet = sets:from_list(AtomStates), - Diff = sets:to_list(sets:subtract(StatesSet, AllSet)), - case Diff of - [] -> - AtomStates; - _ -> - Args = [Diff, AllStates], - Msg2 = io_lib:format("Unknown states ~w. Choose from: ~w", Args), - throw({query_parse_error, ?l2b(Msg2)}) - end. - - -parse_int_param(Req, Param, Default, Min, Max) -> - IntVal = try - list_to_integer(chttpd:qs_value(Req, Param, integer_to_list(Default))) - catch error:badarg -> - Msg1 = io_lib:format("~s must be an integer", [Param]), - throw({query_parse_error, ?l2b(Msg1)}) - end, - case IntVal >= Min andalso IntVal =< Max of - true -> - IntVal; - false -> - Msg2 = io_lib:format("~s not in range of [~w,~w]", [Param, Min, Max]), - throw({query_parse_error, ?l2b(Msg2)}) - end. - - -validate_rep_props([]) -> - ok; -validate_rep_props([{<<"query_params">>, {Params}}|Rest]) -> - lists:foreach(fun - ({_,V}) when is_binary(V) -> ok; - ({K,_}) -> throw({bad_request, - <<K/binary," value must be a string.">>}) - end, Params), - validate_rep_props(Rest); -validate_rep_props([_|Rest]) -> - validate_rep_props(Rest). - - -prepend_val(#vacc{prepend=Prepend}) -> - case Prepend of - undefined -> - ""; - _ -> - Prepend - end. - - -maybe_flush_response(#vacc{bufsize=Size, threshold=Max} = Acc, Data, Len) - when Size > 0 andalso (Size + Len) > Max -> - #vacc{buffer = Buffer, resp = Resp} = Acc, - {ok, R1} = chttpd:send_delayed_chunk(Resp, Buffer), - {ok, Acc#vacc{prepend = ",\r\n", buffer = Data, bufsize = Len, resp = R1}}; -maybe_flush_response(Acc0, Data, Len) -> - #vacc{buffer = Buf, bufsize = Size} = Acc0, - Acc = Acc0#vacc{ - prepend = ",\r\n", - buffer = [Buf | Data], - bufsize = Size + Len - }, - {ok, Acc}. - -docs_acc_new(Req, Db, Threshold) -> - #vacc{db=Db, req=Req, threshold=Threshold}. - -docs_acc_response(#vacc{resp = Resp}) -> - Resp. 
- -docs_cb({error, Reason}, #vacc{resp=undefined}=Acc) -> - {ok, Resp} = chttpd:send_error(Acc#vacc.req, Reason), - {ok, Acc#vacc{resp=Resp}}; - -docs_cb(complete, #vacc{resp=undefined}=Acc) -> - % Nothing in view - {ok, Resp} = chttpd:send_json(Acc#vacc.req, 200, {[{rows, []}]}), - {ok, Acc#vacc{resp=Resp}}; - -docs_cb(Msg, #vacc{resp=undefined}=Acc) -> - %% Start response - Headers = [], - {ok, Resp} = chttpd:start_delayed_json_response(Acc#vacc.req, 200, Headers), - docs_cb(Msg, Acc#vacc{resp=Resp, should_close=true}); - -docs_cb({error, Reason}, #vacc{resp=Resp}=Acc) -> - {ok, Resp1} = chttpd:send_delayed_error(Resp, Reason), - {ok, Acc#vacc{resp=Resp1}}; - -docs_cb(complete, #vacc{resp=Resp, buffer=Buf, threshold=Max}=Acc) -> - % Finish view output and possibly end the response - {ok, Resp1} = chttpd:close_delayed_json_object(Resp, Buf, "\r\n]}", Max), - case Acc#vacc.should_close of - true -> - {ok, Resp2} = chttpd:end_delayed_json_response(Resp1), - {ok, Acc#vacc{resp=Resp2}}; - _ -> - {ok, Acc#vacc{resp=Resp1, meta_sent=false, row_sent=false, - prepend=",\r\n", buffer=[], bufsize=0}} - end; - -docs_cb({meta, Meta}, #vacc{meta_sent=false, row_sent=false}=Acc) -> - % Sending metadata as we've not sent it or any row yet - Parts = case couch_util:get_value(total, Meta) of - undefined -> []; - Total -> [io_lib:format("\"total_rows\":~p", [adjust_total(Total)])] - end ++ case couch_util:get_value(offset, Meta) of - undefined -> []; - Offset -> [io_lib:format("\"offset\":~p", [Offset])] - end ++ ["\"docs\":["], - Chunk = [prepend_val(Acc), "{", string:join(Parts, ","), "\r\n"], - {ok, AccOut} = maybe_flush_response(Acc, Chunk, iolist_size(Chunk)), - {ok, AccOut#vacc{prepend="", meta_sent=true}}; - - -docs_cb({meta, _Meta}, #vacc{}=Acc) -> - %% ignore metadata - {ok, Acc}; - -docs_cb({row, Row}, #vacc{meta_sent=false}=Acc) -> - %% sorted=false and row arrived before meta - % Adding another row - Chunk = [prepend_val(Acc), "{\"docs\":[\r\n", row_to_json(Row)], - maybe_flush_response(Acc#vacc{meta_sent=true, row_sent=true}, Chunk, iolist_size(Chunk)); - -docs_cb({row, Row}, #vacc{meta_sent=true}=Acc) -> - % Adding another row - Chunk = [prepend_val(Acc), row_to_json(Row)], - maybe_flush_response(Acc#vacc{row_sent=true}, Chunk, iolist_size(Chunk)). - - -update_db_name({Props}) -> - {value, {database, DbName}, Props1} = lists:keytake(database, 1, Props), - {[{database, normalize_db_name(DbName)} | Props1]}. - -normalize_db_name(<<"shards/", _/binary>> = DbName) -> - mem3:dbname(DbName); -normalize_db_name(DbName) -> - DbName. - -row_to_json(Row) -> - Doc0 = couch_util:get_value(doc, Row), - Doc1 = update_db_name(Doc0), - ?JSON_ENCODE(Doc1). - - -%% Adjust Total as there is an automatically created validation design doc -adjust_total(Total) when is_integer(Total), Total > 0 -> - Total - 1; -adjust_total(Total) when is_integer(Total) -> - 0. diff --git a/src/couch_replicator/src/couch_replicator_ids.erl b/src/couch_replicator/src/couch_replicator_ids.erl index 04e71c3ef..d1cbe571c 100644 --- a/src/couch_replicator/src/couch_replicator_ids.erl +++ b/src/couch_replicator/src/couch_replicator_ids.erl @@ -14,7 +14,9 @@ -export([ replication_id/1, - replication_id/2, + base_id/2, + job_id/3, + job_id/2, convert/1 ]). @@ -30,28 +32,31 @@ % {filter_fetch_error, Error} exception. % -replication_id(#rep{options = Options} = Rep) -> - BaseId = replication_id(Rep, ?REP_ID_VERSION), - {BaseId, maybe_append_options([continuous, create_target], Options)}. 
+replication_id(#{?OPTIONS := Options} = Rep) -> + BaseId = base_id(Rep, ?REP_ID_VERSION), + UseOpts = [<<"continuous">>, <<"create_target">>], + ExtId = maybe_append_options(UseOpts, Options), + RepId = iolist_to_binary([BaseId, ExtId]), + {RepId, BaseId}. % Versioned clauses for generating replication IDs. % If a change is made to how replications are identified, % please add a new clause and increase ?REP_ID_VERSION. -replication_id(#rep{} = Rep, 4) -> +base_id(#{?SOURCE := Src, ?TARGET := Tgt} = Rep, 4) -> UUID = couch_server:get_uuid(), - SrcInfo = get_v4_endpoint(Rep#rep.source), - TgtInfo = get_v4_endpoint(Rep#rep.target), + SrcInfo = get_v4_endpoint(Src), + TgtInfo = get_v4_endpoint(Tgt), maybe_append_filters([UUID, SrcInfo, TgtInfo], Rep); -replication_id(#rep{} = Rep, 3) -> +base_id(#{?SOURCE := Src0, ?TARGET := Tgt0} = Rep, 3) -> UUID = couch_server:get_uuid(), - Src = get_rep_endpoint(Rep#rep.source), - Tgt = get_rep_endpoint(Rep#rep.target), + Src = get_rep_endpoint(Src0), + Tgt = get_rep_endpoint(Tgt0), maybe_append_filters([UUID, Src, Tgt], Rep); -replication_id(#rep{} = Rep, 2) -> +base_id(#{?SOURCE := Src0, ?TARGET := Tgt0} = Rep, 2) -> {ok, HostName} = inet:gethostname(), Port = case (catch mochiweb_socket_server:get(couch_httpd, port)) of P when is_number(P) -> @@ -64,47 +69,76 @@ replication_id(#rep{} = Rep, 2) -> % ... mochiweb_socket_server:get(https, port) list_to_integer(config:get("httpd", "port", "5984")) end, - Src = get_rep_endpoint(Rep#rep.source), - Tgt = get_rep_endpoint(Rep#rep.target), + Src = get_rep_endpoint(Src0), + Tgt = get_rep_endpoint(Tgt0), maybe_append_filters([HostName, Port, Src, Tgt], Rep); -replication_id(#rep{} = Rep, 1) -> +base_id(#{?SOURCE := Src0, ?TARGET := Tgt0} = Rep, 1) -> {ok, HostName} = inet:gethostname(), - Src = get_rep_endpoint(Rep#rep.source), - Tgt = get_rep_endpoint(Rep#rep.target), + Src = get_rep_endpoint(Src0), + Tgt = get_rep_endpoint(Tgt0), maybe_append_filters([HostName, Src, Tgt], Rep). --spec convert([_] | binary() | {string(), string()}) -> {string(), string()}. -convert(Id) when is_list(Id) -> - convert(?l2b(Id)); +-spec job_id(#{}, binary() | null, binary() | null) -> binary(). +job_id(#{} = Rep, null = _DbUUID, null = _DocId) -> + #{ + ?SOURCE := Src, + ?TARGET := Tgt, + ?REP_USER := UserName, + ?OPTIONS := Options + } = Rep, + UUID = couch_server:get_uuid(), + SrcInfo = get_v4_endpoint(Src), + TgtInfo = get_v4_endpoint(Tgt), + UseOpts = [<<"continuous">>, <<"create_target">>], + Opts = maybe_append_options(UseOpts, Options), + IdParts = [UUID, SrcInfo, TgtInfo, UserName, Opts], + maybe_append_filters(IdParts, Rep, false); + +job_id(#{} = _Rep, DbUUID, DocId) when is_binary(DbUUID), is_binary(DocId) -> + job_id(DbUUID, DocId). + + +-spec job_id(binary(), binary()) -> binary(). +job_id(DbUUID, DocId) when is_binary(DbUUID), is_binary(DocId) -> + <<DbUUID/binary, "|", DocId/binary>>. + + +-spec convert(binary()) -> binary(). convert(Id0) when is_binary(Id0) -> % Spaces can result from mochiweb incorrectly unquoting + characters from % the URL path. So undo the incorrect parsing here to avoid forcing % users to url encode + characters. - Id = binary:replace(Id0, <<" ">>, <<"+">>, [global]), - lists:splitwith(fun(Char) -> Char =/= $+ end, ?b2l(Id)); -convert({BaseId, Ext} = Id) when is_list(BaseId), is_list(Ext) -> - Id. + binary:replace(Id0, <<" ">>, <<"+">>, [global]). 
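For a rough sense of the shapes these functions produce (the values below are hypothetical, mirroring the test data further down in this file):

    %% A doc-backed job id is simply "<db uuid>|<doc id>":
    couch_replicator_ids:job_id(<<"abc123">>, <<"mydoc">>)
    %% => <<"abc123|mydoc">>

    %% convert/1 undoes mochiweb's "+" -> " " unquoting of the URL path:
    couch_replicator_ids:convert(<<"abc x y">>)
    %% => <<"abc+x+y">>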
% Private functions -maybe_append_filters(Base, - #rep{source = Source, options = Options}) -> +maybe_append_filters(Base, #{} = Rep) -> + maybe_append_filters(Base, Rep, true). + + +maybe_append_filters(Base, #{} = Rep, FetchFilter) -> + #{ + ?SOURCE := Source, + ?OPTIONS := Options + } = Rep, Base2 = Base ++ case couch_replicator_filters:parse(Options) of {ok, nil} -> []; {ok, {view, Filter, QueryParams}} -> [Filter, QueryParams]; - {ok, {user, {Doc, Filter}, QueryParams}} -> + {ok, {user, {Doc, Filter}, QueryParams}} when FetchFilter =:= true -> case couch_replicator_filters:fetch(Doc, Filter, Source) of {ok, Code} -> [Code, QueryParams]; {error, Error} -> throw({filter_fetch_error, Error}) end; + {ok, {user, {Doc, Filter}, QueryParams}} when FetchFilter =:= false -> + [Doc, Filter, QueryParams]; {ok, {docids, DocIds}} -> [DocIds]; {ok, {mango, Selector}} -> @@ -112,27 +146,33 @@ maybe_append_filters(Base, {error, FilterParseError} -> throw({error, FilterParseError}) end, - couch_util:to_hex(couch_hash:md5_hash(term_to_binary(Base2))). + Res = couch_util:to_hex(couch_hash:md5_hash(term_to_binary(Base2))), + list_to_binary(Res). -maybe_append_options(Options, RepOptions) -> +maybe_append_options(Options, #{} = RepOptions) -> lists:foldl(fun(Option, Acc) -> Acc ++ - case couch_util:get_value(Option, RepOptions, false) of - true -> - "+" ++ atom_to_list(Option); - false -> - "" + case maps:get(Option, RepOptions, false) of + true -> "+" ++ binary_to_list(Option); + false -> "" end end, [], Options). -get_rep_endpoint(#httpdb{url=Url, headers=Headers}) -> +get_rep_endpoint(#{<<"url">> := Url0, <<"headers">> := Headers0}) -> + % We turn everything to lists and proplists to calculate the same + % replication ID as CouchDB <= 3.x + Url = binary_to_list(Url0), + Headers1 = maps:fold(fun(K, V, Acc) -> + [{binary_to_list(K), binary_to_list(V)} | Acc] + end, [], Headers0), + Headers2 = lists:keysort(1, Headers1), DefaultHeaders = (#httpdb{})#httpdb.headers, - {remote, Url, Headers -- DefaultHeaders}. + {remote, Url, Headers2 -- DefaultHeaders}. -get_v4_endpoint(#httpdb{} = HttpDb) -> +get_v4_endpoint(#{} = HttpDb) -> {remote, Url, Headers} = get_rep_endpoint(HttpDb), {{UserFromHeaders, _}, HeadersWithoutBasicAuth} = couch_replicator_utils:remove_basic_auth_from_headers(Headers), @@ -184,92 +224,132 @@ get_non_default_port(_Schema, Port) -> -ifdef(TEST). -include_lib("eunit/include/eunit.hrl"). +-include_lib("fabric/test/fabric2_test.hrl"). replication_id_convert_test_() -> [?_assertEqual(Expected, convert(Id)) || {Expected, Id} <- [ - {{"abc", ""}, "abc"}, - {{"abc", ""}, <<"abc">>}, - {{"abc", "+x+y"}, <<"abc+x+y">>}, - {{"abc", "+x+y"}, {"abc", "+x+y"}}, - {{"abc", "+x+y"}, <<"abc x y">>} + {<<"abc">>, <<"abc">>}, + {<<"abc+x">>, <<"abc+x">>}, + {<<"abc+x">>, <<"abc x">>}, + {<<"abc+x+y">>, <<"abc+x+y">>}, + {<<"abc+x+y">>, <<"abc x y">>} ]]. 
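Putting the pieces above together, a full v4 replication id is assembled roughly as sketched below. This is an illustration only: the server UUID is the fixed value used by the compatibility test further down, SrcInfo/TgtInfo stand for the results of get_v4_endpoint/1, and no filter is configured (a configured filter would be appended to Base by maybe_append_filters/2 before hashing):

    Base = [<<"somefixedid">>, SrcInfo, TgtInfo],
    BaseId = list_to_binary(couch_util:to_hex(
        couch_hash:md5_hash(term_to_binary(Base)))),
    Ext = maybe_append_options([<<"continuous">>, <<"create_target">>], Options),
    RepId = iolist_to_binary([BaseId, Ext]).
    %% e.g. <<"ff71e1208f93ba054eb60e7ca8683fe4+continuous+create_target">>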
+ http_v4_endpoint_test_() -> [?_assertMatch({remote, User, Host, Port, Path, HeadersNoAuth, undefined}, - get_v4_endpoint(#httpdb{url = Url, headers = Headers})) || + get_v4_endpoint(#{<<"url">> => Url, <<"headers">> => Headers})) || {{User, Host, Port, Path, HeadersNoAuth}, {Url, Headers}} <- [ { {undefined, "host", default, "/", []}, - {"http://host", []} + {<<"http://host">>, #{}} }, { {undefined, "host", default, "/", []}, - {"https://host", []} + {<<"https://host">>, #{}} }, { {undefined, "host", default, "/", []}, - {"http://host:5984", []} + {<<"http://host:5984">>, #{}} }, { {undefined, "host", 1, "/", []}, - {"http://host:1", []} + {<<"http://host:1">>, #{}} }, { {undefined, "host", 2, "/", []}, - {"https://host:2", []} + {<<"https://host:2">>, #{}} }, { - {undefined, "host", default, "/", [{"h","v"}]}, - {"http://host", [{"h","v"}]} + {undefined, "host", default, "/", [{"h", "v"}]}, + {<<"http://host">>, #{<<"h">> => <<"v">>}} }, { {undefined, "host", default, "/a/b", []}, - {"http://host/a/b", []} + {<<"http://host/a/b">>, #{}} }, { {"user", "host", default, "/", []}, - {"http://user:pass@host", []} + {<<"http://user:pass@host">>, #{}} }, { {"user", "host", 3, "/", []}, - {"http://user:pass@host:3", []} + {<<"http://user:pass@host:3">>, #{}} }, { {"user", "host", default, "/", []}, - {"http://user:newpass@host", []} + {<<"http://user:newpass@host">>, #{}} }, { {"user", "host", default, "/", []}, - {"http://host", [basic_auth("user","pass")]} + {<<"http://host">>, basic_auth(<<"user">>, <<"pass">>)} }, { {"user", "host", default, "/", []}, - {"http://host", [basic_auth("user","newpass")]} + {<<"http://host">>, basic_auth(<<"user">>, <<"newpass">>)} }, { {"user1", "host", default, "/", []}, - {"http://user1:pass1@host", [basic_auth("user2","pass2")]} + {<<"http://user1:pass1@host">>, basic_auth(<<"user2">>, + <<"pass2">>)} }, { {"user", "host", default, "/", [{"h", "v"}]}, - {"http://host", [{"h", "v"}, basic_auth("user","pass")]} + {<<"http://host">>, maps:merge(#{<<"h">> => <<"v">>}, + basic_auth(<<"user">>, <<"pass">>))} }, { {undefined, "random_junk", undefined, undefined}, - {"random_junk", []} + {<<"random_junk">>, #{}} }, { {undefined, "host", default, "/", []}, - {"http://host", [{"Authorization", "Basic bad"}]} + {<<"http://host">>, #{<<"Authorization">> => + <<"Basic bad">>}} } ] ]. basic_auth(User, Pass) -> - B64Auth = base64:encode_to_string(User ++ ":" ++ Pass), - {"Authorization", "Basic " ++ B64Auth}. + B64Auth = base64:encode(<<User/binary, ":", Pass/binary>>), + #{<<"Authorization">> => <<"Basic ", B64Auth/binary>>}. + + +version4_matches_couchdb3_test_() -> + { + foreach, + fun setup/0, + fun teardown/1, + [ + ?TDEF_FE(id_matches_couchdb3) + ] + }. + + +setup() -> + meck:expect(config, get, fun(_, _, Default) -> Default end). + + +teardown(_) -> + meck:unload(). + + +id_matches_couchdb3(_) -> + {ok, Rep} = couch_replicator_parse:parse_rep(#{ + <<"source">> => <<"http://adm:pass@127.0.0.1/abc">>, + <<"target">> => <<"http://adm:pass@127.0.0.1/xyz">>, + <<"create_target">> => true, + <<"continuous">> => true + }, null), + meck:expect(couch_server, get_uuid, 0, "somefixedid"), + {RepId, BaseId} = replication_id(Rep), + % Calculated on CouchDB 3.x + RepId3x = <<"ff71e1208f93ba054eb60e7ca8683fe4+continuous+create_target">>, + BaseId3x = <<"ff71e1208f93ba054eb60e7ca8683fe4">>, + ?assertEqual(RepId3x, RepId), + ?assertEqual(BaseId3x, BaseId). -endif. 
diff --git a/src/couch_replicator/src/couch_replicator_job.erl b/src/couch_replicator/src/couch_replicator_job.erl new file mode 100644 index 000000000..c8c143a58 --- /dev/null +++ b/src/couch_replicator/src/couch_replicator_job.erl @@ -0,0 +1,1609 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_replicator_job). + + +-behaviour(gen_server). + + +-export([ + start_link/0 +]). + +-export([ + init/1, + terminate/2, + handle_call/3, + handle_cast/2, + handle_info/2, + format_status/2, + code_change/3 +]). + +-export([ + accept/0, + health_threshold/0 +]). + + +-include_lib("couch/include/couch_db.hrl"). +-include_lib("couch_replicator/include/couch_replicator_api_wrap.hrl"). +-include("couch_replicator.hrl"). + + +-define(LOWEST_SEQ, 0). +-define(DEFAULT_CHECKPOINT_INTERVAL, 30000). +-define(STARTUP_JITTER_DEFAULT, 5000). +-define(DEFAULT_MIN_BACKOFF_PENALTY_SEC, 32). +-define(DEFAULT_MAX_BACKOFF_PENALTY_SEC, 2 * 24 * 3600). +-define(DEFAULT_HEALTH_THRESHOLD_SEC, 2 * 60). +-define(DEFAULT_MAX_HISTORY, 10). +-define(DEFAULT_STATS_UPDATE_INTERVAL_SEC, 10). + + +-record(rep_state, { + job, + job_data, + id, + base_id, + doc_id, + db_name, + db_uuid, + source_name, + target_name, + source, + target, + history, + checkpoint_history, + start_seq, + committed_seq, + current_through_seq, + seqs_in_progress = [], + highest_seq_done = {0, ?LOWEST_SEQ}, + source_log, + target_log, + rep_starttime, + src_starttime, + tgt_starttime, + checkpoint_timer, + stats_timer, + changes_queue, + changes_manager, + changes_reader, + workers, + stats = couch_replicator_stats:new(), + session_id, + source_seq = nil, + use_checkpoints = true, + checkpoint_interval = ?DEFAULT_CHECKPOINT_INTERVAL, + user = null, + options = #{} +}). + + +start_link() -> + gen_server:start_link(?MODULE, [], []). + + +init(_) -> + process_flag(trap_exit, true), + {ok, delayed_init, 0}. + + +terminate(normal, #rep_state{} = State) -> + #rep_state{ + job = Job, + job_data = JobData, + checkpoint_history = History + } = State, + ok = complete_job(undefined, Job, JobData, History), + close_endpoints(State); + +terminate(shutdown, #rep_state{} = State0) -> + % Replication stopped by the job server + State1 = cancel_timers(State0), + State3 = case do_checkpoint(State1) of + {ok, State2} -> + State2; + Error -> + Msg = "~p : Failed last checkpoint. 
Job: ~p Error: ~p", + couch_log:error(Msg, [?MODULE, State1#rep_state.id, Error]), + State1 + end, + #rep_state{job = Job, job_data = JobData} = State3, + ok = reschedule(undefined, Job, JobData), + ok = close_endpoints(State3); + +terminate({shutdown, Error}, {init_error, Stack}) -> + % Termination in init, before the job had initialized + case Error of + max_backoff -> couch_log:warning("~p job backed off", [?MODULE]); + finished -> couch_log:notice("~p job finished in init", [?MODULE]); + _ -> couch_log:error("~p job failed ~p ~p", [?MODULE, Error, Stack]) + end, + ok; + +terminate({shutdown, finished}, #rep_state{} = State) -> + % Job state was already updated and job is marked as finished + ok = close_endpoints(State); + +terminate({shutdown, halt}, #rep_state{} = State) -> + % Job is re-enqueued and possibly already running somewhere else + couch_log:error("~p job ~p halted", [?MODULE, State#rep_state.id]), + ok = close_endpoints(State); + +terminate(Reason0, #rep_state{} = State0) -> + State = update_job_state(State0), + Reason = case Reason0 of + {shutdown, Err} -> Err; + _ -> Reason0 + end, + #rep_state{ + id = RepId, + job = Job, + job_data = JobData, + source_name = Source, + target_name = Target + } = State, + couch_log:error("Replication `~s` (`~s` -> `~s`) failed: ~p", + [RepId, Source, Target, Reason]), + ok = reschedule_on_error(undefined, Job, JobData, Reason), + ok = close_endpoints(State). + + +handle_call({add_stats, Stats}, From, State) -> + gen_server:reply(From, ok), + NewStats = couch_replicator_stats:sum_stats(State#rep_state.stats, Stats), + {noreply, State#rep_state{stats = NewStats}}; + +handle_call({report_seq_done, Seq, StatsInc}, From, #rep_state{} = State) -> + #rep_state{ + seqs_in_progress = SeqsInProgress, + highest_seq_done = HighestDone, + current_through_seq = ThroughSeq, + stats = Stats + } = State, + gen_server:reply(From, ok), + {NewThroughSeq0, NewSeqsInProgress} = case SeqsInProgress of + [] -> + {Seq, []}; + [Seq | Rest] -> + {Seq, Rest}; + [_ | _] -> + {ThroughSeq, ordsets:del_element(Seq, SeqsInProgress)} + end, + NewHighestDone = lists:max([HighestDone, Seq]), + NewThroughSeq = case NewSeqsInProgress of + [] -> + lists:max([NewThroughSeq0, NewHighestDone]); + _ -> + NewThroughSeq0 + end, + couch_log:debug("Worker reported seq ~p, through seq was ~p, " + "new through seq is ~p, highest seq done was ~p, " + "new highest seq done is ~p~n" + "Seqs in progress were: ~p~nSeqs in progress are now: ~p", + [Seq, ThroughSeq, NewThroughSeq, HighestDone, + NewHighestDone, SeqsInProgress, NewSeqsInProgress]), + NewState = State#rep_state{ + stats = couch_replicator_stats:sum_stats(Stats, StatsInc), + current_through_seq = NewThroughSeq, + seqs_in_progress = NewSeqsInProgress, + highest_seq_done = NewHighestDone + }, + {noreply, maybe_update_job_state(NewState)}; + +handle_call(Msg, _From, St) -> + {stop, {bad_call, Msg}, {bad_call, Msg}, St}. + + +handle_cast({report_seq, Seq}, + #rep_state{seqs_in_progress = SeqsInProgress} = State) -> + NewSeqsInProgress = ordsets:add_element(Seq, SeqsInProgress), + {noreply, State#rep_state{seqs_in_progress = NewSeqsInProgress}}; + +handle_cast(Msg, St) -> + {stop, {bad_cast, Msg}, St}. 
+ + +handle_info(timeout, delayed_init) -> + try delayed_init() of + {ok, State} -> {noreply, State}; + {stop, Reason, State} -> {stop, Reason, State} + catch + exit:{shutdown, Exit} when Exit =:= finished orelse Exit =:= halt -> + Stack = erlang:get_stacktrace(), + {stop, {shutdown, Exit}, {init_error, Stack}}; + _Tag:Error -> + ShutdownReason = {error, replication_start_error(Error)}, + Stack = erlang:get_stacktrace(), + {stop, {shutdown, ShutdownReason}, {init_error, Stack}} + end; + +handle_info(stats_update, #rep_state{} = State) -> + State1 = cancel_stats_timer(State), + State2 = update_job_state(State1), + {noreply, State2}; + +handle_info(checkpoint, State0) -> + State = cancel_checkpoint_timer(State0), + ok = check_user_filter(State), + case do_checkpoint(State) of + {ok, State1} -> + couch_stats:increment_counter([couch_replicator, checkpoints, + success]), + {noreply, start_checkpoint_timer(State1)}; + Error -> + couch_stats:increment_counter([couch_replicator, checkpoints, + failure]), + {stop, Error, State} + end; + +handle_info(shutdown, St) -> + {stop, shutdown, St}; + +handle_info({'EXIT', Pid, max_backoff}, State) -> + couch_log:error("Max backoff reached child process ~p", [Pid]), + {stop, {shutdown, max_backoff}, State}; + +handle_info({'EXIT', Pid, {shutdown, max_backoff}}, State) -> + couch_log:error("Max backoff reached child process ~p", [Pid]), + {stop, {shutdown, max_backoff}, State}; + +handle_info({'EXIT', Pid, normal}, #rep_state{changes_reader=Pid} = State) -> + {noreply, State}; + +handle_info({'EXIT', Pid, Reason0}, #rep_state{changes_reader=Pid} = State) -> + couch_stats:increment_counter([couch_replicator, changes_reader_deaths]), + Reason = case Reason0 of + {changes_req_failed, _, _} = HttpFail -> + HttpFail; + {http_request_failed, _, _, {error, {code, Code}}} -> + {changes_req_failed, Code}; + {http_request_failed, _, _, {error, Err}} -> + {changes_req_failed, Err}; + Other -> + {changes_reader_died, Other} + end, + couch_log:error("ChangesReader process died with reason: ~p", [Reason]), + {stop, {shutdown, Reason}, cancel_timers(State)}; + +handle_info({'EXIT', Pid, normal}, #rep_state{changes_manager=Pid} = State) -> + {noreply, State}; + +handle_info({'EXIT', Pid, Reason}, #rep_state{changes_manager=Pid} = State) -> + couch_stats:increment_counter([couch_replicator, changes_manager_deaths]), + couch_log:error("ChangesManager process died with reason: ~p", [Reason]), + {stop, {shutdown, {changes_manager_died, Reason}}, cancel_timers(State)}; + +handle_info({'EXIT', Pid, normal}, #rep_state{changes_queue=Pid} = State) -> + {noreply, State}; + +handle_info({'EXIT', Pid, Reason}, #rep_state{changes_queue=Pid} = State) -> + couch_stats:increment_counter([couch_replicator, changes_queue_deaths]), + couch_log:error("ChangesQueue process died with reason: ~p", [Reason]), + {stop, {shutdown, {changes_queue_died, Reason}}, cancel_timers(State)}; + +handle_info({'EXIT', Pid, normal}, #rep_state{workers = Workers} = State) -> + case Workers -- [Pid] of + Workers -> + %% Processes might be linked by replicator's auth plugins so + %% we tolerate them exiting `normal` here and don't crash + LogMsg = "~p: unknown pid exited `normal` ~p", + couch_log:error(LogMsg, [?MODULE, Pid]), + {noreply, State#rep_state{workers = Workers}}; + [] -> + catch unlink(State#rep_state.changes_manager), + catch exit(State#rep_state.changes_manager, kill), + do_last_checkpoint(State); + Workers2 -> + {noreply, State#rep_state{workers = Workers2}} + end; + +handle_info({'EXIT', Pid, 
Reason}, #rep_state{workers = Workers} = State) -> + State2 = cancel_timers(State), + case lists:member(Pid, Workers) of + false -> + {stop, {unknown_process_died, Pid, Reason}, State2}; + true -> + couch_stats:increment_counter([couch_replicator, worker_deaths]), + StopReason = case Reason of + {shutdown, _} = Err -> + Err; + Other -> + ErrLog = "Worker ~p died with reason: ~p", + couch_log:error(ErrLog, [Pid, Reason]), + {worker_died, Pid, Other} + end, + {stop, StopReason, State2} + end; + +handle_info({Ref, ready}, St) when is_reference(Ref) -> + LogMsg = "~p : spurious erlfdb future ready message ~p", + couch_log:notice(LogMsg, [?MODULE, Ref]), + {noreply, St}; + +handle_info(Msg, St) -> + {stop, {bad_info, Msg}, St}. + + +format_status(_Opt, [_PDict, State]) -> + #rep_state{ + id = Id, + source = Source, + target = Target, + start_seq = StartSeq, + source_seq = SourceSeq, + committed_seq = CommitedSeq, + current_through_seq = ThroughSeq, + highest_seq_done = HighestSeqDone, + session_id = SessionId, + doc_id = DocId, + db_name = DbName, + options = Options + } = state_strip_creds(State), + [ + {rep_id, Id}, + {source, couch_replicator_api_wrap:db_uri(Source)}, + {target, couch_replicator_api_wrap:db_uri(Target)}, + {db_name, DbName}, + {doc_id, DocId}, + {options, Options}, + {session_id, SessionId}, + {start_seq, StartSeq}, + {source_seq, SourceSeq}, + {committed_seq, CommitedSeq}, + {current_through_seq, ThroughSeq}, + {highest_seq_done, HighestSeqDone} + ]. + + +code_change(_OldVsn, #rep_state{}=State, _Extra) -> + {ok, State}. + + +accept() -> + couch_stats:increment_counter([couch_replicator, jobs, accepts]), + Now = erlang:system_time(second), + case couch_replicator_jobs:accept_job(Now + 5) of + {ok, Job, #{?REP := Rep} = JobData} -> + Normal = case Rep of + #{?OPTIONS := #{} = Options} -> + not maps:get(<<"continuous">>, Options, false); + _ -> + true + end, + couch_replicator_job_server:accepted(self(), Normal), + {ok, Job, JobData}; + {error, not_found} -> + timer:sleep(accept_jitter_msec()), + ?MODULE:accept() + end. + + +% Health threshold is the minimum amount of time an unhealthy job should run +% crashing before it is considered to be healthy again. HealtThreashold should +% not be 0 as jobs could start and immediately crash, and it shouldn't be +% infinity, since then consecutive crashes would accumulate forever even if +% job is back to normal. +health_threshold() -> + config:get_integer("replicator", "health_threshold_sec", + ?DEFAULT_HEALTH_THRESHOLD_SEC). + + +delayed_init() -> + {ok, Job, JobData} = accept(), + try do_init(Job, JobData) of + State = #rep_state{} -> {ok, State} + catch + exit:{http_request_failed, _, _, max_backoff} -> + Stack = erlang:get_stacktrace(), + reschedule_on_error(undefined, Job, JobData, max_backoff), + {stop, {shutdown, max_backoff}, {init_error, Stack}}; + exit:{shutdown, Exit} when Exit =:= finished orelse Exit =:= halt -> + Stack = erlang:get_stacktrace(), + {stop, {shutdown, Exit}, {init_error, Stack}}; + _Tag:Error -> + Reason = {error, replication_start_error(Error)}, + Stack = erlang:get_stacktrace(), + ErrMsg = "~p : job ~p failed during startup ~p stack:~p", + couch_log:error(ErrMsg, [?MODULE, Job, Reason, Stack]), + reschedule_on_error(undefined, Job, JobData, Reason), + {stop, {shutdown, Reason}, {init_error, Stack}} + end. 
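With the module defaults above (startup jitter 5000 ms, health threshold 2 minutes), the accept loop and healing behave roughly as follows; both values are configurable under the "replicator" config section:

    %% accept_jitter_msec() -> couch_rand:uniform(5000)  % sleep 1..5000 ms
    %%                                                   % between accept attempts
    %% health_threshold()   -> 120                       % a job must run 120 s
    %%                                                   % without crashing before
    %%                                                   % maybe_heal/2 resets its
    %%                                                   % consecutive error count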
+ + +do_init(Job, #{} = JobData) -> + couch_stats:increment_counter([couch_replicator, jobs, starts]), + % This may make a network request, then may fail and reschedule the job + {RepId, BaseId} = get_rep_id(undefined, Job, JobData), + #{ + ?DB_NAME := DbName, + ?DB_UUID := DbUUID, + ?DOC_ID := DocId + } = JobData, + + ok = couch_replicator_docs:remove_state_fields(DbName, DbUUID, DocId), + + % Finish if job is in a failed state already + case JobData of + #{?STATE := ?ST_FAILED, ?STATE_INFO := Error} -> + ok = fail_job(undefined, Job, JobData, Error), + exit({shutdown, finished}); + #{?STATE := St} when is_binary(St), St =/= ?ST_FAILED -> + ok + end, + + JobsTx = couch_jobs_fdb:get_jtx(), + {Job1, JobData1, Owner} = couch_jobs_fdb:tx(JobsTx, fun(JTx) -> + init_job_data(JTx, Job, JobData, RepId, BaseId) + end), + + % Handle ownership decision here to be outside of the transaction + case Owner of + owner -> ok; + not_owner -> exit({shutdown, finished}) + end, + + #rep_state{ + source = Source, + target = Target, + start_seq = {_Ts, StartSeq}, + options = Options, + doc_id = DocId, + db_name = DbName + } = State = init_state(Job1, JobData1), + + NumWorkers = maps:get(<<"worker_processes">>, Options), + BatchSize = maps:get(<<"worker_batch_size">>, Options), + {ok, ChangesQueue} = couch_work_queue:new([ + {max_items, BatchSize * NumWorkers * 2}, + {max_size, 100 * 1024 * NumWorkers} + ]), + + % This starts the _changes reader process. It adds the changes from the + % source db to the ChangesQueue. + {ok, ChangesReader} = couch_replicator_changes_reader:start_link( + StartSeq, Source, ChangesQueue, Options + ), + + % Changes manager - responsible for dequeing batches from the changes queue + % and deliver them to the worker processes. + ChangesManager = spawn_changes_manager(self(), ChangesQueue, BatchSize), + + % This starts the worker processes. They ask the changes queue manager for + % a a batch of _changes rows to process -> check which revs are missing in + % the target, and for the missing ones, it copies them from the source to + % the target. + MaxConns = maps:get(<<"http_connections">>, Options), + Workers = lists:map(fun(_) -> + couch_stats:increment_counter([couch_replicator, workers_started]), + {ok, Pid} = couch_replicator_worker:start_link(self(), Source, Target, + ChangesManager, MaxConns), + Pid + end, lists:seq(1, NumWorkers)), + + log_replication_start(State), + + State1 = State#rep_state{ + changes_queue = ChangesQueue, + changes_manager = ChangesManager, + changes_reader = ChangesReader, + workers = Workers + }, + + update_job_state(State1). 
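The changes queue created in do_init/2 above is bounded both by item count and by byte size, so its memory footprint scales with the configured worker count and batch size. For example, with hypothetical settings of worker_processes = 4 and worker_batch_size = 500:

    max_items = 500 * 4 * 2    = 4000 change rows
    max_size  = 100 * 1024 * 4 = 409600 bytes (~400 KiB)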
+ + +init_job_data(#{jtx := true} = JTx, Job, #{} = JobData, RepId, BaseId) -> + #{ + ?REP := Rep, + ?REP_ID := OldRepId, + ?DB_UUID := DbUUID, + ?DOC_ID := DocId + } = JobData, + JobId = couch_replicator_ids:job_id(Rep, DbUUID, DocId), + Now = erlang:system_time(second), + JobData1 = JobData#{ + ?REP_ID := RepId, + ?BASE_ID := BaseId, + ?STATE := ?ST_RUNNING, + ?STATE_INFO := null, + ?LAST_START := Now, + ?REP_NODE := erlang:atom_to_binary(node(), utf8), + ?REP_PID := list_to_binary(pid_to_list(self())), + ?LAST_UPDATED := Now + }, + JobData2 = case is_binary(OldRepId) andalso OldRepId =/= RepId of + true -> + % Handle Replication ID change + ok = couch_replicator_jobs:clear_old_rep_id(JTx, JobId, OldRepId), + JobData1#{ + ?REP_STATS := #{}, + ?JOB_HISTORY := [] + }; + false -> + JobData1 + end, + JobData3 = hist_append(?HIST_STARTED, Now, JobData2, undefined), + case check_ownership(JTx, Job, JobData3) of + owner -> + couch_stats:increment_counter([couch_replicator, jobs, starts]), + {Job1, JobData4} = update_job_data(JTx, Job, JobData3), + {Job1, JobData4, owner}; + not_owner -> + {Job, JobData3, not_owner} + end. + + +check_ownership(#{jtx := true} = JTx, Job, JobData) -> + #{ + ?REP_ID := RepId, + ?REP := Rep, + ?DB_UUID := DbUUID, + ?DOC_ID := DocId + } = JobData, + JobId = couch_replicator_ids:job_id(Rep, DbUUID, DocId), + case couch_replicator_jobs:try_update_rep_id(JTx, JobId, RepId) of + ok -> + owner; + {error, {replication_job_conflict, OtherJobId}} -> + case couch_replicator_jobs:get_job_data(JTx, OtherJobId) of + {ok, #{?STATE := S, ?DB_NAME := null}} when + S == ?ST_RUNNING; S == ?ST_PENDING -> + % Conflicting job is a transient job, not associated with a + % _replicator doc, so we let this job retry. This is also + % partly done for compatibility with pervious replicator + % behavior. + Error = <<"Duplicate job running: ", OtherJobId/binary>>, + reschedule_on_error(JTx, Job, JobData, Error), + not_owner; + {ok, #{?STATE := S, ?DB_NAME := <<_/binary>>}} when + S == ?ST_RUNNING; S == ?ST_PENDING -> + % Conflicting job is a permanent replication job, so this + % job is marked as failed. + Error = <<"Duplicate job running: ", OtherJobId/binary>>, + fail_job(JTx, Job, JobData, Error), + not_owner; + {ok, #{}} -> + LogMsg = "~p : Job ~p usurping job ~p for replication ~p", + couch_log:warning(LogMsg, [?MODULE, JobId, OtherJobId, + RepId]), + couch_replicator_jobs:update_rep_id(JTx, JobId, RepId), + owner; + {error, not_found} -> + LogMsg = "~p : Orphan replication job reference ~p -> ~p", + couch_log:error(LogMsg, [?MODULE, RepId, OtherJobId]), + couch_replicator_jobs:update_rep_id(JTx, JobId, RepId), + owner + end + end. + + +update_job_data(Tx, #rep_state{} = State) -> + #rep_state{job = Job, job_data = JobData} = State, + {Job1, JobData1} = update_job_data(Tx, Job, JobData), + State#rep_state{job = Job1, job_data = JobData1}. + + +update_job_data(Tx, Job, #{} = JobData) -> + case couch_replicator_jobs:update_job_data(Tx, Job, JobData) of + {ok, Job1} -> + {Job1, JobData}; + {error, halt} -> + exit({shutdown, halt}) + end. 
+ + +update_active_task_info(#rep_state{} = State) -> + #rep_state{ + job_data = JobData, + user = User, + id = RepId, + db_name = DbName, + doc_id = DocId, + source_name = Source, + target_name = Target, + options = Options, + highest_seq_done = {_, SourceSeq}, + checkpoint_interval = CheckpointInterval + } = State, + + #{ + ?REP := #{?START_TIME := StartTime}, + ?REP_STATS := Stats, + ?REP_NODE := Node, + ?REP_PID := Pid, + ?LAST_UPDATED := LastUpdated + } = JobData, + + Info = maps:merge(Stats, #{ + <<"type">> => <<"replication">>, + <<"user">> => User, + <<"replication_id">> => RepId, + <<"database">> => DbName, + <<"doc_id">> => DocId, + <<"source">> => ?l2b(Source), + <<"target">> => ?l2b(Target), + <<"continuous">> => maps:get(<<"continuous">>, Options, false), + <<"source_seq">> => SourceSeq, + <<"checkpoint_interval">> => CheckpointInterval, + <<"node">> => Node, + <<"pid">> => Pid, + <<"updated_on">> => LastUpdated, + <<"started_on">> => StartTime + }), + + JobData1 = fabric2_active_tasks:update_active_task_info(JobData, Info), + State#rep_state{job_data = JobData1}. + + +% Transient jobs don't get rescheduled on error with the exception of +% max_backoff errors. +% +reschedule_on_error(JTx, Job, #{?DB_NAME := null} = JobData, Error) when + Error =/= max_backoff -> + fail_job(JTx, Job, JobData, Error); + +reschedule_on_error(JTx, Job, #{} = JobData0, Error0) -> + Error = error_info(Error0), + + Now = erlang:system_time(second), + + JobData = maybe_heal(JobData0, Now), + #{?ERROR_COUNT := ErrorCount} = JobData, + JobData1 = JobData#{ + ?STATE := ?ST_CRASHING, + ?STATE_INFO := Error, + ?ERROR_COUNT := ErrorCount + 1, + ?LAST_ERROR := Error, + ?REP_NODE := null, + ?REP_PID := null + }, + JobData2 = hist_append(?HIST_CRASHED, Now, JobData1, Error), + JobData3 = hist_append(?HIST_PENDING, Now, JobData2, undefined), + JobData4 = fabric2_active_tasks:update_active_task_info(JobData3, #{}), + + couch_stats:increment_counter([couch_replicator, jobs, crashes]), + + Time = get_backoff_time(ErrorCount + 1), + case couch_replicator_jobs:reschedule_job(JTx, Job, JobData4, Time) of + ok -> ok; + {error, halt} -> exit({shutdown, halt}) + end. + + +reschedule(JTx, Job, #{} = JobData) -> + Now = erlang:system_time(second), + + JobData1 = JobData#{ + ?STATE := ?ST_PENDING, + ?STATE_INFO := null, + ?LAST_ERROR := null, + ?REP_NODE := null, + ?REP_PID := null + }, + JobData2 = hist_append(?HIST_STOPPED, Now, JobData1, undefined), + JobData3 = hist_append(?HIST_PENDING, Now, JobData2, undefined), + JobData4 = fabric2_active_tasks:update_active_task_info(JobData3, #{}), + + couch_stats:increment_counter([couch_replicator, jobs, stops]), + + Time = Now + couch_replicator_job_server:scheduling_interval_sec(), + case couch_replicator_jobs:reschedule_job(JTx, Job, JobData4, Time) of + ok -> ok; + {error, halt} -> exit({shutdown, halt}) + end. 
+ + +fail_job(JTx, Job, #{} = JobData, Error0) -> + Error = error_info(Error0), + + Now = erlang:system_time(second), + + #{ + ?ERROR_COUNT := ErrorCount, + ?DB_NAME := DbName, + ?DB_UUID := DbUUID, + ?DOC_ID := DocId + } = JobData, + + JobData1 = JobData#{ + ?STATE := ?ST_FAILED, + ?STATE_INFO := Error, + ?ERROR_COUNT := ErrorCount + 1, + ?REP_NODE := null, + ?REP_PID := null + }, + JobData2 = hist_append(?HIST_CRASHED, Now, JobData1, Error), + JobData3 = fabric2_active_tasks:update_active_task_info(JobData2, #{}), + + couch_stats:increment_counter([couch_replicator, jobs, crashes]), + + case couch_replicator_jobs:finish_job(JTx, Job, JobData3) of + ok -> + couch_replicator_docs:update_failed(DbName, DbUUID, DocId, Error), + ok; + {error, halt} -> + exit({shutdown, halt}) + end. + + +complete_job(JTx, Job, #{} = JobData, CheckpointHistory) -> + #{ + ?DB_NAME := Db, + ?DB_UUID := DbUUID, + ?DOC_ID := DocId, + ?REP_STATS := RepStats, + ?REP := Rep + } = JobData, + + Now = erlang:system_time(second), + + #{?START_TIME := StartTime} = Rep, + JobData1 = JobData#{ + ?STATE := ?ST_COMPLETED, + ?CHECKPOINT_HISTORY := CheckpointHistory, + ?STATE_INFO := RepStats, + ?REP_NODE := null, + ?REP_PID := null + }, + JobData2 = hist_append(?HIST_STOPPED, Now, JobData1, undefined), + JobData3 = fabric2_active_tasks:update_active_task_info(JobData2, #{}), + + couch_stats:increment_counter([couch_replicator, jobs, stops]), + + case couch_replicator_jobs:finish_job(JTx, Job, JobData3) of + ok -> + StartISO8601 = couch_replicator_utils:iso8601(StartTime), + Stats = maps:merge(RepStats, #{<<"start_time">> => StartISO8601}), + couch_replicator_docs:update_completed(Db, DbUUID, DocId, Stats), + ok; + {error, halt} -> + exit({shutdown, halt}) + end. + + +error_info(Error0) -> + case Error0 of + <<_/binary>> -> + Error0; + undefined -> + undefined; + null -> + null; + Atom when is_atom(Atom) -> + atom_to_binary(Atom, utf8); + {shutdown, Atom} when is_atom(Atom) -> + atom_to_binary(Atom, utf8); + {shutdown, Err} -> + couch_replicator_utils:rep_error_to_binary(Err); + {error, Atom} when is_atom(Atom) -> + atom_to_binary(Atom, utf8); + {error, {Err, Reason}} when is_atom(Err) -> + ReasonBin = couch_replicator_utils:rep_error_to_binary(Reason), + #{ + <<"error">> => atom_to_binary(Err, utf8), + <<"reason">> => ReasonBin + }; + _Other -> + couch_replicator_utils:rep_error_to_binary(Error0) + end. + + +get_rep_id(JTx, Job, #{} = JobData) -> + #{?REP := Rep} = JobData, + try + couch_replicator_ids:replication_id(Rep) + catch + throw:{filter_fetch_error, _} = Error -> + reschedule_on_error(JTx, Job, JobData, {error, Error}), + exit({shutdown, finished}) + end. + + +% After job run continuously for some time we consider it "healed" and reset +% its consecutive error count. +maybe_heal(#{} = JobData, Now) -> + #{?LAST_START := LastStart} = JobData, + case Now - LastStart > health_threshold() of + true -> JobData#{?ERROR_COUNT := 0, ?LAST_ERROR := null}; + false -> JobData + end. + + +get_backoff_time(ErrCnt) -> + Max = min(max_backoff_penalty_sec(), 3600 * 24 * 30), + Min = max(min_backoff_penalty_sec(), 2), + + % Calculate the max exponent so exponentiation doesn't blow up + MaxExp = math:log2(Max) - math:log2(Min), + + % This is the recommended backoff amount + Wait = Min * math:pow(2, min(ErrCnt, MaxExp)), + + % Apply a 25% jitter to avoid a thundering herd effect + WaitJittered = Wait * 0.75 + rand:uniform(trunc(Wait * 0.25) + 1), + erlang:system_time(second) + trunc(WaitJittered). 
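Using the defaults defined at the top of this module (32 s minimum penalty, 2 days maximum penalty), get_backoff_time/1 works out approximately as follows:

    MaxExp = log2(172800) - log2(32) ~= 12.4
    ErrCnt = 1   -> Wait = 32 * 2^1 = 64 s,   jittered to 49..65 s
    ErrCnt = 3   -> Wait = 32 * 2^3 = 256 s,  jittered to 193..257 s
    ErrCnt >= 13 -> Wait capped at 172800 s; the next attempt lands
                    roughly 1.5 to 2 days out after jitter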
+ + +headers_strip_creds([], Acc) -> + lists:reverse(Acc); + +headers_strip_creds([{Key, Value0} | Rest], Acc) -> + Value = case string:to_lower(Key) of + "authorization" -> "****"; + _ -> Value0 + end, + headers_strip_creds(Rest, [{Key, Value} | Acc]). + + +httpdb_strip_creds(#httpdb{url = Url, headers = Headers} = HttpDb) -> + HttpDb#httpdb{ + url = couch_util:url_strip_password(Url), + headers = headers_strip_creds(Headers, []) + }; + +httpdb_strip_creds(LocalDb) -> + LocalDb. + + +state_strip_creds(#rep_state{source = Source, target = Target} = State) -> + State#rep_state{ + source = httpdb_strip_creds(Source), + target = httpdb_strip_creds(Target) + }. + + +adjust_maxconn(Src = #{<<"http_connections">> := 1}, RepId) -> + Msg = "Adjusting minimum number of HTTP source connections to 2 for ~p", + couch_log:notice(Msg, [RepId]), + Src#{<<"http_connections">> := 2}; + +adjust_maxconn(Src, _RepId) -> + Src. + + +do_last_checkpoint(#rep_state{seqs_in_progress = [], + highest_seq_done = {_Ts, ?LOWEST_SEQ}} = State) -> + {stop, normal, cancel_timers(State)}; + +do_last_checkpoint(#rep_state{seqs_in_progress = [], + highest_seq_done = Seq} = State) -> + State1 = State#rep_state{current_through_seq = Seq}, + State2 = cancel_timers(State1), + case do_checkpoint(State2) of + {ok, State3} -> + couch_stats:increment_counter([couch_replicator, checkpoints, + success]), + {stop, normal, State3}; + Error -> + couch_stats:increment_counter([couch_replicator, checkpoints, + failure]), + {stop, Error, State2} + end. + + +start_checkpoint_timer(#rep_state{} = State) -> + CheckpointAfterMSec = State#rep_state.checkpoint_interval, + JobTimeoutMSec = couch_replicator_jobs:get_timeout() * 1000, + Wait1 = min(CheckpointAfterMSec, JobTimeoutMSec div 2), + Wait2 = trunc(Wait1 * 0.75) + rand:uniform(trunc(Wait1 * 0.25)), + TRef = erlang:send_after(Wait2, self(), checkpoint), + State#rep_state{checkpoint_timer = TRef}. + + +cancel_checkpoint_timer(#rep_state{checkpoint_timer = nil} = State) -> + State; +cancel_checkpoint_timer(#rep_state{checkpoint_timer = Timer} = State) -> + erlang:cancel_timer(Timer), + State#rep_state{checkpoint_timer = nil}. + + +start_stats_timer(#rep_state{} = State) -> + MSec = stats_update_interval_sec() * 1000, + TRef = erlang:send_after(MSec, self(), stats_update), + State#rep_state{stats_timer = TRef}. + + +cancel_stats_timer(#rep_state{stats_timer = nil} = State) -> + State; +cancel_stats_timer(#rep_state{stats_timer = Timer} = State) -> + erlang:cancel_timer(Timer), + receive stats_update -> ok after 0 -> ok end, + State#rep_state{stats_timer = nil}. + + +cancel_timers(#rep_state{} = State) -> + State1 = cancel_checkpoint_timer(State), + cancel_stats_timer(State1). 
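Assuming the couch_jobs timeout is comfortably larger than twice the checkpoint interval (so the 30 s default wins the min/2 comparison), start_checkpoint_timer/1 fires at 75-100% of the configured interval, which helps stagger checkpointing across concurrently running jobs:

    Wait1 = min(30000, JobTimeoutMSec div 2)          % assume 30000 wins
    Wait2 = trunc(30000 * 0.75) + rand:uniform(7500)  % 22501..30000 ms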
+ + +init_state(#{} = Job, #{} = JobData) -> + #{ + ?REP := Rep, + ?REP_ID := Id, + ?BASE_ID := BaseId, + ?DB_NAME := DbName, + ?DB_UUID := DbUUID, + ?DOC_ID := DocId, + ?LAST_ERROR := LastError + } = JobData, + #{ + ?SOURCE := Src0, + ?TARGET := Tgt, + ?START_TIME := StartTime, + ?OPTIONS := Options0, + ?REP_USER := User + } = Rep, + + % Optimize replication parameters if last time the jobs crashed because it + % was rate limited + Options = optimize_rate_limited_job(Options0, LastError), + + % Adjust minimum number of http source connections to 2 to avoid deadlock + Src = adjust_maxconn(Src0, BaseId), + {ok, Source} = couch_replicator_api_wrap:db_open(Src), + CreateTgt = maps:get(<<"create_target">>, Options, false), + TParams = maps:get(<<"create_target_params">>, Options, #{}), + + {ok, Target} = couch_replicator_api_wrap:db_open(Tgt, CreateTgt, TParams), + + {ok, SourceInfo} = couch_replicator_api_wrap:get_db_info(Source), + {ok, TargetInfo} = couch_replicator_api_wrap:get_db_info(Target), + + [SourceLog, TargetLog] = find_and_migrate_logs([Source, Target], Rep, + BaseId), + + {StartSeq0, History} = compare_replication_logs(SourceLog, TargetLog), + + #{?REP_STATS := Stats0} = JobData, + Stats1 = couch_replicator_stats:new(Stats0), + HistoryStats = case History of + [{[_ | _] = HProps} | _] -> couch_replicator_stats:new(HProps); + _ -> couch_replicator_stats:new() + end, + Stats2 = couch_replicator_stats:max_stats(Stats1, HistoryStats), + + StartSeq1 = maps:get(<<"since_seq">>, Options, StartSeq0), + StartSeq = {0, StartSeq1}, + + SourceSeq = get_value(<<"update_seq">>, SourceInfo, ?LOWEST_SEQ), + + #doc{body={CheckpointHistory}} = SourceLog, + + State = #rep_state{ + job = Job, + job_data = JobData, + id = Id, + base_id = BaseId, + source_name = couch_replicator_api_wrap:db_uri(Source), + target_name = couch_replicator_api_wrap:db_uri(Target), + source = Source, + target = Target, + options = Options, + history = History, + checkpoint_history = {[{<<"no_changes">>, true} | CheckpointHistory]}, + start_seq = StartSeq, + current_through_seq = StartSeq, + committed_seq = StartSeq, + source_log = SourceLog, + target_log = TargetLog, + rep_starttime = StartTime, + src_starttime = get_value(<<"instance_start_time">>, SourceInfo), + tgt_starttime = get_value(<<"instance_start_time">>, TargetInfo), + session_id = couch_uuids:random(), + source_seq = SourceSeq, + use_checkpoints = maps:get(<<"use_checkpoints">>, Options), + checkpoint_interval = maps:get(<<"checkpoint_interval">>, Options), + stats = Stats2, + stats_timer = nil, + doc_id = DocId, + db_name = DbName, + db_uuid = DbUUID, + user = User + }, + start_checkpoint_timer(State). + + +find_and_migrate_logs(DbList, #{} = Rep, BaseId) when is_binary(BaseId) -> + LogId = ?l2b(?LOCAL_DOC_PREFIX ++ BaseId), + fold_replication_logs(DbList, ?REP_ID_VERSION, LogId, LogId, Rep, []). 
+ + +fold_replication_logs([], _Vsn, _LogId, _NewId, _Rep, Acc) -> + lists:reverse(Acc); + +fold_replication_logs([Db | Rest] = Dbs, Vsn, LogId, NewId, #{} = Rep, Acc) -> + case couch_replicator_api_wrap:open_doc(Db, LogId, [ejson_body]) of + {error, <<"not_found">>} when Vsn > 1 -> + OldRepId = couch_replicator_ids:base_id(Rep, Vsn - 1), + fold_replication_logs(Dbs, Vsn - 1, + ?l2b(?LOCAL_DOC_PREFIX ++ OldRepId), NewId, Rep, Acc); + {error, <<"not_found">>} -> + fold_replication_logs(Rest, ?REP_ID_VERSION, NewId, NewId, Rep, + [#doc{id = NewId} | Acc]); + {ok, Doc} when LogId =:= NewId -> + fold_replication_logs( + Rest, ?REP_ID_VERSION, NewId, NewId, Rep, [Doc | Acc]); + {ok, Doc} -> + MigratedLog = #doc{id = NewId, body = Doc#doc.body}, + maybe_save_migrated_log(Rep, Db, MigratedLog, Doc#doc.id), + fold_replication_logs(Rest, ?REP_ID_VERSION, NewId, NewId, Rep, + [MigratedLog | Acc]) + end. + + +maybe_save_migrated_log(#{?OPTIONS := Options}, Db, #doc{} = Doc, OldId) -> + case maps:get(<<"use_checkpoints">>, Options) of + true -> + update_checkpoint(Db, Doc), + Msg = "Migrated replication checkpoint. Db:~p ~p -> ~p", + couch_log:notice(Msg, [httpdb_strip_creds(Db), OldId, Doc#doc.id]); + false -> + ok + end. + + +spawn_changes_manager(Parent, ChangesQueue, BatchSize) -> + spawn_link(fun() -> + changes_manager_loop_open(Parent, ChangesQueue, BatchSize, 1) + end). + + +changes_manager_loop_open(Parent, ChangesQueue, BatchSize, Ts) -> + receive + {get_changes, From} -> + case couch_work_queue:dequeue(ChangesQueue, BatchSize) of + closed -> + From ! {closed, self()}; + {ok, ChangesOrLastSeqs} -> + ReportSeq = case lists:last(ChangesOrLastSeqs) of + {last_seq, Seq} -> {Ts, Seq}; + #doc_info{high_seq = Seq} -> {Ts, Seq} + end, + Changes = lists:filter(fun + (#doc_info{}) -> true; + ({last_seq, _Seq}) -> false + end, ChangesOrLastSeqs), + ok = gen_server:cast(Parent, {report_seq, ReportSeq}), + From ! {changes, self(), Changes, ReportSeq} + end, + changes_manager_loop_open(Parent, ChangesQueue, BatchSize, Ts + 1) + end. 
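The loop above implies a small request/response protocol for whoever pulls batches from the changes manager. Below is a minimal consumer sketch only; handle_batch/2 and done are hypothetical placeholders, and the real consumer lives in couch_replicator_worker:

    ChangesManager ! {get_changes, self()},
    receive
        {changes, ChangesManager, Changes, ReportSeq} ->
            %% process the batch, then report ReportSeq back when finished
            handle_batch(Changes, ReportSeq);
        {closed, ChangesManager} ->
            %% queue closed, no further changes will arrive
            done
    end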
+ + +do_checkpoint(#rep_state{use_checkpoints=false} = State) -> + NewState = State#rep_state{ + checkpoint_history = {[{<<"use_checkpoints">>, false}]} + }, + {ok, update_job_state(NewState)}; +do_checkpoint(#rep_state{current_through_seq=S, committed_seq=S} = State) -> + {ok, update_job_state(State)}; +do_checkpoint(State) -> + #rep_state{ + source_name=SourceName, + target_name=TargetName, + source = Source, + target = Target, + history = OldHistory, + start_seq = {_, StartSeq}, + current_through_seq = {_Ts, NewSeq} = NewTsSeq, + source_log = SourceLog, + target_log = TargetLog, + rep_starttime = RepStartTime, + src_starttime = SrcInstanceStartTime, + tgt_starttime = TgtInstanceStartTime, + stats = Stats, + options = Options, + session_id = SessionId + } = State, + case commit_to_both(Source, Target) of + {source_error, Reason} -> + {checkpoint_commit_failure, <<"Failure on source commit: ", + (couch_util:to_binary(Reason))/binary>>}; + {target_error, Reason} -> + {checkpoint_commit_failure, <<"Failure on target commit: ", + (couch_util:to_binary(Reason))/binary>>}; + {SrcInstanceStartTime, TgtInstanceStartTime} -> + couch_log:notice("recording a checkpoint for `~s` -> `~s` at " + "source update_seq ~p", [SourceName, TargetName, NewSeq]), + StartTime = couch_replicator_utils:rfc1123_local(RepStartTime), + EndTime = couch_replicator_utils:rfc1123_local(), + NewHistoryEntry = {[ + {<<"session_id">>, SessionId}, + {<<"start_time">>, StartTime}, + {<<"end_time">>, EndTime}, + {<<"start_last_seq">>, StartSeq}, + {<<"end_last_seq">>, NewSeq}, + {<<"recorded_seq">>, NewSeq}, + {<<"missing_checked">>, + couch_replicator_stats:missing_checked(Stats)}, + {<<"missing_found">>, + couch_replicator_stats:missing_found(Stats)}, + {<<"docs_read">>, + couch_replicator_stats:docs_read(Stats)}, + {<<"docs_written">>, + couch_replicator_stats:docs_written(Stats)}, + {<<"doc_write_failures">>, + couch_replicator_stats:doc_write_failures(Stats)} + ]}, + BaseHistory = [ + {<<"session_id">>, SessionId}, + {<<"source_last_seq">>, NewSeq}, + {<<"replication_id_version">>, ?REP_ID_VERSION} + ] ++ case maps:get(<<"doc_ids">>, Options, undefined) of + undefined -> + []; + _DocIds -> + % backwards compatibility with the result of a replication + % by doc IDs in versions 0.11.x and 1.0.x TODO: deprecate + % (use same history format, simplify code) + [ + {<<"start_time">>, StartTime}, + {<<"end_time">>, EndTime}, + {<<"docs_read">>, + couch_replicator_stats:docs_read(Stats)}, + {<<"docs_written">>, + couch_replicator_stats:docs_written(Stats)}, + {<<"doc_write_failures">>, + couch_replicator_stats:doc_write_failures(Stats)} + ] + end, + % limit history to 50 entries + NewRepHistory = { + BaseHistory ++ [{<<"history">>, + lists:sublist([NewHistoryEntry | OldHistory], 50)}] + }, + + try + {SrcRevPos, SrcRevId} = update_checkpoint(Source, + SourceLog#doc{body = NewRepHistory}, source), + {TgtRevPos, TgtRevId} = update_checkpoint(Target, + TargetLog#doc{body = NewRepHistory}, target), + NewState = State#rep_state{ + checkpoint_history = NewRepHistory, + committed_seq = NewTsSeq, + source_log = SourceLog#doc{revs={SrcRevPos, [SrcRevId]}}, + target_log = TargetLog#doc{revs={TgtRevPos, [TgtRevId]}} + }, + {ok, update_job_state(NewState)} + catch throw:{checkpoint_commit_failure, _} = Failure -> + Failure + end; + {SrcInstanceStartTime, _NewTgtInstanceStartTime} -> + {checkpoint_commit_failure, <<"Target database out of sync. 
" + "Try to increase max_dbs_open at the target's server.">>}; + {_NewSrcInstanceStartTime, TgtInstanceStartTime} -> + {checkpoint_commit_failure, <<"Source database out of sync. " + "Try to increase max_dbs_open at the source's server.">>}; + {_NewSrcInstanceStartTime, _NewTgtInstanceStartTime} -> + {checkpoint_commit_failure, <<"Source and target databases out of " + "sync. Try to increase max_dbs_open at both servers.">>} + end. + + +update_checkpoint(Db, Doc, DbType) -> + try + update_checkpoint(Db, Doc) + catch throw:{checkpoint_commit_failure, Reason} -> + throw({checkpoint_commit_failure, <<"Error updating the ", + (couch_util:to_binary(DbType))/binary, " checkpoint document: ", + (couch_util:to_binary(Reason))/binary>>}) + end. + + +update_checkpoint(Db, #doc{id = LogId, body = LogBody} = Doc) -> + try + case couch_replicator_api_wrap:update_doc(Db, Doc, [delay_commit]) of + {ok, PosRevId} -> PosRevId; + {error, Reason} -> throw({checkpoint_commit_failure, Reason}) + end + catch throw:conflict -> + Opts = [ejson_body], + case (catch couch_replicator_api_wrap:open_doc(Db, LogId, Opts)) of + {ok, #doc{body = LogBody, revs = {Pos, [RevId | _]}}} -> + % This means that we were able to update successfully the + % checkpoint doc in a previous attempt but we got a connection + % error (timeout for e.g.) before receiving the success + % response. Therefore the request was retried and we got a + % conflict, as the revision we sent is not the current one. We + % confirm this by verifying the doc body we just got is the + % same that we have just sent. + {Pos, RevId}; + _ -> + throw({checkpoint_commit_failure, conflict}) + end + end. + + +commit_to_both(Source, Target) -> + % commit the src async + ParentPid = self(), + SrcCommitPid = spawn_link(fun() -> + Result = (catch couch_replicator_api_wrap:ensure_full_commit(Source)), + ParentPid ! {self(), Result} + end), + + % commit tgt sync + TgtResult = (catch couch_replicator_api_wrap:ensure_full_commit(Target)), + + SrcResult = receive + {SrcCommitPid, Result} -> + unlink(SrcCommitPid), + receive + {'EXIT', SrcCommitPid, _} -> + ok + after + 0 -> ok + end, + Result; + {'EXIT', SrcCommitPid, Reason} -> + {error, Reason} + end, + case TgtResult of + {ok, TargetStartTime} -> + case SrcResult of + {ok, SourceStartTime} -> + {SourceStartTime, TargetStartTime}; + SourceError -> + {source_error, SourceError} + end; + TargetError -> + {target_error, TargetError} + end. + + +compare_replication_logs(SrcDoc, TgtDoc) -> + #doc{body={RepRecProps}} = SrcDoc, + #doc{body={RepRecPropsTgt}} = TgtDoc, + SrcSession = get_value(<<"session_id">>, RepRecProps), + TgtSession = get_value(<<"session_id">>, RepRecPropsTgt), + case SrcSession == TgtSession of + true -> + % if the records have the same session id, + % then we have a valid replication history + OldSeqNum = get_value(<<"source_last_seq">>, RepRecProps, + ?LOWEST_SEQ), + OldHistory = get_value(<<"history">>, RepRecProps, []), + {OldSeqNum, OldHistory}; + false -> + SourceHistory = get_value(<<"history">>, RepRecProps, []), + TargetHistory = get_value(<<"history">>, RepRecPropsTgt, []), + couch_log:notice("Replication records differ. " + "Scanning histories to find a common ancestor.", []), + couch_log:debug("Record on source:~p~nRecord on target:~p~n", + [RepRecProps, RepRecPropsTgt]), + compare_rep_history(SourceHistory, TargetHistory) + end. 
+ + +compare_rep_history(S, T) when S =:= [] orelse T =:= [] -> + couch_log:notice("no common ancestry -- performing full replication", []), + {?LOWEST_SEQ, []}; + +compare_rep_history([{S} | SourceRest], [{T} | TargetRest] = Target) -> + SourceId = get_value(<<"session_id">>, S), + case has_session_id(SourceId, Target) of + true -> + RecordSeqNum = get_value(<<"recorded_seq">>, S, ?LOWEST_SEQ), + couch_log:notice("found a common replication record with " + "source_seq ~p", [RecordSeqNum]), + {RecordSeqNum, SourceRest}; + false -> + TargetId = get_value(<<"session_id">>, T), + case has_session_id(TargetId, SourceRest) of + true -> + RecordSeqNum = get_value(<<"recorded_seq">>, T, + ?LOWEST_SEQ), + couch_log:notice("found a common replication record with " + "source_seq ~p", [RecordSeqNum]), + {RecordSeqNum, TargetRest}; + false -> + compare_rep_history(SourceRest, TargetRest) + end + end. + + +has_session_id(_SessionId, []) -> + false; + +has_session_id(SessionId, [{Props} | Rest]) -> + case get_value(<<"session_id">>, Props, nil) of + SessionId -> true; + _Else -> has_session_id(SessionId, Rest) + end. + + +get_pending_count(#rep_state{} = St) -> + #rep_state{ + highest_seq_done = HighestSeqDone, + source = #httpdb{} = Db0 + } = St, + {_, Seq} = HighestSeqDone, + Db = Db0#httpdb{retries = 3}, + case (catch couch_replicator_api_wrap:get_pending_count(Db, Seq)) of + {ok, Pending} -> + Pending; + _ -> + null + end. + + +maybe_update_job_state(#rep_state{} = State) -> + case State#rep_state.stats_timer of + nil -> start_stats_timer(State); + Ref when is_reference(Ref) -> State + end. + + +update_job_state(#rep_state{} = State0) -> + State = cancel_stats_timer(State0), + #rep_state{ + current_through_seq = {_, ThroughSeq}, + highest_seq_done = {_, HighestSeq}, + committed_seq = {_, CommittedSeq}, + stats = Stats, + job_data = JobData + } = State, + + Now = erlang:system_time(second), + + RevisionsChecked = couch_replicator_stats:missing_checked(Stats), + MissingRevisions = couch_replicator_stats:missing_found(Stats), + DocsRead = couch_replicator_stats:docs_read(Stats), + DocsWritten = couch_replicator_stats:docs_written(Stats), + DocWriteFailures = couch_replicator_stats:doc_write_failures(Stats), + PendingCount = get_pending_count(State), + + StatsMap = #{ + <<"checkpointed_source_seq">> => CommittedSeq, + <<"source_seq">> => HighestSeq, + <<"through_seq">> => ThroughSeq, + <<"revisions_checked">> => RevisionsChecked, + <<"missing_revisions_found">> => MissingRevisions, + <<"docs_read">> => DocsRead, + <<"docs_written">> => DocsWritten, + <<"doc_write_failures">> => DocWriteFailures, + <<"changes_pending">> => PendingCount + }, + + JobData1 = JobData#{ + ?REP_STATS := StatsMap, + ?LAST_UPDATED := Now + }, + + JobData2 = maybe_heal(JobData1, Now), + + State1 = State#rep_state{job_data = JobData2}, + State2 = update_active_task_info(State1), + update_job_data(undefined, State2). 
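As a worked example of the history comparison above, with made-up session ids s1..s3 (newest first):

    Source history: [s3, s2, s1]
    Target history: [s2, s1]

    s3 is unknown to the target, but the target's newest entry s2 is found in
    the source's tail, so the scan stops there and replication resumes from
    s2's recorded_seq instead of falling back to ?LOWEST_SEQ.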
+ + +replication_start_error({unauthorized, DbUri}) -> + {unauthorized, <<"unauthorized to access or create database ", + DbUri/binary>>}; + +replication_start_error({db_not_found, DbUri}) -> + {db_not_found, <<"could not open ", DbUri/binary>>}; + +replication_start_error({http_request_failed, _Method, Url0, + {error, {error, {conn_failed, {error, nxdomain}}}}}) -> + Url = ?l2b(couch_util:url_strip_password(Url0)), + {nxdomain, <<"could not resolve ", Url/binary>>}; + +replication_start_error({http_request_failed, Method0, Url0, + {error, {code, Code}}}) when is_integer(Code) -> + Url = ?l2b(couch_util:url_strip_password(Url0)), + Method = ?l2b(Method0), + CodeBin = integer_to_binary(Code), + {http_error_code, <<CodeBin/binary, " ", Method/binary, " ", Url/binary>>}; + +replication_start_error(Error) -> + Error. + + +log_replication_start(#rep_state{} = RepState) -> + #rep_state{ + id = Id, + doc_id = DocId, + db_name = DbName, + options = Options, + source_name = Source, + target_name = Target, + session_id = Sid + } = RepState, + Workers = maps:get(<<"worker_processes">>, Options), + BatchSize = maps:get(<<"worker_batch_size">>, Options), + From = case DbName of + Name when is_binary(Name) -> + io_lib:format("from doc ~s:~s", [Name, DocId]); + _ -> + "from _replicate endpoint" + end, + Msg = "Starting replication ~s (~s -> ~s) ~s worker_procesess:~p" + " worker_batch_size:~p session_id:~s", + couch_log:notice(Msg, [Id, Source, Target, From, Workers, BatchSize, Sid]). + + +check_user_filter(#rep_state{} = State) -> + #rep_state{ + id = RepId, + base_id = BaseId, + job = Job, + job_data = JobData + } = State, + case get_rep_id(undefined, Job, JobData) of + {RepId, BaseId} -> + ok; + {NewId, NewBaseId} when is_binary(NewId), is_binary(NewBaseId) -> + LogMsg = "~p : Replication id was updated ~p -> ~p", + couch_log:error(LogMsg, [?MODULE, RepId, NewId]), + reschedule(undefined, Job, JobData), + exit({shutdown, finished}) + end. + + +hist_append(Type, Now, #{} = JobData, Info) when is_integer(Now), + is_binary(Type) -> + #{?JOB_HISTORY := Hist} = JobData, + Evt1 = #{?HIST_TYPE => Type, ?HIST_TIMESTAMP => Now}, + Evt2 = case Info of + undefined -> + Evt1; + null -> + Evt1#{?HIST_REASON => null}; + <<_/binary>> -> + Evt1#{?HIST_REASON => Info}; + #{<<"error">> := Err, <<"reason">> := Reason} when is_binary(Err), + is_binary(Reason) -> + Evt1#{?HIST_REASON => Reason} + end, + Hist1 = [Evt2 | Hist], + Hist2 = lists:sublist(Hist1, max_history()), + JobData#{?JOB_HISTORY := Hist2}. + + +optimize_rate_limited_job(#{} = Options, <<"max_backoff">>) -> + OptimizedSettings = #{ + <<"checkpoint_interval">> => 5000, + <<"worker_processes">> => 2, + <<"worker_batch_size">> => 100, + <<"http_connections">> => 2 + }, + maps:merge(Options, OptimizedSettings); + +optimize_rate_limited_job(#{} = Options, _Other) -> + Options. + + +close_endpoints(State) -> + State1 = cancel_timers(State), + couch_replicator_api_wrap:db_close(State1#rep_state.source), + couch_replicator_api_wrap:db_close(State1#rep_state.target), + ok. + + +get_value(K, Props) -> + couch_util:get_value(K, Props). + + +get_value(K, Props, Default) -> + couch_util:get_value(K, Props, Default). + + +accept_jitter_msec() -> + couch_rand:uniform(erlang:max(1, max_startup_jitter_msec())). + + +max_startup_jitter_msec() -> + config:get_integer("replicator", "startup_jitter", + ?STARTUP_JITTER_DEFAULT). + + +min_backoff_penalty_sec() -> + config:get_integer("replicator", "min_backoff_penalty_sec", + ?DEFAULT_MIN_BACKOFF_PENALTY_SEC). 
+ + +max_backoff_penalty_sec() -> + config:get_integer("replicator", "max_backoff_penalty_sec", + ?DEFAULT_MAX_BACKOFF_PENALTY_SEC). + + +max_history() -> + config:get_integer("replicator", "max_history", ?DEFAULT_MAX_HISTORY). + + +stats_update_interval_sec() -> + config:get_integer("replicator", "stats_update_interval_sec", + ?DEFAULT_STATS_UPDATE_INTERVAL_SEC). + + +-ifdef(TEST). + +-include_lib("eunit/include/eunit.hrl"). +-include_lib("fabric/test/fabric2_test.hrl"). + + +replication_start_error_test() -> + ?assertEqual({unauthorized, <<"unauthorized to access or create database" + " http://x/y">>}, replication_start_error({unauthorized, + <<"http://x/y">>})), + ?assertEqual({db_not_found, <<"could not open http://x/y">>}, + replication_start_error({db_not_found, <<"http://x/y">>})), + ?assertEqual({nxdomain, <<"could not resolve http://x/y">>}, + replication_start_error({http_request_failed, "GET", "http://x/y", + {error, {error, {conn_failed, {error, nxdomain}}}}})), + ?assertEqual({http_error_code, <<"503 GET http://x/y">>}, + replication_start_error({http_request_failed, "GET", "http://x/y", + {error, {code, 503}}})). + + +scheduler_job_format_status_test_() -> + { + foreach, + fun setup/0, + fun teardown/1, + [ + ?TDEF_FE(t_format_status) + ] + }. + + +setup() -> + meck:expect(config, get, fun(_, _, Default) -> Default end). + + +teardown(_) -> + meck:unload(). + + +t_format_status(_) -> + {ok, Rep} = couch_replicator_parse:parse_rep(#{ + <<"source">> => <<"http://u:p@h1/d1">>, + <<"target">> => <<"http://u:p@h2/d2">>, + <<"create_target">> => true + }, null), + State = #rep_state{ + id = <<"base+ext">>, + job_data = #{?REP => Rep}, + doc_id = <<"mydoc">>, + db_name = <<"mydb">>, + source = maps:get(?SOURCE, Rep), + target = maps:get(?TARGET, Rep), + options = maps:get(?OPTIONS, Rep), + session_id = <<"a">>, + start_seq = <<"1">>, + source_seq = <<"2">>, + committed_seq = <<"3">>, + current_through_seq = <<"4">>, + highest_seq_done = <<"5">> + }, + Format = format_status(opts_ignored, [pdict, State]), + FmtOptions = proplists:get_value(options, Format), + ?assertEqual("http://u:*****@h1/d1/", proplists:get_value(source, Format)), + ?assertEqual("http://u:*****@h2/d2/", proplists:get_value(target, Format)), + ?assertEqual(<<"base+ext">>, proplists:get_value(rep_id, Format)), + ?assertEqual(true, maps:get(<<"create_target">>, FmtOptions)), + ?assertEqual(<<"mydoc">>, proplists:get_value(doc_id, Format)), + ?assertEqual(<<"mydb">>, proplists:get_value(db_name, Format)), + ?assertEqual(<<"a">>, proplists:get_value(session_id, Format)), + ?assertEqual(<<"1">>, proplists:get_value(start_seq, Format)), + ?assertEqual(<<"2">>, proplists:get_value(source_seq, Format)), + ?assertEqual(<<"3">>, proplists:get_value(committed_seq, Format)), + ?assertEqual(<<"4">>, proplists:get_value(current_through_seq, Format)), + ?assertEqual(<<"5">>, proplists:get_value(highest_seq_done, Format)). + + +-endif. diff --git a/src/couch_replicator/src/couch_replicator_job_server.erl b/src/couch_replicator/src/couch_replicator_job_server.erl new file mode 100644 index 000000000..a2e90b061 --- /dev/null +++ b/src/couch_replicator/src/couch_replicator_job_server.erl @@ -0,0 +1,370 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. 
You may obtain a copy of
+% the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-module(couch_replicator_job_server).
+
+
+-behaviour(gen_server).
+
+
+-export([
+    start_link/1
+]).
+
+-export([
+    init/1,
+    terminate/2,
+    handle_call/3,
+    handle_cast/2,
+    handle_info/2,
+    format_status/2,
+    code_change/3
+]).
+
+-export([
+    accepted/2,
+    scheduling_interval_sec/0,
+    reschedule/0
+]).
+
+
+-include("couch_replicator.hrl").
+
+
+-define(MAX_ACCEPTORS, 2).
+-define(MAX_JOBS, 500).
+-define(MAX_CHURN, 100).
+-define(INTERVAL_SEC, 15).
+-define(MIN_RUN_TIME_SEC, 60).
+-define(TRANSIENT_JOB_MAX_AGE_SEC, 86400). % 1 day
+
+
+start_link(Timeout) when is_integer(Timeout) ->
+    gen_server:start_link({local, ?MODULE}, ?MODULE, Timeout, []).
+
+
+init(Timeout) when is_integer(Timeout) ->
+    process_flag(trap_exit, true),
+    couch_replicator_jobs:set_timeout(),
+    St = #{
+        acceptors => #{},
+        workers => #{},
+        churn => 0,
+        config => get_config(),
+        timer => undefined,
+        timeout => Timeout
+    },
+    St1 = spawn_acceptors(St),
+    St2 = do_send_after(St1),
+    {ok, St2}.
+
+
+terminate(_, #{} = St) ->
+    #{
+        workers := Workers,
+        timeout := Timeout
+    } = St,
+    [stop_job(Pid) || Pid <- maps:keys(Workers)],
+    % Give jobs a chance to checkpoint and release their locks
+    wait_jobs_exit(Workers, Timeout),
+    ok.
+
+
+handle_call({accepted, Pid, Normal}, _From, #{} = St) ->
+    #{
+        acceptors := Acceptors,
+        workers := Workers,
+        churn := Churn
+    } = St,
+    case maps:is_key(Pid, Acceptors) of
+        true ->
+            Val = {Normal, erlang:system_time(second)},
+            St1 = St#{
+                acceptors := maps:remove(Pid, Acceptors),
+                workers := Workers#{Pid => Val},
+                churn := Churn + 1
+            },
+            {reply, ok, spawn_acceptors(St1)};
+        false ->
+            LogMsg = "~p : unknown acceptor process ~p",
+            couch_log:error(LogMsg, [?MODULE, Pid]),
+            {stop, {unknown_acceptor_pid, Pid}, St}
+    end;
+
+handle_call(reschedule, _From, St) ->
+    {reply, ok, reschedule(St)};
+
+handle_call(Msg, _From, St) ->
+    {stop, {bad_call, Msg}, {bad_call, Msg}, St}.
+
+
+handle_cast(Msg, St) ->
+    {stop, {bad_cast, Msg}, St}.
+
+
+handle_info(reschedule, #{} = St) ->
+    {noreply, reschedule(St)};
+
+handle_info({'EXIT', Pid, Reason}, #{} = St) ->
+    #{
+        acceptors := Acceptors,
+        workers := Workers
+    } = St,
+    case {maps:is_key(Pid, Acceptors), maps:is_key(Pid, Workers)} of
+        {true, false} -> handle_acceptor_exit(St, Pid, Reason);
+        {false, true} -> handle_worker_exit(St, Pid, Reason);
+        {false, false} -> handle_unknown_exit(St, Pid, Reason)
+    end;
+
+handle_info(Msg, St) ->
+    {stop, {bad_info, Msg}, St}.
+
+
+format_status(_Opt, [_PDict, #{} = St]) ->
+    #{
+        acceptors := Acceptors,
+        workers := Workers,
+        churn := Churn,
+        config := Config
+    } = St,
+    [
+        {acceptors, map_size(Acceptors)},
+        {workers, map_size(Workers)},
+        {churn, Churn},
+        {config, Config}
+    ].
+
+
+code_change(_OldVsn, St, _Extra) ->
+    {ok, St}.
+
+
+accepted(Worker, Normal) when is_pid(Worker), is_boolean(Normal) ->
+    gen_server:call(?MODULE, {accepted, Worker, Normal}, infinity).
+
+
+scheduling_interval_sec() ->
+    config:get_integer("replicator", "interval_sec", ?INTERVAL_SEC).
+
+
+reschedule() ->
+    gen_server:call(?MODULE, reschedule, infinity).
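To make the acceptor handshake concrete, here is a minimal illustrative sketch (not part of the patch) of what a process spawned via couch_replicator_job:start_link/0 might do once it picks up a job. It assumes couch_replicator_jobs:accept_job/1 (defined later in this diff) returns {ok, Job, JobData} on success, and the Normal flag is a placeholder for whatever the job module derives from the job data:

accept_and_report() ->
    Now = erlang:system_time(second),
    % accept_job/1 wraps couch_jobs:accept/2 with a max scheduled time
    {ok, Job, JobData} = couch_replicator_jobs:accept_job(Now),
    Normal = true,  % placeholder; the real value depends on JobData
    % Report back so the server moves this pid from acceptors to workers,
    % bumps churn and spawns a replacement acceptor
    ok = couch_replicator_job_server:accepted(self(), Normal),
    {ok, Job, JobData}.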
+ + +% Scheduling logic + +do_send_after(#{} = St) -> + #{config := #{interval_sec := IntervalSec}} = St, + IntervalMSec = IntervalSec * 1000, + Jitter = IntervalMSec div 3, + WaitMSec = IntervalMSec + rand:uniform(max(1, Jitter)), + TRef = erlang:send_after(WaitMSec, self(), reschedule), + St#{timer := TRef}. + + +cancel_timer(#{timer := undefined} = St) -> + St; + +cancel_timer(#{timer := TRef} = St) when is_reference(TRef) -> + erlang:cancel_timer(TRef), + St#{timer := undefined}. + + +reschedule(#{} = St) -> + St1 = cancel_timer(St), + St2 = St1#{config := get_config()}, + St3 = trim_jobs(St2), + St4 = start_excess_acceptors(St3), + St5 = transient_job_cleanup(St4), + St6 = update_stats(St5), + St7 = do_send_after(St6), + St7#{churn := 0}. + + +start_excess_acceptors(#{} = St) -> + #{ + churn := Churn, + acceptors := Acceptors, + workers := Workers, + config := #{max_jobs := MaxJobs, max_churn := MaxChurn} + } = St, + + ACnt = maps:size(Acceptors), + WCnt = maps:size(Workers), + + ChurnLeft = MaxChurn - Churn, + Slots = (MaxJobs + MaxChurn) - (ACnt + WCnt), + MinSlotsChurn = min(Slots, ChurnLeft), + + Pending = if MinSlotsChurn =< 0 -> 0; true -> + % Don't fetch pending if we don't have enough slots or churn budget + couch_replicator_jobs:pending_count(undefined, MinSlotsChurn) + end, + + couch_stats:update_gauge([couch_replicator, jobs, pending], Pending), + + % Start new acceptors only if we have churn budget, there are pending jobs + % and we won't start more than max jobs + churn total acceptors + ToStart = max(0, lists:min([ChurnLeft, Pending, Slots])), + + lists:foldl(fun(_, #{} = StAcc) -> + #{acceptors := AccAcceptors} = StAcc, + {ok, Pid} = couch_replicator_job:start_link(), + StAcc#{acceptors := AccAcceptors#{Pid => true}} + end, St, lists:seq(1, ToStart)). + + +transient_job_cleanup(#{} = St) -> + #{ + config := #{transient_job_max_age_sec := MaxAge} + } = St, + Now = erlang:system_time(second), + FoldFun = fun(_JTx, JobId, State, #{} = Data, ok) -> + IsTransient = maps:get(?DB_NAME, Data) =:= null, + IsOld = Now - maps:get(?LAST_UPDATED, Data) >= MaxAge, + case State =:= finished andalso IsTransient andalso IsOld of + true -> + ok = couch_replicator_jobs:remove_job(undefined, JobId), + couch_log:info("~p : Removed old job ~p", [?MODULE, JobId]), + ok; + false -> + ok + end + end, + ok = couch_replicator_jobs:fold_jobs(undefined, FoldFun, ok), + St. + + +update_stats(#{} = St) -> + ACnt = maps:size(maps:get(acceptors, St)), + WCnt = maps:size(maps:get(workers, St)), + couch_stats:update_gauge([couch_replicator, jobs, accepting], ACnt), + couch_stats:update_gauge([couch_replicator, jobs, running], WCnt), + couch_stats:increment_counter([couch_replicator, jobs, reschedules]), + St. + + +trim_jobs(#{} = St) -> + #{ + workers := Workers, + churn := Churn, + config := #{max_jobs := MaxJobs} + } = St, + Excess = max(0, maps:size(Workers) - MaxJobs), + lists:foreach(fun stop_job/1, stop_candidates(St, Excess)), + St#{churn := Churn + Excess}. + + +stop_candidates(#{}, Top) when is_integer(Top), Top =< 0 -> + []; + +stop_candidates(#{} = St, Top) when is_integer(Top), Top > 0 -> + #{ + workers := Workers, + config := #{min_run_time_sec := MinRunTime} + } = St, + + WList1 = maps:to_list(Workers), % [{Pid, {Normal, StartTime}},...] 
+ + % Filter out normal jobs and those which have just started running + MaxT = erlang:system_time(second) - MinRunTime, + WList2 = lists:filter(fun({_Pid, {Normal, T}}) -> + not Normal andalso T =< MaxT + end, WList1), + + Sorted = lists:keysort(2, WList2), + Pids = lists:map(fun({Pid, _}) -> Pid end, Sorted), + lists:sublist(Pids, Top). + + +stop_job(Pid) when is_pid(Pid) -> + % Replication jobs handle the shutdown signal and then checkpoint in + % terminate handler + exit(Pid, shutdown). + + +wait_jobs_exit(#{} = Jobs, _) when map_size(Jobs) =:= 0 -> + ok; + +wait_jobs_exit(#{} = Jobs, Timeout) -> + receive + {'EXIT', Pid, _} -> + wait_jobs_exit(maps:remove(Pid, Jobs), Timeout) + after + Timeout -> + LogMsg = "~p : ~p jobs didn't terminate cleanly", + couch_log:error(LogMsg, [?MODULE, map_size(Jobs)]), + ok + end. + + +spawn_acceptors(St) -> + #{ + workers := Workers, + acceptors := Acceptors, + config := #{max_jobs := MaxJobs, max_acceptors := MaxAcceptors} + } = St, + ACnt = maps:size(Acceptors), + WCnt = maps:size(Workers), + case ACnt < MaxAcceptors andalso (ACnt + WCnt) < MaxJobs of + true -> + {ok, Pid} = couch_replicator_job:start_link(), + NewSt = St#{acceptors := Acceptors#{Pid => true}}, + spawn_acceptors(NewSt); + false -> + St + end. + + +% Worker process exit handlers + +handle_acceptor_exit(#{acceptors := Acceptors} = St, Pid, Reason) -> + St1 = St#{acceptors := maps:remove(Pid, Acceptors)}, + LogMsg = "~p : acceptor process ~p exited with ~p", + couch_log:error(LogMsg, [?MODULE, Pid, Reason]), + {noreply, spawn_acceptors(St1)}. + + +handle_worker_exit(#{workers := Workers} = St, Pid, Reason) -> + St1 = St#{workers := maps:remove(Pid, Workers)}, + case Reason of + normal -> + ok; + shutdown -> + ok; + {shutdown, _} -> + ok; + _ -> + LogMsg = "~p : replicator job process ~p exited with ~p", + couch_log:error(LogMsg, [?MODULE, Pid, Reason]) + end, + {noreply, spawn_acceptors(St1)}. + + +handle_unknown_exit(St, Pid, Reason) -> + LogMsg = "~p : unknown process ~p exited with ~p", + couch_log:error(LogMsg, [?MODULE, Pid, Reason]), + {stop, {unknown_pid_exit, Pid}, St}. + + +get_config() -> + Defaults = #{ + max_acceptors => ?MAX_ACCEPTORS, + interval_sec => ?INTERVAL_SEC, + max_jobs => ?MAX_JOBS, + max_churn => ?MAX_CHURN, + min_run_time_sec => ?MIN_RUN_TIME_SEC, + transient_job_max_age_sec => ?TRANSIENT_JOB_MAX_AGE_SEC + }, + maps:map(fun(K, Default) -> + config:get_integer("replicator", atom_to_list(K), Default) + end, Defaults). diff --git a/src/couch_replicator/src/couch_replicator_jobs.erl b/src/couch_replicator/src/couch_replicator_jobs.erl new file mode 100644 index 000000000..51f441caf --- /dev/null +++ b/src/couch_replicator/src/couch_replicator_jobs.erl @@ -0,0 +1,314 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_replicator_jobs). 
+ + +-export([ + % couch_jobs type timeouts + set_timeout/0, + get_timeout/0, + + % Job creation and querying + new_job/7, + add_job/3, + remove_job/2, + get_job_data/2, + fold_jobs/3, + pending_count/2, + + % Job subscription + wait_running/1, + wait_result/1, + + % Job execution + accept_job/1, + update_job_data/3, + finish_job/3, + reschedule_job/4, + + % (..., ?REPLICATION_IDS) -> JobId handling + try_update_rep_id/3, + update_rep_id/3, + clear_old_rep_id/3, + get_job_id/2, + + % Debug functions + remove_jobs/2, + get_job_ids/1 +]). + + +-include("couch_replicator.hrl"). +-include_lib("fabric/include/fabric2.hrl"). + + +-define(REP_JOBS, <<"rep_jobs">>). +-define(REP_JOBS_TIMEOUT_SEC, 61). + + +% Data model +% ---------- +% +% State kept in couch_jobs under the ?REP_JOBS type +% +% Job IDs are defined as: +% * Replicator DB instance UUID + doc ID for persistent replications +% * Hash(username|source|target|options) for transient replications +% +% To map replication IDs to couch_job jobs, there is a separate index that +% looks like: +% (?REPLICATION_IDS, RepId) -> JobId +% + +set_timeout() -> + couch_jobs:set_type_timeout(?REP_JOBS, ?REP_JOBS_TIMEOUT_SEC). + + +get_timeout() -> + ?REP_JOBS_TIMEOUT_SEC. + + +new_job(#{} = Rep, DbName, DbUUID, DocId, State, StateInfo, DocState) -> + NowSec = erlang:system_time(second), + AddedEvent = #{?HIST_TYPE => ?HIST_ADDED, ?HIST_TIMESTAMP => NowSec}, + #{ + ?REP => Rep, + ?REP_ID => null, + ?BASE_ID => null, + ?DB_NAME => DbName, + ?DB_UUID => DbUUID, + ?DOC_ID => DocId, + ?ERROR_COUNT => 0, + ?REP_STATS => #{}, + ?STATE => State, + ?STATE_INFO => StateInfo, + ?DOC_STATE => DocState, + ?LAST_UPDATED => NowSec, + ?LAST_START => 0, + ?LAST_ERROR => null, + ?REP_NODE => null, + ?REP_PID => null, + ?JOB_HISTORY => [AddedEvent], + ?CHECKPOINT_HISTORY => [] + }. + + +add_job(Tx, JobId, JobData) -> + couch_stats:increment_counter([couch_replicator, jobs, adds]), + couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(Tx), fun(JTx) -> + case couch_jobs:get_job_data(JTx, ?REP_JOBS, JobId) of + {ok, #{} = OldData} -> + ok = remove_job(JTx, JobId, OldData); + {error, not_found} -> + ok + end, + ok = couch_jobs:add(JTx, ?REP_JOBS, JobId, JobData) + end). + + +remove_job(Tx, JobId) -> + couch_stats:increment_counter([couch_replicator, jobs, removes]), + couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(Tx), fun(JTx) -> + case couch_jobs:get_job_data(JTx, ?REP_JOBS, JobId) of + {ok, #{} = JobData} -> + ok = remove_job(JTx, JobId, JobData); + {error, not_found} -> + ok + end + end). + + +get_job_data(Tx, JobId) -> + couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(Tx), fun(JTx) -> + couch_jobs:get_job_data(JTx, ?REP_JOBS, JobId) + end). + + +% UserFun = fun(JTx, JobId, JobState, JobData, UserAcc) +% +fold_jobs(Tx, UserFun, Acc) when is_function(UserFun, 5) -> + couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(Tx), fun(JTx) -> + couch_jobs:fold_jobs(JTx, ?REP_JOBS, UserFun, Acc) + end). + + +pending_count(_Tx, Limit) when is_integer(Limit), Limit =< 0 -> + 0; + +pending_count(Tx, Limit) when is_integer(Limit), Limit > 0 -> + Opts = #{ + max_sched_time => erlang:system_time(second), + limit => Limit + }, + couch_jobs:pending_count(Tx, ?REP_JOBS, Opts). 
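A minimal usage sketch (illustrative only, not part of the patch) tying the data model comment above to these functions; it assumes couch_replicator_ids:job_id/3 derives the job ID described in that comment and that ?ST_PENDING is the initial state constant from couch_replicator.hrl:

example_add_job(#{} = Rep, DbName, DbUUID, DocId) ->
    % Initial job data document; state info and doc state left unset
    JobData = new_job(Rep, DbName, DbUUID, DocId, ?ST_PENDING, null, null),
    % Assumed helper: builds the couch_jobs job ID for this replication
    JobId = couch_replicator_ids:job_id(Rep, DbUUID, DocId),
    % Passing `undefined` as Tx makes the helpers open their own transaction
    ok = add_job(undefined, JobId, JobData),
    {ok, _Data} = get_job_data(undefined, JobId),
    JobId.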
+ + +wait_running(JobId) -> + case couch_jobs:subscribe(?REP_JOBS, JobId) of + {ok, finished, JobData} -> + {ok, JobData}; + {ok, SubId, running, #{?STATE := ?ST_PENDING}} -> + wait_running(JobId, SubId); + {ok, SubId, running, JobData} -> + ok = couch_jobs:unsubscribe(SubId), + {ok, JobData}; + {ok, SubId, pending, _} -> + wait_running(JobId, SubId); + {error, Error} -> + {error, Error} + end. + + +wait_running(JobId, SubId) -> + case couch_jobs:wait(SubId, infinity) of + {?REP_JOBS, _, running, #{?STATE := ?ST_PENDING}} -> + wait_running(JobId, SubId); + {?REP_JOBS, _, running, JobData} -> + ok = couch_jobs:unsubscribe(SubId), + {ok, JobData}; + {?REP_JOBS, _, pending, _} -> + wait_running(JobId, SubId); + {?REP_JOBS, _, finished, JobData} -> + ok = couch_jobs:unsubscribe(SubId), + {ok, JobData} + end. + + +wait_result(JobId) -> + case couch_jobs:subscribe(?REP_JOBS, JobId) of + {ok, finished, JobData} -> + {ok, JobData}; + {ok, SubId, _, _} -> + {?REP_JOBS, _, finished, JobData} = couch_jobs:wait(SubId, + finished, infinity), + {ok, JobData}; + {error, Error} -> + {error, Error} + end. + + +accept_job(MaxSchedTime) when is_integer(MaxSchedTime) -> + Opts = #{max_sched_time => MaxSchedTime}, + couch_jobs:accept(?REP_JOBS, Opts). + + +update_job_data(Tx, #{} = Job, #{} = JobData) -> + couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(Tx), fun(JTx) -> + couch_jobs:update(JTx, Job, JobData) + end). + + +finish_job(Tx, #{} = Job, #{} = JobData) -> + couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(Tx), fun(JTx) -> + couch_jobs:finish(JTx, Job, JobData) + end). + + +reschedule_job(Tx, #{} = Job, #{} = JobData, Time) -> + couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(Tx), fun(JTx) -> + {ok, Job1} = couch_jobs:resubmit(JTx, Job, Time), + ok = couch_jobs:finish(JTx, Job1, JobData) + end). + + +try_update_rep_id(Tx, JobId, RepId) -> + couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(Tx), fun(JTx) -> + #{tx := ErlFdbTx, layer_prefix := LayerPrefix} = JTx, + Key = erlfdb_tuple:pack({?REPLICATION_IDS, RepId}, LayerPrefix), + case get_job_id(JTx, RepId) of + {error, not_found} -> + ok = erlfdb:set(ErlFdbTx, Key, JobId); + {ok, JobId} -> + ok; + {ok, OtherJobId} when is_binary(OtherJobId) -> + {error, {replication_job_conflict, OtherJobId}} + end + end). + + +update_rep_id(Tx, JobId, RepId) -> + couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(Tx), fun(JTx) -> + #{tx := ErlFdbTx, layer_prefix := LayerPrefix} = JTx, + Key = erlfdb_tuple:pack({?REPLICATION_IDS, RepId}, LayerPrefix), + ok = erlfdb:set(ErlFdbTx, Key, JobId) + end). + + +clear_old_rep_id(_, _, null) -> + ok; + +clear_old_rep_id(Tx, JobId, RepId) -> + couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(Tx), fun(JTx) -> + #{tx := ErlFdbTx, layer_prefix := LayerPrefix} = JTx, + Key = erlfdb_tuple:pack({?REPLICATION_IDS, RepId}, LayerPrefix), + case get_job_id(JTx, RepId) of + {error, not_found} -> + ok; + {ok, JobId} -> + ok = erlfdb:clear(ErlFdbTx, Key); + {ok, OtherJobId} when is_binary(OtherJobId) -> + ok + end + end). + + +get_job_id(Tx, RepId) -> + couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(Tx), fun(JTx) -> + #{tx := ErlFdbTx, layer_prefix := LayerPrefix} = JTx, + Key = erlfdb_tuple:pack({?REPLICATION_IDS, RepId}, LayerPrefix), + case erlfdb:wait(erlfdb:get(ErlFdbTx, Key)) of + not_found -> + {error, not_found}; + <<_/binary>> = JobId -> + {ok, JobId} + end + end). + + +% Debug functions + +remove_jobs(Tx, JobIds) when is_list(JobIds) -> + couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(Tx), fun(JTx) -> + lists:foreach(fun(JobId) -> remove_job(JTx, JobId) end, JobIds) + end), + []. 
+ + +get_job_ids(Tx) -> + couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(Tx), fun(JTx) -> + #{tx := ErlFdbTx, layer_prefix := LayerPrefix} = JTx, + Prefix = erlfdb_tuple:pack({?REPLICATION_IDS}, LayerPrefix), + KVs = erlfdb:wait(erlfdb:get_range_startswith(ErlFdbTx, Prefix)), + lists:map(fun({K, JobId}) -> + {RepId} = erlfdb_tuple:unpack(K, Prefix), + {RepId, JobId} + end, KVs) + end). + + +% Private functions + +remove_job(#{jtx := true} = JTx, JobId, OldJobData) -> + #{tx := Tx, layer_prefix := LayerPrefix} = JTx, + case OldJobData of + #{?REP_ID := null} -> + couch_jobs:remove(JTx, ?REP_JOBS, JobId); + #{?REP_ID := RepId} when is_binary(RepId) -> + Key = erlfdb_tuple:pack({?REPLICATION_IDS, RepId}, LayerPrefix), + case erlfdb:wait(erlfdb:get(Tx, Key)) of + not_found -> ok; + JobId -> erlfdb:clear(Tx, Key); + <<_/binary>> -> ok + end, + couch_jobs:remove(JTx, ?REP_JOBS, JobId) + end. diff --git a/src/couch_replicator/src/couch_replicator_js_functions.hrl b/src/couch_replicator/src/couch_replicator_js_functions.hrl deleted file mode 100644 index d41043309..000000000 --- a/src/couch_replicator/src/couch_replicator_js_functions.hrl +++ /dev/null @@ -1,177 +0,0 @@ -% Licensed under the Apache License, Version 2.0 (the "License"); you may not -% use this file except in compliance with the License. You may obtain a copy of -% the License at -% -% http://www.apache.org/licenses/LICENSE-2.0 -% -% Unless required by applicable law or agreed to in writing, software -% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -% License for the specific language governing permissions and limitations under -% the License. - --define(REP_DB_DOC_VALIDATE_FUN, <<" - function(newDoc, oldDoc, userCtx) { - function reportError(error_msg) { - log('Error writing document `' + newDoc._id + - '\\' to the replicator database: ' + error_msg); - throw({forbidden: error_msg}); - } - - function validateEndpoint(endpoint, fieldName) { - if ((typeof endpoint !== 'string') && - ((typeof endpoint !== 'object') || (endpoint === null))) { - - reportError('The `' + fieldName + '\\' property must exist' + - ' and be either a string or an object.'); - } - - if (typeof endpoint === 'object') { - if ((typeof endpoint.url !== 'string') || !endpoint.url) { - reportError('The url property must exist in the `' + - fieldName + '\\' field and must be a non-empty string.'); - } - - if ((typeof endpoint.auth !== 'undefined') && - ((typeof endpoint.auth !== 'object') || - endpoint.auth === null)) { - - reportError('`' + fieldName + - '.auth\\' must be a non-null object.'); - } - - if ((typeof endpoint.headers !== 'undefined') && - ((typeof endpoint.headers !== 'object') || - endpoint.headers === null)) { - - reportError('`' + fieldName + - '.headers\\' must be a non-null object.'); - } - } - } - - var isReplicator = (userCtx.roles.indexOf('_replicator') >= 0); - var isAdmin = (userCtx.roles.indexOf('_admin') >= 0); - - if (isReplicator) { - // Always let replicator update the replication document - return; - } - - if (newDoc._replication_state === 'failed') { - // Skip validation in case when we update the document with the - // failed state. In this case it might be malformed. However, - // replicator will not pay attention to failed documents so this - // is safe. 
- return; - } - - if (!newDoc._deleted) { - validateEndpoint(newDoc.source, 'source'); - validateEndpoint(newDoc.target, 'target'); - - if ((typeof newDoc.create_target !== 'undefined') && - (typeof newDoc.create_target !== 'boolean')) { - - reportError('The `create_target\\' field must be a boolean.'); - } - - if ((typeof newDoc.continuous !== 'undefined') && - (typeof newDoc.continuous !== 'boolean')) { - - reportError('The `continuous\\' field must be a boolean.'); - } - - if ((typeof newDoc.doc_ids !== 'undefined') && - !isArray(newDoc.doc_ids)) { - - reportError('The `doc_ids\\' field must be an array of strings.'); - } - - if ((typeof newDoc.selector !== 'undefined') && - (typeof newDoc.selector !== 'object')) { - - reportError('The `selector\\' field must be an object.'); - } - - if ((typeof newDoc.filter !== 'undefined') && - ((typeof newDoc.filter !== 'string') || !newDoc.filter)) { - - reportError('The `filter\\' field must be a non-empty string.'); - } - - if ((typeof newDoc.doc_ids !== 'undefined') && - (typeof newDoc.selector !== 'undefined')) { - - reportError('`doc_ids\\' field is incompatible with `selector\\'.'); - } - - if ( ((typeof newDoc.doc_ids !== 'undefined') || - (typeof newDoc.selector !== 'undefined')) && - (typeof newDoc.filter !== 'undefined') ) { - - reportError('`filter\\' field is incompatible with `selector\\' and `doc_ids\\'.'); - } - - if ((typeof newDoc.query_params !== 'undefined') && - ((typeof newDoc.query_params !== 'object') || - newDoc.query_params === null)) { - - reportError('The `query_params\\' field must be an object.'); - } - - if (newDoc.user_ctx) { - var user_ctx = newDoc.user_ctx; - - if ((typeof user_ctx !== 'object') || (user_ctx === null)) { - reportError('The `user_ctx\\' property must be a ' + - 'non-null object.'); - } - - if (!(user_ctx.name === null || - (typeof user_ctx.name === 'undefined') || - ((typeof user_ctx.name === 'string') && - user_ctx.name.length > 0))) { - - reportError('The `user_ctx.name\\' property must be a ' + - 'non-empty string or null.'); - } - - if (!isAdmin && (user_ctx.name !== userCtx.name)) { - reportError('The given `user_ctx.name\\' is not valid'); - } - - if (user_ctx.roles && !isArray(user_ctx.roles)) { - reportError('The `user_ctx.roles\\' property must be ' + - 'an array of strings.'); - } - - if (!isAdmin && user_ctx.roles) { - for (var i = 0; i < user_ctx.roles.length; i++) { - var role = user_ctx.roles[i]; - - if (typeof role !== 'string' || role.length === 0) { - reportError('Roles must be non-empty strings.'); - } - if (userCtx.roles.indexOf(role) === -1) { - reportError('Invalid role (`' + role + - '\\') in the `user_ctx\\''); - } - } - } - } else { - if (!isAdmin) { - reportError('The `user_ctx\\' property is missing (it is ' + - 'optional for admins only).'); - } - } - } else { - if (!isAdmin) { - if (!oldDoc.user_ctx || (oldDoc.user_ctx.name !== userCtx.name)) { - reportError('Replication documents can only be deleted by ' + - 'admins or by the users who created them.'); - } - } - } - } -">>). diff --git a/src/couch_replicator/src/couch_replicator_notifier.erl b/src/couch_replicator/src/couch_replicator_notifier.erl deleted file mode 100644 index f7640a349..000000000 --- a/src/couch_replicator/src/couch_replicator_notifier.erl +++ /dev/null @@ -1,58 +0,0 @@ -% Licensed under the Apache License, Version 2.0 (the "License"); you may not -% use this file except in compliance with the License. 
You may obtain a copy of -% the License at -% -% http://www.apache.org/licenses/LICENSE-2.0 -% -% Unless required by applicable law or agreed to in writing, software -% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -% License for the specific language governing permissions and limitations under -% the License. - --module(couch_replicator_notifier). - --behaviour(gen_event). --vsn(1). - -% public API --export([start_link/1, stop/1, notify/1]). - -% gen_event callbacks --export([init/1, terminate/2, code_change/3]). --export([handle_event/2, handle_call/2, handle_info/2]). - --include_lib("couch/include/couch_db.hrl"). - -start_link(FunAcc) -> - couch_event_sup:start_link(couch_replication, - {couch_replicator_notifier, make_ref()}, FunAcc). - -notify(Event) -> - gen_event:notify(couch_replication, Event). - -stop(Pid) -> - couch_event_sup:stop(Pid). - - -init(FunAcc) -> - {ok, FunAcc}. - -terminate(_Reason, _State) -> - ok. - -handle_event(Event, Fun) when is_function(Fun, 1) -> - Fun(Event), - {ok, Fun}; -handle_event(Event, {Fun, Acc}) when is_function(Fun, 2) -> - Acc2 = Fun(Event, Acc), - {ok, {Fun, Acc2}}. - -handle_call(_Msg, State) -> - {ok, ok, State}. - -handle_info(_Msg, State) -> - {ok, State}. - -code_change(_OldVsn, State, _Extra) -> - {ok, State}. diff --git a/src/couch_replicator/src/couch_replicator_parse.erl b/src/couch_replicator/src/couch_replicator_parse.erl new file mode 100644 index 000000000..5996ec507 --- /dev/null +++ b/src/couch_replicator/src/couch_replicator_parse.erl @@ -0,0 +1,545 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_replicator_parse). + + +-export([ + parse_rep_doc/1, + parse_transient_rep/2, + parse_rep/2, + parse_rep_db/3 +]). + + +-include_lib("ibrowse/include/ibrowse.hrl"). +-include("couch_replicator.hrl"). + + +-define(DEFAULT_SOCK_OPTS, "[{keepalive, true}, {nodelay, false}]"). +-define(VALID_SOCK_OPTS, [ + buffer, + delay_send, + exit_on_close, + ipv6_v6only, + keepalive, + nodelay, + recbuf, + send_timeout, + send_timout_close, + sndbuf, + priority, + tos, + tclass +]). +-define(VALID_PROXY_PROTOCOLS, [http, https, socks5]). +-define(CONFIG_DEFAULTS, [ + {"worker_processes", "4", fun list_to_integer/1}, + {"worker_batch_size", "500", fun list_to_integer/1}, + {"http_connections", "20", fun list_to_integer/1}, + {"connection_timeout", "30000", fun list_to_integer/1}, + {"retries_per_request", "5", fun list_to_integer/1}, + {"use_checkpoints", "true", fun list_to_existing_atom/1}, + {"checkpoint_interval", "30000", fun list_to_integer/1}, + {"socket_options", ?DEFAULT_SOCK_OPTS, fun parse_sock_opts/1} +]). + + +-spec parse_rep_doc({[_]}) -> #{}. 
+parse_rep_doc(RepDoc) -> + {ok, Rep} = try + parse_rep(RepDoc, null) + catch + throw:{error, Reason} -> + Stack = erlang:get_stacktrace(), + LogErr1 = "~p parse_rep_doc fail ~p ~p", + couch_log:error(LogErr1, [?MODULE, Reason, Stack]), + throw({bad_rep_doc, Reason}); + Tag:Err -> + Stack = erlang:get_stacktrace(), + LogErr2 = "~p parse_rep_doc fail ~p:~p ~p", + couch_log:error(LogErr2, [?MODULE, Tag, Err, Stack]), + throw({bad_rep_doc, couch_util:to_binary({Tag, Err})}) + end, + Rep. + + +-spec parse_transient_rep({[_]} | #{}, user_name()) -> {ok, #{}}. +parse_transient_rep({Props} = EJson, UserName) when is_list(Props) -> + Str = couch_util:json_encode(EJson), + Map = couch_util:json_decode(Str, [return_maps]), + parse_transient_rep(Map, UserName); + +parse_transient_rep(#{} = Body, UserName) -> + {ok, Rep} = try + parse_rep(Body, UserName) + catch + throw:{error, Reason} -> + Stack = erlang:get_stacktrace(), + LogErr1 = "~p parse_transient_rep fail ~p ~p", + couch_log:error(LogErr1, [?MODULE, Reason, Stack]), + throw({bad_request, Reason}); + Tag:Err -> + Stack = erlang:get_stacktrace(), + LogErr2 = "~p parse_transient_rep fail ~p ~p", + couch_log:error(LogErr2, [?MODULE, Tag, Err, Stack]), + throw({bad_request, couch_util:to_binary({Tag, Err})}) + end, + #{?OPTIONS := Options} = Rep, + Cancel = maps:get(<<"cancel">>, Options, false), + Id = maps:get(<<"id">>, Options, nil), + case {Cancel, Id} of + {true, nil} -> + % Cancel request with no id, must parse id out of body contents + JobId = couch_replicator_ids:job_id(Rep, null, null), + {ok, JobId, Rep}; + {true, Id} -> + % Cancel request with an id specified, so do not parse id from body + {ok, Id, Rep}; + {false, _Id} -> + JobId = couch_replicator_ids:job_id(Rep, null, null), + % Not a cancel request, regular replication doc + {ok, JobId, Rep} + end. + + +-spec parse_rep({[_]} | #{}, user_name()) -> {ok, #{}}. +parse_rep({Props} = EJson, UserName) when is_list(Props) -> + Str = couch_util:json_encode(EJson), + Map = couch_util:json_decode(Str, [return_maps]), + parse_rep(Map, UserName); + +parse_rep(#{} = Doc, UserName) -> + {SrcProxy, TgtProxy} = parse_proxy_settings(Doc), + Opts = make_options(Doc), + Cancel = maps:get(<<"cancel">>, Opts, false), + Id = maps:get(<<"id">>, Opts, nil), + case Cancel andalso Id =/= nil of + true -> + {ok, #{?OPTIONS => Opts, ?REP_USER => UserName}}; + false -> + case {maps:is_key(?SOURCE, Doc), maps:is_key(?TARGET, Doc)} of + {false, _} -> throw({error, <<"Missing `source` field">>}); + {_, false} -> throw({error, <<"Missing `target` field">>}); + {true, true} -> ok + end, + #{?SOURCE := Source0, ?TARGET := Target0} = Doc, + Source = parse_rep_db(Source0, SrcProxy, Opts), + Target = parse_rep_db(Target0, TgtProxy, Opts), + case couch_replicator_filters:view_type(Doc, Opts) of + {error, Error} -> throw({error, Error}); + _ -> ok + end, + case couch_replicator_filters:parse(Opts) of + {ok, _} -> ok; + {error, FilterError} -> throw({error, FilterError}) + end, + Rep = #{ + ?SOURCE => Source, + ?TARGET => Target, + ?OPTIONS => Opts, + ?REP_USER => UserName, + ?START_TIME => erlang:system_time(second) + }, + {ok, Rep} + end. + + +-spec parse_rep_db(#{}, #{}, #{}) -> #{}. 
+parse_rep_db(#{} = Endpoint, #{} = ProxyParams, #{} = Options) -> + ProxyUrl = case ProxyParams of + #{<<"proxy_url">> := PUrl} -> PUrl; + _ -> null + end, + + Url0 = maps:get(<<"url">>, Endpoint), + Url = maybe_add_trailing_slash(Url0), + + AuthProps = maps:get(<<"auth">>, Endpoint, #{}), + if is_map(AuthProps) -> ok; true -> + throw({error, "if defined, `auth` must be an object"}) + end, + + Headers0 = maps:get(<<"headers">>, Endpoint, #{}), + if is_map(Headers0) -> ok; true -> + throw({error, "if defined `headers` must be an object"}) + end, + DefaultHeaders = couch_replicator_utils:default_headers_map(), + Headers = maps:merge(DefaultHeaders, Headers0), + + SockOpts = maps:get(<<"socket_options">>, Options, #{}), + SockAndProxy = maps:merge(#{ + <<"socket_options">> => SockOpts + }, ProxyParams), + SslParams = ssl_params(Url), + + #{ + <<"url">> => Url, + <<"auth_props">> => AuthProps, + <<"headers">> => Headers, + <<"ibrowse_options">> => maps:merge(SslParams, SockAndProxy), + <<"timeout">> => maps:get(<<"connection_timeout">>, Options), + <<"http_connections">> => maps:get(<<"http_connections">>, Options), + <<"retries">> => maps:get(<<"retries_per_request">>, Options), + <<"proxy_url">> => ProxyUrl + }; + +parse_rep_db(<<"http://", _/binary>> = Url, Proxy, Options) -> + parse_rep_db(#{<<"url">> => Url}, Proxy, Options); + +parse_rep_db(<<"https://", _/binary>> = Url, Proxy, Options) -> + parse_rep_db(#{<<"url">> => Url}, Proxy, Options); + +parse_rep_db(<<_/binary>>, _Proxy, _Options) -> + throw({error, local_endpoints_not_supported}); + +parse_rep_db(undefined, _Proxy, _Options) -> + throw({error, <<"Missing replication endpoint">>}). + + +parse_proxy_settings(#{} = Doc) -> + Proxy = maps:get(?PROXY, Doc, <<>>), + SrcProxy = maps:get(?SOURCE_PROXY, Doc, <<>>), + TgtProxy = maps:get(?TARGET_PROXY, Doc, <<>>), + + case Proxy =/= <<>> of + true when SrcProxy =/= <<>> -> + Error = "`proxy` is mutually exclusive with `source_proxy`", + throw({error, Error}); + true when TgtProxy =/= <<>> -> + Error = "`proxy` is mutually exclusive with `target_proxy`", + throw({error, Error}); + true -> + {parse_proxy_params(Proxy), parse_proxy_params(Proxy)}; + false -> + {parse_proxy_params(SrcProxy), parse_proxy_params(TgtProxy)} + end. + + +-spec maybe_add_trailing_slash(binary()) -> binary(). +maybe_add_trailing_slash(<<>>) -> + <<>>; + +maybe_add_trailing_slash(Url) when is_binary(Url) -> + case binary:match(Url, <<"?">>) of + nomatch -> + case binary:last(Url) of + $/ -> Url; + _ -> <<Url/binary, "/">> + end; + _ -> + Url % skip if there are query params + end. + + +-spec make_options(#{}) -> #{}. +make_options(#{} = RepDoc) -> + Options0 = convert_options(RepDoc), + Options = check_options(Options0), + ConfigOptions = lists:foldl(fun({K, Default, ConversionFun}, Acc) -> + V = ConversionFun(config:get("replicator", K, Default)), + Acc#{list_to_binary(K) => V} + end, #{}, ?CONFIG_DEFAULTS), + maps:merge(ConfigOptions, Options). + + +-spec convert_options(#{}) -> #{} | no_return(). +convert_options(#{} = Doc) -> + maps:fold(fun convert_fold/3, #{}, Doc). + + +-spec convert_fold(binary(), any(), #{}) -> #{}. 
+convert_fold(<<"cancel">>, V, Acc) when is_boolean(V) -> + Acc#{<<"cancel">> => V}; +convert_fold(<<"cancel">>, _, _) -> + throw({error, <<"`cancel` must be a boolean">>}); +convert_fold(IdOpt, V, Acc) when IdOpt =:= <<"_local_id">>; + IdOpt =:= <<"replication_id">>; IdOpt =:= <<"id">> -> + Acc#{<<"id">> => couch_replicator_ids:convert(V)}; +convert_fold(<<"create_target">>, V, Acc) when is_boolean(V) -> + Acc#{<<"create_target">> => V}; +convert_fold(<<"create_target">>, _, _) -> + throw({error, <<"`create_target` must be a boolean">>}); +convert_fold(<<"create_target_params">>, #{} = V, Acc) -> + Acc#{<<"create_target_params">> => V}; +convert_fold(<<"create_target_params">>, _, _) -> + throw({error, <<"`create_target_params` must be an object">>}); +convert_fold(<<"continuous">>, V, Acc) when is_boolean(V) -> + Acc#{<<"continuous">> => V}; +convert_fold(<<"continuous">>, _, _) -> + throw({error, <<"`continuous` must be a boolean">>}); +convert_fold(<<"filter">>, V, Acc) when is_binary(V), byte_size(V) > 1 -> + Acc#{<<"filter">> => V}; +convert_fold(<<"filter">>, _, _) -> + throw({error, <<"`filter` must be a string">>}); +convert_fold(<<"query_params">>, V, Acc) when is_map(V) orelse V =:= null -> + Acc#{<<"query_params">> => V}; +convert_fold(<<"query_params">>, _, _Acc) -> + throw({error, <<"`query_params` is not `null` or object">>}); +convert_fold(<<"doc_ids">>, null, Acc) -> + Acc; +convert_fold(<<"doc_ids">>, V, Acc) when is_list(V) -> + % Compatibility behaviour as: accept a list of percent encoded doc IDs + Ids = lists:map(fun(Id) -> + case is_binary(Id) andalso byte_size(Id) > 0 of + true -> list_to_binary(couch_httpd:unquote(Id)); + false -> throw({error, <<"`doc_ids` array must contain strings">>}) + end + end, V), + Acc#{<<"doc_ids">> => lists:usort(Ids)}; +convert_fold(<<"doc_ids">>, _, _) -> + throw({error, <<"`doc_ids` must be an array">>}); +convert_fold(<<"selector">>, #{} = V, Acc) -> + Acc#{<<"selector">> => V}; +convert_fold(<<"selector">>, _, _Acc) -> + throw({error, <<"`selector` must be a JSON object">>}); +convert_fold(<<"worker_processes">>, V, Acc) -> + Acc#{<<"worker_processes">> => bin2int(V, <<"worker_processes">>)}; +convert_fold(<<"worker_batch_size">>, V, Acc) -> + Acc#{<<"worker_batch_size">> => bin2int(V, <<"worker_batch_size">>)}; +convert_fold(<<"http_connections">>, V, Acc) -> + Acc#{<<"http_connections">> => bin2int(V, <<"http_connections">>)}; +convert_fold(<<"connection_timeout">>, V, Acc) -> + Acc#{<<"connection_timeout">> => bin2int(V, <<"connection_timeout">>)}; +convert_fold(<<"retries_per_request">>, V, Acc) -> + Acc#{<<"retries_per_request">> => bin2int(V, <<"retries_per_request">>)}; +convert_fold(<<"socket_options">>, V, Acc) -> + Acc#{<<"socket_options">> => parse_sock_opts(V)}; +convert_fold(<<"since_seq">>, V, Acc) -> + Acc#{<<"since_seq">> => V}; +convert_fold(<<"use_checkpoints">>, V, Acc) when is_boolean(V) -> + Acc#{<<"use_checkpoints">> => V}; +convert_fold(<<"use_checkpoints">>, _, _) -> + throw({error, <<"`use_checkpoints` must be a boolean">>}); +convert_fold(<<"checkpoint_interval">>, V, Acc) -> + Acc#{<<"checkpoint_interval">> => bin2int(V, <<"checkpoint_interval">>)}; +convert_fold(_K, _V, Acc) -> % skip unknown option + Acc. 
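As a small worked illustration (not part of the patch) of the socket option handling referenced above, feeding the ?DEFAULT_SOCK_OPTS string through parse_sock_opts/1 (defined below) should yield a map with binary keys, roughly:

%% parse_sock_opts("[{keepalive, true}, {nodelay, false}]") ->
%%     #{<<"keepalive">> => true, <<"nodelay">> => false}
%% Options not listed in ?VALID_SOCK_OPTS are silently dropped.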
+ + +bin2int(V, _Field) when is_integer(V) -> + V; + +bin2int(V, Field) when is_binary(V) -> + try + erlang:binary_to_integer(V) + catch + error:badarg -> + throw({error, <<"`", Field/binary, "` must be an integer">>}) + end; + +bin2int(_V, Field) -> + throw({error, <<"`", Field/binary, "` must be an integer">>}). + + +-spec check_options(#{}) -> #{}. +check_options(Options) -> + DocIds = maps:is_key(<<"doc_ids">>, Options), + Filter = maps:is_key(<<"filter">>, Options), + Selector = maps:is_key(<<"selector">>, Options), + case {DocIds, Filter, Selector} of + {false, false, false} -> Options; + {false, false, _} -> Options; + {false, _, false} -> Options; + {_, false, false} -> Options; + _ -> throw({error, <<"`doc_ids`,`filter`,`selector` are mutually " + " exclusive">>}) + end. + + +parse_sock_opts(Term) -> + {ok, SocketOptions} = couch_util:parse_term(Term), + lists:foldl(fun + ({K, V}, Acc) when is_atom(K) -> + case lists:member(K, ?VALID_SOCK_OPTS) of + true -> Acc#{atom_to_binary(K, utf8) => V}; + false -> Acc + end; + (_, Acc) -> + Acc + end, #{}, SocketOptions). + + +-spec parse_proxy_params(binary() | #{}) -> #{}. +parse_proxy_params(<<>>) -> + #{}; +parse_proxy_params(ProxyUrl) when is_binary(ProxyUrl)-> + #url{ + host = Host, + port = Port, + username = User, + password = Passwd, + protocol = Prot0 + } = ibrowse_lib:parse_url(binary_to_list(ProxyUrl)), + Prot = case lists:member(Prot0, ?VALID_PROXY_PROTOCOLS) of + true -> atom_to_binary(Prot0, utf8); + false -> throw({error, <<"Unsupported proxy protocol">>}) + end, + ProxyParams = #{ + <<"proxy_url">> => ProxyUrl, + <<"proxy_protocol">> => Prot, + <<"proxy_host">> => list_to_binary(Host), + <<"proxy_port">> => Port + }, + case is_list(User) andalso is_list(Passwd) of + true -> + ProxyParams#{ + <<"proxy_user">> => list_to_binary(User), + <<"proxy_password">> => list_to_binary(Passwd) + }; + false -> + ProxyParams + end. + + +-spec ssl_params(binary()) -> #{}. +ssl_params(Url) -> + case ibrowse_lib:parse_url(binary_to_list(Url)) of + #url{protocol = https} -> + Depth = list_to_integer( + config:get("replicator", "ssl_certificate_max_depth", "3") + ), + VerifyCerts = config:get("replicator", "verify_ssl_certificates"), + CertFile = config:get("replicator", "cert_file", null), + KeyFile = config:get("replicator", "key_file", null), + Password = config:get("replicator", "password", null), + VerifySslOptions = ssl_verify_options(VerifyCerts =:= "true"), + SslOpts = maps:merge(VerifySslOptions, #{<<"depth">> => Depth}), + HaveCertAndKey = CertFile /= null andalso KeyFile /= null, + SslOpts1 = case HaveCertAndKey of false -> SslOpts; true -> + CertOpts0 = #{ + <<"certfile">> => list_to_binary(CertFile), + <<"keyfile">> => list_to_binary(KeyFile) + }, + CertOpts = case Password of null -> CertOpts0; _ -> + CertOpts0#{<<"password">> => list_to_binary(Password)} + end, + maps:merge(SslOpts, CertOpts) + end, + #{<<"is_ssl">> => true, <<"ssl_options">> => SslOpts1}; + #url{protocol = http} -> + #{} + end. + + +-spec ssl_verify_options(true | false) -> [_]. +ssl_verify_options(true) -> + case config:get("replicator", "ssl_trusted_certificates_file") of + undefined -> + #{ + <<"verify">> => <<"verify_peer">>, + <<"cacertfile">> => null + }; + CAFile when is_list(CAFile) -> + #{ + <<"verify">> => <<"verify_peer">>, + <<"cacertfile">> => list_to_binary(CAFile) + } + end; + +ssl_verify_options(false) -> + #{ + <<"verify">> => <<"verify_none">> + }. + + +-ifdef(TEST). + +-include_lib("couch/include/couch_eunit.hrl"). 
+-include_lib("fabric/test/fabric2_test.hrl"). + + +check_options_pass_values_test() -> + ?assertEqual(check_options(#{}), #{}), + ?assertEqual(check_options(#{<<"baz">> => <<"foo">>}), + #{<<"baz">> => <<"foo">>}), + ?assertEqual(check_options(#{<<"doc_ids">> => [<<"x">>]}), + #{<<"doc_ids">> => [<<"x">>]}), + ?assertEqual(check_options(#{<<"filter">> => <<"f">>}), + #{<<"filter">> => <<"f">>}), + ?assertEqual(check_options(#{<<"selector">> => <<"s">>}), + #{<<"selector">> => <<"s">>}). + + +check_options_fail_values_test() -> + ?assertThrow({error, _}, + check_options(#{<<"doc_ids">> => [], <<"filter">> => <<"f">>})), + ?assertThrow({error, _}, + check_options(#{<<"doc_ids">> => [], <<"selector">> => <<"s">>})), + ?assertThrow({error, _}, + check_options(#{<<"filter">> => <<"f">>, <<"selector">> => <<"s">>})), + ?assertThrow({error, _}, + check_options(#{ + <<"doc_ids">> => [], + <<"filter">> => <<"f">>, + <<"selector">> => <<"s">>} + )). + + +check_convert_options_pass_test() -> + ?assertEqual(#{}, convert_options(#{})), + ?assertEqual(#{}, convert_options(#{<<"random">> => 42})), + ?assertEqual(#{<<"cancel">> => true}, + convert_options(#{<<"cancel">> => true})), + ?assertEqual(#{<<"create_target">> => true}, + convert_options(#{<<"create_target">> => true})), + ?assertEqual(#{<<"continuous">> => true}, + convert_options(#{<<"continuous">> => true})), + ?assertEqual(#{<<"doc_ids">> => [<<"id">>]}, + convert_options(#{<<"doc_ids">> => [<<"id">>]})), + ?assertEqual(#{<<"selector">> => #{<<"key">> => <<"value">>}}, + convert_options(#{<<"selector">> => #{<<"key">> => <<"value">>}})). + + +check_convert_options_fail_test() -> + ?assertThrow({error, _}, + convert_options(#{<<"cancel">> => <<"true">>})), + ?assertThrow({error, _}, + convert_options(#{<<"create_target">> => <<"true">>})), + ?assertThrow({error, _}, + convert_options(#{<<"continuous">> => <<"true">>})), + ?assertThrow({error, _}, + convert_options(#{<<"doc_ids">> => <<"not_a_list">>})), + ?assertThrow({error, _}, + convert_options(#{<<"selector">> => <<"bad">>})). + + +local_replication_endpoint_error_test_() -> + { + foreach, + fun setup/0, + fun teardown/1, + [ + ?TDEF_FE(t_error_on_local_endpoint) + ] + }. + + +setup() -> + meck:expect(config, get, fun(_, _, Default) -> Default end). + + +teardown(_) -> + meck:unload(). + + +t_error_on_local_endpoint(_) -> + RepDoc = {[ + {<<"_id">>, <<"someid">>}, + {<<"source">>, <<"localdb">>}, + {<<"target">>, <<"http://somehost.local/tgt">>} + ]}, + Expect = local_endpoints_not_supported, + ?assertThrow({bad_rep_doc, Expect}, parse_rep_doc(RepDoc)). + + +-endif. diff --git a/src/couch_replicator/src/couch_replicator_scheduler.erl b/src/couch_replicator/src/couch_replicator_scheduler.erl deleted file mode 100644 index 53c040e8c..000000000 --- a/src/couch_replicator/src/couch_replicator_scheduler.erl +++ /dev/null @@ -1,1687 +0,0 @@ -% Licensed under the Apache License, Version 2.0 (the "License"); you may not -% use this file except in compliance with the License. You may obtain a copy of -% the License at -% -% http://www.apache.org/licenses/LICENSE-2.0 -% -% Unless required by applicable law or agreed to in writing, software -% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -% License for the specific language governing permissions and limitations under -% the License. - --module(couch_replicator_scheduler). - --behaviour(gen_server). --behaviour(config_listener). 
- --export([ - start_link/0 -]). - --export([ - init/1, - terminate/2, - handle_call/3, - handle_info/2, - handle_cast/2, - code_change/3, - format_status/2 -]). - --export([ - add_job/1, - remove_job/1, - reschedule/0, - rep_state/1, - find_jobs_by_dbname/1, - find_jobs_by_doc/2, - job_summary/2, - health_threshold/0, - jobs/0, - job/1, - restart_job/1, - update_job_stats/2 -]). - -%% config_listener callbacks --export([ - handle_config_change/5, - handle_config_terminate/3 -]). - -%% for status updater process to allow hot code loading --export([ - stats_updater_loop/1 -]). - --include("couch_replicator_scheduler.hrl"). --include("couch_replicator.hrl"). --include_lib("couch_replicator/include/couch_replicator_api_wrap.hrl"). --include_lib("couch/include/couch_db.hrl"). - -%% types --type event_type() :: added | started | stopped | {crashed, any()}. --type event() :: {Type:: event_type(), When :: erlang:timestamp()}. --type history() :: nonempty_list(event()). - -%% definitions --define(MAX_BACKOFF_EXPONENT, 10). --define(BACKOFF_INTERVAL_MICROS, 30 * 1000 * 1000). --define(DEFAULT_HEALTH_THRESHOLD_SEC, 2 * 60). --define(RELISTEN_DELAY, 5000). --define(STATS_UPDATE_WAIT, 5000). - --define(DEFAULT_MAX_JOBS, 500). --define(DEFAULT_MAX_CHURN, 20). --define(DEFAULT_MAX_HISTORY, 20). --define(DEFAULT_SCHEDULER_INTERVAL, 60000). - - --record(state, {interval, timer, max_jobs, max_churn, max_history, stats_pid}). --record(job, { - id :: job_id() | '$1' | '_', - rep :: #rep{} | '_', - pid :: undefined | pid() | '$1' | '_', - monitor :: undefined | reference() | '_', - history :: history() | '_' -}). - --record(stats_acc, { - pending_n = 0 :: non_neg_integer(), - running_n = 0 :: non_neg_integer(), - crashed_n = 0 :: non_neg_integer() -}). - - -%% public functions - --spec start_link() -> {ok, pid()} | ignore | {error, term()}. -start_link() -> - gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). - - --spec add_job(#rep{}) -> ok. -add_job(#rep{} = Rep) when Rep#rep.id /= undefined -> - case existing_replication(Rep) of - false -> - Job = #job{ - id = Rep#rep.id, - rep = Rep, - history = [{added, os:timestamp()}] - }, - gen_server:call(?MODULE, {add_job, Job}, infinity); - true -> - ok - end. - - --spec remove_job(job_id()) -> ok. -remove_job(Id) -> - gen_server:call(?MODULE, {remove_job, Id}, infinity). - - --spec reschedule() -> ok. -% Trigger a manual reschedule. Used for testing and/or ops. -reschedule() -> - gen_server:call(?MODULE, reschedule, infinity). - - --spec rep_state(rep_id()) -> #rep{} | nil. -rep_state(RepId) -> - case (catch ets:lookup_element(?MODULE, RepId, #job.rep)) of - {'EXIT',{badarg, _}} -> - nil; - Rep -> - Rep - end. - - --spec job_summary(job_id(), non_neg_integer()) -> [_] | nil. 
-job_summary(JobId, HealthThreshold) -> - case job_by_id(JobId) of - {ok, #job{pid = Pid, history = History, rep = Rep}} -> - ErrorCount = consecutive_crashes(History, HealthThreshold), - {State, Info} = case {Pid, ErrorCount} of - {undefined, 0} -> - case History of - [{{crashed, Error}, _When} | _] -> - {crashing, crash_reason_json(Error)}; - [_ | _] -> - {pending, Rep#rep.stats} - end; - {undefined, ErrorCount} when ErrorCount > 0 -> - [{{crashed, Error}, _When} | _] = History, - {crashing, crash_reason_json(Error)}; - {Pid, ErrorCount} when is_pid(Pid) -> - {running, Rep#rep.stats} - end, - [ - {source, iolist_to_binary(ejson_url(Rep#rep.source))}, - {target, iolist_to_binary(ejson_url(Rep#rep.target))}, - {state, State}, - {info, couch_replicator_utils:ejson_state_info(Info)}, - {error_count, ErrorCount}, - {last_updated, last_updated(History)}, - {start_time, - couch_replicator_utils:iso8601(Rep#rep.start_time)}, - {source_proxy, job_proxy_url(Rep#rep.source)}, - {target_proxy, job_proxy_url(Rep#rep.target)} - ]; - {error, not_found} -> - nil % Job might have just completed - end. - - -job_proxy_url(#httpdb{proxy_url = ProxyUrl}) when is_list(ProxyUrl) -> - list_to_binary(couch_util:url_strip_password(ProxyUrl)); -job_proxy_url(_Endpoint) -> - null. - - -% Health threshold is the minimum amount of time an unhealthy job should run -% crashing before it is considered to be healthy again. HealtThreashold should -% not be 0 as jobs could start and immediately crash, and it shouldn't be -% infinity, since then consecutive crashes would accumulate forever even if -% job is back to normal. --spec health_threshold() -> non_neg_integer(). -health_threshold() -> - config:get_integer("replicator", "health_threshold", - ?DEFAULT_HEALTH_THRESHOLD_SEC). - - --spec find_jobs_by_dbname(binary()) -> list(#rep{}). -find_jobs_by_dbname(DbName) -> - Rep = #rep{db_name = DbName, _ = '_'}, - MatchSpec = #job{id = '$1', rep = Rep, _ = '_'}, - [RepId || [RepId] <- ets:match(?MODULE, MatchSpec)]. - - --spec find_jobs_by_doc(binary(), binary()) -> list(#rep{}). -find_jobs_by_doc(DbName, DocId) -> - Rep = #rep{db_name = DbName, doc_id = DocId, _ = '_'}, - MatchSpec = #job{id = '$1', rep = Rep, _ = '_'}, - [RepId || [RepId] <- ets:match(?MODULE, MatchSpec)]. - - --spec restart_job(binary() | list() | rep_id()) -> - {ok, {[_]}} | {error, not_found}. -restart_job(JobId) -> - case rep_state(JobId) of - nil -> - {error, not_found}; - #rep{} = Rep -> - ok = remove_job(JobId), - ok = add_job(Rep), - job(JobId) - end. - - --spec update_job_stats(job_id(), term()) -> ok. -update_job_stats(JobId, Stats) -> - gen_server:cast(?MODULE, {update_job_stats, JobId, Stats}). - - -%% gen_server functions - -init(_) -> - config:enable_feature('scheduler'), - EtsOpts = [named_table, {keypos, #job.id}, {read_concurrency, true}, - {write_concurrency, true}], - ?MODULE = ets:new(?MODULE, EtsOpts), - ok = config:listen_for_changes(?MODULE, nil), - Interval = config:get_integer("replicator", "interval", - ?DEFAULT_SCHEDULER_INTERVAL), - MaxJobs = config:get_integer("replicator", "max_jobs", ?DEFAULT_MAX_JOBS), - MaxChurn = config:get_integer("replicator", "max_churn", - ?DEFAULT_MAX_CHURN), - MaxHistory = config:get_integer("replicator", "max_history", - ?DEFAULT_MAX_HISTORY), - Timer = erlang:send_after(Interval, self(), reschedule), - State = #state{ - interval = Interval, - max_jobs = MaxJobs, - max_churn = MaxChurn, - max_history = MaxHistory, - timer = Timer, - stats_pid = start_stats_updater() - }, - {ok, State}. 
- - -handle_call({add_job, Job}, _From, State) -> - ok = maybe_remove_job_int(Job#job.id, State), - true = add_job_int(Job), - ok = maybe_start_newly_added_job(Job, State), - couch_stats:increment_counter([couch_replicator, jobs, adds]), - TotalJobs = ets:info(?MODULE, size), - couch_stats:update_gauge([couch_replicator, jobs, total], TotalJobs), - {reply, ok, State}; - -handle_call({remove_job, Id}, _From, State) -> - ok = maybe_remove_job_int(Id, State), - {reply, ok, State}; - -handle_call(reschedule, _From, State) -> - ok = reschedule(State), - {reply, ok, State}; - -handle_call(_, _From, State) -> - {noreply, State}. - - -handle_cast({set_max_jobs, MaxJobs}, State) when is_integer(MaxJobs), - MaxJobs >= 0 -> - couch_log:notice("~p: max_jobs set to ~B", [?MODULE, MaxJobs]), - {noreply, State#state{max_jobs = MaxJobs}}; - -handle_cast({set_max_churn, MaxChurn}, State) when is_integer(MaxChurn), - MaxChurn > 0 -> - couch_log:notice("~p: max_churn set to ~B", [?MODULE, MaxChurn]), - {noreply, State#state{max_churn = MaxChurn}}; - -handle_cast({set_max_history, MaxHistory}, State) when is_integer(MaxHistory), - MaxHistory > 0 -> - couch_log:notice("~p: max_history set to ~B", [?MODULE, MaxHistory]), - {noreply, State#state{max_history = MaxHistory}}; - -handle_cast({set_interval, Interval}, State) when is_integer(Interval), - Interval > 0 -> - couch_log:notice("~p: interval set to ~B", [?MODULE, Interval]), - {noreply, State#state{interval = Interval}}; - -handle_cast({update_job_stats, JobId, Stats}, State) -> - case rep_state(JobId) of - nil -> - ok; - #rep{} = Rep -> - NewRep = Rep#rep{stats = Stats}, - true = ets:update_element(?MODULE, JobId, {#job.rep, NewRep}) - end, - {noreply, State}; - -handle_cast(UnexpectedMsg, State) -> - couch_log:error("~p: received un-expected cast ~p", [?MODULE, UnexpectedMsg]), - {noreply, State}. - - -handle_info(reschedule, State) -> - ok = reschedule(State), - erlang:cancel_timer(State#state.timer), - Timer = erlang:send_after(State#state.interval, self(), reschedule), - {noreply, State#state{timer = Timer}}; - -handle_info({'DOWN', _Ref, process, Pid, normal}, State) -> - {ok, Job} = job_by_pid(Pid), - couch_log:notice("~p: Job ~p completed normally", [?MODULE, Job#job.id]), - remove_job_int(Job), - update_running_jobs_stats(State#state.stats_pid), - {noreply, State}; - -handle_info({'DOWN', _Ref, process, Pid, Reason0}, State) -> - {ok, Job} = job_by_pid(Pid), - Reason = case Reason0 of - {shutdown, ShutdownReason} -> ShutdownReason; - Other -> Other - end, - ok = handle_crashed_job(Job, Reason, State), - {noreply, State}; - -handle_info(restart_config_listener, State) -> - ok = config:listen_for_changes(?MODULE, nil), - {noreply, State}; - -handle_info(_, State) -> - {noreply, State}. - - -code_change(_OldVsn, State, _Extra) -> - {ok, State}. - - -terminate(_Reason, _State) -> - ok. - - -format_status(_Opt, [_PDict, State]) -> - [ - {max_jobs, State#state.max_jobs}, - {running_jobs, running_job_count()}, - {pending_jobs, pending_job_count()} - ]. 
- - -%% config listener functions - -handle_config_change("replicator", "max_jobs", V, _, S) -> - ok = gen_server:cast(?MODULE, {set_max_jobs, list_to_integer(V)}), - {ok, S}; - -handle_config_change("replicator", "max_churn", V, _, S) -> - ok = gen_server:cast(?MODULE, {set_max_churn, list_to_integer(V)}), - {ok, S}; - -handle_config_change("replicator", "interval", V, _, S) -> - ok = gen_server:cast(?MODULE, {set_interval, list_to_integer(V)}), - {ok, S}; - -handle_config_change("replicator", "max_history", V, _, S) -> - ok = gen_server:cast(?MODULE, {set_max_history, list_to_integer(V)}), - {ok, S}; - -handle_config_change(_, _, _, _, S) -> - {ok, S}. - - -handle_config_terminate(_, stop, _) -> - ok; - -handle_config_terminate(_, _, _) -> - Pid = whereis(?MODULE), - erlang:send_after(?RELISTEN_DELAY, Pid, restart_config_listener). - - -%% Private functions - -% Handle crashed jobs. Handling differs between transient and permanent jobs. -% Transient jobs are those posted to the _replicate endpoint. They don't have a -% db associated with them. When those jobs crash, they are not restarted. That -% is also consistent with behavior when the node they run on, crashed and they -% do not migrate to other nodes. Permanent jobs are those created from -% replicator documents. Those jobs, once they pass basic validation and end up -% in the scheduler will be retried indefinitely (with appropriate exponential -% backoffs). --spec handle_crashed_job(#job{}, any(), #state{}) -> ok. -handle_crashed_job(#job{rep = #rep{db_name = null}} = Job, Reason, State) -> - Msg = "~p : Transient job ~p failed, removing. Error: ~p", - ErrorBinary = couch_replicator_utils:rep_error_to_binary(Reason), - couch_log:error(Msg, [?MODULE, Job#job.id, ErrorBinary]), - remove_job_int(Job), - update_running_jobs_stats(State#state.stats_pid), - ok; - -handle_crashed_job(Job, Reason, State) -> - ok = update_state_crashed(Job, Reason, State), - case couch_replicator_doc_processor:update_docs() of - true -> - couch_replicator_docs:update_error(Job#job.rep, Reason); - false -> - ok - end, - case ets:info(?MODULE, size) < State#state.max_jobs of - true -> - % Starting pending jobs is an O(TotalJobsCount) operation. Only do - % it if there is a relatively small number of jobs. Otherwise - % scheduler could be blocked if there is a cascade of lots failing - % jobs in a row. - start_pending_jobs(State), - update_running_jobs_stats(State#state.stats_pid), - ok; - false -> - ok - end. - - -% Attempt to start a newly added job. First quickly check if total jobs -% already exceed max jobs, then do a more expensive check which runs a -% select (an O(n) operation) to check pending jobs specifically. --spec maybe_start_newly_added_job(#job{}, #state{}) -> ok. -maybe_start_newly_added_job(Job, State) -> - MaxJobs = State#state.max_jobs, - TotalJobs = ets:info(?MODULE, size), - case TotalJobs < MaxJobs andalso running_job_count() < MaxJobs of - true -> - start_job_int(Job, State), - update_running_jobs_stats(State#state.stats_pid), - ok; - false -> - ok - end. - - -% Return up to a given number of oldest, not recently crashed jobs. Try to be -% memory efficient and use ets:foldl to accumulate jobs. --spec pending_jobs(non_neg_integer()) -> [#job{}]. -pending_jobs(0) -> - % Handle this case as user could set max_churn to 0. If this is passed to - % other function clause it will crash as gb_sets:largest assumes set is not - % empty. 
- []; - -pending_jobs(Count) when is_integer(Count), Count > 0 -> - Set0 = gb_sets:new(), % [{LastStart, Job},...] - Now = os:timestamp(), - Acc0 = {Set0, Now, Count, health_threshold()}, - {Set1, _, _, _} = ets:foldl(fun pending_fold/2, Acc0, ?MODULE), - [Job || {_Started, Job} <- gb_sets:to_list(Set1)]. - - -pending_fold(Job, {Set, Now, Count, HealthThreshold}) -> - Set1 = case {not_recently_crashed(Job, Now, HealthThreshold), - gb_sets:size(Set) >= Count} of - {true, true} -> - % Job is healthy but already reached accumulated limit, so might - % have to replace one of the accumulated jobs - pending_maybe_replace(Job, Set); - {true, false} -> - % Job is healthy and we haven't reached the limit, so add job - % to accumulator - gb_sets:add_element({last_started(Job), Job}, Set); - {false, _} -> - % This job is not healthy (has crashed too recently), so skip it. - Set - end, - {Set1, Now, Count, HealthThreshold}. - - -% Replace Job in the accumulator if it is older than youngest job there. -% "oldest" here means one which has been waiting to run the longest. "youngest" -% means the one with most recent activity. The goal is to keep up to Count -% oldest jobs during iteration. For example if there are jobs with these times -% accumulated so far [5, 7, 11], and start time of current job is 6. Then -% 6 < 11 is true, so 11 (youngest) is dropped and 6 inserted resulting in -% [5, 6, 7]. In the end the result might look like [1, 2, 5], for example. -pending_maybe_replace(Job, Set) -> - Started = last_started(Job), - {Youngest, YoungestJob} = gb_sets:largest(Set), - case Started < Youngest of - true -> - Set1 = gb_sets:delete({Youngest, YoungestJob}, Set), - gb_sets:add_element({Started, Job}, Set1); - false -> - Set - end. - - -start_jobs(Count, State) -> - [start_job_int(Job, State) || Job <- pending_jobs(Count)], - ok. - - --spec stop_jobs(non_neg_integer(), boolean(), #state{}) -> non_neg_integer(). -stop_jobs(Count, _, _) when is_integer(Count), Count =< 0 -> - 0; - -stop_jobs(Count, IsContinuous, State) when is_integer(Count) -> - Running0 = running_jobs(), - ContinuousPred = fun(Job) -> is_continuous(Job) =:= IsContinuous end, - Running1 = lists:filter(ContinuousPred, Running0), - Running2 = lists:sort(fun longest_running/2, Running1), - Running3 = lists:sublist(Running2, Count), - length([stop_job_int(Job, State) || Job <- Running3]). - - -longest_running(#job{} = A, #job{} = B) -> - last_started(A) =< last_started(B). - - -not_recently_crashed(#job{history = History}, Now, HealthThreshold) -> - case History of - [{added, _When}] -> - true; - [{stopped, _When} | _] -> - true; - _ -> - LatestCrashT = latest_crash_timestamp(History), - CrashCount = consecutive_crashes(History, HealthThreshold), - timer:now_diff(Now, LatestCrashT) >= backoff_micros(CrashCount) - end. - - -% Count consecutive crashes. A crash happens when there is a `crashed` event -% within a short period of time (configurable) after any other event. It could -% be `crashed, started` for jobs crashing quickly after starting, `crashed, -% crashed`, `crashed, stopped` if job repeatedly failed to start -% being stopped. Or it could be `crashed, added` if it crashed immediately after -% being added during start. -% -% A streak of "consecutive crashes" ends when a crashed event is seen starting -% and running successfully without crashing for a period of time. That period -% of time is the HealthThreshold. -% - --spec consecutive_crashes(history(), non_neg_integer()) -> non_neg_integer(). 
-consecutive_crashes(History, HealthThreshold) when is_list(History) -> - consecutive_crashes(History, HealthThreshold, 0). - - --spec consecutive_crashes(history(), non_neg_integer(), non_neg_integer()) -> - non_neg_integer(). -consecutive_crashes([], _HealthThreashold, Count) -> - Count; - -consecutive_crashes([{{crashed, _}, CrashT}, {_, PrevT} = PrevEvent | Rest], - HealthThreshold, Count) -> - case timer:now_diff(CrashT, PrevT) > HealthThreshold * 1000000 of - true -> - Count; - false -> - consecutive_crashes([PrevEvent | Rest], HealthThreshold, Count + 1) - end; - -consecutive_crashes([{stopped, _}, {started, _} | _], _HealthThreshold, - Count) -> - Count; - -consecutive_crashes([_ | Rest], HealthThreshold, Count) -> - consecutive_crashes(Rest, HealthThreshold, Count). - - --spec latest_crash_timestamp(history()) -> erlang:timestamp(). -latest_crash_timestamp([]) -> - {0, 0, 0}; % Used to avoid special-casing "no crash" when doing now_diff - -latest_crash_timestamp([{{crashed, _Reason}, When} | _]) -> - When; - -latest_crash_timestamp([_Event | Rest]) -> - latest_crash_timestamp(Rest). - - --spec backoff_micros(non_neg_integer()) -> non_neg_integer(). -backoff_micros(CrashCount) -> - % When calculating the backoff interval treat consecutive crash count as the - % exponent in Base * 2 ^ CrashCount to achieve an exponential backoff - % doubling every consecutive failure, starting with the base value of - % ?BACKOFF_INTERVAL_MICROS. - BackoffExp = erlang:min(CrashCount - 1, ?MAX_BACKOFF_EXPONENT), - (1 bsl BackoffExp) * ?BACKOFF_INTERVAL_MICROS. - - --spec add_job_int(#job{}) -> boolean(). -add_job_int(#job{} = Job) -> - ets:insert_new(?MODULE, Job). - - --spec maybe_remove_job_int(job_id(), #state{}) -> ok. -maybe_remove_job_int(JobId, State) -> - case job_by_id(JobId) of - {ok, Job} -> - ok = stop_job_int(Job, State), - true = remove_job_int(Job), - couch_stats:increment_counter([couch_replicator, jobs, removes]), - TotalJobs = ets:info(?MODULE, size), - couch_stats:update_gauge([couch_replicator, jobs, total], - TotalJobs), - update_running_jobs_stats(State#state.stats_pid), - ok; - {error, not_found} -> - ok - end. - - -start_job_int(#job{pid = Pid}, _State) when Pid /= undefined -> - ok; - -start_job_int(#job{} = Job0, State) -> - Job = maybe_optimize_job_for_rate_limiting(Job0), - case couch_replicator_scheduler_sup:start_child(Job#job.rep) of - {ok, Child} -> - Ref = monitor(process, Child), - ok = update_state_started(Job, Child, Ref, State), - couch_log:notice("~p: Job ~p started as ~p", - [?MODULE, Job#job.id, Child]); - {error, {already_started, OtherPid}} when node(OtherPid) =:= node() -> - Ref = monitor(process, OtherPid), - ok = update_state_started(Job, OtherPid, Ref, State), - couch_log:notice("~p: Job ~p already running as ~p. Most likely" - " because replicator scheduler was restarted", - [?MODULE, Job#job.id, OtherPid]); - {error, {already_started, OtherPid}} when node(OtherPid) =/= node() -> - CrashMsg = "Duplicate replication running on another node", - couch_log:notice("~p: Job ~p already running as ~p. Most likely" - " because a duplicate replication is running on another node", - [?MODULE, Job#job.id, OtherPid]), - ok = update_state_crashed(Job, CrashMsg, State); - {error, Reason} -> - couch_log:notice("~p: Job ~p failed to start for reason ~p", - [?MODULE, Job, Reason]), - ok = update_state_crashed(Job, Reason, State) - end. - - --spec stop_job_int(#job{}, #state{}) -> ok | {error, term()}. 
-stop_job_int(#job{pid = undefined}, _State) -> - ok; - -stop_job_int(#job{} = Job, State) -> - ok = couch_replicator_scheduler_sup:terminate_child(Job#job.pid), - demonitor(Job#job.monitor, [flush]), - ok = update_state_stopped(Job, State), - couch_log:notice("~p: Job ~p stopped as ~p", - [?MODULE, Job#job.id, Job#job.pid]). - - --spec remove_job_int(#job{}) -> true. -remove_job_int(#job{} = Job) -> - ets:delete(?MODULE, Job#job.id). - - --spec running_job_count() -> non_neg_integer(). -running_job_count() -> - ets:info(?MODULE, size) - pending_job_count(). - - --spec running_jobs() -> [#job{}]. -running_jobs() -> - ets:select(?MODULE, [{#job{pid = '$1', _='_'}, [{is_pid, '$1'}], ['$_']}]). - - --spec pending_job_count() -> non_neg_integer(). -pending_job_count() -> - ets:select_count(?MODULE, [{#job{pid=undefined, _='_'}, [], [true]}]). - - --spec job_by_pid(pid()) -> {ok, #job{}} | {error, not_found}. -job_by_pid(Pid) when is_pid(Pid) -> - case ets:match_object(?MODULE, #job{pid=Pid, _='_'}) of - [] -> - {error, not_found}; - [#job{}=Job] -> - {ok, Job} - end. - - --spec job_by_id(job_id()) -> {ok, #job{}} | {error, not_found}. -job_by_id(Id) -> - case ets:lookup(?MODULE, Id) of - [] -> - {error, not_found}; - [#job{}=Job] -> - {ok, Job} - end. - - --spec update_state_stopped(#job{}, #state{}) -> ok. -update_state_stopped(Job, State) -> - Job1 = reset_job_process(Job), - Job2 = update_history(Job1, stopped, os:timestamp(), State), - true = ets:insert(?MODULE, Job2), - couch_stats:increment_counter([couch_replicator, jobs, stops]), - ok. - - --spec update_state_started(#job{}, pid(), reference(), #state{}) -> ok. -update_state_started(Job, Pid, Ref, State) -> - Job1 = set_job_process(Job, Pid, Ref), - Job2 = update_history(Job1, started, os:timestamp(), State), - true = ets:insert(?MODULE, Job2), - couch_stats:increment_counter([couch_replicator, jobs, starts]), - ok. - - --spec update_state_crashed(#job{}, any(), #state{}) -> ok. -update_state_crashed(Job, Reason, State) -> - Job1 = reset_job_process(Job), - Job2 = update_history(Job1, {crashed, Reason}, os:timestamp(), State), - true = ets:insert(?MODULE, Job2), - couch_stats:increment_counter([couch_replicator, jobs, crashes]), - ok. - - --spec set_job_process(#job{}, pid(), reference()) -> #job{}. -set_job_process(#job{} = Job, Pid, Ref) when is_pid(Pid), is_reference(Ref) -> - Job#job{pid = Pid, monitor = Ref}. - - --spec reset_job_process(#job{}) -> #job{}. -reset_job_process(#job{} = Job) -> - Job#job{pid = undefined, monitor = undefined}. - - --spec reschedule(#state{}) -> ok. -reschedule(State) -> - StopCount = stop_excess_jobs(State, running_job_count()), - rotate_jobs(State, StopCount), - update_running_jobs_stats(State#state.stats_pid). - - --spec stop_excess_jobs(#state{}, non_neg_integer()) -> non_neg_integer(). -stop_excess_jobs(State, Running) -> - #state{max_jobs=MaxJobs} = State, - StopCount = max(0, Running - MaxJobs), - Stopped = stop_jobs(StopCount, true, State), - OneshotLeft = StopCount - Stopped, - stop_jobs(OneshotLeft, false, State), - StopCount. - - -start_pending_jobs(State) -> - #state{max_jobs=MaxJobs} = State, - Running = running_job_count(), - Pending = pending_job_count(), - if Running < MaxJobs, Pending > 0 -> - start_jobs(MaxJobs - Running, State); - true -> - ok - end. - - --spec rotate_jobs(#state{}, non_neg_integer()) -> ok. 
-rotate_jobs(State, ChurnSoFar) ->
-    #state{max_jobs=MaxJobs, max_churn=MaxChurn} = State,
-    Running = running_job_count(),
-    Pending = pending_job_count(),
-    % Reduce MaxChurn by the number of already stopped jobs in the
-    % current rescheduling cycle.
-    Churn = max(0, MaxChurn - ChurnSoFar),
-    SlotsAvailable = MaxJobs - Running,
-    if SlotsAvailable >= 0 ->
-        % If there are enough SlotsAvailable, reduce StopCount to avoid
-        % unnecessarily stopping jobs. `stop_jobs/3` ignores 0 or negative
-        % values so we don't worry about that here.
-        StopCount = lists:min([Pending - SlotsAvailable, Running, Churn]),
-        stop_jobs(StopCount, true, State),
-        StartCount = max(0, MaxJobs - running_job_count()),
-        start_jobs(StartCount, State);
-    true ->
-        ok
-    end.
-
-
--spec last_started(#job{}) -> erlang:timestamp().
-last_started(#job{} = Job) ->
-    case lists:keyfind(started, 1, Job#job.history) of
-        false ->
-            {0, 0, 0};
-        {started, When} ->
-            When
-    end.
-
-
--spec update_history(#job{}, event_type(), erlang:timestamp(), #state{}) ->
-    #job{}.
-update_history(Job, Type, When, State) ->
-    History0 = [{Type, When} | Job#job.history],
-    History1 = lists:sublist(History0, State#state.max_history),
-    Job#job{history = History1}.
-
-
--spec ejson_url(#httpdb{} | binary()) -> binary().
-ejson_url(#httpdb{}=Httpdb) ->
-    couch_util:url_strip_password(Httpdb#httpdb.url);
-ejson_url(DbName) when is_binary(DbName) ->
-    DbName.
-
-
--spec job_ejson(#job{}) -> {[_ | _]}.
-job_ejson(Job) ->
-    Rep = Job#job.rep,
-    Source = ejson_url(Rep#rep.source),
-    Target = ejson_url(Rep#rep.target),
-    History = lists:map(fun({Type, When}) ->
-        EventProps = case Type of
-            {crashed, Reason} ->
-                [{type, crashed}, {reason, crash_reason_json(Reason)}];
-            Type ->
-                [{type, Type}]
-        end,
-        {[{timestamp, couch_replicator_utils:iso8601(When)} | EventProps]}
-    end, Job#job.history),
-    {BaseID, Ext} = Job#job.id,
-    Pid = case Job#job.pid of
-        undefined ->
-            null;
-        P when is_pid(P) ->
-            ?l2b(pid_to_list(P))
-    end,
-    {[
-        {id, iolist_to_binary([BaseID, Ext])},
-        {pid, Pid},
-        {source, iolist_to_binary(Source)},
-        {target, iolist_to_binary(Target)},
-        {database, Rep#rep.db_name},
-        {user, (Rep#rep.user_ctx)#user_ctx.name},
-        {doc_id, Rep#rep.doc_id},
-        {info, couch_replicator_utils:ejson_state_info(Rep#rep.stats)},
-        {history, History},
-        {node, node()},
-        {start_time, couch_replicator_utils:iso8601(Rep#rep.start_time)}
-    ]}.
-
-
--spec jobs() -> [[tuple()]].
-jobs() ->
-    ets:foldl(fun(Job, Acc) -> [job_ejson(Job) | Acc] end, [], ?MODULE).
-
-
--spec job(job_id()) -> {ok, {[_ | _]}} | {error, not_found}.
-job(JobId) ->
-    case job_by_id(JobId) of
-        {ok, Job} ->
-            {ok, job_ejson(Job)};
-        Error ->
-            Error
-    end.
-
-
-crash_reason_json({_CrashType, Info}) when is_binary(Info) ->
-    Info;
-crash_reason_json(Reason) when is_binary(Reason) ->
-    Reason;
-crash_reason_json(Error) ->
-    couch_replicator_utils:rep_error_to_binary(Error).
-
-
--spec last_updated([_]) -> binary().
-last_updated([{_Type, When} | _]) ->
-    couch_replicator_utils:iso8601(When).
-
-
--spec is_continuous(#job{}) -> boolean().
-is_continuous(#job{rep = Rep}) ->
-    couch_util:get_value(continuous, Rep#rep.options, false).
-
-
-% If the job crashed last time because it was rate limited, try to
-% optimize some options to help the job make progress.
--spec maybe_optimize_job_for_rate_limiting(#job{}) -> #job{}.
-maybe_optimize_job_for_rate_limiting(Job = #job{history = - [{{crashed, max_backoff}, _} | _]}) -> - Opts = [ - {checkpoint_interval, 5000}, - {worker_processes, 2}, - {worker_batch_size, 100}, - {http_connections, 5} - ], - Rep = lists:foldl(fun optimize_int_option/2, Job#job.rep, Opts), - Job#job{rep = Rep}; -maybe_optimize_job_for_rate_limiting(Job) -> - Job. - - --spec optimize_int_option({atom(), any()}, #rep{}) -> #rep{}. -optimize_int_option({Key, Val}, #rep{options = Options} = Rep) -> - case couch_util:get_value(Key, Options) of - CurVal when is_integer(CurVal), CurVal > Val -> - Msg = "~p replication ~p : setting ~p = ~p due to rate limiting", - couch_log:warning(Msg, [?MODULE, Rep#rep.id, Key, Val]), - Options1 = lists:keyreplace(Key, 1, Options, {Key, Val}), - Rep#rep{options = Options1}; - _ -> - Rep - end. - - -% Updater is a separate process. It receives `update_stats` messages and -% updates scheduler stats from the scheduler jobs table. Updates are -% performed no more frequently than once per ?STATS_UPDATE_WAIT milliseconds. - -update_running_jobs_stats(StatsPid) when is_pid(StatsPid) -> - StatsPid ! update_stats, - ok. - - -start_stats_updater() -> - erlang:spawn_link(?MODULE, stats_updater_loop, [undefined]). - - -stats_updater_loop(Timer) -> - receive - update_stats when Timer == undefined -> - TRef = erlang:send_after(?STATS_UPDATE_WAIT, self(), refresh_stats), - ?MODULE:stats_updater_loop(TRef); - update_stats when is_reference(Timer) -> - ?MODULE:stats_updater_loop(Timer); - refresh_stats -> - ok = stats_updater_refresh(), - ?MODULE:stats_updater_loop(undefined); - Else -> - erlang:exit({stats_updater_bad_msg, Else}) - end. - - --spec stats_updater_refresh() -> ok. -stats_updater_refresh() -> - #stats_acc{ - pending_n = PendingN, - running_n = RunningN, - crashed_n = CrashedN - } = ets:foldl(fun stats_fold/2, #stats_acc{}, ?MODULE), - couch_stats:update_gauge([couch_replicator, jobs, pending], PendingN), - couch_stats:update_gauge([couch_replicator, jobs, running], RunningN), - couch_stats:update_gauge([couch_replicator, jobs, crashed], CrashedN), - ok. - - --spec stats_fold(#job{}, #stats_acc{}) -> #stats_acc{}. -stats_fold(#job{pid = undefined, history = [{added, _}]}, Acc) -> - Acc#stats_acc{pending_n = Acc#stats_acc.pending_n + 1}; -stats_fold(#job{pid = undefined, history = [{stopped, _} | _]}, Acc) -> - Acc#stats_acc{pending_n = Acc#stats_acc.pending_n + 1}; -stats_fold(#job{pid = undefined, history = [{{crashed, _}, _} | _]}, Acc) -> - Acc#stats_acc{crashed_n =Acc#stats_acc.crashed_n + 1}; -stats_fold(#job{pid = P, history = [{started, _} | _]}, Acc) when is_pid(P) -> - Acc#stats_acc{running_n = Acc#stats_acc.running_n + 1}. - - --spec existing_replication(#rep{}) -> boolean(). -existing_replication(#rep{} = NewRep) -> - case job_by_id(NewRep#rep.id) of - {ok, #job{rep = CurRep}} -> - NormCurRep = couch_replicator_utils:normalize_rep(CurRep), - NormNewRep = couch_replicator_utils:normalize_rep(NewRep), - NormCurRep == NormNewRep; - {error, not_found} -> - false - end. - - --ifdef(TEST). - --include_lib("eunit/include/eunit.hrl"). - - -backoff_micros_test_() -> - BaseInterval = ?BACKOFF_INTERVAL_MICROS, - [?_assertEqual(R * BaseInterval, backoff_micros(N)) || {R, N} <- [ - {1, 1}, {2, 2}, {4, 3}, {8, 4}, {16, 5}, {32, 6}, {64, 7}, {128, 8}, - {256, 9}, {512, 10}, {1024, 11}, {1024, 12} - ]]. 
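The table in this test is just the closed form of backoff_micros/1 above: after N consecutive crashes the scheduler waits (1 bsl min(N - 1, ?MAX_BACKOFF_EXPONENT)) * ?BACKOFF_INTERVAL_MICROS before it considers retrying, doubling per crash until the exponent cap is hit. A small shell sketch of just the multiplier; the cap value 10 is inferred from the last two rows of the test, where the multiplier flattens at 1024:

    %% Multiplier applied to ?BACKOFF_INTERVAL_MICROS before the next retry.
    Multiplier = fun(CrashCount, MaxExponent) ->
        1 bsl min(CrashCount - 1, MaxExponent)
    end,
    16 = Multiplier(5, 10),     %% matches the {16, 5} row in the test above
    1024 = Multiplier(12, 10).  %% saturated, implying ?MAX_BACKOFF_EXPONENT is 10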
- - -consecutive_crashes_test_() -> - Threshold = ?DEFAULT_HEALTH_THRESHOLD_SEC, - [?_assertEqual(R, consecutive_crashes(H, Threshold)) || {R, H} <- [ - {0, []}, - {0, [added()]}, - {0, [stopped()]}, - {0, [crashed()]}, - {1, [crashed(), added()]}, - {1, [crashed(), crashed()]}, - {1, [crashed(), stopped()]}, - {3, [crashed(), crashed(), crashed(), added()]}, - {2, [crashed(), crashed(), stopped()]}, - {1, [crashed(), started(), added()]}, - {2, [crashed(3), started(2), crashed(1), started(0)]}, - {0, [stopped(3), started(2), crashed(1), started(0)]}, - {1, [crashed(3), started(2), stopped(1), started(0)]}, - {0, [crashed(999), started(0)]}, - {1, [crashed(999), started(998), crashed(997), started(0)]} - ]]. - - -consecutive_crashes_non_default_threshold_test_() -> - [?_assertEqual(R, consecutive_crashes(H, T)) || {R, H, T} <- [ - {0, [crashed(11), started(0)], 10}, - {1, [crashed(10), started(0)], 10} - ]]. - - -latest_crash_timestamp_test_() -> - [?_assertEqual({0, R, 0}, latest_crash_timestamp(H)) || {R, H} <- [ - {0, [added()]}, - {1, [crashed(1)]}, - {3, [crashed(3), started(2), crashed(1), started(0)]}, - {1, [started(3), stopped(2), crashed(1), started(0)]} - ]]. - - -last_started_test_() -> - [?_assertEqual({0, R, 0}, last_started(testjob(H))) || {R, H} <- [ - {0, [added()]}, - {0, [crashed(1)]}, - {1, [started(1)]}, - {1, [added(), started(1)]}, - {2, [started(2), started(1)]}, - {2, [crashed(3), started(2), started(1)]} - ]]. - - -longest_running_test() -> - J0 = testjob([crashed()]), - J1 = testjob([started(1)]), - J2 = testjob([started(2)]), - Sort = fun(Jobs) -> lists:sort(fun longest_running/2, Jobs) end, - ?assertEqual([], Sort([])), - ?assertEqual([J1], Sort([J1])), - ?assertEqual([J1, J2], Sort([J2, J1])), - ?assertEqual([J0, J1, J2], Sort([J2, J1, J0])). - - -scheduler_test_() -> - { - setup, - fun setup_all/0, - fun teardown_all/1, - { - foreach, - fun setup/0, - fun teardown/1, - [ - t_pending_jobs_simple(), - t_pending_jobs_skip_crashed(), - t_one_job_starts(), - t_no_jobs_start_if_max_is_0(), - t_one_job_starts_if_max_is_1(), - t_max_churn_does_not_throttle_initial_start(), - t_excess_oneshot_only_jobs(), - t_excess_continuous_only_jobs(), - t_excess_prefer_continuous_first(), - t_stop_oldest_first(), - t_start_oldest_first(), - t_jobs_churn_even_if_not_all_max_jobs_are_running(), - t_jobs_dont_churn_if_there_are_available_running_slots(), - t_start_only_pending_jobs_do_not_churn_existing_ones(), - t_dont_stop_if_nothing_pending(), - t_max_churn_limits_number_of_rotated_jobs(), - t_existing_jobs(), - t_if_pending_less_than_running_start_all_pending(), - t_running_less_than_pending_swap_all_running(), - t_oneshot_dont_get_rotated(), - t_rotate_continuous_only_if_mixed(), - t_oneshot_dont_get_starting_priority(), - t_oneshot_will_hog_the_scheduler(), - t_if_excess_is_trimmed_rotation_still_happens(), - t_if_transient_job_crashes_it_gets_removed(), - t_if_permanent_job_crashes_it_stays_in_ets(), - t_job_summary_running(), - t_job_summary_pending(), - t_job_summary_crashing_once(), - t_job_summary_crashing_many_times(), - t_job_summary_proxy_fields() - ] - } - }. - - -t_pending_jobs_simple() -> - ?_test(begin - Job1 = oneshot(1), - Job2 = oneshot(2), - setup_jobs([Job2, Job1]), - ?assertEqual([], pending_jobs(0)), - ?assertEqual([Job1], pending_jobs(1)), - ?assertEqual([Job1, Job2], pending_jobs(2)), - ?assertEqual([Job1, Job2], pending_jobs(3)) - end). 
- - -t_pending_jobs_skip_crashed() -> - ?_test(begin - Job = oneshot(1), - Ts = os:timestamp(), - History = [crashed(Ts), started(Ts) | Job#job.history], - Job1 = Job#job{history = History}, - Job2 = oneshot(2), - Job3 = oneshot(3), - setup_jobs([Job2, Job1, Job3]), - ?assertEqual([Job2], pending_jobs(1)), - ?assertEqual([Job2, Job3], pending_jobs(2)), - ?assertEqual([Job2, Job3], pending_jobs(3)) - end). - - -t_one_job_starts() -> - ?_test(begin - setup_jobs([oneshot(1)]), - ?assertEqual({0, 1}, run_stop_count()), - reschedule(mock_state(?DEFAULT_MAX_JOBS)), - ?assertEqual({1, 0}, run_stop_count()) - end). - - -t_no_jobs_start_if_max_is_0() -> - ?_test(begin - setup_jobs([oneshot(1)]), - reschedule(mock_state(0)), - ?assertEqual({0, 1}, run_stop_count()) - end). - - -t_one_job_starts_if_max_is_1() -> - ?_test(begin - setup_jobs([oneshot(1), oneshot(2)]), - reschedule(mock_state(1)), - ?assertEqual({1, 1}, run_stop_count()) - end). - - -t_max_churn_does_not_throttle_initial_start() -> - ?_test(begin - setup_jobs([oneshot(1), oneshot(2)]), - reschedule(mock_state(?DEFAULT_MAX_JOBS, 0)), - ?assertEqual({2, 0}, run_stop_count()) - end). - - -t_excess_oneshot_only_jobs() -> - ?_test(begin - setup_jobs([oneshot_running(1), oneshot_running(2)]), - ?assertEqual({2, 0}, run_stop_count()), - reschedule(mock_state(1)), - ?assertEqual({1, 1}, run_stop_count()), - reschedule(mock_state(0)), - ?assertEqual({0, 2}, run_stop_count()) - end). - - -t_excess_continuous_only_jobs() -> - ?_test(begin - setup_jobs([continuous_running(1), continuous_running(2)]), - ?assertEqual({2, 0}, run_stop_count()), - reschedule(mock_state(1)), - ?assertEqual({1, 1}, run_stop_count()), - reschedule(mock_state(0)), - ?assertEqual({0, 2}, run_stop_count()) - end). - - -t_excess_prefer_continuous_first() -> - ?_test(begin - Jobs = [ - continuous_running(1), - oneshot_running(2), - continuous_running(3) - ], - setup_jobs(Jobs), - ?assertEqual({3, 0}, run_stop_count()), - ?assertEqual({1, 0}, oneshot_run_stop_count()), - reschedule(mock_state(2)), - ?assertEqual({2, 1}, run_stop_count()), - ?assertEqual({1, 0}, oneshot_run_stop_count()), - reschedule(mock_state(1)), - ?assertEqual({1, 0}, oneshot_run_stop_count()), - reschedule(mock_state(0)), - ?assertEqual({0, 1}, oneshot_run_stop_count()) - end). - - -t_stop_oldest_first() -> - ?_test(begin - Jobs = [ - continuous_running(7), - continuous_running(4), - continuous_running(5) - ], - setup_jobs(Jobs), - reschedule(mock_state(2, 1)), - ?assertEqual({2, 1}, run_stop_count()), - ?assertEqual([4], jobs_stopped()), - reschedule(mock_state(1, 1)), - ?assertEqual([7], jobs_running()) - end). - - -t_start_oldest_first() -> - ?_test(begin - setup_jobs([continuous(7), continuous(2), continuous(5)]), - reschedule(mock_state(1)), - ?assertEqual({1, 2}, run_stop_count()), - ?assertEqual([2], jobs_running()), - reschedule(mock_state(2)), - ?assertEqual({2, 1}, run_stop_count()), - % After rescheduling with max_jobs = 2, 2 was stopped and 5, 7 should - % be running. - ?assertEqual([2], jobs_stopped()) - end). - - -t_jobs_churn_even_if_not_all_max_jobs_are_running() -> - ?_test(begin - setup_jobs([ - continuous_running(7), - continuous(2), - continuous(5) - ]), - reschedule(mock_state(2, 2)), - ?assertEqual({2, 1}, run_stop_count()), - ?assertEqual([7], jobs_stopped()) - end). 
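The churn behaviour these tests exercise follows directly from the arithmetic in rotate_jobs/2: SlotsAvailable = MaxJobs - Running and StopCount = min(Pending - SlotsAvailable, Running, Churn). A worked shell trace using the same numbers as the test just above (one running job, two pending, max_jobs = 2, max_churn = 2):

    %% Same setup as t_jobs_churn_even_if_not_all_max_jobs_are_running:
    %% job 7 is running, jobs 2 and 5 are pending, nothing stopped yet.
    MaxJobs = 2, Churn = 2, Running = 1, Pending = 2,
    SlotsAvailable = MaxJobs - Running,                                 % 1
    StopCount = lists:min([Pending - SlotsAvailable, Running, Churn]),  % 1, job 7 stops
    RunningAfterStop = Running - StopCount,                             % 0
    StartCount = max(0, MaxJobs - RunningAfterStop),                    % 2, jobs 2 and 5 start
    {StopCount, StartCount}.                                            % => {1, 2}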
- - -t_jobs_dont_churn_if_there_are_available_running_slots() -> - ?_test(begin - setup_jobs([ - continuous_running(1), - continuous_running(2) - ]), - reschedule(mock_state(2, 2)), - ?assertEqual({2, 0}, run_stop_count()), - ?assertEqual([], jobs_stopped()), - ?assertEqual(0, meck:num_calls(couch_replicator_scheduler_sup, start_child, 1)) - end). - - -t_start_only_pending_jobs_do_not_churn_existing_ones() -> - ?_test(begin - setup_jobs([ - continuous(1), - continuous_running(2) - ]), - reschedule(mock_state(2, 2)), - ?assertEqual(1, meck:num_calls(couch_replicator_scheduler_sup, start_child, 1)), - ?assertEqual([], jobs_stopped()), - ?assertEqual({2, 0}, run_stop_count()) - end). - - -t_dont_stop_if_nothing_pending() -> - ?_test(begin - setup_jobs([continuous_running(1), continuous_running(2)]), - reschedule(mock_state(2)), - ?assertEqual({2, 0}, run_stop_count()) - end). - - -t_max_churn_limits_number_of_rotated_jobs() -> - ?_test(begin - Jobs = [ - continuous(1), - continuous_running(2), - continuous(3), - continuous_running(4) - ], - setup_jobs(Jobs), - reschedule(mock_state(2, 1)), - ?assertEqual([2, 3], jobs_stopped()) - end). - - -t_if_pending_less_than_running_start_all_pending() -> - ?_test(begin - Jobs = [ - continuous(1), - continuous_running(2), - continuous(3), - continuous_running(4), - continuous_running(5) - ], - setup_jobs(Jobs), - reschedule(mock_state(3)), - ?assertEqual([1, 2, 5], jobs_running()) - end). - - -t_running_less_than_pending_swap_all_running() -> - ?_test(begin - Jobs = [ - continuous(1), - continuous(2), - continuous(3), - continuous_running(4), - continuous_running(5) - ], - setup_jobs(Jobs), - reschedule(mock_state(2)), - ?assertEqual([3, 4, 5], jobs_stopped()) - end). - - -t_oneshot_dont_get_rotated() -> - ?_test(begin - setup_jobs([oneshot_running(1), continuous(2)]), - reschedule(mock_state(1)), - ?assertEqual([1], jobs_running()) - end). - - -t_rotate_continuous_only_if_mixed() -> - ?_test(begin - setup_jobs([continuous(1), oneshot_running(2), continuous_running(3)]), - reschedule(mock_state(2)), - ?assertEqual([1, 2], jobs_running()) - end). - - -t_oneshot_dont_get_starting_priority() -> - ?_test(begin - setup_jobs([continuous(1), oneshot(2), continuous_running(3)]), - reschedule(mock_state(1)), - ?assertEqual([1], jobs_running()) - end). - - -% This tested in other test cases, it is here to mainly make explicit a property -% of one-shot replications -- they can starve other jobs if they "take control" -% of all the available scheduler slots. -t_oneshot_will_hog_the_scheduler() -> - ?_test(begin - Jobs = [ - oneshot_running(1), - oneshot_running(2), - oneshot(3), - continuous(4) - ], - setup_jobs(Jobs), - reschedule(mock_state(2)), - ?assertEqual([1, 2], jobs_running()) - end). - - -t_if_excess_is_trimmed_rotation_still_happens() -> - ?_test(begin - Jobs = [ - continuous(1), - continuous_running(2), - continuous_running(3) - ], - setup_jobs(Jobs), - reschedule(mock_state(1)), - ?assertEqual([1], jobs_running()) - end). - - -t_if_transient_job_crashes_it_gets_removed() -> - ?_test(begin - Pid = mock_pid(), - Job = #job{ - id = job1, - pid = Pid, - history = [added()], - rep = #rep{db_name = null, options = [{continuous, true}]} - }, - setup_jobs([Job]), - ?assertEqual(1, ets:info(?MODULE, size)), - State = #state{max_history = 3, stats_pid = self()}, - {noreply, State} = handle_info({'DOWN', r1, process, Pid, failed}, - State), - ?assertEqual(0, ets:info(?MODULE, size)) - end). 
- - -t_if_permanent_job_crashes_it_stays_in_ets() -> - ?_test(begin - Pid = mock_pid(), - Job = #job{ - id = job1, - pid = Pid, - history = [added()], - rep = #rep{db_name = <<"db1">>, options = [{continuous, true}]} - }, - setup_jobs([Job]), - ?assertEqual(1, ets:info(?MODULE, size)), - State = #state{max_jobs =1, max_history = 3, stats_pid = self()}, - {noreply, State} = handle_info({'DOWN', r1, process, Pid, failed}, - State), - ?assertEqual(1, ets:info(?MODULE, size)), - [Job1] = ets:lookup(?MODULE, job1), - [Latest | _] = Job1#job.history, - ?assertMatch({{crashed, failed}, _}, Latest) - end). - - -t_existing_jobs() -> - ?_test(begin - Rep = #rep{ - id = job1, - db_name = <<"db">>, - source = <<"s">>, - target = <<"t">>, - options = [{continuous, true}] - }, - setup_jobs([#job{id = Rep#rep.id, rep = Rep}]), - NewRep = #rep{ - id = Rep#rep.id, - db_name = <<"db">>, - source = <<"s">>, - target = <<"t">>, - options = [{continuous, true}] - }, - ?assert(existing_replication(NewRep)), - ?assertNot(existing_replication(NewRep#rep{source = <<"s1">>})), - ?assertNot(existing_replication(NewRep#rep{target = <<"t1">>})), - ?assertNot(existing_replication(NewRep#rep{options = []})) - end). - - -t_job_summary_running() -> - ?_test(begin - Job = #job{ - id = job1, - pid = mock_pid(), - history = [added()], - rep = #rep{ - db_name = <<"db1">>, - source = <<"s">>, - target = <<"t">> - } - }, - setup_jobs([Job]), - Summary = job_summary(job1, ?DEFAULT_HEALTH_THRESHOLD_SEC), - ?assertEqual(running, proplists:get_value(state, Summary)), - ?assertEqual(null, proplists:get_value(info, Summary)), - ?assertEqual(0, proplists:get_value(error_count, Summary)), - - Stats = [{source_seq, <<"1-abc">>}], - handle_cast({update_job_stats, job1, Stats}, mock_state(1)), - Summary1 = job_summary(job1, ?DEFAULT_HEALTH_THRESHOLD_SEC), - ?assertEqual({Stats}, proplists:get_value(info, Summary1)) - end). - - -t_job_summary_pending() -> - ?_test(begin - Job = #job{ - id = job1, - pid = undefined, - history = [stopped(20), started(10), added()], - rep = #rep{source = <<"s">>, target = <<"t">>} - }, - setup_jobs([Job]), - Summary = job_summary(job1, ?DEFAULT_HEALTH_THRESHOLD_SEC), - ?assertEqual(pending, proplists:get_value(state, Summary)), - ?assertEqual(null, proplists:get_value(info, Summary)), - ?assertEqual(0, proplists:get_value(error_count, Summary)), - - Stats = [{doc_write_failures, 1}], - handle_cast({update_job_stats, job1, Stats}, mock_state(1)), - Summary1 = job_summary(job1, ?DEFAULT_HEALTH_THRESHOLD_SEC), - ?assertEqual({Stats}, proplists:get_value(info, Summary1)) - end). - - -t_job_summary_crashing_once() -> - ?_test(begin - Job = #job{ - id = job1, - history = [crashed(?DEFAULT_HEALTH_THRESHOLD_SEC + 1), started(0)], - rep = #rep{source = <<"s">>, target = <<"t">>} - }, - setup_jobs([Job]), - Summary = job_summary(job1, ?DEFAULT_HEALTH_THRESHOLD_SEC), - ?assertEqual(crashing, proplists:get_value(state, Summary)), - Info = proplists:get_value(info, Summary), - ?assertEqual({[{<<"error">>, <<"some_reason">>}]}, Info), - ?assertEqual(0, proplists:get_value(error_count, Summary)) - end). 
- - -t_job_summary_crashing_many_times() -> - ?_test(begin - Job = #job{ - id = job1, - history = [crashed(4), started(3), crashed(2), started(1)], - rep = #rep{source = <<"s">>, target = <<"t">>} - }, - setup_jobs([Job]), - Summary = job_summary(job1, ?DEFAULT_HEALTH_THRESHOLD_SEC), - ?assertEqual(crashing, proplists:get_value(state, Summary)), - Info = proplists:get_value(info, Summary), - ?assertEqual({[{<<"error">>, <<"some_reason">>}]}, Info), - ?assertEqual(2, proplists:get_value(error_count, Summary)) - end). - - -t_job_summary_proxy_fields() -> - ?_test(begin - Job = #job{ - id = job1, - history = [started(10), added()], - rep = #rep{ - source = #httpdb{ - url = "https://s", - proxy_url = "http://u:p@sproxy:12" - }, - target = #httpdb{ - url = "http://t", - proxy_url = "socks5://u:p@tproxy:34" - } - } - }, - setup_jobs([Job]), - Summary = job_summary(job1, ?DEFAULT_HEALTH_THRESHOLD_SEC), - ?assertEqual(<<"http://u:*****@sproxy:12">>, - proplists:get_value(source_proxy, Summary)), - ?assertEqual(<<"socks5://u:*****@tproxy:34">>, - proplists:get_value(target_proxy, Summary)) - end). - - -% Test helper functions - -setup_all() -> - catch ets:delete(?MODULE), - meck:expect(couch_log, notice, 2, ok), - meck:expect(couch_log, warning, 2, ok), - meck:expect(couch_log, error, 2, ok), - meck:expect(couch_replicator_scheduler_sup, terminate_child, 1, ok), - meck:expect(couch_stats, increment_counter, 1, ok), - meck:expect(couch_stats, update_gauge, 2, ok), - Pid = mock_pid(), - meck:expect(couch_replicator_scheduler_sup, start_child, 1, {ok, Pid}). - - -teardown_all(_) -> - catch ets:delete(?MODULE), - meck:unload(). - - -setup() -> - meck:reset([ - couch_log, - couch_replicator_scheduler_sup, - couch_stats - ]). - - -teardown(_) -> - ok. - - -setup_jobs(Jobs) when is_list(Jobs) -> - ?MODULE = ets:new(?MODULE, [named_table, {keypos, #job.id}]), - ets:insert(?MODULE, Jobs). - - -all_jobs() -> - lists:usort(ets:tab2list(?MODULE)). - - -jobs_stopped() -> - [Job#job.id || Job <- all_jobs(), Job#job.pid =:= undefined]. - - -jobs_running() -> - [Job#job.id || Job <- all_jobs(), Job#job.pid =/= undefined]. - - -run_stop_count() -> - {length(jobs_running()), length(jobs_stopped())}. - - -oneshot_run_stop_count() -> - Running = [Job#job.id || Job <- all_jobs(), Job#job.pid =/= undefined, - not is_continuous(Job)], - Stopped = [Job#job.id || Job <- all_jobs(), Job#job.pid =:= undefined, - not is_continuous(Job)], - {length(Running), length(Stopped)}. - - -mock_state(MaxJobs) -> - #state{ - max_jobs = MaxJobs, - max_churn = ?DEFAULT_MAX_CHURN, - max_history = ?DEFAULT_MAX_HISTORY, - stats_pid = self() - }. - -mock_state(MaxJobs, MaxChurn) -> - #state{ - max_jobs = MaxJobs, - max_churn = MaxChurn, - max_history = ?DEFAULT_MAX_HISTORY, - stats_pid = self() - }. - - -continuous(Id) when is_integer(Id) -> - Started = Id, - Hist = [stopped(Started+1), started(Started), added()], - #job{ - id = Id, - history = Hist, - rep = #rep{options = [{continuous, true}]} - }. - - -continuous_running(Id) when is_integer(Id) -> - Started = Id, - Pid = mock_pid(), - #job{ - id = Id, - history = [started(Started), added()], - rep = #rep{options = [{continuous, true}]}, - pid = Pid, - monitor = monitor(process, Pid) - }. - - -oneshot(Id) when is_integer(Id) -> - Started = Id, - Hist = [stopped(Started + 1), started(Started), added()], - #job{id = Id, history = Hist, rep = #rep{options = []}}. 
- - -oneshot_running(Id) when is_integer(Id) -> - Started = Id, - Pid = mock_pid(), - #job{ - id = Id, - history = [started(Started), added()], - rep = #rep{options = []}, - pid = Pid, - monitor = monitor(process, Pid) - }. - - -testjob(Hist) when is_list(Hist) -> - #job{history = Hist}. - - -mock_pid() -> - list_to_pid("<0.999.999>"). - -crashed() -> - crashed(0). - - -crashed(WhenSec) when is_integer(WhenSec)-> - {{crashed, some_reason}, {0, WhenSec, 0}}; -crashed({MSec, Sec, USec}) -> - {{crashed, some_reason}, {MSec, Sec, USec}}. - - -started() -> - started(0). - - -started(WhenSec) when is_integer(WhenSec)-> - {started, {0, WhenSec, 0}}; - -started({MSec, Sec, USec}) -> - {started, {MSec, Sec, USec}}. - - -stopped() -> - stopped(0). - - -stopped(WhenSec) -> - {stopped, {0, WhenSec, 0}}. - - -added() -> - {added, {0, 0, 0}}. - --endif. diff --git a/src/couch_replicator/src/couch_replicator_scheduler_job.erl b/src/couch_replicator/src/couch_replicator_scheduler_job.erl deleted file mode 100644 index 0b33419e1..000000000 --- a/src/couch_replicator/src/couch_replicator_scheduler_job.erl +++ /dev/null @@ -1,1090 +0,0 @@ -% Licensed under the Apache License, Version 2.0 (the "License"); you may not -% use this file except in compliance with the License. You may obtain a copy of -% the License at -% -% http://www.apache.org/licenses/LICENSE-2.0 -% -% Unless required by applicable law or agreed to in writing, software -% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -% License for the specific language governing permissions and limitations under -% the License. - --module(couch_replicator_scheduler_job). - --behaviour(gen_server). - --export([ - start_link/1 -]). - --export([ - init/1, - terminate/2, - handle_call/3, - handle_info/2, - handle_cast/2, - code_change/3, - format_status/2 -]). - --include_lib("couch/include/couch_db.hrl"). --include_lib("couch_replicator/include/couch_replicator_api_wrap.hrl"). --include("couch_replicator_scheduler.hrl"). --include("couch_replicator.hrl"). - --import(couch_util, [ - get_value/2, - get_value/3, - to_binary/1 -]). - --import(couch_replicator_utils, [ - pp_rep_id/1 -]). - - --define(LOWEST_SEQ, 0). --define(DEFAULT_CHECKPOINT_INTERVAL, 30000). --define(STARTUP_JITTER_DEFAULT, 5000). - --record(rep_state, { - rep_details, - source_name, - target_name, - source, - target, - history, - checkpoint_history, - start_seq, - committed_seq, - current_through_seq, - seqs_in_progress = [], - highest_seq_done = {0, ?LOWEST_SEQ}, - source_log, - target_log, - rep_starttime, - src_starttime, - tgt_starttime, - timer, % checkpoint timer - changes_queue, - changes_manager, - changes_reader, - workers, - stats = couch_replicator_stats:new(), - session_id, - source_seq = nil, - use_checkpoints = true, - checkpoint_interval = ?DEFAULT_CHECKPOINT_INTERVAL, - type = db, - view = nil -}). - - -start_link(#rep{id = {BaseId, Ext}, source = Src, target = Tgt} = Rep) -> - RepChildId = BaseId ++ Ext, - Source = couch_replicator_api_wrap:db_uri(Src), - Target = couch_replicator_api_wrap:db_uri(Tgt), - ServerName = {global, {?MODULE, Rep#rep.id}}, - - case gen_server:start_link(ServerName, ?MODULE, Rep, []) of - {ok, Pid} -> - {ok, Pid}; - {error, Reason} -> - couch_log:warning("failed to start replication `~s` (`~s` -> `~s`)", - [RepChildId, Source, Target]), - {error, Reason} - end. - - -init(InitArgs) -> - {ok, InitArgs, 0}. 
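The init/1 above is the common deferred-initialization idiom: it returns immediately with a zero timeout so the supervisor is not blocked, and the expensive setup runs later in handle_info(timeout, InitArgs), which calls do_init/1 just below. A minimal generic sketch of the pattern, with placeholder names (deferred_init_sketch and slow_setup/1 are illustrative, not CouchDB code):

    -module(deferred_init_sketch).
    -behaviour(gen_server).

    -export([start_link/1]).
    -export([init/1, handle_call/3, handle_cast/2, handle_info/2]).

    start_link(Args) ->
        gen_server:start_link(?MODULE, Args, []).

    %% Return right away; the 0 timeout causes a `timeout` message to arrive
    %% almost immediately (assuming nothing else is in the mailbox), which
    %% triggers the real setup below.
    init(Args) ->
        {ok, Args, 0}.

    handle_info(timeout, Args) ->
        State = slow_setup(Args),   % the expensive work happens here
        {noreply, State};
    handle_info(_Msg, State) ->
        {noreply, State}.

    handle_call(_Req, _From, State) -> {reply, ok, State}.
    handle_cast(_Msg, State) -> {noreply, State}.

    slow_setup(Args) ->
        timer:sleep(100),           % stand-in for opening DBs and spawning workers
        #{args => Args}.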
-
-
-do_init(#rep{options = Options, id = {BaseId, Ext}, user_ctx=UserCtx} = Rep) ->
-    process_flag(trap_exit, true),
-
-    timer:sleep(startup_jitter()),
-
-    #rep_state{
-        source = Source,
-        target = Target,
-        source_name = SourceName,
-        target_name = TargetName,
-        start_seq = {_Ts, StartSeq},
-        highest_seq_done = {_, HighestSeq},
-        checkpoint_interval = CheckpointInterval
-    } = State = init_state(Rep),
-
-    NumWorkers = get_value(worker_processes, Options),
-    BatchSize = get_value(worker_batch_size, Options),
-    {ok, ChangesQueue} = couch_work_queue:new([
-        {max_items, BatchSize * NumWorkers * 2},
-        {max_size, 100 * 1024 * NumWorkers}
-    ]),
-    % This starts the _changes reader process. It adds the changes from
-    % the source db to the ChangesQueue.
-    {ok, ChangesReader} = couch_replicator_changes_reader:start_link(
-        StartSeq, Source, ChangesQueue, Options
-    ),
-    % Changes manager - responsible for dequeuing batches from the changes
-    % queue and delivering them to the worker processes.
-    ChangesManager = spawn_changes_manager(self(), ChangesQueue, BatchSize),
-    % This starts the worker processes. They ask the changes queue manager for
-    % a batch of _changes rows to process -> check which revs are missing in
-    % the target, and for the missing ones, copy them from the source to the
-    % target.
-    MaxConns = get_value(http_connections, Options),
-    Workers = lists:map(
-        fun(_) ->
-            couch_stats:increment_counter([couch_replicator, workers_started]),
-            {ok, Pid} = couch_replicator_worker:start_link(
-                self(), Source, Target, ChangesManager, MaxConns),
-            Pid
-        end,
-        lists:seq(1, NumWorkers)),
-
-    couch_task_status:add_task([
-        {type, replication},
-        {user, UserCtx#user_ctx.name},
-        {replication_id, ?l2b(BaseId ++ Ext)},
-        {database, Rep#rep.db_name},
-        {doc_id, Rep#rep.doc_id},
-        {source, ?l2b(SourceName)},
-        {target, ?l2b(TargetName)},
-        {continuous, get_value(continuous, Options, false)},
-        {source_seq, HighestSeq},
-        {checkpoint_interval, CheckpointInterval}
-    ] ++ rep_stats(State)),
-    couch_task_status:set_update_frequency(1000),
-
-    % Until OTP R14B03:
-    %
-    % Restarting a temporary supervised child implies that the original arguments
-    % (#rep{} record) specified in the MFA component of the supervisor
-    % child spec will always be used whenever the child is restarted.
-    % This implies the same replication performance tuning parameters will
-    % always be used. The solution is to delete the child spec (see
-    % cancel_replication/1) and then start the replication again, but this is
-    % unfortunately not immune to race conditions.
-
-    log_replication_start(State),
-    couch_log:debug("Worker pids are: ~p", [Workers]),
-
-    doc_update_triggered(Rep),
-
-    {ok, State#rep_state{
-            changes_queue = ChangesQueue,
-            changes_manager = ChangesManager,
-            changes_reader = ChangesReader,
-            workers = Workers
-        }
-    }.
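The changes queue limits above scale with the worker options: at most two full batches per worker may be queued, and at most 100 KiB of buffered changes per worker. A worked shell example with made-up option values (these are not the replicator defaults):

    %% With, say, 4 workers and a batch size of 500 changes per request:
    NumWorkers = 4,
    BatchSize  = 500,
    MaxItems   = BatchSize * NumWorkers * 2,   % 4000 queued change rows
    MaxSize    = 100 * 1024 * NumWorkers,      % 409600 bytes, about 400 KiB
    {MaxItems, MaxSize}.                       % => {4000, 409600}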
- - -handle_call({add_stats, Stats}, From, State) -> - gen_server:reply(From, ok), - NewStats = couch_replicator_utils:sum_stats(State#rep_state.stats, Stats), - {noreply, State#rep_state{stats = NewStats}}; - -handle_call({report_seq_done, Seq, StatsInc}, From, - #rep_state{seqs_in_progress = SeqsInProgress, highest_seq_done = HighestDone, - current_through_seq = ThroughSeq, stats = Stats} = State) -> - gen_server:reply(From, ok), - {NewThroughSeq0, NewSeqsInProgress} = case SeqsInProgress of - [] -> - {Seq, []}; - [Seq | Rest] -> - {Seq, Rest}; - [_ | _] -> - {ThroughSeq, ordsets:del_element(Seq, SeqsInProgress)} - end, - NewHighestDone = lists:max([HighestDone, Seq]), - NewThroughSeq = case NewSeqsInProgress of - [] -> - lists:max([NewThroughSeq0, NewHighestDone]); - _ -> - NewThroughSeq0 - end, - couch_log:debug("Worker reported seq ~p, through seq was ~p, " - "new through seq is ~p, highest seq done was ~p, " - "new highest seq done is ~p~n" - "Seqs in progress were: ~p~nSeqs in progress are now: ~p", - [Seq, ThroughSeq, NewThroughSeq, HighestDone, - NewHighestDone, SeqsInProgress, NewSeqsInProgress]), - NewState = State#rep_state{ - stats = couch_replicator_utils:sum_stats(Stats, StatsInc), - current_through_seq = NewThroughSeq, - seqs_in_progress = NewSeqsInProgress, - highest_seq_done = NewHighestDone - }, - update_task(NewState), - {noreply, NewState}. - - -handle_cast(checkpoint, State) -> - case do_checkpoint(State) of - {ok, NewState} -> - couch_stats:increment_counter([couch_replicator, checkpoints, success]), - {noreply, NewState#rep_state{timer = start_timer(State)}}; - Error -> - couch_stats:increment_counter([couch_replicator, checkpoints, failure]), - {stop, Error, State} - end; - -handle_cast({report_seq, Seq}, - #rep_state{seqs_in_progress = SeqsInProgress} = State) -> - NewSeqsInProgress = ordsets:add_element(Seq, SeqsInProgress), - {noreply, State#rep_state{seqs_in_progress = NewSeqsInProgress}}. 
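Together, report_seq and report_seq_done implement the rule that the checkpointable sequence may only advance past a given sequence once every earlier in-progress sequence has finished. A standalone sketch of that bookkeeping, using plain integers instead of the {Ts, Seq} pairs the module really uses (through_seq_sketch is an illustrative module, not part of the replicator):

    -module(through_seq_sketch).
    -export([done/2]).

    %% State is {SeqsInProgress, ThroughSeq, HighestDone}, mirroring the
    %% #rep_state{} fields that report_seq_done updates. Seq is the sequence
    %% a worker just finished.
    done(Seq, {SeqsInProgress, ThroughSeq, HighestDone}) ->
        {ThroughSeq0, InProgress} = case SeqsInProgress of
            [] -> {Seq, []};
            [Seq | Rest] -> {Seq, Rest};
            [_ | _] -> {ThroughSeq, ordsets:del_element(Seq, SeqsInProgress)}
        end,
        HighestDone1 = lists:max([HighestDone, Seq]),
        ThroughSeq1 = case InProgress of
            [] -> lists:max([ThroughSeq0, HighestDone1]);
            _ -> ThroughSeq0
        end,
        {InProgress, ThroughSeq1, HighestDone1}.

Starting from {[5, 6, 7], 4, 4}: finishing 6 first gives {[5, 7], 4, 6}, the through seq stays at 4 because 5 is still running; finishing 5 next gives {[7], 5, 6}; finishing 7 last gives {[], 7, 7}, at which point a checkpoint may safely record sequence 7.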
- - -handle_info(shutdown, St) -> - {stop, shutdown, St}; - -handle_info({'EXIT', Pid, max_backoff}, State) -> - couch_log:error("Max backoff reached child process ~p", [Pid]), - {stop, {shutdown, max_backoff}, State}; - -handle_info({'EXIT', Pid, {shutdown, max_backoff}}, State) -> - couch_log:error("Max backoff reached child process ~p", [Pid]), - {stop, {shutdown, max_backoff}, State}; - -handle_info({'EXIT', Pid, normal}, #rep_state{changes_reader=Pid} = State) -> - {noreply, State}; - -handle_info({'EXIT', Pid, Reason0}, #rep_state{changes_reader=Pid} = State) -> - couch_stats:increment_counter([couch_replicator, changes_reader_deaths]), - Reason = case Reason0 of - {changes_req_failed, _, _} = HttpFail -> - HttpFail; - {http_request_failed, _, _, {error, {code, Code}}} -> - {changes_req_failed, Code}; - {http_request_failed, _, _, {error, Err}} -> - {changes_req_failed, Err}; - Other -> - {changes_reader_died, Other} - end, - couch_log:error("ChangesReader process died with reason: ~p", [Reason]), - {stop, {shutdown, Reason}, cancel_timer(State)}; - -handle_info({'EXIT', Pid, normal}, #rep_state{changes_manager = Pid} = State) -> - {noreply, State}; - -handle_info({'EXIT', Pid, Reason}, #rep_state{changes_manager = Pid} = State) -> - couch_stats:increment_counter([couch_replicator, changes_manager_deaths]), - couch_log:error("ChangesManager process died with reason: ~p", [Reason]), - {stop, {shutdown, {changes_manager_died, Reason}}, cancel_timer(State)}; - -handle_info({'EXIT', Pid, normal}, #rep_state{changes_queue=Pid} = State) -> - {noreply, State}; - -handle_info({'EXIT', Pid, Reason}, #rep_state{changes_queue=Pid} = State) -> - couch_stats:increment_counter([couch_replicator, changes_queue_deaths]), - couch_log:error("ChangesQueue process died with reason: ~p", [Reason]), - {stop, {shutdown, {changes_queue_died, Reason}}, cancel_timer(State)}; - -handle_info({'EXIT', Pid, normal}, #rep_state{workers = Workers} = State) -> - case Workers -- [Pid] of - Workers -> - couch_log:error("unknown pid bit the dust ~p ~n",[Pid]), - {noreply, State#rep_state{workers = Workers}}; - %% not clear why a stop was here before - %%{stop, {unknown_process_died, Pid, normal}, State}; - [] -> - catch unlink(State#rep_state.changes_manager), - catch exit(State#rep_state.changes_manager, kill), - do_last_checkpoint(State); - Workers2 -> - {noreply, State#rep_state{workers = Workers2}} - end; - -handle_info({'EXIT', Pid, Reason}, #rep_state{workers = Workers} = State) -> - State2 = cancel_timer(State), - case lists:member(Pid, Workers) of - false -> - {stop, {unknown_process_died, Pid, Reason}, State2}; - true -> - couch_stats:increment_counter([couch_replicator, worker_deaths]), - StopReason = case Reason of - {shutdown, _} = Err -> - Err; - Other -> - couch_log:error("Worker ~p died with reason: ~p", [Pid, Reason]), - {worker_died, Pid, Other} - end, - {stop, StopReason, State2} - end; - -handle_info(timeout, InitArgs) -> - try do_init(InitArgs) of {ok, State} -> - {noreply, State} - catch - exit:{http_request_failed, _, _, max_backoff} -> - {stop, {shutdown, max_backoff}, {error, InitArgs}}; - Class:Error -> - ShutdownReason = {error, replication_start_error(Error)}, - StackTop2 = lists:sublist(erlang:get_stacktrace(), 2), - % Shutdown state is a hack as it is not really the state of the - % gen_server (it failed to initialize, so it doesn't have one). - % Shutdown state is used to pass extra info about why start failed. 
-            ShutdownState = {error, Class, StackTop2, InitArgs},
-            {stop, {shutdown, ShutdownReason}, ShutdownState}
-    end.
-
-
-terminate(normal, #rep_state{rep_details = #rep{id = RepId} = Rep,
-    checkpoint_history = CheckpointHistory} = State) ->
-    terminate_cleanup(State),
-    couch_replicator_notifier:notify({finished, RepId, CheckpointHistory}),
-    doc_update_completed(Rep, rep_stats(State));
-
-terminate(shutdown, #rep_state{rep_details = #rep{id = RepId}} = State) ->
-    % Replication stopped via _scheduler_sup:terminate_child/1, which can
-    % occur during regular scheduler operation or when the job is removed from
-    % the scheduler.
-    State1 = case do_checkpoint(State) of
-        {ok, NewState} ->
-            NewState;
-        Error ->
-            LogMsg = "~p : Failed last checkpoint. Job: ~p Error: ~p",
-            couch_log:error(LogMsg, [?MODULE, RepId, Error]),
-            State
-    end,
-    couch_replicator_notifier:notify({stopped, RepId, <<"stopped">>}),
-    terminate_cleanup(State1);
-
-terminate({shutdown, max_backoff}, {error, InitArgs}) ->
-    #rep{id = {BaseId, Ext} = RepId} = InitArgs,
-    couch_stats:increment_counter([couch_replicator, failed_starts]),
-    couch_log:warning("Replication `~s` reached max backoff", [BaseId ++ Ext]),
-    couch_replicator_notifier:notify({error, RepId, max_backoff});
-
-terminate({shutdown, {error, Error}}, {error, Class, Stack, InitArgs}) ->
-    #rep{
-        id = {BaseId, Ext} = RepId,
-        source = Source0,
-        target = Target0,
-        doc_id = DocId,
-        db_name = DbName
-    } = InitArgs,
-    Source = couch_replicator_api_wrap:db_uri(Source0),
-    Target = couch_replicator_api_wrap:db_uri(Target0),
-    RepIdStr = BaseId ++ Ext,
-    Msg = "~p:~p: Replication ~s failed to start ~p -> ~p doc ~p:~p stack:~p",
-    couch_log:error(Msg, [Class, Error, RepIdStr, Source, Target, DbName,
-        DocId, Stack]),
-    couch_stats:increment_counter([couch_replicator, failed_starts]),
-    couch_replicator_notifier:notify({error, RepId, Error});
-
-terminate({shutdown, max_backoff}, State) ->
-    #rep_state{
-        source_name = Source,
-        target_name = Target,
-        rep_details = #rep{id = {BaseId, Ext} = RepId}
-    } = State,
-    couch_log:error("Replication `~s` (`~s` -> `~s`) reached max backoff",
-        [BaseId ++ Ext, Source, Target]),
-    terminate_cleanup(State),
-    couch_replicator_notifier:notify({error, RepId, max_backoff});
-
-terminate({shutdown, Reason}, State) ->
-    % Unwrap so when reporting we don't have an extra {shutdown, ...} tuple
-    % wrapped around the message
-    terminate(Reason, State);
-
-terminate(Reason, State) ->
-    #rep_state{
-        source_name = Source,
-        target_name = Target,
-        rep_details = #rep{id = {BaseId, Ext} = RepId}
-    } = State,
-    couch_log:error("Replication `~s` (`~s` -> `~s`) failed: ~s",
-        [BaseId ++ Ext, Source, Target, to_binary(Reason)]),
-    terminate_cleanup(State),
-    couch_replicator_notifier:notify({error, RepId, Reason}).
-
-terminate_cleanup(State) ->
-    update_task(State),
-    couch_replicator_api_wrap:db_close(State#rep_state.source),
-    couch_replicator_api_wrap:db_close(State#rep_state.target).
-
-
-code_change(_OldVsn, #rep_state{}=State, _Extra) ->
-    {ok, State}.
- - -format_status(_Opt, [_PDict, State]) -> - #rep_state{ - source = Source, - target = Target, - rep_details = RepDetails, - start_seq = StartSeq, - source_seq = SourceSeq, - committed_seq = CommitedSeq, - current_through_seq = ThroughSeq, - highest_seq_done = HighestSeqDone, - session_id = SessionId - } = state_strip_creds(State), - #rep{ - id = RepId, - options = Options, - doc_id = DocId, - db_name = DbName - } = RepDetails, - [ - {rep_id, RepId}, - {source, couch_replicator_api_wrap:db_uri(Source)}, - {target, couch_replicator_api_wrap:db_uri(Target)}, - {db_name, DbName}, - {doc_id, DocId}, - {options, Options}, - {session_id, SessionId}, - {start_seq, StartSeq}, - {source_seq, SourceSeq}, - {committed_seq, CommitedSeq}, - {current_through_seq, ThroughSeq}, - {highest_seq_done, HighestSeqDone} - ]. - - -startup_jitter() -> - Jitter = config:get_integer("replicator", "startup_jitter", - ?STARTUP_JITTER_DEFAULT), - couch_rand:uniform(erlang:max(1, Jitter)). - - -headers_strip_creds([], Acc) -> - lists:reverse(Acc); -headers_strip_creds([{Key, Value0} | Rest], Acc) -> - Value = case string:to_lower(Key) of - "authorization" -> - "****"; - _ -> - Value0 - end, - headers_strip_creds(Rest, [{Key, Value} | Acc]). - - -httpdb_strip_creds(#httpdb{url = Url, headers = Headers} = HttpDb) -> - HttpDb#httpdb{ - url = couch_util:url_strip_password(Url), - headers = headers_strip_creds(Headers, []) - }; -httpdb_strip_creds(LocalDb) -> - LocalDb. - - -rep_strip_creds(#rep{source = Source, target = Target} = Rep) -> - Rep#rep{ - source = httpdb_strip_creds(Source), - target = httpdb_strip_creds(Target) - }. - - -state_strip_creds(#rep_state{rep_details = Rep, source = Source, target = Target} = State) -> - % #rep_state contains the source and target at the top level and also - % in the nested #rep_details record - State#rep_state{ - rep_details = rep_strip_creds(Rep), - source = httpdb_strip_creds(Source), - target = httpdb_strip_creds(Target) - }. - - -adjust_maxconn(Src = #httpdb{http_connections = 1}, RepId) -> - Msg = "Adjusting minimum number of HTTP source connections to 2 for ~p", - couch_log:notice(Msg, [RepId]), - Src#httpdb{http_connections = 2}; -adjust_maxconn(Src, _RepId) -> - Src. - - --spec doc_update_triggered(#rep{}) -> ok. -doc_update_triggered(#rep{db_name = null}) -> - ok; -doc_update_triggered(#rep{id = RepId, doc_id = DocId} = Rep) -> - case couch_replicator_doc_processor:update_docs() of - true -> - couch_replicator_docs:update_triggered(Rep, RepId); - false -> - ok - end, - couch_log:notice("Document `~s` triggered replication `~s`", - [DocId, pp_rep_id(RepId)]), - ok. - - --spec doc_update_completed(#rep{}, list()) -> ok. -doc_update_completed(#rep{db_name = null}, _Stats) -> - ok; -doc_update_completed(#rep{id = RepId, doc_id = DocId, db_name = DbName, - start_time = StartTime}, Stats0) -> - Stats = Stats0 ++ [{start_time, couch_replicator_utils:iso8601(StartTime)}], - couch_replicator_docs:update_doc_completed(DbName, DocId, Stats), - couch_log:notice("Replication `~s` completed (triggered by `~s`)", - [pp_rep_id(RepId), DocId]), - ok. 
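The *_strip_creds helpers above keep credentials out of logs and status output: Authorization header values are blanked and URL passwords are masked via couch_util:url_strip_password/1. A standalone sketch of just the header part (the module name is made up; like the original it compares the header name case-insensitively):

    -module(strip_creds_sketch).
    -export([strip_headers/1]).

    %% Replace the value of any Authorization header with "****",
    %% leaving all other headers untouched.
    strip_headers(Headers) ->
        [case string:to_lower(Key) of
             "authorization" -> {Key, "****"};
             _ -> {Key, Value}
         end || {Key, Value} <- Headers].

For example, strip_headers([{"Authorization", "Basic dXNlcjpwYXNz"}, {"Accept", "application/json"}]) returns [{"Authorization", "****"}, {"Accept", "application/json"}].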
- - -do_last_checkpoint(#rep_state{seqs_in_progress = [], - highest_seq_done = {_Ts, ?LOWEST_SEQ}} = State) -> - {stop, normal, cancel_timer(State)}; -do_last_checkpoint(#rep_state{seqs_in_progress = [], - highest_seq_done = Seq} = State) -> - case do_checkpoint(State#rep_state{current_through_seq = Seq}) of - {ok, NewState} -> - couch_stats:increment_counter([couch_replicator, checkpoints, success]), - {stop, normal, cancel_timer(NewState)}; - Error -> - couch_stats:increment_counter([couch_replicator, checkpoints, failure]), - {stop, Error, State} - end. - - -start_timer(State) -> - After = State#rep_state.checkpoint_interval, - case timer:apply_after(After, gen_server, cast, [self(), checkpoint]) of - {ok, Ref} -> - Ref; - Error -> - couch_log:error("Replicator, error scheduling checkpoint: ~p", [Error]), - nil - end. - - -cancel_timer(#rep_state{timer = nil} = State) -> - State; -cancel_timer(#rep_state{timer = Timer} = State) -> - {ok, cancel} = timer:cancel(Timer), - State#rep_state{timer = nil}. - - -init_state(Rep) -> - #rep{ - id = {BaseId, _Ext}, - source = Src0, target = Tgt, - options = Options, - type = Type, view = View, - start_time = StartTime, - stats = ArgStats0 - } = Rep, - % Adjust minimum number of http source connections to 2 to avoid deadlock - Src = adjust_maxconn(Src0, BaseId), - {ok, Source} = couch_replicator_api_wrap:db_open(Src), - {CreateTargetParams} = get_value(create_target_params, Options, {[]}), - {ok, Target} = couch_replicator_api_wrap:db_open(Tgt, - get_value(create_target, Options, false), CreateTargetParams), - - {ok, SourceInfo} = couch_replicator_api_wrap:get_db_info(Source), - {ok, TargetInfo} = couch_replicator_api_wrap:get_db_info(Target), - - [SourceLog, TargetLog] = find_and_migrate_logs([Source, Target], Rep), - - {StartSeq0, History} = compare_replication_logs(SourceLog, TargetLog), - - ArgStats1 = couch_replicator_stats:new(ArgStats0), - HistoryStats = case History of - [{[_ | _] = HProps} | _] -> couch_replicator_stats:new(HProps); - _ -> couch_replicator_stats:new() - end, - Stats = couch_replicator_stats:max_stats(ArgStats1, HistoryStats), - - StartSeq1 = get_value(since_seq, Options, StartSeq0), - StartSeq = {0, StartSeq1}, - - SourceSeq = get_value(<<"update_seq">>, SourceInfo, ?LOWEST_SEQ), - - #doc{body={CheckpointHistory}} = SourceLog, - State = #rep_state{ - rep_details = Rep, - source_name = couch_replicator_api_wrap:db_uri(Source), - target_name = couch_replicator_api_wrap:db_uri(Target), - source = Source, - target = Target, - history = History, - checkpoint_history = {[{<<"no_changes">>, true}| CheckpointHistory]}, - start_seq = StartSeq, - current_through_seq = StartSeq, - committed_seq = StartSeq, - source_log = SourceLog, - target_log = TargetLog, - rep_starttime = StartTime, - src_starttime = get_value(<<"instance_start_time">>, SourceInfo), - tgt_starttime = get_value(<<"instance_start_time">>, TargetInfo), - session_id = couch_uuids:random(), - source_seq = SourceSeq, - use_checkpoints = get_value(use_checkpoints, Options, true), - checkpoint_interval = get_value(checkpoint_interval, Options, - ?DEFAULT_CHECKPOINT_INTERVAL), - type = Type, - view = View, - stats = Stats - }, - State#rep_state{timer = start_timer(State)}. - - -find_and_migrate_logs(DbList, #rep{id = {BaseId, _}} = Rep) -> - LogId = ?l2b(?LOCAL_DOC_PREFIX ++ BaseId), - fold_replication_logs(DbList, ?REP_ID_VERSION, LogId, LogId, Rep, []). 
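find_and_migrate_logs stores and looks up the checkpoint as a _local document whose id is derived from the base replication id, and fold_replication_logs below falls back to ids computed with older replication-id versions, migrating any old checkpoint it finds. A tiny sketch of the id construction, assuming ?LOCAL_DOC_PREFIX expands to "_local/" as defined in couch_db.hrl (checkpoint_doc_id/1 is illustrative, not a function in this module):

    %% BaseId is the first element of a #rep{} id such as {BaseId, Extension};
    %% the extension is not part of the checkpoint doc id.
    checkpoint_doc_id(BaseId) when is_list(BaseId) ->
        list_to_binary("_local/" ++ BaseId).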
- - -fold_replication_logs([], _Vsn, _LogId, _NewId, _Rep, Acc) -> - lists:reverse(Acc); - -fold_replication_logs([Db | Rest] = Dbs, Vsn, LogId, NewId, Rep, Acc) -> - case couch_replicator_api_wrap:open_doc(Db, LogId, [ejson_body]) of - {error, <<"not_found">>} when Vsn > 1 -> - OldRepId = couch_replicator_utils:replication_id(Rep, Vsn - 1), - fold_replication_logs(Dbs, Vsn - 1, - ?l2b(?LOCAL_DOC_PREFIX ++ OldRepId), NewId, Rep, Acc); - {error, <<"not_found">>} -> - fold_replication_logs( - Rest, ?REP_ID_VERSION, NewId, NewId, Rep, [#doc{id = NewId} | Acc]); - {ok, Doc} when LogId =:= NewId -> - fold_replication_logs( - Rest, ?REP_ID_VERSION, NewId, NewId, Rep, [Doc | Acc]); - {ok, Doc} -> - MigratedLog = #doc{id = NewId, body = Doc#doc.body}, - maybe_save_migrated_log(Rep, Db, MigratedLog, Doc#doc.id), - fold_replication_logs( - Rest, ?REP_ID_VERSION, NewId, NewId, Rep, [MigratedLog | Acc]) - end. - - -maybe_save_migrated_log(Rep, Db, #doc{} = Doc, OldId) -> - case get_value(use_checkpoints, Rep#rep.options, true) of - true -> - update_checkpoint(Db, Doc), - Msg = "Migrated replication checkpoint. Db:~p ~p -> ~p", - couch_log:notice(Msg, [httpdb_strip_creds(Db), OldId, Doc#doc.id]); - false -> - ok - end. - - -spawn_changes_manager(Parent, ChangesQueue, BatchSize) -> - spawn_link(fun() -> - changes_manager_loop_open(Parent, ChangesQueue, BatchSize, 1) - end). - - -changes_manager_loop_open(Parent, ChangesQueue, BatchSize, Ts) -> - receive - {get_changes, From} -> - case couch_work_queue:dequeue(ChangesQueue, BatchSize) of - closed -> - From ! {closed, self()}; - {ok, ChangesOrLastSeqs} -> - ReportSeq = case lists:last(ChangesOrLastSeqs) of - {last_seq, Seq} -> - {Ts, Seq}; - #doc_info{high_seq = Seq} -> - {Ts, Seq} - end, - Changes = lists:filter( - fun(#doc_info{}) -> - true; - ({last_seq, _Seq}) -> - false - end, ChangesOrLastSeqs), - ok = gen_server:cast(Parent, {report_seq, ReportSeq}), - From ! {changes, self(), Changes, ReportSeq} - end, - changes_manager_loop_open(Parent, ChangesQueue, BatchSize, Ts + 1) - end. 
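Seen from a worker, the changes manager above is a small request/reply protocol: send it {get_changes, self()} and wait for either a batch plus its report sequence or a notice that the queue is closed. A fragment sketching the consuming side (a simplified stand-in for what couch_replicator_worker does; the timeout exists only to keep the sketch from blocking forever):

    %% Ask the changes manager for the next batch of changes. Returns
    %% {ok, Changes, ReportSeq} for a batch, or closed when the queue is done.
    next_batch(ChangesManager) ->
        ChangesManager ! {get_changes, self()},
        receive
            {changes, ChangesManager, Changes, ReportSeq} ->
                {ok, Changes, ReportSeq};
            {closed, ChangesManager} ->
                closed
        after 5000 ->
            {error, timeout}   % the timeout is only for this sketch
        end.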
- - -do_checkpoint(#rep_state{use_checkpoints=false} = State) -> - NewState = State#rep_state{checkpoint_history = {[{<<"use_checkpoints">>, false}]} }, - {ok, NewState}; -do_checkpoint(#rep_state{current_through_seq=Seq, committed_seq=Seq} = State) -> - update_task(State), - {ok, State}; -do_checkpoint(State) -> - #rep_state{ - source_name=SourceName, - target_name=TargetName, - source = Source, - target = Target, - history = OldHistory, - start_seq = {_, StartSeq}, - current_through_seq = {_Ts, NewSeq} = NewTsSeq, - source_log = SourceLog, - target_log = TargetLog, - rep_starttime = ReplicationStartTime, - src_starttime = SrcInstanceStartTime, - tgt_starttime = TgtInstanceStartTime, - stats = Stats, - rep_details = #rep{options = Options}, - session_id = SessionId - } = State, - case commit_to_both(Source, Target) of - {source_error, Reason} -> - {checkpoint_commit_failure, - <<"Failure on source commit: ", (to_binary(Reason))/binary>>}; - {target_error, Reason} -> - {checkpoint_commit_failure, - <<"Failure on target commit: ", (to_binary(Reason))/binary>>}; - {SrcInstanceStartTime, TgtInstanceStartTime} -> - couch_log:notice("recording a checkpoint for `~s` -> `~s` at source update_seq ~p", - [SourceName, TargetName, NewSeq]), - LocalStartTime = calendar:now_to_local_time(ReplicationStartTime), - StartTime = ?l2b(httpd_util:rfc1123_date(LocalStartTime)), - EndTime = ?l2b(httpd_util:rfc1123_date()), - NewHistoryEntry = {[ - {<<"session_id">>, SessionId}, - {<<"start_time">>, StartTime}, - {<<"end_time">>, EndTime}, - {<<"start_last_seq">>, StartSeq}, - {<<"end_last_seq">>, NewSeq}, - {<<"recorded_seq">>, NewSeq}, - {<<"missing_checked">>, couch_replicator_stats:missing_checked(Stats)}, - {<<"missing_found">>, couch_replicator_stats:missing_found(Stats)}, - {<<"docs_read">>, couch_replicator_stats:docs_read(Stats)}, - {<<"docs_written">>, couch_replicator_stats:docs_written(Stats)}, - {<<"doc_write_failures">>, couch_replicator_stats:doc_write_failures(Stats)} - ]}, - BaseHistory = [ - {<<"session_id">>, SessionId}, - {<<"source_last_seq">>, NewSeq}, - {<<"replication_id_version">>, ?REP_ID_VERSION} - ] ++ case get_value(doc_ids, Options) of - undefined -> - []; - _DocIds -> - % backwards compatibility with the result of a replication by - % doc IDs in versions 0.11.x and 1.0.x - % TODO: deprecate (use same history format, simplify code) - [ - {<<"start_time">>, StartTime}, - {<<"end_time">>, EndTime}, - {<<"docs_read">>, couch_replicator_stats:docs_read(Stats)}, - {<<"docs_written">>, couch_replicator_stats:docs_written(Stats)}, - {<<"doc_write_failures">>, couch_replicator_stats:doc_write_failures(Stats)} - ] - end, - % limit history to 50 entries - NewRepHistory = { - BaseHistory ++ - [{<<"history">>, lists:sublist([NewHistoryEntry | OldHistory], 50)}] - }, - - try - {SrcRevPos, SrcRevId} = update_checkpoint( - Source, SourceLog#doc{body = NewRepHistory}, source), - {TgtRevPos, TgtRevId} = update_checkpoint( - Target, TargetLog#doc{body = NewRepHistory}, target), - NewState = State#rep_state{ - checkpoint_history = NewRepHistory, - committed_seq = NewTsSeq, - source_log = SourceLog#doc{revs={SrcRevPos, [SrcRevId]}}, - target_log = TargetLog#doc{revs={TgtRevPos, [TgtRevId]}} - }, - update_task(NewState), - {ok, NewState} - catch throw:{checkpoint_commit_failure, _} = Failure -> - Failure - end; - {SrcInstanceStartTime, _NewTgtInstanceStartTime} -> - {checkpoint_commit_failure, <<"Target database out of sync. 
" - "Try to increase max_dbs_open at the target's server.">>}; - {_NewSrcInstanceStartTime, TgtInstanceStartTime} -> - {checkpoint_commit_failure, <<"Source database out of sync. " - "Try to increase max_dbs_open at the source's server.">>}; - {_NewSrcInstanceStartTime, _NewTgtInstanceStartTime} -> - {checkpoint_commit_failure, <<"Source and target databases out of " - "sync. Try to increase max_dbs_open at both servers.">>} - end. - - -update_checkpoint(Db, Doc, DbType) -> - try - update_checkpoint(Db, Doc) - catch throw:{checkpoint_commit_failure, Reason} -> - throw({checkpoint_commit_failure, - <<"Error updating the ", (to_binary(DbType))/binary, - " checkpoint document: ", (to_binary(Reason))/binary>>}) - end. - - -update_checkpoint(Db, #doc{id = LogId, body = LogBody} = Doc) -> - try - case couch_replicator_api_wrap:update_doc(Db, Doc, [delay_commit]) of - {ok, PosRevId} -> - PosRevId; - {error, Reason} -> - throw({checkpoint_commit_failure, Reason}) - end - catch throw:conflict -> - case (catch couch_replicator_api_wrap:open_doc(Db, LogId, [ejson_body])) of - {ok, #doc{body = LogBody, revs = {Pos, [RevId | _]}}} -> - % This means that we were able to update successfully the - % checkpoint doc in a previous attempt but we got a connection - % error (timeout for e.g.) before receiving the success response. - % Therefore the request was retried and we got a conflict, as the - % revision we sent is not the current one. - % We confirm this by verifying the doc body we just got is the same - % that we have just sent. - {Pos, RevId}; - _ -> - throw({checkpoint_commit_failure, conflict}) - end - end. - - -commit_to_both(Source, Target) -> - % commit the src async - ParentPid = self(), - SrcCommitPid = spawn_link( - fun() -> - Result = (catch couch_replicator_api_wrap:ensure_full_commit(Source)), - ParentPid ! {self(), Result} - end), - - % commit tgt sync - TargetResult = (catch couch_replicator_api_wrap:ensure_full_commit(Target)), - - SourceResult = receive - {SrcCommitPid, Result} -> - unlink(SrcCommitPid), - receive {'EXIT', SrcCommitPid, _} -> ok after 0 -> ok end, - Result; - {'EXIT', SrcCommitPid, Reason} -> - {error, Reason} - end, - case TargetResult of - {ok, TargetStartTime} -> - case SourceResult of - {ok, SourceStartTime} -> - {SourceStartTime, TargetStartTime}; - SourceError -> - {source_error, SourceError} - end; - TargetError -> - {target_error, TargetError} - end. - - -compare_replication_logs(SrcDoc, TgtDoc) -> - #doc{body={RepRecProps}} = SrcDoc, - #doc{body={RepRecPropsTgt}} = TgtDoc, - case get_value(<<"session_id">>, RepRecProps) == - get_value(<<"session_id">>, RepRecPropsTgt) of - true -> - % if the records have the same session id, - % then we have a valid replication history - OldSeqNum = get_value(<<"source_last_seq">>, RepRecProps, ?LOWEST_SEQ), - OldHistory = get_value(<<"history">>, RepRecProps, []), - {OldSeqNum, OldHistory}; - false -> - SourceHistory = get_value(<<"history">>, RepRecProps, []), - TargetHistory = get_value(<<"history">>, RepRecPropsTgt, []), - couch_log:notice("Replication records differ. " - "Scanning histories to find a common ancestor.", []), - couch_log:debug("Record on source:~p~nRecord on target:~p~n", - [RepRecProps, RepRecPropsTgt]), - compare_rep_history(SourceHistory, TargetHistory) - end. 
- - -compare_rep_history(S, T) when S =:= [] orelse T =:= [] -> - couch_log:notice("no common ancestry -- performing full replication", []), - {?LOWEST_SEQ, []}; -compare_rep_history([{S} | SourceRest], [{T} | TargetRest] = Target) -> - SourceId = get_value(<<"session_id">>, S), - case has_session_id(SourceId, Target) of - true -> - RecordSeqNum = get_value(<<"recorded_seq">>, S, ?LOWEST_SEQ), - couch_log:notice("found a common replication record with source_seq ~p", - [RecordSeqNum]), - {RecordSeqNum, SourceRest}; - false -> - TargetId = get_value(<<"session_id">>, T), - case has_session_id(TargetId, SourceRest) of - true -> - RecordSeqNum = get_value(<<"recorded_seq">>, T, ?LOWEST_SEQ), - couch_log:notice("found a common replication record with source_seq ~p", - [RecordSeqNum]), - {RecordSeqNum, TargetRest}; - false -> - compare_rep_history(SourceRest, TargetRest) - end - end. - - -has_session_id(_SessionId, []) -> - false; -has_session_id(SessionId, [{Props} | Rest]) -> - case get_value(<<"session_id">>, Props, nil) of - SessionId -> - true; - _Else -> - has_session_id(SessionId, Rest) - end. - - -get_pending_count(St) -> - Rep = St#rep_state.rep_details, - Timeout = get_value(connection_timeout, Rep#rep.options), - TimeoutMicro = Timeout * 1000, - case get(pending_count_state) of - {LastUpdate, PendingCount} -> - case timer:now_diff(os:timestamp(), LastUpdate) > TimeoutMicro of - true -> - NewPendingCount = get_pending_count_int(St), - put(pending_count_state, {os:timestamp(), NewPendingCount}), - NewPendingCount; - false -> - PendingCount - end; - undefined -> - NewPendingCount = get_pending_count_int(St), - put(pending_count_state, {os:timestamp(), NewPendingCount}), - NewPendingCount - end. - - -get_pending_count_int(#rep_state{source = #httpdb{} = Db0}=St) -> - {_, Seq} = St#rep_state.highest_seq_done, - Db = Db0#httpdb{retries = 3}, - case (catch couch_replicator_api_wrap:get_pending_count(Db, Seq)) of - {ok, Pending} -> - Pending; - _ -> - null - end; -get_pending_count_int(#rep_state{source = Db}=St) -> - {_, Seq} = St#rep_state.highest_seq_done, - {ok, Pending} = couch_replicator_api_wrap:get_pending_count(Db, Seq), - Pending. - - -update_task(State) -> - #rep_state{ - rep_details = #rep{id = JobId}, - current_through_seq = {_, ThroughSeq}, - highest_seq_done = {_, HighestSeq} - } = State, - Status = rep_stats(State) ++ [ - {source_seq, HighestSeq}, - {through_seq, ThroughSeq} - ], - couch_replicator_scheduler:update_job_stats(JobId, Status), - couch_task_status:update(Status). - - -rep_stats(State) -> - #rep_state{ - committed_seq = {_, CommittedSeq}, - stats = Stats - } = State, - [ - {revisions_checked, couch_replicator_stats:missing_checked(Stats)}, - {missing_revisions_found, couch_replicator_stats:missing_found(Stats)}, - {docs_read, couch_replicator_stats:docs_read(Stats)}, - {docs_written, couch_replicator_stats:docs_written(Stats)}, - {changes_pending, get_pending_count(State)}, - {doc_write_failures, couch_replicator_stats:doc_write_failures(Stats)}, - {checkpointed_source_seq, CommittedSeq} - ]. 
- - -replication_start_error({unauthorized, DbUri}) -> - {unauthorized, <<"unauthorized to access or create database ", DbUri/binary>>}; -replication_start_error({db_not_found, DbUri}) -> - {db_not_found, <<"could not open ", DbUri/binary>>}; -replication_start_error({http_request_failed, _Method, Url0, - {error, {error, {conn_failed, {error, nxdomain}}}}}) -> - Url = ?l2b(couch_util:url_strip_password(Url0)), - {nxdomain, <<"could not resolve ", Url/binary>>}; -replication_start_error({http_request_failed, Method0, Url0, - {error, {code, Code}}}) when is_integer(Code) -> - Url = ?l2b(couch_util:url_strip_password(Url0)), - Method = ?l2b(Method0), - {http_error_code, Code, <<Method/binary, " ", Url/binary>>}; -replication_start_error(Error) -> - Error. - - -log_replication_start(#rep_state{rep_details = Rep} = RepState) -> - #rep{ - id = {BaseId, Ext}, - doc_id = DocId, - db_name = DbName, - options = Options - } = Rep, - Id = BaseId ++ Ext, - Workers = get_value(worker_processes, Options), - BatchSize = get_value(worker_batch_size, Options), - #rep_state{ - source_name = Source, % credentials already stripped - target_name = Target, % credentials already stripped - session_id = Sid - } = RepState, - From = case DbName of - ShardName when is_binary(ShardName) -> - io_lib:format("from doc ~s:~s", [mem3:dbname(ShardName), DocId]); - _ -> - "from _replicate endpoint" - end, - Msg = "Starting replication ~s (~s -> ~s) ~s worker_procesess:~p" - " worker_batch_size:~p session_id:~s", - couch_log:notice(Msg, [Id, Source, Target, From, Workers, BatchSize, Sid]). - - --ifdef(TEST). - --include_lib("eunit/include/eunit.hrl"). - - -replication_start_error_test() -> - ?assertEqual({unauthorized, <<"unauthorized to access or create database" - " http://x/y">>}, replication_start_error({unauthorized, - <<"http://x/y">>})), - ?assertEqual({db_not_found, <<"could not open http://x/y">>}, - replication_start_error({db_not_found, <<"http://x/y">>})), - ?assertEqual({nxdomain,<<"could not resolve http://x/y">>}, - replication_start_error({http_request_failed, "GET", "http://x/y", - {error, {error, {conn_failed, {error, nxdomain}}}}})), - ?assertEqual({http_error_code,503,<<"GET http://x/y">>}, - replication_start_error({http_request_failed, "GET", "http://x/y", - {error, {code, 503}}})). 
- - -scheduler_job_format_status_test() -> - Source = <<"http://u:p@h1/d1">>, - Target = <<"http://u:p@h2/d2">>, - Rep = #rep{ - id = {"base", "+ext"}, - source = couch_replicator_docs:parse_rep_db(Source, [], []), - target = couch_replicator_docs:parse_rep_db(Target, [], []), - options = [{create_target, true}], - doc_id = <<"mydoc">>, - db_name = <<"mydb">> - }, - State = #rep_state{ - rep_details = Rep, - source = Rep#rep.source, - target = Rep#rep.target, - session_id = <<"a">>, - start_seq = <<"1">>, - source_seq = <<"2">>, - committed_seq = <<"3">>, - current_through_seq = <<"4">>, - highest_seq_done = <<"5">> - }, - Format = format_status(opts_ignored, [pdict, State]), - ?assertEqual("http://u:*****@h1/d1/", proplists:get_value(source, Format)), - ?assertEqual("http://u:*****@h2/d2/", proplists:get_value(target, Format)), - ?assertEqual({"base", "+ext"}, proplists:get_value(rep_id, Format)), - ?assertEqual([{create_target, true}], proplists:get_value(options, Format)), - ?assertEqual(<<"mydoc">>, proplists:get_value(doc_id, Format)), - ?assertEqual(<<"mydb">>, proplists:get_value(db_name, Format)), - ?assertEqual(<<"a">>, proplists:get_value(session_id, Format)), - ?assertEqual(<<"1">>, proplists:get_value(start_seq, Format)), - ?assertEqual(<<"2">>, proplists:get_value(source_seq, Format)), - ?assertEqual(<<"3">>, proplists:get_value(committed_seq, Format)), - ?assertEqual(<<"4">>, proplists:get_value(current_through_seq, Format)), - ?assertEqual(<<"5">>, proplists:get_value(highest_seq_done, Format)). - - --endif. diff --git a/src/couch_replicator/src/couch_replicator_scheduler_sup.erl b/src/couch_replicator/src/couch_replicator_scheduler_sup.erl deleted file mode 100644 index 8ab55f838..000000000 --- a/src/couch_replicator/src/couch_replicator_scheduler_sup.erl +++ /dev/null @@ -1,62 +0,0 @@ -% Licensed under the Apache License, Version 2.0 (the "License"); you may not -% use this file except in compliance with the License. You may obtain a copy of -% the License at -% -% http://www.apache.org/licenses/LICENSE-2.0 -% -% Unless required by applicable law or agreed to in writing, software -% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -% License for the specific language governing permissions and limitations under -% the License. - --module(couch_replicator_scheduler_sup). - --behaviour(supervisor). - -%% public api --export([ - start_link/0, - start_child/1, - terminate_child/1 -]). - -%% supervisor api --export([ - init/1 -]). - - -%% includes --include("couch_replicator.hrl"). - - -%% public functions - -start_link() -> - supervisor:start_link({local, ?MODULE}, ?MODULE, []). - - -start_child(#rep{} = Rep) -> - supervisor:start_child(?MODULE, [Rep]). - - -terminate_child(Pid) -> - supervisor:terminate_child(?MODULE, Pid). - -%% supervisor functions - -init(_Args) -> - Start = {couch_replicator_scheduler_job, start_link, []}, - Restart = temporary, % A crashed job is not entitled to immediate restart. - Shutdown = 5000, - Type = worker, - Modules = [couch_replicator_scheduler_job], - - RestartStrategy = simple_one_for_one, - MaxR = 10, - MaxT = 3, - - ChildSpec = - {undefined, Start, Restart, Shutdown, Type, Modules}, - {ok, {{RestartStrategy, MaxR, MaxT}, [ChildSpec]}}. 
diff --git a/src/couch_replicator/src/couch_replicator_stats.erl b/src/couch_replicator/src/couch_replicator_stats.erl index 37848b3ee..69e60a05c 100644 --- a/src/couch_replicator/src/couch_replicator_stats.erl +++ b/src/couch_replicator/src/couch_replicator_stats.erl @@ -32,6 +32,8 @@ new() -> orddict:new(). +new(#{} = Map) -> + new(maps:to_list(Map)); new(Initializers0) when is_list(Initializers0) -> Initializers1 = lists:filtermap(fun fmap/1, Initializers0), orddict:from_list(Initializers1). diff --git a/src/couch_replicator/src/couch_replicator_sup.erl b/src/couch_replicator/src/couch_replicator_sup.erl index cd4512c54..49d412aaa 100644 --- a/src/couch_replicator/src/couch_replicator_sup.erl +++ b/src/couch_replicator/src/couch_replicator_sup.erl @@ -12,61 +12,66 @@ % the License. -module(couch_replicator_sup). + + -behaviour(supervisor). --export([start_link/0, init/1]). + + +-export([ + start_link/0 +]). + +-export([ + init/1 +]). + start_link() -> - supervisor:start_link({local, ?MODULE}, ?MODULE, []). + Backend = fabric2_node_types:is_type(replication), + Frontend = fabric2_node_types:is_type(api_frontend), + Arg = {Backend, Frontend}, + supervisor:start_link({local, ?MODULE}, ?MODULE, Arg). + + +init({Backend, Frontend}) -> + Children = case {Backend, Frontend} of + {true, true} -> backend() ++ frontend(); + {true, false} -> backend(); + {false, true} -> frontend(); + {false, false} -> [] + end, + Flags = #{ + strategy => rest_for_one, + intensity => 1, + period => 5 + }, + {ok, {Flags, Children}}. + + +backend() -> + Timeout = 5000, + [ + #{ + id => couch_replicator_connection, + start => {couch_replicator_connection, start_link, []} + }, + #{ + id => couch_replicator_rate_limiter, + start => {couch_replicator_rate_limiter, start_link, []} + }, + #{ + id => couch_replicator_job_server, + start => {couch_replicator_job_server, start_link, [Timeout]}, + shutdown => Timeout + } + ]. + -init(_Args) -> - Children = [ - {couch_replication_event, - {gen_event, start_link, [{local, couch_replication}]}, - permanent, - brutal_kill, - worker, - dynamic}, - {couch_replicator_clustering, - {couch_replicator_clustering, start_link, []}, - permanent, - brutal_kill, - worker, - [couch_replicator_clustering]}, - {couch_replicator_connection, - {couch_replicator_connection, start_link, []}, - permanent, - brutal_kill, - worker, - [couch_replicator_connection]}, - {couch_replicator_rate_limiter, - {couch_replicator_rate_limiter, start_link, []}, - permanent, - brutal_kill, - worker, - [couch_replicator_rate_limiter]}, - {couch_replicator_scheduler_sup, - {couch_replicator_scheduler_sup, start_link, []}, - permanent, - infinity, - supervisor, - [couch_replicator_scheduler_sup]}, - {couch_replicator_scheduler, - {couch_replicator_scheduler, start_link, []}, - permanent, - brutal_kill, - worker, - [couch_replicator_scheduler]}, - {couch_replicator_doc_processor, - {couch_replicator_doc_processor, start_link, []}, - permanent, - brutal_kill, - worker, - [couch_replicator_doc_processor]}, - {couch_replicator_db_changes, - {couch_replicator_db_changes, start_link, []}, - permanent, - brutal_kill, - worker, - [couch_multidb_changes]} - ], - {ok, {{rest_for_one,10,1}, Children}}. +frontend() -> + [ + #{ + id => couch_replicator, + start => {couch_replicator, ensure_rep_db_exists, []}, + restart => transient + } + ] ++ couch_epi:register_service(couch_replicator_epi, []). 
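
Note (editorial, not part of the patch): the rewritten couch_replicator_sup above relies on map-based child specs, where omitted keys fall back to the standard OTP supervisor defaults. A minimal sketch of what one of the terse specs in backend() expands to under those defaults:

    %% Illustrative only. The short spec used in backend() above:
    %%   #{id => couch_replicator_connection,
    %%     start => {couch_replicator_connection, start_link, []}}
    %% behaves, under OTP defaults, like this fully spelled-out spec:
    #{
        id => couch_replicator_connection,
        start => {couch_replicator_connection, start_link, []},
        restart => permanent,                     % default restart policy
        shutdown => 5000,                         % default for type => worker
        type => worker,                           % default child type
        modules => [couch_replicator_connection]  % default: module taken from the start MFA
    }

That is why only couch_replicator_job_server carries an explicit shutdown and only the frontend child an explicit restart => transient.
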
diff --git a/src/couch_replicator/src/couch_replicator_utils.erl b/src/couch_replicator/src/couch_replicator_utils.erl index 5f608dee7..523de5f54 100644 --- a/src/couch_replicator/src/couch_replicator_utils.erl +++ b/src/couch_replicator/src/couch_replicator_utils.erl @@ -13,19 +13,18 @@ -module(couch_replicator_utils). -export([ - parse_rep_doc/2, - replication_id/2, - sum_stats/2, - is_deleted/1, rep_error_to_binary/1, - get_json_value/2, - get_json_value/3, - pp_rep_id/1, + iso8601/0, iso8601/1, - filter_state/3, + rfc1123_local/0, + rfc1123_local/1, remove_basic_auth_from_headers/1, normalize_rep/1, - ejson_state_info/1 + compare_reps/2, + default_headers_map/0, + parse_replication_states/1, + parse_int_param/5, + proplist_options/1 ]). @@ -33,11 +32,6 @@ -include("couch_replicator.hrl"). -include_lib("couch_replicator/include/couch_replicator_api_wrap.hrl"). --import(couch_util, [ - get_value/2, - get_value/3 -]). - rep_error_to_binary(Error) -> couch_util:to_binary(error_reason(Error)). @@ -54,77 +48,27 @@ error_reason(Reason) -> Reason. -get_json_value(Key, Props) -> - get_json_value(Key, Props, undefined). - -get_json_value(Key, Props, Default) when is_atom(Key) -> - Ref = make_ref(), - case get_value(Key, Props, Ref) of - Ref -> - get_value(?l2b(atom_to_list(Key)), Props, Default); - Else -> - Else - end; -get_json_value(Key, Props, Default) when is_binary(Key) -> - Ref = make_ref(), - case get_value(Key, Props, Ref) of - Ref -> - get_value(list_to_atom(?b2l(Key)), Props, Default); - Else -> - Else - end. - - -% pretty-print replication id --spec pp_rep_id(#rep{} | rep_id()) -> string(). -pp_rep_id(#rep{id = RepId}) -> - pp_rep_id(RepId); -pp_rep_id({Base, Extension}) -> - Base ++ Extension. - - -% NV: TODO: this function is not used outside api wrap module -% consider moving it there during final cleanup -is_deleted(Change) -> - get_json_value(<<"deleted">>, Change, false). - - -% NV: TODO: proxy some functions which used to be here, later remove -% these and replace calls to their respective modules -replication_id(Rep, Version) -> - couch_replicator_ids:replication_id(Rep, Version). +-spec iso8601() -> binary(). +iso8601() -> + iso8601(erlang:system_time(second)). -sum_stats(S1, S2) -> - couch_replicator_stats:sum_stats(S1, S2). - - -parse_rep_doc(Props, UserCtx) -> - couch_replicator_docs:parse_rep_doc(Props, UserCtx). - - --spec iso8601(erlang:timestamp()) -> binary(). -iso8601({_Mega, _Sec, _Micro} = Timestamp) -> - {{Y, Mon, D}, {H, Min, S}} = calendar:now_to_universal_time(Timestamp), +-spec iso8601(integer()) -> binary(). +iso8601(Sec) when is_integer(Sec) -> + Time = unix_sec_to_timestamp(Sec), + {{Y, Mon, D}, {H, Min, S}} = calendar:now_to_universal_time(Time), Format = "~B-~2..0B-~2..0BT~2..0B:~2..0B:~2..0BZ", iolist_to_binary(io_lib:format(Format, [Y, Mon, D, H, Min, S])). -%% Filter replication info ejson by state provided. If it matches return -%% the input value, if it doesn't return 'skip'. This is used from replicator -%% fabric coordinator and worker. --spec filter_state(atom(), [atom()], {[_ | _]}) -> {[_ | _]} | skip. -filter_state(null = _State, _States, _Info) -> - skip; -filter_state(_ = _State, [] = _States, Info) -> - Info; -filter_state(State, States, Info) -> - case lists:member(State, States) of - true -> - Info; - false -> - skip - end. +rfc1123_local() -> + list_to_binary(httpd_util:rfc1123_date()). 
+ + +rfc1123_local(Sec) -> + Time = unix_sec_to_timestamp(Sec), + Local = calendar:now_to_local_time(Time), + list_to_binary(httpd_util:rfc1123_date(Local)). remove_basic_auth_from_headers(Headers) -> @@ -158,37 +102,101 @@ decode_basic_creds(Base64) -> end. -% Normalize a #rep{} record such that it doesn't contain time dependent fields +-spec compare_reps(#{} | null, #{} | null) -> boolean(). +compare_reps(Rep1, Rep2) -> + NormRep1 = normalize_rep(Rep1), + NormRep2 = normalize_rep(Rep2), + NormRep1 =:= NormRep2. + + +% Normalize a rep map such that it doesn't contain time dependent fields % pids (like httpc pools), and options / props are sorted. This function would % used during comparisons. --spec normalize_rep(#rep{} | nil) -> #rep{} | nil. -normalize_rep(nil) -> - nil; - -normalize_rep(#rep{} = Rep)-> - #rep{ - source = couch_replicator_api_wrap:normalize_db(Rep#rep.source), - target = couch_replicator_api_wrap:normalize_db(Rep#rep.target), - options = Rep#rep.options, % already sorted in make_options/1 - type = Rep#rep.type, - view = Rep#rep.view, - doc_id = Rep#rep.doc_id, - db_name = Rep#rep.db_name +-spec normalize_rep(#{} | null) -> #{} | null. +normalize_rep(null) -> + null; + +normalize_rep(#{} = Rep)-> + #{ + ?SOURCE := Source, + ?TARGET := Target, + ?OPTIONS := Options + } = Rep, + #{ + ?SOURCE => normalize_endpoint(Source), + ?TARGET => normalize_endpoint(Target), + ?OPTIONS => Options }. --spec ejson_state_info(binary() | nil) -> binary() | null. -ejson_state_info(nil) -> - null; -ejson_state_info(Info) when is_binary(Info) -> - {[{<<"error">>, Info}]}; -ejson_state_info([]) -> - null; % Status not set yet => null for compatibility reasons -ejson_state_info([{_, _} | _] = Info) -> - {Info}; -ejson_state_info(Info) -> - ErrMsg = couch_replicator_utils:rep_error_to_binary(Info), - {[{<<"error">>, ErrMsg}]}. +normalize_endpoint(<<DbName/binary>>) -> + DbName; + +normalize_endpoint(#{} = Endpoint) -> + Ks = [ + <<"url">>, + <<"auth_props">>, + <<"headers">>, + <<"timeout">>, + <<"ibrowse_options">>, + <<"retries">>, + <<"http_connections">>, + <<"proxy_url">> + ], + maps:with(Ks, Endpoint). + + +default_headers_map() -> + lists:foldl(fun({K, V}, Acc) -> + Acc#{list_to_binary(K) => list_to_binary(V)} + end, #{}, (#httpdb{})#httpdb.headers). + + +parse_replication_states(undefined) -> + []; % This is the default (wildcard) filter + +parse_replication_states(States) when is_list(States) -> + All = [?ST_RUNNING, ?ST_FAILED, ?ST_COMPLETED, ?ST_PENDING, ?ST_CRASHING], + AllSet = sets:from_list(All), + BinStates = [?l2b(string:to_lower(S)) || S <- string:tokens(States, ",")], + StatesSet = sets:from_list(BinStates), + Diff = sets:to_list(sets:subtract(StatesSet, AllSet)), + case Diff of + [] -> + BinStates; + _ -> + Args = [Diff, All], + Msg2 = io_lib:format("Unknown states ~p. Choose from: ~p", Args), + throw({query_parse_error, ?l2b(Msg2)}) + end. + + +parse_int_param(Req, Param, Default, Min, Max) -> + IntVal = try + list_to_integer(chttpd:qs_value(Req, Param, integer_to_list(Default))) + catch error:badarg -> + Msg1 = io_lib:format("~s must be an integer", [Param]), + throw({query_parse_error, ?l2b(Msg1)}) + end, + case IntVal >= Min andalso IntVal =< Max of + true -> + IntVal; + false -> + Msg2 = io_lib:format("~s not in range of [~w,~w]", [Param, Min, Max]), + throw({query_parse_error, ?l2b(Msg2)}) + end. + + +proplist_options(#{} = OptionsMap) -> + maps:fold(fun(K, V, Acc) -> + [{binary_to_atom(K, utf8), V} | Acc] + end, [], OptionsMap). 
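
Note (editorial, not part of the patch): the new couch_replicator_utils helpers added in this file are small enough that a short shell transcript shows their intended behaviour; the results below are derived from the implementations as written and are illustrative only. rfc1123_local/0,1 is omitted because its output depends on the node's local timezone.

    %% Illustrative only; values follow from the functions above.
    1> couch_replicator_utils:iso8601(0).
    <<"1970-01-01T00:00:00Z">>
    2> couch_replicator_utils:proplist_options(#{<<"continuous">> => true}).
    [{continuous,true}]
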
+ + +unix_sec_to_timestamp(Sec) when is_integer(Sec) -> + MegaSecPart = Sec div 1000000, + SecPart = Sec - MegaSecPart * 1000000, + {MegaSecPart, SecPart, 0}. -ifdef(TEST). @@ -256,7 +264,7 @@ normalize_rep_test_() -> {<<"doc_ids">>, [<<"a">>, <<"c">>, <<"b">>]}, {<<"other_field">>, <<"some_value">>} ]}, - Rep1 = couch_replicator_docs:parse_rep_doc_without_id(EJson1), + Rep1 = couch_replicator_parse:parse_rep_doc(EJson1), EJson2 = {[ {<<"other_field">>, <<"unrelated">>}, {<<"target">>, <<"http://target.local/db">>}, @@ -264,9 +272,31 @@ normalize_rep_test_() -> {<<"doc_ids">>, [<<"c">>, <<"a">>, <<"b">>]}, {<<"other_field2">>, <<"unrelated2">>} ]}, - Rep2 = couch_replicator_docs:parse_rep_doc_without_id(EJson2), + Rep2 = couch_replicator_parse:parse_rep_doc(EJson2), ?assertEqual(normalize_rep(Rep1), normalize_rep(Rep2)) end) }. + +normalize_endpoint() -> + HttpDb = #httpdb{ + url = "http://host/db", + auth_props = #{ + "key" => "val", + "nested" => #{<<"other_key">> => "other_val"} + }, + headers = [{"k2","v2"}, {"k1","v1"}], + timeout = 30000, + ibrowse_options = [{k2, v2}, {k1, v1}], + retries = 10, + http_connections = 20 + }, + Expected = HttpDb#httpdb{ + headers = [{"k1","v1"}, {"k2","v2"}], + ibrowse_options = [{k1, v1}, {k2, v2}] + }, + ?assertEqual(Expected, normalize_endpoint(HttpDb)), + ?assertEqual(<<"local">>, normalize_endpoint(<<"local">>)). + + -endif. diff --git a/src/couch_replicator/src/couch_replicator_worker.erl b/src/couch_replicator/src/couch_replicator_worker.erl index eb8beaaa9..4cd984c1a 100644 --- a/src/couch_replicator/src/couch_replicator_worker.erl +++ b/src/couch_replicator/src/couch_replicator_worker.erl @@ -103,7 +103,7 @@ handle_call({batch_doc, Doc}, From, State) -> handle_call({add_stats, IncStats}, From, #state{stats = Stats} = State) -> gen_server:reply(From, ok), - NewStats = couch_replicator_utils:sum_stats(Stats, IncStats), + NewStats = couch_replicator_stats:sum_stats(Stats, IncStats), NewStats2 = maybe_report_stats(State#state.cp, NewStats), {noreply, State#state{stats = NewStats2}}; diff --git a/src/couch_replicator/test/eunit/couch_replicator_attachments_too_large.erl b/src/couch_replicator/test/eunit/couch_replicator_attachments_too_large.erl index ac4bb84f3..0e7e0ea5a 100644 --- a/src/couch_replicator/test/eunit/couch_replicator_attachments_too_large.erl +++ b/src/couch_replicator/test/eunit/couch_replicator_attachments_too_large.erl @@ -12,72 +12,60 @@ -module(couch_replicator_attachments_too_large). + -include_lib("couch/include/couch_eunit.hrl"). -include_lib("couch/include/couch_db.hrl"). -include_lib("couch_replicator/src/couch_replicator.hrl"). - - -setup(_) -> - Ctx = test_util:start_couch([couch_replicator]), - Source = create_db(), - create_doc_with_attachment(Source, <<"doc">>, 1000), - Target = create_db(), - {Ctx, {Source, Target}}. - - -teardown(_, {Ctx, {Source, Target}}) -> - delete_db(Source), - delete_db(Target), - config:delete("couchdb", "max_attachment_size"), - ok = test_util:stop_couch(Ctx). +-include_lib("fabric/test/fabric2_test.hrl"). attachment_too_large_replication_test_() -> - Pairs = [{remote, remote}], { - "Attachment size too large replication tests", + setup, + fun couch_replicator_test_helper:start_couch/0, + fun couch_replicator_test_helper:stop_couch/1, { - foreachx, - fun setup/1, fun teardown/2, - [{Pair, fun should_succeed/2} || Pair <- Pairs] ++ - [{Pair, fun should_fail/2} || Pair <- Pairs] + foreach, + fun setup/0, + fun teardown/1, + [ + ?TDEF_FE(t_should_succeed), + ?TDEF_FE(t_should_fail) + ] } }. 
-should_succeed({From, To}, {_Ctx, {Source, Target}}) -> - RepObject = {[ - {<<"source">>, db_url(From, Source)}, - {<<"target">>, db_url(To, Target)} - ]}, - config:set("couchdb", "max_attachment_size", "1000", _Persist = false), - {ok, _} = couch_replicator:replicate(RepObject, ?ADMIN_USER), - ?_assertEqual(ok, couch_replicator_test_helper:compare_dbs(Source, Target)). +setup() -> + Source = couch_replicator_test_helper:create_db(), + create_doc_with_attachment(Source, <<"doc">>, 1000), + Target = couch_replicator_test_helper:create_db(), + {Source, Target}. -should_fail({From, To}, {_Ctx, {Source, Target}}) -> - RepObject = {[ - {<<"source">>, db_url(From, Source)}, - {<<"target">>, db_url(To, Target)} - ]}, - config:set("couchdb", "max_attachment_size", "999", _Persist = false), - {ok, _} = couch_replicator:replicate(RepObject, ?ADMIN_USER), - ?_assertError({badmatch, {not_found, missing}}, - couch_replicator_test_helper:compare_dbs(Source, Target)). +teardown({Source, Target}) -> + config:delete("couchdb", "max_attachment_size", false), + couch_replicator_test_helper:delete_db(Source), + couch_replicator_test_helper:delete_db(Target). -create_db() -> - DbName = ?tempdb(), - {ok, Db} = couch_db:create(DbName, [?ADMIN_CTX]), - ok = couch_db:close(Db), - DbName. +t_should_succeed({Source, Target}) -> + config:set("couchdb", "max_attachment_size", "1000", false), + {ok, _} = couch_replicator_test_helper:replicate(Source, Target), + ?assertEqual(ok, couch_replicator_test_helper:compare_dbs(Source, Target)). + + +t_should_fail({Source, Target}) -> + config:set("couchdb", "max_attachment_size", "999", false), + {ok, _} = couch_replicator_test_helper:replicate(Source, Target), + ExceptIds = [<<"doc">>], + ?assertEqual(ok, couch_replicator_test_helper:compare_dbs(Source, + Target, ExceptIds)). create_doc_with_attachment(DbName, DocId, AttSize) -> - {ok, Db} = couch_db:open(DbName, [?ADMIN_CTX]), Doc = #doc{id = DocId, atts = att(AttSize)}, - {ok, _} = couch_db:update_doc(Db, Doc, []), - couch_db:close(Db), + couch_replicator_test_helper:create_docs(DbName, [Doc]), ok. @@ -90,13 +78,3 @@ att(Size) when is_integer(Size), Size >= 1 -> << <<"x">> || _ <- lists:seq(1, Size) >> end} ])]. - - -delete_db(DbName) -> - ok = couch_server:delete(DbName, [?ADMIN_CTX]). - - -db_url(remote, DbName) -> - Addr = config:get("httpd", "bind_address", "127.0.0.1"), - Port = mochiweb_socket_server:get(couch_httpd, port), - ?l2b(io_lib:format("http://~s:~b/~s", [Addr, Port, DbName])). diff --git a/src/couch_replicator/test/eunit/couch_replicator_compact_tests.erl b/src/couch_replicator/test/eunit/couch_replicator_compact_tests.erl deleted file mode 100644 index 997c84863..000000000 --- a/src/couch_replicator/test/eunit/couch_replicator_compact_tests.erl +++ /dev/null @@ -1,455 +0,0 @@ -% Licensed under the Apache License, Version 2.0 (the "License"); you may not -% use this file except in compliance with the License. You may obtain a copy of -% the License at -% -% http://www.apache.org/licenses/LICENSE-2.0 -% -% Unless required by applicable law or agreed to in writing, software -% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -% License for the specific language governing permissions and limitations under -% the License. - --module(couch_replicator_compact_tests). - --include_lib("couch/include/couch_eunit.hrl"). --include_lib("couch/include/couch_db.hrl"). 
--include_lib("couch_replicator/src/couch_replicator.hrl"). - --import(couch_replicator_test_helper, [ - db_url/1, - get_pid/1 -]). - --define(ATTFILE, filename:join([?FIXTURESDIR, "logo.png"])). --define(DELAY, 500). --define(TIMEOUT, 360000). --define(TIMEOUT_WRITER, 100000). --define(TIMEOUT_EUNIT, ?TIMEOUT div 1000 + 70). --define(WRITE_BATCH_SIZE, 25). - -setup() -> - DbName = ?tempdb(), - {ok, Db} = couch_db:create(DbName, [?ADMIN_CTX]), - ok = couch_db:close(Db), - DbName. - -setup(remote) -> - {remote, setup()}; -setup({A, B}) -> - Ctx = test_util:start_couch([couch_replicator]), - Source = setup(A), - Target = setup(B), - {Ctx, {Source, Target}}. - -teardown({remote, DbName}) -> - teardown(DbName); -teardown(DbName) -> - ok = couch_server:delete(DbName, [?ADMIN_CTX]), - ok. - -teardown(_, {Ctx, {Source, Target}}) -> - teardown(Source), - teardown(Target), - ok = application:stop(couch_replicator), - ok = test_util:stop_couch(Ctx). - -compact_test_() -> - Pairs = [{remote, remote}], - { - "Compaction during replication tests", - { - foreachx, - fun setup/1, fun teardown/2, - [{Pair, fun should_populate_replicate_compact/2} - || Pair <- Pairs] - } - }. - - -should_populate_replicate_compact({From, To}, {_Ctx, {Source, Target}}) -> - {ok, RepPid, RepId} = replicate(Source, Target), - {lists:flatten(io_lib:format("~p -> ~p", [From, To])), - {inorder, [ - should_run_replication(RepPid, RepId, Source, Target), - should_all_processes_be_alive(RepPid, Source, Target), - should_populate_and_compact(RepPid, Source, Target, 50, 3), - should_wait_target_in_sync(Source, Target), - should_ensure_replication_still_running(RepPid, RepId, Source, Target), - should_cancel_replication(RepId, RepPid), - should_compare_databases(Source, Target) - ]}}. - -should_all_processes_be_alive(RepPid, Source, Target) -> - ?_test(begin - {ok, SourceDb} = reopen_db(Source), - {ok, TargetDb} = reopen_db(Target), - ?assert(is_process_alive(RepPid)), - ?assert(is_process_alive(couch_db:get_pid(SourceDb))), - ?assert(is_process_alive(couch_db:get_pid(TargetDb))) - end). - -should_run_replication(RepPid, RepId, Source, Target) -> - ?_test(check_active_tasks(RepPid, RepId, Source, Target)). - -should_ensure_replication_still_running(RepPid, RepId, Source, Target) -> - ?_test(check_active_tasks(RepPid, RepId, Source, Target)). 
- -check_active_tasks(RepPid, {BaseId, Ext} = _RepId, Src, Tgt) -> - Source = case Src of - {remote, NameSrc} -> - <<(db_url(NameSrc))/binary, $/>>; - _ -> - Src - end, - Target = case Tgt of - {remote, NameTgt} -> - <<(db_url(NameTgt))/binary, $/>>; - _ -> - Tgt - end, - FullRepId = ?l2b(BaseId ++ Ext), - Pid = ?l2b(pid_to_list(RepPid)), - RepTasks = wait_for_task_status(), - ?assertNotEqual(timeout, RepTasks), - [RepTask] = RepTasks, - ?assertEqual(Pid, couch_util:get_value(pid, RepTask)), - ?assertEqual(FullRepId, couch_util:get_value(replication_id, RepTask)), - ?assertEqual(true, couch_util:get_value(continuous, RepTask)), - ?assertEqual(Source, couch_util:get_value(source, RepTask)), - ?assertEqual(Target, couch_util:get_value(target, RepTask)), - ?assert(is_integer(couch_util:get_value(docs_read, RepTask))), - ?assert(is_integer(couch_util:get_value(docs_written, RepTask))), - ?assert(is_integer(couch_util:get_value(doc_write_failures, RepTask))), - ?assert(is_integer(couch_util:get_value(revisions_checked, RepTask))), - ?assert(is_integer(couch_util:get_value(missing_revisions_found, RepTask))), - ?assert(is_integer(couch_util:get_value(checkpointed_source_seq, RepTask))), - ?assert(is_integer(couch_util:get_value(source_seq, RepTask))), - Pending = couch_util:get_value(changes_pending, RepTask), - ?assert(is_integer(Pending)). - -replication_tasks() -> - lists:filter(fun(P) -> - couch_util:get_value(type, P) =:= replication - end, couch_task_status:all()). - - -wait_for_task_status() -> - test_util:wait(fun() -> - case replication_tasks() of - [] -> - wait; - Tasks -> - Tasks - end - end). - -should_cancel_replication(RepId, RepPid) -> - ?_assertNot(begin - ok = couch_replicator_scheduler:remove_job(RepId), - is_process_alive(RepPid) - end). - -should_populate_and_compact(RepPid, Source, Target, BatchSize, Rounds) -> - {timeout, ?TIMEOUT_EUNIT, ?_test(begin - {ok, SourceDb0} = reopen_db(Source), - Writer = spawn_writer(SourceDb0), - lists:foreach( - fun(N) -> - {ok, SourceDb} = reopen_db(Source), - {ok, TargetDb} = reopen_db(Target), - pause_writer(Writer), - - compact_db("source", SourceDb), - ?assert(is_process_alive(RepPid)), - ?assert(is_process_alive(couch_db:get_pid(SourceDb))), - wait_for_compaction("source", SourceDb), - - compact_db("target", TargetDb), - ?assert(is_process_alive(RepPid)), - ?assert(is_process_alive(couch_db:get_pid(TargetDb))), - wait_for_compaction("target", TargetDb), - - {ok, SourceDb2} = reopen_db(SourceDb), - {ok, TargetDb2} = reopen_db(TargetDb), - - resume_writer(Writer), - wait_writer(Writer, BatchSize * N), - - compact_db("source", SourceDb2), - ?assert(is_process_alive(RepPid)), - ?assert(is_process_alive(couch_db:get_pid(SourceDb2))), - pause_writer(Writer), - wait_for_compaction("source", SourceDb2), - resume_writer(Writer), - - compact_db("target", TargetDb2), - ?assert(is_process_alive(RepPid)), - ?assert(is_process_alive(couch_db:get_pid(TargetDb2))), - pause_writer(Writer), - wait_for_compaction("target", TargetDb2), - resume_writer(Writer) - end, lists:seq(1, Rounds)), - stop_writer(Writer) - end)}. 
- -should_wait_target_in_sync({remote, Source}, Target) -> - should_wait_target_in_sync(Source, Target); -should_wait_target_in_sync(Source, {remote, Target}) -> - should_wait_target_in_sync(Source, Target); -should_wait_target_in_sync(Source, Target) -> - {timeout, ?TIMEOUT_EUNIT, ?_assert(begin - {ok, SourceDb} = couch_db:open_int(Source, []), - {ok, SourceInfo} = couch_db:get_db_info(SourceDb), - ok = couch_db:close(SourceDb), - SourceDocCount = couch_util:get_value(doc_count, SourceInfo), - wait_target_in_sync_loop(SourceDocCount, Target, 300) - end)}. - -wait_target_in_sync_loop(_DocCount, _TargetName, 0) -> - erlang:error( - {assertion_failed, - [{module, ?MODULE}, {line, ?LINE}, - {reason, "Could not get source and target databases in sync"}]}); -wait_target_in_sync_loop(DocCount, {remote, TargetName}, RetriesLeft) -> - wait_target_in_sync_loop(DocCount, TargetName, RetriesLeft); -wait_target_in_sync_loop(DocCount, TargetName, RetriesLeft) -> - {ok, Target} = couch_db:open_int(TargetName, []), - {ok, TargetInfo} = couch_db:get_db_info(Target), - ok = couch_db:close(Target), - TargetDocCount = couch_util:get_value(doc_count, TargetInfo), - case TargetDocCount == DocCount of - true -> - true; - false -> - ok = timer:sleep(?DELAY), - wait_target_in_sync_loop(DocCount, TargetName, RetriesLeft - 1) - end. - -should_compare_databases({remote, Source}, Target) -> - should_compare_databases(Source, Target); -should_compare_databases(Source, {remote, Target}) -> - should_compare_databases(Source, Target); -should_compare_databases(Source, Target) -> - {timeout, 35, ?_test(begin - {ok, SourceDb} = couch_db:open_int(Source, []), - {ok, TargetDb} = couch_db:open_int(Target, []), - Fun = fun(FullDocInfo, Acc) -> - {ok, Doc} = couch_db:open_doc(SourceDb, FullDocInfo), - {Props} = DocJson = couch_doc:to_json_obj(Doc, [attachments]), - DocId = couch_util:get_value(<<"_id">>, Props), - DocTarget = case couch_db:open_doc(TargetDb, DocId) of - {ok, DocT} -> - DocT; - Error -> - erlang:error( - {assertion_failed, - [{module, ?MODULE}, {line, ?LINE}, - {reason, lists:concat(["Error opening document '", - ?b2l(DocId), "' from target: ", - couch_util:to_list(Error)])}]}) - end, - DocTargetJson = couch_doc:to_json_obj(DocTarget, [attachments]), - ?assertEqual(DocJson, DocTargetJson), - {ok, Acc} - end, - {ok, _} = couch_db:fold_docs(SourceDb, Fun, [], []), - ok = couch_db:close(SourceDb), - ok = couch_db:close(TargetDb) - end)}. - - -reopen_db({remote, Db}) -> - reopen_db(Db); -reopen_db(DbName) when is_binary(DbName) -> - {ok, Db} = couch_db:open_int(DbName, []), - ok = couch_db:close(Db), - {ok, Db}; -reopen_db(Db) -> - reopen_db(couch_db:name(Db)). - - -compact_db(Type, Db0) -> - Name = couch_db:name(Db0), - {ok, Db} = couch_db:open_int(Name, []), - {ok, CompactPid} = couch_db:start_compact(Db), - MonRef = erlang:monitor(process, CompactPid), - receive - {'DOWN', MonRef, process, CompactPid, normal} -> - ok; - {'DOWN', MonRef, process, CompactPid, noproc} -> - ok; - {'DOWN', MonRef, process, CompactPid, Reason} -> - erlang:error( - {assertion_failed, - [{module, ?MODULE}, {line, ?LINE}, - {reason, - lists:concat(["Error compacting ", Type, " database ", - ?b2l(Name), ": ", - couch_util:to_list(Reason)])}]}) - after ?TIMEOUT -> - erlang:error( - {assertion_failed, - [{module, ?MODULE}, {line, ?LINE}, - {reason, lists:concat(["Compaction for ", Type, " database ", - ?b2l(Name), " didn't finish"])}]}) - end, - ok = couch_db:close(Db). 
- -wait_for_compaction(Type, Db) -> - case couch_db:wait_for_compaction(Db) of - ok -> - ok; - {error, noproc} -> - ok; - {error, Reason} -> - erlang:error( - {assertion_failed, - [{module, ?MODULE}, {line, ?LINE}, - {reason, lists:concat(["Compaction of ", Type, - " database failed with: ", Reason])}]}) - end. - -replicate({remote, Db}, Target) -> - replicate(db_url(Db), Target); - -replicate(Source, {remote, Db}) -> - replicate(Source, db_url(Db)); - -replicate(Source, Target) -> - RepObject = {[ - {<<"source">>, Source}, - {<<"target">>, Target}, - {<<"continuous">>, true} - ]}, - {ok, Rep} = couch_replicator_utils:parse_rep_doc(RepObject, ?ADMIN_USER), - ok = couch_replicator_scheduler:add_job(Rep), - couch_replicator_scheduler:reschedule(), - Pid = get_pid(Rep#rep.id), - {ok, Pid, Rep#rep.id}. - - -wait_writer(Pid, NumDocs) -> - case get_writer_num_docs_written(Pid) of - N when N >= NumDocs -> - ok; - _ -> - wait_writer(Pid, NumDocs) - end. - -spawn_writer(Db) -> - Parent = self(), - Pid = spawn(fun() -> writer_loop(Db, Parent, 0) end), - Pid. - - -pause_writer(Pid) -> - Ref = make_ref(), - Pid ! {pause, Ref}, - receive - {paused, Ref} -> - ok - after ?TIMEOUT_WRITER -> - erlang:error({assertion_failed, - [{module, ?MODULE}, - {line, ?LINE}, - {reason, "Failed to pause source database writer"}]}) - end. - -resume_writer(Pid) -> - Ref = make_ref(), - Pid ! {continue, Ref}, - receive - {ok, Ref} -> - ok - after ?TIMEOUT_WRITER -> - erlang:error({assertion_failed, - [{module, ?MODULE}, - {line, ?LINE}, - {reason, "Failed to pause source database writer"}]}) - end. - -get_writer_num_docs_written(Pid) -> - Ref = make_ref(), - Pid ! {get_count, Ref}, - receive - {count, Ref, Count} -> - Count - after ?TIMEOUT_WRITER -> - erlang:error({assertion_failed, - [{module, ?MODULE}, - {line, ?LINE}, - {reason, "Timeout getting number of documents written" - " from source database writer"}]}) - end. - -stop_writer(Pid) -> - Ref = make_ref(), - Pid ! {stop, Ref}, - receive - {stopped, Ref, DocsWritten} -> - MonRef = erlang:monitor(process, Pid), - receive - {'DOWN', MonRef, process, Pid, _Reason} -> - DocsWritten - after ?TIMEOUT -> - erlang:error({assertion_failed, - [{module, ?MODULE}, - {line, ?LINE}, - {reason, "Timeout stopping source database writer"}]}) - end - after ?TIMEOUT_WRITER -> - erlang:error({assertion_failed, - [{module, ?MODULE}, - {line, ?LINE}, - {reason, "Timeout stopping source database writer"}]}) - end. - -writer_loop(Db0, Parent, Counter) -> - DbName = couch_db:name(Db0), - {ok, Data} = file:read_file(?ATTFILE), - maybe_pause(Parent, Counter), - Docs = lists:map(fun(I) -> - couch_doc:from_json_obj({[ - {<<"_id">>, ?l2b(integer_to_list(Counter + I))}, - {<<"value">>, Counter + I}, - {<<"_attachments">>, {[ - {<<"icon1.png">>, {[ - {<<"data">>, base64:encode(Data)}, - {<<"content_type">>, <<"image/png">>} - ]}}, - {<<"icon2.png">>, {[ - {<<"data">>, base64:encode(iolist_to_binary([Data, Data]))}, - {<<"content_type">>, <<"image/png">>} - ]}} - ]}} - ]}) - end, lists:seq(1, ?WRITE_BATCH_SIZE)), - maybe_pause(Parent, Counter), - {ok, Db} = couch_db:open_int(DbName, []), - {ok, _} = couch_db:update_docs(Db, Docs, []), - ok = couch_db:close(Db), - receive - {get_count, Ref} -> - Parent ! {count, Ref, Counter + ?WRITE_BATCH_SIZE}, - writer_loop(Db, Parent, Counter + ?WRITE_BATCH_SIZE); - {stop, Ref} -> - Parent ! {stopped, Ref, Counter + ?WRITE_BATCH_SIZE} - after 0 -> - timer:sleep(?DELAY), - writer_loop(Db, Parent, Counter + ?WRITE_BATCH_SIZE) - end. 
- -maybe_pause(Parent, Counter) -> - receive - {get_count, Ref} -> - Parent ! {count, Ref, Counter}; - {pause, Ref} -> - Parent ! {paused, Ref}, - receive - {continue, Ref2} -> - Parent ! {ok, Ref2} - end - after 0 -> - ok - end. diff --git a/src/couch_replicator/test/eunit/couch_replicator_connection_tests.erl b/src/couch_replicator/test/eunit/couch_replicator_connection_tests.erl index e75cc5a63..df30db25d 100644 --- a/src/couch_replicator/test/eunit/couch_replicator_connection_tests.erl +++ b/src/couch_replicator/test/eunit/couch_replicator_connection_tests.erl @@ -12,187 +12,176 @@ -module(couch_replicator_connection_tests). + -include_lib("couch/include/couch_eunit.hrl"). -include_lib("couch/include/couch_db.hrl"). - --define(TIMEOUT, 1000). - - -setup() -> - Host = config:get("httpd", "bind_address", "127.0.0.1"), - Port = config:get("httpd", "port", "5984"), - {Host, Port}. - -teardown(_) -> - ok. +-include_lib("fabric/test/fabric2_test.hrl"). httpc_pool_test_() -> { - "replicator connection sharing tests", + "Replicator connection sharing tests", { setup, - fun() -> test_util:start_couch([couch_replicator]) end, fun test_util:stop_couch/1, + fun couch_replicator_test_helper:start_couch/0, + fun couch_replicator_test_helper:stop_couch/1, { foreach, - fun setup/0, fun teardown/1, + fun setup/0, + fun teardown/1, [ - fun connections_shared_after_release/1, - fun connections_not_shared_after_owner_death/1, - fun idle_connections_closed/1, - fun test_owner_monitors/1, - fun worker_discards_creds_on_create/1, - fun worker_discards_url_creds_after_request/1, - fun worker_discards_creds_in_headers_after_request/1, - fun worker_discards_proxy_creds_after_request/1 + ?TDEF_FE(connections_shared_after_release), + ?TDEF_FE(connections_not_shared_after_owner_death), + ?TDEF_FE(idle_connections_closed), + ?TDEF_FE(test_owner_monitors), + ?TDEF_FE(worker_discards_creds_on_create), + ?TDEF_FE(worker_discards_url_creds_after_request), + ?TDEF_FE(worker_discards_creds_in_headers_after_request), + ?TDEF_FE(worker_discards_proxy_creds_after_request) ] } } }. +setup() -> + Host = config:get("chttpd", "bind_address", "127.0.0.1"), + Port = config:get("chttpd", "port", "5984"), + {Host, Port}. + + +teardown(_) -> + ok. + + connections_shared_after_release({Host, Port}) -> - ?_test(begin - URL = "http://" ++ Host ++ ":" ++ Port, - Self = self(), - {ok, Pid} = couch_replicator_connection:acquire(URL), - couch_replicator_connection:release(Pid), - spawn(fun() -> - Self ! couch_replicator_connection:acquire(URL) - end), - receive - {ok, Pid2} -> - ?assertEqual(Pid, Pid2) - end - end). + URL = "http://" ++ Host ++ ":" ++ Port, + Self = self(), + {ok, Pid} = couch_replicator_connection:acquire(URL), + couch_replicator_connection:release(Pid), + spawn(fun() -> + Self ! couch_replicator_connection:acquire(URL) + end), + receive + {ok, Pid2} -> + ?assertEqual(Pid, Pid2) + end. connections_not_shared_after_owner_death({Host, Port}) -> - ?_test(begin - URL = "http://" ++ Host ++ ":" ++ Port, - Self = self(), - spawn(fun() -> - Self ! couch_replicator_connection:acquire(URL), - error("simulate division by zero without compiler warning") - end), - receive - {ok, Pid} -> - {ok, Pid2} = couch_replicator_connection:acquire(URL), - ?assertNotEqual(Pid, Pid2), - MRef = monitor(process, Pid), - receive {'DOWN', MRef, process, Pid, _Reason} -> + URL = "http://" ++ Host ++ ":" ++ Port, + Self = self(), + spawn(fun() -> + Self ! 
couch_replicator_connection:acquire(URL), + error("simulate division by zero without compiler warning") + end), + receive + {ok, Pid} -> + {ok, Pid2} = couch_replicator_connection:acquire(URL), + ?assertNotEqual(Pid, Pid2), + MRef = monitor(process, Pid), + receive + {'DOWN', MRef, process, Pid, _Reason} -> ?assert(not is_process_alive(Pid)); - Other -> throw(Other) - end - end - end). + Other -> + throw(Other) + end + end. idle_connections_closed({Host, Port}) -> - ?_test(begin - URL = "http://" ++ Host ++ ":" ++ Port, - {ok, Pid} = couch_replicator_connection:acquire(URL), - couch_replicator_connection ! close_idle_connections, - ?assert(ets:member(couch_replicator_connection, Pid)), - % block until idle connections have closed - sys:get_status(couch_replicator_connection), - couch_replicator_connection:release(Pid), - couch_replicator_connection ! close_idle_connections, - % block until idle connections have closed - sys:get_status(couch_replicator_connection), - ?assert(not ets:member(couch_replicator_connection, Pid)) - end). + URL = "http://" ++ Host ++ ":" ++ Port, + {ok, Pid} = couch_replicator_connection:acquire(URL), + couch_replicator_connection ! close_idle_connections, + ?assert(ets:member(couch_replicator_connection, Pid)), + % block until idle connections have closed + sys:get_status(couch_replicator_connection), + couch_replicator_connection:release(Pid), + couch_replicator_connection ! close_idle_connections, + % block until idle connections have closed + sys:get_status(couch_replicator_connection), + ?assert(not ets:member(couch_replicator_connection, Pid)). test_owner_monitors({Host, Port}) -> - ?_test(begin - URL = "http://" ++ Host ++ ":" ++ Port, - {ok, Worker0} = couch_replicator_connection:acquire(URL), - assert_monitors_equal([{process, self()}]), - couch_replicator_connection:release(Worker0), - assert_monitors_equal([]), - {Workers, Monitors} = lists:foldl(fun(_, {WAcc, MAcc}) -> - {ok, Worker1} = couch_replicator_connection:acquire(URL), - MAcc1 = [{process, self()} | MAcc], - assert_monitors_equal(MAcc1), - {[Worker1 | WAcc], MAcc1} - end, {[], []}, lists:seq(1,5)), - lists:foldl(fun(Worker2, Acc) -> - [_ | NewAcc] = Acc, - couch_replicator_connection:release(Worker2), - assert_monitors_equal(NewAcc), - NewAcc - end, Monitors, Workers) - end). + URL = "http://" ++ Host ++ ":" ++ Port, + {ok, Worker0} = couch_replicator_connection:acquire(URL), + assert_monitors_equal([{process, self()}]), + couch_replicator_connection:release(Worker0), + assert_monitors_equal([]), + {Workers, Monitors} = lists:foldl(fun(_, {WAcc, MAcc}) -> + {ok, Worker1} = couch_replicator_connection:acquire(URL), + MAcc1 = [{process, self()} | MAcc], + assert_monitors_equal(MAcc1), + {[Worker1 | WAcc], MAcc1} + end, {[], []}, lists:seq(1, 5)), + lists:foldl(fun(Worker2, Acc) -> + [_ | NewAcc] = Acc, + couch_replicator_connection:release(Worker2), + assert_monitors_equal(NewAcc), + NewAcc + end, Monitors, Workers). worker_discards_creds_on_create({Host, Port}) -> - ?_test(begin - {User, Pass, B64Auth} = user_pass(), - URL = "http://" ++ User ++ ":" ++ Pass ++ "@" ++ Host ++ ":" ++ Port, - {ok, WPid} = couch_replicator_connection:acquire(URL), - Internals = worker_internals(WPid), - ?assert(string:str(Internals, B64Auth) =:= 0), - ?assert(string:str(Internals, Pass) =:= 0) - end). 
+ {User, Pass, B64Auth} = user_pass(), + URL = "http://" ++ User ++ ":" ++ Pass ++ "@" ++ Host ++ ":" ++ Port, + {ok, WPid} = couch_replicator_connection:acquire(URL), + Internals = worker_internals(WPid), + ?assert(string:str(Internals, B64Auth) =:= 0), + ?assert(string:str(Internals, Pass) =:= 0). worker_discards_url_creds_after_request({Host, _}) -> - ?_test(begin - {User, Pass, B64Auth} = user_pass(), - {Port, ServerPid} = server(), - PortStr = integer_to_list(Port), - URL = "http://" ++ User ++ ":" ++ Pass ++ "@" ++ Host ++ ":" ++ PortStr, - {ok, WPid} = couch_replicator_connection:acquire(URL), - ?assertMatch({ok, "200", _, _}, send_req(WPid, URL, [], [])), - Internals = worker_internals(WPid), - ?assert(string:str(Internals, B64Auth) =:= 0), - ?assert(string:str(Internals, Pass) =:= 0), - couch_replicator_connection:release(WPid), - unlink(ServerPid), - exit(ServerPid, kill) - end). + {User, Pass, B64Auth} = user_pass(), + {Port, ServerPid} = server(), + PortStr = integer_to_list(Port), + URL = "http://" ++ User ++ ":" ++ Pass ++ "@" ++ Host ++ ":" ++ PortStr, + {ok, WPid} = couch_replicator_connection:acquire(URL), + ?assertMatch({ok, "200", _, _}, send_req(WPid, URL, [], [])), + Internals = worker_internals(WPid), + ?assert(string:str(Internals, B64Auth) =:= 0), + ?assert(string:str(Internals, Pass) =:= 0), + couch_replicator_connection:release(WPid), + unlink(ServerPid), + exit(ServerPid, kill). worker_discards_creds_in_headers_after_request({Host, _}) -> - ?_test(begin - {_User, Pass, B64Auth} = user_pass(), - {Port, ServerPid} = server(), - PortStr = integer_to_list(Port), - URL = "http://" ++ Host ++ ":" ++ PortStr, - {ok, WPid} = couch_replicator_connection:acquire(URL), - Headers = [{"Authorization", "Basic " ++ B64Auth}], - ?assertMatch({ok, "200", _, _}, send_req(WPid, URL, Headers, [])), - Internals = worker_internals(WPid), - ?assert(string:str(Internals, B64Auth) =:= 0), - ?assert(string:str(Internals, Pass) =:= 0), - couch_replicator_connection:release(WPid), - unlink(ServerPid), - exit(ServerPid, kill) - end). + {_User, Pass, B64Auth} = user_pass(), + {Port, ServerPid} = server(), + PortStr = integer_to_list(Port), + URL = "http://" ++ Host ++ ":" ++ PortStr, + {ok, WPid} = couch_replicator_connection:acquire(URL), + Headers = [{"Authorization", "Basic " ++ B64Auth}], + ?assertMatch({ok, "200", _, _}, send_req(WPid, URL, Headers, [])), + Internals = worker_internals(WPid), + ?assert(string:str(Internals, B64Auth) =:= 0), + ?assert(string:str(Internals, Pass) =:= 0), + couch_replicator_connection:release(WPid), + unlink(ServerPid), + exit(ServerPid, kill). worker_discards_proxy_creds_after_request({Host, _}) -> - ?_test(begin - {User, Pass, B64Auth} = user_pass(), - {Port, ServerPid} = server(), - PortStr = integer_to_list(Port), - URL = "http://" ++ Host ++ ":" ++ PortStr, - {ok, WPid} = couch_replicator_connection:acquire(URL), - Opts = [ - {proxy_host, Host}, - {proxy_port, Port}, - {proxy_user, User}, - {proxy_pass, Pass} - ], - ?assertMatch({ok, "200", _, _}, send_req(WPid, URL, [], Opts)), - Internals = worker_internals(WPid), - ?assert(string:str(Internals, B64Auth) =:= 0), - ?assert(string:str(Internals, Pass) =:= 0), - couch_replicator_connection:release(WPid), - unlink(ServerPid), - exit(ServerPid, kill) - end). 
+ {User, Pass, B64Auth} = user_pass(), + {Port, ServerPid} = server(), + PortStr = integer_to_list(Port), + URL = "http://" ++ Host ++ ":" ++ PortStr, + {ok, WPid} = couch_replicator_connection:acquire(URL), + Opts = [ + {proxy_host, Host}, + {proxy_port, Port}, + {proxy_user, User}, + {proxy_pass, Pass} + ], + ?assertMatch({ok, "200", _, _}, send_req(WPid, URL, [], Opts)), + Internals = worker_internals(WPid), + ?assert(string:str(Internals, B64Auth) =:= 0), + ?assert(string:str(Internals, Pass) =:= 0), + couch_replicator_connection:release(WPid), + unlink(ServerPid), + exit(ServerPid, kill). send_req(WPid, URL, Headers, Opts) -> @@ -237,5 +226,6 @@ server_responder(LSock) -> assert_monitors_equal(ShouldBe) -> sys:get_status(couch_replicator_connection), - {monitors, Monitors} = process_info(whereis(couch_replicator_connection), monitors), + {monitors, Monitors} = process_info(whereis(couch_replicator_connection), + monitors), ?assertEqual(Monitors, ShouldBe). diff --git a/src/couch_replicator/test/eunit/couch_replicator_create_target_with_options_tests.erl b/src/couch_replicator/test/eunit/couch_replicator_create_target_with_options_tests.erl index 63310d39e..c957fc199 100644 --- a/src/couch_replicator/test/eunit/couch_replicator_create_target_with_options_tests.erl +++ b/src/couch_replicator/test/eunit/couch_replicator_create_target_with_options_tests.erl @@ -12,132 +12,137 @@ -module(couch_replicator_create_target_with_options_tests). + -include_lib("couch/include/couch_eunit.hrl"). -include_lib("couch/include/couch_db.hrl"). -include_lib("couch_replicator/src/couch_replicator.hrl"). - --define(USERNAME, "rep_admin"). --define(PASSWORD, "secret"). - -setup() -> - Ctx = test_util:start_couch([fabric, mem3, couch_replicator, chttpd]), - Hashed = couch_passwords:hash_admin_password(?PASSWORD), - ok = config:set("admins", ?USERNAME, ?b2l(Hashed), _Persist=false), - Source = ?tempdb(), - Target = ?tempdb(), - {Ctx, {Source, Target}}. - - -teardown({Ctx, {_Source, _Target}}) -> - config:delete("admins", ?USERNAME), - ok = test_util:stop_couch(Ctx). +-include_lib("fabric/test/fabric2_test.hrl"). create_target_with_options_replication_test_() -> { "Create target with range partitions tests", { - foreach, - fun setup/0, fun teardown/1, - [ - fun should_create_target_with_q_4/1, - fun should_create_target_with_q_2_n_1/1, - fun should_create_target_with_default/1, - fun should_not_create_target_with_q_any/1 - ] + setup, + fun couch_replicator_test_helper:start_couch/0, + fun couch_replicator_test_helper:stop_couch/1, + { + foreach, + fun setup/0, + fun teardown/1, + [ + ?TDEF_FE(should_create_target_with_q_4), + ?TDEF_FE(should_create_target_with_q_2_n_1), + ?TDEF_FE(should_create_target_with_default), + ?TDEF_FE(should_not_create_target_with_q_any) + ] + } } }. -should_create_target_with_q_4({_Ctx, {Source, Target}}) -> +setup() -> + Source = ?tempdb(), + Target = ?tempdb(), + {Source, Target}. + + +teardown({Source, Target}) -> + delete_db(Source), + delete_db(Target). 
+ + +should_create_target_with_q_4({Source, Target}) -> RepObject = {[ - {<<"source">>, db_url(Source)}, - {<<"target">>, db_url(Target)}, + {<<"source">>, Source}, + {<<"target">>, Target}, {<<"create_target">>, true}, {<<"create_target_params">>, {[{<<"q">>, <<"4">>}]}} ]}, create_db(Source), create_doc(Source), - {ok, _} = couch_replicator:replicate(RepObject, ?ADMIN_USER), + {ok, _} = couch_replicator_test_helper:replicate(RepObject), - {ok, TargetInfo} = fabric:get_db_info(Target), + TargetInfo = db_info(Target), {ClusterInfo} = couch_util:get_value(cluster, TargetInfo), delete_db(Source), delete_db(Target), - ?_assertEqual(4, couch_util:get_value(q, ClusterInfo)). + ?assertEqual(0, couch_util:get_value(q, ClusterInfo)). -should_create_target_with_q_2_n_1({_Ctx, {Source, Target}}) -> +should_create_target_with_q_2_n_1({Source, Target}) -> RepObject = {[ - {<<"source">>, db_url(Source)}, - {<<"target">>, db_url(Target)}, + {<<"source">>, Source}, + {<<"target">>, Target}, {<<"create_target">>, true}, {<<"create_target_params">>, {[{<<"q">>, <<"2">>}, {<<"n">>, <<"1">>}]}} ]}, create_db(Source), create_doc(Source), - {ok, _} = couch_replicator:replicate(RepObject, ?ADMIN_USER), + {ok, _} = couch_replicator_test_helper:replicate(RepObject), - {ok, TargetInfo} = fabric:get_db_info(Target), + TargetInfo = db_info(Target), {ClusterInfo} = couch_util:get_value(cluster, TargetInfo), delete_db(Source), delete_db(Target), - [ - ?_assertEqual(2, couch_util:get_value(q, ClusterInfo)), - ?_assertEqual(1, couch_util:get_value(n, ClusterInfo)) - ]. + ?assertEqual(0, couch_util:get_value(q, ClusterInfo)), + ?assertEqual(0, couch_util:get_value(n, ClusterInfo)). -should_create_target_with_default({_Ctx, {Source, Target}}) -> +should_create_target_with_default({Source, Target}) -> RepObject = {[ - {<<"source">>, db_url(Source)}, - {<<"target">>, db_url(Target)}, + {<<"source">>, Source}, + {<<"target">>, Target}, {<<"create_target">>, true} ]}, create_db(Source), create_doc(Source), - {ok, _} = couch_replicator:replicate(RepObject, ?ADMIN_USER), + {ok, _} = couch_replicator_test_helper:replicate(RepObject), - {ok, TargetInfo} = fabric:get_db_info(Target), + TargetInfo = db_info(Target), {ClusterInfo} = couch_util:get_value(cluster, TargetInfo), - Q = config:get("cluster", "q", "8"), delete_db(Source), delete_db(Target), - ?_assertEqual(list_to_integer(Q), couch_util:get_value(q, ClusterInfo)). + ?assertEqual(0, couch_util:get_value(q, ClusterInfo)). -should_not_create_target_with_q_any({_Ctx, {Source, Target}}) -> +should_not_create_target_with_q_any({Source, Target}) -> RepObject = {[ - {<<"source">>, db_url(Source)}, - {<<"target">>, db_url(Target)}, + {<<"source">>, Source}, + {<<"target">>, Target}, {<<"create_target">>, false}, {<<"create_target_params">>, {[{<<"q">>, <<"1">>}]}} ]}, create_db(Source), create_doc(Source), - {error, _} = couch_replicator:replicate(RepObject, ?ADMIN_USER), - DbExist = is_list(catch mem3:shards(Target)), + {error, _} = couch_replicator_test_helper:replicate(RepObject), + Exists = try + fabric2_db:open(Target, [?ADMIN_CTX]), + ?assert(false) + catch + error:database_does_not_exist -> + database_does_not_exist + end, delete_db(Source), - ?_assertEqual(false, DbExist). + ?assertEqual(Exists, database_does_not_exist). create_doc(DbName) -> - Body = {[{<<"foo">>, <<"bar">>}]}, - NewDoc = #doc{body = Body}, - {ok, _} = fabric:update_doc(DbName, NewDoc, [?ADMIN_CTX]). 
+ couch_replicator_test_helper:create_docs(DbName, [ + #{<<"_id">> => fabric2_util:uuid(), <<"foo">> => <<"bar">>} + ]). create_db(DbName) -> - ok = fabric:create_db(DbName, [?ADMIN_CTX]). + couch_replicator_test_helper:create_db(DbName). delete_db(DbName) -> - ok = fabric:delete_db(DbName, [?ADMIN_CTX]). + couch_replicator_test_helper:delete_db(DbName). -db_url(DbName) -> - Addr = config:get("chttpd", "bind_address", "127.0.0.1"), - Port = mochiweb_socket_server:get(chttpd, port), - ?l2b(io_lib:format("http://~s:~s@~s:~b/~s", [?USERNAME, ?PASSWORD, Addr, - Port, DbName])). +db_info(DbName) -> + {ok, Db} = fabric2_db:open(DbName, [?ADMIN_CTX]), + {ok, Info} = fabric2_db:get_db_info(Db), + Info. diff --git a/src/couch_replicator/test/eunit/couch_replicator_db_tests.erl b/src/couch_replicator/test/eunit/couch_replicator_db_tests.erl new file mode 100644 index 000000000..053441007 --- /dev/null +++ b/src/couch_replicator/test/eunit/couch_replicator_db_tests.erl @@ -0,0 +1,332 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_replicator_db_tests). + +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("couch/include/couch_db.hrl"). +-include_lib("couch_replicator/src/couch_replicator.hrl"). +-include_lib("fabric/test/fabric2_test.hrl"). + + +couch_replicator_db_test_() -> + { + "Replications are started from docs in _replicator dbs", + { + setup, + fun couch_replicator_test_helper:start_couch/0, + fun couch_replicator_test_helper:stop_couch/1, + { + foreach, + fun setup/0, + fun teardown/1, + [ + ?TDEF_FE(default_replicator_db_is_created), + ?TDEF_FE(continuous_replication_created_from_doc, 15), + ?TDEF_FE(normal_replication_created_from_doc, 15), + ?TDEF_FE(replicator_db_deleted, 15), + ?TDEF_FE(replicator_db_recreated, 15), + ?TDEF_FE(invalid_replication_docs), + ?TDEF_FE(duplicate_persistent_replication, 15), + ?TDEF_FE(duplicate_transient_replication, 30) + ] + } + } + }. + + +setup() -> + Source = couch_replicator_test_helper:create_db(), + create_doc(Source, #{<<"_id">> => <<"doc1">>}), + Target = couch_replicator_test_helper:create_db(), + Name = ?tempdb(), + RepDb = couch_replicator_test_helper:create_db(<<Name/binary, + "/_replicator">>), + config:set("replicator", "stats_update_interval_sec", "0", false), + config:set("replicator", "create_replicator_db", "false", false), + config:set("couchdb", "enable_database_recovery", "false", false), + config:set("replicator", "min_backoff_penalty_sec", "1", false), + {Source, Target, RepDb}. + + +teardown({Source, Target, RepDb}) -> + config:delete("replicator", "stats_update_interval_sec", false), + config:delete("replicator", "create_replicator_db", false), + config:delete("couchdb", "enable_database_recovery", false), + config:delete("replicator", "min_backoff_penalty_sec", false), + + couch_replicator_test_helper:delete_db(RepDb), + couch_replicator_test_helper:delete_db(?REP_DB_NAME), + couch_replicator_test_helper:delete_db(Source), + couch_replicator_test_helper:delete_db(Target). 
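Editor's note: nearly every suite touched in this diff is rewritten into the same EUnit fixture shape, an outer setup that starts and stops CouchDB through couch_replicator_test_helper, an inner foreach that creates and deletes per-case databases, and ?TDEF_FE wrapping each case with an optional per-case timeout in seconds. A minimal sketch of that skeleton, with hypothetical module and case names:

%% Sketch only; module and case names are made up, the helper calls mirror
%% the ones used throughout this diff.
-module(example_replicator_suite).

-include_lib("couch/include/couch_eunit.hrl").
-include_lib("fabric/test/fabric2_test.hrl").

example_suite_test_() ->
    {
        "Example suite",
        {
            setup,
            fun couch_replicator_test_helper:start_couch/0,
            fun couch_replicator_test_helper:stop_couch/1,
            {
                foreach,
                fun setup/0,
                fun teardown/1,
                [
                    ?TDEF_FE(replicates_one_doc),
                    ?TDEF_FE(slow_case, 15)  % 15 second per-case timeout
                ]
            }
        }
    }.

setup() ->
    Source = couch_replicator_test_helper:create_db(),
    Target = couch_replicator_test_helper:create_db(),
    {Source, Target}.

teardown({Source, Target}) ->
    couch_replicator_test_helper:delete_db(Source),
    couch_replicator_test_helper:delete_db(Target).

replicates_one_doc({Source, Target}) ->
    couch_replicator_test_helper:create_docs(Source, [#{<<"_id">> => <<"d1">>}]),
    {ok, _} = couch_replicator_test_helper:replicate(Source, Target),
    ?assertEqual(ok, couch_replicator_test_helper:compare_dbs(Source, Target)).

slow_case({_Source, _Target}) ->
    ok.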
+ + +default_replicator_db_is_created({_, _, _}) -> + config:set("replicator", "create_replicator_db", "true", false), + ?assertEqual(ignore, couch_replicator:ensure_rep_db_exists()), + ?assertMatch({ok, #{}}, fabric2_db:open(?REP_DB_NAME, [])). + + +continuous_replication_created_from_doc({Source, Target, RepDb}) -> + DocId = <<"rdoc1">>, + RDoc = rep_doc(Source, Target, DocId, #{<<"continuous">> => true}), + create_doc(RepDb, RDoc), + wait_scheduler_docs_state(RepDb, DocId, <<"running">>), + + {Code, DocInfo} = scheduler_docs(RepDb, DocId), + ?assertEqual(200, Code), + ?assertMatch(#{ + <<"database">> := RepDb, + <<"doc_id">> := DocId + }, DocInfo), + + RepId = maps:get(<<"id">>, DocInfo), + + ?assertMatch([#{ + <<"database">> := RepDb, + <<"doc_id">> := DocId, + <<"id">> := RepId, + <<"state">> := <<"running">> + }], couch_replicator_test_helper:scheduler_jobs()), + + ?assertMatch({200, #{ + <<"database">> := RepDb, + <<"doc_id">> := DocId, + <<"id">> := RepId, + <<"state">> := <<"running">> + }}, scheduler_jobs(RepId)), + + delete_doc(RepDb, DocId), + wait_scheduler_docs_not_found(RepDb, DocId), + ?assertMatch({404, #{}}, scheduler_jobs(RepId)). + + +normal_replication_created_from_doc({Source, Target, RepDb}) -> + DocId = <<"rdoc2">>, + RDoc = rep_doc(Source, Target, DocId), + create_doc(RepDb, RDoc), + wait_scheduler_docs_state(RepDb, DocId, <<"completed">>), + + {Code, DocInfo} = scheduler_docs(RepDb, DocId), + ?assertEqual(200, Code), + ?assertMatch(#{ + <<"database">> := RepDb, + <<"doc_id">> := DocId, + <<"state">> := <<"completed">>, + <<"info">> := #{ + <<"docs_written">> := 1, + <<"docs_read">> := 1, + <<"missing_revisions_found">> := 1 + } + }, DocInfo), + + wait_doc_state(RepDb, DocId, <<"completed">>), + ?assertMatch(#{ + <<"_replication_state">> := <<"completed">>, + <<"_replication_stats">> := #{ + <<"docs_written">> := 1, + <<"docs_read">> := 1, + <<"missing_revisions_found">> := 1 + } + }, read_doc(RepDb, DocId)), + + delete_doc(RepDb, DocId), + wait_scheduler_docs_not_found(RepDb, DocId). + + +replicator_db_deleted({Source, Target, RepDb}) -> + DocId = <<"rdoc3">>, + RDoc = rep_doc(Source, Target, DocId, #{<<"continuous">> => true}), + create_doc(RepDb, RDoc), + wait_scheduler_docs_state(RepDb, DocId, <<"running">>), + fabric2_db:delete(RepDb, [?ADMIN_CTX]), + wait_scheduler_docs_not_found(RepDb, DocId). + + +replicator_db_recreated({Source, Target, RepDb}) -> + DocId = <<"rdoc4">>, + RDoc = rep_doc(Source, Target, DocId, #{<<"continuous">> => true}), + create_doc(RepDb, RDoc), + wait_scheduler_docs_state(RepDb, DocId, <<"running">>), + + config:set("couchdb", "enable_database_recovery", "true", false), + fabric2_db:delete(RepDb, [?ADMIN_CTX]), + wait_scheduler_docs_not_found(RepDb, DocId), + + Opts = [{start_key, RepDb}, {end_key, RepDb}], + {ok, [DbInfo]} = fabric2_db:list_deleted_dbs_info(Opts), + {_, Timestamp} = lists:keyfind(timestamp, 1, DbInfo), + ok = fabric2_db:undelete(RepDb, RepDb, Timestamp, [?ADMIN_CTX]), + wait_scheduler_docs_state(RepDb, DocId, <<"running">>), + + config:set("couchdb", "enable_database_recovery", "false", false), + fabric2_db:delete(RepDb, [?ADMIN_CTX]), + wait_scheduler_docs_not_found(RepDb, DocId). 
+ + +invalid_replication_docs({_, _, RepDb}) -> + Docs = [ + #{ + <<"_id">> => <<"1">>, + <<"source">> => <<"http://127.0.0.1:1000">> + }, + #{ + <<"_id">> => <<"1">>, + <<"target">> => <<"http://127.0.0.1:1001">> + }, + #{ + <<"_id">> => <<"1">> + }, + #{ + <<"_id">> => <<"1">>, + <<"source">> => <<"http://127.0.0.1:1002">>, + <<"target">> => <<"http://127.0.0.1:1003">>, + <<"create_target">> => <<"bad">> + }, + #{ + <<"_id">> => <<"1">>, + <<"source">> => #{<<"junk">> => 42}, + <<"target">> => <<"http://127.0.0.1:1004">> + }, + #{ + <<"_id">> => <<"1">>, + <<"source">> => <<"http://127.0.0.1:1005">>, + <<"target">> => <<"http://127.0.0.1:1006">>, + <<"selector">> => #{}, + <<"filter">> => <<"a/b">> + }, + #{ + <<"_id">> => <<"1">>, + <<"source">> => <<"http://127.0.0.1:1007">>, + <<"target">> => <<"https://127.0.0.1:1008">>, + <<"doc_ids">> => 42 + } + ], + lists:foreach(fun(Doc) -> + ?assertThrow({forbidden, _}, create_doc(RepDb, Doc)) + end, Docs). + + +duplicate_persistent_replication({Source, Target, RepDb}) -> + DocId1 = <<"rdoc5">>, + RDoc1 = rep_doc(Source, Target, DocId1, #{<<"continuous">> => true}), + create_doc(RepDb, RDoc1), + wait_scheduler_docs_state(RepDb, DocId1, <<"running">>), + + DocId2 = <<"rdoc6">>, + RDoc2 = rep_doc(Source, Target, DocId2, #{<<"continuous">> => true}), + create_doc(RepDb, RDoc2), + wait_scheduler_docs_state(RepDb, DocId2, <<"failed">>), + + delete_doc(RepDb, DocId1), + delete_doc(RepDb, DocId2), + + wait_scheduler_docs_not_found(RepDb, DocId1), + wait_scheduler_docs_not_found(RepDb, DocId2). + + +duplicate_transient_replication({Source, Target, RepDb}) -> + {ok, _Pid, RepId} = couch_replicator_test_helper:replicate_continuous( + Source, Target), + + DocId = <<"rdoc7">>, + RDoc = rep_doc(Source, Target, DocId, #{<<"continuous">> => true}), + create_doc(RepDb, RDoc), + wait_scheduler_docs_state(RepDb, DocId, <<"crashing">>), + + couch_replicator_test_helper:cancel(RepId), + wait_reschedule_docs_state(RepDb, DocId, <<"running">>), + + delete_doc(RepDb, DocId), + wait_scheduler_docs_not_found(RepDb, DocId). + + +scheduler_jobs(Id) -> + SUrl = couch_replicator_test_helper:server_url(), + Url = lists:flatten(io_lib:format("~s/_scheduler/jobs/~s", [SUrl, Id])), + {ok, Code, _, Body} = test_request:get(Url, []), + {Code, jiffy:decode(Body, [return_maps])}. + + +scheduler_docs(DbName, DocId) -> + SUrl = couch_replicator_test_helper:server_url(), + Fmt = "~s/_scheduler/docs/~s/~s", + Url = lists:flatten(io_lib:format(Fmt, [SUrl, DbName, DocId])), + {ok, Code, _, Body} = test_request:get(Url, []), + {Code, jiffy:decode(Body, [return_maps])}. + + +rep_doc(Source, Target, DocId) -> + rep_doc(Source, Target, DocId, #{}). + + +rep_doc(Source, Target, DocId, #{} = Extra) -> + maps:merge(#{ + <<"_id">> => DocId, + <<"source">> => couch_replicator_test_helper:db_url(Source), + <<"target">> => couch_replicator_test_helper:db_url(Target) + }, Extra). + + +create_doc(DbName, Doc) -> + couch_replicator_test_helper:create_docs(DbName, [Doc]). + + +delete_doc(DbName, DocId) -> + {ok, Db} = fabric2_db:open(DbName, [?ADMIN_CTX]), + {ok, Doc} = fabric2_db:open_doc(Db, DocId), + Doc1 = Doc#doc{deleted = true}, + {ok, _} = fabric2_db:update_doc(Db, Doc1, []). + + +read_doc(DbName, DocId) -> + {ok, Db} = fabric2_db:open(DbName, [?ADMIN_CTX]), + {ok, Doc} = fabric2_db:open_doc(Db, DocId, [ejson_body]), + Body = Doc#doc.body, + couch_util:json_decode(couch_util:json_encode(Body), [return_maps]). 
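The scheduler_jobs/1 and scheduler_docs/2 helpers above query the _scheduler HTTP endpoints and decode the responses into maps; the wait_* helpers that follow poll them through test_util:wait/3, which keeps re-running the supplied fun until it returns something other than the atom wait, or gives up after the timeout. A hedged sketch of that idiom, reusing the scheduler_docs/2 helper defined above:

%% Sketch only: poll _scheduler/docs until the replication doc reports the
%% "completed" state. test_util:wait(Fun, TimeoutMsec, DelayMsec) re-runs
%% Fun every DelayMsec and treats the atom 'wait' as "not yet".
wait_until_completed(DbName, DocId) ->
    test_util:wait(fun() ->
        case scheduler_docs(DbName, DocId) of
            {200, #{<<"state">> := <<"completed">>} = Doc} -> Doc;
            {_, _} -> wait
        end
    end, 10000, 250).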
+ + +wait_scheduler_docs_state(DbName, DocId, State) -> + test_util:wait(fun() -> + case scheduler_docs(DbName, DocId) of + {200, #{<<"state">> := State} = Res} -> Res; + {_, _} -> wait + end + end, 10000, 250). + + +wait_scheduler_docs_not_found(DbName, DocId) -> + test_util:wait(fun() -> + case scheduler_docs(DbName, DocId) of + {404, _} -> ok; + {_, _} -> wait + end + end, 10000, 250). + + +wait_reschedule_docs_state(DbName, DocId, State) -> + test_util:wait(fun() -> + couch_replicator_job_server:reschedule(), + case scheduler_docs(DbName, DocId) of + {200, #{<<"state">> := State} = Res} -> Res; + {_, _} -> wait + end + end, 10000, 500). + + +wait_doc_state(DbName, DocId, State) -> + test_util:wait(fun() -> + case read_doc(DbName, DocId) of + #{<<"_replication_state">> := State} -> ok; + #{} -> wait + end + end, 10000, 250). diff --git a/src/couch_replicator/test/eunit/couch_replicator_error_reporting_tests.erl b/src/couch_replicator/test/eunit/couch_replicator_error_reporting_tests.erl deleted file mode 100644 index 6b4f95c25..000000000 --- a/src/couch_replicator/test/eunit/couch_replicator_error_reporting_tests.erl +++ /dev/null @@ -1,271 +0,0 @@ -% Licensed under the Apache License, Version 2.0 (the "License"); you may not -% use this file except in compliance with the License. You may obtain a copy of -% the License at -% -% http://www.apache.org/licenses/LICENSE-2.0 -% -% Unless required by applicable law or agreed to in writing, software -% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -% License for the specific language governing permissions and limitations under -% the License. - --module(couch_replicator_error_reporting_tests). - --include_lib("couch/include/couch_eunit.hrl"). --include_lib("couch/include/couch_db.hrl"). --include_lib("couch_replicator/src/couch_replicator.hrl"). - - -setup_all() -> - test_util:start_couch([couch_replicator, chttpd, mem3, fabric]). - - -teardown_all(Ctx) -> - ok = test_util:stop_couch(Ctx). - - -setup() -> - meck:unload(), - Source = setup_db(), - Target = setup_db(), - {Source, Target}. - - -teardown({Source, Target}) -> - meck:unload(), - teardown_db(Source), - teardown_db(Target), - ok. - - -error_reporting_test_() -> - { - setup, - fun setup_all/0, - fun teardown_all/1, - { - foreach, - fun setup/0, - fun teardown/1, - [ - fun t_fail_bulk_docs/1, - fun t_fail_changes_reader/1, - fun t_fail_revs_diff/1, - fun t_fail_changes_queue/1, - fun t_fail_changes_manager/1, - fun t_fail_changes_reader_proc/1 - ] - } - }. - - -t_fail_bulk_docs({Source, Target}) -> - ?_test(begin - populate_db(Source, 1, 5), - {ok, RepId} = replicate(Source, Target), - wait_target_in_sync(Source, Target), - - {ok, Listener} = rep_result_listener(RepId), - mock_fail_req("/_bulk_docs", {ok, "403", [], [<<"{\"x\":\"y\"}">>]}), - populate_db(Source, 6, 6), - - {error, Result} = wait_rep_result(RepId), - ?assertEqual({bulk_docs_failed, 403, {[{<<"x">>, <<"y">>}]}}, Result), - - couch_replicator_notifier:stop(Listener) - end). 
- - -t_fail_changes_reader({Source, Target}) -> - ?_test(begin - populate_db(Source, 1, 5), - {ok, RepId} = replicate(Source, Target), - wait_target_in_sync(Source, Target), - - {ok, Listener} = rep_result_listener(RepId), - mock_fail_req("/_changes", {ok, "418", [], [<<"{\"x\":\"y\"}">>]}), - populate_db(Source, 6, 6), - - {error, Result} = wait_rep_result(RepId), - ?assertEqual({changes_req_failed, 418, {[{<<"x">>, <<"y">>}]}}, Result), - - couch_replicator_notifier:stop(Listener) - end). - - -t_fail_revs_diff({Source, Target}) -> - ?_test(begin - populate_db(Source, 1, 5), - {ok, RepId} = replicate(Source, Target), - wait_target_in_sync(Source, Target), - - {ok, Listener} = rep_result_listener(RepId), - mock_fail_req("/_revs_diff", {ok, "407", [], [<<"{\"x\":\"y\"}">>]}), - populate_db(Source, 6, 6), - - {error, Result} = wait_rep_result(RepId), - ?assertEqual({revs_diff_failed, 407, {[{<<"x">>, <<"y">>}]}}, Result), - - couch_replicator_notifier:stop(Listener) - end). - - -t_fail_changes_queue({Source, Target}) -> - ?_test(begin - populate_db(Source, 1, 5), - {ok, RepId} = replicate(Source, Target), - wait_target_in_sync(Source, Target), - - RepPid = couch_replicator_test_helper:get_pid(RepId), - State = sys:get_state(RepPid), - ChangesQueue = element(20, State), - ?assert(is_process_alive(ChangesQueue)), - - {ok, Listener} = rep_result_listener(RepId), - exit(ChangesQueue, boom), - - {error, Result} = wait_rep_result(RepId), - ?assertEqual({changes_queue_died, boom}, Result), - couch_replicator_notifier:stop(Listener) - end). - - -t_fail_changes_manager({Source, Target}) -> - ?_test(begin - populate_db(Source, 1, 5), - {ok, RepId} = replicate(Source, Target), - wait_target_in_sync(Source, Target), - - RepPid = couch_replicator_test_helper:get_pid(RepId), - State = sys:get_state(RepPid), - ChangesManager = element(21, State), - ?assert(is_process_alive(ChangesManager)), - - {ok, Listener} = rep_result_listener(RepId), - exit(ChangesManager, bam), - - {error, Result} = wait_rep_result(RepId), - ?assertEqual({changes_manager_died, bam}, Result), - couch_replicator_notifier:stop(Listener) - end). - - -t_fail_changes_reader_proc({Source, Target}) -> - ?_test(begin - populate_db(Source, 1, 5), - {ok, RepId} = replicate(Source, Target), - wait_target_in_sync(Source, Target), - - RepPid = couch_replicator_test_helper:get_pid(RepId), - State = sys:get_state(RepPid), - ChangesReader = element(22, State), - ?assert(is_process_alive(ChangesReader)), - - {ok, Listener} = rep_result_listener(RepId), - exit(ChangesReader, kapow), - - {error, Result} = wait_rep_result(RepId), - ?assertEqual({changes_reader_died, kapow}, Result), - couch_replicator_notifier:stop(Listener) - end). - - -mock_fail_req(Path, Return) -> - meck:expect(ibrowse, send_req_direct, - fun(W, Url, Headers, Meth, Body, Opts, TOut) -> - Args = [W, Url, Headers, Meth, Body, Opts, TOut], - {ok, {_, _, _, _, UPath, _}} = http_uri:parse(Url), - case lists:suffix(Path, UPath) of - true -> Return; - false -> meck:passthrough(Args) - end - end). - - -rep_result_listener(RepId) -> - ReplyTo = self(), - {ok, _Listener} = couch_replicator_notifier:start_link( - fun({_, RepId2, _} = Ev) when RepId2 =:= RepId -> - ReplyTo ! Ev; - (_) -> - ok - end). - - -wait_rep_result(RepId) -> - receive - {finished, RepId, RepResult} -> {ok, RepResult}; - {error, RepId, Reason} -> {error, Reason} - end. - - - -setup_db() -> - DbName = ?tempdb(), - {ok, Db} = couch_db:create(DbName, [?ADMIN_CTX]), - ok = couch_db:close(Db), - DbName. 
- - -teardown_db(DbName) -> - ok = couch_server:delete(DbName, [?ADMIN_CTX]). - - -populate_db(DbName, Start, End) -> - {ok, Db} = couch_db:open_int(DbName, []), - Docs = lists:foldl( - fun(DocIdCounter, Acc) -> - Id = integer_to_binary(DocIdCounter), - Doc = #doc{id = Id, body = {[]}}, - [Doc | Acc] - end, - [], lists:seq(Start, End)), - {ok, _} = couch_db:update_docs(Db, Docs, []), - ok = couch_db:close(Db). - - -wait_target_in_sync(Source, Target) -> - {ok, SourceDb} = couch_db:open_int(Source, []), - {ok, SourceInfo} = couch_db:get_db_info(SourceDb), - ok = couch_db:close(SourceDb), - SourceDocCount = couch_util:get_value(doc_count, SourceInfo), - wait_target_in_sync_loop(SourceDocCount, Target, 300). - - -wait_target_in_sync_loop(_DocCount, _TargetName, 0) -> - erlang:error({assertion_failed, [ - {module, ?MODULE}, {line, ?LINE}, - {reason, "Could not get source and target databases in sync"} - ]}); - -wait_target_in_sync_loop(DocCount, TargetName, RetriesLeft) -> - {ok, Target} = couch_db:open_int(TargetName, []), - {ok, TargetInfo} = couch_db:get_db_info(Target), - ok = couch_db:close(Target), - TargetDocCount = couch_util:get_value(doc_count, TargetInfo), - case TargetDocCount == DocCount of - true -> - true; - false -> - ok = timer:sleep(500), - wait_target_in_sync_loop(DocCount, TargetName, RetriesLeft - 1) - end. - - -replicate(Source, Target) -> - SrcUrl = couch_replicator_test_helper:db_url(Source), - TgtUrl = couch_replicator_test_helper:db_url(Target), - RepObject = {[ - {<<"source">>, SrcUrl}, - {<<"target">>, TgtUrl}, - {<<"continuous">>, true}, - {<<"worker_processes">>, 1}, - {<<"retries_per_request">>, 1}, - % Low connection timeout so _changes feed gets restarted quicker - {<<"connection_timeout">>, 3000} - ]}, - {ok, Rep} = couch_replicator_utils:parse_rep_doc(RepObject, ?ADMIN_USER), - ok = couch_replicator_scheduler:add_job(Rep), - couch_replicator_scheduler:reschedule(), - {ok, Rep#rep.id}. diff --git a/src/couch_replicator/test/eunit/couch_replicator_filtered_tests.erl b/src/couch_replicator/test/eunit/couch_replicator_filtered_tests.erl index 7ac9a4d71..4d72c84f2 100644 --- a/src/couch_replicator/test/eunit/couch_replicator_filtered_tests.erl +++ b/src/couch_replicator/test/eunit/couch_replicator_filtered_tests.erl @@ -12,17 +12,20 @@ -module(couch_replicator_filtered_tests). + -include_lib("couch/include/couch_eunit.hrl"). -include_lib("couch/include/couch_db.hrl"). -include_lib("couch_replicator/src/couch_replicator.hrl"). +-include_lib("fabric/test/fabric2_test.hrl"). --define(DDOC, {[ - {<<"_id">>, <<"_design/filter_ddoc">>}, - {<<"filters">>, {[ - {<<"testfilter">>, <<" +-define(DDOC_ID, <<"_design/filter_ddoc">>). +-define(DDOC, #{ + <<"_id">> => ?DDOC_ID, + <<"filters">> => #{ + <<"testfilter">> => <<" function(doc, req){if (doc.class == 'mammal') return true;} - ">>}, - {<<"queryfilter">>, <<" + ">>, + <<"queryfilter">> => <<" function(doc, req) { if (doc.class && req.query.starts) { return doc.class.indexOf(req.query.starts) === 0; @@ -31,99 +34,87 @@ return false; } } - ">>} - ]}}, - {<<"views">>, {[ - {<<"mammals">>, {[ - {<<"map">>, <<" + ">> + }, + <<"views">> => #{ + <<"mammals">> => #{ + <<"map">> => <<" function(doc) { if (doc.class == 'mammal') { emit(doc._id, null); } } - ">>} - ]}} - ]}} -]}). - -setup(_) -> - Ctx = test_util:start_couch([couch_replicator]), - Source = create_db(), - create_docs(Source), - Target = create_db(), - {Ctx, {Source, Target}}. + ">> + } + } +}). 
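One pattern worth calling out: the old test expressed the design document as nested EJSON tuples, while the rewritten test uses Erlang maps; both describe the same JSON object, and the helper presumably encodes them identically apart from key order. Side by side, as a trimmed sketch:

%% Old EJSON tuple form (as removed above), trimmed for brevity:
ejson_form() ->
    {[
        {<<"_id">>, <<"_design/filter_ddoc">>},
        {<<"filters">>, {[{<<"testfilter">>, <<"function(doc, req){ ... }">>}]}}
    ]}.

%% Map form used by the updated test:
map_form() ->
    #{
        <<"_id">> => <<"_design/filter_ddoc">>,
        <<"filters">> => #{<<"testfilter">> => <<"function(doc, req){ ... }">>}
    }.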
-teardown(_, {Ctx, {Source, Target}}) -> - delete_db(Source), - delete_db(Target), - ok = application:stop(couch_replicator), - ok = test_util:stop_couch(Ctx). filtered_replication_test_() -> - Pairs = [{remote, remote}], { - "Filtered replication tests", + "Replications with filters tests", { - foreachx, - fun setup/1, fun teardown/2, - [{Pair, fun should_succeed/2} || Pair <- Pairs] + setup, + fun couch_replicator_test_helper:start_couch/0, + fun couch_replicator_test_helper:stop_couch/1, + { + foreach, + fun setup/0, + fun teardown/1, + [ + ?TDEF_FE(filtered_replication_test), + ?TDEF_FE(query_filtered_replication_test), + ?TDEF_FE(view_filtered_replication_test), + ?TDEF_FE(replication_id_changes_if_filter_changes, 15) + ] + } } }. -query_filtered_replication_test_() -> - Pairs = [{remote, remote}], - { - "Filtered with query replication tests", - { - foreachx, - fun setup/1, fun teardown/2, - [{Pair, fun should_succeed_with_query/2} || Pair <- Pairs] - } - }. -view_filtered_replication_test_() -> - Pairs = [{remote, remote}], - { - "Filtered with a view replication tests", - { - foreachx, - fun setup/1, fun teardown/2, - [{Pair, fun should_succeed_with_view/2} || Pair <- Pairs] - } - }. +setup() -> + Source = couch_replicator_test_helper:create_db(), + create_docs(Source), + Target = couch_replicator_test_helper:create_db(), + config:set("replicator", "stats_update_interval_sec", "0", false), + config:set("replicator", "interval_sec", "1", false), + {Source, Target}. + + +teardown({Source, Target}) -> + config:delete("replicator", "stats_update_interval_sec", false), + config:delete("replicator", "checkpoint_interval", false), + config:delete("replicator", "interval_sec", false), + couch_replicator_test_helper:delete_db(Source), + couch_replicator_test_helper:delete_db(Target). -should_succeed({From, To}, {_Ctx, {Source, Target}}) -> + +filtered_replication_test({Source, Target}) -> RepObject = {[ - {<<"source">>, db_url(From, Source)}, - {<<"target">>, db_url(To, Target)}, + {<<"source">>, Source}, + {<<"target">>, Target}, {<<"filter">>, <<"filter_ddoc/testfilter">>} ]}, - {ok, _} = couch_replicator:replicate(RepObject, ?ADMIN_USER), - %% FilteredFun is an Erlang version of following JS function - %% function(doc, req){if (doc.class == 'mammal') return true;} + {ok, _} = couch_replicator_test_helper:replicate(RepObject), FilterFun = fun(_DocId, {Props}) -> couch_util:get_value(<<"class">>, Props) == <<"mammal">> end, {ok, TargetDbInfo, AllReplies} = compare_dbs(Source, Target, FilterFun), - {lists:flatten(io_lib:format("~p -> ~p", [From, To])), [ - {"Target DB has proper number of docs", - ?_assertEqual(1, proplists:get_value(doc_count, TargetDbInfo))}, - {"Target DB doesn't have deleted docs", - ?_assertEqual(0, proplists:get_value(doc_del_count, TargetDbInfo))}, - {"All the docs filtered as expected", - ?_assert(lists:all(fun(Valid) -> Valid end, AllReplies))} - ]}. - -should_succeed_with_query({From, To}, {_Ctx, {Source, Target}}) -> + ?assertEqual(1, proplists:get_value(doc_count, TargetDbInfo)), + ?assertEqual(0, proplists:get_value(doc_del_count, TargetDbInfo)), + ?assert(lists:all(fun(Valid) -> Valid end, AllReplies)). 
+ + +query_filtered_replication_test({Source, Target}) -> RepObject = {[ - {<<"source">>, db_url(From, Source)}, - {<<"target">>, db_url(To, Target)}, + {<<"source">>, Source}, + {<<"target">>, Target}, {<<"filter">>, <<"filter_ddoc/queryfilter">>}, {<<"query_params">>, {[ {<<"starts">>, <<"a">>} ]}} ]}, - {ok, _} = couch_replicator:replicate(RepObject, ?ADMIN_USER), + {ok, _} = couch_replicator_test_helper:replicate(RepObject), FilterFun = fun(_DocId, {Props}) -> case couch_util:get_value(<<"class">>, Props) of <<"a", _/binary>> -> true; @@ -131,109 +122,144 @@ should_succeed_with_query({From, To}, {_Ctx, {Source, Target}}) -> end end, {ok, TargetDbInfo, AllReplies} = compare_dbs(Source, Target, FilterFun), - {lists:flatten(io_lib:format("~p -> ~p", [From, To])), [ - {"Target DB has proper number of docs", - ?_assertEqual(2, proplists:get_value(doc_count, TargetDbInfo))}, - {"Target DB doesn't have deleted docs", - ?_assertEqual(0, proplists:get_value(doc_del_count, TargetDbInfo))}, - {"All the docs filtered as expected", - ?_assert(lists:all(fun(Valid) -> Valid end, AllReplies))} - ]}. - -should_succeed_with_view({From, To}, {_Ctx, {Source, Target}}) -> + ?assertEqual(2, proplists:get_value(doc_count, TargetDbInfo)), + ?assertEqual(0, proplists:get_value(doc_del_count, TargetDbInfo)), + ?assert(lists:all(fun(Valid) -> Valid end, AllReplies)). + + +view_filtered_replication_test({Source, Target}) -> RepObject = {[ - {<<"source">>, db_url(From, Source)}, - {<<"target">>, db_url(To, Target)}, + {<<"source">>, Source}, + {<<"target">>, Target}, {<<"filter">>, <<"_view">>}, {<<"query_params">>, {[ {<<"view">>, <<"filter_ddoc/mammals">>} ]}} ]}, - {ok, _} = couch_replicator:replicate(RepObject, ?ADMIN_USER), + {ok, _} = couch_replicator_test_helper:replicate(RepObject), FilterFun = fun(_DocId, {Props}) -> couch_util:get_value(<<"class">>, Props) == <<"mammal">> end, {ok, TargetDbInfo, AllReplies} = compare_dbs(Source, Target, FilterFun), - {lists:flatten(io_lib:format("~p -> ~p", [From, To])), [ - {"Target DB has proper number of docs", - ?_assertEqual(1, proplists:get_value(doc_count, TargetDbInfo))}, - {"Target DB doesn't have deleted docs", - ?_assertEqual(0, proplists:get_value(doc_del_count, TargetDbInfo))}, - {"All the docs filtered as expected", - ?_assert(lists:all(fun(Valid) -> Valid end, AllReplies))} - ]}. + ?assertEqual(1, proplists:get_value(doc_count, TargetDbInfo)), + ?assertEqual(0, proplists:get_value(doc_del_count, TargetDbInfo)), + ?assert(lists:all(fun(Valid) -> Valid end, AllReplies)). 
+ + +replication_id_changes_if_filter_changes({Source, Target}) -> + config:set("replicator", "checkpoint_interval", "500", false), + Rep = {[ + {<<"source">>, Source}, + {<<"target">>, Target}, + {<<"filter">>, <<"filter_ddoc/testfilter">>}, + {<<"continuous">>, true} + ]}, + {ok, _, RepId1} = couch_replicator_test_helper:replicate_continuous(Rep), + + wait_scheduler_docs_written(1), + + ?assertMatch([#{<<"id">> := RepId1}], + couch_replicator_test_helper:scheduler_jobs()), + + FilterFun1 = fun(_, {Props}) -> + couch_util:get_value(<<"class">>, Props) == <<"mammal">> + end, + {ok, TargetDbInfo1, AllReplies1} = compare_dbs(Source, Target, FilterFun1), + ?assertEqual(1, proplists:get_value(doc_count, TargetDbInfo1)), + ?assert(lists:all(fun(Valid) -> Valid end, AllReplies1)), + + {ok, SourceDb} = fabric2_db:open(Source, [?ADMIN_CTX]), + {ok, DDoc1} = fabric2_db:open_doc(SourceDb, ?DDOC_ID), + Flt = <<"function(doc, req) {if (doc.class == 'reptiles') return true};">>, + DDoc2 = DDoc1#doc{body = {[ + {<<"filters">>, {[ + {<<"testfilter">>, Flt} + ]}} + ]}}, + {ok, {_, _}} = fabric2_db:update_doc(SourceDb, DDoc2), + Info = wait_scheduler_repid_change(RepId1), + + RepId2 = maps:get(<<"id">>, Info), + ?assert(RepId1 =/= RepId2), + + wait_scheduler_docs_written(1), + + FilterFun2 = fun(_, {Props}) -> + Class = couch_util:get_value(<<"class">>, Props), + Class == <<"mammal">> orelse Class == <<"reptiles">> + end, + {ok, TargetDbInfo2, AllReplies2} = compare_dbs(Source, Target, FilterFun2), + ?assertEqual(2, proplists:get_value(doc_count, TargetDbInfo2)), + ?assert(lists:all(fun(Valid) -> Valid end, AllReplies2)), + + couch_replicator_test_helper:cancel(RepId2). + compare_dbs(Source, Target, FilterFun) -> - {ok, SourceDb} = couch_db:open_int(Source, []), - {ok, TargetDb} = couch_db:open_int(Target, []), - {ok, TargetDbInfo} = couch_db:get_db_info(TargetDb), - Fun = fun(FullDocInfo, Acc) -> - {ok, DocId, SourceDoc} = read_doc(SourceDb, FullDocInfo), - TargetReply = read_doc(TargetDb, DocId), - case FilterFun(DocId, SourceDoc) of - true -> - ValidReply = {ok, DocId, SourceDoc} == TargetReply, - {ok, [ValidReply|Acc]}; - false -> - ValidReply = {not_found, missing} == TargetReply, - {ok, [ValidReply|Acc]} + {ok, TargetDb} = fabric2_db:open(Target, [?ADMIN_CTX]), + {ok, TargetDbInfo} = fabric2_db:get_db_info(TargetDb), + Fun = fun(SrcDoc, TgtDoc, Acc) -> + case FilterFun(SrcDoc#doc.id, SrcDoc#doc.body) of + true -> [SrcDoc == TgtDoc | Acc]; + false -> [not_found == TgtDoc | Acc] end end, - {ok, AllReplies} = couch_db:fold_docs(SourceDb, Fun, [], []), - ok = couch_db:close(SourceDb), - ok = couch_db:close(TargetDb), - {ok, TargetDbInfo, AllReplies}. - -read_doc(Db, DocIdOrInfo) -> - case couch_db:open_doc(Db, DocIdOrInfo) of - {ok, Doc} -> - {Props} = couch_doc:to_json_obj(Doc, [attachments]), - DocId = couch_util:get_value(<<"_id">>, Props), - {ok, DocId, {Props}}; - Error -> - Error - end. - -create_db() -> - DbName = ?tempdb(), - {ok, Db} = couch_db:create(DbName, [?ADMIN_CTX]), - ok = couch_db:close(Db), - DbName. + Res = couch_replicator_test_helper:compare_fold(Source, Target, Fun, []), + {ok, TargetDbInfo, Res}. 
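compare_dbs/3 above delegates to couch_replicator_test_helper:compare_fold/4, whose implementation is not part of this hunk. From its use here, the assumed contract is that the fun is called once per source document with the source #doc{} record, the matching target #doc{} record or the atom not_found, and the accumulator, and that the final accumulator is returned. A small hypothetical usage under that assumption:

%% Hypothetical usage, assuming the contract described above; the real
%% helper lives in couch_replicator_test_helper and may differ in detail.
count_docs_missing_on_target(Source, Target) ->
    couch_replicator_test_helper:compare_fold(Source, Target, fun
        (_SrcDoc, not_found, Acc) -> Acc + 1;
        (_SrcDoc, #doc{}, Acc) -> Acc
    end, 0).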
+ create_docs(DbName) -> - {ok, Db} = couch_db:open(DbName, [?ADMIN_CTX]), - DDoc = couch_doc:from_json_obj(?DDOC), - Doc1 = couch_doc:from_json_obj({[ - {<<"_id">>, <<"doc1">>}, - {<<"class">>, <<"mammal">>}, - {<<"value">>, 1} - - ]}), - Doc2 = couch_doc:from_json_obj({[ - {<<"_id">>, <<"doc2">>}, - {<<"class">>, <<"amphibians">>}, - {<<"value">>, 2} - - ]}), - Doc3 = couch_doc:from_json_obj({[ - {<<"_id">>, <<"doc3">>}, - {<<"class">>, <<"reptiles">>}, - {<<"value">>, 3} - - ]}), - Doc4 = couch_doc:from_json_obj({[ - {<<"_id">>, <<"doc4">>}, - {<<"class">>, <<"arthropods">>}, - {<<"value">>, 2} - - ]}), - {ok, _} = couch_db:update_docs(Db, [DDoc, Doc1, Doc2, Doc3, Doc4]), - couch_db:close(Db). - -delete_db(DbName) -> - ok = couch_server:delete(DbName, [?ADMIN_CTX]). - -db_url(remote, DbName) -> - Addr = config:get("httpd", "bind_address", "127.0.0.1"), - Port = mochiweb_socket_server:get(couch_httpd, port), - ?l2b(io_lib:format("http://~s:~b/~s", [Addr, Port, DbName])). + couch_replicator_test_helper:create_docs(DbName, [ + ?DDOC, + #{ + <<"_id">> => <<"doc1">>, + <<"class">> => <<"mammal">>, + <<"value">> => 1 + }, + #{ + <<"_id">> => <<"doc2">>, + <<"class">> => <<"amphibians">>, + <<"value">> => 2 + }, + #{ + <<"_id">> => <<"doc3">>, + <<"class">> => <<"reptiles">>, + <<"value">> => 3 + }, + #{ + <<"_id">> => <<"doc4">>, + <<"class">> => <<"arthropods">>, + <<"value">> => 2 + } + ]). + + +wait_scheduler_docs_written(DocsWritten) -> + test_util:wait(fun() -> + case couch_replicator_test_helper:scheduler_jobs() of + [] -> + wait; + [#{<<"info">> := null}] -> + wait; + [#{<<"info">> := Info}] -> + case Info of + #{<<"docs_written">> := DocsWritten} -> Info; + #{} -> wait + end + end + end, 10000, 250). + + +wait_scheduler_repid_change(OldRepId) -> + test_util:wait(fun() -> + case couch_replicator_test_helper:scheduler_jobs() of + [] -> + wait; + [#{<<"id">> := OldRepId}] -> + wait; + [#{<<"id">> := null}] -> + wait; + [#{<<"id">> := NewId} = Info] when is_binary(NewId) -> + Info + end + end, 10000, 250). diff --git a/src/couch_replicator/test/eunit/couch_replicator_httpc_pool_tests.erl b/src/couch_replicator/test/eunit/couch_replicator_httpc_pool_tests.erl index c4ad4e9b6..6c61446cc 100644 --- a/src/couch_replicator/test/eunit/couch_replicator_httpc_pool_tests.erl +++ b/src/couch_replicator/test/eunit/couch_replicator_httpc_pool_tests.erl @@ -12,17 +12,13 @@ -module(couch_replicator_httpc_pool_tests). + -include_lib("couch/include/couch_eunit.hrl"). -include_lib("couch/include/couch_db.hrl"). - --define(TIMEOUT, 1000). +-include_lib("fabric/test/fabric2_test.hrl"). -setup() -> - spawn_pool(). - -teardown(Pool) -> - stop_pool(Pool). +-define(TIMEOUT, 1000). httpc_pool_test_() -> @@ -30,75 +26,81 @@ httpc_pool_test_() -> "httpc pool tests", { setup, - fun() -> test_util:start_couch([couch_replicator]) end, fun test_util:stop_couch/1, + fun couch_replicator_test_helper:start_couch/0, + fun couch_replicator_test_helper:stop_couch/1, { foreach, - fun setup/0, fun teardown/1, + fun setup/0, + fun teardown/1, [ - fun should_block_new_clients_when_full/1, - fun should_replace_worker_on_death/1 + ?TDEF_FE(should_block_new_clients_when_full), + ?TDEF_FE(should_replace_worker_on_death) ] } } }. +setup() -> + spawn_pool(). + + +teardown(Pool) -> + stop_pool(Pool). 
+ + should_block_new_clients_when_full(Pool) -> - ?_test(begin - Client1 = spawn_client(Pool), - Client2 = spawn_client(Pool), - Client3 = spawn_client(Pool), + Client1 = spawn_client(Pool), + Client2 = spawn_client(Pool), + Client3 = spawn_client(Pool), + + ?assertEqual(ok, ping_client(Client1)), + ?assertEqual(ok, ping_client(Client2)), + ?assertEqual(ok, ping_client(Client3)), - ?assertEqual(ok, ping_client(Client1)), - ?assertEqual(ok, ping_client(Client2)), - ?assertEqual(ok, ping_client(Client3)), + Worker1 = get_client_worker(Client1, "1"), + Worker2 = get_client_worker(Client2, "2"), + Worker3 = get_client_worker(Client3, "3"), - Worker1 = get_client_worker(Client1, "1"), - Worker2 = get_client_worker(Client2, "2"), - Worker3 = get_client_worker(Client3, "3"), + ?assert(is_process_alive(Worker1)), + ?assert(is_process_alive(Worker2)), + ?assert(is_process_alive(Worker3)), - ?assert(is_process_alive(Worker1)), - ?assert(is_process_alive(Worker2)), - ?assert(is_process_alive(Worker3)), + ?assertNotEqual(Worker1, Worker2), + ?assertNotEqual(Worker2, Worker3), + ?assertNotEqual(Worker3, Worker1), - ?assertNotEqual(Worker1, Worker2), - ?assertNotEqual(Worker2, Worker3), - ?assertNotEqual(Worker3, Worker1), + Client4 = spawn_client(Pool), + ?assertEqual(timeout, ping_client(Client4)), - Client4 = spawn_client(Pool), - ?assertEqual(timeout, ping_client(Client4)), + ?assertEqual(ok, stop_client(Client1)), + ?assertEqual(ok, ping_client(Client4)), - ?assertEqual(ok, stop_client(Client1)), - ?assertEqual(ok, ping_client(Client4)), + Worker4 = get_client_worker(Client4, "4"), + ?assertEqual(Worker1, Worker4), - Worker4 = get_client_worker(Client4, "4"), - ?assertEqual(Worker1, Worker4), + lists:foreach(fun(C) -> + ?assertEqual(ok, stop_client(C)) + end, [Client2, Client3, Client4]). - lists:foreach( - fun(C) -> - ?assertEqual(ok, stop_client(C)) - end, [Client2, Client3, Client4]) - end). should_replace_worker_on_death(Pool) -> - ?_test(begin - Client1 = spawn_client(Pool), - ?assertEqual(ok, ping_client(Client1)), - Worker1 = get_client_worker(Client1, "1"), - ?assert(is_process_alive(Worker1)), + Client1 = spawn_client(Pool), + ?assertEqual(ok, ping_client(Client1)), + Worker1 = get_client_worker(Client1, "1"), + ?assert(is_process_alive(Worker1)), - ?assertEqual(ok, kill_client_worker(Client1)), - ?assertNot(is_process_alive(Worker1)), - ?assertEqual(ok, stop_client(Client1)), + ?assertEqual(ok, kill_client_worker(Client1)), + ?assertNot(is_process_alive(Worker1)), + ?assertEqual(ok, stop_client(Client1)), - Client2 = spawn_client(Pool), - ?assertEqual(ok, ping_client(Client2)), - Worker2 = get_client_worker(Client2, "2"), - ?assert(is_process_alive(Worker2)), + Client2 = spawn_client(Pool), + ?assertEqual(ok, ping_client(Client2)), + Worker2 = get_client_worker(Client2, "2"), + ?assert(is_process_alive(Worker2)), - ?assertNotEqual(Worker1, Worker2), - ?assertEqual(ok, stop_client(Client2)) - end). + ?assertNotEqual(Worker1, Worker2), + ?assertEqual(ok, stop_client(Client2)). spawn_client(Pool) -> @@ -110,6 +112,7 @@ spawn_client(Pool) -> end), {Pid, Ref}. + ping_client({Pid, Ref}) -> Pid ! ping, receive @@ -119,18 +122,18 @@ ping_client({Pid, Ref}) -> timeout end. + get_client_worker({Pid, Ref}, ClientName) -> Pid ! 
get_worker, receive {worker, Ref, Worker} -> Worker after ?TIMEOUT -> - erlang:error( - {assertion_failed, - [{module, ?MODULE}, {line, ?LINE}, - {reason, "Timeout getting client " ++ ClientName ++ " worker"}]}) + erlang:error({assertion_failed, [{module, ?MODULE}, {line, ?LINE}, + {reason, "Timeout getting client " ++ ClientName ++ " worker"}]}) end. + stop_client({Pid, Ref}) -> Pid ! stop, receive @@ -140,6 +143,7 @@ stop_client({Pid, Ref}) -> timeout end. + kill_client_worker({Pid, Ref}) -> Pid ! get_worker, receive @@ -150,6 +154,7 @@ kill_client_worker({Pid, Ref}) -> timeout end. + loop(Parent, Ref, Worker, Pool) -> receive ping -> @@ -163,12 +168,14 @@ loop(Parent, Ref, Worker, Pool) -> Parent ! {stop, Ref} end. + spawn_pool() -> - Host = config:get("httpd", "bind_address", "127.0.0.1"), - Port = config:get("httpd", "port", "5984"), + Host = config:get("chttpd", "bind_address", "127.0.0.1"), + Port = config:get("chttpd", "port", "5984"), {ok, Pool} = couch_replicator_httpc_pool:start_link( "http://" ++ Host ++ ":" ++ Port, [{max_connections, 3}]), Pool. + stop_pool(Pool) -> ok = couch_replicator_httpc_pool:stop(Pool). diff --git a/src/couch_replicator/test/eunit/couch_replicator_id_too_long_tests.erl b/src/couch_replicator/test/eunit/couch_replicator_id_too_long_tests.erl index a4696c4b8..3a0e6f7bd 100644 --- a/src/couch_replicator/test/eunit/couch_replicator_id_too_long_tests.erl +++ b/src/couch_replicator/test/eunit/couch_replicator_id_too_long_tests.erl @@ -15,76 +15,57 @@ -include_lib("couch/include/couch_eunit.hrl"). -include_lib("couch/include/couch_db.hrl"). -include_lib("couch_replicator/src/couch_replicator.hrl"). - - -setup(_) -> - Ctx = test_util:start_couch([couch_replicator]), - Source = create_db(), - create_doc(Source), - Target = create_db(), - {Ctx, {Source, Target}}. - - -teardown(_, {Ctx, {Source, Target}}) -> - delete_db(Source), - delete_db(Target), - config:set("replicator", "max_document_id_length", "infinity"), - ok = test_util:stop_couch(Ctx). +-include_lib("fabric/test/fabric2_test.hrl"). id_too_long_replication_test_() -> - Pairs = [{remote, remote}], { "Doc id too long tests", { - foreachx, - fun setup/1, fun teardown/2, - [{Pair, fun should_succeed/2} || Pair <- Pairs] ++ - [{Pair, fun should_fail/2} || Pair <- Pairs] + setup, + fun couch_replicator_test_helper:start_couch/0, + fun couch_replicator_test_helper:stop_couch/1, + { + foreach, + fun setup/0, + fun teardown/1, + [ + ?TDEF_FE(should_succeed), + ?TDEF_FE(should_fail) + + ] + } } }. -should_succeed({From, To}, {_Ctx, {Source, Target}}) -> - RepObject = {[ - {<<"source">>, db_url(From, Source)}, - {<<"target">>, db_url(To, Target)} - ]}, - config:set("replicator", "max_document_id_length", "5"), - {ok, _} = couch_replicator:replicate(RepObject, ?ADMIN_USER), - ?_assertEqual(ok, couch_replicator_test_helper:compare_dbs(Source, Target)). - +setup() -> + Source = couch_replicator_test_helper:create_db(), + create_doc(Source), + Target = couch_replicator_test_helper:create_db(), + {Source, Target}. -should_fail({From, To}, {_Ctx, {Source, Target}}) -> - RepObject = {[ - {<<"source">>, db_url(From, Source)}, - {<<"target">>, db_url(To, Target)} - ]}, - config:set("replicator", "max_document_id_length", "4"), - {ok, _} = couch_replicator:replicate(RepObject, ?ADMIN_USER), - ?_assertError({badmatch, {not_found, missing}}, - couch_replicator_test_helper:compare_dbs(Source, Target)). 
+teardown({Source, Target}) -> + config:delete("replicator", "max_document_id_length", false), + couch_replicator_test_helper:delete_db(Source), + couch_replicator_test_helper:delete_db(Target). -create_db() -> - DbName = ?tempdb(), - {ok, Db} = couch_db:create(DbName, [?ADMIN_CTX]), - ok = couch_db:close(Db), - DbName. +should_succeed({Source, Target}) -> + config:set("replicator", "max_document_id_length", "5", false), + {ok, _} = couch_replicator_test_helper:replicate(Source, Target), + ?assertEqual(ok, couch_replicator_test_helper:compare_dbs(Source, Target)). -create_doc(DbName) -> - {ok, Db} = couch_db:open(DbName, [?ADMIN_CTX]), - Doc = couch_doc:from_json_obj({[{<<"_id">>, <<"12345">>}]}), - {ok, _} = couch_db:update_doc(Db, Doc, []), - couch_db:close(Db). +should_fail({Source, Target}) -> + config:set("replicator", "max_document_id_length", "4", false), + {ok, _} = couch_replicator_test_helper:replicate(Source, Target), + ExceptIds = [<<"12345">>], + ?assertEqual(ok, couch_replicator_test_helper:compare_dbs(Source, Target, + ExceptIds)). -delete_db(DbName) -> - ok = couch_server:delete(DbName, [?ADMIN_CTX]). - -db_url(remote, DbName) -> - Addr = config:get("httpd", "bind_address", "127.0.0.1"), - Port = mochiweb_socket_server:get(couch_httpd, port), - ?l2b(io_lib:format("http://~s:~b/~s", [Addr, Port, DbName])). +create_doc(DbName) -> + Docs = [#{<<"_id">> => <<"12345">>}], + couch_replicator_test_helper:create_docs(DbName, Docs). diff --git a/src/couch_replicator/test/eunit/couch_replicator_job_server_tests.erl b/src/couch_replicator/test/eunit/couch_replicator_job_server_tests.erl new file mode 100644 index 000000000..921f29fed --- /dev/null +++ b/src/couch_replicator/test/eunit/couch_replicator_job_server_tests.erl @@ -0,0 +1,449 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_replicator_job_server_tests). + +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("fabric/test/fabric2_test.hrl"). + + +-define(SHUTDOWN_TIMEOUT, 1000). +-define(JOB_SERVER, couch_replicator_job_server). + + +job_server_test_() -> + { + "Test job server", + { + setup, + fun setup_all/0, + fun teardown_all/1, + { + foreach, + fun setup/0, + fun teardown/1, + [ + ?TDEF_FE(should_start_up), + ?TDEF_FE(reschedule_resets_timer), + ?TDEF_FE(reschedule_reads_config), + ?TDEF_FE(acceptors_spawned_if_pending), + ?TDEF_FE(acceptors_not_spawned_if_no_pending), + ?TDEF_FE(acceptors_not_spawned_if_no_max_churn), + ?TDEF_FE(acceptors_not_spawned_if_no_churn_budget), + ?TDEF_FE(acceptors_spawned_on_acceptor_exit), + ?TDEF_FE(acceptor_turns_into_worker), + ?TDEF_FE(acceptors_spawned_on_worker_exit), + ?TDEF_FE(excess_acceptors_spawned), + ?TDEF_FE(excess_workers_trimmed_on_reschedule), + ?TDEF_FE(recent_workers_are_not_stopped) + ] + } + } + }. 
+ + +setup_all() -> + Ctx = test_util:start_couch(), + meck:new(couch_replicator_job_server, [passthrough]), + mock_pending(0), + meck:expect(couch_replicator_jobs, set_timeout, 0, ok), + meck:expect(couch_replicator_jobs, fold_jobs, 3, ok), + meck:expect(couch_replicator_job, start_link, fun() -> + {ok, spawn_link(fun() -> start_job() end)} + end), + Ctx. + + +teardown_all(Ctx) -> + meck:unload(), + config_delete("interval_sec"), + config_delete("max_acceptors"), + config_delete("max_jobs"), + config_delete("max_churn"), + config_delete("min_run_time_sec"), + config_delete("transient_job_max_age_sec"), + test_util:stop_couch(Ctx). + + +setup() -> + config_set("interval_sec", "99999"), + config_set("max_acceptors", "0"), + config_set("max_jobs", "0"), + config_set("max_churn", "1"), + config_set("min_run_time_sec", "0"), + config_set("transient_job_max_age_sec", "99999"), + + mock_pending(0), + + {ok, SPid} = ?JOB_SERVER:start_link(?SHUTDOWN_TIMEOUT), + SPid. + + +teardown(SPid) when is_pid(SPid) -> + unlink(SPid), + Ref = monitor(process, SPid), + exit(SPid, kill), + receive {'DOWN', Ref, _, _, _} -> ok end, + + meck:reset(couch_replicator_jobs), + meck:reset(couch_replicator_job), + meck:reset(couch_replicator_job_server), + + config_delete("interval_sec"), + config_delete("max_acceptors"), + config_delete("max_jobs"), + config_delete("max_churn"), + config_delete("min_run_time_sec"), + config_delete("transient_job_max_age_sec"). + + +should_start_up(SPid) -> + ?assert(is_process_alive(SPid)), + ?assertEqual(SPid, whereis(?JOB_SERVER)), + State = sys:get_state(?JOB_SERVER), + #{ + acceptors := #{}, + workers := #{}, + churn := 0, + config := Config, + timer := Timer, + timeout := ?SHUTDOWN_TIMEOUT + } = State, + + % Make sure it read the config + ?assertMatch(#{ + max_acceptors := 0, + interval_sec := 99999, + max_jobs := 0, + max_churn := 1, + min_run_time_sec := 0, + transient_job_max_age_sec := 99999 + }, Config), + + % Timer was set up + ?assert(is_reference(Timer)), + ?assert(is_integer(erlang:read_timer(Timer))). + + +reschedule_resets_timer(_) -> + #{timer := OldTimer} = sys:get_state(?JOB_SERVER), + + ?assertEqual(ok, ?JOB_SERVER:reschedule()), + + #{timer := Timer} = sys:get_state(?JOB_SERVER), + ?assert(is_reference(Timer)), + ?assert(Timer =/= OldTimer). + + +reschedule_reads_config(_) -> + config_set("interval_sec", "99998"), + + ?JOB_SERVER:reschedule(), + + #{config := Config} = sys:get_state(?JOB_SERVER), + ?assertMatch(#{interval_sec := 99998}, Config). + + +acceptors_spawned_if_pending(_) -> + config_set("max_acceptors", "1"), + mock_pending(1), + + ?JOB_SERVER:reschedule(), + + ?assertMatch([Pid] when is_pid(Pid), acceptors()). + + +acceptors_not_spawned_if_no_pending(_) -> + config_set("max_acceptors", "1"), + mock_pending(0), + + ?JOB_SERVER:reschedule(), + + ?assertEqual([], acceptors()). + + +acceptors_not_spawned_if_no_max_churn(_) -> + config_set("max_churn", "0"), + config_set("max_acceptors", "1"), + mock_pending(1), + + ?JOB_SERVER:reschedule(), + + ?assertEqual([], acceptors()). + + +acceptors_not_spawned_if_no_churn_budget(_) -> + config_set("max_churn", "1"), + config_set("max_acceptors", "1"), + mock_pending(0), + + % To read the config + ?JOB_SERVER:reschedule(), + + ?assertEqual([], acceptors()), + + mock_pending(1), + + % Exhaust churn budget + sys:replace_state(couch_replicator_job_server, fun(#{} = St) -> + St#{churn := 1} + end), + + ?JOB_SERVER:reschedule(), + + ?assertEqual([], acceptors()). 
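The cases above run entirely against the stubs installed in setup_all/0: couch_replicator_jobs calls are stubbed by arity with fixed return values, couch_replicator_job:start_link/0 is replaced with a fun that spawns the fake job loop, and the job server itself is mocked with the passthrough option so real calls still execute but can be observed. Two meck idioms the suite leans on, shown as a sketch:

%% Sketch only. meck:expect(Mod, Fun, Arity, Value) makes every call of that
%% arity return Value (this is what mock_pending/1 does further down), while
%% meck:wait(Mod, Fun, ArgsSpec, Timeout) blocks until the mocked module has
%% handled a matching call, which is how wait_job_exit/2 synchronizes with an
%% acceptor or worker shutting down.
pretend_three_jobs_are_pending() ->
    meck:expect(couch_replicator_jobs, pending_count, 2, 3).

wait_for_exit(Pid, Signal) ->
    meck:wait(couch_replicator_job_server, handle_info,
        [{'EXIT', Pid, Signal}, '_'], 2000).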
+ + +acceptors_spawned_on_acceptor_exit(_) -> + config_set("max_acceptors", "3"), + config_set("max_jobs", "4"), + mock_pending(1), + + ?JOB_SERVER:reschedule(), + + [A1] = acceptors(), + + exit(A1, kill), + wait_job_exit(A1, killed), + + ?assertEqual(3, length(acceptors())). + + +acceptor_turns_into_worker(_) -> + config_set("max_acceptors", "3"), + config_set("max_jobs", "4"), + mock_pending(1), + + ?JOB_SERVER:reschedule(), + + [A1] = acceptors(), + accept_job(A1, true), + ?assertEqual(3, length(acceptors())), + #{workers := Workers} = sys:get_state(?JOB_SERVER), + ?assertMatch([{A1, {true, _}}], maps:to_list(Workers)). + + +acceptors_spawned_on_worker_exit(_) -> + config_set("max_acceptors", "1"), + config_set("max_jobs", "1"), + mock_pending(1), + + ?JOB_SERVER:reschedule(), + + [A1] = acceptors(), + accept_job(A1, true), + + % Since max_jobs = 1 no more acceptors are spawned + ?assertEqual(0, length(acceptors())), + + % Same acceptor process is now a worker + ?assertEqual([A1], workers()), + + meck:reset(couch_replicator_job_server), + exit(A1, shutdown), + wait_job_exit(A1, shutdown), + + % New acceptor process started + ?assertEqual(1, length(acceptors())), + ?assertEqual(0, length(workers())). + + +excess_acceptors_spawned(_) -> + config_set("max_acceptors", "2"), + config_set("max_churn", "3"), + config_set("max_jobs", "4"), + mock_pending(100), + + ?JOB_SERVER:reschedule(), + + ?assertEqual(3, length(acceptors())), + + accept_all(), + + ?assertEqual(3, length(workers())), + ?assertEqual(1, length(acceptors())), + % Check that the churn budget was consumed + ?assertMatch(#{churn := 3}, sys:get_state(?JOB_SERVER)), + + accept_all(), + + % No more acceptors spawned after reaching max_jobs + ?assertEqual(0, length(acceptors())), + ?assertEqual(4, length(workers())), + + ?JOB_SERVER:reschedule(), + + % Since all churn budget was consumed, no new acceptors should have beens + % spawned this cycle but churn budget should have been reset + ?assertEqual(0, length(acceptors())), + ?assertEqual(4, length(workers())), + ?assertMatch(#{churn := 0}, sys:get_state(?JOB_SERVER)), + + ?JOB_SERVER:reschedule(), + + % Should have spawned 3 excess acceptors + ?assertEqual(3, length(acceptors())), + ?assertEqual(4, length(workers())), + + accept_all(), + + % Running with an excess number of workers + ?assertEqual(0, length(acceptors())), + ?assertEqual(7, length(workers())). + + +excess_workers_trimmed_on_reschedule(_) -> + config_set("max_acceptors", "2"), + config_set("max_churn", "3"), + config_set("max_jobs", "4"), + mock_pending(100), + + ?JOB_SERVER:reschedule(), + + [A1, A2, A3] = acceptors(), + accept_job(A1, true), + accept_job(A2, false), + accept_job(A3, false), + [A4] = acceptors(), + accept_job(A4, true), + + ?JOB_SERVER:reschedule(), + + % First reschedule was to reset the churn budget, this next one is to spawn + % an excess number of acceptors. + ?JOB_SERVER:reschedule(), + + [A5, A6, A7] = acceptors(), + accept_job(A5, true), + accept_job(A6, false), + accept_job(A7, false), + + ?assertEqual(7, length(workers())), + + % Running with an excess number of workers. These should be trimmed on the + % during the next cycle + meck:reset(couch_replicator_job_server), + ?JOB_SERVER:reschedule(), + wait_jobs_exit([A2, A3, A6], shutdown), + + Workers = workers(), + ?assertEqual(4, length(Workers)), + ?assertEqual(0, length(acceptors())), + + % Check that A1 and A4 were skipped since they are not continuous + ?assertEqual(Workers, Workers -- [A2, A3, A6]). 
+ + +recent_workers_are_not_stopped(_) -> + config_set("max_acceptors", "2"), + config_set("max_churn", "3"), + config_set("max_jobs", "4"), + mock_pending(100), + + ?JOB_SERVER:reschedule(), + + [A1, A2, A3] = acceptors(), + accept_job(A1, true), + accept_job(A2, false), + accept_job(A3, false), + [A4] = acceptors(), + accept_job(A4, true), + + ?JOB_SERVER:reschedule(), + + % First reschedule was to reset the churn budget, this next one is to spawn + % an excess number of acceptors. + ?JOB_SERVER:reschedule(), + + [A5, A6, A7] = acceptors(), + accept_job(A5, true), + accept_job(A6, false), + accept_job(A7, false), + + ?assertEqual(7, length(workers())), + + % Running with an excess number of workers. But they won't be stopped on + % reschedule if they ran for a period less than min_run_time_sec during the + % next cycle + config_set("min_run_time_sec", "9999"), + + % don't want to start new acceptors anymore + mock_pending(0), + config_set("max_acceptors", "0"), + + ?JOB_SERVER:reschedule(), + + ?assertEqual(7, length(workers())), + ?assertEqual(0, length(acceptors())), + + config_set("min_run_time_sec", "0"), + + meck:reset(couch_replicator_job_server), + ?JOB_SERVER:reschedule(), + wait_jobs_exit([A2, A3, A6], shutdown), + + ?assertEqual(4, length(workers())), + ?assertEqual(0, length(acceptors())). + + +config_set(K, V) -> + config:set("replicator", K, V, _Persist = false). + + +config_delete(K) -> + config:delete("replicator", K, _Persist = false). + + +mock_pending(N) -> + meck:expect(couch_replicator_jobs, pending_count, 2, N). + + +acceptors() -> + #{acceptors := Acceptors} = sys:get_state(?JOB_SERVER), + maps:keys(Acceptors). + + +workers() -> + #{workers := Workers} = sys:get_state(?JOB_SERVER), + maps:keys(Workers). + + +accept_job(APid, Normal) -> + APid ! {accept_job, Normal, self()}, + receive + {job_accepted, APid} -> ok + after + 5000 -> + error(test_job_accept_timeout) + end. + + +accept_all() -> + [accept_job(APid, true) || APid <- acceptors()]. + + +start_job() -> + receive + {accept_job, Normal, From} -> + ok = ?JOB_SERVER:accepted(self(), Normal), + From ! {job_accepted, self()}, + start_job() + end. + + +wait_jobs_exit(PidList, Signal) when is_list(PidList) -> + [wait_job_exit(Pid, Signal) || Pid <- PidList], + ok. + + +wait_job_exit(Pid, Signal) when is_pid(Pid) -> + meck:wait(?JOB_SERVER, handle_info, [{'EXIT', Pid, Signal}, '_'], 2000). diff --git a/src/couch_replicator/test/eunit/couch_replicator_large_atts_tests.erl b/src/couch_replicator/test/eunit/couch_replicator_large_atts_tests.erl index 27c89a0cd..fcbdf229f 100644 --- a/src/couch_replicator/test/eunit/couch_replicator_large_atts_tests.erl +++ b/src/couch_replicator/test/eunit/couch_replicator_large_atts_tests.erl @@ -14,12 +14,8 @@ -include_lib("couch/include/couch_eunit.hrl"). -include_lib("couch/include/couch_db.hrl"). +-include_lib("fabric/test/fabric2_test.hrl"). --import(couch_replicator_test_helper, [ - db_url/1, - replicate/2, - compare_dbs/2 -]). -define(ATT_SIZE_1, 2 * 1024 * 1024). -define(ATT_SIZE_2, round(6.6 * 1024 * 1024)). @@ -27,90 +23,65 @@ -define(TIMEOUT_EUNIT, 120). -setup() -> - DbName = ?tempdb(), - {ok, Db} = couch_db:create(DbName, [?ADMIN_CTX]), - ok = couch_db:close(Db), - DbName. - -setup(remote) -> - {remote, setup()}; -setup({A, B}) -> - Ctx = test_util:start_couch([couch_replicator]), - config:set("attachments", "compressible_types", "text/*", false), - Source = setup(A), - Target = setup(B), - {Ctx, {Source, Target}}. 
- -teardown({remote, DbName}) -> - teardown(DbName); -teardown(DbName) -> - ok = couch_server:delete(DbName, [?ADMIN_CTX]), - ok. - -teardown(_, {Ctx, {Source, Target}}) -> - teardown(Source), - teardown(Target), - - ok = application:stop(couch_replicator), - ok = test_util:stop_couch(Ctx). - large_atts_test_() -> - Pairs = [{remote, remote}], { - "Replicate docs with large attachments", + "Large attachment replication test", { - foreachx, - fun setup/1, fun teardown/2, - [{Pair, fun should_populate_replicate_compact/2} - || Pair <- Pairs] + setup, + fun couch_replicator_test_helper:start_couch/0, + fun couch_replicator_test_helper:stop_couch/1, + { + foreach, + fun setup/0, + fun teardown/1, + [ + ?TDEF_FE(should_replicate_attachments, 120) + ] + } } }. -should_populate_replicate_compact({From, To}, {_Ctx, {Source, Target}}) -> - {lists:flatten(io_lib:format("~p -> ~p", [From, To])), - {inorder, [should_populate_source(Source), - should_replicate(Source, Target), - should_compare_databases(Source, Target)]}}. +setup() -> + AttCfg = config:get("attachments", "compressible_types"), + config:set("attachments", "compressible_types", "text/*", false), + Source = couch_replicator_test_helper:create_db(), + ok = populate_db(Source, ?DOCS_COUNT), + Target = couch_replicator_test_helper:create_db(), + {AttCfg, Source, Target}. + -should_populate_source({remote, Source}) -> - should_populate_source(Source); -should_populate_source(Source) -> - {timeout, ?TIMEOUT_EUNIT, ?_test(populate_db(Source, ?DOCS_COUNT))}. +teardown({AttCfg, Source, Target}) -> + couch_replicator_test_helper:delete_db(Source), + couch_replicator_test_helper:delete_db(Target), + case AttCfg of + undefined -> + config:delete("attachments", "compressible_types", false); + _ -> + config:set("attachments", "compressible_types", AttCfg) + end. -should_replicate({remote, Source}, Target) -> - should_replicate(db_url(Source), Target); -should_replicate(Source, {remote, Target}) -> - should_replicate(Source, db_url(Target)); -should_replicate(Source, Target) -> - {timeout, ?TIMEOUT_EUNIT, ?_test(replicate(Source, Target))}. -should_compare_databases({remote, Source}, Target) -> - should_compare_databases(Source, Target); -should_compare_databases(Source, {remote, Target}) -> - should_compare_databases(Source, Target); -should_compare_databases(Source, Target) -> - {timeout, ?TIMEOUT_EUNIT, ?_test(compare_dbs(Source, Target))}. +should_replicate_attachments({_AttCfg, Source, Target}) -> + ?assertMatch({ok, _}, + couch_replicator_test_helper:replicate(Source, Target)), + ?assertEqual(ok, couch_replicator_test_helper:compare_dbs(Source, Target)). populate_db(DbName, DocCount) -> - {ok, Db} = couch_db:open_int(DbName, []), - Docs = lists:foldl( - fun(DocIdCounter, Acc) -> - Doc = #doc{ - id = iolist_to_binary(["doc", integer_to_list(DocIdCounter)]), - body = {[]}, - atts = [ - att(<<"att1">>, ?ATT_SIZE_1, <<"text/plain">>), - att(<<"att2">>, ?ATT_SIZE_2, <<"app/binary">>) - ] - }, - [Doc | Acc] - end, - [], lists:seq(1, DocCount)), - {ok, _} = couch_db:update_docs(Db, Docs, []), - couch_db:close(Db). + Docs = lists:foldl(fun(DocIdCounter, Acc) -> + Doc = #doc{ + id = iolist_to_binary(["doc", integer_to_list(DocIdCounter)]), + body = {[]}, + atts = [ + att(<<"att1">>, ?ATT_SIZE_1, <<"text/plain">>), + att(<<"att2">>, ?ATT_SIZE_2, <<"app/binary">>) + ] + }, + [Doc | Acc] + end, [], lists:seq(1, DocCount)), + couch_replicator_test_helper:create_docs(DbName, Docs). 
+ att(Name, Size, Type) -> couch_att:new([ diff --git a/src/couch_replicator/test/eunit/couch_replicator_many_leaves_tests.erl b/src/couch_replicator/test/eunit/couch_replicator_many_leaves_tests.erl index c7933b472..3dbfa6aba 100644 --- a/src/couch_replicator/test/eunit/couch_replicator_many_leaves_tests.erl +++ b/src/couch_replicator/test/eunit/couch_replicator_many_leaves_tests.erl @@ -14,11 +14,8 @@ -include_lib("couch/include/couch_eunit.hrl"). -include_lib("couch/include/couch_db.hrl"). +-include_lib("fabric/test/fabric2_test.hrl"). --import(couch_replicator_test_helper, [ - db_url/1, - replicate/2 -]). -define(DOCS_CONFLICTS, [ {<<"doc1">>, 10}, @@ -28,178 +25,150 @@ {<<"doc3">>, 210} ]). -define(NUM_ATTS, 2). --define(TIMEOUT_EUNIT, 60). -define(i2l(I), integer_to_list(I)). -define(io2b(Io), iolist_to_binary(Io)). -setup() -> - DbName = ?tempdb(), - {ok, Db} = couch_db:create(DbName, [?ADMIN_CTX]), - ok = couch_db:close(Db), - DbName. - - -setup(remote) -> - {remote, setup()}; -setup({A, B}) -> - Ctx = test_util:start_couch([couch_replicator]), - Source = setup(A), - Target = setup(B), - {Ctx, {Source, Target}}. - -teardown({remote, DbName}) -> - teardown(DbName); -teardown(DbName) -> - ok = couch_server:delete(DbName, [?ADMIN_CTX]), - ok. - -teardown(_, {Ctx, {Source, Target}}) -> - teardown(Source), - teardown(Target), - ok = application:stop(couch_replicator), - ok = test_util:stop_couch(Ctx). docs_with_many_leaves_test_() -> - Pairs = [{remote, remote}], { "Replicate documents with many leaves", { - foreachx, - fun setup/1, fun teardown/2, - [{Pair, fun should_populate_replicate_compact/2} - || Pair <- Pairs] + setup, + fun couch_replicator_test_helper:start_couch/0, + fun couch_replicator_test_helper:stop_couch/1, + { + foreach, + fun setup/0, + fun teardown/1, + [ + ?TDEF_FE(should_replicate_doc_with_many_leaves, 180) + ] + } } }. -should_populate_replicate_compact({From, To}, {_Ctx, {Source, Target}}) -> - {lists:flatten(io_lib:format("~p -> ~p", [From, To])), - {inorder, [ - should_populate_source(Source), - should_replicate(Source, Target), - should_verify_target(Source, Target), - should_add_attachments_to_source(Source), - should_replicate(Source, Target), - should_verify_target(Source, Target) - ]}}. - -should_populate_source({remote, Source}) -> - should_populate_source(Source); -should_populate_source(Source) -> - {timeout, ?TIMEOUT_EUNIT, ?_test(populate_db(Source))}. - -should_replicate({remote, Source}, Target) -> - should_replicate(db_url(Source), Target); -should_replicate(Source, {remote, Target}) -> - should_replicate(Source, db_url(Target)); -should_replicate(Source, Target) -> - {timeout, ?TIMEOUT_EUNIT, ?_test(replicate(Source, Target))}. - -should_verify_target({remote, Source}, Target) -> - should_verify_target(Source, Target); -should_verify_target(Source, {remote, Target}) -> - should_verify_target(Source, Target); -should_verify_target(Source, Target) -> - {timeout, ?TIMEOUT_EUNIT, ?_test(begin - {ok, SourceDb} = couch_db:open_int(Source, []), - {ok, TargetDb} = couch_db:open_int(Target, []), - verify_target(SourceDb, TargetDb, ?DOCS_CONFLICTS), - ok = couch_db:close(SourceDb), - ok = couch_db:close(TargetDb) - end)}. 
- -should_add_attachments_to_source({remote, Source}) -> - should_add_attachments_to_source(Source); -should_add_attachments_to_source(Source) -> - {timeout, ?TIMEOUT_EUNIT, ?_test(begin - {ok, SourceDb} = couch_db:open_int(Source, [?ADMIN_CTX]), - add_attachments(SourceDb, ?NUM_ATTS, ?DOCS_CONFLICTS), - ok = couch_db:close(SourceDb) - end)}. +setup() -> + Source = couch_replicator_test_helper:create_db(), + populate_db(Source), + Target = couch_replicator_test_helper:create_db(), + {Source, Target}. + + +teardown({Source, Target}) -> + couch_replicator_test_helper:delete_db(Source), + couch_replicator_test_helper:delete_db(Target). + + +should_replicate_doc_with_many_leaves({Source, Target}) -> + replicate(Source, Target), + {ok, SourceDb} = fabric2_db:open(Source, [?ADMIN_CTX]), + {ok, TargetDb} = fabric2_db:open(Target, [?ADMIN_CTX]), + verify_target(SourceDb, TargetDb, ?DOCS_CONFLICTS), + add_attachments(SourceDb, ?NUM_ATTS, ?DOCS_CONFLICTS), + replicate(Source, Target), + verify_target(SourceDb, TargetDb, ?DOCS_CONFLICTS). + populate_db(DbName) -> - {ok, Db} = couch_db:open_int(DbName, [?ADMIN_CTX]), - lists:foreach( - fun({DocId, NumConflicts}) -> - Value = <<"0">>, - Doc = #doc{ - id = DocId, - body = {[ {<<"value">>, Value} ]} - }, - {ok, _} = couch_db:update_doc(Db, Doc, [?ADMIN_CTX]), - {ok, _} = add_doc_siblings(Db, DocId, NumConflicts) - end, ?DOCS_CONFLICTS), - couch_db:close(Db). - -add_doc_siblings(Db, DocId, NumLeaves) when NumLeaves > 0 -> + {ok, Db} = fabric2_db:open(DbName, [?ADMIN_CTX]), + lists:foreach(fun({DocId, NumConflicts}) -> + Doc = #doc{ + id = DocId, + body = {[{<<"value">>, <<"0">>}]} + }, + {ok, _} = fabric2_db:update_doc(Db, Doc), + {ok, _} = add_doc_siblings(Db, DocId, NumConflicts) + end, ?DOCS_CONFLICTS). + + +add_doc_siblings(#{} = Db, DocId, NumLeaves) when NumLeaves > 0 -> add_doc_siblings(Db, DocId, NumLeaves, [], []). -add_doc_siblings(Db, _DocId, 0, AccDocs, AccRevs) -> - {ok, []} = couch_db:update_docs(Db, AccDocs, [], replicated_changes), + +add_doc_siblings(#{} = Db, _DocId, 0, AccDocs, AccRevs) -> + {ok, []} = fabric2_db:update_docs(Db, AccDocs, [replicated_changes]), {ok, AccRevs}; -add_doc_siblings(Db, DocId, NumLeaves, AccDocs, AccRevs) -> +add_doc_siblings(#{} = Db, DocId, NumLeaves, AccDocs, AccRevs) -> Value = ?l2b(?i2l(NumLeaves)), Rev = couch_hash:md5_hash(Value), Doc = #doc{ id = DocId, revs = {1, [Rev]}, - body = {[ {<<"value">>, Value} ]} + body = {[{<<"value">>, Value}]} }, add_doc_siblings(Db, DocId, NumLeaves - 1, - [Doc | AccDocs], [{1, Rev} | AccRevs]). + [Doc | AccDocs], [{1, Rev} | AccRevs]). 
+ verify_target(_SourceDb, _TargetDb, []) -> ok; -verify_target(SourceDb, TargetDb, [{DocId, NumConflicts} | Rest]) -> - {ok, SourceLookups} = couch_db:open_doc_revs( - SourceDb, - DocId, - all, - [conflicts, deleted_conflicts]), - {ok, TargetLookups} = couch_db:open_doc_revs( - TargetDb, - DocId, - all, - [conflicts, deleted_conflicts]), + +verify_target(#{} = SourceDb, #{} = TargetDb, + [{DocId, NumConflicts} | Rest]) -> + Opts = [conflicts, deleted_conflicts], + {ok, SourceLookups} = open_doc_revs(SourceDb, DocId, Opts), + {ok, TargetLookups} = open_doc_revs(TargetDb, DocId, Opts), SourceDocs = [Doc || {ok, Doc} <- SourceLookups], TargetDocs = [Doc || {ok, Doc} <- TargetLookups], Total = NumConflicts + 1, ?assertEqual(Total, length(TargetDocs)), - lists:foreach( - fun({SourceDoc, TargetDoc}) -> - SourceJson = couch_doc:to_json_obj(SourceDoc, [attachments]), - TargetJson = couch_doc:to_json_obj(TargetDoc, [attachments]), - ?assertEqual(SourceJson, TargetJson) - end, - lists:zip(SourceDocs, TargetDocs)), + lists:foreach(fun({SourceDoc, TargetDoc}) -> + ?assertEqual(json_doc(SourceDoc), json_doc(TargetDoc)) + end, lists:zip(SourceDocs, TargetDocs)), verify_target(SourceDb, TargetDb, Rest). -add_attachments(_SourceDb, _NumAtts, []) -> + +add_attachments(_SourceDb, _NumAtts, []) -> ok; -add_attachments(SourceDb, NumAtts, [{DocId, NumConflicts} | Rest]) -> - {ok, SourceLookups} = couch_db:open_doc_revs(SourceDb, DocId, all, []), + +add_attachments(#{} = SourceDb, NumAtts, + [{DocId, NumConflicts} | Rest]) -> + {ok, SourceLookups} = open_doc_revs(SourceDb, DocId, []), SourceDocs = [Doc || {ok, Doc} <- SourceLookups], Total = NumConflicts + 1, ?assertEqual(Total, length(SourceDocs)), - NewDocs = lists:foldl( - fun(#doc{atts = Atts, revs = {Pos, [Rev | _]}} = Doc, Acc) -> + NewDocs = lists:foldl(fun + (#doc{atts = Atts, revs = {Pos, [Rev | _]}} = Doc, Acc) -> NewAtts = lists:foldl(fun(I, AttAcc) -> - AttData = crypto:strong_rand_bytes(100), - NewAtt = couch_att:new([ - {name, ?io2b(["att_", ?i2l(I), "_", - couch_doc:rev_to_str({Pos, Rev})])}, - {type, <<"application/foobar">>}, - {att_len, byte_size(AttData)}, - {data, AttData} - ]), - [NewAtt | AttAcc] + [att(I, {Pos, Rev}, 100) | AttAcc] end, [], lists:seq(1, NumAtts)), [Doc#doc{atts = Atts ++ NewAtts} | Acc] - end, - [], SourceDocs), - {ok, UpdateResults} = couch_db:update_docs(SourceDb, NewDocs, []), - NewRevs = [R || {ok, R} <- UpdateResults], - ?assertEqual(length(NewDocs), length(NewRevs)), + end, [], SourceDocs), + lists:foreach(fun(#doc{} = Doc) -> + ?assertMatch({ok, _}, fabric2_db:update_doc(SourceDb, Doc)) + end, NewDocs), add_attachments(SourceDb, NumAtts, Rest). + +att(I, PosRev, Size) -> + Name = ?io2b(["att_", ?i2l(I), "_", couch_doc:rev_to_str(PosRev)]), + AttData = crypto:strong_rand_bytes(Size), + couch_att:new([ + {name, Name}, + {type, <<"application/foobar">>}, + {att_len, byte_size(AttData)}, + {data, AttData} + ]). + + +open_doc_revs(#{} = Db, DocId, Opts) -> + fabric2_db:open_doc_revs(Db, DocId, all, Opts). + + +json_doc(#doc{} = Doc) -> + couch_doc:to_json_obj(Doc, [attachments]). + + +replicate(Source, Target) -> + % Serialize the concurrent updates of the same document in order + % to prevent having to set higher timeouts due to FDB conflicts + RepObject = #{ + <<"source">> => Source, + <<"target">> => Target, + <<"worker_processes">> => 1, + <<"http_connections">> => 1 + }, + ?assertMatch({ok, _}, + couch_replicator_test_helper:replicate(RepObject)). 
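The rewritten suites above all share one fixture shape: couch_replicator_test_helper:start_couch/0 and stop_couch/1 wrap a foreach of setup/0, teardown/1 and ?TDEF_FE-wrapped test functions from fabric2_test.hrl. A minimal sketch of that shape, using only helper calls that appear in this changeset (the module name, doc id and timeout are illustrative placeholders, not part of the diff):

-module(couch_replicator_example_tests).

-include_lib("couch/include/couch_eunit.hrl").
-include_lib("couch/include/couch_db.hrl").
-include_lib("fabric/test/fabric2_test.hrl").


example_test_() ->
    {
        "Example replication test",
        {
            setup,
            % start_couch/0 boots fabric, chttpd and couch_replicator once
            fun couch_replicator_test_helper:start_couch/0,
            fun couch_replicator_test_helper:stop_couch/1,
            {
                foreach,
                fun setup/0,
                fun teardown/1,
                [
                    % ?TDEF_FE wraps the named function and passes the
                    % setup() result to it as the single argument
                    ?TDEF_FE(should_replicate_one_doc, 60)
                ]
            }
        }
    }.


setup() ->
    Source = couch_replicator_test_helper:create_db(),
    Target = couch_replicator_test_helper:create_db(),
    couch_replicator_test_helper:create_docs(Source, [
        #{<<"_id">> => <<"doc1">>}
    ]),
    {Source, Target}.


teardown({Source, Target}) ->
    couch_replicator_test_helper:delete_db(Source),
    couch_replicator_test_helper:delete_db(Target).


should_replicate_one_doc({Source, Target}) ->
    % One-shot replication followed by a full source/target comparison
    ?assertMatch({ok, _},
        couch_replicator_test_helper:replicate(Source, Target)),
    ?assertEqual(ok,
        couch_replicator_test_helper:compare_dbs(Source, Target)).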
diff --git a/src/couch_replicator/test/eunit/couch_replicator_missing_stubs_tests.erl b/src/couch_replicator/test/eunit/couch_replicator_missing_stubs_tests.erl index ff08b5ee5..e672c76b7 100644 --- a/src/couch_replicator/test/eunit/couch_replicator_missing_stubs_tests.erl +++ b/src/couch_replicator/test/eunit/couch_replicator_missing_stubs_tests.erl @@ -14,103 +14,59 @@ -include_lib("couch/include/couch_eunit.hrl"). -include_lib("couch/include/couch_db.hrl"). +-include_lib("fabric/test/fabric2_test.hrl"). --import(couch_replicator_test_helper, [ - db_url/1, - replicate/2, - compare_dbs/2 -]). -define(REVS_LIMIT, 3). --define(TIMEOUT_EUNIT, 30). -setup() -> - DbName = ?tempdb(), - {ok, Db} = couch_db:create(DbName, [?ADMIN_CTX]), - ok = couch_db:close(Db), - DbName. - -setup(remote) -> - {remote, setup()}; -setup({A, B}) -> - Ctx = test_util:start_couch([couch_replicator]), - Source = setup(A), - Target = setup(B), - {Ctx, {Source, Target}}. - -teardown({remote, DbName}) -> - teardown(DbName); -teardown(DbName) -> - ok = couch_server:delete(DbName, [?ADMIN_CTX]), - ok. - -teardown(_, {Ctx, {Source, Target}}) -> - teardown(Source), - teardown(Target), - ok = application:stop(couch_replicator), - ok = test_util:stop_couch(Ctx). missing_stubs_test_() -> - Pairs = [{remote, remote}], { "Replicate docs with missing stubs (COUCHDB-1365)", { - foreachx, - fun setup/1, fun teardown/2, - [{Pair, fun should_replicate_docs_with_missed_att_stubs/2} - || Pair <- Pairs] + setup, + fun couch_replicator_test_helper:start_couch/0, + fun couch_replicator_test_helper:stop_couch/1, + { + foreach, + fun setup/0, + fun teardown/1, + [ + ?TDEF_FE(should_replicate_docs_with_missed_att_stubs, 60) + ] + } } }. -should_replicate_docs_with_missed_att_stubs({From, To}, {_Ctx, {Source, Target}}) -> - {lists:flatten(io_lib:format("~p -> ~p", [From, To])), - {inorder, [ - should_populate_source(Source), - should_set_target_revs_limit(Target, ?REVS_LIMIT), - should_replicate(Source, Target), - should_compare_databases(Source, Target), - should_update_source_docs(Source, ?REVS_LIMIT * 2), - should_replicate(Source, Target), - should_compare_databases(Source, Target) - ]}}. - -should_populate_source({remote, Source}) -> - should_populate_source(Source); -should_populate_source(Source) -> - {timeout, ?TIMEOUT_EUNIT, ?_test(populate_db(Source))}. - -should_replicate({remote, Source}, Target) -> - should_replicate(db_url(Source), Target); -should_replicate(Source, {remote, Target}) -> - should_replicate(Source, db_url(Target)); -should_replicate(Source, Target) -> - {timeout, ?TIMEOUT_EUNIT, ?_test(replicate(Source, Target))}. - -should_set_target_revs_limit({remote, Target}, RevsLimit) -> - should_set_target_revs_limit(Target, RevsLimit); -should_set_target_revs_limit(Target, RevsLimit) -> - ?_test(begin - {ok, Db} = couch_db:open_int(Target, [?ADMIN_CTX]), - ?assertEqual(ok, couch_db:set_revs_limit(Db, RevsLimit)), - ok = couch_db:close(Db) - end). - -should_compare_databases({remote, Source}, Target) -> - should_compare_databases(Source, Target); -should_compare_databases(Source, {remote, Target}) -> - should_compare_databases(Source, Target); -should_compare_databases(Source, Target) -> - {timeout, ?TIMEOUT_EUNIT, ?_test(compare_dbs(Source, Target))}. - -should_update_source_docs({remote, Source}, Times) -> - should_update_source_docs(Source, Times); -should_update_source_docs(Source, Times) -> - {timeout, ?TIMEOUT_EUNIT, ?_test(update_db_docs(Source, Times))}. 
+setup() -> + Source = couch_replicator_test_helper:create_db(), + populate_db(Source), + Target = couch_replicator_test_helper:create_db(), + {Source, Target}. + + +teardown({Source, Target}) -> + couch_replicator_test_helper:delete_db(Source), + couch_replicator_test_helper:delete_db(Target). + + +should_replicate_docs_with_missed_att_stubs({Source, Target}) -> + {ok, TargetDb} = fabric2_db:open(Target, [?ADMIN_CTX]), + ?assertEqual(ok, fabric2_db:set_revs_limit(TargetDb, ?REVS_LIMIT)), + + ?assertMatch({ok, _}, + couch_replicator_test_helper:replicate(Source, Target)), + ?assertEqual(ok, couch_replicator_test_helper:compare_dbs(Source, Target)), + + ok = update_db_docs(Source, ?REVS_LIMIT * 2), + + ?assertMatch({ok, _}, + couch_replicator_test_helper:replicate(Source, Target)), + ?assertEqual(ok, couch_replicator_test_helper:compare_dbs(Source, Target)). populate_db(DbName) -> - {ok, Db} = couch_db:open_int(DbName, []), AttData = crypto:strong_rand_bytes(6000), Doc = #doc{ id = <<"doc1">>, @@ -120,35 +76,40 @@ populate_db(DbName) -> {type, <<"application/foobar">>}, {att_len, byte_size(AttData)}, {data, AttData} - ]) + ]) ] }, - {ok, _} = couch_db:update_doc(Db, Doc, []), - couch_db:close(Db). + couch_replicator_test_helper:create_docs(DbName, [Doc]). + update_db_docs(DbName, Times) -> - {ok, Db} = couch_db:open_int(DbName, []), - {ok, _} = couch_db:fold_docs( - Db, - fun(FDI, Acc) -> db_fold_fun(FDI, Acc) end, - {DbName, Times}, - []), - ok = couch_db:close(Db). - -db_fold_fun(FullDocInfo, {DbName, Times}) -> - {ok, Db} = couch_db:open_int(DbName, []), - {ok, Doc} = couch_db:open_doc(Db, FullDocInfo), - lists:foldl( - fun(_, {Pos, RevId}) -> - {ok, Db2} = couch_db:reopen(Db), - NewDocVersion = Doc#doc{ - revs = {Pos, [RevId]}, - body = {[{<<"value">>, base64:encode(crypto:strong_rand_bytes(100))}]} - }, - {ok, NewRev} = couch_db:update_doc(Db2, NewDocVersion, []), - NewRev - end, - {element(1, Doc#doc.revs), hd(element(2, Doc#doc.revs))}, - lists:seq(1, Times)), - ok = couch_db:close(Db), - {ok, {DbName, Times}}. + {ok, Db} = fabric2_db:open(DbName, [?ADMIN_CTX]), + FoldFun = fun + ({meta, _Meta}, Acc) -> + {ok, Acc}; + (complete, Acc) -> + {ok, Acc}; + ({row, Row}, Acc) -> + {_, DocId} = lists:keyfind(id, 1, Row), + ok = update_doc(DbName, DocId, Times), + {ok, Acc} + end, + Opts = [{restart_tx, true}], + {ok, _} = fabric2_db:fold_docs(Db, FoldFun, ok, Opts), + ok. + + +update_doc(_DbName, _DocId, 0) -> + ok; + +update_doc(DbName, DocId, Times) -> + {ok, Db} = fabric2_db:open(DbName, [?ADMIN_CTX]), + {ok, Doc} = fabric2_db:open_doc(Db, DocId, []), + #doc{revs = {Pos, [Rev | _]}} = Doc, + Val = base64:encode(crypto:strong_rand_bytes(100)), + Doc1 = Doc#doc{ + revs = {Pos, [Rev]}, + body = {[{<<"value">>, Val}]} + }, + {ok, _} = fabric2_db:update_doc(Db, Doc1), + update_doc(DbName, DocId, Times - 1). diff --git a/src/couch_replicator/test/eunit/couch_replicator_proxy_tests.erl b/src/couch_replicator/test/eunit/couch_replicator_proxy_tests.erl index da46b8a26..f5e745d90 100644 --- a/src/couch_replicator/test/eunit/couch_replicator_proxy_tests.erl +++ b/src/couch_replicator/test/eunit/couch_replicator_proxy_tests.erl @@ -14,15 +14,7 @@ -include_lib("couch/include/couch_eunit.hrl"). -include_lib("couch_replicator/src/couch_replicator.hrl"). --include_lib("couch_replicator/include/couch_replicator_api_wrap.hrl"). - - -setup() -> - ok. - - -teardown(_) -> - ok. +-include_lib("fabric/test/fabric2_test.hrl"). 
replicator_proxy_test_() -> @@ -30,87 +22,78 @@ replicator_proxy_test_() -> "replicator proxy tests", { setup, - fun() -> test_util:start_couch([couch_replicator]) end, fun test_util:stop_couch/1, - { - foreach, - fun setup/0, fun teardown/1, - [ - fun parse_rep_doc_without_proxy/1, - fun parse_rep_doc_with_proxy/1, - fun parse_rep_source_target_proxy/1, - fun mutually_exclusive_proxy_and_source_proxy/1, - fun mutually_exclusive_proxy_and_target_proxy/1 - ] - } + fun couch_replicator_test_helper:start_couch/0, + fun couch_replicator_test_helper:stop_couch/1, + with([ + ?TDEF(parse_rep_doc_without_proxy), + ?TDEF(parse_rep_doc_with_proxy), + ?TDEF(parse_rep_source_target_proxy), + ?TDEF(mutually_exclusive_proxy_and_source_proxy), + ?TDEF(mutually_exclusive_proxy_and_target_proxy) + ]) } }. parse_rep_doc_without_proxy(_) -> - ?_test(begin - NoProxyDoc = {[ - {<<"source">>, <<"http://unproxied.com">>}, - {<<"target">>, <<"http://otherunproxied.com">>} - ]}, - Rep = couch_replicator_docs:parse_rep_doc(NoProxyDoc), - ?assertEqual((Rep#rep.source)#httpdb.proxy_url, undefined), - ?assertEqual((Rep#rep.target)#httpdb.proxy_url, undefined) - end). + NoProxyDoc = {[ + {<<"source">>, <<"http://unproxied.com">>}, + {<<"target">>, <<"http://otherunproxied.com">>} + ]}, + Rep = couch_replicator_parse:parse_rep_doc(NoProxyDoc), + Src = maps:get(?SOURCE, Rep), + Tgt = maps:get(?TARGET, Rep), + ?assertEqual(null, maps:get(<<"proxy_url">>, Src)), + ?assertEqual(null, maps:get(<<"proxy_url">>, Tgt)). parse_rep_doc_with_proxy(_) -> - ?_test(begin - ProxyURL = <<"http://myproxy.com">>, - ProxyDoc = {[ - {<<"source">>, <<"http://unproxied.com">>}, - {<<"target">>, <<"http://otherunproxied.com">>}, - {<<"proxy">>, ProxyURL} - ]}, - Rep = couch_replicator_docs:parse_rep_doc(ProxyDoc), - ?assertEqual((Rep#rep.source)#httpdb.proxy_url, binary_to_list(ProxyURL)), - ?assertEqual((Rep#rep.target)#httpdb.proxy_url, binary_to_list(ProxyURL)) - end). + ProxyURL = <<"http://myproxy.com">>, + ProxyDoc = {[ + {<<"source">>, <<"http://unproxied.com">>}, + {<<"target">>, <<"http://otherunproxied.com">>}, + {<<"proxy">>, ProxyURL} + ]}, + Rep = couch_replicator_parse:parse_rep_doc(ProxyDoc), + Src = maps:get(?SOURCE, Rep), + Tgt = maps:get(?TARGET, Rep), + ?assertEqual(ProxyURL, maps:get(<<"proxy_url">>, Src)), + ?assertEqual(ProxyURL, maps:get(<<"proxy_url">>, Tgt)). parse_rep_source_target_proxy(_) -> - ?_test(begin - SrcProxyURL = <<"http://mysrcproxy.com">>, - TgtProxyURL = <<"http://mytgtproxy.com:9999">>, - ProxyDoc = {[ - {<<"source">>, <<"http://unproxied.com">>}, - {<<"target">>, <<"http://otherunproxied.com">>}, - {<<"source_proxy">>, SrcProxyURL}, - {<<"target_proxy">>, TgtProxyURL} - ]}, - Rep = couch_replicator_docs:parse_rep_doc(ProxyDoc), - ?assertEqual((Rep#rep.source)#httpdb.proxy_url, - binary_to_list(SrcProxyURL)), - ?assertEqual((Rep#rep.target)#httpdb.proxy_url, - binary_to_list(TgtProxyURL)) - end). + SrcProxyURL = <<"http://mysrcproxy.com">>, + TgtProxyURL = <<"http://mytgtproxy.com:9999">>, + ProxyDoc = {[ + {<<"source">>, <<"http://unproxied.com">>}, + {<<"target">>, <<"http://otherunproxied.com">>}, + {<<"source_proxy">>, SrcProxyURL}, + {<<"target_proxy">>, TgtProxyURL} + ]}, + Rep = couch_replicator_parse:parse_rep_doc(ProxyDoc), + Src = maps:get(?SOURCE, Rep), + Tgt = maps:get(?TARGET, Rep), + ?assertEqual(SrcProxyURL, maps:get(<<"proxy_url">>, Src)), + ?assertEqual(TgtProxyURL, maps:get(<<"proxy_url">>, Tgt)). 
mutually_exclusive_proxy_and_source_proxy(_) -> - ?_test(begin - ProxyDoc = {[ - {<<"source">>, <<"http://unproxied.com">>}, - {<<"target">>, <<"http://otherunproxied.com">>}, - {<<"proxy">>, <<"oldstyleproxy.local">>}, - {<<"source_proxy">>, <<"sourceproxy.local">>} - ]}, - ?assertThrow({bad_rep_doc, _}, - couch_replicator_docs:parse_rep_doc(ProxyDoc)) - end). + ProxyDoc = {[ + {<<"source">>, <<"http://unproxied.com">>}, + {<<"target">>, <<"http://otherunproxied.com">>}, + {<<"proxy">>, <<"oldstyleproxy.local">>}, + {<<"source_proxy">>, <<"sourceproxy.local">>} + ]}, + ?assertThrow({bad_rep_doc, _}, + couch_replicator_parse:parse_rep_doc(ProxyDoc)). mutually_exclusive_proxy_and_target_proxy(_) -> - ?_test(begin - ProxyDoc = {[ - {<<"source">>, <<"http://unproxied.com">>}, - {<<"target">>, <<"http://otherunproxied.com">>}, - {<<"proxy">>, <<"oldstyleproxy.local">>}, - {<<"target_proxy">>, <<"targetproxy.local">>} - ]}, - ?assertThrow({bad_rep_doc, _}, - couch_replicator_docs:parse_rep_doc(ProxyDoc)) - end). + ProxyDoc = {[ + {<<"source">>, <<"http://unproxied.com">>}, + {<<"target">>, <<"http://otherunproxied.com">>}, + {<<"proxy">>, <<"oldstyleproxy.local">>}, + {<<"target_proxy">>, <<"targetproxy.local">>} + ]}, + ?assertThrow({bad_rep_doc, _}, + couch_replicator_parse:parse_rep_doc(ProxyDoc)). diff --git a/src/couch_replicator/test/eunit/couch_replicator_rate_limiter_tests.erl b/src/couch_replicator/test/eunit/couch_replicator_rate_limiter_tests.erl index 034550aec..fb9892017 100644 --- a/src/couch_replicator/test/eunit/couch_replicator_rate_limiter_tests.erl +++ b/src/couch_replicator/test/eunit/couch_replicator_rate_limiter_tests.erl @@ -1,6 +1,7 @@ -module(couch_replicator_rate_limiter_tests). -include_lib("couch/include/couch_eunit.hrl"). +-include_lib("fabric/test/fabric2_test.hrl"). rate_limiter_test_() -> @@ -9,64 +10,52 @@ rate_limiter_test_() -> fun setup/0, fun teardown/1, [ - t_new_key(), - t_1_failure(), - t_2_failures_back_to_back(), - t_2_failures(), - t_success_threshold(), - t_1_failure_2_successes() + ?TDEF_FE(t_new_key), + ?TDEF_FE(t_1_failure), + ?TDEF_FE(t_2_failures_back_to_back), + ?TDEF_FE(t_2_failures), + ?TDEF_FE(t_success_threshold), + ?TDEF_FE(t_1_failure_2_successes) ] }. -t_new_key() -> - ?_test(begin - ?assertEqual(0, couch_replicator_rate_limiter:interval({"foo", get})) - end). +t_new_key(_) -> + ?assertEqual(0, couch_replicator_rate_limiter:interval({"foo", get})). -t_1_failure() -> - ?_test(begin - ?assertEqual(24, couch_replicator_rate_limiter:failure({"foo", get})) - end). +t_1_failure(_) -> + ?assertEqual(24, couch_replicator_rate_limiter:failure({"foo", get})). -t_2_failures() -> - ?_test(begin - couch_replicator_rate_limiter:failure({"foo", get}), - low_pass_filter_delay(), - Interval = couch_replicator_rate_limiter:failure({"foo", get}), - ?assertEqual(29, Interval) - end). +t_2_failures(_) -> + couch_replicator_rate_limiter:failure({"foo", get}), + low_pass_filter_delay(), + Interval = couch_replicator_rate_limiter:failure({"foo", get}), + ?assertEqual(29, Interval). -t_2_failures_back_to_back() -> - ?_test(begin - couch_replicator_rate_limiter:failure({"foo", get}), - Interval = couch_replicator_rate_limiter:failure({"foo", get}), - ?assertEqual(24, Interval) - end). +t_2_failures_back_to_back(_) -> + couch_replicator_rate_limiter:failure({"foo", get}), + Interval = couch_replicator_rate_limiter:failure({"foo", get}), + ?assertEqual(24, Interval). 
-t_success_threshold() -> - ?_test(begin - Interval = couch_replicator_rate_limiter:success({"foo", get}), - ?assertEqual(0, Interval), - Interval = couch_replicator_rate_limiter:success({"foo", get}), - ?assertEqual(0, Interval) - end). +t_success_threshold(_) -> + Interval = couch_replicator_rate_limiter:success({"foo", get}), + ?assertEqual(0, Interval), + Interval = couch_replicator_rate_limiter:success({"foo", get}), + ?assertEqual(0, Interval). -t_1_failure_2_successes() -> - ?_test(begin - couch_replicator_rate_limiter:failure({"foo", get}), - low_pass_filter_delay(), - Succ1 = couch_replicator_rate_limiter:success({"foo", get}), - ?assertEqual(20, Succ1), - low_pass_filter_delay(), - Succ2 = couch_replicator_rate_limiter:success({"foo", get}), - ?assertEqual(0, Succ2) - end). +t_1_failure_2_successes(_) -> + couch_replicator_rate_limiter:failure({"foo", get}), + low_pass_filter_delay(), + Succ1 = couch_replicator_rate_limiter:success({"foo", get}), + ?assertEqual(20, Succ1), + low_pass_filter_delay(), + Succ2 = couch_replicator_rate_limiter:success({"foo", get}), + ?assertEqual(0, Succ2). low_pass_filter_delay() -> diff --git a/src/couch_replicator/test/eunit/couch_replicator_retain_stats_between_job_runs.erl b/src/couch_replicator/test/eunit/couch_replicator_retain_stats_between_job_runs.erl index 037f37191..4b7c37d9e 100644 --- a/src/couch_replicator/test/eunit/couch_replicator_retain_stats_between_job_runs.erl +++ b/src/couch_replicator/test/eunit/couch_replicator_retain_stats_between_job_runs.erl @@ -15,139 +15,72 @@ -include_lib("couch/include/couch_eunit.hrl"). -include_lib("couch/include/couch_db.hrl"). -include_lib("couch_replicator/src/couch_replicator.hrl"). - --define(DELAY, 500). --define(TIMEOUT, 60000). - - -setup_all() -> - test_util:start_couch([couch_replicator, chttpd, mem3, fabric]). - - -teardown_all(Ctx) -> - ok = test_util:stop_couch(Ctx). - - -setup() -> - Source = setup_db(), - Target = setup_db(), - {Source, Target}. +-include_lib("fabric/test/fabric2_test.hrl"). -teardown({Source, Target}) -> - teardown_db(Source), - teardown_db(Target), - ok. +-define(DELAY, 500). stats_retained_test_() -> { setup, - fun setup_all/0, - fun teardown_all/1, + fun couch_replicator_test_helper:start_couch/0, + fun couch_replicator_test_helper:stop_couch/1, { foreach, fun setup/0, fun teardown/1, [ - fun t_stats_retained_by_scheduler/1, - fun t_stats_retained_on_job_removal/1 + ?TDEF_FE(t_stats_retained_on_job_removal, 60) ] } }. -t_stats_retained_by_scheduler({Source, Target}) -> - ?_test(begin - {ok, _} = add_vdu(Target), - populate_db_reject_even_docs(Source, 1, 10), - {ok, RepPid, RepId} = replicate(Source, Target), - wait_target_in_sync(6, Target), - - check_active_tasks(10, 5, 5), - check_scheduler_jobs(10, 5, 5), +setup() -> + Source = couch_replicator_test_helper:create_db(), + Target = couch_replicator_test_helper:create_db(), + config:set("replicator", "stats_update_interval_sec", "0", false), + config:set("replicator", "checkpoint_interval", "1000", false), + {Source, Target}. - stop_job(RepPid), - check_scheduler_jobs(10, 5, 5), - start_job(), - check_active_tasks(10, 5, 5), - check_scheduler_jobs(10, 5, 5), - couch_replicator_scheduler:remove_job(RepId) - end). +teardown({Source, Target}) -> + config:delete("replicator", "stats_update_interval_sec", false), + config:delete("replicator", "checkpoint_interval", false), + couch_replicator_test_helper:delete_db(Source), + couch_replicator_test_helper:delete_db(Target). 
t_stats_retained_on_job_removal({Source, Target}) -> - ?_test(begin - {ok, _} = add_vdu(Target), - populate_db_reject_even_docs(Source, 1, 10), - {ok, _, RepId} = replicate(Source, Target), - wait_target_in_sync(6, Target), % 5 + 1 vdu - - check_active_tasks(10, 5, 5), - check_scheduler_jobs(10, 5, 5), + {ok, _} = add_vdu(Target), + populate_db_reject_even_docs(Source, 1, 10), + {ok, Pid1, RepId} = replicate(Source, Target), + wait_target_in_sync(6, Target), % 5 + 1 vdu - couch_replicator_scheduler:remove_job(RepId), + check_scheduler_jobs(10, 5, 5), - populate_db_reject_even_docs(Source, 11, 20), - {ok, _, RepId} = replicate(Source, Target), - wait_target_in_sync(11, Target), % 6 + 5 + cancel(RepId, Pid1), - check_scheduler_jobs(20, 10, 10), - check_active_tasks(20, 10, 10), + populate_db_reject_even_docs(Source, 11, 20), + {ok, Pid2, RepId} = replicate(Source, Target), + wait_target_in_sync(11, Target), % 6 + 5 - couch_replicator_scheduler:remove_job(RepId), + check_scheduler_jobs(20, 10, 10), - populate_db_reject_even_docs(Source, 21, 30), - {ok, _, RepId} = replicate(Source, Target), - wait_target_in_sync(16, Target), % 11 + 5 + cancel(RepId, Pid2), - check_scheduler_jobs(30, 15, 15), - check_active_tasks(30, 15, 15), - - couch_replicator_scheduler:remove_job(RepId) - end). + populate_db_reject_even_docs(Source, 21, 30), + {ok, Pid3, RepId} = replicate(Source, Target), + wait_target_in_sync(16, Target), % 11 + 5 + check_scheduler_jobs(30, 15, 15), -setup_db() -> - DbName = ?tempdb(), - {ok, Db} = couch_db:create(DbName, [?ADMIN_CTX]), - ok = couch_db:close(Db), - DbName. - - -teardown_db(DbName) -> - ok = couch_server:delete(DbName, [?ADMIN_CTX]), - ok. - - -stop_job(RepPid) -> - Ref = erlang:monitor(process, RepPid), - gen_server:cast(couch_replicator_scheduler, {set_max_jobs, 0}), - couch_replicator_scheduler:reschedule(), - receive - {'DOWN', Ref, _, _, _} -> ok - after ?TIMEOUT -> - erlang:error(timeout) - end. - - -start_job() -> - gen_server:cast(couch_replicator_scheduler, {set_max_jobs, 500}), - couch_replicator_scheduler:reschedule(). - - -check_active_tasks(DocsRead, DocsWritten, DocsFailed) -> - RepTask = wait_for_task_status(), - ?assertNotEqual(timeout, RepTask), - ?assertEqual(DocsRead, couch_util:get_value(docs_read, RepTask)), - ?assertEqual(DocsWritten, couch_util:get_value(docs_written, RepTask)), - ?assertEqual(DocsFailed, couch_util:get_value(doc_write_failures, - RepTask)). + cancel(RepId, Pid3). check_scheduler_jobs(DocsRead, DocsWritten, DocFailed) -> - Info = wait_scheduler_info(), + Info = wait_scheduler_info(DocsRead), ?assert(maps:is_key(<<"changes_pending">>, Info)), ?assert(maps:is_key(<<"doc_write_failures">>, Info)), ?assert(maps:is_key(<<"docs_read">>, Info)), @@ -161,27 +94,18 @@ check_scheduler_jobs(DocsRead, DocsWritten, DocFailed) -> ?assertMatch(#{<<"doc_write_failures">> := DocFailed}, Info). -replication_tasks() -> - lists:filter(fun(P) -> - couch_util:get_value(type, P) =:= replication - end, couch_task_status:all()). - - -wait_for_task_status() -> +wait_scheduler_info(DocsRead) -> test_util:wait(fun() -> - case replication_tasks() of - [] -> wait; - [RepTask] -> RepTask - end - end). 
- - -wait_scheduler_info() -> - test_util:wait(fun() -> - case scheduler_jobs() of - [] -> wait; - [#{<<"info">> := null}] -> wait; - [#{<<"info">> := Info}] -> Info + case couch_replicator_test_helper:scheduler_jobs() of + [] -> + wait; + [#{<<"info">> := null}] -> + wait; + [#{<<"info">> := Info}] -> + case Info of + #{<<"docs_read">> := DocsRead} -> Info; + #{} -> wait + end end end). @@ -197,16 +121,12 @@ populate_db_reject_even_docs(DbName, Start, End) -> populate_db(DbName, Start, End, BodyFun) when is_function(BodyFun, 1) -> - {ok, Db} = couch_db:open_int(DbName, []), - Docs = lists:foldl( - fun(DocIdCounter, Acc) -> - Id = integer_to_binary(DocIdCounter), - Doc = #doc{id = Id, body = BodyFun(DocIdCounter)}, - [Doc | Acc] - end, - [], lists:seq(Start, End)), - {ok, _} = couch_db:update_docs(Db, Docs, []), - ok = couch_db:close(Db). + Docs = lists:foldl(fun(DocIdCounter, Acc) -> + Id = integer_to_binary(DocIdCounter), + Doc = #doc{id = Id, body = BodyFun(DocIdCounter)}, + [Doc | Acc] + end, [], lists:seq(Start, End)), + couch_replicator_test_helper:create_docs(DbName, Docs). wait_target_in_sync(DocCount, Target) when is_integer(DocCount) -> @@ -215,14 +135,13 @@ wait_target_in_sync(DocCount, Target) when is_integer(DocCount) -> wait_target_in_sync_loop(_DocCount, _TargetName, 0) -> erlang:error({assertion_failed, [ - {module, ?MODULE}, {line, ?LINE}, - {reason, "Could not get source and target databases in sync"} + {module, ?MODULE}, {line, ?LINE}, + {reason, "Could not get source and target databases in sync"} ]}); wait_target_in_sync_loop(DocCount, TargetName, RetriesLeft) -> - {ok, Target} = couch_db:open_int(TargetName, []), - {ok, TargetInfo} = couch_db:get_db_info(Target), - ok = couch_db:close(Target), + {ok, Db} = fabric2_db:open(TargetName, [?ADMIN_CTX]), + {ok, TargetInfo} = fabric2_db:get_db_info(Db), TargetDocCount = couch_util:get_value(doc_count, TargetInfo), case TargetDocCount == DocCount of true -> @@ -234,27 +153,11 @@ wait_target_in_sync_loop(DocCount, TargetName, RetriesLeft) -> replicate(Source, Target) -> - SrcUrl = couch_replicator_test_helper:db_url(Source), - TgtUrl = couch_replicator_test_helper:db_url(Target), - RepObject = {[ - {<<"source">>, SrcUrl}, - {<<"target">>, TgtUrl}, - {<<"continuous">>, true} - ]}, - {ok, Rep} = couch_replicator_utils:parse_rep_doc(RepObject, ?ADMIN_USER), - ok = couch_replicator_scheduler:add_job(Rep), - couch_replicator_scheduler:reschedule(), - Pid = couch_replicator_test_helper:get_pid(Rep#rep.id), - {ok, Pid, Rep#rep.id}. - - -scheduler_jobs() -> - Addr = config:get("chttpd", "bind_address", "127.0.0.1"), - Port = mochiweb_socket_server:get(chttpd, port), - Url = lists:flatten(io_lib:format("http://~s:~b/_scheduler/jobs", [Addr, Port])), - {ok, 200, _, Body} = test_request:get(Url, []), - Json = jiffy:decode(Body, [return_maps]), - maps:get(<<"jobs">>, Json). + couch_replicator_test_helper:replicate_continuous(Source, Target). + + +cancel(RepId, Pid) -> + couch_replicator_test_helper:cancel(RepId, Pid). vdu() -> @@ -274,9 +177,5 @@ add_vdu(DbName) -> {<<"validate_doc_update">>, vdu()} ], Doc = couch_doc:from_json_obj({DocProps}, []), - {ok, Db} = couch_db:open_int(DbName, [?ADMIN_CTX]), - try - {ok, _Rev} = couch_db:update_doc(Db, Doc, []) - after - couch_db:close(Db) - end. + {ok, Db} = fabric2_db:open(DbName, [?ADMIN_CTX]), + {ok, _} = fabric2_db:update_doc(Db, Doc, []). 
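Continuous replications follow a slightly different lifecycle than the one-shot replicate/2 calls: the stats test above starts the job with replicate_continuous/2, observes it through scheduler_jobs/0, and tears it down with cancel/2, which also waits for the worker pid to exit. A minimal sketch of that lifecycle, assuming it sits inside a module fixtured like the ones above (the function name is a placeholder):

continuous_example({Source, Target}) ->
    % Start a continuous job; the helper waits until the job is running
    % and returns its worker pid along with the replication id
    {ok, Pid, RepId} =
        couch_replicator_test_helper:replicate_continuous(Source, Target),

    % The _scheduler/jobs endpoint should now report at least this job
    Jobs = couch_replicator_test_helper:scheduler_jobs(),
    ?assert(length(Jobs) >= 1),

    % cancel/2 removes the job and blocks until the worker pid goes down
    couch_replicator_test_helper:cancel(RepId, Pid).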
diff --git a/src/couch_replicator/test/eunit/couch_replicator_selector_tests.erl b/src/couch_replicator/test/eunit/couch_replicator_selector_tests.erl index 5026c1435..5dfe4ba91 100644 --- a/src/couch_replicator/test/eunit/couch_replicator_selector_tests.erl +++ b/src/couch_replicator/test/eunit/couch_replicator_selector_tests.erl @@ -15,103 +15,69 @@ -include_lib("couch/include/couch_eunit.hrl"). -include_lib("couch/include/couch_db.hrl"). -include_lib("couch_replicator/src/couch_replicator.hrl"). +-include_lib("fabric/test/fabric2_test.hrl"). -setup(_) -> - Ctx = test_util:start_couch([couch_replicator]), - Source = create_db(), - create_docs(Source), - Target = create_db(), - {Ctx, {Source, Target}}. - -teardown(_, {Ctx, {Source, Target}}) -> - delete_db(Source), - delete_db(Target), - ok = application:stop(couch_replicator), - ok = test_util:stop_couch(Ctx). - selector_replication_test_() -> - Pairs = [{remote, remote}], { "Selector filtered replication tests", { - foreachx, - fun setup/1, fun teardown/2, - [{Pair, fun should_succeed/2} || Pair <- Pairs] + setup, + fun couch_replicator_test_helper:start_couch/0, + fun couch_replicator_test_helper:stop_couch/1, + { + foreach, + fun setup/0, + fun teardown/1, + [ + ?TDEF_FE(should_replicate_with_selector) + ] + } } }. -should_succeed({From, To}, {_Ctx, {Source, Target}}) -> - RepObject = {[ - {<<"source">>, db_url(From, Source)}, - {<<"target">>, db_url(To, Target)}, - {<<"selector">>, {[{<<"_id">>, <<"doc2">>}]}} - ]}, - {ok, _} = couch_replicator:replicate(RepObject, ?ADMIN_USER), - %% FilteredFun is an Erlang version of following mango selector - FilterFun = fun(_DocId, {Props}) -> - couch_util:get_value(<<"_id">>, Props) == <<"doc2">> - end, - {ok, TargetDbInfo, AllReplies} = compare_dbs(Source, Target, FilterFun), - {lists:flatten(io_lib:format("~p -> ~p", [From, To])), [ - {"Target DB has proper number of docs", - ?_assertEqual(1, proplists:get_value(doc_count, TargetDbInfo))}, - {"All the docs selected as expected", - ?_assert(lists:all(fun(Valid) -> Valid end, AllReplies))} - ]}. -compare_dbs(Source, Target, FilterFun) -> - {ok, SourceDb} = couch_db:open_int(Source, []), - {ok, TargetDb} = couch_db:open_int(Target, []), - {ok, TargetDbInfo} = couch_db:get_db_info(TargetDb), - Fun = fun(FullDocInfo, Acc) -> - {ok, DocId, SourceDoc} = read_doc(SourceDb, FullDocInfo), - TargetReply = read_doc(TargetDb, DocId), - case FilterFun(DocId, SourceDoc) of - true -> - ValidReply = {ok, DocId, SourceDoc} == TargetReply, - {ok, [ValidReply|Acc]}; - false -> - ValidReply = {not_found, missing} == TargetReply, - {ok, [ValidReply|Acc]} - end - end, - {ok, AllReplies} = couch_db:fold_docs(SourceDb, Fun, [], []), - ok = couch_db:close(SourceDb), - ok = couch_db:close(TargetDb), - {ok, TargetDbInfo, AllReplies}. +setup() -> + Source = couch_replicator_test_helper:create_db(), + create_docs(Source), + Target = couch_replicator_test_helper:create_db(), + {Source, Target}. -read_doc(Db, DocIdOrInfo) -> - case couch_db:open_doc(Db, DocIdOrInfo) of - {ok, Doc} -> - {Props} = couch_doc:to_json_obj(Doc, [attachments]), - DocId = couch_util:get_value(<<"_id">>, Props), - {ok, DocId, {Props}}; - Error -> - Error - end. -create_db() -> - DbName = ?tempdb(), - {ok, Db} = couch_db:create(DbName, [?ADMIN_CTX]), - ok = couch_db:close(Db), - DbName. +teardown({Source, Target}) -> + couch_replicator_test_helper:delete_db(Source), + couch_replicator_test_helper:delete_db(Target). 
-create_docs(DbName) -> - {ok, Db} = couch_db:open(DbName, [?ADMIN_CTX]), - Doc1 = couch_doc:from_json_obj({[ - {<<"_id">>, <<"doc1">>} - ]}), - Doc2 = couch_doc:from_json_obj({[ - {<<"_id">>, <<"doc2">>} - ]}), - {ok, _} = couch_db:update_docs(Db, [Doc1, Doc2]), - couch_db:close(Db). -delete_db(DbName) -> - ok = couch_server:delete(DbName, [?ADMIN_CTX]). +should_replicate_with_selector({Source, Target}) -> + RepObject = #{ + <<"source">> => Source, + <<"target">> => Target, + <<"selector">> => #{ + <<"_id">> => <<"doc2">> + } + }, + ?assertMatch({ok, _}, couch_replicator_test_helper:replicate(RepObject)), + {ok, TargetDbInfo, AllReplies} = compare_dbs(Source, Target), + ?assertEqual(1, proplists:get_value(doc_count, TargetDbInfo)), + ?assert(lists:all(fun(Valid) -> Valid end, AllReplies)). + -db_url(remote, DbName) -> - Addr = config:get("httpd", "bind_address", "127.0.0.1"), - Port = mochiweb_socket_server:get(couch_httpd, port), - ?l2b(io_lib:format("http://~s:~b/~s", [Addr, Port, DbName])). +compare_dbs(Source, Target) -> + {ok, TargetDb} = fabric2_db:open(Target, []), + {ok, TargetDbInfo} = fabric2_db:get_db_info(TargetDb), + Fun = fun(SrcDoc, TgtDoc, Acc) -> + case SrcDoc#doc.id == <<"doc2">> of + true -> [SrcDoc#doc.body == TgtDoc#doc.body | Acc]; + false -> [not_found == TgtDoc | Acc] + end + end, + Res = couch_replicator_test_helper:compare_fold(Source, Target, Fun, []), + {ok, TargetDbInfo, Res}. + + +create_docs(DbName) -> + couch_replicator_test_helper:create_docs(DbName, [ + #{<<"_id">> => <<"doc1">>}, + #{<<"_id">> => <<"doc2">>} + ]). diff --git a/src/couch_replicator/test/eunit/couch_replicator_small_max_request_size_target.erl b/src/couch_replicator/test/eunit/couch_replicator_small_max_request_size_target.erl index 8aebbe151..b113c5392 100644 --- a/src/couch_replicator/test/eunit/couch_replicator_small_max_request_size_target.erl +++ b/src/couch_replicator/test/eunit/couch_replicator_small_max_request_size_target.erl @@ -1,139 +1,70 @@ -module(couch_replicator_small_max_request_size_target). + -include_lib("couch/include/couch_eunit.hrl"). -include_lib("couch/include/couch_db.hrl"). - --import(couch_replicator_test_helper, [ - db_url/1, - replicate/1, - compare_dbs/3 -]). - --define(TIMEOUT_EUNIT, 360). - - -setup() -> - DbName = ?tempdb(), - {ok, Db} = couch_db:create(DbName, [?ADMIN_CTX]), - ok = couch_db:close(Db), - DbName. - - -setup(remote) -> - {remote, setup()}; - -setup({A, B}) -> - Ctx = test_util:start_couch([couch_replicator]), - config:set("httpd", "max_http_request_size", "10000", false), - Source = setup(A), - Target = setup(B), - {Ctx, {Source, Target}}. - - -teardown({remote, DbName}) -> - teardown(DbName); -teardown(DbName) -> - ok = couch_server:delete(DbName, [?ADMIN_CTX]), - ok. - -teardown(_, {Ctx, {Source, Target}}) -> - teardown(Source), - teardown(Target), - ok = application:stop(couch_replicator), - ok = test_util:stop_couch(Ctx). +-include_lib("fabric/test/fabric2_test.hrl"). reduce_max_request_size_test_() -> - Pairs = [{remote, remote}], { "Replicate docs when target has a small max_http_request_size", { - foreachx, - fun setup/1, fun teardown/2, - [{Pair, fun should_replicate_all_docs/2} - || Pair <- Pairs] - ++ [{Pair, fun should_replicate_one/2} - || Pair <- Pairs] - % Disabled. See issue 574. Sometimes PUTs with a doc and - % attachment which exceed maximum request size are simply - % closed instead of returning a 413 request. That makes these - % tests flaky. 
- ++ [{Pair, fun should_replicate_one_with_attachment/2} - || Pair <- Pairs] + setup, + fun couch_replicator_test_helper:start_couch/0, + fun couch_replicator_test_helper:stop_couch/1, + { + foreach, + fun setup/0, + fun teardown/1, + [ + ?TDEF_FE(should_replicate_all_docs, 120), + ?TDEF_FE(should_replicate_one, 120), + ?TDEF_FE(should_replicate_one_with_attachment, 120) + ] + } } }. -% Test documents which are below max_http_request_size but when batched, batch size -% will be greater than max_http_request_size. Replicator could automatically split -% the batch into smaller batches and POST those separately. -should_replicate_all_docs({From, To}, {_Ctx, {Source, Target}}) -> - {lists:flatten(io_lib:format("~p -> ~p", [From, To])), - {inorder, [should_populate_source(Source), - should_replicate(Source, Target), - should_compare_databases(Source, Target, [])]}}. - - -% If a document is too large to post as a single request, that document is -% skipped but replication overall will make progress and not crash. -should_replicate_one({From, To}, {_Ctx, {Source, Target}}) -> - {lists:flatten(io_lib:format("~p -> ~p", [From, To])), - {inorder, [should_populate_source_one_large_one_small(Source), - should_replicate(Source, Target), - should_compare_databases(Source, Target, [<<"doc0">>])]}}. - - -% If a document has an attachment > 64 * 1024 bytes, replicator will switch to -% POST-ing individual documents directly and skip bulk_docs. Test that case -% separately -% See note in main test function why this was disabled. -should_replicate_one_with_attachment({From, To}, {_Ctx, {Source, Target}}) -> - {lists:flatten(io_lib:format("~p -> ~p", [From, To])), - {inorder, [should_populate_source_one_large_attachment(Source), - should_populate_source(Source), - should_replicate(Source, Target), - should_compare_databases(Source, Target, [<<"doc0">>])]}}. - - -should_populate_source({remote, Source}) -> - should_populate_source(Source); - -should_populate_source(Source) -> - {timeout, ?TIMEOUT_EUNIT, ?_test(add_docs(Source, 5, 3000, 0))}. - - -should_populate_source_one_large_one_small({remote, Source}) -> - should_populate_source_one_large_one_small(Source); - -should_populate_source_one_large_one_small(Source) -> - {timeout, ?TIMEOUT_EUNIT, ?_test(one_large_one_small(Source, 12000, 3000))}. - - -should_populate_source_one_large_attachment({remote, Source}) -> - should_populate_source_one_large_attachment(Source); - -should_populate_source_one_large_attachment(Source) -> - {timeout, ?TIMEOUT_EUNIT, ?_test(one_large_attachment(Source, 70000, 70000))}. +setup() -> + Source = couch_replicator_test_helper:create_db(), + Target = couch_replicator_test_helper:create_db(), + config:set("httpd", "max_http_request_size", "10000", false), + {Source, Target}. -should_replicate({remote, Source}, Target) -> - should_replicate(db_url(Source), Target); +teardown({Source, Target}) -> + config:delete("httpd", "max_http_request_size", false), + couch_replicator_test_helper:delete_db(Source), + couch_replicator_test_helper:delete_db(Target). -should_replicate(Source, {remote, Target}) -> - should_replicate(Source, db_url(Target)); -should_replicate(Source, Target) -> - {timeout, ?TIMEOUT_EUNIT, ?_test(replicate(Source, Target))}. +% Test documents which are below max_http_request_size but when batched, batch +% size will be greater than max_http_request_size. Replicator could +% automatically split the batch into smaller batches and POST those separately. 
+should_replicate_all_docs({Source, Target}) -> + ?assertEqual(ok, add_docs(Source, 5, 3000, 0)), + replicate(Source, Target), + compare_dbs(Source, Target, []). -should_compare_databases({remote, Source}, Target, ExceptIds) -> - should_compare_databases(Source, Target, ExceptIds); +% If a document is too large to post as a single request, that document is +% skipped but replication overall will make progress and not crash. +should_replicate_one({Source, Target}) -> + ?assertEqual(ok, one_large_one_small(Source, 12000, 3000)), + replicate(Source, Target), + compare_dbs(Source, Target, [<<"doc0">>]). -should_compare_databases(Source, {remote, Target}, ExceptIds) -> - should_compare_databases(Source, Target, ExceptIds); -should_compare_databases(Source, Target, ExceptIds) -> - {timeout, ?TIMEOUT_EUNIT, ?_test(compare_dbs(Source, Target, ExceptIds))}. +% If a document has an attachment > 64 * 1024 bytes, replicator will switch to +% POST-ing individual documents directly and skip bulk_docs. Test that case +% separately See note in main test function why this was disabled. +should_replicate_one_with_attachment({Source, Target}) -> + ?assertEqual(ok, one_large_attachment(Source, 70000, 70000)), + ?assertEqual(ok, add_docs(Source, 5, 3000, 0)), + replicate(Source, Target), + compare_dbs(Source, Target, [<<"doc0">>]). binary_chunk(Size) when is_integer(Size), Size > 0 -> @@ -150,19 +81,21 @@ add_docs(DbName, DocCount, DocSize, AttSize) -> one_large_one_small(DbName, Large, Small) -> add_doc(DbName, <<"doc0">>, Large, 0), - add_doc(DbName, <<"doc1">>, Small, 0). + add_doc(DbName, <<"doc1">>, Small, 0), + ok. one_large_attachment(DbName, Size, AttSize) -> - add_doc(DbName, <<"doc0">>, Size, AttSize). + add_doc(DbName, <<"doc0">>, Size, AttSize), + ok. add_doc(DbName, DocId, Size, AttSize) when is_binary(DocId) -> - {ok, Db} = couch_db:open_int(DbName, []), - Doc0 = #doc{id = DocId, body = {[{<<"x">>, binary_chunk(Size)}]}}, - Doc = Doc0#doc{atts = atts(AttSize)}, - {ok, _} = couch_db:update_doc(Db, Doc, []), - couch_db:close(Db). + {ok, Db} = fabric2_db:open(DbName, [?ADMIN_CTX]), + Doc0 = #doc{id = DocId, body = {[{<<"x">>, binary_chunk(Size)}]}}, + Doc = Doc0#doc{atts = atts(AttSize)}, + {ok, _} = fabric2_db:update_doc(Db, Doc, []), + ok. atts(0) -> @@ -178,8 +111,13 @@ atts(Size) -> replicate(Source, Target) -> - replicate({[ - {<<"source">>, Source}, - {<<"target">>, Target}, - {<<"worker_processes">>, "1"} % This make batch_size predictable - ]}). + ?assertMatch({ok, _}, couch_replicator_test_helper:replicate(#{ + <<"source">> => Source, + <<"target">> => Target, + <<"worker_processes">> => 1 % This make batch_size predictable + })). + + +compare_dbs(Source, Target, ExceptIds) -> + ?assertEqual(ok, couch_replicator_test_helper:compare_dbs(Source, Target, + ExceptIds)). diff --git a/src/couch_replicator/test/eunit/couch_replicator_test_helper.erl b/src/couch_replicator/test/eunit/couch_replicator_test_helper.erl index fd0409164..2ac447eb3 100644 --- a/src/couch_replicator/test/eunit/couch_replicator_test_helper.erl +++ b/src/couch_replicator/test/eunit/couch_replicator_test_helper.erl @@ -1,51 +1,166 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. 
You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + -module(couch_replicator_test_helper). --include_lib("couch/include/couch_eunit.hrl"). --include_lib("couch/include/couch_db.hrl"). --include_lib("couch_replicator/src/couch_replicator.hrl"). -export([ + start_couch/0, + stop_couch/1, + + create_db/0, + create_db/1, + delete_db/1, + + server_url/0, + db_url/1, + + create_docs/2, + compare_dbs/2, compare_dbs/3, - db_url/1, - replicate/1, + compare_fold/4, + + compare_docs/2, + get_pid/1, - replicate/2 + + replicate/1, + replicate/2, + replicate_continuous/1, + replicate_continuous/2, + + cancel/1, + cancel/2, + + scheduler_jobs/0 ]). +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("couch/include/couch_db.hrl"). +-include_lib("couch_replicator/src/couch_replicator.hrl"). + + +-define(USERNAME, "rep_eunit_admin"). +-define(PASSWORD, "rep_eunit_password"). + + +start_couch() -> + Ctx = test_util:start_couch([fabric, chttpd, couch_replicator]), + Hashed = couch_passwords:hash_admin_password(?PASSWORD), + ok = config:set("admins", ?USERNAME, ?b2l(Hashed), _Persist = false), + Ctx. + + +stop_couch(Ctx) -> + config:delete("admins", ?USERNAME, _Persist = false), + test_util:stop_couch(Ctx). + + +create_db() -> + {ok, Db} = fabric2_db:create(?tempdb(), [?ADMIN_CTX]), + fabric2_db:name(Db). + + +create_db(DbName) when is_binary(DbName) -> + {ok, Db} = fabric2_db:create(DbName, [?ADMIN_CTX]), + fabric2_db:name(Db). + + +delete_db(DbName) -> + try + ok = fabric2_db:delete(DbName, [?ADMIN_CTX]) + catch + error:database_does_not_exist -> + ok + end. + + +server_url() -> + Addr = config:get("chttpd", "bind_address", "127.0.0.1"), + Port = mochiweb_socket_server:get(chttpd, port), + Fmt = "http://~s:~s@~s:~b", + ?l2b(io_lib:format(Fmt, [?USERNAME, ?PASSWORD, Addr, Port])). + + +db_url(DbName) -> + ?l2b(io_lib:format("~s/~s", [server_url(), DbName])). + + +create_docs(DbName, Docs) when is_binary(DbName), is_list(Docs) -> + {ok, Db} = fabric2_db:open(DbName, [?ADMIN_CTX]), + Docs1 = lists:map(fun(Doc) -> + case Doc of + #{} -> + Doc1 = couch_util:json_decode(couch_util:json_encode(Doc)), + couch_doc:from_json_obj(Doc1); + #doc{} -> + Doc + end + end, Docs), + {ok, ResList} = fabric2_db:update_docs(Db, Docs1), + lists:foreach(fun(Res) -> + ?assertMatch({ok, {_, Rev}} when is_binary(Rev), Res) + end, ResList). + + compare_dbs(Source, Target) -> - compare_dbs(Source, Target, []). - - -compare_dbs(Source, Target, ExceptIds) -> - {ok, SourceDb} = couch_db:open_int(Source, []), - {ok, TargetDb} = couch_db:open_int(Target, []), - - Fun = fun(FullDocInfo, Acc) -> - {ok, DocSource} = couch_db:open_doc(SourceDb, FullDocInfo), - Id = DocSource#doc.id, - case lists:member(Id, ExceptIds) of - true -> - ?assertEqual(not_found, couch_db:get_doc_info(TargetDb, Id)); - false -> - {ok, TDoc} = couch_db:open_doc(TargetDb, Id), - compare_docs(DocSource, TDoc) + Fun = fun(SrcDoc, TgtDoc, ok) -> compare_docs(SrcDoc, TgtDoc) end, + compare_fold(Source, Target, Fun, ok). 
+ + +compare_dbs(Source, Target, ExceptIds) when is_binary(Source), + is_binary(Target), is_list(ExceptIds) -> + Fun = fun(SrcDoc, TgtDoc, ok) -> + case lists:member(SrcDoc#doc.id, ExceptIds) of + true -> ?assertEqual(not_found, TgtDoc); + false -> compare_docs(SrcDoc, TgtDoc) end, - {ok, Acc} + ok end, + compare_fold(Source, Target, Fun, ok). + - {ok, _} = couch_db:fold_docs(SourceDb, Fun, [], []), - ok = couch_db:close(SourceDb), - ok = couch_db:close(TargetDb). +compare_fold(Source, Target, Fun, Acc0) when + is_binary(Source), is_binary(Target), is_function(Fun, 3) -> + {ok, SourceDb} = fabric2_db:open(Source, [?ADMIN_CTX]), + {ok, TargetDb} = fabric2_db:open(Target, [?ADMIN_CTX]), + fabric2_fdb:transactional(SourceDb, fun(TxSourceDb) -> + FoldFun = fun + ({meta, _Meta}, Acc) -> + {ok, Acc}; + (complete, Acc) -> + {ok, Acc}; + ({row, Row}, Acc) -> + {_, Id} = lists:keyfind(id, 1, Row), + SrcDoc = open_doc(TxSourceDb, Id), + TgtDoc = open_doc(TargetDb, Id), + {ok, Fun(SrcDoc, TgtDoc, Acc)} + end, + Opts = [{restart_tx, true}], + {ok, AccF} = fabric2_db:fold_docs(TxSourceDb, FoldFun, Acc0, Opts), + AccF + end). -compare_docs(Doc1, Doc2) -> +compare_docs(#doc{} = Doc1, Doc2) when + is_record(Doc2, doc) orelse Doc2 =:= not_found -> + ?assert(Doc2 =/= not_found), ?assertEqual(Doc1#doc.body, Doc2#doc.body), #doc{atts = Atts1} = Doc1, #doc{atts = Atts2} = Doc2, ?assertEqual(lists:sort([couch_att:fetch(name, Att) || Att <- Atts1]), - lists:sort([couch_att:fetch(name, Att) || Att <- Atts2])), + lists:sort([couch_att:fetch(name, Att) || Att <- Atts2])), FunCompareAtts = fun(Att) -> AttName = couch_att:fetch(name, Att), {ok, AttTarget} = find_att(Atts2, AttName), @@ -68,19 +183,109 @@ compare_docs(Doc1, Doc2) -> ?assert(is_integer(couch_att:fetch(disk_len, AttTarget))), ?assert(is_integer(couch_att:fetch(att_len, AttTarget))), ?assertEqual(couch_att:fetch(disk_len, Att), - couch_att:fetch(disk_len, AttTarget)), + couch_att:fetch(disk_len, AttTarget)), ?assertEqual(couch_att:fetch(att_len, Att), - couch_att:fetch(att_len, AttTarget)), + couch_att:fetch(att_len, AttTarget)), ?assertEqual(couch_att:fetch(type, Att), - couch_att:fetch(type, AttTarget)), + couch_att:fetch(type, AttTarget)), ?assertEqual(couch_att:fetch(md5, Att), - couch_att:fetch(md5, AttTarget)) + couch_att:fetch(md5, AttTarget)) end, lists:foreach(FunCompareAtts, Atts1). +get_pid(RepId) -> + JobId = case couch_replicator_jobs:get_job_id(undefined, RepId) of + {ok, JobId0} -> JobId0; + {error, not_found} -> RepId + end, + {ok, #{<<"state">> := <<"running">>, <<"pid">> := Pid0}} = + couch_replicator_jobs:get_job_data(undefined, JobId), + Pid = list_to_pid(binary_to_list(Pid0)), + ?assert(is_pid(Pid)), + ?assert(is_process_alive(Pid)), + Pid. + + +replicate({[_ | _]} = EJson) -> + Str = couch_util:json_encode(EJson), + replicate(couch_util:json_decode(Str, [return_maps])); + +replicate(#{} = Rep0) -> + Rep = maybe_db_urls(Rep0), + {ok, Id, _} = couch_replicator_parse:parse_transient_rep(Rep, null), + ok = cancel(Id), + try + couch_replicator:replicate(Rep, ?ADMIN_USER) + after + ok = cancel(Id) + end. + + +replicate(Source, Target) -> + replicate(#{ + <<"source">> => Source, + <<"target">> => Target + }). 
+ + +replicate_continuous({[_ | _]} = EJson) -> + Str = couch_util:json_encode(EJson), + replicate_continuous(couch_util:json_decode(Str, [return_maps])); + +replicate_continuous(#{<<"continuous">> := true} = Rep0) -> + Rep = maybe_db_urls(Rep0), + {ok, {continuous, RepId}} = couch_replicator:replicate(Rep, ?ADMIN_USER), + {ok, get_pid(RepId), RepId}. + + +replicate_continuous(Source, Target) -> + replicate_continuous(#{ + <<"source">> => Source, + <<"target">> => Target, + <<"continuous">> => true + }). + + +cancel(Id) when is_binary(Id) -> + CancelRep = #{<<"cancel">> => true, <<"id">> => Id}, + case couch_replicator:replicate(CancelRep, ?ADMIN_USER) of + {ok, {cancelled, <<_/binary>>}} -> ok; + {error, not_found} -> ok + end. + + +cancel(Id, Pid) when is_pid(Pid), is_binary(Id) -> + Ref = monitor(process, Pid), + try + cancel(Id) + after + receive + {'DOWN', Ref, _, _, _} -> ok + after 60000 -> + error(replicator_pid_death_timeout) + end + end. + + +scheduler_jobs() -> + ServerUrl = couch_replicator_test_helper:server_url(), + Url = lists:flatten(io_lib:format("~s/_scheduler/jobs", [ServerUrl])), + {ok, 200, _, Body} = test_request:get(Url, []), + Json = jiffy:decode(Body, [return_maps]), + maps:get(<<"jobs">>, Json). + + +open_doc(Db, DocId) -> + case fabric2_db:open_doc(Db, DocId, []) of + {ok, #doc{deleted = false} = Doc} -> Doc; + {not_found, missing} -> not_found + end. + + find_att([], _Name) -> nil; + find_att([Att | Rest], Name) -> case couch_att:fetch(name, Att) of Name -> @@ -91,45 +296,29 @@ find_att([Att | Rest], Name) -> att_md5(Att) -> - Md50 = couch_att:foldl( - Att, - fun(Chunk, Acc) -> couch_hash:md5_hash_update(Acc, Chunk) end, - couch_hash:md5_hash_init()), + Md50 = couch_att:foldl(Att, fun(Chunk, Acc) -> + couch_hash:md5_hash_update(Acc, Chunk) + end, couch_hash:md5_hash_init()), couch_hash:md5_hash_final(Md50). + att_decoded_md5(Att) -> - Md50 = couch_att:foldl_decode( - Att, - fun(Chunk, Acc) -> couch_hash:md5_hash_update(Acc, Chunk) end, - couch_hash:md5_hash_init()), + Md50 = couch_att:foldl_decode(Att, fun(Chunk, Acc) -> + couch_hash:md5_hash_update(Acc, Chunk) + end, couch_hash:md5_hash_init()), couch_hash:md5_hash_final(Md50). -db_url(DbName) -> - iolist_to_binary([ - "http://", config:get("httpd", "bind_address", "127.0.0.1"), - ":", integer_to_list(mochiweb_socket_server:get(couch_httpd, port)), - "/", DbName - ]). - -get_pid(RepId) -> - Pid = global:whereis_name({couch_replicator_scheduler_job,RepId}), - ?assert(is_pid(Pid)), - Pid. -replicate(Source, Target) -> - replicate({[ - {<<"source">>, Source}, - {<<"target">>, Target} - ]}). - -replicate({[_ | _]} = RepObject) -> - {ok, Rep} = couch_replicator_utils:parse_rep_doc(RepObject, ?ADMIN_USER), - ok = couch_replicator_scheduler:add_job(Rep), - couch_replicator_scheduler:reschedule(), - Pid = get_pid(Rep#rep.id), - MonRef = erlang:monitor(process, Pid), - receive - {'DOWN', MonRef, process, Pid, _} -> - ok +maybe_db_urls(#{} = Rep) -> + #{<<"source">> := Src, <<"target">> := Tgt} = Rep, + Src1 = case Src of + <<"http://", _/binary>> -> Src; + <<"https://", _/binary>> -> Src; + <<_/binary>> -> db_url(Src) + end, + Tgt1 = case Tgt of + <<"http://", _/binary>> -> Tgt; + <<"https://", _/binary>> -> Tgt; + <<_/binary>> -> db_url(Tgt) end, - ok = couch_replicator_scheduler:remove_job(Rep#rep.id). + Rep#{<<"source">> := Src1, <<"target">> := Tgt1}. 
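compare_fold/4 is the general form behind compare_dbs/2,3: it folds over every document id in the source and hands each source doc plus the matching target doc (or the atom not_found) to a caller-supplied function, which is how the selector test verifies filtered replication. A minimal sketch that merely counts how many source docs reached the target (function and variable names are illustrative):

count_replicated(Source, Target) ->
    Fun = fun(_SrcDoc, TgtDoc, Count) ->
        % TgtDoc is either a #doc{} record or the atom not_found
        case TgtDoc of
            not_found -> Count;
            _ -> Count + 1
        end
    end,
    % The final accumulator is returned once all source docs are visited
    couch_replicator_test_helper:compare_fold(Source, Target, Fun, 0).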
diff --git a/src/couch_replicator/test/eunit/couch_replicator_transient_jobs_tests.erl b/src/couch_replicator/test/eunit/couch_replicator_transient_jobs_tests.erl new file mode 100644 index 000000000..222d13809 --- /dev/null +++ b/src/couch_replicator/test/eunit/couch_replicator_transient_jobs_tests.erl @@ -0,0 +1,119 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_replicator_transient_jobs_tests). + +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("couch/include/couch_db.hrl"). +-include_lib("couch_replicator/src/couch_replicator.hrl"). +-include_lib("fabric/test/fabric2_test.hrl"). + + +transient_jobs_test_() -> + { + "Transient jobs tests", + { + setup, + fun couch_replicator_test_helper:start_couch/0, + fun couch_replicator_test_helper:stop_couch/1, + { + foreach, + fun setup/0, + fun teardown/1, + [ + ?TDEF_FE(transient_job_is_removed, 10), + ?TDEF_FE(posting_same_job_is_a_noop, 10), + ?TDEF_FE(transient_job_with_a_bad_filter, 10) + ] + } + } + }. + + +setup() -> + Source = couch_replicator_test_helper:create_db(), + couch_replicator_test_helper:create_docs(Source, [ + #{<<"_id">> => <<"doc1">>} + ]), + Target = couch_replicator_test_helper:create_db(), + config:set("replicator", "stats_update_interval_sec", "0", false), + config:set("replicator", "transient_job_max_age_sec", "9999", false), + {Source, Target}. + + +teardown({Source, Target}) -> + config:delete("replicator", "stats_update_interval_sec", false), + config:delete("replicator", "transient_job_max_age_sec", false), + couch_replicator_test_helper:delete_db(Source), + couch_replicator_test_helper:delete_db(Target). + + +transient_job_is_removed({Source, Target}) -> + {ok, #{}} = replicate(Source, Target), + JobId = get_rep_id(Source, Target), + + couch_replicator_job_server:reschedule(), + + % Still there after clean up attempt ran + ?assertMatch({200, #{}}, scheduler_jobs(JobId)), + + config:set("replicator", "transient_job_max_age_sec", "0", false), + couch_replicator_job_server:reschedule(), + + % Should be gone now + ?assertMatch({404, #{}}, scheduler_jobs(JobId)). + + +posting_same_job_is_a_noop({Source, Target}) -> + {ok, Pid1, RepId1} = replicate_continuous(Source, Target), + {ok, Pid2, RepId2} = replicate_continuous(Source, Target), + ?assertEqual(RepId1, RepId2), + ?assertEqual(Pid1, Pid2), + couch_replicator_test_helper:cancel(RepId1). + + +transient_job_with_a_bad_filter({Source, Target}) -> + DDoc = #{<<"_id">> => <<"_design/myddoc">>}, + couch_replicator_test_helper:create_docs(Source, [DDoc]), + Result = couch_replicator:replicate(#{ + <<"source">> => couch_replicator_test_helper:db_url(Source), + <<"target">> => couch_replicator_test_helper:db_url(Target), + <<"continuous">> => true, + <<"filter">> => <<"myddoc/myfilter">> + }, ?ADMIN_USER), + ?assertMatch({error, #{<<"error">> := <<"filter_fetch_error">>}}, Result). 
+ + +get_rep_id(Source, Target) -> + {ok, Id, _} = couch_replicator_parse:parse_transient_rep(#{ + <<"source">> => couch_replicator_test_helper:db_url(Source), + <<"target">> => couch_replicator_test_helper:db_url(Target) + }, null), + Id. + + +replicate(Source, Target) -> + couch_replicator:replicate(#{ + <<"source">> => couch_replicator_test_helper:db_url(Source), + <<"target">> => couch_replicator_test_helper:db_url(Target) + }, ?ADMIN_USER). + + +replicate_continuous(Source, Target) -> + couch_replicator_test_helper:replicate_continuous(Source, Target). + + +scheduler_jobs(Id) -> + SUrl = couch_replicator_test_helper:server_url(), + Url = lists:flatten(io_lib:format("~s/_scheduler/jobs/~s", [SUrl, Id])), + {ok, Code, _, Body} = test_request:get(Url, []), + {Code, jiffy:decode(Body, [return_maps])}. diff --git a/src/couch_replicator/test/eunit/couch_replicator_use_checkpoints_tests.erl b/src/couch_replicator/test/eunit/couch_replicator_use_checkpoints_tests.erl index 8e4a21dbb..4371eff1f 100644 --- a/src/couch_replicator/test/eunit/couch_replicator_use_checkpoints_tests.erl +++ b/src/couch_replicator/test/eunit/couch_replicator_use_checkpoints_tests.erl @@ -14,165 +14,82 @@ -include_lib("couch/include/couch_eunit.hrl"). -include_lib("couch/include/couch_db.hrl"). +-include_lib("fabric/test/fabric2_test.hrl"). --import(couch_replicator_test_helper, [ - db_url/1, - replicate/1 -]). -define(DOCS_COUNT, 100). --define(TIMEOUT_EUNIT, 30). -define(i2l(I), integer_to_list(I)). -define(io2b(Io), iolist_to_binary(Io)). -start(false) -> - fun - ({finished, _, {CheckpointHistory}}) -> - ?assertEqual([{<<"use_checkpoints">>,false}], CheckpointHistory); - (_) -> - ok - end; -start(true) -> - fun - ({finished, _, {CheckpointHistory}}) -> - ?assertNotEqual(false, lists:keyfind(<<"session_id">>, - 1, CheckpointHistory)); - (_) -> - ok - end. - -stop(_, _) -> - ok. - -setup() -> - DbName = ?tempdb(), - {ok, Db} = couch_db:create(DbName, [?ADMIN_CTX]), - ok = couch_db:close(Db), - DbName. - -setup(remote) -> - {remote, setup()}; -setup({_, Fun, {A, B}}) -> - Ctx = test_util:start_couch([couch_replicator]), - {ok, Listener} = couch_replicator_notifier:start_link(Fun), - Source = setup(A), - Target = setup(B), - {Ctx, {Source, Target, Listener}}. - -teardown({remote, DbName}) -> - teardown(DbName); -teardown(DbName) -> - ok = couch_server:delete(DbName, [?ADMIN_CTX]), - ok. - -teardown(_, {Ctx, {Source, Target, Listener}}) -> - teardown(Source), - teardown(Target), - - couch_replicator_notifier:stop(Listener), - ok = application:stop(couch_replicator), - ok = test_util:stop_couch(Ctx). - use_checkpoints_test_() -> { - "Replication use_checkpoints feature tests", + setup, + fun couch_replicator_test_helper:start_couch/0, + fun couch_replicator_test_helper:stop_couch/1, { - foreachx, - fun start/1, fun stop/2, - [{UseCheckpoints, fun use_checkpoints_tests/2} - || UseCheckpoints <- [false, true]] + foreach, + fun setup/0, + fun teardown/1, + [ + ?TDEF_FE(t_replicate_with_checkpoints, 15), + ?TDEF_FE(t_replicate_without_checkpoints, 15) + ] } }. -use_checkpoints_tests(UseCheckpoints, Fun) -> - Pairs = [{remote, remote}], - { - "use_checkpoints: " ++ atom_to_list(UseCheckpoints), - { - foreachx, - fun setup/1, fun teardown/2, - [{{UseCheckpoints, Fun, Pair}, fun should_test_checkpoints/2} - || Pair <- Pairs] - } - }. -should_test_checkpoints({UseCheckpoints, _, {From, To}}, {_Ctx, {Source, Target, _}}) -> - should_test_checkpoints(UseCheckpoints, {From, To}, {Source, Target}). 
-should_test_checkpoints(UseCheckpoints, {From, To}, {Source, Target}) -> - {lists:flatten(io_lib:format("~p -> ~p", [From, To])), - {inorder, [ - should_populate_source(Source, ?DOCS_COUNT), - should_replicate(Source, Target, UseCheckpoints), - should_compare_databases(Source, Target) - ]}}. - -should_populate_source({remote, Source}, DocCount) -> - should_populate_source(Source, DocCount); -should_populate_source(Source, DocCount) -> - {timeout, ?TIMEOUT_EUNIT, ?_test(populate_db(Source, DocCount))}. - -should_replicate({remote, Source}, Target, UseCheckpoints) -> - should_replicate(db_url(Source), Target, UseCheckpoints); -should_replicate(Source, {remote, Target}, UseCheckpoints) -> - should_replicate(Source, db_url(Target), UseCheckpoints); -should_replicate(Source, Target, UseCheckpoints) -> - {timeout, ?TIMEOUT_EUNIT, ?_test(replicate(Source, Target, UseCheckpoints))}. - -should_compare_databases({remote, Source}, Target) -> - should_compare_databases(Source, Target); -should_compare_databases(Source, {remote, Target}) -> - should_compare_databases(Source, Target); -should_compare_databases(Source, Target) -> - {timeout, ?TIMEOUT_EUNIT, ?_test(compare_dbs(Source, Target))}. +setup() -> + Source = couch_replicator_test_helper:create_db(), + Target = couch_replicator_test_helper:create_db(), + {Source, Target}. -populate_db(DbName, DocCount) -> - {ok, Db} = couch_db:open_int(DbName, []), - Docs = lists:foldl( - fun(DocIdCounter, Acc) -> - Id = ?io2b(["doc", ?i2l(DocIdCounter)]), - Value = ?io2b(["val", ?i2l(DocIdCounter)]), - Doc = #doc{ - id = Id, - body = {[ {<<"value">>, Value} ]} - }, - [Doc | Acc] - end, - [], lists:seq(1, DocCount)), - {ok, _} = couch_db:update_docs(Db, Docs, []), - ok = couch_db:close(Db). - -compare_dbs(Source, Target) -> - {ok, SourceDb} = couch_db:open_int(Source, []), - {ok, TargetDb} = couch_db:open_int(Target, []), - Fun = fun(FullDocInfo, Acc) -> - {ok, Doc} = couch_db:open_doc(SourceDb, FullDocInfo), - {Props} = DocJson = couch_doc:to_json_obj(Doc, [attachments]), - DocId = couch_util:get_value(<<"_id">>, Props), - DocTarget = case couch_db:open_doc(TargetDb, DocId) of - {ok, DocT} -> - DocT; - Error -> - erlang:error( - {assertion_failed, - [{module, ?MODULE}, {line, ?LINE}, - {reason, lists:concat(["Error opening document '", - ?b2l(DocId), "' from target: ", - couch_util:to_list(Error)])}]}) - end, - DocTargetJson = couch_doc:to_json_obj(DocTarget, [attachments]), - ?assertEqual(DocJson, DocTargetJson), - {ok, Acc} - end, - {ok, _} = couch_db:fold_docs(SourceDb, Fun, [], []), - ok = couch_db:close(SourceDb), - ok = couch_db:close(TargetDb). - -replicate(Source, Target, UseCheckpoints) -> - replicate({[ - {<<"source">>, Source}, - {<<"target">>, Target}, - {<<"use_checkpoints">>, UseCheckpoints} - ]}). +teardown({Source, Target}) -> + couch_replicator_test_helper:delete_db(Source), + couch_replicator_test_helper:delete_db(Target). + +t_replicate_with_checkpoints({Source, Target}) -> + populate_db(Source, ?DOCS_COUNT), + Res = couch_replicator_test_helper:replicate(#{ + <<"source">> => Source, + <<"target">> => Target, + <<"use_checkpoints">> => true + }), + ?assertMatch({ok, _}, Res), + + {ok, History} = Res, + ?assertMatch(#{<<"history">> := _, <<"session_id">> := _}, History), + + Checkpoints = maps:get(<<"history">>, History), + SessionId = maps:get(<<"session_id">>, History), + ?assert(is_binary(SessionId)), + ?assert(is_list(Checkpoints)), + ?assert(length(Checkpoints) >= 1), + + couch_replicator_test_helper:compare_dbs(Source, Target). 
+ + +t_replicate_without_checkpoints({Source, Target}) -> + populate_db(Source, ?DOCS_COUNT), + Res = couch_replicator_test_helper:replicate(#{ + <<"source">> => Source, + <<"target">> => Target, + <<"use_checkpoints">> => false + }), + ?assertEqual({ok, #{<<"use_checkpoints">> => false}}, Res), + couch_replicator_test_helper:compare_dbs(Source, Target). + + +populate_db(DbName, DocCount) -> + Docs = lists:foldl(fun(DocIdCounter, Acc) -> + Id = ?io2b(["doc", ?i2l(DocIdCounter)]), + Value = ?io2b(["val", ?i2l(DocIdCounter)]), + Doc = #doc{ + id = Id, + body = {[{<<"value">>, Value}]} + }, + [Doc | Acc] + end, [], lists:seq(1, DocCount)), + couch_replicator_test_helper:create_docs(DbName, Docs). diff --git a/src/couch_stats/src/couch_stats_aggregator.erl b/src/couch_stats/src/couch_stats_aggregator.erl index 0416636c9..8d8cdf7e5 100644 --- a/src/couch_stats/src/couch_stats_aggregator.erl +++ b/src/couch_stats/src/couch_stats_aggregator.erl @@ -27,7 +27,8 @@ handle_cast/2, handle_info/2, code_change/3, - terminate/2 + terminate/2, + format_status/2 ]). @@ -88,6 +89,20 @@ terminate(_Reason, _State) -> code_change(_OldVsn, State, _Extra) -> {ok, State}. +format_status(_Opt, [_PDict, State]) -> + #st{ + descriptions=Descs, + stats=Stats, + collect_timer=CollectT, + reload_timer=ReloadT + } = State, + [{data, [{"State", [ + {descriptions, {set_size, sets:size(Descs)}}, + {stats, {length, length(Stats)}}, + {collect_timer,CollectT}, + {reload_timer,ReloadT} + ]}]}]. + comparison_set(Metrics) -> sets:from_list( [{Name, proplists:get_value(type, Props)} || {Name, Props} <- Metrics] diff --git a/src/couch_views/.gitignore b/src/couch_views/.gitignore new file mode 100644 index 000000000..f1c455451 --- /dev/null +++ b/src/couch_views/.gitignore @@ -0,0 +1,19 @@ +.rebar3 +_* +.eunit +*.o +*.beam +*.plt +*.swp +*.swo +.erlang.cookie +ebin +log +erl_crash.dump +.rebar +logs +_build +.idea +*.iml +rebar3.crashdump +*~ diff --git a/src/couch_views/README.md b/src/couch_views/README.md new file mode 100644 index 000000000..181f48ed2 --- /dev/null +++ b/src/couch_views/README.md @@ -0,0 +1,35 @@ +CouchDB Views +===== + +This is the new application that builds and runs Map/reduce views against FoundationDB. +Currently only map indexes are supported and it will always return the full index. + +Code layout: + +* `couch_views` - Main entry point to query a view +* `couch_views_batch` - Dynamically determine optimal batch sizes for view indexers. +* `couch_views_batch_impl` - Default implementation for optimizing batch sizes. +* `couch_views_encoding` - Encodes view keys that are byte comparable following CouchDB view sort order. +* `couch_views_fdb` - Maps view operations to FoundationDB logic. +* `couch_views_http` - View specific helpers for chttpd +* `couch_views_indexer` - `couch_jobs` worker that builds an index from the changes feed. +* `couch_views_reader` - Reads from the index for queries +* `couch_vews_jobs` - `couch_views` interactions with `couch_jobs`. It handles adding index jobs and subscribes to jobs. +* `couch_views_server` - Spawns `couch_views_indexer` workers to handle index update jobs. 
+* `couch_views_updater` - Update interactive indexes during doc update transactions +* `couch_views_util` - Various utility functions + +# Configuration + +; Batch size sensing parameters +; batch_initial_size = 100 ; Initial batch size in number of documents +; batch_search_increment = 500 ; Size change when searching for the threshold +; batch_sense_increment = 100 ; Size change increment after hitting a threshold +; batch_max_tx_size_bytes = 9000000 ; Maximum transaction size in bytes +; batch_max_tx_time_msec = 4500 ; Maximum transaction time in milliseconds +; batch_threshold_penalty = 0.2 ; Amount to reduce batch size when crossing a threshold + +The default batch size sensing parameters are fairly straightforward. These +values can be tweaked in the config if desired. If you find that you need to +tweak these values for any reason please open an issue on GitHub reporting your +experience in case we need to adjust them for common cases. diff --git a/src/couch_views/include/couch_views.hrl b/src/couch_views/include/couch_views.hrl new file mode 100644 index 000000000..92b8f46fb --- /dev/null +++ b/src/couch_views/include/couch_views.hrl @@ -0,0 +1,42 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +% Current implementation version +-define(CURRENT_VIEW_IMPL_VERSION, 1). + +% Index info/data subspaces +-define(VIEW_INFO, 0). +-define(VIEW_DATA, 1). +-define(VIEW_TREES, 3). + +% Index info keys +-define(VIEW_UPDATE_SEQ, 0). +-define(VIEW_ROW_COUNT, 1). +-define(VIEW_KV_SIZE, 2). +-define(VIEW_BUILD_STATUS, 3). +-define(VIEW_CREATION_VS, 4). +-define(VIEW_IMPL_VERSION, 5). + +% Data keys +-define(VIEW_ID_RANGE, 0). +-define(VIEW_MAP_RANGE, 1). + +% Tree keys +-define(VIEW_ID_TREE, 0). +-define(VIEW_ROW_TREES, 1). + +% jobs api +-define(INDEX_JOB_TYPE, <<"views">>). + +% indexing progress +-define(INDEX_BUILDING, <<"building">>). +-define(INDEX_READY, <<"ready">>). diff --git a/src/couch_views/rebar.config b/src/couch_views/rebar.config new file mode 100644 index 000000000..362c8785e --- /dev/null +++ b/src/couch_views/rebar.config @@ -0,0 +1,14 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +{cover_enabled, true}. +{cover_print_enabled, true}.
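The batch parameters documented in the couch_views README above are read with config:get/3 when an indexer initializes its batch state (see couch_views_batch_impl later in this diff), so they can also be adjusted on a running node and picked up by subsequent indexing runs. A small sketch from a remote shell, with illustrative values only:

    % values are strings; they are parsed by the batch implementation itself
    config:set("couch_views", "batch_initial_size", "200", false),
    config:set("couch_views", "batch_max_tx_time_msec", "3000", false).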
diff --git a/src/couch_views/src/couch_views.app.src b/src/couch_views/src/couch_views.app.src new file mode 100644 index 000000000..985c503cd --- /dev/null +++ b/src/couch_views/src/couch_views.app.src @@ -0,0 +1,33 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +{application, couch_views, [ + {description, "CouchDB Views on FDB"}, + {vsn, git}, + {mod, {couch_views_app, []}}, + {registered, [ + couch_views_sup, + couch_views_server + ]}, + {applications, [ + kernel, + stdlib, + erlfdb, + couch_epi, + couch_log, + config, + couch_stats, + fabric, + couch_jobs, + couch_eval + ]} +]}. diff --git a/src/couch_views/src/couch_views.erl b/src/couch_views/src/couch_views.erl new file mode 100644 index 000000000..2d916314f --- /dev/null +++ b/src/couch_views/src/couch_views.erl @@ -0,0 +1,231 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_views). + + +-behavior(fabric2_index). + + +-export([ + query/6, + + % fabric2_index behavior + build_indices/2, + cleanup_indices/2, + get_info/2 +]). + +-include("couch_views.hrl"). +-include_lib("couch_mrview/include/couch_mrview.hrl"). + + +query(Db, DDoc, ViewName, Callback, Acc0, Args0) -> + case fabric2_db:is_users_db(Db) of + true -> + fabric2_users_db:after_doc_read(DDoc, Db); + false -> + ok + end, + + DbName = fabric2_db:name(Db), + IsInteractive = couch_views_ddoc:is_interactive(DDoc), + {ok, Mrst} = couch_views_util:ddoc_to_mrst(DbName, DDoc), + + #mrst{ + views = Views + } = Mrst, + + Args1 = to_mrargs(Args0), + Args2 = couch_mrview_util:set_view_type(Args1, ViewName, Views), + Args3 = couch_mrview_util:validate_args(Args2), + ok = check_range(Mrst, ViewName, Args3), + + try + fabric2_fdb:transactional(Db, fun(TxDb) -> + ok = maybe_update_view(TxDb, Mrst, IsInteractive, Args3), + read_view(TxDb, Mrst, ViewName, Callback, Acc0, Args3) + end) + catch throw:{build_view, WaitSeq} -> + couch_views_jobs:build_view(Db, Mrst, WaitSeq), + read_view(Db, Mrst, ViewName, Callback, Acc0, Args3) + end. + + +build_indices(#{} = Db, DDocs) when is_list(DDocs) -> + DbName = fabric2_db:name(Db), + lists:filtermap(fun(DDoc) -> + try couch_views_util:ddoc_to_mrst(DbName, DDoc) of + {ok, #mrst{} = Mrst} -> + {true, couch_views_jobs:build_view_async(Db, Mrst)} + catch _:_ -> + false + end + end, DDocs). 
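couch_views:query/6 above is the main entry point for reading a view; it converts the arguments, triggers an index build if the view is stale, and then streams results through the caller's callback. A minimal sketch of a caller, assuming the callback receives the {meta, _}, {row, _} and complete messages handled by the http layer elsewhere in this diff, and using a hypothetical view name:

    collect_cb({meta, _Meta}, Acc) ->
        {ok, Acc};
    collect_cb({row, Row}, Acc) ->
        {ok, [Row | Acc]};
    collect_cb(complete, Acc) ->
        {ok, lists:reverse(Acc)}.

    query_sketch(Db, DDoc) ->
        % args may be an #mrargs{} record or a map; to_mrargs/1 below
        % maps map keys onto the record fields by name
        Args = #{limit => 10},
        couch_views:query(Db, DDoc, <<"by_value">>, fun collect_cb/2, [], Args).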
+ + +cleanup_indices(#{} = Db, DDocs) when is_list(DDocs) -> + DbName = fabric2_db:name(Db), + ActiveSigs = lists:filtermap(fun(DDoc) -> + try couch_views_util:ddoc_to_mrst(DbName, DDoc) of + {ok, #mrst{sig = Sig}} -> + {true, Sig} + catch _:_ -> + false + end + end, DDocs), + ExistingSigs = couch_views_fdb:list_signatures(Db), + StaleSigs = ExistingSigs -- ActiveSigs, + lists:foreach(fun(Sig) -> + couch_views_jobs:remove(Db, Sig), + couch_views_fdb:clear_index(Db, Sig) + end, StaleSigs). + + +get_info(Db, DDoc) -> + DbName = fabric2_db:name(Db), + {ok, Mrst} = couch_views_util:ddoc_to_mrst(DbName, DDoc), + Sig = fabric2_util:to_hex(Mrst#mrst.sig), + {UpdateSeq, DataSize, Status} = fabric2_fdb:transactional(Db, fun(TxDb) -> + Mrst1 = couch_views_trees:open(TxDb, Mrst), + Seq = couch_views_fdb:get_update_seq(TxDb, Mrst1), + DataSize = get_total_view_size(TxDb, Mrst1), + JobStatus = case couch_views_jobs:job_state(TxDb, Mrst1) of + {ok, pending} -> true; + {ok, running} -> true; + {ok, finished} -> false; + {error, not_found} -> false + end, + {Seq, DataSize, JobStatus} + end), + UpdateOptions = get_update_options(Mrst), + {ok, [ + {language, Mrst#mrst.language}, + {signature, Sig}, + {sizes, {[ + {active, DataSize} + ]}}, + {update_seq, UpdateSeq}, + {updater_running, Status}, + {update_options, UpdateOptions} + ]}. + + +get_total_view_size(TxDb, Mrst) -> + lists:foldl(fun(View, Total) -> + Total + couch_views_trees:get_kv_size(TxDb, View) + end, 0, Mrst#mrst.views). + + +read_view(Db, Mrst, ViewName, Callback, Acc0, Args) -> + fabric2_fdb:transactional(Db, fun(TxDb) -> + try + couch_views_reader:read(TxDb, Mrst, ViewName, Callback, Acc0, Args) + after + UpdateAfter = Args#mrargs.update == lazy, + if UpdateAfter == false -> ok; true -> + couch_views_jobs:build_view_async(TxDb, Mrst) + end + end + end). + + +maybe_update_view(_Db, _Mrst, _, #mrargs{update = false}) -> + ok; + +maybe_update_view(_Db, _Mrst, _, #mrargs{update = lazy}) -> + ok; + +maybe_update_view(TxDb, Mrst, true, _Args) -> + BuildState = couch_views_fdb:get_build_status(TxDb, Mrst), + if BuildState == ?INDEX_READY -> ok; true -> + VS = couch_views_fdb:get_creation_vs(TxDb, Mrst), + throw({build_view, fabric2_fdb:vs_to_seq(VS)}) + end; + +maybe_update_view(TxDb, Mrst, false, _Args) -> + DbSeq = fabric2_db:get_update_seq(TxDb), + ViewSeq = couch_views_fdb:get_update_seq(TxDb, Mrst), + case DbSeq == ViewSeq of + true -> ok; + false -> throw({build_view, DbSeq}) + end. + + +to_mrargs(#mrargs{} = Args) -> + Args; + +to_mrargs(#{} = Args) -> + Fields = record_info(fields, mrargs), + Indexes = lists:seq(2, record_info(size, mrargs)), + LU = lists:zip(Fields, Indexes), + + maps:fold(fun(Key, Value, Acc) -> + Index = fabric2_util:get_value(couch_util:to_existing_atom(Key), LU), + setelement(Index, Acc, Value) + end, #mrargs{}, Args). + + +check_range(Mrst, ViewName, Args) -> + #mrst{ + language = Lang, + views = Views + } = Mrst, + View = case couch_mrview_util:extract_view(Lang, Args, ViewName, Views) of + {map, V, _} -> V; + {red, {_, _, V}, _} -> V + end, + Cmp = couch_views_util:collate_fun(View), + check_range(Args, Cmp). 
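to_mrargs/1 above fills an #mrargs{} record positionally from a map, so every key must name an existing mrargs field. A tiny sketch of the conversion using two common fields (the helper is internal; this only illustrates the mapping):

    Args = to_mrargs(#{limit => 5, skip => 10}),
    5 = Args#mrargs.limit,
    10 = Args#mrargs.skip.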
+ + +check_range(#mrargs{start_key = undefined}, _Cmp) -> + ok; + +check_range(#mrargs{end_key = undefined}, _Cmp) -> + ok; + +check_range(#mrargs{start_key = K, end_key = K}, _Cmp) -> + ok; + +check_range(Args, Cmp) -> + #mrargs{ + direction = Dir, + start_key = SK, + start_key_docid = SKD, + end_key = EK, + end_key_docid = EKD + } = Args, + + case {Dir, Cmp({SK, SKD}, {EK, EKD})} of + {fwd, gt} -> + throw(check_range_error(<<"true">>)); + {rev, lt} -> + throw(check_range_error(<<"false">>)); + _ -> + ok + end. + + +check_range_error(Descending) -> + {query_parse_error, + <<"No rows can match your key range, reverse your ", + "start_key and end_key or set descending=", + Descending/binary>>}. + + +get_update_options(#mrst{design_opts = Opts}) -> + IncDesign = couch_util:get_value(<<"include_design">>, Opts, false), + LocalSeq = couch_util:get_value(<<"local_seq">>, Opts, false), + UpdateOptions = if IncDesign -> [include_design]; true -> [] end + ++ if LocalSeq -> [local_seq]; true -> [] end, + [atom_to_binary(O, latin1) || O <- UpdateOptions]. diff --git a/src/couch_views/src/couch_views_app.erl b/src/couch_views/src/couch_views_app.erl new file mode 100644 index 000000000..7337d0580 --- /dev/null +++ b/src/couch_views/src/couch_views_app.erl @@ -0,0 +1,31 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + + +-module(couch_views_app). + + +-behaviour(application). + + +-export([ + start/2, + stop/1 +]). + + +start(_StartType, _StartArgs) -> + couch_views_sup:start_link(). + + +stop(_State) -> + ok. diff --git a/src/couch_views/src/couch_views_batch.erl b/src/couch_views/src/couch_views_batch.erl new file mode 100644 index 000000000..ba2a22782 --- /dev/null +++ b/src/couch_views/src/couch_views_batch.erl @@ -0,0 +1,86 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_views_batch). + + +-export([ + start/1, + success/2, + failure/1 +]). + + +-include_lib("couch_mrview/include/couch_mrview.hrl"). + +-type update_stats() :: #{ + docs_read => non_neg_integer(), + tx_size => non_neg_integer(), + total_kvs => non_neg_integer() +}. + +-export_type([update_stats/0]). + +-callback start( + Mrst::#mrst{}, + State::term() + ) -> {NewState::term(), BatchSize::pos_integer()}. + +-callback success( + Mrst::#mrst{}, + UpdateStats::update_stats(), + State::term() + ) -> NewState::term(). + +-callback failure(Mrst::#mrst{}, State::term()) -> NewState::term(). + + +-define(DEFAULT_MOD, "couch_views_batch_impl"). + + +-spec start(#mrst{}) -> pos_integer(). 
+start(#mrst{} = Mrst) -> + {Mod, State} = case load_state() of + {M, S} -> + {M, S}; + undefined -> + ModStr = config:get("couch_views", "batch_module", ?DEFAULT_MOD), + ModAtom = list_to_existing_atom(ModStr), + {ModAtom, undefined} + end, + {NewState, BatchSize} = Mod:start(Mrst, State), + save_state(Mod, NewState), + BatchSize. + + +-spec success(#mrst{}, UpdateStats::update_stats()) -> ok. +success(#mrst{} = Mrst, UpdateStats) -> + {Mod, State} = load_state(), + NewState = Mod:success(Mrst, UpdateStats, State), + save_state(Mod, NewState), + ok. + + +-spec failure(#mrst{}) -> ok. +failure(#mrst{} = Mrst) -> + {Mod, State} = load_state(), + NewState = Mod:failure(Mrst, State), + save_state(Mod, NewState), + ok. + + +load_state() -> + get(?MODULE). + + +save_state(Mod, State) -> + put(?MODULE, {Mod, State}). diff --git a/src/couch_views/src/couch_views_batch_impl.erl b/src/couch_views/src/couch_views_batch_impl.erl new file mode 100644 index 000000000..d315a3bf6 --- /dev/null +++ b/src/couch_views/src/couch_views_batch_impl.erl @@ -0,0 +1,248 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_views_batch_impl). + +-behavior(couch_views_batch). + + +-export([ + start/2, + success/3, + failure/2 +]). + + +-include_lib("couch_mrview/include/couch_mrview.hrl"). + + +-record(batch_st, { + start_time, + state, + size, + search_incr, + sense_incr, + max_tx_size_bytes, + max_tx_time_msec, + threshold_penalty +}). + + +-spec start( + Mrst::#mrst{}, + State::term() + ) -> {NewState::term(), BatchSize::pos_integer()}. +start(Mrst, undefined) -> + St = #batch_st{ + state = search, + size = get_config(batch_initial_size, "100"), + search_incr = get_config(batch_search_increment, "500"), + sense_incr = get_config(batch_sense_increment, "100"), + max_tx_size_bytes = get_config(batch_max_tx_size_bytes, "9000000"), + max_tx_time_msec = get_config(batch_max_tx_time_msec, "4500"), + threshold_penalty = get_config( + batch_threshold_penalty, + "0.2", + fun float_0_to_1/2 + ) + }, + start(Mrst, St); + +start(_Mrst, #batch_st{size = Size} = St) -> + NewSt = St#batch_st{ + start_time = erlang:monotonic_time() + }, + {NewSt, Size}. + + +-spec success( + Mrst::#mrst{}, + UpdateStats::couch_views_batch:update_stats(), + State::term() + ) -> NewState::term(). 
+success(_Mrst, #{tx_size := TxSize}, #batch_st{} = St) -> + #batch_st{ + start_time = StartTime, + size = Size, + state = State, + search_incr = SearchIncr, + sense_incr = SenseIncr, + max_tx_size_bytes = MaxTxSize, + max_tx_time_msec = MaxTxTime, + threshold_penalty = ThresholdPenalty + } = St, + + TxTimeNative = erlang:monotonic_time() - StartTime, + TxTime = erlang:convert_time_unit(TxTimeNative, native, millisecond), + + {NewSize, NewState} = case TxSize > MaxTxSize orelse TxTime > MaxTxTime of + true -> + {round(Size * (1.0 - ThresholdPenalty)), sense}; + false when State == search -> + {Size + SearchIncr, State}; + false when State == sense -> + {Size + SenseIncr, State} + end, + + St#batch_st{ + size = erlang:max(1, NewSize), + state = NewState + }. + + +-spec failure(Mrst::#mrst{}, State::term()) -> NewState::term(). +failure(_Mrst, #batch_st{} = St) -> + St#batch_st{ + size = erlang:max(1, St#batch_st.size div 2), + state = sense + }. + + +get_config(Key, Default) -> + get_config(Key, Default, fun non_neg_integer/2). + + +get_config(Key, Default, Validator) -> + StrVal = config:get("couch_views", atom_to_list(Key), Default), + Validator(Key, StrVal). + + +non_neg_integer(Name, Str) -> + try + Val = list_to_integer(Str), + true = Val > 0, + Val + catch _:_ -> + erlang:error({invalid_non_neg_integer, {couch_views, Name, Str}}) + end. + + +float_0_to_1(Name, Str) -> + Val = try + list_to_float(Str) + catch error:badarg -> + erlang:error({invalid_float, {couch_views, Name, Str}}) + end, + if Val >= 0.0 andalso Val =< 1.0 -> Val; true -> + erlang:error({float_out_of_range, {couch_views, Name, Str}}) + end. + + +-ifdef(TEST). +-include_lib("eunit/include/eunit.hrl"). + + +good_config_test() -> + with_good_config(fun() -> + {St, 1} = start(#mrst{}, undefined), + ?assertMatch( + #batch_st{ + state = search, + size = 1, + search_incr = 2, + sense_incr = 3, + max_tx_size_bytes = 4, + max_tx_time_msec = 5, + threshold_penalty = 0.6 + }, + St + ) + end). + + +bad_config_test() -> + Fields = [ + {batch_initial_size, invalid_non_neg_integer}, + {batch_search_increment, invalid_non_neg_integer}, + {batch_sense_increment, invalid_non_neg_integer}, + {batch_max_tx_size_bytes, invalid_non_neg_integer}, + {batch_max_tx_time_msec, invalid_non_neg_integer}, + {batch_threshold_penalty, invalid_float} + ], + lists:foreach(fun({Field, Error}) -> + with_bad_config(atom_to_list(Field), fun() -> + ?assertError( + {Error, {couch_views, Field, _}}, + start(#mrst{}, undefined) + ) + end) + end, Fields). + + +float_range_test() -> + with_bad_float_config("batch_threshold_penalty", fun() -> + lists:foreach(fun(_) -> + ?assertError( + {float_out_of_range, {couch_views, batch_threshold_penalty, _}}, + start(#mrst{}, undefined) + ) + end, lists:seq(1, 10)) + end). + + +with_good_config(Fun) -> + meck:new(config), + meck:expect(config, get, fun + ("couch_views", "batch_initial_size", _) -> "1"; + ("couch_views", "batch_search_increment", _) -> "2"; + ("couch_views", "batch_sense_increment", _) -> "3"; + ("couch_views", "batch_max_tx_size_bytes", _) -> "4"; + ("couch_views", "batch_max_tx_time_msec", _) -> "5"; + ("couch_views", "batch_threshold_penalty", _) -> "0.6" + end), + try + Fun() + after + meck:unload() + end. 
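couch_views_batch above is a behaviour; the concrete module is chosen with the couch_views/batch_module config key and defaults to couch_views_batch_impl. A minimal sketch of an alternative implementation that always uses a fixed batch size (hypothetical module name; note the config value is resolved with list_to_existing_atom/1, so the module must already be loaded):

    -module(my_fixed_batch).
    -behavior(couch_views_batch).

    -export([
        start/2,
        success/3,
        failure/2
    ]).

    -include_lib("couch_mrview/include/couch_mrview.hrl").

    % always index 250 changes per transaction; no adaptive state is kept
    start(#mrst{}, _State) ->
        {nil, 250}.

    success(#mrst{}, _UpdateStats, State) ->
        State.

    failure(#mrst{}, State) ->
        State.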
+ + +with_bad_config(FieldName, Fun) -> + meck:new(config), + meck:expect(config, get, fun("couch_views", Field, Default) -> + case Field == FieldName of + true -> + case rand:uniform() < 0.5 of + true -> "foo"; + false -> -10 + end; + false -> + Default + end + end), + try + Fun() + after + meck:unload() + end. + + +with_bad_float_config(FieldName, Fun) -> + meck:new(config), + meck:expect(config, get, fun("couch_views", Field, Default) -> + case Field == FieldName of + true -> + case rand:uniform() < 0.5 of + true -> "100.0"; + false -> "-0.5" + end; + false -> + Default + end + end), + try + Fun() + after + meck:unload() + end. + +-endif. diff --git a/src/couch_views/src/couch_views_ddoc.erl b/src/couch_views/src/couch_views_ddoc.erl new file mode 100644 index 000000000..fae4a3433 --- /dev/null +++ b/src/couch_views/src/couch_views_ddoc.erl @@ -0,0 +1,42 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. +-module(couch_views_ddoc). + + +-export([ + get_interactive_list/1, + get_mango_list/1, + is_interactive/1 +]). + + +-include_lib("couch/include/couch_db.hrl"). + + +% TODO: build a ddoc cache that checks the md_version +get_interactive_list(Db) -> + DDocs = fabric2_db:get_design_docs(Db), + lists:filter(fun is_interactive/1, DDocs). + + +get_mango_list(Db) -> + DDocs = fabric2_db:get_design_docs(Db), + lists:filter(fun (DDoc) -> + {Props} = couch_doc:to_json_obj(DDoc, []), + fabric2_util:get_value(<<"language">>, Props) == <<"query">> + end, DDocs). + + +is_interactive(#doc{} = DDoc) -> + {Props} = couch_doc:to_json_obj(DDoc, []), + {Opts} = fabric2_util:get_value(<<"options">>, Props, {[]}), + fabric2_util:get_value(<<"interactive">>, Opts, false). diff --git a/src/couch_views/src/couch_views_encoding.erl b/src/couch_views/src/couch_views_encoding.erl new file mode 100644 index 000000000..2f69db306 --- /dev/null +++ b/src/couch_views/src/couch_views_encoding.erl @@ -0,0 +1,117 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_views_encoding). + + +-export([ + max/0, + encode/1, + encode/2, + decode/1 +]). + + +-define(NULL, 0). +-define(FALSE, 1). +-define(TRUE, 2). +-define(NUMBER, 3). +-define(STRING, 4). +-define(LIST, 5). +-define(OBJECT, 6). +-define(MAX, 255). + + +max() -> + max_encoding_value. + + +encode(X) -> + encode(X, value). + + +encode(X, Type) when Type == key; Type == value -> + erlfdb_tuple:pack(encode_int(X, Type)). + + +decode(Encoded) -> + Val = erlfdb_tuple:unpack(Encoded), + decode_int(Val). 
+ + +encode_int(null, _Type) -> + {?NULL}; + +encode_int(false, _Type) -> + {?FALSE}; + +encode_int(true, _Type) -> + {?TRUE}; + +encode_int(max_encoding_value, _Type) -> + {?MAX}; + +encode_int(Num, key) when is_number(Num) -> + {?NUMBER, float(Num)}; + +encode_int(Num, value) when is_number(Num) -> + {?NUMBER, Num}; + +encode_int(Bin, key) when is_binary(Bin) -> + {?STRING, couch_util:get_sort_key(Bin)}; + +encode_int(Bin, value) when is_binary(Bin) -> + {?STRING, Bin}; + +encode_int(List, Type) when is_list(List) -> + Encoded = lists:map(fun(Item) -> + encode_int(Item, Type) + end, List), + {?LIST, list_to_tuple(Encoded)}; + +encode_int({Props}, Type) when is_list(Props) -> + Encoded = lists:map(fun({K, V}) -> + EK = encode_int(K, Type), + EV = encode_int(V, Type), + {EK, EV} + end, Props), + {?OBJECT, list_to_tuple(Encoded)}. + + +decode_int({?NULL}) -> + null; + +decode_int({?FALSE}) -> + false; + +decode_int({?TRUE}) -> + true; + +decode_int({?MAX}) -> + max_encoding_value; + +decode_int({?STRING, Bin}) -> + Bin; + +decode_int({?NUMBER, Num}) -> + Num; + +decode_int({?LIST, List}) -> + lists:map(fun decode_int/1, tuple_to_list(List)); + +decode_int({?OBJECT, Object}) -> + Props = lists:map(fun({EK, EV}) -> + K = decode_int(EK), + V = decode_int(EV), + {K, V} + end, tuple_to_list(Object)), + {Props}. diff --git a/src/couch_views/src/couch_views_epi.erl b/src/couch_views/src/couch_views_epi.erl new file mode 100644 index 000000000..127b09f13 --- /dev/null +++ b/src/couch_views/src/couch_views_epi.erl @@ -0,0 +1,60 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + + +-module(couch_views_epi). + + +-behaviour(couch_epi_plugin). + + +-export([ + app/0, + providers/0, + services/0, + data_subscriptions/0, + data_providers/0, + processes/0, + notify/3 +]). + + +app() -> + couch_views. + + +providers() -> + [ + {fabric2_db, couch_views_fabric2_plugin} + ]. + + +services() -> + [ + {couch_views, couch_views_plugin} + ]. + + +data_subscriptions() -> + []. + + +data_providers() -> + []. + + +processes() -> + []. + + +notify(_Key, _Old, _New) -> + ok. diff --git a/src/couch_views/src/couch_views_fabric2_plugin.erl b/src/couch_views/src/couch_views_fabric2_plugin.erl new file mode 100644 index 000000000..cae0e1f75 --- /dev/null +++ b/src/couch_views/src/couch_views_fabric2_plugin.erl @@ -0,0 +1,24 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + + +-module(couch_views_fabric2_plugin). + + +-export([ + after_doc_write/6 +]). 
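couch_views_encoding above prefixes every JSON type with a small integer tag before packing it as an erlfdb tuple, which is what makes the packed keys sort in CouchDB view collation order (null < false < true < numbers < strings < arrays < objects). A small sketch of that property, assuming byte-wise comparison of the packed binaries follows the tuple order as FDB tuple encoding guarantees:

    sort_order_sketch() ->
        Keys = [<<"b">>, 10, null, [1, 2], true, false],
        Encoded = [couch_views_encoding:encode(K, key) || K <- Keys],
        % sorting the packed binaries byte-wise yields collation order:
        % null, false, true, 10, <<"b">>, [1, 2]
        lists:sort(Encoded).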
+ + +after_doc_write(Db, Doc, NewWinner, OldWinner, NewRevId, Seq)-> + couch_views_updater:index(Db, Doc, NewWinner, OldWinner, NewRevId, Seq), + [Db, Doc, NewWinner, OldWinner, NewRevId, Seq]. diff --git a/src/couch_views/src/couch_views_fdb.erl b/src/couch_views/src/couch_views_fdb.erl new file mode 100644 index 000000000..b0fb82e85 --- /dev/null +++ b/src/couch_views/src/couch_views_fdb.erl @@ -0,0 +1,331 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_views_fdb). + +-export([ + get_view_state/2, + + new_interactive_index/3, + new_creation_vs/3, + get_creation_vs/2, + get_build_status/2, + set_build_status/3, + + get_update_seq/2, + set_update_seq/3, + + list_signatures/1, + clear_index/2, + + persist_chunks/3, + update_kv_size/4 +]). + +-ifdef(TEST). +-compile(export_all). +-compile(nowarn_export_all). +-endif. + + +-include("couch_views.hrl"). +-include_lib("couch_mrview/include/couch_mrview.hrl"). +-include_lib("fabric/include/fabric2.hrl"). + + +get_view_state(Db, #mrst{} = Mrst) -> + get_view_state(Db, Mrst#mrst.sig); + +get_view_state(Db, Sig) when is_binary(Sig) -> + #{ + tx := Tx + } = Db, + + VersionF = erlfdb:get(Tx, version_key(Db, Sig)), + ViewSeqF = erlfdb:get(Tx, seq_key(Db, Sig)), + ViewVSF = erlfdb:get(Tx, creation_vs_key(Db, Sig)), + BuildStatusF = erlfdb:get(Tx, build_status_key(Db, Sig)), + + Version = case erlfdb:wait(VersionF) of + not_found -> not_found; + VsnVal -> element(1, erlfdb_tuple:unpack(VsnVal)) + end, + + ViewSeq = case erlfdb:wait(ViewSeqF) of + not_found -> <<>>; + SeqVal -> SeqVal + end, + + ViewVS = case erlfdb:wait(ViewVSF) of + not_found -> not_found; + VSVal -> element(1, erlfdb_tuple:unpack(VSVal)) + end, + + State = #{ + version => Version, + view_seq => ViewSeq, + view_vs => ViewVS, + build_status => erlfdb:wait(BuildStatusF) + }, + + maybe_upgrade_view(Db, Sig, State). + + +new_interactive_index(Db, #mrst{} = Mrst, VS) -> + new_interactive_index(Db, Mrst#mrst.sig, VS); + +new_interactive_index(Db, Sig, VS) -> + set_version(Db, Sig), + new_creation_vs(Db, Sig, VS), + set_build_status(Db, Sig, ?INDEX_BUILDING). + + +%Interactive View Creation Versionstamp +%(<db>, ?DB_VIEWS, ?VIEW_INFO, ?VIEW_CREATION_VS, Sig) = VS + +new_creation_vs(TxDb, #mrst{} = Mrst, VS) -> + new_creation_vs(TxDb, Mrst#mrst.sig, VS); + +new_creation_vs(TxDb, Sig, VS) -> + #{ + tx := Tx + } = TxDb, + Key = creation_vs_key(TxDb, Sig), + Value = erlfdb_tuple:pack_vs({VS}), + ok = erlfdb:set_versionstamped_value(Tx, Key, Value). + + +get_creation_vs(TxDb, MrstOrSig) -> + #{ + view_vs := ViewVS + } = get_view_state(TxDb, MrstOrSig), + ViewVS. + + +%Interactive View Build Status +%(<db>, ?DB_VIEWS, ?VIEW_INFO, ?VIEW_BUILD_STATUS, Sig) = INDEX_BUILDING | INDEX_READY + +get_build_status(TxDb, MrstOrSig) -> + #{ + build_status := BuildStatus + } = get_view_state(TxDb, MrstOrSig), + BuildStatus. 
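get_view_state/2 above issues all four reads as erlfdb futures before waiting on any of them, so the implementation version, view sequence, creation versionstamp and build status are fetched in parallel within one transaction. A short sketch of a caller, assuming a fabric2 database handle:

    view_state_sketch(Db, Mrst) ->
        fabric2_fdb:transactional(Db, fun(TxDb) ->
            #{
                view_seq := Seq,
                build_status := Status
            } = couch_views_fdb:get_view_state(TxDb, Mrst),
            {Seq, Status}
        end).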
+ + +set_build_status(TxDb, #mrst{} = Mrst, State) -> + set_build_status(TxDb, Mrst#mrst.sig, State); + +set_build_status(TxDb, Sig, State) -> + #{ + tx := Tx + } = TxDb, + + Key = build_status_key(TxDb, Sig), + ok = erlfdb:set(Tx, Key, State). + + +% View Build Sequence Access +% (<db>, ?DB_VIEWS, Sig, ?VIEW_UPDATE_SEQ) = Sequence + + +get_update_seq(TxDb, MrstOrSig) -> + #{ + view_seq := ViewSeq + } = get_view_state(TxDb, MrstOrSig), + ViewSeq. + + +set_update_seq(TxDb, Sig, Seq) -> + #{ + tx := Tx + } = TxDb, + ok = erlfdb:set(Tx, seq_key(TxDb, Sig), Seq). + + +list_signatures(Db) -> + #{ + db_prefix := DbPrefix + } = Db, + ViewSeqRange = {?DB_VIEWS, ?VIEW_INFO, ?VIEW_UPDATE_SEQ}, + RangePrefix = erlfdb_tuple:pack(ViewSeqRange, DbPrefix), + fabric2_fdb:fold_range(Db, RangePrefix, fun({Key, _Val}, Acc) -> + {Sig} = erlfdb_tuple:unpack(Key, RangePrefix), + [Sig | Acc] + end, [], []). + + +clear_index(Db, Signature) -> + #{ + tx := Tx, + db_prefix := DbPrefix + } = Db, + + % Get view size to remove from global counter + SizeTuple = {?DB_VIEWS, ?VIEW_INFO, ?VIEW_KV_SIZE, Signature}, + SizeKey = erlfdb_tuple:pack(SizeTuple, DbPrefix), + ViewSize = case erlfdb:wait(erlfdb:get(Tx, SizeKey)) of + not_found -> 0; + SizeVal -> ?bin2uint(SizeVal) + end, + + % Clear index info keys + Keys = [ + {?DB_VIEWS, ?VIEW_INFO, ?VIEW_UPDATE_SEQ, Signature}, + {?DB_VIEWS, ?VIEW_INFO, ?VIEW_ROW_COUNT, Signature}, + {?DB_VIEWS, ?VIEW_INFO, ?VIEW_KV_SIZE, Signature} + ], + lists:foreach(fun(Key) -> + FDBKey = erlfdb_tuple:pack(Key, DbPrefix), + erlfdb:clear(Tx, FDBKey) + end, Keys), + + % Clear index data + DataTuple = {?DB_VIEWS, ?VIEW_DATA, Signature}, + DataPrefix = erlfdb_tuple:pack(DataTuple, DbPrefix), + erlfdb:clear_range_startswith(Tx, DataPrefix), + + % Clear tree data + TreeTuple = {?DB_VIEWS, ?VIEW_TREES, Signature}, + TreePrefix = erlfdb_tuple:pack(TreeTuple, DbPrefix), + erlfdb:clear_range_startswith(Tx, TreePrefix), + + % Decrement db wide view size counter + DbSizeTuple = {?DB_STATS, <<"sizes">>, <<"views">>}, + DbSizeKey = erlfdb_tuple:pack(DbSizeTuple, DbPrefix), + erlfdb:add(Tx, DbSizeKey, -ViewSize). + + +persist_chunks(Tx, set, [Key, Value]) -> + Chunks = fabric2_fdb:chunkify_binary(Value), + LastId = lists:foldl(fun(Chunk, Id) -> + ChunkKey = erlfdb_tuple:pack({Id}, Key), + erlfdb:set(Tx, ChunkKey, Chunk), + Id + 1 + end, 0, Chunks), + + % We update nodes in place, so its possible that + % a node shrank. This clears any keys that we haven't + % just overwritten for the provided key. + LastIdKey = erlfdb_tuple:pack({LastId}, Key), + EndRange = <<Key/binary, 16#FF>>, + erlfdb:clear_range(Tx, LastIdKey, EndRange); + +persist_chunks(Tx, get, Key) -> + Rows = erlfdb:get_range_startswith(Tx, Key), + Values = [V || {_K, V} <- Rows], + iolist_to_binary(Values); + +persist_chunks(Tx, clear, Key) -> + erlfdb:clear_range_startswith(Tx, Key). + + +update_kv_size(TxDb, Sig, OldSize, NewSize) -> + #{ + tx := Tx, + db_prefix := DbPrefix + } = TxDb, + + ViewTuple = {?DB_VIEWS, ?VIEW_INFO, ?VIEW_KV_SIZE, Sig}, + ViewKey = erlfdb_tuple:pack(ViewTuple, DbPrefix), + erlfdb:set(Tx, ViewKey, ?uint2bin(NewSize)), + + DbTuple = {?DB_STATS, <<"sizes">>, <<"views">>}, + DbKey = erlfdb_tuple:pack(DbTuple, DbPrefix), + erlfdb:add(Tx, DbKey, NewSize - OldSize). 
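persist_chunks/3 above spreads a value that may exceed FoundationDB's value size limit across several keys by appending a chunk index to the caller's key, and clears any leftover chunk keys when a value shrinks. A minimal round-trip sketch, assuming it runs inside an open transaction and that Value is a binary whose key already carries the database prefix:

    chunk_roundtrip_sketch(TxDb, Key, Value) ->
        #{tx := Tx} = TxDb,
        ok = couch_views_fdb:persist_chunks(Tx, set, [Key, Value]),
        % chunks are read back as a range and concatenated
        Value = couch_views_fdb:persist_chunks(Tx, get, Key),
        couch_views_fdb:persist_chunks(Tx, clear, Key).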
+ + +maybe_upgrade_view(_Db, _Sig, #{version := ?CURRENT_VIEW_IMPL_VERSION} = St) -> + St; +maybe_upgrade_view(Db, Sig, #{version := not_found, view_seq := <<>>} = St) -> + % If we haven't started building the view yet + % then we don't change view_vs and build_status + % as they're still correct. + set_version(Db, Sig), + St#{ + version => ?CURRENT_VIEW_IMPL_VERSION, + view_seq => <<>> + }; +maybe_upgrade_view(Db, Sig, #{version := not_found} = St) -> + clear_index(Db, Sig), + set_version(Db, Sig), + {ViewVS, BuildStatus} = reset_interactive_index(Db, Sig, St), + #{ + version => ?CURRENT_VIEW_IMPL_VERSION, + view_seq => <<>>, + view_vs => ViewVS, + build_status => BuildStatus + }. + + +set_version(Db, Sig) -> + #{ + tx := Tx + } = Db, + Key = version_key(Db, Sig), + Val = erlfdb_tuple:pack({?CURRENT_VIEW_IMPL_VERSION}), + erlfdb:set(Tx, Key, Val). + + +reset_interactive_index(_Db, _Sig, #{view_vs := not_found}) -> + % Not an interactive index + {not_found, not_found}; +reset_interactive_index(Db, Sig, _St) -> + % We have to reset the creation versionstamp + % to the current update seq of the database + % or else we'll not have indexed any documents + % inserted since the creation of the interactive + % index. + #{ + tx := Tx + } = Db, + + DbSeq = fabric2_db:get_update_seq(Db), + VS = fabric2_fdb:seq_to_vs(DbSeq), + Key = creation_vs_key(Db, Sig), + Val = erlfdb_tuple:pack({VS}), + ok = erlfdb:set(Tx, Key, Val), + + set_build_status(Db, Sig, ?INDEX_BUILDING), + + {VS, ?INDEX_BUILDING}. + + +version_key(Db, Sig) -> + #{ + db_prefix := DbPrefix + } = Db, + Key = {?DB_VIEWS, ?VIEW_INFO, ?VIEW_IMPL_VERSION, Sig}, + erlfdb_tuple:pack(Key, DbPrefix). + + +seq_key(Db, Sig) -> + #{ + db_prefix := DbPrefix + } = Db, + Key = {?DB_VIEWS, ?VIEW_INFO, ?VIEW_UPDATE_SEQ, Sig}, + erlfdb_tuple:pack(Key, DbPrefix). + + +creation_vs_key(Db, Sig) -> + #{ + db_prefix := DbPrefix + } = Db, + Key = {?DB_VIEWS, ?VIEW_INFO, ?VIEW_CREATION_VS, Sig}, + erlfdb_tuple:pack(Key, DbPrefix). + + +build_status_key(Db, Sig) -> + #{ + db_prefix := DbPrefix + } = Db, + Key = {?DB_VIEWS, ?VIEW_INFO, ?VIEW_BUILD_STATUS, Sig}, + erlfdb_tuple:pack(Key, DbPrefix). diff --git a/src/couch_views/src/couch_views_http.erl b/src/couch_views/src/couch_views_http.erl new file mode 100644 index 000000000..e21acfb9f --- /dev/null +++ b/src/couch_views/src/couch_views_http.erl @@ -0,0 +1,359 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_views_http). + +-include_lib("couch/include/couch_db.hrl"). +-include_lib("couch_mrview/include/couch_mrview.hrl"). + +-export([ + parse_body_and_query/2, + parse_body_and_query/3, + parse_params/2, + parse_params/4, + row_to_obj/1, + row_to_obj/2, + view_cb/2, + paginated/5, + paginated/6 +]). + +-define(BOOKMARK_VSN, 1). 
+ +parse_body_and_query(#httpd{method='POST'} = Req, Keys) -> + Props = chttpd:json_body_obj(Req), + parse_body_and_query(Req, Props, Keys); + +parse_body_and_query(Req, Keys) -> + parse_params(chttpd:qs(Req), Keys, #mrargs{keys=Keys, group=undefined, + group_level=undefined}, [keep_group_level]). + +parse_body_and_query(Req, {Props}, Keys) -> + Args = #mrargs{keys=Keys, group=undefined, group_level=undefined}, + BodyArgs = parse_params(Props, Keys, Args, [decoded]), + parse_params(chttpd:qs(Req), Keys, BodyArgs, [keep_group_level]). + +parse_params(#httpd{}=Req, Keys) -> + parse_params(chttpd:qs(Req), Keys); +parse_params(Props, Keys) -> + Args = #mrargs{}, + parse_params(Props, Keys, Args). + + +parse_params(Props, Keys, Args) -> + parse_params(Props, Keys, Args, []). + + +parse_params([{"bookmark", Bookmark}], _Keys, #mrargs{}, _Options) -> + bookmark_decode(Bookmark); + +parse_params(Props, Keys, #mrargs{}=Args, Options) -> + case couch_util:get_value("bookmark", Props, nil) of + nil -> + ok; + _ -> + throw({bad_request, "Cannot use `bookmark` with other options"}) + end, + couch_mrview_http:parse_params(Props, Keys, Args, Options). + + +row_to_obj(Row) -> + Id = couch_util:get_value(id, Row), + row_to_obj(Id, Row). + + +row_to_obj(Id, Row) -> + couch_mrview_http:row_to_obj(Id, Row). + + +view_cb(Msg, #vacc{paginated = false}=Acc) -> + couch_mrview_http:view_cb(Msg, Acc); +view_cb(Msg, #vacc{paginated = true}=Acc) -> + paginated_cb(Msg, Acc). + + +paginated_cb({row, Row}, #vacc{buffer=Buf}=Acc) -> + {ok, Acc#vacc{buffer = [row_to_obj(Row) | Buf]}}; + +paginated_cb({error, Reason}, #vacc{}=_Acc) -> + throw({error, Reason}); + +paginated_cb(complete, #vacc{buffer=Buf}=Acc) -> + {ok, Acc#vacc{buffer=lists:reverse(Buf)}}; + +paginated_cb({meta, Meta}, #vacc{}=VAcc) -> + MetaMap = lists:foldl(fun(MetaData, Acc) -> + case MetaData of + {_Key, undefined} -> + Acc; + {total, _Value} -> + %% We set total_rows elsewere + Acc; + {Key, Value} -> + maps:put(list_to_binary(atom_to_list(Key)), Value, Acc) + end + end, #{}, Meta), + {ok, VAcc#vacc{meta=MetaMap}}. + + +paginated(Req, EtagTerm, #mrargs{page_size = PageSize} = Args, KeyFun, Fun) -> + Etag = couch_httpd:make_etag(EtagTerm), + chttpd:etag_respond(Req, Etag, fun() -> + hd(do_paginated(PageSize, [Args], KeyFun, Fun)) + end). + + +paginated(Req, EtagTerm, PageSize, QueriesArgs, KeyFun, Fun) when is_list(QueriesArgs) -> + Etag = couch_httpd:make_etag(EtagTerm), + chttpd:etag_respond(Req, Etag, fun() -> + Results = do_paginated(PageSize, QueriesArgs, KeyFun, Fun), + #{results => Results} + end). + + +do_paginated(PageSize, QueriesArgs, KeyFun, Fun) when is_list(QueriesArgs) -> + {_N, Results} = lists:foldl(fun(Args0, {Limit, Acc}) -> + case Limit > 0 of + true -> + {OriginalLimit, Args} = set_limit(Args0#mrargs{page_size = Limit}), + {Meta, Items} = Fun(Args), + Result0 = maybe_add_next_bookmark( + OriginalLimit, PageSize, Args, Meta, Items, KeyFun), + Result = maybe_add_previous_bookmark(Args, Result0, KeyFun), + #{total_rows := Total} = Result, + {Limit - Total, [Result | Acc]}; + false -> + Bookmark = bookmark_encode(Args0), + Result = #{ + rows => [], + next => Bookmark, + total_rows => 0 + }, + {Limit, [Result | Acc]} + end + end, {PageSize, []}, QueriesArgs), + lists:reverse(Results). 
+ + +maybe_add_next_bookmark(OriginalLimit, PageSize, Args0, Response, Items, KeyFun) -> + #mrargs{ + page_size = RequestedLimit, + extra = Extra0 + } = Args0, + case check_completion(OriginalLimit, RequestedLimit, Items) of + {Rows, nil} -> + maps:merge(Response, #{ + rows => Rows, + total_rows => length(Rows) + }); + {Rows, Next} -> + {FirstId, FirstKey} = first_key(KeyFun, Rows), + {NextId, NextKey} = KeyFun(Next), + Extra1 = lists:keystore(fid, 1, Extra0, {fid, FirstId}), + Extra2 = lists:keystore(fk, 1, Extra1, {fk, FirstKey}), + Args = Args0#mrargs{ + page_size = PageSize, + start_key = NextKey, + start_key_docid = NextId, + extra = Extra2 + }, + Bookmark = bookmark_encode(Args), + maps:merge(Response, #{ + rows => Rows, + next => Bookmark, + total_rows => length(Rows) + }) + end. + + +maybe_add_previous_bookmark(#mrargs{extra = Extra} = Args, #{rows := Rows} = Result, KeyFun) -> + StartKey = couch_util:get_value(fk, Extra), + StartId = couch_util:get_value(fid, Extra), + case {{StartId, StartKey}, first_key(KeyFun, Rows)} of + {{undefined, undefined}, {_, _}} -> + Result; + {{_, _}, {undefined, undefined}} -> + Result; + {{StartId, _}, {StartId, _}} -> + Result; + {{undefined, StartKey}, {undefined, StartKey}} -> + Result; + {{StartId, StartKey}, {EndId, EndKey}} -> + Bookmark = bookmark_encode( + Args#mrargs{ + start_key = StartKey, + start_key_docid = StartId, + end_key = EndKey, + end_key_docid = EndId, + inclusive_end = false + } + ), + maps:put(previous, Bookmark, Result) + end. + + +first_key(_KeyFun, []) -> + {undefined, undefined}; + +first_key(KeyFun, [First | _]) -> + KeyFun(First). + + +set_limit(#mrargs{page_size = PageSize, limit = Limit} = Args) + when is_integer(PageSize) andalso Limit > PageSize -> + {Limit, Args#mrargs{limit = PageSize + 1}}; + +set_limit(#mrargs{page_size = PageSize, limit = Limit} = Args) + when is_integer(PageSize) -> + {Limit, Args#mrargs{limit = Limit + 1}}. + + +check_completion(OriginalLimit, RequestedLimit, Items) + when is_integer(OriginalLimit) andalso OriginalLimit =< RequestedLimit -> + {Rows, _} = split(OriginalLimit, Items), + {Rows, nil}; + +check_completion(_OriginalLimit, RequestedLimit, Items) -> + split(RequestedLimit, Items). + + +split(Limit, Items) when length(Items) > Limit -> + case lists:split(Limit, Items) of + {Head, [NextItem | _]} -> + {Head, NextItem}; + {Head, []} -> + {Head, nil} + end; + +split(_Limit, Items) -> + {Items, nil}. + + +bookmark_encode(Args0) -> + Defaults = #mrargs{}, + {RevTerms, Mask, _} = lists:foldl(fun(Value, {Acc, Mask, Idx}) -> + case element(Idx, Defaults) of + Value -> + {Acc, Mask, Idx + 1}; + _Default when Idx == #mrargs.bookmark -> + {Acc, Mask, Idx + 1}; + _Default -> + % Its `(Idx - 1)` because the initial `1` + % value already accounts for one bit. + {[Value | Acc], (1 bsl (Idx - 1)) bor Mask, Idx + 1} + end + end, {[], 0, 1}, tuple_to_list(Args0)), + Terms = lists:reverse(RevTerms), + TermBin = term_to_binary(Terms, [compressed, {minor_version, 2}]), + MaskBin = binary:encode_unsigned(Mask), + RawBookmark = <<?BOOKMARK_VSN, MaskBin/binary, TermBin/binary>>, + couch_util:encodeBase64Url(RawBookmark). 
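The pagination helpers above always ask the underlying query for one row more than the requested page: set_limit/1 bumps the limit, and if the extra row comes back it is withheld from the response and seeds the `next` bookmark, while the bookmark itself only stores #mrargs{} fields that differ from the defaults (a field kept at tuple position Idx sets bit Idx - 1 of the mask, so non-default values at positions 4 and 7 give a mask of (1 bsl 3) bor (1 bsl 6) = 72). A small worked example of check_completion/3 for a page size of 3, which set_limit/1 turns into a limit of 4:

    % four rows came back for a requested page of three: three rows are
    % emitted and the fourth becomes the start of the next bookmark
    {[r1, r2, r3], r4} = check_completion(100, 3, [r1, r2, r3, r4]),
    % only three rows exist, so there is no next page
    {[r1, r2, r3], nil} = check_completion(100, 3, [r1, r2, r3]).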
+ + +bookmark_decode(Bookmark) -> + try + RawBin = couch_util:decodeBase64Url(Bookmark), + <<?BOOKMARK_VSN, MaskBin:4/binary, TermBin/binary>> = RawBin, + Mask = binary:decode_unsigned(MaskBin), + Index = mask_to_index(Mask, 1, []), + Terms = binary_to_term(TermBin, [safe]), + lists:foldl(fun({Idx, Value}, Acc) -> + setelement(Idx, Acc, Value) + end, #mrargs{}, lists:zip(Index, Terms)) + catch _:_ -> + throw({bad_request, <<"Invalid bookmark">>}) + end. + + +mask_to_index(0, _Pos, Acc) -> + lists:reverse(Acc); +mask_to_index(Mask, Pos, Acc) when is_integer(Mask), Mask > 0 -> + NewAcc = case Mask band 1 of + 0 -> Acc; + 1 -> [Pos | Acc] + end, + mask_to_index(Mask bsr 1, Pos + 1, NewAcc). + + +-ifdef(TEST). + +-include_lib("eunit/include/eunit.hrl"). + +bookmark_encode_decode_test() -> + ?assertEqual( + #mrargs{page_size = 5}, + bookmark_decode(bookmark_encode(#mrargs{page_size = 5})) + ), + + Randomized = lists:foldl(fun(Idx, Acc) -> + if Idx == #mrargs.bookmark -> Acc; true -> + setelement(Idx, Acc, couch_uuids:random()) + end + end, #mrargs{}, lists:seq(1, record_info(size, mrargs))), + + ?assertEqual( + Randomized, + bookmark_decode(bookmark_encode(Randomized)) + ). + + +check_completion_test() -> + ?assertEqual( + {[], nil}, + check_completion(100, 1, []) + ), + ?assertEqual( + {[1], nil}, + check_completion(100, 1, [1]) + ), + ?assertEqual( + {[1], 2}, + check_completion(100, 1, [1, 2]) + ), + ?assertEqual( + {[1], 2}, + check_completion(100, 1, [1, 2, 3]) + ), + ?assertEqual( + {[1, 2], nil}, + check_completion(100, 3, [1, 2]) + ), + ?assertEqual( + {[1, 2, 3], nil}, + check_completion(100, 3, [1, 2, 3]) + ), + ?assertEqual( + {[1, 2, 3], 4}, + check_completion(100, 3, [1, 2, 3, 4]) + ), + ?assertEqual( + {[1, 2, 3], 4}, + check_completion(100, 3, [1, 2, 3, 4, 5]) + ), + ?assertEqual( + {[1], nil}, + check_completion(1, 1, [1]) + ), + ?assertEqual( + {[1, 2], nil}, + check_completion(2, 3, [1, 2]) + ), + ?assertEqual( + {[1, 2], nil}, + check_completion(2, 3, [1, 2, 3]) + ), + ?assertEqual( + {[1, 2], nil}, + check_completion(2, 3, [1, 2, 3, 4, 5]) + ), + ok. +-endif.
\ No newline at end of file diff --git a/src/couch_views/src/couch_views_indexer.erl b/src/couch_views/src/couch_views_indexer.erl new file mode 100644 index 000000000..2735f66b7 --- /dev/null +++ b/src/couch_views/src/couch_views_indexer.erl @@ -0,0 +1,609 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_views_indexer). + +-export([ + spawn_link/0 +]). + + +-export([ + init/0, + map_docs/2, + write_docs/4 +]). + +-ifdef(TEST). +-compile(export_all). +-compile(nowarn_export_all). +-endif. + +-include("couch_views.hrl"). +-include_lib("couch/include/couch_db.hrl"). +-include_lib("couch_mrview/include/couch_mrview.hrl"). +-include_lib("fabric/include/fabric2.hrl"). + + +-define(KEY_SIZE_LIMIT, 8000). +-define(VALUE_SIZE_LIMIT, 64000). + +% These are all of the errors that we can fix by using +% a smaller batch size. +-define(IS_RECOVERABLE_ERROR(Code), ( + (Code == 1004) % timed_out + orelse (Code == 1007) % transaction_too_old + orelse (Code == 1031) % transaction_timed_out + orelse (Code == 2101) % transaction_too_large +)). + + +spawn_link() -> + proc_lib:spawn_link(?MODULE, init, []). + + +init() -> + Opts = #{no_schedule => true}, + {ok, Job, Data0} = couch_jobs:accept(?INDEX_JOB_TYPE, Opts), + + couch_views_server:accepted(self()), + + Data = upgrade_data(Data0), + #{ + <<"db_name">> := DbName, + <<"db_uuid">> := DbUUID, + <<"ddoc_id">> := DDocId, + <<"sig">> := JobSig, + <<"retries">> := Retries + } = Data, + + {ok, Db} = try + fabric2_db:open(DbName, [?ADMIN_CTX, {uuid, DbUUID}]) + catch error:database_does_not_exist -> + fail_job(Job, Data, db_deleted, "Database was deleted") + end, + + {ok, DDoc} = case fabric2_db:open_doc(Db, DDocId) of + {ok, DDoc0} -> + {ok, DDoc0}; + {not_found, _} -> + fail_job(Job, Data, ddoc_deleted, "Design document was deleted") + end, + + {ok, Mrst} = couch_views_util:ddoc_to_mrst(DbName, DDoc), + HexSig = fabric2_util:to_hex(Mrst#mrst.sig), + + if HexSig == JobSig -> ok; true -> + fail_job(Job, Data, sig_changed, "Design document was modified") + end, + + State = #{ + tx_db => undefined, + db_uuid => DbUUID, + db_seq => undefined, + view_seq => undefined, + last_seq => undefined, + view_vs => undefined, + job => Job, + job_data => Data, + count => 0, + changes_done => 0, + doc_acc => [], + design_opts => Mrst#mrst.design_opts, + update_stats => #{} + }, + + try + update(Db, Mrst, State) + catch + exit:normal -> + ok; + error:database_does_not_exist -> + fail_job(Job, Data, db_deleted, "Database was deleted"); + Error:Reason -> + Stack = erlang:get_stacktrace(), + Fmt = "Error building view for ddoc ~s in ~s: ~p:~p ~p", + couch_log:error(Fmt, [DbName, DDocId, Error, Reason, Stack]), + + NewRetry = Retries + 1, + RetryLimit = retry_limit(), + + case should_retry(NewRetry, RetryLimit, Reason) of + true -> + DataErr = Data#{<<"retries">> := NewRetry}, + % Set the last_seq to 0 so that it doesn't trigger a + % successful view build for anyone listening to the + % couch_views_jobs:wait_for_job + % Note this won't cause the view to rebuild from 0 
again + StateErr = State#{job_data := DataErr, last_seq := <<"0">>}, + report_progress(StateErr, update); + false -> + fail_job(Job, Data, Error, Reason) + end + end. + + +upgrade_data(Data) -> + Defaults = [ + {<<"retries">>, 0}, + {<<"db_uuid">>, undefined} + ], + lists:foldl(fun({Key, Default}, Acc) -> + case maps:is_key(Key, Acc) of + true -> Acc; + false -> maps:put(Key, Default, Acc) + end + end, Data, Defaults), + % initialize active task + fabric2_active_tasks:update_active_task_info(Data, #{}). + + +% Transaction limit exceeded don't retry +should_retry(_, _, {erlfdb_error, 2101}) -> + false; + +should_retry(Retries, RetryLimit, _) when Retries < RetryLimit -> + true; + +should_retry(_, _, _) -> + false. + + +add_error(error, {erlfdb_error, Code}, Data) -> + CodeBin = couch_util:to_binary(Code), + CodeString = erlfdb:get_error_string(Code), + Data#{ + error => foundationdb_error, + reason => list_to_binary([CodeBin, <<"-">>, CodeString]) + }; + +add_error(Error, Reason, Data) -> + Data#{ + error => couch_util:to_binary(Error), + reason => couch_util:to_binary(Reason) + }. + + +update(#{} = Db, Mrst0, State0) -> + Limit = couch_views_batch:start(Mrst0), + {Mrst1, State1} = try + do_update(Db, Mrst0, State0#{limit => Limit}) + catch + error:{erlfdb_error, Error} when ?IS_RECOVERABLE_ERROR(Error) -> + couch_views_batch:failure(Mrst0), + update(Db, Mrst0, State0) + end, + case State1 of + finished -> + couch_eval:release_map_context(Mrst1#mrst.qserver); + _ -> + #{ + update_stats := UpdateStats + } = State1, + couch_views_batch:success(Mrst1, UpdateStats), + update(Db, Mrst1, State1) + end. + + +do_update(Db, Mrst0, State0) -> + fabric2_fdb:transactional(Db, fun(TxDb) -> + #{ + tx := Tx + } = TxDb, + + State1 = get_update_start_state(TxDb, Mrst0, State0), + Mrst1 = couch_views_trees:open(TxDb, Mrst0), + + {ok, State2} = fold_changes(State1), + + #{ + count := Count, + limit := Limit, + doc_acc := DocAcc, + last_seq := LastSeq, + view_vs := ViewVS, + changes_done := ChangesDone0, + design_opts := DesignOpts + } = State2, + + DocAcc1 = fetch_docs(TxDb, DesignOpts, DocAcc), + + {Mrst2, MappedDocs} = map_docs(Mrst0, DocAcc1), + TotalKVs = write_docs(TxDb, Mrst1, MappedDocs, State2), + + ChangesDone = ChangesDone0 + length(DocAcc), + + UpdateStats = #{ + docs_read => length(DocAcc), + tx_size => erlfdb:wait(erlfdb:get_approximate_size(Tx)), + total_kvs => TotalKVs + }, + + case Count < Limit of + true -> + maybe_set_build_status(TxDb, Mrst2, ViewVS, + ?INDEX_READY), + report_progress(State2#{changes_done := ChangesDone}, + finished), + {Mrst2, finished}; + false -> + State3 = report_progress(State2, update), + {Mrst2, State3#{ + tx_db := undefined, + count := 0, + doc_acc := [], + changes_done := ChangesDone, + view_seq := LastSeq, + update_stats := UpdateStats + }} + end + end). + + +maybe_set_build_status(_TxDb, _Mrst1, not_found, _State) -> + ok; + +maybe_set_build_status(TxDb, Mrst1, _ViewVS, State) -> + couch_views_fdb:set_build_status(TxDb, Mrst1, State). + + +% In the first iteration of update we need +% to populate our db and view sequences +get_update_start_state(TxDb, Mrst, #{db_seq := undefined} = State) -> + #{ + view_vs := ViewVS, + view_seq := ViewSeq + } = couch_views_fdb:get_view_state(TxDb, Mrst), + + State#{ + tx_db := TxDb, + db_seq := fabric2_db:get_update_seq(TxDb), + view_vs := ViewVS, + view_seq := ViewSeq, + last_seq := ViewSeq + }; + +get_update_start_state(TxDb, _Idx, State) -> + State#{ + tx_db := TxDb + }. 
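
The retry policy earlier in this module caps whole-job rebuild attempts at retry_limit() but never retries after a 2101 (transaction_too_large) error, which a plain re-run cannot fix. A shell-style sketch of the expected behaviour, assuming a TEST build (where the -compile(export_all) above applies) and illustrative arguments:

    %% Sketch only: relies on the TEST export of should_retry/3.
    false = couch_views_indexer:should_retry(1, 3, {erlfdb_error, 2101}),
    true  = couch_views_indexer:should_retry(1, 3, any_other_reason),
    false = couch_views_indexer:should_retry(3, 3, any_other_reason).
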
+ + +fold_changes(State) -> + #{ + view_seq := SinceSeq, + limit := Limit, + tx_db := TxDb + } = State, + + Fun = fun process_changes/2, + Opts = [{limit, Limit}, {restart_tx, false}], + fabric2_db:fold_changes(TxDb, SinceSeq, Fun, State, Opts). + + +process_changes(Change, Acc) -> + #{ + doc_acc := DocAcc, + count := Count, + design_opts := DesignOpts, + view_vs := ViewVS + } = Acc, + + #{ + id := Id, + sequence := LastSeq + } = Change, + + IncludeDesign = lists:keymember(<<"include_design">>, 1, DesignOpts), + + Acc1 = case {Id, IncludeDesign} of + {<<?DESIGN_DOC_PREFIX, _/binary>>, false} -> + maps:merge(Acc, #{ + count => Count + 1, + last_seq => LastSeq + }); + _ -> + Acc#{ + doc_acc := DocAcc ++ [Change], + count := Count + 1, + last_seq := LastSeq + } + end, + + DocVS = fabric2_fdb:seq_to_vs(LastSeq), + + Go = maybe_stop_at_vs(ViewVS, DocVS), + {Go, Acc1}. + + +maybe_stop_at_vs({versionstamp, _} = ViewVS, DocVS) when DocVS >= ViewVS -> + stop; + +maybe_stop_at_vs(_, _) -> + ok. + + +map_docs(Mrst, []) -> + {Mrst, []}; + +map_docs(Mrst, Docs) -> + % Run all the non deleted docs through the view engine and + Mrst1 = start_query_server(Mrst), + QServer = Mrst1#mrst.qserver, + + {Deleted0, NotDeleted0} = lists:partition(fun(Doc) -> + #{deleted := Deleted} = Doc, + Deleted + end, Docs), + + Deleted1 = lists:map(fun(Doc) -> + Doc#{results => [[] || _ <- Mrst1#mrst.views]} + end, Deleted0), + + DocsToMap = lists:map(fun(Doc) -> + #{doc := DocRec} = Doc, + DocRec + end, NotDeleted0), + + {ok, AllResults} = couch_eval:map_docs(QServer, DocsToMap), + + % The expanded function head here is making an assertion + % that the results match the given doc + NotDeleted1 = lists:zipwith(fun(#{id := DocId} = Doc, {DocId, Results}) -> + Doc#{results => Results} + end, NotDeleted0, AllResults), + + % I'm being a bit careful here resorting the docs + % in order of the changes feed. Theoretically this is + % unnecessary since we're inside a single transaction. + % However, I'm concerned if we ever split this up + % into multiple transactions that this detail might + % be important but forgotten. + MappedDocs = lists:sort(fun(A, B) -> + #{sequence := ASeq} = A, + #{sequence := BSeq} = B, + ASeq =< BSeq + end, Deleted1 ++ NotDeleted1), + + {Mrst1, MappedDocs}. + + +write_docs(TxDb, Mrst, Docs0, State) -> + #mrst{ + sig = Sig + } = Mrst, + + #{ + last_seq := LastSeq + } = State, + + KeyLimit = key_size_limit(), + ValLimit = value_size_limit(), + + {Docs1, TotalKVCount} = lists:mapfoldl(fun(Doc0, KVCount) -> + Doc1 = check_kv_size_limit(Mrst, Doc0, KeyLimit, ValLimit), + {Doc1, KVCount + count_kvs(Doc1)} + end, 0, Docs0), + + couch_views_trees:update_views(TxDb, Mrst, Docs1), + + if LastSeq == false -> ok; true -> + couch_views_fdb:set_update_seq(TxDb, Sig, LastSeq) + end, + + TotalKVCount. 
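
map_docs/2 above partitions deleted from live documents, gives deleted ones empty result lists, and pairs live ones positionally with the output of couch_eval:map_docs/2. A standalone sketch of that pairing, using hypothetical doc maps rather than real change records:

    %% Sketch only: the doc shapes and emitted rows below are illustrative.
    Docs = [
        #{id => <<"a">>, deleted => false},
        #{id => <<"b">>, deleted => true}
    ],
    {Deleted, Live} = lists:partition(fun(#{deleted := D}) -> D end, Docs),
    EvalResults = [{<<"a">>, [[{<<"key">>, 1}]]}],
    WithRows = lists:zipwith(fun(#{id := Id} = Doc, {Id, Results}) ->
        Doc#{results => Results}
    end, Live, EvalResults),
    Cleared = [D#{results => [[]]} || D <- Deleted],
    _AllDocs = WithRows ++ Cleared.
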
+ + +fetch_docs(Db, DesignOpts, Changes) -> + {Deleted, NotDeleted} = lists:partition(fun(Doc) -> + #{deleted := Deleted} = Doc, + Deleted + end, Changes), + + RevState = lists:foldl(fun(Change, Acc) -> + #{id := Id} = Change, + RevFuture = fabric2_fdb:get_winning_revs_future(Db, Id, 1), + Acc#{ + RevFuture => {Id, Change} + } + end, #{}, NotDeleted), + + RevFutures = maps:keys(RevState), + BodyState = lists:foldl(fun(RevFuture, Acc) -> + {Id, Change} = maps:get(RevFuture, RevState), + Revs = fabric2_fdb:get_revs_wait(Db, RevFuture), + + % I'm assuming that in this changes transaction that the winning + % doc body exists since it is listed in the changes feed as not deleted + #{winner := true} = RevInfo = lists:last(Revs), + BodyFuture = fabric2_fdb:get_doc_body_future(Db, Id, RevInfo), + Acc#{ + BodyFuture => {Id, RevInfo, Change} + } + end, #{}, erlfdb:wait_for_all(RevFutures)), + + AddLocalSeq = fabric2_util:get_value(<<"local_seq">>, DesignOpts, false), + + BodyFutures = maps:keys(BodyState), + ChangesWithDocs = lists:map(fun (BodyFuture) -> + {Id, RevInfo, Change} = maps:get(BodyFuture, BodyState), + Doc = fabric2_fdb:get_doc_body_wait(Db, Id, RevInfo, BodyFuture), + + Doc1 = case maps:get(branch_count, RevInfo, 1) of + 1 when AddLocalSeq -> + {ok, DocWithLocalSeq} = fabric2_db:apply_open_doc_opts(Doc, + [RevInfo], [local_seq]), + DocWithLocalSeq; + 1 -> + Doc; + _ -> + RevConflicts = fabric2_fdb:get_all_revs(Db, Id), + DocOpts = if not AddLocalSeq -> []; true -> [local_seq] end, + + {ok, DocWithConflicts} = fabric2_db:apply_open_doc_opts(Doc, + RevConflicts, [conflicts | DocOpts]), + DocWithConflicts + end, + Change#{doc => Doc1} + end, erlfdb:wait_for_all(BodyFutures)), + + % This combines the deleted changes with the changes that contain docs + % Important to note that this is now unsorted. Which is fine for now + % But later could be an issue if we split this across transactions + Deleted ++ ChangesWithDocs. + + +start_query_server(#mrst{qserver = nil} = Mrst) -> + #mrst{ + db_name = DbName, + idx_name = DDocId, + language = Language, + sig = Sig, + lib = Lib, + views = Views + } = Mrst, + {ok, QServer} = couch_eval:acquire_map_context( + DbName, + DDocId, + Language, + Sig, + Lib, + [View#mrview.def || View <- Views] + ), + Mrst#mrst{qserver = QServer}; + +start_query_server(#mrst{} = Mrst) -> + Mrst. + + +check_kv_size_limit(Mrst, Doc, KeyLimit, ValLimit) -> + #mrst{ + db_name = DbName, + idx_name = IdxName + } = Mrst, + #{ + results := Results + } = Doc, + try + lists:foreach(fun(ViewRows) -> + lists:foreach(fun({K, V}) -> + KeySize = couch_ejson_size:encoded_size(K), + ValSize = couch_ejson_size:encoded_size(V), + + if KeySize =< KeyLimit -> ok; true -> + throw({size_error, key}) + end, + + if ValSize =< ValLimit -> ok; true -> + throw({size_error, value}) + end + end, ViewRows) + end, Results), + Doc + catch throw:{size_error, Type} -> + #{id := DocId} = Doc, + Fmt = "View ~s size error for docid `~s`, excluded from indexing " + "in db `~s` for design doc `~s`", + couch_log:error(Fmt, [Type, DocId, DbName, IdxName]), + Doc#{ + deleted := true, + results := [[] || _ <- Mrst#mrst.views], + kv_sizes => [] + } + end. + + +count_kvs(Doc) -> + #{ + results := Results + } = Doc, + lists:foldl(fun(ViewRows, Count) -> + Count + length(ViewRows) + end, 0, Results). 
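
check_kv_size_limit/4 above rejects documents whose emitted keys or values exceed the configured encoded-JSON size; instead of failing the build it marks the doc deleted so its rows are dropped from the index. A minimal sketch of the per-key gate, using the default ?KEY_SIZE_LIMIT and a made-up key:

    %% Sketch only: 8000 mirrors ?KEY_SIZE_LIMIT; the key is illustrative.
    KeyLimit = 8000,
    Key = [<<"2020">>, 1, 15],
    ok = case couch_ejson_size:encoded_size(Key) =< KeyLimit of
        true -> ok;
        false -> throw({size_error, key})
    end.
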
+ + +report_progress(State, UpdateType) -> + #{ + tx_db := TxDb, + job := Job1, + job_data := JobData, + last_seq := LastSeq, + db_seq := DBSeq, + changes_done := ChangesDone + } = State, + + #{ + <<"db_name">> := DbName, + <<"db_uuid">> := DbUUID, + <<"ddoc_id">> := DDocId, + <<"sig">> := Sig, + <<"retries">> := Retries + } = JobData, + + ActiveTasks = fabric2_active_tasks:get_active_task_info(JobData), + TotalDone = case maps:get(<<"changes_done">>, ActiveTasks, 0) of + 0 -> ChangesDone; + N -> N + ChangesDone + end, + + NewActiveTasks = couch_views_util:active_tasks_info(TotalDone, + DbName, DDocId, LastSeq, DBSeq), + + % Reconstruct from scratch to remove any + % possible existing error state. + NewData0 = #{ + <<"db_name">> => DbName, + <<"db_uuid">> => DbUUID, + <<"ddoc_id">> => DDocId, + <<"sig">> => Sig, + <<"view_seq">> => LastSeq, + <<"retries">> => Retries + }, + NewData = fabric2_active_tasks:update_active_task_info(NewData0, + NewActiveTasks), + + case UpdateType of + update -> + case couch_jobs:update(TxDb, Job1, NewData) of + {ok, Job2} -> + State#{job := Job2}; + {error, halt} -> + couch_log:error("~s job halted :: ~w", [?MODULE, Job1]), + exit(normal) + end; + finished -> + case couch_jobs:finish(TxDb, Job1, NewData) of + ok -> + State; + {error, halt} -> + couch_log:error("~s job halted :: ~w", [?MODULE, Job1]), + exit(normal) + end + end. + + +fail_job(Job, Data, Error, Reason) -> + NewData = add_error(Error, Reason, Data), + couch_jobs:finish(undefined, Job, NewData), + exit(normal). + + +retry_limit() -> + config:get_integer("couch_views", "retry_limit", 3). + + +key_size_limit() -> + config:get_integer("couch_views", "key_size_limit", ?KEY_SIZE_LIMIT). + + +value_size_limit() -> + config:get_integer("couch_views", "value_size_limit", ?VALUE_SIZE_LIMIT). diff --git a/src/couch_views/src/couch_views_jobs.erl b/src/couch_views/src/couch_views_jobs.erl new file mode 100644 index 000000000..4b0aa2660 --- /dev/null +++ b/src/couch_views/src/couch_views_jobs.erl @@ -0,0 +1,163 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_views_jobs). + +-export([ + set_timeout/0, + build_view/3, + build_view_async/2, + remove/2, + job_state/2 +]). + +-ifdef(TEST). +-compile(export_all). +-compile(nowarn_export_all). +-endif. + + +-include_lib("couch_mrview/include/couch_mrview.hrl"). +-include("couch_views.hrl"). + + +set_timeout() -> + couch_jobs:set_type_timeout(?INDEX_JOB_TYPE, 26). + + +build_view(TxDb, Mrst, UpdateSeq) -> + {ok, JobId} = build_view_async(TxDb, Mrst), + case wait_for_job(JobId, Mrst#mrst.idx_name, UpdateSeq) of + ok -> ok; + retry -> build_view(TxDb, Mrst, UpdateSeq) + end. 
+ + +build_view_async(TxDb0, Mrst) -> + JobId = job_id(TxDb0, Mrst), + JobData = job_data(TxDb0, Mrst), + DbUUID = fabric2_db:get_uuid(TxDb0), + TxDb1 = ensure_correct_tx(TxDb0), + couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(TxDb1), fun(JTx) -> + case couch_jobs:get_job_data(JTx, ?INDEX_JOB_TYPE, JobId) of + {error, not_found} -> + ok; + {ok, #{} = OldJobData} -> + case maps:get(<<"db_uuid">>, OldJobData, undefined) of + DbUUID -> ok; + _ -> couch_jobs:remove(JTx, ?INDEX_JOB_TYPE, JobId) + end + end, + ok = couch_jobs:add(JTx, ?INDEX_JOB_TYPE, JobId, JobData) + end), + {ok, JobId}. + + +remove(TxDb, Sig) -> + DbName = fabric2_db:name(TxDb), + JobId = job_id(DbName, Sig), + couch_jobs:remove(TxDb, ?INDEX_JOB_TYPE, JobId). + + +job_state(#{} = TxDb, #mrst{} = Mrst) -> + JobId = job_id(TxDb, Mrst), + couch_jobs:get_job_state(TxDb, ?INDEX_JOB_TYPE, JobId). + + +ensure_correct_tx(#{tx := undefined} = TxDb) -> + TxDb; + +ensure_correct_tx(#{tx := Tx} = TxDb) -> + case erlfdb:is_read_only(Tx) of + true -> TxDb#{tx := undefined}; + false -> TxDb + end. + + +wait_for_job(JobId, DDocId, UpdateSeq) -> + case couch_jobs:subscribe(?INDEX_JOB_TYPE, JobId) of + {ok, Subscription, _State, _Data} -> + wait_for_job(JobId, Subscription, DDocId, UpdateSeq); + {ok, finished, Data} -> + case Data of + #{<<"view_seq">> := ViewSeq} when ViewSeq >= UpdateSeq -> + ok; + _ -> + retry + end + end. + + +wait_for_job(JobId, Subscription, DDocId, UpdateSeq) -> + case wait(Subscription) of + {not_found, not_found} -> + erlang:error(index_not_found); + {error, Error} -> + erlang:error(Error); + {finished, #{<<"error">> := <<"ddoc_deleted">>} = Data} -> + case maps:get(<<"ddoc_id">>, Data) of + DDocId -> + couch_jobs:remove(undefined, ?INDEX_JOB_TYPE, JobId), + erlang:error({ddoc_deleted, maps:get(<<"reason">>, Data)}); + _OtherDocId -> + % A different design doc wiht the same signature + % was deleted. Resubmit this job which will overwrite + % the ddoc_id in the job. + retry + end; + {finished, #{<<"error">> := Error, <<"reason">> := Reason}} -> + couch_jobs:remove(undefined, ?INDEX_JOB_TYPE, JobId), + erlang:error({binary_to_existing_atom(Error, latin1), Reason}); + {finished, #{<<"view_seq">> := ViewSeq}} when ViewSeq >= UpdateSeq -> + ok; + {finished, _} -> + wait_for_job(JobId, DDocId, UpdateSeq); + {_State, #{<<"view_seq">> := ViewSeq}} when ViewSeq >= UpdateSeq -> + couch_jobs:unsubscribe(Subscription), + ok; + {_, _} -> + wait_for_job(JobId, Subscription, DDocId, UpdateSeq) + end. + + +job_id(#{name := DbName}, #mrst{sig = Sig}) -> + job_id(DbName, Sig); + +job_id(DbName, Sig) -> + HexSig = fabric2_util:to_hex(Sig), + % Put signature first in order to be able to use the no_schedule + % couch_jobs:accept/2 option + <<HexSig/binary, "-", DbName/binary>>. + + +job_data(Db, Mrst) -> + #mrst{ + idx_name = DDocId, + sig = Sig + } = Mrst, + + #{ + db_name => fabric2_db:name(Db), + db_uuid => fabric2_db:get_uuid(Db), + ddoc_id => DDocId, + sig => fabric2_util:to_hex(Sig), + retries => 0 + }. + + +wait(Subscription) -> + case couch_jobs:wait(Subscription, infinity) of + {?INDEX_JOB_TYPE, _JobId, JobState, JobData} -> + {JobState, JobData}; + timeout -> + {error, timeout} + end. 
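
job_id/2 above places the hex view signature before the database name so that a worker using couch_jobs:accept/2 with the no_schedule option can still locate jobs by signature prefix. A sketch of the resulting id, with a made-up signature source and database name:

    %% Sketch only: the signature input and DbName are illustrative;
    %% to_hex/1 is the same helper used in job_id/2 above.
    Sig = couch_hash:md5_hash(term_to_binary(example_sig_info)),
    HexSig = fabric2_util:to_hex(Sig),
    DbName = <<"db-example">>,
    JobId = <<HexSig/binary, "-", DbName/binary>>.
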
diff --git a/src/couch_views/src/couch_views_plugin.erl b/src/couch_views/src/couch_views_plugin.erl new file mode 100644 index 000000000..f8169179a --- /dev/null +++ b/src/couch_views/src/couch_views_plugin.erl @@ -0,0 +1,40 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + + +-module(couch_views_plugin). + + +-export([ + after_interactive_write/4 +]). + + +-define(SERVICE_ID, couch_views). + + +after_interactive_write(Db, Mrst, Result, DocNumber) -> + with_pipe(after_interactive_write, [Db, Mrst, Result, DocNumber]), + ok. + + +%% ------------------------------------------------------------------ +%% Internal Function Definitions +%% ------------------------------------------------------------------ + +with_pipe(Func, Args) -> + do_apply(Func, Args, [pipe]). + + +do_apply(Func, Args, Opts) -> + Handle = couch_epi:get_handle(?SERVICE_ID), + couch_epi:apply(Handle, ?SERVICE_ID, Func, Args, Opts).
\ No newline at end of file diff --git a/src/couch_views/src/couch_views_reader.erl b/src/couch_views/src/couch_views_reader.erl new file mode 100644 index 000000000..3c5862749 --- /dev/null +++ b/src/couch_views/src/couch_views_reader.erl @@ -0,0 +1,346 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_views_reader). + +-export([ + read/6 +]). + + +-include("couch_views.hrl"). +-include_lib("couch/include/couch_db.hrl"). +-include_lib("couch_mrview/include/couch_mrview.hrl"). +-include_lib("fabric/include/fabric2.hrl"). + + +read(Db, Mrst, ViewName, UserCallback, UserAcc, Args) -> + ReadFun = case Args of + #mrargs{view_type = map} -> fun read_map_view/6; + #mrargs{view_type = red} -> fun read_red_view/6 + end, + ReadFun(Db, Mrst, ViewName, UserCallback, UserAcc, Args). + + +read_map_view(Db, Mrst0, ViewName, UserCallback, UserAcc0, Args) -> + try + fabric2_fdb:transactional(Db, fun(TxDb) -> + #mrst{ + language = Lang, + views = Views + } = Mrst = couch_views_trees:open(TxDb, Mrst0), + + View = get_map_view(Lang, Args, ViewName, Views), + Fun = fun handle_map_row/4, + + Meta = get_map_meta(TxDb, Mrst, View, Args), + UserAcc1 = maybe_stop(UserCallback(Meta, UserAcc0)), + + Acc0 = #{ + db => TxDb, + skip => Args#mrargs.skip, + limit => Args#mrargs.limit, + mrargs => undefined, + callback => UserCallback, + acc => UserAcc1 + }, + + Acc1 = lists:foldl(fun(KeyArgs, KeyAcc0) -> + Opts = mrargs_to_fdb_options(KeyArgs), + KeyAcc1 = KeyAcc0#{ + mrargs := KeyArgs + }, + couch_views_trees:fold_map_idx(TxDb, View, Opts, Fun, KeyAcc1) + end, Acc0, expand_keys_args(Args)), + + #{ + acc := UserAcc2 + } = Acc1, + {ok, maybe_stop(UserCallback(complete, UserAcc2))} + end) + catch + throw:{complete, Out} -> + {_, Final} = UserCallback(complete, Out), + {ok, Final}; + throw:{done, Out} -> + {ok, Out} + end. 
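
read_map_view/6 above drives a user-supplied callback with {meta, ...}, {row, ...} and complete events, and maybe_stop/1 (defined later in this module) turns a {stop, Acc} return into an early exit. A sketch of a callback honouring that contract, collecting at most three rows; the accumulator shape is an arbitrary choice:

    %% Sketch only: any accumulator works; a list of rows is used here.
    Callback = fun
        ({meta, _Meta}, Acc)                   -> {ok, Acc};
        ({row, Row}, Acc) when length(Acc) < 3 -> {ok, [Row | Acc]};
        ({row, _Row}, Acc)                     -> {stop, Acc};
        (complete, Acc)                        -> {ok, lists:reverse(Acc)}
    end.
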
+ + +read_red_view(Db, Mrst0, ViewName, UserCallback, UserAcc0, Args) -> + #mrst{ + language = Lang, + views = Views + } = Mrst0, + {Idx, Lang, View0} = get_red_view(Lang, Args, ViewName, Views), + Mrst1 = Mrst0#mrst{views = [View0]}, + ReadOpts = [{read_only, Idx}], + try + fabric2_fdb:transactional(Db, fun(TxDb) -> + #mrst{ + language = Lang, + views = [View1] + } = Mrst = couch_views_trees:open(TxDb, Mrst1, ReadOpts), + + #mrargs{ + extra = Extra + } = Args, + + Fun = fun handle_red_row/3, + + Meta = get_red_meta(TxDb, Mrst, View1, Args), + UserAcc1 = maybe_stop(UserCallback(Meta, UserAcc0)), + + Finalizer = case couch_util:get_value(finalizer, Extra) of + undefined -> + {_, FunSrc} = lists:nth(Idx, View1#mrview.reduce_funs), + FunSrc; + CustomFun-> + CustomFun + end, + + Acc0 = #{ + db => TxDb, + skip => Args#mrargs.skip, + limit => Args#mrargs.limit, + mrargs => undefined, + finalizer => Finalizer, + red_idx => Idx, + language => Lang, + callback => UserCallback, + acc => UserAcc1 + }, + + Acc1 = lists:foldl(fun(KeyArgs, KeyAcc0) -> + Opts = mrargs_to_fdb_options(KeyArgs), + KeyAcc1 = KeyAcc0#{ + mrargs := KeyArgs + }, + couch_views_trees:fold_red_idx( + TxDb, + View1, + Idx, + Opts, + Fun, + KeyAcc1 + ) + end, Acc0, expand_keys_args(Args)), + + #{ + acc := UserAcc2 + } = Acc1, + {ok, maybe_stop(UserCallback(complete, UserAcc2))} + end) + catch + throw:{complete, Out} -> + {_, Final} = UserCallback(complete, Out), + {ok, Final}; + throw:{done, Out} -> + {ok, Out} + end. + + +get_map_meta(TxDb, Mrst, View, #mrargs{update_seq = true}) -> + TotalRows = couch_views_trees:get_row_count(TxDb, View), + ViewSeq = couch_views_fdb:get_update_seq(TxDb, Mrst), + {meta, [{update_seq, ViewSeq}, {total, TotalRows}, {offset, null}]}; + +get_map_meta(TxDb, _Mrst, View, #mrargs{}) -> + TotalRows = couch_views_trees:get_row_count(TxDb, View), + {meta, [{total, TotalRows}, {offset, null}]}. + + +get_red_meta(TxDb, Mrst, _View, #mrargs{update_seq = true}) -> + ViewSeq = couch_views_fdb:get_update_seq(TxDb, Mrst), + {meta, [{update_seq, ViewSeq}]}; + +get_red_meta(_TxDb, _Mrst, _View, #mrargs{}) -> + {meta, []}. + + +handle_map_row(_DocId, _Key, _Value, #{skip := Skip} = Acc) when Skip > 0 -> + Acc#{skip := Skip - 1}; + +handle_map_row(_DocID, _Key, _Value, #{limit := 0, acc := UserAcc}) -> + throw({complete, UserAcc}); + +handle_map_row(DocId, Key, Value, Acc) -> + #{ + db := TxDb, + limit := Limit, + mrargs := Args, + callback := UserCallback, + acc := UserAcc0 + } = Acc, + + BaseRow = [ + {id, DocId}, + {key, Key}, + {value, Value} + ], + + Row = BaseRow ++ if not Args#mrargs.include_docs -> []; true -> + DocOpts0 = Args#mrargs.doc_options, + DocOpts1 = DocOpts0 ++ case Args#mrargs.conflicts of + true -> [conflicts]; + _ -> [] + end, + + {TargetDocId, Rev} = get_doc_id(DocId, Value), + DocObj = load_doc(TxDb, TargetDocId, Rev, DocOpts1), + [{doc, DocObj}] + end, + + UserAcc1 = maybe_stop(UserCallback({row, Row}, UserAcc0)), + Acc#{limit := Limit - 1, acc := UserAcc1}. 
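
When include_docs is requested, handle_map_row/4 above loads either the row's own document or, if the emitted value is an object carrying _id (and optionally _rev), the document it points at. get_doc_id/2, defined further down, makes that choice; a standalone sketch mirroring its two clauses with made-up ids:

    %% Sketch only: mirrors get_doc_id/2 below; ids and revs are made up.
    GetDocId = fun
        (Id, {Props}) ->
            {couch_util:get_value(<<"_id">>, Props, Id),
             couch_util:get_value(<<"_rev">>, Props, null)};
        (Id, _Value) ->
            {Id, null}
    end,
    {<<"linked">>, <<"1-abc">>} = GetDocId(<<"row_doc">>,
        {[{<<"_id">>, <<"linked">>}, {<<"_rev">>, <<"1-abc">>}]}),
    {<<"row_doc">>, null} = GetDocId(<<"row_doc">>, 42).
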
+ + +handle_red_row(_Key, _Red, #{skip := Skip} = Acc) when Skip > 0 -> + Acc#{skip := Skip - 1}; + +handle_red_row(_Key, _Value, #{limit := 0, acc := UserAcc}) -> + throw({complete, UserAcc}); + +handle_red_row(Key0, Value0, Acc) -> + #{ + limit := Limit, + finalizer := Finalizer, + callback := UserCallback, + acc := UserAcc0 + } = Acc, + + Key1 = case Key0 of + undefined -> null; + _ -> Key0 + end, + Value1 = maybe_finalize(Finalizer, Value0), + Row = [{key, Key1}, {value, Value1}], + + UserAcc1 = maybe_stop(UserCallback({row, Row}, UserAcc0)), + Acc#{limit := Limit - 1, acc := UserAcc1}. + + +maybe_finalize(null, Red) -> + Red; +maybe_finalize(Finalizer, Red) -> + {ok, Finalized} = couch_query_servers:finalize(Finalizer, Red), + Finalized. + + +get_map_view(Lang, Args, ViewName, Views) -> + case couch_mrview_util:extract_view(Lang, Args, ViewName, Views) of + {map, View, _Args} -> View; + {red, {_Idx, _Lang, View}, _} -> View + end. + + +get_red_view(Lang, Args, ViewName, Views) -> + case couch_mrview_util:extract_view(Lang, Args, ViewName, Views) of + {red, {Idx, Lang, View}, _} -> {Idx, Lang, View}; + _ -> throw({not_found, missing_named_view}) + end. + + +expand_keys_args(#mrargs{keys = undefined} = Args) -> + [Args]; + +expand_keys_args(#mrargs{keys = Keys} = Args) -> + lists:map(fun(Key) -> + Args#mrargs{ + start_key = Key, + end_key = Key + } + end, Keys). + + +mrargs_to_fdb_options(Args) -> + #mrargs{ + view_type = ViewType, + start_key = StartKey, + start_key_docid = StartKeyDocId, + end_key = EndKey, + end_key_docid = EndKeyDocId0, + direction = Direction, + inclusive_end = InclusiveEnd, + group_level = GroupLevel + } = Args, + + StartKeyOpts = if StartKey == undefined -> []; true -> + [{start_key, {StartKey, StartKeyDocId}}] + end, + + EndKeyDocId = case {Direction, EndKeyDocId0} of + {fwd, <<255>>} when InclusiveEnd -> <<255>>; + {fwd, <<255>>} when not InclusiveEnd -> <<>>; + {rev, <<>>} when InclusiveEnd -> <<>>; + {rev, <<>>} when not InclusiveEnd -> <<255>>; + _ -> EndKeyDocId0 + end, + + EndKeyOpts = if EndKey == undefined -> []; true -> + [{end_key, {EndKey, EndKeyDocId}}] + end, + + GroupFunOpt = make_group_key_fun(ViewType, GroupLevel), + + [ + {dir, Direction}, + {inclusive_end, InclusiveEnd} + ] ++ StartKeyOpts ++ EndKeyOpts ++ GroupFunOpt. + + +make_group_key_fun(map, _) -> + []; + +make_group_key_fun(red, exact) -> + [ + {group_key_fun, fun({Key, _DocId}) -> Key end} + ]; + +make_group_key_fun(red, 0) -> + [ + {group_key_fun, group_all} + ]; + +make_group_key_fun(red, N) when is_integer(N), N > 0 -> + GKFun = fun + ({Key, _DocId}) when is_list(Key) -> lists:sublist(Key, N); + ({Key, _DocId}) -> Key + end, + [{group_key_fun, GKFun}]. + + +maybe_stop({ok, Acc}) -> Acc; +maybe_stop({stop, Acc}) -> throw({done, Acc}). + + +get_doc_id(Id, {Props}) -> + DocId = couch_util:get_value(<<"_id">>, Props, Id), + Rev = couch_util:get_value(<<"_rev">>, Props, null), + {DocId, Rev}; + +get_doc_id(Id, _Value) -> + {Id, null}. + + +load_doc(TxDb, Id, null, DocOpts) -> + case fabric2_db:open_doc(TxDb, Id, DocOpts) of + {ok, Doc} -> couch_doc:to_json_obj(Doc, DocOpts); + {not_found, _} -> null + end; + +load_doc(TxDb, Id, Rev, DocOpts) -> + Rev1 = couch_doc:parse_rev(Rev), + case fabric2_db:open_doc_revs(TxDb, Id, [Rev1], DocOpts) of + {ok, [{ok, Doc}]} -> couch_doc:to_json_obj(Doc, DocOpts); + {ok, [_Else]} -> null + end. 
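
make_group_key_fun/2 above truncates array keys to the requested group_level while scalar keys pass through unchanged. A standalone sketch that mirrors the group_level=2 clause with illustrative keys:

    %% Sketch only: mirrors the group_level=N clause of make_group_key_fun/2.
    GKFun = fun
        ({Key, _DocId}) when is_list(Key) -> lists:sublist(Key, 2);
        ({Key, _DocId}) -> Key
    end,
    [2020, 1] = GKFun({[2020, 1, 15], <<"doc-a">>}),
    <<"scalar">> = GKFun({<<"scalar">>, <<"doc-b">>}).
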
diff --git a/src/couch_views/src/couch_views_server.erl b/src/couch_views/src/couch_views_server.erl new file mode 100644 index 000000000..71a4abb8d --- /dev/null +++ b/src/couch_views/src/couch_views_server.erl @@ -0,0 +1,176 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_views_server). + + +-behaviour(gen_server). + + +-export([ + start_link/0 +]). + +-export([ + accepted/1 +]). + +-export([ + init/1, + terminate/2, + handle_call/3, + handle_cast/2, + handle_info/2, + code_change/3, + format_status/2 +]). + +-define(MAX_ACCEPTORS, 5). +-define(MAX_WORKERS, 100). + + +start_link() -> + gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). + + +accepted(Worker) when is_pid(Worker) -> + gen_server:call(?MODULE, {accepted, Worker}, infinity). + + +init(_) -> + process_flag(trap_exit, true), + couch_views_jobs:set_timeout(), + St = #{ + acceptors => #{}, + workers => #{}, + max_acceptors => max_acceptors(), + max_workers => max_workers() + }, + {ok, spawn_acceptors(St)}. + + +terminate(_, _St) -> + ok. + + +handle_call({accepted, Pid}, _From, St) -> + #{ + acceptors := Acceptors, + workers := Workers + } = St, + case maps:is_key(Pid, Acceptors) of + true -> + St1 = St#{ + acceptors := maps:remove(Pid, Acceptors), + workers := Workers#{Pid => true} + }, + {reply, ok, spawn_acceptors(St1)}; + false -> + LogMsg = "~p : unknown acceptor processs ~p", + couch_log:error(LogMsg, [?MODULE, Pid]), + {stop, {unknown_acceptor_pid, Pid}, St} + end; + +handle_call(Msg, _From, St) -> + {stop, {bad_call, Msg}, {bad_call, Msg}, St}. + + +handle_cast(Msg, St) -> + {stop, {bad_cast, Msg}, St}. + + +handle_info({'EXIT', Pid, Reason}, St) -> + #{ + acceptors := Acceptors, + workers := Workers + } = St, + + % In Erlang 21+ could check map keys directly in the function head + case {maps:is_key(Pid, Acceptors), maps:is_key(Pid, Workers)} of + {true, false} -> handle_acceptor_exit(St, Pid, Reason); + {false, true} -> handle_worker_exit(St, Pid, Reason); + {false, false} -> handle_unknown_exit(St, Pid, Reason) + end; + +handle_info(Msg, St) -> + {stop, {bad_info, Msg}, St}. + + +code_change(_OldVsn, St, _Extra) -> + {ok, St}. + + +format_status(_Opt, [_PDict, State]) -> + #{ + workers := Workers, + acceptors := Acceptors + } = State, + Scrubbed = State#{ + workers => {map_size, maps:size(Workers)}, + acceptors => {map_size, maps:size(Acceptors)} + }, + [{data, [{"State", + Scrubbed + }]}]. + + +% Worker process exit handlers + +handle_acceptor_exit(#{acceptors := Acceptors} = St, Pid, Reason) -> + St1 = St#{acceptors := maps:remove(Pid, Acceptors)}, + LogMsg = "~p : acceptor process ~p exited with ~p", + couch_log:error(LogMsg, [?MODULE, Pid, Reason]), + {noreply, spawn_acceptors(St1)}. 
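
spawn_acceptors/1 (further down) keeps the pool topped up while two limits hold: at most max_acceptors idle acceptors, and acceptors plus busy workers together below max_workers. A sketch of that predicate with the default limits and illustrative counts:

    %% Sketch only: 5 and 100 mirror ?MAX_ACCEPTORS and ?MAX_WORKERS.
    MaxAcceptors = 5, MaxWorkers = 100,
    ShouldSpawn = fun(ACnt, WCnt) ->
        ACnt < MaxAcceptors andalso (ACnt + WCnt) < MaxWorkers
    end,
    true  = ShouldSpawn(0, 0),
    false = ShouldSpawn(5, 10),   % acceptor pool already full
    false = ShouldSpawn(2, 98).   % total pool at the worker cap
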
+ + +handle_worker_exit(#{workers := Workers} = St, Pid, normal) -> + St1 = St#{workers := maps:remove(Pid, Workers)}, + {noreply, spawn_acceptors(St1)}; + +handle_worker_exit(#{workers := Workers} = St, Pid, Reason) -> + St1 = St#{workers := maps:remove(Pid, Workers)}, + LogMsg = "~p : indexer process ~p exited with ~p", + couch_log:error(LogMsg, [?MODULE, Pid, Reason]), + {noreply, spawn_acceptors(St1)}. + + +handle_unknown_exit(St, Pid, Reason) -> + LogMsg = "~p : unknown process ~p exited with ~p", + couch_log:error(LogMsg, [?MODULE, Pid, Reason]), + {stop, {unknown_pid_exit, Pid}, St}. + + +spawn_acceptors(St) -> + #{ + workers := Workers, + acceptors := Acceptors, + max_acceptors := MaxAcceptors, + max_workers := MaxWorkers + } = St, + ACnt = maps:size(Acceptors), + WCnt = maps:size(Workers), + case ACnt < MaxAcceptors andalso (ACnt + WCnt) < MaxWorkers of + true -> + Pid = couch_views_indexer:spawn_link(), + NewSt = St#{acceptors := Acceptors#{Pid => true}}, + spawn_acceptors(NewSt); + false -> + St + end. + + +max_acceptors() -> + config:get_integer("couch_views", "max_acceptors", ?MAX_ACCEPTORS). + + +max_workers() -> + config:get_integer("couch_views", "max_workers", ?MAX_WORKERS). diff --git a/src/couch_views/src/couch_views_sup.erl b/src/couch_views/src/couch_views_sup.erl new file mode 100644 index 000000000..94531893d --- /dev/null +++ b/src/couch_views/src/couch_views_sup.erl @@ -0,0 +1,66 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + + +-module(couch_views_sup). + + +-behaviour(supervisor). + + +-export([ + start_link/0 +]). + + +-export([ + init/1 +]). + + +start_link() -> + ok = register_views_index(), + Arg = case fabric2_node_types:is_type(view_indexing) of + true -> normal; + false -> builds_disabled + end, + supervisor:start_link({local, ?MODULE}, ?MODULE, Arg). + + +init(normal) -> + Children = [ + #{ + id => couch_views_server, + start => {couch_views_server, start_link, []} + } + ] ++ couch_epi:register_service(couch_views_epi, []), + {ok, {flags(), Children}}; + +init(builds_disabled) -> + couch_log:notice("~p : view_indexing disabled", [?MODULE]), + couch_views_jobs:set_timeout(), + {ok, {flags(), []}}. + + +register_views_index() -> + case fabric2_node_types:is_type(api_frontend) of + true -> fabric2_index:register_index(couch_views); + false -> ok + end. + + +flags() -> + #{ + strategy => one_for_one, + intensity => 1, + period => 5 + }. diff --git a/src/couch_views/src/couch_views_trees.erl b/src/couch_views/src/couch_views_trees.erl new file mode 100644 index 000000000..b45750be9 --- /dev/null +++ b/src/couch_views/src/couch_views_trees.erl @@ -0,0 +1,587 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. 
You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_views_trees). + +-export([ + open/2, + open/3, + + get_row_count/2, + get_kv_size/2, + + fold_map_idx/5, + fold_red_idx/6, + + update_views/3 +]). + +-ifdef(TEST). +-compile(export_all). +-compile(nowarn_export_all). +-endif. + + +-include("couch_views.hrl"). +-include_lib("couch_mrview/include/couch_mrview.hrl"). +-include_lib("fabric/include/fabric2.hrl"). + + +open(TxDb, Mrst) -> + open(TxDb, Mrst, []). + + +open(TxDb, Mrst, Options) -> + #mrst{ + sig = Sig, + language = Lang, + views = Views + } = Mrst, + Mrst#mrst{ + id_btree = open_id_tree(TxDb, Sig), + views = [open_view_tree(TxDb, Sig, Lang, V, Options) || V <- Views] + }. + + +get_row_count(TxDb, View) -> + #{ + tx := Tx + } = TxDb, + {Count, _, _} = ebtree:full_reduce(Tx, View#mrview.btree), + Count. + + +get_kv_size(TxDb, View) -> + #{ + tx := Tx + } = TxDb, + {_, TotalSize, _} = ebtree:full_reduce(Tx, View#mrview.btree), + TotalSize. + + +fold_map_idx(TxDb, View, Options, Callback, Acc0) -> + #{ + tx := Tx + } = TxDb, + #mrview{ + btree = Btree + } = View, + + CollateFun = couch_views_util:collate_fun(View), + + {Dir, StartKey, EndKey, InclusiveEnd} = to_map_opts(Options), + + Wrapper = fun(KVs0, WAcc) -> + % Remove any keys that match Start or End key + % depending on direction + KVs1 = case InclusiveEnd of + true -> + KVs0; + false when Dir == fwd -> + lists:filter(fun({K, _V}) -> + case CollateFun(K, EndKey) of + lt -> true; + eq -> false; + gt -> false + end + end, KVs0); + false when Dir == rev -> + lists:filter(fun({K, _V}) -> + case CollateFun(K, EndKey) of + lt -> false; + eq -> false; + gt -> true + end + end, KVs0) + end, + % Expand dups + KVs2 = lists:flatmap(fun({K, V}) -> + case V of + {dups, Dups} when Dir == fwd -> + [{K, D} || D <- Dups]; + {dups, Dups} when Dir == rev -> + [{K, D} || D <- lists:reverse(Dups)]; + _ -> + [{K, V}] + end + end, KVs1), + lists:foldl(fun({{Key, DocId}, Value}, WAccInner) -> + Callback(DocId, Key, Value, WAccInner) + end, WAcc, KVs2) + end, + + case Dir of + fwd -> + ebtree:range(Tx, Btree, StartKey, EndKey, Wrapper, Acc0); + rev -> + % Start/End keys swapped on purpose because ebtree + ebtree:reverse_range(Tx, Btree, EndKey, StartKey, Wrapper, Acc0) + end. + + +fold_red_idx(TxDb, View, Idx, Options, Callback, Acc0) -> + #{ + tx := Tx + } = TxDb, + #mrview{ + btree = Btree + } = View, + + {Dir, StartKey, EndKey, InclusiveEnd, GroupKeyFun} = to_red_opts(Options), + + Wrapper = fun({GroupKey, Reduction}, WAcc) -> + {_RowCount, _RowSize, UserReds} = Reduction, + RedValue = lists:nth(Idx, UserReds), + Callback(GroupKey, RedValue, WAcc) + end, + + case {GroupKeyFun, Dir} of + {group_all, fwd} -> + EBtreeOpts = [ + {dir, fwd}, + {inclusive_end, InclusiveEnd} + ], + Reduction = ebtree:reduce(Tx, Btree, StartKey, EndKey, EBtreeOpts), + Wrapper({null, Reduction}, Acc0); + {F, fwd} when is_function(F) -> + EBtreeOpts = [ + {dir, fwd}, + {inclusive_end, InclusiveEnd} + ], + ebtree:group_reduce( + Tx, + Btree, + StartKey, + EndKey, + GroupKeyFun, + Wrapper, + Acc0, + EBtreeOpts + ); + {group_all, rev} -> + % Start/End keys swapped on purpose because ebtree. 
Also + % inclusive_start for same reason. + EBtreeOpts = [ + {dir, rev}, + {inclusive_start, InclusiveEnd} + ], + Reduction = ebtree:reduce(Tx, Btree, EndKey, StartKey, EBtreeOpts), + Wrapper({null, Reduction}, Acc0); + {F, rev} when is_function(F) -> + % Start/End keys swapped on purpose because ebtree. Also + % inclusive_start for same reason. + EBtreeOpts = [ + {dir, rev}, + {inclusive_start, InclusiveEnd} + ], + ebtree:group_reduce( + Tx, + Btree, + EndKey, + StartKey, + GroupKeyFun, + Wrapper, + Acc0, + EBtreeOpts + ) + end. + + +update_views(TxDb, Mrst, Docs) -> + #{ + tx := Tx + } = TxDb, + + % Get initial KV size + OldKVSize = lists:foldl(fun(View, SizeAcc) -> + {_, Size, _} = ebtree:full_reduce(Tx, View#mrview.btree), + SizeAcc + Size + end, 0, Mrst#mrst.views), + + % Collect update information + #{ + ids := IdMap, + views := ViewMaps, + delete_ref := DeleteRef + } = gather_update_info(Tx, Mrst, Docs), + + % Update the IdBtree + update_btree(Tx, Mrst#mrst.id_btree, IdMap, DeleteRef), + + % Update each view's BTree + lists:foreach(fun(View) -> + #mrview{ + id_num = ViewId, + btree = BTree + } = View, + + ViewMap = maps:get(ViewId, ViewMaps, #{}), + update_btree(Tx, BTree, ViewMap, DeleteRef) + end, Mrst#mrst.views), + + % Get new KV size after update + NewKVSize = lists:foldl(fun(View, SizeAcc) -> + {_, Size, _} = ebtree:full_reduce(Tx, View#mrview.btree), + SizeAcc + Size + end, 0, Mrst#mrst.views), + + couch_views_fdb:update_kv_size(TxDb, Mrst#mrst.sig, OldKVSize, NewKVSize). + + +open_id_tree(TxDb, Sig) -> + #{ + tx := Tx, + db_prefix := DbPrefix + } = TxDb, + Prefix = id_tree_prefix(DbPrefix, Sig), + TreeOpts = [ + {persist_fun, fun couch_views_fdb:persist_chunks/3}, + {cache_fun, create_cache_fun(id_tree)} + ], + ebtree:open(Tx, Prefix, get_order(id_btree), TreeOpts). + + +open_view_tree(TxDb, Sig, Lang, View, Options) -> + #{ + tx := Tx, + db_prefix := DbPrefix + } = TxDb, + #mrview{ + id_num = ViewId + } = View, + Prefix = view_tree_prefix(DbPrefix, Sig, ViewId), + BaseOpts = [ + {collate_fun, couch_views_util:collate_fun(View)}, + {persist_fun, fun couch_views_fdb:persist_chunks/3} + ], + ExtraOpts = case lists:keyfind(read_only, 1, Options) of + {read_only, Idx} -> + RedFun = make_read_only_reduce_fun(Lang, View, Idx), + [{reduce_fun, RedFun}]; + false -> + [ + {reduce_fun, make_reduce_fun(Lang, View)}, + {cache_fun, create_cache_fun({view, ViewId})} + ] + end, + TreeOpts = BaseOpts ++ ExtraOpts, + View#mrview{ + btree = ebtree:open(Tx, Prefix, get_order(view_btree), TreeOpts) + }. + + +get_order(id_btree) -> + min_order(config:get_integer("couch_views", "id_btree_node_size", 100)); +get_order(view_btree) -> + min_order(config:get_integer("couch_views", "view_btree_node_size", 100)). + + +min_order(V) when is_integer(V), V < 2 -> + 2; +min_order(V) when is_integer(V), V rem 2 == 0 -> + V; +min_order(V) -> + V + 1. 
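
ebtree node orders must be at least 2 and even, so get_order/1 above normalizes whatever node size is configured. A shell-style sketch of that normalization, assuming a TEST build where the internal functions are exported:

    %% Sketch only: relies on the -ifdef(TEST) export_all above.
    2   = couch_views_trees:min_order(1),
    100 = couch_views_trees:min_order(100),
    102 = couch_views_trees:min_order(101).
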
+ + +make_read_only_reduce_fun(Lang, View, NthRed) -> + RedFuns = [Src || {_, Src} <- View#mrview.reduce_funs], + if RedFuns /= [] -> ok; true -> + io:format(standard_error, "~p~n", [process_info(self(), current_stacktrace)]) + end, + LPad = lists:duplicate(NthRed - 1, []), + RPad = lists:duplicate(length(RedFuns) - NthRed, []), + FunSrc = lists:nth(NthRed, RedFuns), + fun + (KVs0, _ReReduce = false) -> + KVs1 = detuple_kvs(expand_dupes(KVs0)), + {ok, Result} = couch_query_servers:reduce(Lang, [FunSrc], KVs1), + {0, 0, LPad ++ Result ++ RPad}; + (Reductions, _ReReduce = true) -> + ExtractFun = fun(Reds) -> + {_Count, _Size, UReds} = Reds, + [lists:nth(NthRed, UReds)] + end, + UReds = lists:map(ExtractFun, Reductions), + {ok, Result} = case UReds of + [RedVal] -> + {ok, RedVal}; + _ -> + couch_query_servers:rereduce(Lang, [FunSrc], UReds) + end, + {0, 0, LPad ++ Result ++ RPad} + end. + + +make_reduce_fun(Lang, #mrview{} = View) -> + RedFuns = [Src || {_, Src} <- View#mrview.reduce_funs], + fun + (KVs0, _ReReduce = false) -> + KVs1 = expand_dupes(KVs0), + TotalSize = lists:foldl(fun({{K, _DocId}, V}, Acc) -> + KSize = couch_ejson_size:encoded_size(K), + VSize = couch_ejson_size:encoded_size(V), + KSize + VSize + Acc + end, 0, KVs1), + KVs2 = detuple_kvs(KVs1), + {ok, UserReds} = couch_query_servers:reduce(Lang, RedFuns, KVs2), + {length(KVs1), TotalSize, UserReds}; + (Reductions, _ReReduce = true) -> + FoldFun = fun({Count, Size, UserReds}, {CAcc, SAcc, URedAcc}) -> + NewCAcc = Count + CAcc, + NewSAcc = Size + SAcc, + NewURedAcc = [UserReds | URedAcc], + {NewCAcc, NewSAcc, NewURedAcc} + end, + InitAcc = {0, 0, []}, + FinalAcc = lists:foldl(FoldFun, InitAcc, Reductions), + {FinalCount, FinalSize, UReds} = FinalAcc, + {ok, Result} = couch_query_servers:rereduce(Lang, RedFuns, UReds), + {FinalCount, FinalSize, Result} + end. + + +create_cache_fun(TreeId) -> + CacheTid = case get(TreeId) of + undefined -> + Tid = ets:new(?MODULE, [protected, set]), + put(TreeId, {ebtree_cache, Tid}), + Tid; + {ebtree_cache, Tid} -> + Tid + end, + fun + (set, [Id, Node]) -> + true = ets:insert_new(CacheTid, {Id, Node}), + ok; + (clear, Id) -> + ets:delete(CacheTid, Id), + ok; + (get, Id) -> + case ets:lookup(CacheTid, Id) of + [{Id, Node}] -> Node; + [] -> undefined + end + end. + + +to_map_opts(Options) -> + Dir = case lists:keyfind(dir, 1, Options) of + {dir, D} -> D; + _ -> fwd + end, + + InclusiveEnd = case lists:keyfind(inclusive_end, 1, Options) of + {inclusive_end, IE} -> IE; + _ -> true + end, + + StartKey = case lists:keyfind(start_key, 1, Options) of + {start_key, SK} -> SK; + false when Dir == fwd -> ebtree:min(); + false when Dir == rev -> ebtree:max() + end, + + EndKey = case lists:keyfind(end_key, 1, Options) of + {end_key, EK} -> EK; + false when Dir == fwd -> ebtree:max(); + false when Dir == rev -> ebtree:min() + end, + + {Dir, StartKey, EndKey, InclusiveEnd}. + + +to_red_opts(Options) -> + {Dir, StartKey, EndKey, InclusiveEnd} = to_map_opts(Options), + + GroupKeyFun = case lists:keyfind(group_key_fun, 1, Options) of + {group_key_fun, GKF} -> GKF; + false -> fun({_Key, _DocId}) -> global_group end + end, + + {Dir, StartKey, EndKey, InclusiveEnd, GroupKeyFun}. 
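
to_map_opts/1 and to_red_opts/1 above fill in defaults when a caller supplies no explicit range: forward direction, inclusive end, and a scan over the whole tree. A sketch of the defaulted tuple, again assuming the TEST exports:

    %% Sketch only: ebtree:min/0 and ebtree:max/0 are the open-ended
    %% sentinels used for unbounded scans.
    {fwd, Start, End, true} = couch_views_trees:to_map_opts([]),
    Start = ebtree:min(),
    End = ebtree:max().
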
+ + +gather_update_info(Tx, Mrst, Docs) -> + % A special token used to indicate that the row should be deleted + DeleteRef = erlang:make_ref(), + + AllDocIds = [DocId || #{id := DocId} <- Docs], + + BaseIdMap = lists:foldl(fun(DocId, Acc) -> + maps:put(DocId, DeleteRef, Acc) + end, #{}, AllDocIds), + + % Build the initial set of rows to delete + % ExistingViewKeys is a list of {DocId, [{ViewId, [Key | _]} | _]} + ExistingViewKeys = ebtree:lookup_multi(Tx, Mrst#mrst.id_btree, AllDocIds), + + % For each view, create an initial map that contains the + % list of keys to delete. The final result is a map of + % maps: + % #{ViewId => #{Key => DeleteRef}} + BaseViewMaps = lists:foldl(fun({DocId, ViewIdKeys}, ViewIdAcc1) -> + lists:foldl(fun({ViewId, Keys}, ViewIdAcc2) -> + OldViewMap = maps:get(ViewId, ViewIdAcc2, #{}), + NewViewMap = lists:foldl(fun(Key, ViewMapAcc) -> + maps:put({Key, DocId}, DeleteRef, ViewMapAcc) + end, OldViewMap, Keys), + maps:put(ViewId, NewViewMap, ViewIdAcc2) + end, ViewIdAcc1, ViewIdKeys) + end, #{}, ExistingViewKeys), + + % Build our base accumulator + InfoAcc1 = #{ + ids => BaseIdMap, + views => BaseViewMaps, + delete_ref => DeleteRef + }, + + % Insert results from each document into the map of + % maps which leaves us with a final shape of: + % #{ViewId => #{Key => Value}} + % where Value may be a copy of `DeleteRef` which flags + % that the Key should be deleted from the view. + lists:foldl(fun(Doc, InfoAcc2) -> + insert_doc(Mrst, Doc, InfoAcc2) + end, InfoAcc1, Docs). + + +insert_doc(_Mrst, #{deleted := true} = _Doc, InfoAcc) -> + InfoAcc; +insert_doc(Mrst, Doc, InfoAcc0) -> + #{ + id := DocId, + results := Results + } = Doc, + + FinalAcc = lists:foldl(fun({View, RawNewRows}, {IdKeyAcc, InfoAcc1}) -> + #mrview{ + id_num = ViewId + } = View, + #{ + views := ViewMaps + } = InfoAcc1, + + DedupedRows = dedupe_rows(View, RawNewRows), + IdKeys = lists:usort([K || {K, _V} <- DedupedRows]), + + OldViewMap = maps:get(ViewId, ViewMaps, #{}), + NewViewMap = lists:foldl(fun({K, V}, ViewMapAcc) -> + maps:put({K, DocId}, V, ViewMapAcc) + end, OldViewMap, DedupedRows), + + {[{ViewId, IdKeys} | IdKeyAcc], InfoAcc1#{ + views := maps:put(ViewId, NewViewMap, ViewMaps) + }} + end, {[], InfoAcc0}, lists:zip(Mrst#mrst.views, Results)), + + {IdRows, #{ids := IdMap} = InfoAcc2} = FinalAcc, + + % Don't store a row in the id_btree if it hasn't got any + % keys that will need to be deleted. + NonEmptyRows = [1 || {_ViewId, Rows} <- IdRows, Rows /= []], + if length(NonEmptyRows) == 0 -> InfoAcc2; true -> + InfoAcc2#{ids := maps:put(DocId, IdRows, IdMap)} + end. + + +update_btree(Tx, BTree, Map, DeleteRef) -> + {ToRemove, ToInsert} = maps:fold(fun(Key, Value, {Keys, Rows}) -> + case Value of + DeleteRef -> {[Key | Keys], Rows}; + _ -> {Keys, [{Key, Value} | Rows]} + end + end, {[], []}, Map), + + lists:foreach(fun(Key) -> + ebtree:delete(Tx, BTree, Key) + end, ToRemove), + + ebtree:insert_multi(Tx, BTree, ToInsert). + + +dedupe_rows(View, KVs0) -> + CollateFun = couch_views_util:collate_fun(View), + KVs1 = lists:sort(fun({KeyA, ValA}, {KeyB, ValB}) -> + case CollateFun({KeyA, <<>>}, {KeyB, <<>>}) of + lt -> true; + eq -> ValA =< ValB; + gt -> false + end + end, KVs0), + dedupe_rows_int(CollateFun, KVs1). 
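
update_btree/4 above splits each per-view map into deletions and upserts by comparing values against the DeleteRef sentinel created in gather_update_info/3. A standalone sketch of that split with illustrative keys:

    %% Sketch only: keys and values below are made up.
    DeleteRef = erlang:make_ref(),
    Map = #{a => DeleteRef, b => 1, c => 2},
    {ToRemove, ToInsert} = maps:fold(fun(Key, Value, {Keys, Rows}) ->
        case Value of
            DeleteRef -> {[Key | Keys], Rows};
            _ -> {Keys, [{Key, Value} | Rows]}
        end
    end, {[], []}, Map),
    [a] = ToRemove,
    true = (lists:sort(ToInsert) =:= [{b, 1}, {c, 2}]).
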
+ + +dedupe_rows_int(_CollateFun, []) -> + []; + +dedupe_rows_int(_CollateFun, [KV]) -> + [KV]; + +dedupe_rows_int(CollateFun, [{K1, V1} | RestKVs]) -> + RestDeduped = dedupe_rows_int(CollateFun, RestKVs), + case RestDeduped of + [{K2, V2} | RestRestDeduped] -> + case CollateFun({K1, <<>>}, {K2, <<>>}) of + eq -> [{K1, combine_vals(V1, V2)} | RestRestDeduped]; + _ -> [{K1, V1} | RestDeduped] + end; + [] -> + [{K1, V1}] + end. + + +combine_vals(V1, {dups, V2}) -> + {dups, [V1 | V2]}; +combine_vals(V1, V2) -> + {dups, [V1, V2]}. + + +expand_dupes([]) -> + []; +expand_dupes([{K, {dups, Dups}} | Rest]) -> + Expanded = [{K, D} || D <- Dups], + Expanded ++ expand_dupes(Rest); +expand_dupes([{K, V} | Rest]) -> + [{K, V} | expand_dupes(Rest)]. + + +detuple_kvs([]) -> + []; +detuple_kvs([KV | Rest]) -> + {{Key, Id}, Value} = KV, + [[[Key, Id], Value] | detuple_kvs(Rest)]. + + +id_tree_prefix(DbPrefix, Sig) -> + Key = {?DB_VIEWS, ?VIEW_TREES, Sig, ?VIEW_ID_TREE}, + erlfdb_tuple:pack(Key, DbPrefix). + + +view_tree_prefix(DbPrefix, Sig, ViewId) -> + Key = {?DB_VIEWS, ?VIEW_TREES, Sig, ?VIEW_ROW_TREES, ViewId}, + erlfdb_tuple:pack(Key, DbPrefix). + + +-ifdef(TEST). +-include_lib("eunit/include/eunit.hrl"). + +dedupe_basic_test() -> + View = #mrview{}, + ?assertEqual([{1, 1}], dedupe_rows(View, [{1, 1}])). + +dedupe_simple_test() -> + View = #mrview{}, + ?assertEqual([{1, {dups, [1, 2]}}], dedupe_rows(View, [{1, 1}, {1, 2}])). + +-endif. diff --git a/src/couch_views/src/couch_views_updater.erl b/src/couch_views/src/couch_views_updater.erl new file mode 100644 index 000000000..7e5466eb8 --- /dev/null +++ b/src/couch_views/src/couch_views_updater.erl @@ -0,0 +1,111 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. +-module(couch_views_updater). + +-export([ + index/6 +]). + + +-include_lib("couch/include/couch_db.hrl"). +-include_lib("couch_mrview/include/couch_mrview.hrl"). + +% If the doc revision doesn't not match the NewRevId passed here we can ignore +% the document since it is then a conflict document and it doesn't need +% to be indexed. +index(Db, #doc{id = Id, revs = Revs} = Doc, _NewWinner, _OldWinner, NewRevId, + Seq) -> + try + {Depth, [FirstRev | _]} = Revs, + DocRev = {Depth, FirstRev}, + if DocRev /= NewRevId -> ok; true -> + index_int(Db, Doc, Seq) + end + catch + error:{erlfdb_error, ErrCode} when is_integer(ErrCode) -> + Stack = erlang:get_stacktrace(), + DbName = fabric2_db:name(Db), + couch_log:error("Mango index erlfdb error Db ~s Doc ~p ~p", + [DbName, Id, ErrCode]), + erlang:raise(error, {erlfdb_error, ErrCode}, Stack); + Error:Reason -> + DbName = fabric2_db:name(Db), + couch_log:error("Mango index error for Db ~s Doc ~p ~p ~p", + [DbName, Id, Error, Reason]) + end. 
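
index/6 above only indexes the revision that just became the winner: the document's {Pos, Rev} pair must equal NewRevId, otherwise the write was a conflict branch and is skipped. A sketch of that comparison with made-up revisions:

    %% Sketch only: revision values are illustrative.
    NewRevId = {2, <<"def">>},
    Revs = {2, [<<"def">>, <<"abc">>]},
    {Depth, [FirstRev | _]} = Revs,
    true = ({Depth, FirstRev} =:= NewRevId).
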
+ + +% Check if design doc is an interactive index and kick off background worker +% to build the new index up to the creation_vs +index_int(Db, #doc{id = <<?DESIGN_DOC_PREFIX, _/binary>>, + deleted = false} = DDoc, Seq) -> + DbName = fabric2_db:name(Db), + + case couch_views_ddoc:is_interactive(DDoc) of + true -> + {ok, Mrst} = couch_mrview_util:ddoc_to_mrst(DbName, DDoc), + case couch_views_fdb:get_creation_vs(Db, Mrst) of + not_found -> + couch_views_fdb:new_interactive_index(Db, Mrst, Seq), + {ok, _} = couch_views_jobs:build_view_async(Db, Mrst); + _ -> + ok + end; + false -> + ok + end, + write_doc(Db, DDoc); + + +index_int(Db, #doc{} = Doc, _Seq) -> + write_doc(Db, Doc). + + +write_doc(Db, #doc{deleted = Deleted} = Doc) -> + DbName = fabric2_db:name(Db), + DDocs = couch_views_ddoc:get_interactive_list(Db), + + Result0 = [#{ + id => Doc#doc.id, + results => [], + deleted => Deleted, + doc => Doc + }], + + %% Interactive updates do not update the views update_seq + State = #{ + last_seq => false + }, + + lists:foreach(fun(DDoc) -> + {ok, Mrst0} = couch_mrview_util:ddoc_to_mrst(DbName, DDoc), + Mrst1 = couch_views_trees:open(Db, Mrst0), + + case should_index_doc(Doc, Mrst1) of + true -> + {Mrst2, Result1} = couch_views_indexer:map_docs(Mrst1, Result0), + DocNumber = couch_views_indexer:write_docs(Db, Mrst2, + Result1, State), + couch_views_plugin:after_interactive_write(Db, Mrst2, + Result1, DocNumber), + couch_eval:release_map_context(Mrst2#mrst.qserver); + false -> + ok + end + end, DDocs). + + +should_index_doc(<<?DESIGN_DOC_PREFIX, _/binary>>, Mrst) -> + lists:keymember(<<"include_design">>, 1, Mrst#mrst.design_opts); + +should_index_doc(_, _) -> + true. diff --git a/src/couch_views/src/couch_views_util.erl b/src/couch_views/src/couch_views_util.erl new file mode 100644 index 000000000..1e3e4beef --- /dev/null +++ b/src/couch_views/src/couch_views_util.erl @@ -0,0 +1,340 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_views_util). + + +-export([ + ddoc_to_mrst/2, + collate_fun/1, + validate_args/1, + validate_args/2, + is_paginated/1, + active_tasks_info/5 +]). + + +-include_lib("couch/include/couch_db.hrl"). +-include_lib("couch_mrview/include/couch_mrview.hrl"). +-include("couch_views.hrl"). 
+ + +ddoc_to_mrst(DbName, #doc{id=Id, body={Fields}}) -> + MakeDict = fun({Name, {MRFuns}}, DictBySrcAcc) -> + case couch_util:get_value(<<"map">>, MRFuns) of + MapSrc when MapSrc /= undefined -> + RedSrc = couch_util:get_value(<<"reduce">>, MRFuns, null), + {ViewOpts} = couch_util:get_value(<<"options">>, MRFuns, {[]}), + View = case dict:find({MapSrc, ViewOpts}, DictBySrcAcc) of + {ok, View0} -> View0; + error -> #mrview{def=MapSrc, options=ViewOpts} + end, + {MapNames, RedSrcs} = case RedSrc of + null -> + MNames = [Name | View#mrview.map_names], + {MNames, View#mrview.reduce_funs}; + _ -> + RedFuns = [{Name, RedSrc} | View#mrview.reduce_funs], + {View#mrview.map_names, RedFuns} + end, + View2 = View#mrview{map_names=MapNames, reduce_funs=RedSrcs}, + dict:store({MapSrc, ViewOpts}, View2, DictBySrcAcc); + undefined -> + DictBySrcAcc + end; + ({Name, Else}, DictBySrcAcc) -> + couch_log:error("design_doc_to_view_group ~s views ~p", + [Name, Else]), + DictBySrcAcc + end, + {DesignOpts} = proplists:get_value(<<"options">>, Fields, {[]}), + Partitioned = proplists:get_value(<<"partitioned">>, DesignOpts, false), + + {RawViews} = couch_util:get_value(<<"views">>, Fields, {[]}), + BySrc = lists:foldl(MakeDict, dict:new(), RawViews), + + NumViews = fun({_, View}, N) -> + {View#mrview{id_num = N}, N+1} + end, + {Views, _} = lists:mapfoldl(NumViews, 0, lists:sort(dict:to_list(BySrc))), + + Language = couch_util:get_value(<<"language">>, Fields, <<"javascript">>), + Lib = couch_util:get_value(<<"lib">>, RawViews, {[]}), + + IdxState = #mrst{ + db_name=DbName, + idx_name=Id, + lib=Lib, + views=Views, + language=Language, + design_opts=DesignOpts, + partitioned=Partitioned + }, + SigInfo = {Views, Language, DesignOpts, couch_index_util:sort_lib(Lib)}, + {ok, IdxState#mrst{sig=couch_hash:md5_hash(term_to_binary(SigInfo))}}. + + +collate_fun(View) -> + #mrview{ + options = Options + } = View, + case couch_util:get_value(<<"collation">>, Options) of + <<"raw">> -> fun collate_raw/2; + _ -> fun collate_rows/2 + end. + + +collate_raw(A, A) -> eq; +collate_raw(A, B) when A < B -> lt; +collate_raw(A, B) when A > B -> gt. + + +collate_rows({KeyA, DocIdA}, {KeyB, DocIdB}) -> + case couch_ejson_compare:less(KeyA, KeyB) of + N when N < 0 -> lt; + 0 when DocIdA < DocIdB -> lt; + 0 when DocIdA == DocIdB -> eq; + 0 -> gt; % when DocIdA > DocIdB + N when N > 0 -> gt + end; + +collate_rows(KeyA, KeyB) -> + % When collating reduce group keys they don't + % come with a docid. + case couch_ejson_compare:less(KeyA, KeyB) of + N when N < 0 -> lt; + 0 -> eq; + N when N > 0 -> gt + end. + + +validate_args(Args) -> + validate_args(Args, []). 
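
collate_fun/1 above selects raw Erlang term ordering only when the view opts into "raw" collation; otherwise keys are ordered with couch_ejson_compare and ties are broken by doc id, so equal keys still have a stable order in the tree. A sketch of the default behaviour, assuming the #mrview{} record (with its default empty options) is available and the couch_ejson_compare NIF is loaded:

    %% Sketch only: keys and doc ids are illustrative.
    CollateFun = couch_views_util:collate_fun(#mrview{}),
    lt = CollateFun({<<"a">>, <<"doc-1">>}, {<<"b">>, <<"doc-0">>}),
    lt = CollateFun({<<"a">>, <<"doc-1">>}, {<<"a">>, <<"doc-2">>}),
    eq = CollateFun({<<"a">>, <<"doc-1">>}, {<<"a">>, <<"doc-1">>}).
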
+ + +% This is mostly a copy of couch_mrview_util:validate_args/1 but it doesn't +% update start / end keys and also throws a not_implemented error for reduce +% +validate_args(#mrargs{} = Args, Opts) -> + GroupLevel = determine_group_level(Args), + Reduce = Args#mrargs.reduce, + + case Reduce == undefined orelse is_boolean(Reduce) of + true -> ok; + _ -> mrverror(<<"Invalid `reduce` value.">>) + end, + + case {Args#mrargs.view_type, Reduce} of + {map, true} -> mrverror(<<"Reduce is invalid for map-only views.">>); + _ -> ok + end, + + case {Args#mrargs.view_type, GroupLevel, Args#mrargs.keys} of + {red, exact, _} -> ok; + {red, _, KeyList} when is_list(KeyList) -> + Msg = <<"Multi-key fetchs for reduce views must use `group=true`">>, + mrverror(Msg); + _ -> ok + end, + + case Args#mrargs.keys of + Keys when is_list(Keys) -> ok; + undefined -> ok; + _ -> mrverror(<<"`keys` must be an array of strings.">>) + end, + + case {Args#mrargs.keys, Args#mrargs.start_key, + Args#mrargs.end_key} of + {undefined, _, _} -> ok; + {[], _, _} -> ok; + {[_|_], undefined, undefined} -> ok; + _ -> mrverror(<<"`keys` is incompatible with `key`" + ", `start_key` and `end_key`">>) + end, + + case Args#mrargs.start_key_docid of + undefined -> ok; + SKDocId0 when is_binary(SKDocId0) -> ok; + _ -> mrverror(<<"`start_key_docid` must be a string.">>) + end, + + case Args#mrargs.end_key_docid of + undefined -> ok; + EKDocId0 when is_binary(EKDocId0) -> ok; + _ -> mrverror(<<"`end_key_docid` must be a string.">>) + end, + + case Args#mrargs.direction of + fwd -> ok; + rev -> ok; + _ -> mrverror(<<"Invalid direction.">>) + end, + + case {Args#mrargs.limit >= 0, Args#mrargs.limit == undefined} of + {true, _} -> ok; + {_, true} -> ok; + _ -> mrverror(<<"`limit` must be a positive integer.">>) + end, + + case Args#mrargs.skip < 0 of + true -> mrverror(<<"`skip` must be >= 0">>); + _ -> ok + end, + + case {Args#mrargs.view_type, GroupLevel} of + {red, exact} -> ok; + {_, 0} -> ok; + {red, Int} when is_integer(Int), Int >= 0 -> ok; + {red, _} -> mrverror(<<"`group_level` must be >= 0">>); + {map, _} -> mrverror(<<"Invalid use of grouping on a map view.">>) + end, + + case Args#mrargs.stable of + true -> ok; + false -> ok; + _ -> mrverror(<<"Invalid value for `stable`.">>) + end, + + case Args#mrargs.update of + true -> ok; + false -> ok; + lazy -> ok; + _ -> mrverror(<<"Invalid value for `update`.">>) + end, + + case is_boolean(Args#mrargs.inclusive_end) of + true -> ok; + _ -> mrverror(<<"Invalid value for `inclusive_end`.">>) + end, + + case {Args#mrargs.view_type, Args#mrargs.include_docs} of + {red, true} -> mrverror(<<"`include_docs` is invalid for reduce">>); + {_, ID} when is_boolean(ID) -> ok; + _ -> mrverror(<<"Invalid value for `include_docs`">>) + end, + + case {Args#mrargs.view_type, Args#mrargs.conflicts} of + {_, undefined} -> ok; + {map, V} when is_boolean(V) -> ok; + {red, undefined} -> ok; + {map, _} -> mrverror(<<"Invalid value for `conflicts`.">>); + {red, _} -> mrverror(<<"`conflicts` is invalid for reduce views.">>) + end, + + case is_boolean(Args#mrargs.sorted) of + true -> ok; + _ -> mrverror(<<"Invalid value for `sorted`.">>) + end, + + MaxPageSize = couch_util:get_value(page_size, Opts, 0), + case {Args#mrargs.page_size, MaxPageSize} of + {_, 0} -> ok; + {Value, _} -> validate_limit(<<"page_size">>, Value, 1, MaxPageSize) + end, + + case {Args#mrargs.skip, MaxPageSize} of + {_, 0} -> ok; + {Skip, _} -> validate_limit(<<"skip">>, Skip, 0, MaxPageSize) + end, + + case {is_list(Args#mrargs.keys), 
is_integer(Args#mrargs.page_size)} of + {true, true} -> + mrverror(<<"`page_size` is incompatible with `keys`">>); + _ -> + ok + end, + + case {Reduce, Args#mrargs.view_type} of + {false, _} -> ok; + {_, red} -> throw(not_implemented); + _ -> ok + end, + + Args#mrargs{group_level=GroupLevel}. + +validate_limit(Name, Value, _Min, _Max) when not is_integer(Value) -> + mrverror(<<"`", Name/binary, "` should be an integer">>); + +validate_limit(Name, Value, Min, Max) when Value > Max -> + range_error_msg(Name, Min, Max); + +validate_limit(Name, Value, Min, Max) when Value < Min -> + range_error_msg(Name, Min, Max); + +validate_limit(_Name, _Value, _Min, _Max) -> + ok. + +range_error_msg(Name, Min, Max) -> + MinBin = list_to_binary(integer_to_list(Min)), + MaxBin = list_to_binary(integer_to_list(Max)), + mrverror(<< + "`", + Name/binary, + "` should be an integer in range [", + MinBin/binary, + " .. ", + MaxBin/binary, + "]" + >>). + + +determine_group_level(#mrargs{group=undefined, group_level=undefined}) -> + 0; + +determine_group_level(#mrargs{group=false, group_level=undefined}) -> + 0; + +determine_group_level(#mrargs{group=false, group_level=Level}) when Level > 0 -> + mrverror(<<"Can't specify group=false and group_level>0 at the same time">>); + +determine_group_level(#mrargs{group=true, group_level=undefined}) -> + exact; + +determine_group_level(#mrargs{group_level=GroupLevel}) -> + GroupLevel. + + +mrverror(Mesg) -> + throw({query_parse_error, Mesg}). + + +is_paginated(#mrargs{page_size = PageSize}) when is_integer(PageSize) -> + true; + +is_paginated(_) -> + false. + + +active_tasks_info(ChangesDone, DbName, DDocId, LastSeq, DBSeq) -> + #{ + <<"type">> => <<"indexer">>, + <<"database">> => DbName, + <<"changes_done">> => ChangesDone, + <<"design_document">> => DDocId, + <<"current_version_stamp">> => convert_seq_to_stamp(LastSeq), + <<"db_version_stamp">> => convert_seq_to_stamp(DBSeq), + <<"node">> => erlang:atom_to_binary(node(), utf8), + <<"pid">> => list_to_binary(pid_to_list(self())) + }. + + +convert_seq_to_stamp(<<"0">>) -> + <<"0-0-0">>; + +convert_seq_to_stamp(undefined) -> + <<"0-0-0">>; + +convert_seq_to_stamp(Seq) -> + {_, Stamp, Batch, DocNumber} = fabric2_fdb:seq_to_vs(Seq), + VS = integer_to_list(Stamp) ++ "-" ++ integer_to_list(Batch) ++ "-" + ++ integer_to_list(DocNumber), + list_to_binary(VS). diff --git a/src/couch_views/test/couch_views_active_tasks_test.erl b/src/couch_views/test/couch_views_active_tasks_test.erl new file mode 100644 index 000000000..b7f36a343 --- /dev/null +++ b/src/couch_views/test/couch_views_active_tasks_test.erl @@ -0,0 +1,171 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_views_active_tasks_test). + + +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("couch/include/couch_db.hrl"). +-include_lib("couch_views/include/couch_views.hrl"). +-include_lib("fabric/test/fabric2_test.hrl"). + + +-define(MAP_FUN1, <<"map_fun1">>). +-define(MAP_FUN2, <<"map_fun2">>). +-define(INDEX_FOO, <<"_design/foo">>). 
+-define(INDEX_BAR, <<"_design/bar">>). +-define(TOTAL_DOCS, 1000). + + +setup() -> + Ctx = test_util:start_couch([ + fabric, + couch_jobs, + couch_js, + couch_views + ]), + Ctx. + + +cleanup(Ctx) -> + test_util:stop_couch(Ctx). + + +foreach_setup() -> + {ok, Db} = fabric2_db:create(?tempdb(), [{user_ctx, ?ADMIN_USER}]), + + DDoc = create_ddoc(?INDEX_FOO, ?MAP_FUN1), + Docs = make_docs(?TOTAL_DOCS), + fabric2_db:update_docs(Db, [DDoc | Docs]), + + meck:new(couch_views_batch, [passthrough]), + meck:expect(couch_views_batch, success, fun(_, _) -> 100 end), + + {Db, DDoc}. + + +foreach_teardown({Db, _}) -> + meck:unload(), + ok = fabric2_db:delete(fabric2_db:name(Db), []). + + +active_tasks_test_() -> + { + "Active Tasks test", + { + setup, + fun setup/0, + fun cleanup/1, + { + foreach, + fun foreach_setup/0, + fun foreach_teardown/1, + [ + ?TDEF_FE(verify_basic_active_tasks), + ?TDEF_FE(verify_muliple_active_tasks) + ] + } + } + }. + + +verify_basic_active_tasks({Db, DDoc}) -> + pause_indexer_for_changes(self()), + couch_views:build_indices(Db, [DDoc]), + {IndexerPid, {changes_done, ChangesDone}} = wait_to_reach_changes(10000), + [ActiveTask] = fabric2_active_tasks:get_active_tasks(), + ChangesDone1 = maps:get(<<"changes_done">>, ActiveTask), + Type = maps:get(<<"type">>, ActiveTask), + DbName = maps:get(<<"database">>, ActiveTask), + DDocId = maps:get(<<"design_document">>, ActiveTask), + Node = maps:get(<<"node">>, ActiveTask), + PidBin = maps:get(<<"pid">>, ActiveTask), + Pid = erlang:list_to_pid(binary_to_list(PidBin)), + ?assertEqual(<<"indexer">>, Type), + ?assertEqual(fabric2_db:name(Db), DbName), + ?assertEqual(?INDEX_FOO, DDocId), + ?assertEqual(atom_to_binary(node(), utf8), Node), + ?assert(is_pid(Pid)), + ?assert(is_process_alive(Pid)), + ?assertEqual(IndexerPid, Pid), + IndexerPid ! continue, + % we assume the indexer has run for a bit so it has to > 0 + ?assert(ChangesDone1 > 0), + ?assert(ChangesDone1 =< ChangesDone), + ?assertEqual(ChangesDone, ?TOTAL_DOCS). + + +verify_muliple_active_tasks({Db, DDoc}) -> + DDoc2 = create_ddoc(?INDEX_BAR, ?MAP_FUN2), + fabric2_db:update_doc(Db, DDoc2, []), + pause_indexer_for_changes(self()), + couch_views:build_indices(Db, [DDoc, DDoc2]), + + {IndexerPid, {changes_done, ChangesDone}} = wait_to_reach_changes(10000), + {IndexerPid2, {changes_done, ChangesDone2}} = wait_to_reach_changes(10000), + + ActiveTasks = fabric2_active_tasks:get_active_tasks(), + + ?assertEqual(length(ActiveTasks), 2), + + IndexerPid ! continue, + IndexerPid2 ! continue, + + ?assertEqual(ChangesDone, ?TOTAL_DOCS), + ?assertEqual(ChangesDone2, ?TOTAL_DOCS). + + +create_ddoc(DDocId, IndexName) -> + couch_doc:from_json_obj({[ + {<<"_id">>, DDocId}, + {<<"views">>, {[ + {IndexName, {[ + {<<"map">>, <<"function(doc) {emit(doc.val, doc.val);}">>} + ]}} + ]}} + ]}). + + +doc(Id, Val) -> + couch_doc:from_json_obj({[ + {<<"_id">>, list_to_binary(integer_to_list(Id))}, + {<<"val">>, Val} + ]}). + + +make_docs(Count) -> + [doc(I, Count) || I <- lists:seq(1, Count)]. + + +pause_indexer_for_changes(ParentPid) -> + meck:new(couch_views_util, [passthrough]), + meck:expect(couch_views_util, active_tasks_info, fun(ChangesDone, + DbName, DDocId, LastSeq, DBSeq) -> + case ChangesDone of + ?TOTAL_DOCS -> + ParentPid ! {self(), {changes_done, ChangesDone}}, + receive continue -> ok end; + _ -> + ok + end, + meck:passthrough([ChangesDone, DbName, DDocId, LastSeq, + DBSeq]) + end). 
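% For reference, the map unpacked by the assertions in
% verify_basic_active_tasks/1 above is built by active_tasks_info/5 earlier in
% this diff; a made-up value consistent with that constructor (every field
% value below is illustrative):
%
%     #{
%         <<"type">> => <<"indexer">>,
%         <<"database">> => <<"some-db-name">>,
%         <<"changes_done">> => 400,
%         <<"design_document">> => <<"_design/foo">>,
%         <<"current_version_stamp">> => <<"123456789-0-1">>,
%         <<"db_version_stamp">> => <<"123456790-0-42">>,
%         <<"node">> => <<"node1@127.0.0.1">>,
%         <<"pid">> => <<"<0.123.0>">>
%     }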
+ + +wait_to_reach_changes(Timeout) -> + receive + {Pid, {changes_done, ChangesDone}} when is_pid(Pid) -> + {Pid, {changes_done, ChangesDone}} + after Timeout -> + error(timeout_in_pause_indexer_for_changes) + end. diff --git a/src/couch_views/test/couch_views_batch_test.erl b/src/couch_views/test/couch_views_batch_test.erl new file mode 100644 index 000000000..78e68925e --- /dev/null +++ b/src/couch_views/test/couch_views_batch_test.erl @@ -0,0 +1,90 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_views_batch_test). + + +-include_lib("eunit/include/eunit.hrl"). +-include_lib("fabric/test/fabric2_test.hrl"). +-include_lib("couch_mrview/include/couch_mrview.hrl"). + + +batch_test_() -> + { + "Test view batch sizing", + { + setup, + fun setup/0, + fun cleanup/1, + with([ + ?TDEF(basic), + ?TDEF(search_success), + ?TDEF(sense_success), + ?TDEF(failure), + ?TDEF(failure_switches_to_sense) + ]) + } + }. + + +setup() -> + test_util:start_couch(). + + +cleanup(Ctx) -> + test_util:stop_couch(Ctx). + + +basic(_) -> + erase(couch_views_batch), + ?assertEqual(100, couch_views_batch:start(#mrst{})). + + +search_success(_) -> + erase(couch_views_batch), + couch_views_batch:start(#mrst{}), + couch_views_batch:success(#mrst{}, ustats(0, 0, 0)), + ?assertEqual(600, couch_views_batch:start(#mrst{})). + + +sense_success(_) -> + erase(couch_views_batch), + couch_views_batch:start(#mrst{}), + % Exceeding our threshold switches from search to sense + couch_views_batch:success(#mrst{}, ustats(5000, 10000000, 10000)), + ?assertEqual(80, couch_views_batch:start(#mrst{})), + couch_views_batch:success(#mrst{}, ustats(0, 0, 0)), + ?assertEqual(180, couch_views_batch:start(#mrst{})). + + +failure(_) -> + erase(couch_views_batch), + couch_views_batch:start(#mrst{}), + couch_views_batch:failure(#mrst{}), + ?assertEqual(50, couch_views_batch:start(#mrst{})). + + +failure_switches_to_sense(_) -> + erase(couch_views_batch), + couch_views_batch:start(#mrst{}), + couch_views_batch:failure(#mrst{}), + couch_views_batch:start(#mrst{}), + couch_views_batch:success(#mrst{}, ustats(0, 0, 0)), + ?assertEqual(150, couch_views_batch:start(#mrst{})). + + +ustats(DocsRead, TxSize, TotalKVs) -> + #{ + docs_read => DocsRead, + tx_size => TxSize, + total_kvs => TotalKVs + }. diff --git a/src/couch_views/test/couch_views_cleanup_test.erl b/src/couch_views/test/couch_views_cleanup_test.erl new file mode 100644 index 000000000..54048c968 --- /dev/null +++ b/src/couch_views/test/couch_views_cleanup_test.erl @@ -0,0 +1,411 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_views_cleanup_test). + + +-include_lib("couch/include/couch_db.hrl"). +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("eunit/include/eunit.hrl"). +-include_lib("couch_views/include/couch_views.hrl"). +-include_lib("couch_mrview/include/couch_mrview.hrl"). +-include_lib("fabric/include/fabric2.hrl"). +-include_lib("fabric/test/fabric2_test.hrl"). + + +clean_old_indices_test_() -> + { + "Test cleanup of stale indices", + { + setup, + fun setup_all/0, + fun cleanup_all/1, + { + foreach, + fun setup/0, + fun cleanup/1, + [ + ?TDEF_FE(empty_db), + ?TDEF_FE(db_with_no_ddocs), + ?TDEF_FE(db_with_ddoc), + ?TDEF_FE(db_with_many_ddocs), + ?TDEF_FE(after_ddoc_deletion), + ?TDEF_FE(all_ddocs_deleted), + ?TDEF_FE(after_ddoc_recreated), + ?TDEF_FE(refcounted_sigs), + ?TDEF_FE(removes_old_jobs), + ?TDEF_FE(after_job_accepted_initial_build), + ?TDEF_FE(after_job_accepted_rebuild), + ?TDEF_FE(during_index_initial_build), + ?TDEF_FE(during_index_rebuild) + ] + } + } + }. + + +setup_all() -> + test_util:start_couch([ + fabric, + couch_jobs, + couch_js, + couch_views + ]). + + +cleanup_all(Ctx) -> + test_util:stop_couch(Ctx). + + +setup() -> + Opts = [{user_ctx, ?ADMIN_USER}], + {ok, Db} = fabric2_db:create(?tempdb(), Opts), + Db. + + +cleanup(Db) -> + meck:unload(), + ok = fabric2_db:delete(fabric2_db:name(Db), []). + + +empty_db(Db) -> + ?assertEqual(ok, fabric2_index:cleanup(Db)). + + +db_with_no_ddocs(Db) -> + create_docs(Db, 10), + ?assertEqual(ok, fabric2_index:cleanup(Db)). + + +db_with_ddoc(Db) -> + create_docs(Db, 10), + DDoc = create_ddoc(Db, <<"foo">>), + ?assertEqual(10, length(run_query(Db, DDoc))), + ?assertEqual(ok, fabric2_index:cleanup(Db)), + ?assertEqual(10, length(run_query(Db, DDoc))). + + +db_with_many_ddocs(Db) -> + create_docs(Db, 10), + DDocs = create_ddocs(Db, 5), + lists:foreach(fun(DDoc) -> + ?assertEqual(10, length(run_query(Db, DDoc))) + end, DDocs), + ?assertEqual(ok, fabric2_index:cleanup(Db)). + + +after_ddoc_deletion(Db) -> + create_docs(Db, 10), + DDocs = create_ddocs(Db, 2), + lists:foreach(fun(DDoc) -> + ?assertEqual(10, length(run_query(Db, DDoc))) + end, DDocs), + [ToDel | RestDDocs] = DDocs, + delete_doc(Db, ToDel), + % Not yet cleaned up + ?assertEqual(true, view_has_data(Db, ToDel)), + ?assertEqual(ok, fabric2_index:cleanup(Db)), + ?assertError({ddoc_deleted, _}, run_query(Db, ToDel)), + lists:foreach(fun(DDoc) -> + ?assertEqual(10, length(run_query(Db, DDoc))) + end, RestDDocs). + + +all_ddocs_deleted(Db) -> + create_docs(Db, 10), + DDocs = create_ddocs(Db, 5), + lists:foreach(fun(DDoc) -> + ?assertEqual(10, length(run_query(Db, DDoc))) + end, DDocs), + lists:foreach(fun(DDoc) -> + delete_doc(Db, DDoc) + end, DDocs), + % Not yet cleaned up + lists:foreach(fun(DDoc) -> + ?assertEqual(true, view_has_data(Db, DDoc)) + end, DDocs), + ?assertEqual(ok, fabric2_index:cleanup(Db)), + lists:foreach(fun(DDoc) -> + ?assertError({ddoc_deleted, _}, run_query(Db, DDoc)) + end, DDocs). 
+ + +after_ddoc_recreated(Db) -> + create_docs(Db, 10), + DDocs = create_ddocs(Db, 3), + lists:foreach(fun(DDoc) -> + ?assertEqual(10, length(run_query(Db, DDoc))) + end, DDocs), + [ToDel | RestDDocs] = DDocs, + Deleted = delete_doc(Db, ToDel), + % Not yet cleaned up + ?assertEqual(true, view_has_data(Db, ToDel)), + ?assertEqual(ok, fabric2_index:cleanup(Db)), + ?assertError({ddoc_deleted, _}, run_query(Db, ToDel)), + lists:foreach(fun(DDoc) -> + ?assertEqual(10, length(run_query(Db, DDoc))) + end, RestDDocs), + recreate_doc(Db, Deleted), + lists:foreach(fun(DDoc) -> + ?assertEqual(10, length(run_query(Db, DDoc))) + end, DDocs), + ?assertEqual(ok, fabric2_index:cleanup(Db)), + lists:foreach(fun(DDoc) -> + ?assertEqual(10, length(run_query(Db, DDoc))) + end, DDocs). + + +refcounted_sigs(Db) -> + create_docs(Db, 10), + DDoc1 = create_ddoc(Db, <<"1">>), + DDoc2 = create_doc(Db, <<"_design/2">>, DDoc1#doc.body), + ?assertEqual(10, length(run_query(Db, DDoc1))), + ?assertEqual(10, length(run_query(Db, DDoc2))), + + ?assertEqual(true, view_has_data(Db, DDoc1)), + ?assertEqual(true, view_has_data(Db, DDoc2)), + + delete_doc(Db, DDoc1), + ok = fabric2_index:cleanup(Db), + + ?assertEqual(true, view_has_data(Db, DDoc1)), + ?assertEqual(true, view_has_data(Db, DDoc2)), + + delete_doc(Db, DDoc2), + ok = fabric2_index:cleanup(Db), + + ?assertEqual(false, view_has_data(Db, DDoc1)), + ?assertEqual(false, view_has_data(Db, DDoc2)). + + +removes_old_jobs(Db) -> + create_docs(Db, 10), + DDoc = create_ddoc(Db, <<"foo">>), + + ?assertEqual(10, length(run_query(Db, DDoc))), + ?assertEqual(true, view_has_data(Db, DDoc)), + ?assertEqual(true, job_exists(Db, DDoc)), + + delete_doc(Db, DDoc), + ?assertEqual(ok, fabric2_index:cleanup(Db)), + + ?assertEqual(false, view_has_data(Db, DDoc)), + ?assertEqual(false, job_exists(Db, DDoc)). + + +after_job_accepted_initial_build(Db) -> + cleanup_during_initial_build(Db, fun meck_intercept_job_accept/2). + + +after_job_accepted_rebuild(Db) -> + cleanup_during_rebuild(Db, fun meck_intercept_job_accept/2). + + +during_index_initial_build(Db) -> + cleanup_during_initial_build(Db, fun meck_intercept_job_update/2). + + +during_index_rebuild(Db) -> + cleanup_during_rebuild(Db, fun meck_intercept_job_update/2). + + +cleanup_during_initial_build(Db, InterruptFun) -> + InterruptFun(fabric2_db:name(Db), self()), + + create_docs(Db, 10), + DDoc = create_ddoc(Db, <<"foo">>), + + {_, Ref1} = spawn_monitor(fun() -> run_query(Db, DDoc) end), + + receive {JobPid, triggered} -> ok end, + delete_doc(Db, DDoc), + ok = fabric2_index:cleanup(Db), + JobPid ! continue, + + receive {'DOWN', Ref1, _, _, _} -> ok end, + + ok = fabric2_index:cleanup(Db), + ?assertError({ddoc_deleted, _}, run_query(Db, DDoc)), + + ?assertEqual(false, view_has_data(Db, DDoc)), + ?assertEqual(false, job_exists(Db, DDoc)). + + +cleanup_during_rebuild(Db, InterruptFun) -> + create_docs(Db, 10), + DDoc = create_ddoc(Db, <<"foo">>), + ?assertEqual(10, length(run_query(Db, DDoc))), + + InterruptFun(fabric2_db:name(Db), self()), + + create_docs(Db, 10, 10), + + {_, Ref1} = spawn_monitor(fun() -> run_query(Db, DDoc) end), + + receive {JobPid, triggered} -> ok end, + delete_doc(Db, DDoc), + ok = fabric2_index:cleanup(Db), + JobPid ! continue, + + receive {'DOWN', Ref1, _, _, _} -> ok end, + + ok = fabric2_index:cleanup(Db), + ?assertError({ddoc_deleted, _}, run_query(Db, DDoc)), + + ?assertEqual(false, view_has_data(Db, DDoc)), + ?assertEqual(false, job_exists(Db, DDoc)). 
+ + + +run_query(Db, DDocId) when is_binary(DDocId) -> + {ok, DDoc} = fabric2_db:open_doc(Db, <<"_design/", DDocId/binary>>), + run_query(Db, DDoc); + +run_query(Db, DDoc) -> + Fun = fun default_cb/2, + {ok, Result} = couch_views:query(Db, DDoc, <<"bar">>, Fun, [], #{}), + Result. + + +default_cb(complete, Acc) -> + {ok, lists:reverse(Acc)}; +default_cb({final, Info}, []) -> + {ok, [Info]}; +default_cb({final, _}, Acc) -> + {ok, Acc}; +default_cb({meta, _}, Acc) -> + {ok, Acc}; +default_cb(ok, ddoc_updated) -> + {ok, ddoc_updated}; +default_cb(Row, Acc) -> + {ok, [Row | Acc]}. + + +view_has_data(Db, DDoc) -> + DbName = fabric2_db:name(Db), + {ok, #mrst{sig = Sig}} = couch_views_util:ddoc_to_mrst(DbName, DDoc), + fabric2_fdb:transactional(Db, fun(TxDb) -> + #{ + tx := Tx, + db_prefix := DbPrefix + } = TxDb, + SigKeyTuple = {?DB_VIEWS, ?VIEW_INFO, ?VIEW_UPDATE_SEQ, Sig}, + SigKey = erlfdb_tuple:pack(SigKeyTuple, DbPrefix), + SigVal = erlfdb:wait(erlfdb:get(Tx, SigKey)), + + RangeKeyTuple = {?DB_VIEWS, ?VIEW_TREES, Sig}, + RangeKey = erlfdb_tuple:pack(RangeKeyTuple, DbPrefix), + Range = erlfdb:wait(erlfdb:get_range_startswith(Tx, RangeKey)), + + SigVal /= not_found andalso Range /= [] + end). + + +meck_intercept_job_accept(TgtDbName, ParentPid) -> + meck:new(fabric2_db, [passthrough]), + meck:expect(fabric2_db, open, fun + (DbName, Opts) when DbName == TgtDbName -> + Result = meck:passthrough([DbName, Opts]), + ParentPid ! {self(), triggered}, + receive continue -> ok end, + meck:unload(), + Result; + (DbName, Opts) -> + meck:passthrough([DbName, Opts]) + end). + + +meck_intercept_job_update(_DbName, ParentPid) -> + meck:new(couch_jobs, [passthrough]), + meck:expect(couch_jobs, finish, fun(Tx, Job, Data) -> + ParentPid ! {self(), triggered}, + receive continue -> ok end, + Result = meck:passthrough([Tx, Job, Data]), + meck:unload(), + Result + end). + + +create_ddoc(Db, Id) -> + MapFunFmt = "function(doc) {var f = \"~s\"; emit(doc.val, f)}", + MapFun = io_lib:format(MapFunFmt, [Id]), + Body = {[ + {<<"views">>, {[ + {<<"bar">>, {[{<<"map">>, iolist_to_binary(MapFun)}]}} + ]}} + ]}, + create_doc(Db, <<"_design/", Id/binary>>, Body). + + +recreate_doc(Db, #doc{deleted = true} = Doc) -> + #doc{ + id = DDocId, + body = Body + } = Doc, + create_doc(Db, DDocId, Body). + + +create_ddocs(Db, Count) when is_integer(Count), Count > 1 -> + lists:map(fun(Seq) -> + Id = io_lib:format("~6..0b", [Seq]), + create_ddoc(Db, iolist_to_binary(Id)) + end, lists:seq(1, Count)). + + +create_doc(Db, Id) -> + create_doc(Db, Id, {[{<<"value">>, Id}]}). + + +create_doc(Db, Id, Body) -> + Doc = #doc{ + id = Id, + body = Body + }, + {ok, {Pos, Rev}} = fabric2_db:update_doc(Db, Doc), + Doc#doc{revs = {Pos, [Rev]}}. + + +create_docs(Db, Count) -> + create_docs(Db, Count, 0). + + +create_docs(Db, Count, Offset) -> + lists:map(fun(Seq) -> + Id = io_lib:format("~6..0b", [Seq]), + create_doc(Db, iolist_to_binary(Id)) + end, lists:seq(Offset + 1, Offset + Count)). + + +delete_doc(Db, DDoc) -> + #doc{ + revs = {_, [_ | _] = Revs} + } = DDoc, + {ok, {NewPos, Rev}} = fabric2_db:update_doc(Db, DDoc#doc{deleted = true}), + DDoc#doc{ + revs = {NewPos, [Rev | Revs]}, + deleted = true + }. + + +job_exists(Db, DDoc) -> + JobId = job_id(Db, DDoc), + case couch_jobs:get_job_data(Db, ?INDEX_JOB_TYPE, JobId) of + {ok, _} -> true; + {error, not_found} -> false + end. 
+ + +job_id(Db, DDoc) -> + DbName = fabric2_db:name(Db), + {ok, #mrst{sig = Sig}} = couch_views_util:ddoc_to_mrst(DbName, DDoc), + HexSig = fabric2_util:to_hex(Sig), + <<HexSig/binary, "-", DbName/binary>>. diff --git a/src/couch_views/test/couch_views_encoding_test.erl b/src/couch_views/test/couch_views_encoding_test.erl new file mode 100644 index 000000000..d15f616cb --- /dev/null +++ b/src/couch_views/test/couch_views_encoding_test.erl @@ -0,0 +1,117 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_views_encoding_test). + +-include_lib("eunit/include/eunit.hrl"). + +val_encoding_test() -> + Values = [ + null, + true, + 1.0, + <<"a">>, + {[{<<"a">>, 1.0}, {<<"b">>, <<"hello">>}]} + ], + lists:foreach(fun (Val) -> + EncVal = couch_views_encoding:encode(Val), + ?assertEqual(Val, couch_views_encoding:decode(EncVal)) + end, Values). + + +setup() -> + % Load the ICU driver for couch_util:get_sort_key/1 + {ok, CfgPid} = gen_server:start_link(config, [], []), + {ok, DrvPid} = gen_server:start_link(couch_drv, [], []), + {CfgPid, DrvPid}. + + +teardown({CfgPid, DrvPid}) -> + unlink(CfgPid), + unlink(DrvPid), + exit(CfgPid, kill), + exit(DrvPid, kill). + + +correct_ordering_test_() -> + { + setup, + fun setup/0, + fun teardown/1, + [ + fun t_correct_ordering/0 + ] + }. + + +t_correct_ordering() -> + ?_test(begin + Ordered = [ + % Special values sort before all other types + null, + false, + true, + + % Then numbers + 1, + 2, + 3.0, + 4, + + % Then text, case sensitive + <<"a">>, + <<"A">>, + <<"aa">>, + <<"b">>, + <<"B">>, + <<"ba">>, + <<"bb">>, + + % Then arrays, compared element by element until different. + % Longer arrays sort after their prefixes + [<<"a">>], + [<<"b">>], + [<<"b">>, <<"c">>], + [<<"b">>, <<"c">>, <<"a">>], + [<<"b">>, <<"d">>], + [<<"b">>, <<"d">>, <<"e">>], + + % Then objects, compared each key value in the list until different. + % Larger objects sort after their subset objects + {[{<<"a">>, 1}]}, + {[{<<"a">>, 2}]}, + {[{<<"b">>, 1}]}, + {[{<<"b">>, 2}]}, + + % Member order does matter for collation + {[{<<"b">>, 2}, {<<"a">>, 1}]}, + {[{<<"b">>, 2}, {<<"c">>, 2}]} + ], + + Encoded = lists:map(fun(Elem) -> + K = couch_views_encoding:encode(Elem, key), + V = couch_views_encoding:encode(Elem, value), + {K, V} + end, Ordered), + Shuffled = shuffle(Encoded), + Reordered = lists:sort(Shuffled), + + lists:foreach(fun({Original, {_K, ViewEncoded}}) -> + ?assertEqual(Original, couch_views_encoding:decode(ViewEncoded)) + end, lists:zip(Ordered, Reordered)) + end). + + +shuffle(List) when is_list(List) -> + Tagged = [{rand:uniform(), Item} || Item <- List], + {_, Randomized} = lists:unzip(lists:sort(Tagged)), + Randomized. 
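% Usage note: the ordering test above depends on two properties of
% couch_views_encoding -- key encodings are plain binaries that sort in view
% collation order, and value encodings round-trip through decode/1. A minimal
% sketch under those assumptions (encoding_example/0 is a hypothetical name):
encoding_example() ->
    % Strings collate before arrays, so their key encodings compare the same way.
    KeyA = couch_views_encoding:encode(<<"a">>, key),
    KeyB = couch_views_encoding:encode([<<"b">>], key),
    true = KeyA < KeyB,
    % Value encodings only need to round-trip.
    Obj = {[{<<"a">>, 1.0}]},
    Obj = couch_views_encoding:decode(couch_views_encoding:encode(Obj, value)).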
diff --git a/src/couch_views/test/couch_views_error_test.erl b/src/couch_views/test/couch_views_error_test.erl new file mode 100644 index 000000000..8b6399e0e --- /dev/null +++ b/src/couch_views/test/couch_views_error_test.erl @@ -0,0 +1,102 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_views_error_test). + +-include_lib("eunit/include/eunit.hrl"). +-include_lib("couch/include/couch_db.hrl"). +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("fabric/test/fabric2_test.hrl"). + +-define(USER, "chttpd_db_test_admin"). +-define(PASS, "pass"). +-define(AUTH, {basic_auth, {?USER, ?PASS}}). +-define(CONTENT_JSON, {"Content-Type", "application/json"}). + + +error_test_() -> + { + "Test views report errors", + { + setup, + fun setup/0, + fun teardown/1, + { + foreach, + fun foreach_setup/0, + fun foreach_teardown/1, + [ + ?TDEF_FE(view_reports_error) + ] + } + } + }. + + +setup() -> + Ctx = test_util:start_couch([ + fabric, + chttpd, + couch_jobs, + couch_js, + couch_views + ]), + Hashed = couch_passwords:hash_admin_password(?PASS), + ok = config:set("admins", ?USER, ?b2l(Hashed), _Persist=false), + Ctx. + + +teardown(Ctx) -> + test_util:stop_couch(Ctx). + + +foreach_setup() -> + Addr = config:get("chttpd", "bind_address", "127.0.0.1"), + Port = mochiweb_socket_server:get(chttpd, port), + {ok, Db} = fabric2_db:create(?tempdb(), [{user_ctx, ?ADMIN_USER}]), + DbName = fabric2_db:name(Db), + Url = lists:concat(["http://", Addr, ":", Port, "/", ?b2l(DbName)]), + {Db, Url}. + + +foreach_teardown({Db, _}) -> + meck:unload(), + ok = fabric2_db:delete(fabric2_db:name(Db), []). + + +view_reports_error({Db, Url}) -> + meck:new(couch_views_batch, [passthrough]), + meck:expect(couch_views_batch, start, fun(_) -> + erlang:error({erlfdb_error, 2101}) + end), + + {ok, _} = fabric2_db:update_doc(Db, ddoc(), []), + + ViewUrl = lists:concat([Url, "/_design/foo/_view/bar"]), + {ok, Status, _Headers, Body} = test_request:get(ViewUrl, [?AUTH]), + + ?assertEqual(500, Status), + {Props} = couch_util:json_decode(Body), + {<<"error">>, Error} = lists:keyfind(<<"error">>, 1, Props), + ?assertEqual(<<"foundationdb_error">>, Error). + + +ddoc() -> + couch_doc:from_json_obj({[ + {<<"_id">>, <<"_design/foo">>}, + {<<"language">>, <<"javascript">>}, + {<<"views">>, {[ + {<<"bar">>, {[ + {<<"map">>, <<"function(doc) {emit(doc.value, doc.value);}">>} + ]}} + ]}} + ]}). diff --git a/src/couch_views/test/couch_views_indexer_test.erl b/src/couch_views/test/couch_views_indexer_test.erl new file mode 100644 index 000000000..75be2459f --- /dev/null +++ b/src/couch_views/test/couch_views_indexer_test.erl @@ -0,0 +1,699 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. 
You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_views_indexer_test). + +-include_lib("eunit/include/eunit.hrl"). +-include_lib("couch/include/couch_db.hrl"). +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("couch_mrview/include/couch_mrview.hrl"). +-include_lib("couch_views/include/couch_views.hrl"). +-include_lib("fabric/test/fabric2_test.hrl"). + + +-define(MAP_FUN1, <<"map_fun1">>). +-define(MAP_FUN2, <<"map_fun2">>). + + +indexer_test_() -> + { + "Test view indexing", + { + setup, + fun setup/0, + fun cleanup/1, + { + foreach, + fun foreach_setup/0, + fun foreach_teardown/1, + [ + ?TDEF_FE(indexed_empty_db), + ?TDEF_FE(indexed_single_doc), + ?TDEF_FE(updated_docs_are_reindexed), + ?TDEF_FE(updated_docs_without_changes_are_reindexed), + ?TDEF_FE(deleted_docs_not_indexed), + ?TDEF_FE(deleted_docs_are_unindexed), + ?TDEF_FE(multipe_docs_with_same_key), + ?TDEF_FE(multipe_keys_from_same_doc), + ?TDEF_FE(multipe_identical_keys_from_same_doc), + ?TDEF_FE(fewer_multipe_identical_keys_from_same_doc), + ?TDEF_FE(multiple_design_docs), + ?TDEF_FE(handle_size_key_limits), + ?TDEF_FE(handle_size_value_limits), + ?TDEF_FE(index_autoupdater_callback), + ?TDEF_FE(handle_db_recreated_when_running), + ?TDEF_FE(handle_db_recreated_after_finished), + ?TDEF_FE(index_can_recover_from_crash, 60) + ] + } + } + }. + + +setup() -> + Ctx = test_util:start_couch([ + fabric, + couch_jobs, + couch_js, + couch_views + ]), + Ctx. + + +cleanup(Ctx) -> + test_util:stop_couch(Ctx). + + +foreach_setup() -> + {ok, Db} = fabric2_db:create(?tempdb(), [{user_ctx, ?ADMIN_USER}]), + Db. + + +foreach_teardown(Db) -> + meck:unload(), + config:delete("couch_views", "change_limit"), + ok = fabric2_db:delete(fabric2_db:name(Db), []). + + +indexed_empty_db(Db) -> + DDoc = create_ddoc(), + {ok, _} = fabric2_db:update_doc(Db, DDoc, []), + ?assertEqual({ok, []}, run_query(Db, DDoc, ?MAP_FUN1)). + + +indexed_single_doc(Db) -> + DDoc = create_ddoc(), + Doc1 = doc(0), + + {ok, _} = fabric2_db:update_doc(Db, DDoc, []), + {ok, _} = fabric2_db:update_doc(Db, Doc1, []), + + {ok, Out} = run_query(Db, DDoc, ?MAP_FUN1), + + ?assertEqual([row(<<"0">>, 0, 0)], Out). + + +updated_docs_are_reindexed(Db) -> + DDoc = create_ddoc(), + Doc1 = doc(0), + + {ok, _} = fabric2_db:update_doc(Db, DDoc, []), + {ok, {Pos, Rev}} = fabric2_db:update_doc(Db, Doc1, []), + + {ok, Out1} = run_query(Db, DDoc, ?MAP_FUN1), + + ?assertEqual([row(<<"0">>, 0, 0)], Out1), + + Doc2 = Doc1#doc{ + revs = {Pos, [Rev]}, + body = {[{<<"val">>, 1}]} + }, + {ok, _} = fabric2_db:update_doc(Db, Doc2, []), + + {ok, Out2} = run_query(Db, DDoc, ?MAP_FUN1), + + ?assertEqual([row(<<"0">>, 1, 1)], Out2), + + % Check that our id index is updated properly + % as well. + DbName = fabric2_db:name(Db), + {ok, Mrst0} = couch_views_util:ddoc_to_mrst(DbName, DDoc), + fabric2_fdb:transactional(Db, fun(TxDb) -> + #{tx := Tx} = TxDb, + Mrst1 = couch_views_trees:open(TxDb, Mrst0), + IdRow = ebtree:lookup(Tx, Mrst1#mrst.id_btree, <<"0">>), + ?assertEqual({<<"0">>, [{1, []}, {0, [1]}]}, IdRow) + end). 
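% The id index row asserted on above has the shape
% {DocId, [{ViewIdNum, EmittedKeys}]} as exercised by this test: view 1
% (?MAP_FUN2) emitted nothing, while view 0 (?MAP_FUN1) emitted the single key
% 1 once the doc's val was updated from 0 to 1. A hedged sketch of unpacking
% it, reusing the bindings from the transaction above (illustrative only):
%
%     {DocId, PerView} = ebtree:lookup(Tx, Mrst1#mrst.id_btree, <<"0">>),
%     <<"0">> = DocId,
%     [{1, []}, {0, [1]}] = PerView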
+ + +updated_docs_without_changes_are_reindexed(Db) -> + DDoc = create_ddoc(), + Doc1 = doc(0), + + {ok, _} = fabric2_db:update_doc(Db, DDoc, []), + {ok, {Pos, Rev}} = fabric2_db:update_doc(Db, Doc1, []), + + {ok, Out1} = run_query(Db, DDoc, ?MAP_FUN1), + + ?assertEqual([row(<<"0">>, 0, 0)], Out1), + + Doc2 = Doc1#doc{ + revs = {Pos, [Rev]}, + body = {[{<<"val">>, 0}]} + }, + {ok, _} = fabric2_db:update_doc(Db, Doc2, []), + + {ok, Out2} = run_query(Db, DDoc, ?MAP_FUN1), + + ?assertEqual([row(<<"0">>, 0, 0)], Out2), + + % Check fdb directly to make sure we've also + % removed the id idx keys properly. + DbName = fabric2_db:name(Db), + {ok, Mrst0} = couch_views_util:ddoc_to_mrst(DbName, DDoc), + fabric2_fdb:transactional(Db, fun(TxDb) -> + #{tx := Tx} = TxDb, + Mrst1 = couch_views_trees:open(TxDb, Mrst0), + IdRow = ebtree:lookup(Tx, Mrst1#mrst.id_btree, <<"0">>), + ?assertEqual({<<"0">>, [{1, []}, {0, [0]}]}, IdRow) + end). + + +deleted_docs_not_indexed(Db) -> + DDoc = create_ddoc(), + Doc1 = doc(0), + + {ok, _} = fabric2_db:update_doc(Db, DDoc, []), + {ok, {Pos, Rev}} = fabric2_db:update_doc(Db, Doc1, []), + Doc2 = Doc1#doc{ + revs = {Pos, [Rev]}, + deleted = true, + body = {[{<<"val">>, 1}]} + }, + {ok, _} = fabric2_db:update_doc(Db, Doc2, []), + + ?assertEqual({ok, []}, run_query(Db, DDoc, ?MAP_FUN1)). + + +deleted_docs_are_unindexed(Db) -> + DDoc = create_ddoc(), + Doc1 = doc(0), + + {ok, _} = fabric2_db:update_doc(Db, DDoc, []), + {ok, {Pos, Rev}} = fabric2_db:update_doc(Db, Doc1, []), + + {ok, Out1} = run_query(Db, DDoc, ?MAP_FUN1), + ?assertEqual([row(<<"0">>, 0, 0)], Out1), + + Doc2 = Doc1#doc{ + revs = {Pos, [Rev]}, + deleted = true, + body = {[{<<"val">>, 1}]} + }, + {ok, _} = fabric2_db:update_doc(Db, Doc2, []), + + ?assertEqual({ok, []}, run_query(Db, DDoc, ?MAP_FUN1)), + + % Check fdb directly to make sure we've also + % removed the id idx keys properly. + DbName = fabric2_db:name(Db), + {ok, Mrst0} = couch_views_util:ddoc_to_mrst(DbName, DDoc), + fabric2_fdb:transactional(Db, fun(TxDb) -> + #{tx := Tx} = TxDb, + Mrst1 = couch_views_trees:open(TxDb, Mrst0), + IdRow = ebtree:lookup(Tx, Mrst1#mrst.id_btree, <<"0">>), + ?assertEqual(false, IdRow) + end). + + +multipe_docs_with_same_key(Db) -> + DDoc = create_ddoc(), + Doc1 = doc(0, 1), + Doc2 = doc(1, 1), + + {ok, _} = fabric2_db:update_doc(Db, DDoc, []), + {ok, _} = fabric2_db:update_docs(Db, [Doc1, Doc2], []), + + {ok, Out} = run_query(Db, DDoc, ?MAP_FUN1), + + ?assertEqual([ + row(<<"0">>, 1, 1), + row(<<"1">>, 1, 1) + ], Out). + + +multipe_keys_from_same_doc(Db) -> + DDoc = create_ddoc(multi_emit_different), + Doc = doc(0, 1), + + {ok, _} = fabric2_db:update_doc(Db, DDoc, []), + {ok, _} = fabric2_db:update_doc(Db, Doc, []), + + {ok, Out} = run_query(Db, DDoc, ?MAP_FUN1), + + ?assertEqual([ + row(<<"0">>, 1, 1), + row(<<"0">>, <<"0">>, <<"0">>) + ], Out). + + +multipe_identical_keys_from_same_doc(Db) -> + DDoc = create_ddoc(multi_emit_same), + Doc = doc(0, 1), + + {ok, _} = fabric2_db:update_doc(Db, DDoc, []), + {ok, _} = fabric2_db:update_doc(Db, Doc, []), + + {ok, Out} = run_query(Db, DDoc, ?MAP_FUN1), + + ?assertEqual([ + row(<<"0">>, 1, 1), + row(<<"0">>, 1, 2) + ], Out). 
+ + +fewer_multipe_identical_keys_from_same_doc(Db) -> + DDoc = create_ddoc(multi_emit_same), + Doc0 = #doc{ + id = <<"0">>, + body = {[{<<"val">>, 1}, {<<"extra">>, 3}]} + }, + + {ok, _} = fabric2_db:update_doc(Db, DDoc, []), + {ok, {Pos, Rev}} = fabric2_db:update_doc(Db, Doc0, []), + + {ok, Out1} = run_query(Db, DDoc, ?MAP_FUN1), + + ?assertEqual([ + row(<<"0">>, 1, 1), + row(<<"0">>, 1, 2), + row(<<"0">>, 1, 3) + ], Out1), + + Doc1 = #doc{ + id = <<"0">>, + revs = {Pos, [Rev]}, + body = {[{<<"val">>, 1}]} + }, + {ok, _} = fabric2_db:update_doc(Db, Doc1, []), + + {ok, Out2} = run_query(Db, DDoc, ?MAP_FUN1), + + ?assertEqual([ + row(<<"0">>, 1, 1), + row(<<"0">>, 1, 2) + ], Out2). + + +handle_size_key_limits(Db) -> + ok = meck:new(config, [passthrough]), + ok = meck:expect(config, get_integer, fun + ("couch_views", "key_size_limit", _Default) -> 15; + (_Section, _Key, Default) -> Default + end), + + DDoc = create_ddoc(multi_emit_key_limit), + Docs = [doc(1, 2)] ++ [doc(2, 1)], + + {ok, _} = fabric2_db:update_docs(Db, [DDoc | Docs], []), + + {ok, Out} = run_query(Db, DDoc, ?MAP_FUN1), + + ?assertEqual([row(<<"1">>, 2, 2)], Out), + + {ok, Doc} = fabric2_db:open_doc(Db, <<"2">>), + Doc2 = Doc#doc { + body = {[{<<"val">>, 2}]} + }, + {ok, _} = fabric2_db:update_doc(Db, Doc2), + + {ok, Out1} = run_query(Db, DDoc, ?MAP_FUN1), + + ?assertEqual([ + row(<<"1">>, 2, 2), + row(<<"2">>, 2, 2) + ], Out1). + + +handle_size_value_limits(Db) -> + ok = meck:new(config, [passthrough]), + ok = meck:expect(config, get_integer, fun + ("couch_views", "value_size_limit", _Default) -> 15; + (_Section, _Key, Default) -> Default + end), + + DDoc = create_ddoc(multi_emit_key_limit), + Docs = [doc(1, 2)] ++ [doc(2, 3)], + + {ok, _} = fabric2_db:update_docs(Db, [DDoc | Docs], []), + + {ok, Out} = run_query(Db, DDoc, ?MAP_FUN2), + + ?assertEqual([ + row(<<"1">>, 2, 2), + row(<<"2">>, 3, 3), + row(<<"1">>, 22, 2), + row(<<"2">>, 23, 3) + ], Out), + + {ok, Doc} = fabric2_db:open_doc(Db, <<"1">>), + Doc2 = Doc#doc{ + body = {[{<<"val">>, 1}]} + }, + {ok, _} = fabric2_db:update_doc(Db, Doc2), + + {ok, Out1} = run_query(Db, DDoc, ?MAP_FUN2), + + ?assertEqual([ + row(<<"2">>, 3, 3), + row(<<"2">>, 23, 3) + ], Out1). + + +index_autoupdater_callback(Db) -> + DDoc = create_ddoc(), + Doc1 = doc(0), + {ok, _} = fabric2_db:update_doc(Db, DDoc, []), + {ok, _} = fabric2_db:update_doc(Db, Doc1, []), + + DbSeq = fabric2_db:get_update_seq(Db), + + Result = couch_views:build_indices(Db, [DDoc]), + ?assertMatch([{ok, <<_/binary>>}], Result), + [{ok, JobId}] = Result, + + ?assertEqual(ok, couch_views_jobs:wait_for_job(JobId, DDoc#doc.id, DbSeq)). + + +multiple_design_docs(Db) -> + Cleanup = fun() -> + fabric2_fdb:transactional(Db, fun(TxDb) -> + DDocs = fabric2_db:get_design_docs(Db), + ok = couch_views:cleanup_indices(TxDb, DDocs) + end) + end, + + DDoc1 = create_ddoc(simple, <<"_design/bar1">>), + DDoc2 = create_ddoc(simple, <<"_design/bar2">>), + + {ok, _} = fabric2_db:update_doc(Db, doc(0), []), + {ok, {Pos1, Rev1}} = fabric2_db:update_doc(Db, DDoc1, []), + ?assertEqual({ok, [row(<<"0">>, 0, 0)]}, run_query(Db, DDoc1, ?MAP_FUN1)), + + % Because run_query/3 can return, and unsubscribe from the job, + % before it actually finishes, ensure we wait for the job to + % finish so we get a deterministic setup every time. + JobId = get_job_id(Db, DDoc1), + ?assertEqual(ok, wait_job_finished(JobId, 5000)), + + % Add the second ddoc with same view as first one. 
+ {ok, {Pos2, Rev2}} = fabric2_db:update_doc(Db, DDoc2, []), + + DDoc1Del = DDoc1#doc{revs = {Pos1, [Rev1]}, deleted = true}, + {ok, _} = fabric2_db:update_doc(Db, DDoc1Del, []), + + Cleanup(), + + % Assert that no updates are applied + meck:new(couch_views_fdb, [passthrough]), + meck:expect(couch_views_trees, update_views, fun(TxDb, Mrst, Docs) -> + case Docs of + [] -> meck:passthrough([TxDb, Mrst, Docs]); + [_ | _] -> erlang:error(update_triggered) + end + end), + ?assertEqual({ok, [row(<<"0">>, 0, 0)]}, run_query(Db, DDoc2, ?MAP_FUN1)), + ?assertEqual(ok, wait_job_finished(JobId, 5000)), + + DDoc2Del = DDoc2#doc{revs = {Pos2, [Rev2]}, deleted = true}, + {ok, _} = fabric2_db:update_doc(Db, DDoc2Del, []), + + Cleanup(), + + % After the last ddoc is deleted we should get an error + ?assertError({ddoc_deleted, _}, run_query(Db, DDoc2, ?MAP_FUN1)). + + +handle_db_recreated_when_running(Db) -> + DbName = fabric2_db:name(Db), + + DDoc = create_ddoc(), + {ok, _} = fabric2_db:update_doc(Db, DDoc, []), + {ok, _} = fabric2_db:update_doc(Db, doc(0), []), + {ok, _} = fabric2_db:update_doc(Db, doc(1), []), + + % To intercept job building while it is running ensure updates happen one + % row at a time. + config:set("couch_views", "batch_initial_size", "1", false), + + meck_intercept_job_update(self()), + + [{ok, JobId}] = couch_views:build_indices(Db, [DDoc]), + + {Indexer, _Job, _Data} = wait_indexer_update(10000), + + {ok, State} = couch_jobs:get_job_state(undefined, ?INDEX_JOB_TYPE, JobId), + ?assertEqual(running, State), + + {ok, SubId, running, _} = couch_jobs:subscribe(?INDEX_JOB_TYPE, JobId), + + ok = fabric2_db:delete(DbName, []), + {ok, Db1} = fabric2_db:create(DbName, [?ADMIN_CTX]), + + Indexer ! continue, + + ?assertMatch({ + ?INDEX_JOB_TYPE, + JobId, + finished, + #{<<"error">> := <<"db_deleted">>} + }, couch_jobs:wait(SubId, infinity)), + + {ok, _} = fabric2_db:update_doc(Db1, DDoc, []), + {ok, _} = fabric2_db:update_doc(Db1, doc(2), []), + {ok, _} = fabric2_db:update_doc(Db1, doc(3), []), + + reset_intercept_job_update(Indexer), + + {ok, Out2} = run_query(Db1, DDoc, ?MAP_FUN1), + ?assertEqual([ + row(<<"2">>, 2, 2), + row(<<"3">>, 3, 3) + ], Out2). + + +handle_db_recreated_after_finished(Db) -> + DbName = fabric2_db:name(Db), + + DDoc = create_ddoc(), + {ok, _} = fabric2_db:update_doc(Db, DDoc, []), + {ok, _} = fabric2_db:update_doc(Db, doc(0), []), + {ok, _} = fabric2_db:update_doc(Db, doc(1), []), + + {ok, Out1} = run_query(Db, DDoc, ?MAP_FUN1), + ?assertEqual([ + row(<<"0">>, 0, 0), + row(<<"1">>, 1, 1) + ], Out1), + + ok = fabric2_db:delete(DbName, []), + + ?assertError(database_does_not_exist, run_query(Db, DDoc, ?MAP_FUN1)), + + {ok, Db1} = fabric2_db:create(DbName, [?ADMIN_CTX]), + + {ok, _} = fabric2_db:update_doc(Db1, DDoc, []), + {ok, _} = fabric2_db:update_doc(Db1, doc(2), []), + {ok, _} = fabric2_db:update_doc(Db1, doc(3), []), + + ?assertError(database_does_not_exist, run_query(Db, DDoc, ?MAP_FUN1)), + + {ok, Out2} = run_query(Db1, DDoc, ?MAP_FUN1), + ?assertEqual([ + row(<<"2">>, 2, 2), + row(<<"3">>, 3, 3) + ], Out2). 
+ + +index_can_recover_from_crash(Db) -> + ok = meck:new(config, [passthrough]), + ok = meck:expect(config, get_integer, fun(Section, Key, Default) -> + case Section == "couch_views" andalso Key == "change_limit" of + true -> 1; + _ -> Default + end + end), + meck:new(couch_eval, [passthrough]), + meck:expect(couch_eval, map_docs, fun(State, Docs) -> + Doc = hd(Docs), + case Doc#doc.id == <<"2">> of + true -> + % remove the mock so that next time the doc is processed + % it will work + meck:unload(couch_eval), + throw({fake_crash, test_jobs_restart}); + false -> + meck:passthrough([State, Docs]) + end + end), + + DDoc = create_ddoc(), + Docs = make_docs(3), + {ok, _} = fabric2_db:update_doc(Db, DDoc, []), + {ok, _} = fabric2_db:update_docs(Db, Docs, []), + + {ok, Out} = run_query(Db, DDoc, ?MAP_FUN1), + ?assertEqual([ + row(<<"1">>, 1, 1), + row(<<"2">>, 2, 2), + row(<<"3">>, 3, 3) + ], Out). + + +row(Id, Key, Value) -> + {row, [ + {id, Id}, + {key, Key}, + {value, Value} + ]}. + + +fold_fun({meta, _Meta}, Acc) -> + {ok, Acc}; +fold_fun({row, _} = Row, Acc) -> + {ok, [Row | Acc]}; +fold_fun(complete, Acc) -> + {ok, lists:reverse(Acc)}. + + +create_ddoc() -> + create_ddoc(simple). + + +create_ddoc(Type) -> + create_ddoc(Type, <<"_design/bar">>). + + +create_ddoc(simple, DocId) when is_binary(DocId) -> + couch_doc:from_json_obj({[ + {<<"_id">>, DocId}, + {<<"views">>, {[ + {?MAP_FUN1, {[ + {<<"map">>, <<"function(doc) {emit(doc.val, doc.val);}">>} + ]}}, + {?MAP_FUN2, {[ + {<<"map">>, <<"function(doc) {}">>} + ]}} + ]}} + ]}); + +create_ddoc(multi_emit_different, DocId) when is_binary(DocId) -> + couch_doc:from_json_obj({[ + {<<"_id">>, DocId}, + {<<"views">>, {[ + {?MAP_FUN1, {[ + {<<"map">>, <<"function(doc) { " + "emit(doc._id, doc._id); " + "emit(doc.val, doc.val); " + "}">>} + ]}}, + {?MAP_FUN2, {[ + {<<"map">>, <<"function(doc) {}">>} + ]}} + ]}} + ]}); + +create_ddoc(multi_emit_same, DocId) when is_binary(DocId) -> + couch_doc:from_json_obj({[ + {<<"_id">>, DocId}, + {<<"views">>, {[ + {?MAP_FUN1, {[ + {<<"map">>, <<"function(doc) { " + "emit(doc.val, doc.val * 2); " + "emit(doc.val, doc.val); " + "if(doc.extra) {" + " emit(doc.val, doc.extra);" + "}" + "}">>} + ]}}, + {?MAP_FUN2, {[ + {<<"map">>, <<"function(doc) {}">>} + ]}} + ]}} + ]}); + +create_ddoc(multi_emit_key_limit, DocId) when is_binary(DocId) -> + couch_doc:from_json_obj({[ + {<<"_id">>, DocId}, + {<<"views">>, {[ + {?MAP_FUN1, {[ + {<<"map">>, <<"function(doc) { " + "if (doc.val === 1) { " + "emit('a very long string to be limited', doc.val);" + "} else {" + "emit(doc.val, doc.val)" + "}" + "}">>} + ]}}, + {?MAP_FUN2, {[ + {<<"map">>, <<"function(doc) { " + "emit(doc.val + 20, doc.val);" + "if (doc.val === 1) { " + "emit(doc.val, 'a very long string to be limited');" + "} else {" + "emit(doc.val, doc.val)" + "}" + "}">>} + ]}} + ]}} + ]}). + + +make_docs(Count) -> + [doc(I) || I <- lists:seq(1, Count)]. + + +doc(Id) -> + doc(Id, Id). + + +doc(Id, Val) -> + couch_doc:from_json_obj({[ + {<<"_id">>, list_to_binary(integer_to_list(Id))}, + {<<"val">>, Val} + ]}). + + +run_query(#{} = Db, DDoc, <<_/binary>> = View) -> + couch_views:query(Db, DDoc, View, fun fold_fun/2, [], #mrargs{}). + + +get_job_id(#{} = Db, DDoc) -> + DbName = fabric2_db:name(Db), + {ok, Mrst} = couch_views_util:ddoc_to_mrst(DbName, DDoc), + couch_views_jobs:job_id(Db, Mrst). 
+ + +wait_job_finished(JobId, Timeout) -> + case couch_jobs:subscribe(?INDEX_JOB_TYPE, JobId) of + {ok, Sub, _, _} -> + case couch_jobs:wait(Sub, finished, Timeout) of + {?INDEX_JOB_TYPE, _, _, _} -> ok; + timeout -> timeout + end; + {ok, finished, _} -> + ok + end. + + +meck_intercept_job_update(ParentPid) -> + meck:new(couch_jobs, [passthrough]), + meck:expect(couch_jobs, update, fun(Db, Job, Data) -> + ParentPid ! {self(), Job, Data}, + receive continue -> ok end, + meck:passthrough([Db, Job, Data]) + end). + + +reset_intercept_job_update(IndexerPid) -> + meck:expect(couch_jobs, update, fun(Db, Job, Data) -> + meck:passthrough([Db, Job, Data]) + end), + IndexerPid ! continue. + + +wait_indexer_update(Timeout) -> + receive + {Pid, Job, Data} when is_pid(Pid) -> {Pid, Job, Data} + after Timeout -> + error(timeout_in_wait_indexer_update) + end. diff --git a/src/couch_views/test/couch_views_info_test.erl b/src/couch_views/test/couch_views_info_test.erl new file mode 100644 index 000000000..993801a0d --- /dev/null +++ b/src/couch_views/test/couch_views_info_test.erl @@ -0,0 +1,174 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_views_info_test). + + +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("couch/include/couch_db.hrl"). +-include_lib("couch_mrview/include/couch_mrview.hrl"). +-include_lib("fabric/test/fabric2_test.hrl"). + + +-define(MAP_FUN1, <<"map_fun1">>). + + +setup() -> + Ctx = test_util:start_couch([ + fabric, + couch_jobs, + couch_js, + couch_views + ]), + Ctx. + + +cleanup(Ctx) -> + test_util:stop_couch(Ctx). + + +foreach_setup() -> + {ok, Db} = fabric2_db:create(?tempdb(), [{user_ctx, ?ADMIN_USER}]), + DDoc = create_ddoc(), + Doc1 = doc(0, 1), + + {ok, _} = fabric2_db:update_doc(Db, DDoc, []), + {ok, _} = fabric2_db:update_doc(Db, Doc1, []), + + run_query(Db, DDoc, ?MAP_FUN1), + {Db, DDoc}. + + +foreach_teardown({Db, _}) -> + meck:unload(), + ok = fabric2_db:delete(fabric2_db:name(Db), []). + + +views_info_test_() -> + { + "Views index info test", + { + setup, + fun setup/0, + fun cleanup/1, + { + foreach, + fun foreach_setup/0, + fun foreach_teardown/1, + [ + ?TDEF_FE(sig_is_binary), + ?TDEF_FE(language_is_js), + ?TDEF_FE(update_seq_is_binary), + ?TDEF_FE(updater_running_is_boolean), + ?TDEF_FE(active_size_is_non_neg_int), + ?TDEF_FE(update_opts_is_bin_list) + ] + } + } + }. + + +sig_is_binary({Db, DDoc}) -> + {ok, Info} = couch_views:get_info(Db, DDoc), + ?assert(is_binary(prop(signature, Info))). + + +language_is_js({Db, DDoc}) -> + {ok, Info} = couch_views:get_info(Db, DDoc), + ?assertEqual(<<"javascript">>, prop(language, Info)). + + +active_size_is_non_neg_int({Db, DDoc}) -> + {ok, Info} = couch_views:get_info(Db, DDoc), + ?assert(check_non_neg_int([sizes, active], Info)). 
+ + +updater_running_is_boolean({Db, DDoc}) -> + meck:new(couch_jobs, [passthrough]), + + meck:expect(couch_jobs, get_job_state, 3, meck:val({ok, running})), + {ok, Info1} = couch_views:get_info(Db, DDoc), + ?assert(prop(updater_running, Info1)), + + meck:expect(couch_jobs, get_job_state, 3, meck:val({ok, pending})), + {ok, Info2} = couch_views:get_info(Db, DDoc), + ?assert(prop(updater_running, Info2)), + + meck:expect(couch_jobs, get_job_state, 3, meck:val({ok, finished})), + {ok, Info3} = couch_views:get_info(Db, DDoc), + ?assert(not prop(updater_running, Info3)), + + meck:expect(couch_jobs, get_job_state, 3, meck:val({error, not_found})), + {ok, Info4} = couch_views:get_info(Db, DDoc), + ?assert(not prop(updater_running, Info4)). + + +update_seq_is_binary({Db, DDoc}) -> + {ok, Info} = couch_views:get_info(Db, DDoc), + ?assert(is_binary(prop(update_seq, Info))). + + +update_opts_is_bin_list({Db, DDoc}) -> + {ok, Info} = couch_views:get_info(Db, DDoc), + Opts = prop(update_options, Info), + ?assert(is_list(Opts) andalso + (Opts == [] orelse lists:all([is_binary(B) || B <- Opts]))). + + +check_non_neg_int(Key, Info) -> + Size = prop(Key, Info), + is_integer(Size) andalso Size >= 0. + + +prop(Key, {Props}) when is_list(Props) -> + prop(Key, Props); + +prop([Key], Info) -> + prop(Key, Info); + +prop([Key | Rest], Info) -> + prop(Rest, prop(Key, Info)); + +prop(Key, Info) when is_atom(Key), is_list(Info) -> + couch_util:get_value(Key, Info). + + +create_ddoc() -> + couch_doc:from_json_obj({[ + {<<"_id">>, <<"_design/bar">>}, + {<<"views">>, {[ + {?MAP_FUN1, {[ + {<<"map">>, <<"function(doc) {emit(doc.val, doc.val);}">>} + ]}} + ]}} + ]}). + + +doc(Id, Val) -> + couch_doc:from_json_obj({[ + {<<"_id">>, list_to_binary(integer_to_list(Id))}, + {<<"val">>, Val} + ]}). + + +fold_fun({meta, _Meta}, Acc) -> + {ok, Acc}; + +fold_fun({row, _} = Row, Acc) -> + {ok, [Row | Acc]}; + +fold_fun(complete, Acc) -> + {ok, lists:reverse(Acc)}. + + +run_query(#{} = Db, DDoc, <<_/binary>> = View) -> + couch_views:query(Db, DDoc, View, fun fold_fun/2, [], #mrargs{}). diff --git a/src/couch_views/test/couch_views_map_test.erl b/src/couch_views/test/couch_views_map_test.erl new file mode 100644 index 000000000..c419546e1 --- /dev/null +++ b/src/couch_views/test/couch_views_map_test.erl @@ -0,0 +1,610 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_views_map_test). + +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("couch/include/couch_db.hrl"). +-include("couch_views.hrl"). + + +-define(TDEF(A), {atom_to_list(A), fun A/0}). + + +setup() -> + test_util:start_couch([ + fabric, + couch_jobs, + couch_js, + couch_views + ]). + + +teardown(State) -> + test_util:stop_couch(State). 
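% Note: the tests below drive couch_views:query/6 with a plain args map
% (e.g. #{start_key => 4}) rather than the #mrargs{} record used in
% couch_views_indexer_test earlier in this diff; both argument forms appear in
% this changeset. A hypothetical call mirroring the run_query/2 helper defined
% at the end of this module:
%
%     {ok, Rows} = couch_views:query(Db, DDoc, <<"baz">>, fun default_cb/2, [],
%         #{start_key => 4, end_key => 8, inclusive_end => false})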
+ + +map_views_test_() -> + { + "Map views", + { + setup, + fun setup/0, + fun teardown/1, + [ + ?TDEF(should_map), + ?TDEF(should_map_with_startkey), + ?TDEF(should_map_with_endkey), + ?TDEF(should_map_with_endkey_not_inclusive), + ?TDEF(should_map_reverse_and_limit), + ?TDEF(should_map_with_range_reverse), + ?TDEF(should_map_with_limit_and_skip), + ?TDEF(should_map_with_limit_and_skip_reverse), + ?TDEF(should_map_with_include_docs), + ?TDEF(should_map_with_include_docs_reverse), + ?TDEF(should_map_with_startkey_with_key_array), + ?TDEF(should_map_with_startkey_and_endkey_with_key_array), + ?TDEF(should_map_empty_views), + ?TDEF(should_map_duplicate_keys), + ?TDEF(should_map_with_doc_emit), + ?TDEF(should_map_update_is_false), + ?TDEF(should_map_update_is_lazy), + ?TDEF(should_map_wait_for_interactive), + ?TDEF(should_map_local_seq) + % fun should_give_ext_size_seq_indexed_test/1 + ] + } + }. + + +should_map() -> + Result = run_query(<<"baz">>, #{}), + Expect = {ok, [ + {row, [{id, <<"1">>}, {key, 1}, {value, 1}]}, + {row, [{id, <<"2">>}, {key, 2}, {value, 2}]}, + {row, [{id, <<"3">>}, {key, 3}, {value, 3}]}, + {row, [{id, <<"4">>}, {key, 4}, {value, 4}]}, + {row, [{id, <<"5">>}, {key, 5}, {value, 5}]}, + {row, [{id, <<"6">>}, {key, 6}, {value, 6}]}, + {row, [{id, <<"7">>}, {key, 7}, {value, 7}]}, + {row, [{id, <<"8">>}, {key, 8}, {value, 8}]}, + {row, [{id, <<"9">>}, {key, 9}, {value, 9}]}, + {row, [{id, <<"10">>}, {key, 10}, {value, 10}]} + ]}, + ?assertEqual(Expect, Result). + + +should_map_with_startkey() -> + Result = run_query(<<"baz">>, #{start_key => 4}), + Expect = {ok, [ + {row, [{id, <<"4">>}, {key, 4}, {value, 4}]}, + {row, [{id, <<"5">>}, {key, 5}, {value, 5}]}, + {row, [{id, <<"6">>}, {key, 6}, {value, 6}]}, + {row, [{id, <<"7">>}, {key, 7}, {value, 7}]}, + {row, [{id, <<"8">>}, {key, 8}, {value, 8}]}, + {row, [{id, <<"9">>}, {key, 9}, {value, 9}]}, + {row, [{id, <<"10">>}, {key, 10}, {value, 10}]} + ]}, + ?assertEqual(Expect, Result). + + +should_map_with_endkey() -> + Result = run_query(<<"baz">>, #{end_key => 5}), + Expect = {ok, [ + {row, [{id, <<"1">>}, {key, 1}, {value, 1}]}, + {row, [{id, <<"2">>}, {key, 2}, {value, 2}]}, + {row, [{id, <<"3">>}, {key, 3}, {value, 3}]}, + {row, [{id, <<"4">>}, {key, 4}, {value, 4}]}, + {row, [{id, <<"5">>}, {key, 5}, {value, 5}]} + ]}, + ?assertEqual(Expect, Result). + + +should_map_with_endkey_not_inclusive() -> + Result = run_query(<<"baz">>, #{ + end_key => 5, + inclusive_end => false + }), + Expect = {ok, [ + {row, [{id, <<"1">>}, {key, 1}, {value, 1}]}, + {row, [{id, <<"2">>}, {key, 2}, {value, 2}]}, + {row, [{id, <<"3">>}, {key, 3}, {value, 3}]}, + {row, [{id, <<"4">>}, {key, 4}, {value, 4}]} + ]}, + ?assertEqual(Expect, Result). + + +should_map_reverse_and_limit() -> + Result = run_query(<<"baz">>, #{ + direction => rev, + limit => 3 + }), + Expect = {ok, [ + {row, [{id, <<"10">>}, {key, 10}, {value, 10}]}, + {row, [{id, <<"9">>}, {key, 9}, {value, 9}]}, + {row, [{id, <<"8">>}, {key, 8}, {value, 8}]} + ]}, + ?assertEqual(Expect, Result). + + +should_map_with_range_reverse() -> + Result = run_query(<<"baz">>, #{ + direction => rev, + start_key => 5, + end_key => 3, + inclusive_end => true + }), + Expect = {ok, [ + {row, [{id, <<"5">>}, {key, 5}, {value, 5}]}, + {row, [{id, <<"4">>}, {key, 4}, {value, 4}]}, + {row, [{id, <<"3">>}, {key, 3}, {value, 3}]} + ]}, + ?assertEqual(Expect, Result). 
+ + +should_map_with_limit_and_skip() -> + Result = run_query(<<"baz">>, #{ + start_key => 2, + limit => 3, + skip => 3 + }), + Expect = {ok, [ + {row, [{id, <<"5">>}, {key, 5}, {value, 5}]}, + {row, [{id, <<"6">>}, {key, 6}, {value, 6}]}, + {row, [{id, <<"7">>}, {key, 7}, {value, 7}]} + ]}, + ?assertEqual(Expect, Result). + + +should_map_with_limit_and_skip_reverse() -> + Result = run_query(<<"baz">>, #{ + start_key => 10, + limit => 3, + skip => 3, + direction => rev + }), + Expect = {ok, [ + {row, [{id, <<"7">>}, {key, 7}, {value, 7}]}, + {row, [{id, <<"6">>}, {key, 6}, {value, 6}]}, + {row, [{id, <<"5">>}, {key, 5}, {value, 5}]} + ]}, + ?assertEqual(Expect, Result). + + +should_map_with_include_docs() -> + Result = run_query(<<"baz">>, #{ + start_key => 8, + end_key => 8, + include_docs => true + }), + Doc = {[ + {<<"_id">>, <<"8">>}, + {<<"_rev">>, <<"1-55b9a29311341e07ec0a7ca13bc1b59f">>}, + {<<"val">>, 8} + ]}, + Expect = {ok, [ + {row, [{id, <<"8">>}, {key, 8}, {value, 8}, {doc, Doc}]} + ]}, + ?assertEqual(Expect, Result). + + +should_map_with_include_docs_reverse() -> + Result = run_query(<<"baz">>, #{ + start_key => 8, + end_key => 8, + include_docs => true, + direction => rev + }), + Doc = {[ + {<<"_id">>, <<"8">>}, + {<<"_rev">>, <<"1-55b9a29311341e07ec0a7ca13bc1b59f">>}, + {<<"val">>, 8} + ]}, + Expect = {ok, [ + {row, [{id, <<"8">>}, {key, 8}, {value, 8}, {doc, Doc}]} + ]}, + ?assertEqual(Expect, Result). + + +should_map_with_startkey_with_key_array() -> + Rows = [ + {row, [{id, <<"4">>}, {key, [<<"4">>, 4]}, {value, 4}]}, + {row, [{id, <<"5">>}, {key, [<<"5">>, 5]}, {value, 5}]}, + {row, [{id, <<"6">>}, {key, [<<"6">>, 6]}, {value, 6}]}, + {row, [{id, <<"7">>}, {key, [<<"7">>, 7]}, {value, 7}]}, + {row, [{id, <<"8">>}, {key, [<<"8">>, 8]}, {value, 8}]}, + {row, [{id, <<"9">>}, {key, [<<"9">>, 9]}, {value, 9}]} + ], + + Result = run_query(<<"boom">>, #{ + start_key => [<<"4">>] + }), + + ?assertEqual({ok, Rows}, Result), + + ResultRev = run_query(<<"boom">>, #{ + start_key => [<<"9">>, 9], + direction => rev, + limit => 6 + }), + + ?assertEqual({ok, lists:reverse(Rows)}, ResultRev). 
+ + +should_map_with_startkey_and_endkey_with_key_array() -> + Rows1 = [ + {row, [{id, <<"4">>}, {key, [<<"4">>, 4]}, {value, 4}]}, + {row, [{id, <<"5">>}, {key, [<<"5">>, 5]}, {value, 5}]}, + {row, [{id, <<"6">>}, {key, [<<"6">>, 6]}, {value, 6}]}, + {row, [{id, <<"7">>}, {key, [<<"7">>, 7]}, {value, 7}]}, + {row, [{id, <<"8">>}, {key, [<<"8">>, 8]}, {value, 8}]} + ], + + Rows2 = [ + {row, [{id, <<"4">>}, {key, [<<"4">>, 4]}, {value, 4}]}, + {row, [{id, <<"5">>}, {key, [<<"5">>, 5]}, {value, 5}]}, + {row, [{id, <<"6">>}, {key, [<<"6">>, 6]}, {value, 6}]}, + {row, [{id, <<"7">>}, {key, [<<"7">>, 7]}, {value, 7}]}, + {row, [{id, <<"8">>}, {key, [<<"8">>, 8]}, {value, 8}]}, + {row, [{id, <<"9">>}, {key, [<<"9">>, 9]}, {value, 9}]} + ], + + Result = run_query(<<"boom">>, #{ + start_key => [<<"4">>], + end_key => [<<"8">>, []] + }), + + ?assertEqual({ok, Rows1}, Result), + + ResultRev = run_query(<<"boom">>, #{ + start_key => [<<"8">>, []], + end_key => [<<"4">>], + direction => rev + }), + + ?assertEqual({ok, lists:reverse(Rows1)}, ResultRev), + + ResultRev2 = run_query(<<"boom">>, #{ + start_key => [<<"9">>, 9], + end_key => [<<"4">>], + direction => rev, + inclusive_end => false + }), + + % Here, [<<"4">>] is less than [<<"4">>, 4] so we + % expect rows 9-4 + ?assertEqual({ok, lists:reverse(Rows2)}, ResultRev2), + + ResultRev3 = run_query(<<"boom">>, #{ + start_key => [<<"9">>, 9], + end_key => [<<"4">>, 4], + direction => rev, + inclusive_end => false + }), + + % Here, specifying [<<"4">>, 4] as the key will prevent + % us from including that row which leaves rows 9-5 + ?assertEqual({ok, lists:reverse(lists:nthtail(1, Rows2))}, ResultRev3). + + +should_map_empty_views() -> + Result = run_query(<<"bing">>, #{}), + Expect = {ok, []}, + ?assertEqual(Expect, Result). + + +should_map_with_doc_emit() -> + Result = run_query(<<"doc_emit">>, #{ + start_key => 8, + limit => 1 + }), + Doc = {[ + {<<"_id">>, <<"8">>}, + {<<"_rev">>, <<"1-55b9a29311341e07ec0a7ca13bc1b59f">>}, + {<<"val">>, 8} + ]}, + Expect = {ok, [ + {row, [{id, <<"8">>}, {key, 8}, {value, Doc}]} + ]}, + ?assertEqual(Expect, Result). + + +should_map_duplicate_keys() -> + Result = run_query(<<"duplicate_keys">>, #{ + limit => 6 + }), + Expect = {ok, [ + {row, [{id, <<"1">>}, {key, <<"1">>}, {value, 1}]}, + {row, [{id, <<"1">>}, {key, <<"1">>}, {value, 2}]}, + {row, [{id, <<"10">>}, {key, <<"10">>}, {value, 10}]}, + {row, [{id, <<"10">>}, {key, <<"10">>}, {value, 11}]}, + {row, [{id, <<"2">>}, {key, <<"2">>}, {value, 2}]}, + {row, [{id, <<"2">>}, {key, <<"2">>}, {value, 3}]} + ]}, + ?assertEqual(Expect, Result). 
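% Illustrative collation facts the key-array range tests above rely on,
% consistent with the type ordering exercised in couch_views_encoding_test
% earlier in this diff:
%
%     [<<"4">>]    <  [<<"4">>, 4]     % a prefix sorts before its extensions
%     [<<"8">>, 8] <  [<<"8">>, []]    % arrays sort after numbers, so [] acts
%                                      % as a high sentinel for the end_key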
+ + +should_map_update_is_false() -> + Expect = {ok, [ + {row, [{id, <<"8">>}, {key, 8}, {value, 8}]}, + {row, [{id, <<"9">>}, {key, 9}, {value, 9}]}, + {row, [{id, <<"10">>}, {key, 10}, {value, 10}]} + ]}, + + Expect1 = {ok, [ + {row, [{id, <<"8">>}, {key, 8}, {value, 8}]}, + {row, [{id, <<"9">>}, {key, 9}, {value, 9}]}, + {row, [{id, <<"10">>}, {key, 10}, {value, 10}]}, + {row, [{id, <<"11">>}, {key, 11}, {value, 11}]} + ]}, + + Idx = <<"baz">>, + DbName = ?tempdb(), + + {ok, Db} = fabric2_db:create(DbName, [{user_ctx, ?ADMIN_USER}]), + + DDoc = create_ddoc(), + Docs = make_docs(10), + fabric2_db:update_docs(Db, [DDoc | Docs]), + + Args1 = #{ + start_key => 8 + }, + + Result1 = couch_views:query(Db, DDoc, Idx, fun default_cb/2, + [], Args1), + ?assertEqual(Expect, Result1), + + Doc = doc(11), + fabric2_db:update_doc(Db, Doc), + + Args2 = #{ + start_key => 8, + update => false + }, + + Result2 = couch_views:query(Db, DDoc, Idx, fun default_cb/2, + [], Args2), + ?assertEqual(Expect, Result2), + + Result3 = couch_views:query(Db, DDoc, Idx, fun default_cb/2, + [], Args1), + ?assertEqual(Expect1, Result3). + + +should_map_update_is_lazy() -> + Expect = {ok, [ + {row, [{id, <<"8">>}, {key, 8}, {value, 8}]}, + {row, [{id, <<"9">>}, {key, 9}, {value, 9}]}, + {row, [{id, <<"10">>}, {key, 10}, {value, 10}]} + ]}, + + Idx = <<"baz">>, + DbName = ?tempdb(), + + {ok, Db} = fabric2_db:create(DbName, [{user_ctx, ?ADMIN_USER}]), + + DDoc = create_ddoc(), + Docs = make_docs(10), + + fabric2_db:update_docs(Db, [DDoc | Docs]), + + Args1 = #{ + start_key => 8, + update => lazy + }, + + Result1 = couch_views:query(Db, DDoc, Idx, fun default_cb/2, + [], Args1), + ?assertEqual({ok, []}, Result1), + + {ok, Mrst} = couch_views_util:ddoc_to_mrst(DbName, DDoc), + JobId = couch_views_jobs:job_id(Db, Mrst), + UpdateSeq = fabric2_db:get_update_seq(Db), + ok = couch_views_jobs:wait_for_job(JobId, DDoc#doc.id, UpdateSeq), + + Args2 = #{ + start_key => 8, + update => false + }, + + Result2 = couch_views:query(Db, DDoc, Idx, fun default_cb/2, + [], Args2), + ?assertEqual(Expect, Result2). + + +should_map_wait_for_interactive() -> + DbName = ?tempdb(), + {ok, Db} = fabric2_db:create(DbName, [{user_ctx, ?ADMIN_USER}]), + + DDoc = create_interactive_ddoc(), + Docs = make_docs(101), + + fabric2_db:update_docs(Db, Docs), + fabric2_db:update_docs(Db, [DDoc]), + + Result = couch_views:query(Db, DDoc, <<"idx_01">>, fun default_cb/2, [], + #{limit => 3}), + ?assertEqual({ok, [ + {row, [{id, <<"1">>}, {key, 1}, {value, 1}]}, + {row, [{id, <<"2">>}, {key, 2}, {value, 2}]}, + {row, [{id, <<"3">>}, {key, 3}, {value, 3}]} + ]}, Result). + + +should_map_local_seq() -> + ExpectedTrue = [ + {row, [{id, <<"1">>}, {key, 1}, {value, 1}]}, + {row, [{id, <<"2">>}, {key, 2}, {value, 2}]}, + {row, [{id, <<"3">>}, {key, 3}, {value, 3}]} + ], + check_local_seq(true, ExpectedTrue), + + ExpectedFalse = [], + check_local_seq(false, ExpectedFalse), + + Error = {bad_request,invalid_design_doc, + <<"`options.local_seq` field must have boolean type">>}, + ?assertThrow(Error, check_local_seq(something_else, null)). + + +check_local_seq(Val, Expected) -> + DbName = ?tempdb(), + {ok, Db} = fabric2_db:create(DbName, [{user_ctx, ?ADMIN_USER}]), + + DDoc = create_local_seq_ddoc(Val), + Docs = make_docs(5), + fabric2_db:update_docs(Db, [DDoc | Docs]), + + {ok, Result} = couch_views:query(Db, DDoc, <<"idx_01">>, fun default_cb/2, [], + #{limit => 3}), + + ?assertEqual(Expected, Result). 
+ + +% should_give_ext_size_seq_indexed_test(Db) -> +% DDoc = couch_doc:from_json_obj({[ +% {<<"_id">>, <<"_design/seqdoc">>}, +% {<<"options">>, {[{<<"seq_indexed">>, true}]}}, +% {<<"views">>, {[ +% {<<"view1">>, {[ +% {<<"map">>, <<"function(doc){emit(doc._id, doc._id);}">>} +% ]}} +% ]} +% } +% ]}), +% {ok, _} = couch_db:update_doc(Db, DDoc, []), +% {ok, Db1} = couch_db:open_int(couch_db:name(Db), []), +% {ok, DDoc1} = couch_db:open_doc(Db1, <<"_design/seqdoc">>, [ejson_body]), +% couch_mrview:query_view(Db1, DDoc1, <<"view1">>, [{update, true}]), +% {ok, Info} = couch_mrview:get_info(Db1, DDoc), +% Size = couch_util:get_nested_json_value({Info}, [sizes, external]), +% ok = couch_db:close(Db1), +% ?assert(is_number(Size)). + + +run_query(Idx, Args) -> + run_query(Idx, Args, false). + + +run_query(Idx, Args, DebugCluster) -> + DbName = ?tempdb(), + {ok, Db} = fabric2_db:create(DbName, [{user_ctx, ?ADMIN_USER}]), + DDoc = create_ddoc(), + Docs = make_docs(10), + fabric2_db:update_docs(Db, [DDoc | Docs]), + if not DebugCluster -> ok; true -> + couch_views:query(Db, DDoc, Idx, fun default_cb/2, [], #{}), + fabric2_fdb:debug_cluster(), + ok + end, + couch_views:query(Db, DDoc, Idx, fun default_cb/2, [], Args). + + +default_cb(complete, Acc) -> + {ok, lists:reverse(Acc)}; +default_cb({final, Info}, []) -> + {ok, [Info]}; +default_cb({final, _}, Acc) -> + {ok, Acc}; +default_cb({meta, _}, Acc) -> + {ok, Acc}; +default_cb(ok, ddoc_updated) -> + {ok, ddoc_updated}; +default_cb(Row, Acc) -> + {ok, [Row | Acc]}. + + +create_ddoc() -> + couch_doc:from_json_obj({[ + {<<"_id">>, <<"_design/bar">>}, + {<<"views">>, {[ + {<<"baz">>, {[ + {<<"map">>, <<"function(doc) {emit(doc.val, doc.val);}">>} + ]}}, + {<<"boom">>, {[ + {<<"map">>, << + "function(doc) {\n" + " emit([doc.val.toString(), doc.val], doc.val);\n" + "}" + >>} + ]}}, + {<<"bing">>, {[ + {<<"map">>, <<"function(doc) {}">>} + ]}}, + {<<"doc_emit">>, {[ + {<<"map">>, <<"function(doc) {emit(doc.val, doc)}">>} + ]}}, + {<<"duplicate_keys">>, {[ + {<<"map">>, << + "function(doc) {\n" + " emit(doc._id, doc.val);\n" + " emit(doc._id, doc.val + 1);\n" + "}">>} + ]}}, + {<<"zing">>, {[ + {<<"map">>, << + "function(doc) {\n" + " if(doc.foo !== undefined)\n" + " emit(doc.foo, 0);\n" + "}" + >>} + ]}} + ]}} + ]}). + +create_interactive_ddoc() -> + couch_doc:from_json_obj({[ + {<<"_id">>, <<"_design/ddoc_interactive">>}, + {<<"language">>, <<"javascript">>}, + {<<"views">>, {[ + {<<"idx_01">>, {[ + {<<"map">>, << + "function(doc) {" + "if (doc.val) {" + "emit(doc.val, doc.val);" + "}" + "}">>} + ]}} + ]}}, + {<<"autoupdate">>, false}, + {<<"interactive">>, true} + ]}). + + +create_local_seq_ddoc(Val) -> + couch_doc:from_json_obj({[ + {<<"_id">>, <<"_design/ddoc_local_seq">>}, + {<<"options">>, {[{<<"local_seq">>, Val}]}}, + {<<"language">>, <<"javascript">>}, + {<<"views">>, {[ + {<<"idx_01">>, {[ + {<<"map">>, << + "function(doc) {" + "if (doc._local_seq) {" + "emit(doc.val, doc.val);" + "}" + "}">>} + ]}} + ]}} + ]}). + + +make_docs(Count) -> + [doc(I) || I <- lists:seq(1, Count)]. + + +doc(Id) -> + couch_doc:from_json_obj({[ + {<<"_id">>, list_to_binary(integer_to_list(Id))}, + {<<"val">>, Id} + ]}). 
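A note on the helpers above: `run_query/2,3` drives `couch_views:query/6` with `default_cb/2`, which receives a `{meta, _}` message, one call per row, and a final `complete` (plus `{final, _}` for reduce queries). As a minimal sketch of that callback contract for a plain map query, a hypothetical row-counting callback might look like this (illustrative only, not part of the test suite):

```erlang
% Hypothetical callback: count rows instead of collecting them.
count_cb({meta, _}, Acc) -> {ok, Acc};
count_cb({row, _}, Acc) -> {ok, Acc + 1};
count_cb(complete, Acc) -> {ok, Acc}.

% Usage, with Db and DDoc created as in run_query/3 above:
% {ok, N} = couch_views:query(Db, DDoc, <<"baz">>, fun count_cb/2, 0, #{}).
```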
diff --git a/src/couch_views/test/couch_views_red_test.erl b/src/couch_views/test/couch_views_red_test.erl new file mode 100644 index 000000000..707611f6e --- /dev/null +++ b/src/couch_views/test/couch_views_red_test.erl @@ -0,0 +1,745 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_views_red_test). + +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("couch/include/couch_db.hrl"). +-include_lib("fabric/test/fabric2_test.hrl"). +-include("couch_views.hrl"). + + +-define(NUM_DOCS, 2000). + + +reduce_views_shraed_db_test_() -> + { + "Reduce views", + { + setup, + fun setup_db_with_docs/0, + fun teardown_db/1, + with([ + ?TDEF(should_reduce), + ?TDEF(should_reduce_rev), + ?TDEF(should_reduce_start_key), + ?TDEF(should_reduce_start_key_rev), + ?TDEF(should_reduce_end_key), + ?TDEF(should_reduce_end_key_rev), + ?TDEF(should_reduce_inclusive_end_false), + ?TDEF(should_reduce_inclusive_end_false_rev), + ?TDEF(should_reduce_start_and_end_key), + ?TDEF(should_reduce_start_and_end_key_rev), + ?TDEF(should_reduce_empty_range), + ?TDEF(should_reduce_empty_range_rev), + ?TDEF(should_reduce_grouped), + ?TDEF(should_reduce_grouped_rev), + ?TDEF(should_reduce_grouped_start_key), + ?TDEF(should_reduce_grouped_start_key_rev), + ?TDEF(should_reduce_grouped_end_key), + ?TDEF(should_reduce_grouped_end_key_rev), + ?TDEF(should_reduce_grouped_inclusive_end_false), + ?TDEF(should_reduce_grouped_inclusive_end_false_rev), + ?TDEF(should_reduce_grouped_start_and_end_key), + ?TDEF(should_reduce_grouped_start_and_end_key_rev), + ?TDEF(should_reduce_grouped_empty_range), + ?TDEF(should_reduce_grouped_empty_range_rev), + ?TDEF(should_reduce_array_keys), + ?TDEF(should_reduce_grouped_array_keys), + ?TDEF(should_reduce_group_1_array_keys), + ?TDEF(should_reduce_group_1_array_keys_start_key), + ?TDEF(should_reduce_group_1_array_keys_start_key_rev), + ?TDEF(should_reduce_group_1_array_keys_end_key), + ?TDEF(should_reduce_group_1_array_keys_end_key_rev), + ?TDEF(should_reduce_group_1_array_keys_inclusive_end_false), + ?TDEF(should_reduce_group_1_array_keys_inclusive_end_false_rev), + ?TDEF(should_reduce_group_1_array_keys_start_and_end_key), + ?TDEF(should_reduce_group_1_array_keys_start_and_end_key_rev), + ?TDEF(should_reduce_group_1_array_keys_sub_array_select), + ?TDEF(should_reduce_group_1_array_keys_sub_array_select_rev), + ?TDEF(should_reduce_group_1_array_keys_sub_array_inclusive_end), + ?TDEF(should_reduce_group_1_array_keys_empty_range), + ?TDEF(should_reduce_group_1_array_keys_empty_range_rev) + ]) + } + }. + + +reduce_views_collation_test_() -> + { + "Reduce collation test", + { + setup, + fun setup_db/0, + fun teardown_db/1, + with([ + ?TDEF(should_collate_group_keys) + ]) + } + }. + + +setup_db() -> + Ctx = test_util:start_couch([ + fabric, + couch_jobs, + couch_js, + couch_views + ]), + {ok, Db} = fabric2_db:create(?tempdb(), [{user_ctx, ?ADMIN_USER}]), + {Db, Ctx}. 
+ + +setup_db_with_docs() -> + {Db, Ctx} = setup_db(), + fabric2_db:update_docs(Db, [create_ddoc()]), + make_docs(Db, ?NUM_DOCS), + run_query(Db, <<"baz">>, #{limit => 0}), + {Db, Ctx}. + + +teardown_db({Db, Ctx}) -> + fabric2_db:delete(fabric2_db:name(Db), [{user_ctx, ?ADMIN_USER}]), + test_util:stop_couch(Ctx). + + +should_reduce({Db, _}) -> + Result = run_query(Db, <<"baz_count">>, #{}), + Expect = {ok, [row(null, ?NUM_DOCS)]}, + ?assertEqual(Expect, Result). + + +should_reduce_rev({Db, _}) -> + Args = #{ + direction => rev + }, + Result = run_query(Db, <<"baz_count">>, Args), + Expect = {ok, [row(null, ?NUM_DOCS)]}, + ?assertEqual(Expect, Result). + + +should_reduce_start_key({Db, _}) -> + Args = #{ + start_key => 4 + }, + Result = run_query(Db, <<"baz_count">>, Args), + Expect = {ok, [row(null, ?NUM_DOCS - 3)]}, + ?assertEqual(Expect, Result). + + +should_reduce_start_key_rev({Db, _}) -> + Args = #{ + direction => rev, + start_key => 4 + }, + Result = run_query(Db, <<"baz_count">>, Args), + Expect = {ok, [row(null, 4)]}, + ?assertEqual(Expect, Result). + + +should_reduce_end_key({Db, _}) -> + Args = #{ + end_key => 6 + }, + Result = run_query(Db, <<"baz_count">>, Args), + Expect = {ok, [row(null, 6)]}, + ?assertEqual(Expect, Result). + + +should_reduce_end_key_rev({Db, _}) -> + Args = #{ + direction => rev, + end_key => 6 + }, + Result = run_query(Db, <<"baz_count">>, Args), + Expect = {ok, [row(null, ?NUM_DOCS - 5)]}, + ?assertEqual(Expect, Result). + + +should_reduce_inclusive_end_false({Db, _}) -> + Args = #{ + end_key => 6, + inclusive_end => false + }, + Result = run_query(Db, <<"baz_count">>, Args), + Expect = {ok, [row(null, 5)]}, + ?assertEqual(Expect, Result). + + +should_reduce_inclusive_end_false_rev({Db, _}) -> + Args = #{ + direction => rev, + end_key => 6, + inclusive_end => false + }, + Result = run_query(Db, <<"baz_count">>, Args), + Expect = {ok, [row(null, ?NUM_DOCS - 6)]}, + ?assertEqual(Expect, Result). + + +should_reduce_start_and_end_key({Db, _}) -> + Args = #{ + start_key => 3, + end_key => 5 + }, + Result = run_query(Db, <<"baz_count">>, Args), + Expect = {ok, [row(null, 3)]}, + ?assertEqual(Expect, Result). + + +should_reduce_start_and_end_key_rev({Db, _}) -> + Args = #{ + direction => rev, + start_key => 5, + end_key => 3 + }, + Result = run_query(Db, <<"baz_count">>, Args), + Expect = {ok, [row(null, 3)]}, + ?assertEqual(Expect, Result). + + +should_reduce_empty_range({Db, _}) -> + Args = #{ + start_key => 100000, + end_key => 100001 + }, + Result = run_query(Db, <<"baz_count">>, Args), + Expect = {ok, [row(null, 0)]}, + ?assertEqual(Expect, Result). + + +should_reduce_empty_range_rev({Db, _}) -> + Args = #{ + direction => rev, + start_key => 100001, + end_key => 100000 + }, + Result = run_query(Db, <<"baz_count">>, Args), + Expect = {ok, [row(null, 0)]}, + ?assertEqual(Expect, Result). + + +should_reduce_grouped({Db, _}) -> + Args = #{ + group_level => exact + }, + Result = run_query(Db, <<"baz_count">>, Args), + Expect = {ok, [ + row(I, 1) || I <- lists:seq(1, ?NUM_DOCS) + ]}, + ?assertEqual(Expect, Result). + + +should_reduce_grouped_rev({Db, _}) -> + Args = #{ + direction => rev, + group_level => exact + }, + Result = run_query(Db, <<"baz_count">>, Args), + Expect = {ok, [ + row(I, 1) || I <- lists:seq(?NUM_DOCS, 1, -1) + ]}, + ?assertEqual(Expect, Result). 
+ + +should_reduce_grouped_start_key({Db, _}) -> + Args = #{ + group_level => exact, + start_key => 3 + }, + Result = run_query(Db, <<"baz_count">>, Args), + Expect = {ok, [ + row(I, 1) || I <- lists:seq(3, ?NUM_DOCS) + ]}, + ?assertEqual(Expect, Result). + + +should_reduce_grouped_start_key_rev({Db, _}) -> + Args = #{ + direction => rev, + group_level => exact, + start_key => 3 + }, + Result = run_query(Db, <<"baz_count">>, Args), + Expect = {ok, [ + row(3, 1), + row(2, 1), + row(1, 1) + ]}, + ?assertEqual(Expect, Result). + + +should_reduce_grouped_end_key({Db, _}) -> + Args = #{ + group_level => exact, + end_key => 6 + }, + Result = run_query(Db, <<"baz_count">>, Args), + Expect = {ok, [ + row(I, 1) || I <- lists:seq(1, 6) + ]}, + ?assertEqual(Expect, Result). + + +should_reduce_grouped_end_key_rev({Db, _}) -> + Args = #{ + direction => rev, + group_level => exact, + end_key => 6 + }, + Result = run_query(Db, <<"baz_count">>, Args), + Expect = {ok, [ + row(I, 1) || I <- lists:seq(?NUM_DOCS, 6, -1) + ]}, + ?assertEqual(Expect, Result). + + +should_reduce_grouped_inclusive_end_false({Db, _}) -> + Args = #{ + group_level => exact, + end_key => 4, + inclusive_end => false + }, + Result = run_query(Db, <<"baz_count">>, Args), + Expect = {ok, [ + row(I, 1) || I <- lists:seq(1, 3) + ]}, + ?assertEqual(Expect, Result). + + +should_reduce_grouped_inclusive_end_false_rev({Db, _}) -> + Args = #{ + direction => rev, + group_level => exact, + end_key => 4, + inclusive_end => false + }, + Result = run_query(Db, <<"baz_count">>, Args), + Expect = {ok, [ + row(I, 1) || I <- lists:seq(?NUM_DOCS, 5, -1) + ]}, + ?assertEqual(Expect, Result). + + +should_reduce_grouped_start_and_end_key({Db, _}) -> + Args = #{ + group_level => exact, + start_key => 2, + end_key => 4 + }, + Result = run_query(Db, <<"baz_count">>, Args), + Expect = {ok, [ + row(I, 1) || I <- lists:seq(2, 4) + ]}, + ?assertEqual(Expect, Result). + + +should_reduce_grouped_start_and_end_key_rev({Db, _}) -> + Args = #{ + direction => rev, + group_level => exact, + start_key => 4, + end_key => 2 + }, + Result = run_query(Db, <<"baz_count">>, Args), + Expect = {ok, [ + row(I, 1) || I <- lists:seq(4, 2, -1) + ]}, + ?assertEqual(Expect, Result). + + +should_reduce_grouped_empty_range({Db, _}) -> + Args = #{ + group_level => exact, + start_key => 100000, + end_key => 100001 + }, + Result = run_query(Db, <<"baz_count">>, Args), + Expect = {ok, []}, + ?assertEqual(Expect, Result). + + +should_reduce_grouped_empty_range_rev({Db, _}) -> + Args = #{ + direction => rev, + group_level => exact, + start_key => 100001, + end_key => 100000 + }, + Result = run_query(Db, <<"baz_count">>, Args), + Expect = {ok, []}, + ?assertEqual(Expect, Result). + + +should_reduce_array_keys({Db, _}) -> + Result = run_query(Db, <<"boom">>, #{}), + Expect = {ok, [row(null, 1.5 * ?NUM_DOCS)]}, + ?assertEqual(Expect, Result). + + +should_reduce_grouped_array_keys({Db, _}) -> + Args = #{ + group_level => exact + }, + Result = run_query(Db, <<"boom">>, Args), + Expect = {ok, lists:sort([ + row([I rem 3, I], 1.5) || I <- lists:seq(1, ?NUM_DOCS) + ])}, + ?assertEqual(Expect, Result). + + +should_reduce_group_1_array_keys({Db, _}) -> + Args = #{ + group_level => 1 + }, + Result = run_query(Db, <<"boom">>, Args), + Expect = {ok, [ + row([0], rem_count(0, ?NUM_DOCS) * 1.5), + row([1], rem_count(1, ?NUM_DOCS) * 1.5), + row([2], rem_count(2, ?NUM_DOCS) * 1.5) + ]}, + ?assertEqual(Expect, Result). 
+ + +should_reduce_group_1_array_keys_start_key({Db, _}) -> + Args = #{ + group_level => 1, + start_key => [1] + }, + Result = run_query(Db, <<"boom">>, Args), + Expect = {ok, [ + row([1], rem_count(1, ?NUM_DOCS) * 1.5), + row([2], rem_count(2, ?NUM_DOCS) * 1.5) + ]}, + ?assertEqual(Expect, Result). + + +should_reduce_group_1_array_keys_start_key_rev({Db, _}) -> + Args = #{ + direction => rev, + group_level => 1, + start_key => [1, ?NUM_DOCS + 1] + }, + Result = run_query(Db, <<"boom">>, Args), + Expect = {ok, [ + row([1], rem_count(1, ?NUM_DOCS) * 1.5), + row([0], rem_count(0, ?NUM_DOCS) * 1.5) + ]}, + ?assertEqual(Expect, Result). + + +should_reduce_group_1_array_keys_end_key({Db, _}) -> + Args = #{ + group_level => 1, + end_key => [1, ?NUM_DOCS + 1] + }, + Result = run_query(Db, <<"boom">>, Args), + Expect = {ok, [ + row([0], rem_count(0, ?NUM_DOCS) * 1.5), + row([1], rem_count(1, ?NUM_DOCS) * 1.5) + ]}, + ?assertEqual(Expect, Result). + + +should_reduce_group_1_array_keys_end_key_rev({Db, _}) -> + Args = #{ + direction => rev, + group_level => 1, + end_key => [1] + }, + Result = run_query(Db, <<"boom">>, Args), + Expect = {ok, [ + row([2], rem_count(2, ?NUM_DOCS) * 1.5), + row([1], rem_count(1, ?NUM_DOCS) * 1.5) + ]}, + ?assertEqual(Expect, Result). + + +should_reduce_group_1_array_keys_inclusive_end_false({Db, _}) -> + Args = #{ + group_level => 1, + end_key => [1], + inclusive_end => false + }, + Result = run_query(Db, <<"boom">>, Args), + Expect = {ok, [ + row([0], rem_count(0, ?NUM_DOCS) * 1.5) + ]}, + ?assertEqual(Expect, Result). + + +should_reduce_group_1_array_keys_inclusive_end_false_rev({Db, _}) -> + Args = #{ + direction => rev, + group_level => 1, + end_key => [1, ?NUM_DOCS + 1], + inclusive_end => false + }, + Result = run_query(Db, <<"boom">>, Args), + Expect = {ok, [ + row([2], rem_count(2, ?NUM_DOCS) * 1.5) + ]}, + ?assertEqual(Expect, Result). + + +should_reduce_group_1_array_keys_start_and_end_key({Db, _}) -> + Args = #{ + group_level => 1, + start_key => [1], + end_key => [1, ?NUM_DOCS + 1] + }, + Result = run_query(Db, <<"boom">>, Args), + Expect = {ok, [ + row([1], rem_count(1, ?NUM_DOCS) * 1.5) + ]}, + ?assertEqual(Expect, Result). + + +should_reduce_group_1_array_keys_start_and_end_key_rev({Db, _}) -> + Args = #{ + direction => rev, + group_level => 1, + start_key => [1, ?NUM_DOCS + 1], + end_key => [1] + }, + Result = run_query(Db, <<"boom">>, Args), + Expect = {ok, [ + row([1], rem_count(1, ?NUM_DOCS) * 1.5) + ]}, + ?assertEqual(Expect, Result). + + +should_reduce_group_1_array_keys_sub_array_select({Db, _}) -> + % Test that keys are applied below the key grouping + Args = #{ + group_level => 1, + start_key => [0, ?NUM_DOCS - 6], + end_key => [1, 4] + }, + Result = run_query(Db, <<"boom">>, Args), + Expect = {ok, [ + row([0], 3.0), + row([1], 3.0) + ]}, + ?assertEqual(Expect, Result). + + +should_reduce_group_1_array_keys_sub_array_select_rev({Db, _}) -> + % Test that keys are applied below the key grouping + Args = #{ + direction => rev, + group_level => 1, + start_key => [1, 4], + end_key => [0, ?NUM_DOCS - 6] + }, + Result = run_query(Db, <<"boom">>, Args), + Expect = {ok, [ + row([1], 3.0), + row([0], 3.0) + ]}, + ?assertEqual(Expect, Result). 
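For reference, the `group_level => 1` expectations above reduce to simple counting: the `boom` view emits `[doc.val % 3, doc.val]` with value 1.5, so each first-element group sums to `rem_count(R, ?NUM_DOCS) * 1.5` (with `rem_count/2` defined further down). A quick shell sketch of those numbers for `?NUM_DOCS` = 2000, shown only to make the arithmetic explicit:

```erlang
%% Evaluated in an Erlang shell:
NumDocs = 2000.
Counts = [length([I || I <- lists:seq(1, NumDocs), I rem 3 == R]) || R <- [0, 1, 2]].
%% => [666, 667, 667]
[C * 1.5 || C <- Counts].
%% => [999.0, 1000.5, 1000.5], the values behind row([0], ...), row([1], ...) and row([2], ...)
```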
+ + +should_reduce_group_1_array_keys_sub_array_inclusive_end({Db, _}) -> + % Test that keys are applied below the key grouping + Args = #{ + group_level => 1, + start_key => [0, ?NUM_DOCS - 6], + end_key => [1, 4], + inclusive_end => false + }, + Result = run_query(Db, <<"boom">>, Args), + Expect = {ok, [ + row([0], 3.0), + row([1], 1.5) + ]}, + ?assertEqual(Expect, Result). + + +should_reduce_group_1_array_keys_empty_range({Db, _}) -> + Args = #{ + group_level => 1, + start_key => [100], + end_key => [101] + }, + Result = run_query(Db, <<"boom">>, Args), + Expect = {ok, []}, + ?assertEqual(Expect, Result). + + +should_reduce_group_1_array_keys_empty_range_rev({Db, _}) -> + Args = #{ + direction => rev, + group_level => 1, + start_key => [101], + end_key => [100] + }, + Result = run_query(Db, <<"boom">>, Args), + Expect = {ok, []}, + ?assertEqual(Expect, Result). + + +should_collate_group_keys({Db, _}) -> + DDoc = couch_doc:from_json_obj({[ + {<<"_id">>, <<"_design/bar">>}, + {<<"views">>, {[ + {<<"group">>, {[ + {<<"map">>, <<"function(doc) {emit([doc.val], 1);}">>}, + {<<"reduce">>, <<"_count">>} + ]}} + ]}} + ]}), + + % val is "föö" without combining characters + Doc1 = couch_doc:from_json_obj({[ + {<<"_id">>, <<"a">>}, + {<<"val">>, <<16#66, 16#C3, 16#B6, 16#C3, 16#B6>>} + ]}), + + % val is "föö" without combining characters + Doc2 = couch_doc:from_json_obj({[ + {<<"_id">>, <<"b">>}, + {<<"val">>, <<16#66, 16#6F, 16#CC, 16#88, 16#6F, 16#CC, 16#88>>} + ]}), + {ok, _} = fabric2_db:update_docs(Db, [DDoc, Doc1, Doc2]), + + % An implementation detail we have is that depending on + % the direction of the view read we'll get the first + % or last key to represent a group. In this particular + % implementation the document ID breaks the sort tie + % in the map view data. + + ArgsFwd = #{ + group_level => exact + }, + ResultFwd = run_query(Db, DDoc, <<"group">>, ArgsFwd), + ExpectFwd = {ok, [ + row([<<16#66, 16#C3, 16#B6, 16#C3, 16#B6>>], 2) + ]}, + ?assertEqual(ExpectFwd, ResultFwd), + + ArgsRev = #{ + direction => rev, + group_level => exact + }, + ResultRev = run_query(Db, DDoc, <<"group">>, ArgsRev), + ExpectRev = {ok, [ + row([<<16#66, 16#6F, 16#CC, 16#88, 16#6F, 16#CC, 16#88>>], 2) + ]}, + ?assertEqual(ExpectRev, ResultRev). + + +rem_count(Rem, Count) -> + Members = [I || I <- lists:seq(1, Count), I rem 3 == Rem], + length(Members). + + +run_query(Db, Idx, Args) -> + DDoc = create_ddoc(), + run_query(Db, DDoc, Idx, Args). + + +run_query(Db, DDoc, Idx, Args) -> + couch_views:query(Db, DDoc, Idx, fun default_cb/2, [], Args). + + +default_cb(complete, Acc) -> + {ok, lists:reverse(Acc)}; +default_cb({final, Info}, []) -> + {ok, [Info]}; +default_cb({final, _}, Acc) -> + {ok, Acc}; +default_cb({meta, _}, Acc) -> + {ok, Acc}; +default_cb(ok, ddoc_updated) -> + {ok, ddoc_updated}; +default_cb(Row, Acc) -> + {ok, [Row | Acc]}. + + +row(Key, Value) -> + {row, [{key, Key}, {value, Value}]}. 
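One detail worth spelling out in `should_collate_group_keys/1` above: doc "a" stores the precomposed form of "föö" while doc "b" stores the decomposed form built with combining characters, so the two `val` binaries differ byte-for-byte even though they render identically. The grouping therefore relies on the view's collation rather than raw byte comparison. A small illustrative snippet:

```erlang
%% "föö" with precomposed U+00F6 (UTF-8: C3 B6)
Precomposed = <<16#66, 16#C3, 16#B6, 16#C3, 16#B6>>.
%% "föö" as "o" followed by U+0308 combining diaeresis (UTF-8: 6F CC 88)
Decomposed = <<16#66, 16#6F, 16#CC, 16#88, 16#6F, 16#CC, 16#88>>.
Precomposed =:= Decomposed.
%% => false, yet both keys land in the same group under collation
```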
+ + +create_ddoc() -> + couch_doc:from_json_obj({[ + {<<"_id">>, <<"_design/bar">>}, + {<<"views">>, {[ + {<<"baz">>, {[ + {<<"map">>, <<"function(doc) {emit(doc.val, doc.val);}">>} + ]}}, + {<<"baz_count">>, {[ + {<<"map">>, <<"function(doc) {emit(doc.val, doc.val);}">>}, + {<<"reduce">>, <<"_count">>} + ]}}, + {<<"baz_size">>, {[ + {<<"map">>, <<"function(doc) {emit(doc.val, doc.val);}">>}, + {<<"reduce">>, <<"_sum">>} + ]}}, + {<<"boom">>, {[ + {<<"map">>, << + "function(doc) {\n" + " emit([doc.val % 3, doc.val], 1.5);\n" + "}" + >>}, + {<<"reduce">>, <<"_sum">>} + ]}}, + {<<"bing">>, {[ + {<<"map">>, <<"function(doc) {}">>}, + {<<"reduce">>, <<"_count">>} + ]}}, + {<<"bing_hyper">>, {[ + {<<"map">>, <<"function(doc) {}">>}, + {<<"reduce">>, <<"_approx_count_distinct">>} + ]}}, + {<<"doc_emit">>, {[ + {<<"map">>, <<"function(doc) {emit(doc.val, doc)}">>} + ]}}, + {<<"duplicate_keys">>, {[ + {<<"map">>, << + "function(doc) {\n" + " emit(doc._id, doc.val);\n" + " emit(doc._id, doc.val + 1);\n" + "}">>}, + {<<"reduce">>, <<"_count">>} + ]}}, + {<<"zing">>, {[ + {<<"map">>, << + "function(doc) {\n" + " if(doc.foo !== undefined)\n" + " emit(doc.foo, 0);\n" + "}" + >>} + ]}} + ]}} + ]}). + + +make_docs(Db, TotalDocs) when TotalDocs > 0 -> + make_docs(Db, TotalDocs, 0). + + +make_docs(Db, TotalDocs, DocsMade) when TotalDocs > DocsMade -> + DocCount = min(TotalDocs - DocsMade, 500), + Docs = [doc(I + DocsMade) || I <- lists:seq(1, DocCount)], + fabric2_db:update_docs(Db, Docs), + make_docs(Db, TotalDocs, DocsMade + DocCount); + +make_docs(_Db, TotalDocs, DocsMade) when TotalDocs =< DocsMade -> + ok. + + +doc(Id) -> + couch_doc:from_json_obj({[ + {<<"_id">>, list_to_binary(integer_to_list(Id))}, + {<<"val">>, Id} + ]}). diff --git a/src/couch_views/test/couch_views_server_test.erl b/src/couch_views/test/couch_views_server_test.erl new file mode 100644 index 000000000..3c0c0a86a --- /dev/null +++ b/src/couch_views/test/couch_views_server_test.erl @@ -0,0 +1,217 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_views_server_test). + +-include_lib("eunit/include/eunit.hrl"). +-include_lib("fabric/test/fabric2_test.hrl"). + + +couch_views_server_test_() -> + { + "Test couch_views_server", + { + setup, + fun setup/0, + fun cleanup/1, + { + foreach, + fun foreach_setup/0, + fun foreach_teardown/1, + [ + ?TDEF_FE(max_acceptors_started), + ?TDEF_FE(acceptors_become_workers), + ?TDEF_FE(handle_worker_death), + ?TDEF_FE(handle_acceptor_death), + ?TDEF_FE(handle_unknown_process_death), + ?TDEF_FE(max_workers_limit_works), + ?TDEF_FE(max_acceptors_greater_than_max_workers) + ] + } + } + }. + + +setup() -> + Ctx = test_util:start_couch([ + fabric, + couch_jobs, + couch_js, + couch_eval + ]), + Ctx. + + +cleanup(Ctx) -> + test_util:stop_couch(Ctx). 
+ + +foreach_setup() -> + config:set("couch_views", "max_acceptors", "2", false), + config:set("couch_views", "max_workers", "4", false), + meck:new(couch_views_server, [passthrough]), + meck:new(couch_views_indexer, [passthrough]), + meck:expect(couch_views_indexer, init, fun() -> + receive pls_accept -> ok end, + couch_views_server:accepted(self()), + receive pls_die -> ok end + end), + ok = application:start(couch_views). + + +foreach_teardown(_) -> + ok = application:stop(couch_views), + meck:unload(), + config:delete("couch_views", "max_acceptors", false), + config:delete("couch_views", "max_workers", false), + ok. + + +max_acceptors_started(_) -> + #{max_acceptors := MaxAcceptors, max_workers := MaxWorkers} = get_state(), + ?assertEqual(2, MaxAcceptors), + ?assertEqual(4, MaxWorkers), + + ?assertEqual(0, maps:size(workers())), + + [Pid1, Pid2] = maps:keys(acceptors()), + ?assert(is_pid(Pid1)), + ?assert(is_pid(Pid2)), + ?assert(is_process_alive(Pid1)), + ?assert(is_process_alive(Pid2)). + + +acceptors_become_workers(_) -> + ?assertEqual(0, maps:size(workers())), + + InitAcceptors = acceptors(), + accept_all(), + + ?assertEqual(2, maps:size(acceptors())), + ?assertEqual(2, maps:size(workers())), + + ?assertEqual(InitAcceptors, workers()). + + +handle_worker_death(_) -> + [Pid1, Pid2] = maps:keys(acceptors()), + accept_all(), + + % One worker exits normal + finish_normal([Pid1]), + ?assertEqual(2, maps:size(acceptors())), + ?assertEqual(1, maps:size(workers())), + + % The other blows up with an error + finish_error([Pid2]), + ?assertEqual(2, maps:size(acceptors())), + ?assertEqual(0, maps:size(workers())). + + +handle_acceptor_death(_) -> + [Pid1, Pid2] = maps:keys(acceptors()), + finish_error([Pid1]), + + NewAcceptors = acceptors(), + ?assertEqual(2, maps:size(NewAcceptors)), + ?assert(lists:member(Pid2, maps:keys(NewAcceptors))), + ?assert(not lists:member(Pid1, maps:keys(NewAcceptors))). + + +handle_unknown_process_death(_) -> + meck:reset(couch_views_server), + Pid = self(), + whereis(couch_views_server) ! {'EXIT', Pid, blah}, + meck:wait(1, couch_views_server, terminate, + [{unknown_pid_exit, Pid}, '_'], 5000). + + +max_workers_limit_works(_) -> + % Accept 2 jobs -> 2 workers + accept_all(), + ?assertEqual(2, maps:size(workers())), + + % Accept 2 more jobs -> 4 workers + accept_all(), + ?assertEqual(0, maps:size(acceptors())), + ?assertEqual(4, maps:size(workers())), + + % Kill 1 worker -> 1 acceptor and 3 workers + [Worker1 | _] = maps:keys(workers()), + finish_normal([Worker1]), + ?assertEqual(1, maps:size(acceptors())), + ?assertEqual(3, maps:size(workers())), + + % Kill 2 more workers -> 2 acceptors and 1 worker + [Worker2, Worker3 | _] = maps:keys(workers()), + finish_normal([Worker2, Worker3]), + ?assertEqual(2, maps:size(acceptors())), + ?assertEqual(1, maps:size(workers())), + + % Kill 1 last worker -> 2 acceptors and 0 workers + [Worker4] = maps:keys(workers()), + finish_normal([Worker4]), + ?assertEqual(2, maps:size(acceptors())), + ?assertEqual(0, maps:size(workers())). + +max_acceptors_greater_than_max_workers(_) -> + [Pid1, Pid2] = maps:keys(acceptors()), + + sys:replace_state(couch_views_server, fun(#{} = St) -> + St#{max_workers := 1} + end), + + accept_all(), + + finish_normal([Pid1]), + finish_normal([Pid2]), + + % Only 1 acceptor should start as it is effectively limited by max_workers + ?assertEqual(1, maps:size(acceptors())), + ?assertEqual(0, maps:size(workers())). 
+ + +% Utility functions + +accept_all() -> + Acceptors = acceptors(), + meck:reset(couch_views_server), + [Pid ! pls_accept || Pid <- maps:keys(Acceptors)], + meck:wait(maps:size(Acceptors), couch_views_server, handle_call, 3, 5000). + + +acceptors() -> + #{acceptors := Acceptors} = get_state(), + Acceptors. + + +workers() -> + #{workers := Workers} = get_state(), + Workers. + + +get_state() -> + sys:get_state(couch_views_server, infinity). + + +finish_normal(Workers) when is_list(Workers) -> + meck:reset(couch_views_server), + [Pid ! pls_die || Pid <- Workers], + meck:wait(length(Workers), couch_views_server, handle_info, + [{'_', '_', normal}, '_'], 5000). + + +finish_error(Workers) when is_list(Workers) -> + meck:reset(couch_views_server), + [exit(Pid, badness) || Pid <- Workers], + meck:wait(length(Workers), couch_views_server, handle_info, + [{'_', '_', badness}, '_'], 5000). diff --git a/src/couch_views/test/couch_views_size_test.erl b/src/couch_views/test/couch_views_size_test.erl new file mode 100644 index 000000000..e69b5b292 --- /dev/null +++ b/src/couch_views/test/couch_views_size_test.erl @@ -0,0 +1,356 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_views_size_test). + +-include_lib("eunit/include/eunit.hrl"). +-include_lib("couch/include/couch_db.hrl"). +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("couch_mrview/include/couch_mrview.hrl"). +-include_lib("couch_views/include/couch_views.hrl"). +-include_lib("fabric/test/fabric2_test.hrl"). + + +-define(MAP_FUN1, <<"map_fun1">>). +-define(MAP_FUN2, <<"map_fun2">>). + + +indexer_test_() -> + { + "Test view indexing", + { + setup, + fun setup/0, + fun cleanup/1, + { + foreach, + fun foreach_setup/0, + fun foreach_teardown/1, + [ + ?TDEF_FE(empty_view), + ?TDEF_FE(single_doc), + ?TDEF_FE(multiple_docs), + ?TDEF_FE(update_no_size_change), + ?TDEF_FE(update_increases_size), + ?TDEF_FE(update_decreases_size), + ?TDEF_FE(deleting_docs_decreases_size), + ?TDEF_FE(multi_identical_keys_count_twice), + ?TDEF_FE(multiple_design_docs), + ?TDEF_FE(multiple_identical_design_docs) + ] + } + } + }. + + +setup() -> + Ctx = test_util:start_couch([ + fabric, + couch_jobs, + couch_js, + couch_views + ]), + Ctx. + + +cleanup(Ctx) -> + test_util:stop_couch(Ctx). + + +foreach_setup() -> + config:set("couch_views", "view_btree_node_size", "4", false), + {ok, Db} = fabric2_db:create(?tempdb(), [{user_ctx, ?ADMIN_USER}]), + Db. + + +foreach_teardown(Db) -> + meck:unload(), + config:delete("couch_views", "change_limit"), + ok = fabric2_db:delete(fabric2_db:name(Db), []). + + +empty_view(Db) -> + DDoc = create_ddoc(), + ?assertEqual(0, view_size(Db)), + {ok, _} = fabric2_db:update_doc(Db, DDoc, []), + {ok, _} = run_query(Db, DDoc, ?MAP_FUN1), + ?assertEqual(0, view_size(Db)). 
+ + +single_doc(Db) -> + DDoc = create_ddoc(), + Doc1 = doc(0), + + {ok, _} = fabric2_db:update_doc(Db, DDoc, []), + {ok, _} = fabric2_db:update_doc(Db, Doc1, []), + {ok, _} = run_query(Db, DDoc, ?MAP_FUN1), + + % Row: key: 0, row: 0 + % Bytes: key: 1, row: 1 + % Total: 1 + 1 = 2 + ?assertEqual(2, view_size(Db)). + + +multiple_docs(Db) -> + DDoc = create_ddoc(), + Docs = [doc(I) || I <- lists:seq(0, 49)], + + {ok, _} = fabric2_db:update_doc(Db, DDoc, []), + {ok, _} = fabric2_db:update_docs(Db, Docs, []), + {ok, _} = run_query(Db, DDoc, ?MAP_FUN1), + + % Rows 0-9: 1 + 1 = 2 + % Rows 10->49: 2 + 2 = 4 + % 10 * 2 + 40 * 4 = 180 + ?assertEqual(180, view_size(Db)). + + +update_no_size_change(Db) -> + DDoc = create_ddoc(), + Doc1 = doc(0), + + {ok, _} = fabric2_db:update_doc(Db, DDoc, []), + {ok, {Pos, Rev}} = fabric2_db:update_doc(Db, Doc1, []), + {ok, _} = run_query(Db, DDoc, ?MAP_FUN1), + + ?assertEqual(2, view_size(Db)), + + Doc2 = Doc1#doc{ + revs = {Pos, [Rev]}, + body = {[{<<"val">>, 1}]} + }, + {ok, _} = fabric2_db:update_doc(Db, Doc2, []), + {ok, _} = run_query(Db, DDoc, ?MAP_FUN1), + + % Row became: key: 1, val: 1 + % 1 + 1 = 2 so samesies + ?assertEqual(2, view_size(Db)). + + +update_increases_size(Db) -> + DDoc = create_ddoc(), + Doc1 = doc(0), + + {ok, _} = fabric2_db:update_doc(Db, DDoc, []), + {ok, {Pos, Rev}} = fabric2_db:update_doc(Db, Doc1, []), + {ok, _} = run_query(Db, DDoc, ?MAP_FUN1), + + ?assertEqual(2, view_size(Db)), + + Doc2 = Doc1#doc{ + revs = {Pos, [Rev]}, + body = {[{<<"val">>, 10}]} + }, + {ok, _} = fabric2_db:update_doc(Db, Doc2, []), + {ok, _} = run_query(Db, DDoc, ?MAP_FUN1), + + % Row became: key: 10, val: 10 + % 2 + 2 = 4 + ?assertEqual(4, view_size(Db)). + + +update_decreases_size(Db) -> + DDoc = create_ddoc(), + Doc1 = doc(10), + + {ok, _} = fabric2_db:update_doc(Db, DDoc, []), + {ok, {Pos, Rev}} = fabric2_db:update_doc(Db, Doc1, []), + {ok, _} = run_query(Db, DDoc, ?MAP_FUN1), + + % Row: key: 10, val: 10 + % 2 + 2 = 4 + ?assertEqual(4, view_size(Db)), + + Doc2 = Doc1#doc{ + revs = {Pos, [Rev]}, + body = {[{<<"val">>, 0}]} + }, + {ok, _} = fabric2_db:update_doc(Db, Doc2, []), + {ok, _} = run_query(Db, DDoc, ?MAP_FUN1), + + % Row became: key: 0, val: 0 + % 1 + 1 = 2 + ?assertEqual(2, view_size(Db)). + + +deleting_docs_decreases_size(Db) -> + DDoc = create_ddoc(), + Doc1 = doc(0), + + {ok, _} = fabric2_db:update_doc(Db, DDoc, []), + {ok, {Pos, Rev}} = fabric2_db:update_doc(Db, Doc1, []), + {ok, _} = run_query(Db, DDoc, ?MAP_FUN1), + + ?assertEqual(2, view_size(Db)), + + Doc2 = Doc1#doc{ + revs = {Pos, [Rev]}, + deleted = true, + body = {[{<<"val">>, 1}]} + }, + {ok, _} = fabric2_db:update_doc(Db, Doc2, []), + {ok, []} = run_query(Db, DDoc, ?MAP_FUN1), + + ?assertEqual(0, view_size(Db)). + + +multi_identical_keys_count_twice(Db) -> + DDoc = create_ddoc(multi_emit_same), + Doc = doc(0), + + {ok, _} = fabric2_db:update_doc(Db, DDoc, []), + {ok, _} = fabric2_db:update_doc(Db, Doc, []), + {ok, _} = run_query(Db, DDoc, ?MAP_FUN1), + + % Two rows that are the same + ?assertEqual(4, view_size(Db)). 
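The size expectations in the tests above follow from the byte counts noted in the comments: each emitted integer appears to cost one byte per decimal digit for the key and again for the value. A shell sketch of the 180-byte figure from `multiple_docs/1`, under that assumption (illustrative only):

```erlang
%% Assumes key and value each cost the decimal digits of I, per the comments above.
RowSize = fun(I) -> 2 * byte_size(integer_to_binary(I)) end.
lists:sum([RowSize(I) || I <- lists:seq(0, 49)]).
%% => 180   (10 one-digit rows * 2 bytes + 40 two-digit rows * 4 bytes)
```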
+ + +multiple_design_docs(Db) -> + Cleanup = fun() -> + fabric2_fdb:transactional(Db, fun(TxDb) -> + DDocs = fabric2_db:get_design_docs(Db), + ok = couch_views:cleanup_indices(TxDb, DDocs) + end) + end, + + DDoc1 = create_ddoc(simple, <<"_design/bar1">>), + DDoc2 = create_ddoc(multi_emit_same, <<"_design/bar2">>), + + % Simple test as before + {ok, _} = fabric2_db:update_doc(Db, doc(0), []), + {ok, {Pos1, Rev1}} = fabric2_db:update_doc(Db, DDoc1, []), + {ok, _} = run_query(Db, DDoc1, ?MAP_FUN1), + ?assertEqual(2, view_size(Db)), + + % Adding a second ddoc increases the size + {ok, {Pos2, Rev2}} = fabric2_db:update_doc(Db, DDoc2, []), + {ok, _} = run_query(Db, DDoc2, ?MAP_FUN1), + ?assertEqual(6, view_size(Db)), + + % Removing the first ddoc decreases the size + DDoc1Del = DDoc1#doc{revs = {Pos1, [Rev1]}, deleted = true}, + {ok, _} = fabric2_db:update_doc(Db, DDoc1Del, []), + Cleanup(), + ?assertEqual(4, view_size(Db)), + + % Removing the second ddoc drops the size + DDoc2Del = DDoc2#doc{revs = {Pos2, [Rev2]}, deleted = true}, + {ok, _} = fabric2_db:update_doc(Db, DDoc2Del, []), + Cleanup(), + ?assertEqual(0, view_size(Db)). + + +multiple_identical_design_docs(Db) -> + Cleanup = fun() -> + fabric2_fdb:transactional(Db, fun(TxDb) -> + DDocs = fabric2_db:get_design_docs(Db), + ok = couch_views:cleanup_indices(TxDb, DDocs) + end) + end, + + DDoc1 = create_ddoc(simple, <<"_design/bar1">>), + DDoc2 = create_ddoc(simple, <<"_design/bar2">>), + + % Simple test as before + {ok, _} = fabric2_db:update_doc(Db, doc(0), []), + {ok, {Pos1, Rev1}} = fabric2_db:update_doc(Db, DDoc1, []), + {ok, _} = run_query(Db, DDoc1, ?MAP_FUN1), + ?assertEqual(2, view_size(Db)), + + % Adding a second ddoc with the same sig does not double the size + {ok, {Pos2, Rev2}} = fabric2_db:update_doc(Db, DDoc2, []), + {ok, _} = run_query(Db, DDoc2, ?MAP_FUN1), + ?assertEqual(2, view_size(Db)), + + % Removing the first ddoc does not decrease the size + DDoc1Del = DDoc1#doc{revs = {Pos1, [Rev1]}, deleted = true}, + {ok, _} = fabric2_db:update_doc(Db, DDoc1Del, []), + Cleanup(), + ?assertEqual(2, view_size(Db)), + + % Removing the second ddoc drops the size + DDoc2Del = DDoc2#doc{revs = {Pos2, [Rev2]}, deleted = true}, + {ok, _} = fabric2_db:update_doc(Db, DDoc2Del, []), + Cleanup(), + ?assertEqual(0, view_size(Db)). + + +view_size(Db) -> + {ok, Info} = fabric2_db:get_db_info(Db), + {sizes, {Sizes}} = lists:keyfind(sizes, 1, Info), + {<<"views">>, ViewSize} = lists:keyfind(<<"views">>, 1, Sizes), + ViewSize. + + +create_ddoc() -> + create_ddoc(simple). + + +create_ddoc(Type) -> + create_ddoc(Type, <<"_design/bar">>). + + +create_ddoc(simple, DocId) when is_binary(DocId) -> + couch_doc:from_json_obj({[ + {<<"_id">>, DocId}, + {<<"views">>, {[ + {?MAP_FUN1, {[ + {<<"map">>, <<"function(doc) {emit(doc.val, doc.val);}">>} + ]}}, + {?MAP_FUN2, {[ + {<<"map">>, <<"function(doc) {}">>} + ]}} + ]}} + ]}); + +create_ddoc(multi_emit_same, DocId) when is_binary(DocId) -> + couch_doc:from_json_obj({[ + {<<"_id">>, DocId}, + {<<"views">>, {[ + {?MAP_FUN1, {[ + {<<"map">>, <<"function(doc) { " + "emit(doc.val, doc.val * 2); " + "emit(doc.val, doc.val); " + "if(doc.extra) {" + " emit(doc.val, doc.extra);" + "}" + "}">>} + ]}}, + {?MAP_FUN2, {[ + {<<"map">>, <<"function(doc) {}">>} + ]}} + ]}} + ]}). + + +doc(Id) -> + doc(Id, Id). + + +doc(Id, Val) -> + couch_doc:from_json_obj({[ + {<<"_id">>, list_to_binary(integer_to_list(Id))}, + {<<"val">>, Val} + ]}). 
+ + +run_query(#{} = Db, DDoc, <<_/binary>> = View) -> + couch_views:query(Db, DDoc, View, fun fold_fun/2, [], #mrargs{}). + + +fold_fun({meta, _Meta}, Acc) -> + {ok, Acc}; +fold_fun({row, _} = Row, Acc) -> + {ok, [Row | Acc]}; +fold_fun(complete, Acc) -> + {ok, lists:reverse(Acc)}.
\ No newline at end of file diff --git a/src/couch_views/test/couch_views_trace_index_test.erl b/src/couch_views/test/couch_views_trace_index_test.erl new file mode 100644 index 000000000..03c21a34a --- /dev/null +++ b/src/couch_views/test/couch_views_trace_index_test.erl @@ -0,0 +1,145 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + + +-module(couch_views_trace_index_test). + + +-include_lib("eunit/include/eunit.hrl"). +-include_lib("couch/include/couch_db.hrl"). +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("couch_mrview/include/couch_mrview.hrl"). + + +% Steps for this to work +% Run export FDB_NETWORK_OPTION_TRACE_ENABLE="" && +% make eunit apps=couch_views suites=couch_views_trace_index_test +% look in src/couch_views/.eunit for trace file +% Might need to add extra </Trace> to finish up file +% Analyze! + + +-define(EUNIT_FTW(Tests), [{with, [T]} || T <- Tests]). + + +indexer_test_() -> + { + "Trace view indexing", + { + setup, + fun setup/0, + fun cleanup/1, + { + foreach, + fun foreach_setup/0, + fun foreach_teardown/1, + ?EUNIT_FTW([ + fun trace_single_doc/1 + ]) + } + } + }. + + +setup() -> + test_util:start_couch([fabric, couch_js]). + + +cleanup(Ctx) -> + test_util:stop_couch(Ctx). + + +foreach_setup() -> + {ok, Db} = fabric2_db:create(?tempdb(), [{user_ctx, ?ADMIN_USER}]), + Db. + + +foreach_teardown(Db) -> + meck:unload(), + ok = fabric2_db:delete(fabric2_db:name(Db), []). + + +trace_single_doc(Db) -> + DbName = fabric2_db:name(Db), + DDoc = create_ddoc(), + Doc = doc(0), + + {ok, _} = fabric2_db:update_doc(Db, DDoc, []), + {ok, _} = fabric2_db:update_doc(Db, Doc, []), + {ok, Mrst} = couch_views_util:ddoc_to_mrst(DbName, DDoc), + + HexSig = fabric2_util:to_hex(Mrst#mrst.sig), + JobData = #{ + <<"db_name">> => DbName, + <<"db_uuid">> => fabric2_db:get_uuid(Db), + <<"ddoc_id">> => <<"_design/bar">>, + <<"sig">> => HexSig, + <<"retries">> => 0 + }, + meck:expect(couch_jobs, accept, 2, {ok, job, JobData}), + meck:expect(couch_jobs, update, 3, {ok, job}), + meck:expect(couch_jobs, finish, 3, ok), + meck:expect(couch_views_server, accepted, 1, ok), + + put(erlfdb_trace, <<"views_write_one_doc">>), + couch_views_indexer:init(), + + put(erlfdb_trace, <<"views_read_one_doc">>), + {ok, Out} = couch_views:query( + Db, + DDoc, + <<"map_fun1">>, + fun fold_fun/2, + [], + #mrargs{} + ), + + ?assertEqual([{row, [ + {id, <<"0">>}, + {key, 0}, + {value, 0} + ]}], Out). + + +create_ddoc() -> + couch_doc:from_json_obj({[ + {<<"_id">>, <<"_design/bar">>}, + {<<"views">>, {[ + {<<"map_fun1">>, {[ + {<<"map">>, <<"function(doc) {emit(doc.val, doc.val);}">>} + ]}}, + {<<"map_fun2">>, {[ + {<<"map">>, <<"function(doc) {}">>} + ]}} + ]}} + ]}). + + +doc(Id) -> + doc(Id, Id). + + +doc(Id, Val) -> + couch_doc:from_json_obj({[ + {<<"_id">>, list_to_binary(integer_to_list(Id))}, + {<<"val">>, Val} + ]}). + + +fold_fun({meta, _Meta}, Acc) -> + {ok, Acc}; + +fold_fun({row, _} = Row, Acc) -> + {ok, [Row | Acc]}; + +fold_fun(complete, Acc) -> + {ok, lists:reverse(Acc)}. 
diff --git a/src/couch_views/test/couch_views_updater_test.erl b/src/couch_views/test/couch_views_updater_test.erl new file mode 100644 index 000000000..aadbe940b --- /dev/null +++ b/src/couch_views/test/couch_views_updater_test.erl @@ -0,0 +1,240 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_views_updater_test). + +-include_lib("couch/include/couch_db.hrl"). +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("couch_mrview/include/couch_mrview.hrl"). +-include_lib("fabric/test/fabric2_test.hrl"). +-include_lib("mango/src/mango_idx.hrl"). +-include_lib("couch_views/include/couch_views.hrl"). + + +indexer_test_() -> + { + "Test indexing", + { + setup, + fun setup/0, + fun cleanup/1, + { + foreach, + fun foreach_setup/0, + fun foreach_teardown/1, + [ + ?TDEF_FE(index_docs), + ?TDEF_FE(update_doc), + ?TDEF_FE(delete_doc), + ?TDEF_FE(includes_design_docs), + ?TDEF_FE(handle_erlfdb_errors, 15) + ] + } + } + }. + + +setup() -> + Ctx = test_util:start_couch([ + fabric, + couch_jobs, + couch_js, + couch_views, + mango + ]), + Ctx. + + +cleanup(Ctx) -> + test_util:stop_couch(Ctx). + + +foreach_setup() -> + {ok, Db} = fabric2_db:create(?tempdb(), [{user_ctx, ?ADMIN_USER}]), + + DDoc = create_idx_ddoc(), + fabric2_db:update_docs(Db, [DDoc]), + % make sure the index is built for the first time so the background + % indexer doesn't build the index + wait_while_ddoc_builds(Db), + + Docs = make_docs(3), + fabric2_db:update_docs(Db, Docs), + meck:new(couch_views_trees, [passthrough]), + {Db, DDoc}. + + +foreach_teardown({Db, _}) -> + meck:unload(), + ok = fabric2_db:delete(fabric2_db:name(Db), []). + + +index_docs({Db, DDoc}) -> + Docs = run_query(Db, DDoc), + ?assertEqual([ + [{id, <<"1">>}, {value, 1}], + [{id, <<"2">>}, {value, 2}], + [{id, <<"3">>}, {value, 3}] + ], Docs). + + +update_doc({Db, DDoc}) -> + {ok, Doc} = fabric2_db:open_doc(Db, <<"2">>), + JsonDoc = couch_doc:to_json_obj(Doc, []), + JsonDoc2 = couch_util:json_apply_field({<<"value">>, 4}, JsonDoc), + Doc2 = couch_doc:from_json_obj(JsonDoc2), + fabric2_db:update_doc(Db, Doc2), + + Docs = run_query(Db, DDoc), + ?assertEqual([ + [{id, <<"1">>}, {value, 1}], + [{id, <<"3">>}, {value, 3}], + [{id, <<"2">>}, {value, 4}] + ], Docs). + + +delete_doc({Db, DDoc}) -> + {ok, Doc} = fabric2_db:open_doc(Db, <<"2">>), + JsonDoc = couch_doc:to_json_obj(Doc, []), + JsonDoc2 = couch_util:json_apply_field({<<"_deleted">>, true}, JsonDoc), + Doc2 = couch_doc:from_json_obj(JsonDoc2), + fabric2_db:update_doc(Db, Doc2), + + Docs = run_query(Db, DDoc), + ?assertEqual([ + [{id, <<"1">>}, {value, 1}], + [{id, <<"3">>}, {value, 3}] + ], Docs). 
+ + +includes_design_docs({Db, _}) -> + DDoc = create_idx_include_ddocs(), + fabric2_db:update_docs(Db, [DDoc]), + + IndexDDoc0 = create_idx_ddoc(), + IndexDDoc = IndexDDoc0#doc{ + id = <<"_design/to_be_indexed">> + }, + + fabric2_db:update_docs(Db, [IndexDDoc]), + + Docs = run_query(Db, DDoc), + ?assertEqual([ + [{id, <<"_design/ddoc_that_indexes_ddocs">>}, {value, 1}], + [{id, <<"_design/to_be_indexed">>}, {value, 1}] + ], Docs). + + +handle_erlfdb_errors({Db, _}) -> + meck:expect(couch_views_trees, update_views, fun(_, _, _) -> + error({erlfdb_error, 1009}) + end), + ?assertError({erlfdb_error, 1009}, fabric2_db:update_docs(Db, [doc(4)])). + + +run_query(Db, DDoc) -> + Args = #mrargs{ + view_type = map, + reduce = false, + include_docs = true, + update = false + }, + CB = fun query_cb/2, + {ok, Acc} = couch_views:query(Db, DDoc, <<"idx_01">>, CB, [], Args), + lists:map(fun ({Props}) -> + [ + {id, couch_util:get_value(<<"_id">>, Props)}, + {value, couch_util:get_value(<<"value">>, Props, 1)} + ] + + end, Acc). + + +create_idx_ddoc() -> + couch_doc:from_json_obj({[ + {<<"_id">>, <<"_design/ddoc1">>}, + {<<"language">>, <<"query">>}, + {<<"views">>, {[ + {<<"idx_01">>, {[ + {<<"map">>, {[ + {<<"fields">>, {[{<<"value">>, <<"asc">>}]}} + ]}}, + {<<"reduce">>, <<"_count">>}, + {<<"options">>, {[ + {<<"def">>, + {[{<<"fields">>, + {[{<<"value">>, <<"asc">>}]}}]}} + ]}} + ]}} + ]} + }, + {<<"autoupdate">>, false}, + {<<"options">>, {[{<<"interactive">>, true}]}} + ]}). + + +create_idx_include_ddocs() -> + couch_doc:from_json_obj({[ + {<<"_id">>, <<"_design/ddoc_that_indexes_ddocs">>}, + {<<"language">>, <<"javascript">>}, + {<<"views">>, {[ + {<<"idx_01">>, {[ + {<<"map">>, << + "function(doc) {" + "if (doc.language) {" + "emit(doc.language, 1);" + "}" + "}">>} + ]}} + ]}}, + {<<"autoupdate">>, false}, + {<<"options">>, {[ + {<<"include_design">>, true}, + {<<"interactive">>, true} + ]}} + ]}). + + +wait_while_ddoc_builds(Db) -> + Fun = fun () -> + fabric2_fdb:transactional(Db, fun(TxDb) -> + Ready = lists:filter(fun (Idx) -> + Idx#idx.build_status == ?INDEX_READY + end, mango_idx:list(TxDb)), + + if length(Ready) > 1 -> ok; true -> + wait + end + end) + end, + test_util:wait(Fun). + + + +make_docs(Count) -> + [doc(I) || I <- lists:seq(1, Count)]. + + +doc(Id) -> + couch_doc:from_json_obj({[ + {<<"_id">>, list_to_binary(integer_to_list(Id))}, + {<<"value">>, Id} + ]}). + + +query_cb({row, Props}, Acc) -> + Doc = couch_util:get_value(doc, Props), + {ok, Acc ++ [Doc]}; + +query_cb(_, Acc) -> + {ok, Acc}. + diff --git a/src/couch_views/test/couch_views_upgrade_test.erl b/src/couch_views/test/couch_views_upgrade_test.erl new file mode 100644 index 000000000..556a76297 --- /dev/null +++ b/src/couch_views/test/couch_views_upgrade_test.erl @@ -0,0 +1,400 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_views_upgrade_test). + +-include_lib("eunit/include/eunit.hrl"). +-include_lib("couch/include/couch_db.hrl"). +-include_lib("couch/include/couch_eunit.hrl"). 
+-include_lib("couch_mrview/include/couch_mrview.hrl"). +-include_lib("couch_views/include/couch_views.hrl"). +-include_lib("fabric/include/fabric2.hrl"). +-include_lib("fabric/test/fabric2_test.hrl"). + + +-define(MAP_FUN1, <<"map_fun1">>). +-define(MAP_FUN2, <<"map_fun2">>). + + +upgrade_test_() -> + { + "Test view upgrades", + { + setup, + fun setup/0, + fun cleanup/1, + { + foreach, + fun foreach_setup/0, + fun foreach_teardown/1, + [ + ?TDEF_FE(empty_state), + ?TDEF_FE(indexed_state), + ?TDEF_FE(upgrade_non_interactive), + ?TDEF_FE(upgrade_unbuilt_interactive), + ?TDEF_FE(upgrade_partially_built_interactive), + ?TDEF_FE(upgrade_built_interactive) + ] + } + } + }. + + +setup() -> + Ctx = test_util:start_couch([ + fabric, + couch_jobs, + couch_js, + couch_views + ]), + Ctx. + + +cleanup(Ctx) -> + test_util:stop_couch(Ctx). + + +foreach_setup() -> + {ok, Db} = fabric2_db:create(?tempdb(), [{user_ctx, ?ADMIN_USER}]), + Db. + + +foreach_teardown(Db) -> + meck:unload(), + config:delete("couch_views", "change_limit"), + ok = fabric2_db:delete(fabric2_db:name(Db), []). + + +empty_state(Db) -> + DDoc = create_ddoc(), + {ok, Mrst} = couch_views_util:ddoc_to_mrst(fabric2_db:name(Db), DDoc), + State = fabric2_fdb:transactional(Db, fun(TxDb) -> + couch_views_fdb:get_view_state(TxDb, Mrst) + end), + + Expect = #{ + version => ?CURRENT_VIEW_IMPL_VERSION, + view_seq => <<>>, + view_vs => not_found, + build_status => not_found + }, + ?assertEqual(Expect, State), + assert_fdb_state(Db, Mrst, Expect). + + +indexed_state(Db) -> + DDoc = create_ddoc(), + Doc1 = doc(0), + + {ok, _} = fabric2_db:update_doc(Db, DDoc, []), + {ok, _} = fabric2_db:update_doc(Db, Doc1, []), + + {ok, Out} = run_query(Db, DDoc, ?MAP_FUN1), + ?assertEqual([row(<<"0">>, 0, 0)], Out), + + assert_fdb_state(Db, DDoc, #{ + version => ?CURRENT_VIEW_IMPL_VERSION, + view_seq => fabric2_db:get_update_seq(Db), + view_vs => not_found, + build_status => not_found + }). + + +upgrade_non_interactive(Db) -> + DDoc = create_ddoc(), + Doc1 = doc(0), + + {ok, _} = fabric2_db:update_docs(Db, [DDoc, Doc1], []), + DbSeq = fabric2_db:get_update_seq(Db), + + init_fdb_state(Db, DDoc, #{view_seq => DbSeq}), + + {ok, Out} = run_query(Db, DDoc, ?MAP_FUN1), + ?assertEqual([row(<<"0">>, 0, 0)], Out), + + assert_fdb_state(Db, DDoc, #{ + version => ?CURRENT_VIEW_IMPL_VERSION, + view_seq => DbSeq, + view_vs => not_found, + build_status => not_found + }). + + +upgrade_unbuilt_interactive(Db) -> + DDoc = create_ddoc(), + {ok, Mrst} = couch_views_util:ddoc_to_mrst(fabric2_db:name(Db), DDoc), + Doc1 = doc(0), + + {ok, _} = fabric2_db:update_docs(Db, [DDoc, Doc1], []), + DbSeq = fabric2_db:get_update_seq(Db), + + init_fdb_state(Db, DDoc, #{ + view_vs => fabric2_fdb:seq_to_vs(DbSeq), + build_status => ?INDEX_BUILDING + }), + + % Trigger an upgrade + fabric2_fdb:transactional(Db, fun(TxDb) -> + couch_views_fdb:get_view_state(TxDb, Mrst) + end), + + assert_fdb_state(Db, DDoc, #{ + version => ?CURRENT_VIEW_IMPL_VERSION, + view_seq => <<>>, + view_vs => fabric2_fdb:seq_to_vs(DbSeq), + build_status => ?INDEX_BUILDING + }), + + % Build the view + {ok, Out} = run_query(Db, DDoc, ?MAP_FUN1), + ?assertEqual([row(<<"0">>, 0, 0)], Out), + + assert_fdb_state(Db, DDoc, #{ + version => ?CURRENT_VIEW_IMPL_VERSION, + view_seq => DbSeq, + view_vs => fabric2_fdb:seq_to_vs(DbSeq), + build_status => ?INDEX_READY + }). 
+ + +upgrade_partially_built_interactive(Db) -> + DDoc = create_ddoc(), + {ok, Mrst} = couch_views_util:ddoc_to_mrst(fabric2_db:name(Db), DDoc), + {ok, _} = fabric2_db:update_doc(Db, DDoc, []), + + MidSeq = fabric2_db:get_update_seq(Db), + + Doc1 = doc(0), + {ok, _} = fabric2_db:update_doc(Db, Doc1, []), + + DbSeq = fabric2_db:get_update_seq(Db), + + init_fdb_state(Db, DDoc, #{ + view_seq => MidSeq, + view_vs => fabric2_fdb:seq_to_vs(DbSeq), + build_status => ?INDEX_BUILDING + }), + + % Trigger an upgrade + fabric2_fdb:transactional(Db, fun(TxDb) -> + couch_views_fdb:get_view_state(TxDb, Mrst) + end), + + assert_fdb_state(Db, DDoc, #{ + version => ?CURRENT_VIEW_IMPL_VERSION, + view_seq => <<>>, + view_vs => fabric2_fdb:seq_to_vs(DbSeq), + build_status => ?INDEX_BUILDING + }), + + % Build the view + {ok, Out} = run_query(Db, DDoc, ?MAP_FUN1), + ?assertEqual([row(<<"0">>, 0, 0)], Out), + + assert_fdb_state(Db, DDoc, #{ + version => ?CURRENT_VIEW_IMPL_VERSION, + view_seq => DbSeq, + view_vs => fabric2_fdb:seq_to_vs(DbSeq), + build_status => ?INDEX_READY + }). + + +upgrade_built_interactive(Db) -> + DDoc = create_ddoc(), + Doc1 = doc(0), + + {ok, Mrst} = couch_views_util:ddoc_to_mrst(fabric2_db:name(Db), DDoc), + {ok, _} = fabric2_db:update_doc(Db, DDoc, []), + {ok, _} = fabric2_db:update_doc(Db, Doc1, []), + + DbSeq = fabric2_db:get_update_seq(Db), + + init_fdb_state(Db, DDoc, #{ + view_seq => DbSeq, + view_vs => fabric2_fdb:seq_to_vs(DbSeq), + build_status => ?INDEX_READY + }), + + % Trigger an upgrade + fabric2_fdb:transactional(Db, fun(TxDb) -> + couch_views_fdb:get_view_state(TxDb, Mrst) + end), + + assert_fdb_state(Db, DDoc, #{ + version => ?CURRENT_VIEW_IMPL_VERSION, + view_seq => <<>>, + view_vs => fabric2_fdb:seq_to_vs(DbSeq), + build_status => ?INDEX_BUILDING + }), + + % Build the view + {ok, Out} = run_query(Db, DDoc, ?MAP_FUN1), + ?assertEqual([row(<<"0">>, 0, 0)], Out), + + assert_fdb_state(Db, DDoc, #{ + version => ?CURRENT_VIEW_IMPL_VERSION, + view_seq => DbSeq, + view_vs => fabric2_fdb:seq_to_vs(DbSeq), + build_status => ?INDEX_READY + }). + + +init_fdb_state(Db, #doc{} = DDoc, Values) -> + {ok, Mrst} = couch_views_util:ddoc_to_mrst(fabric2_db:name(Db), DDoc), + init_fdb_state(Db, Mrst, Values); +init_fdb_state(Db, #mrst{sig = Sig}, Values) -> + init_fdb_state(Db, Sig, Values); +init_fdb_state(Db, Sig, Values) -> + VersionRow = case maps:get(version, Values, undefined) of + undefined -> []; + Version -> [{pack(Db, key(version, Sig)), pack({Version})}] + end, + + SeqRow = case maps:get(view_seq, Values, undefined) of + undefined -> []; + Seq -> [{pack(Db, key(seq, Sig)), Seq}] + end, + + VSRow = case maps:get(view_vs, Values, undefined) of + undefined -> []; + VS -> [{pack(Db, key(vs, Sig)), pack({VS})}] + end, + + BSRow = case maps:get(build_status, Values, undefined) of + undefined -> []; + BS -> [{pack(Db, key(bs, Sig)), BS}] + end, + + Rows = VersionRow ++ SeqRow ++ VSRow ++ BSRow, + + fabric2_fdb:transactional(Db, fun(TxDb) -> + #{ + tx := Tx + } = TxDb, + lists:foreach(fun({K, V}) -> + erlfdb:set(Tx, K, V) + end, Rows) + end). 
+ + +assert_fdb_state(Db, #doc{} = DDoc, Expect) -> + {ok, Mrst} = couch_views_util:ddoc_to_mrst(fabric2_db:name(Db), DDoc), + assert_fdb_state(Db, Mrst, Expect); +assert_fdb_state(Db, #mrst{sig = Sig}, Expect) -> + assert_fdb_state(Db, Sig, Expect); +assert_fdb_state(Db, Sig, Expect) -> + #{ + version := Version, + view_seq := ViewSeq, + view_vs := ViewVS, + build_status := BuildStatus + } = Expect, + + VersionRow = case Version of + not_found -> []; + _ -> [{pack(Db, key(version, Sig)), pack({Version})}] + end, + + SeqRow = case ViewSeq of + <<>> -> []; + _ -> [{pack(Db, key(seq, Sig)), ViewSeq}] + end, + + VSRow = case ViewVS of + not_found -> []; + _ -> [{pack(Db, key(vs, Sig)), pack({ViewVS})}] + end, + + BSRow = case BuildStatus of + not_found -> []; + _ -> [{pack(Db, key(bs, Sig)), BuildStatus}] + end, + + ExpectRows = lists:sort(VersionRow ++ SeqRow ++ VSRow ++ BSRow), + + RawExistingRows = fabric2_fdb:transactional(Db, fun(TxDb) -> + #{ + tx := Tx, + db_prefix := DbPrefix + } = TxDb, + RangePrefix = erlfdb_tuple:pack({?DB_VIEWS, ?VIEW_INFO}, DbPrefix), + erlfdb:wait(erlfdb:get_range_startswith(Tx, RangePrefix)) + end), + + % Ignore the KV size key in the view info rows + KVSizeKey = pack(Db, key(kv_size, Sig)), + ExistingRows = lists:keydelete(KVSizeKey, 1, RawExistingRows), + + ?assertEqual(ExpectRows, ExistingRows). + + +key(version, Sig) -> {?DB_VIEWS, ?VIEW_INFO, ?VIEW_IMPL_VERSION, Sig}; +key(seq, Sig) -> {?DB_VIEWS, ?VIEW_INFO, ?VIEW_UPDATE_SEQ, Sig}; +key(kv_size, Sig) -> {?DB_VIEWS, ?VIEW_INFO, ?VIEW_KV_SIZE, Sig}; +key(vs, Sig) -> {?DB_VIEWS, ?VIEW_INFO, ?VIEW_CREATION_VS, Sig}; +key(bs, Sig) -> {?DB_VIEWS, ?VIEW_INFO, ?VIEW_BUILD_STATUS, Sig}. + + +pack(Db, Key) -> + #{ + db_prefix := DbPrefix + } = Db, + erlfdb_tuple:pack(Key, DbPrefix). + + +pack(Value) -> + erlfdb_tuple:pack(Value). + + +row(Id, Key, Value) -> + {row, [ + {id, Id}, + {key, Key}, + {value, Value} + ]}. + + +fold_fun({meta, _Meta}, Acc) -> + {ok, Acc}; +fold_fun({row, _} = Row, Acc) -> + {ok, [Row | Acc]}; +fold_fun(complete, Acc) -> + {ok, lists:reverse(Acc)}. + + +create_ddoc() -> + couch_doc:from_json_obj({[ + {<<"_id">>, <<"_design/bar">>}, + {<<"views">>, {[ + {?MAP_FUN1, {[ + {<<"map">>, <<"function(doc) {emit(doc.val, doc.val);}">>} + ]}}, + {?MAP_FUN2, {[ + {<<"map">>, <<"function(doc) {}">>} + ]}} + ]}} + ]}). + + +doc(Id) -> + doc(Id, Id). + + +doc(Id, Val) -> + couch_doc:from_json_obj({[ + {<<"_id">>, list_to_binary(integer_to_list(Id))}, + {<<"val">>, Val} + ]}). + + +run_query(#{} = Db, DDoc, <<_/binary>> = View) -> + couch_views:query(Db, DDoc, View, fun fold_fun/2, [], #mrargs{}).
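`init_fdb_state/3` and `assert_fdb_state/3` above write and inspect raw view-state rows under `{?DB_VIEWS, ?VIEW_INFO, ...}` keys packed against the database prefix. As a hedged sketch of fetching one such row back directly, assuming the same key layout (the function name and the `not_found` handling are illustrative, not part of the module):

```erlang
%% Illustrative only: fetch the stored view update seq for a signature.
read_view_seq(Db, Sig) ->
    fabric2_fdb:transactional(Db, fun(TxDb) ->
        #{tx := Tx, db_prefix := DbPrefix} = TxDb,
        Key = erlfdb_tuple:pack(
            {?DB_VIEWS, ?VIEW_INFO, ?VIEW_UPDATE_SEQ, Sig}, DbPrefix),
        case erlfdb:wait(erlfdb:get(Tx, Key)) of
            not_found -> <<>>;
            Seq -> Seq
        end
    end).
```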
\ No newline at end of file
diff --git a/src/ctrace/README.md b/src/ctrace/README.md
new file mode 100644
index 000000000..4b0238b14
--- /dev/null
+++ b/src/ctrace/README.md
@@ -0,0 +1,308 @@
+Overview
+========
+
+This application provides an interface to OpenTracing-compatible
+tracing systems.
+
+Open Tracing
+------------
+
+[//]: # (taken from https://github.com/opentracing/specification/blob/master/specification.md)
+Traces in OpenTracing are defined implicitly by their Spans.
+In particular, a Trace can be thought of as a directed acyclic
+graph (DAG) of Spans, where the edges between Spans are called
+References.
+
+Each Span encapsulates the following state:
+
+- An operation name
+- A start timestamp
+- A finish timestamp
+- A set of zero or more key:value Span Tags.
+- A set of zero or more Span Logs, each of which is
+  itself a key:value map paired with a timestamp.
+- A SpanContext
+- References to zero or more causally-related Spans
+
+Every trace is identified by a unique trace_id. Every trace includes zero
+or more tracing spans, each of which is identified by a span id.
+
+Jaeger
+------
+
+Jaeger is a distributed tracing system released as open source by
+Uber Technologies. It is one of the implementations of the OpenTracing
+specification. Jaeger supports a trace detail view where a single trace is
+represented as a tree of tracing spans with detailed timing information
+about every span. In order to make this feature work, all tracing spans
+should form a lineage from the same root span.
+
+
+Implementation
+==============
+
+Every operation has a unique identifier. Example identifiers are:
+
+- all-dbs.read
+- database.delete
+- replication.trigger
+- view.compaction
+
+Tracing begins with a root span that can be filtered based on
+a set of configurable rules. When the root trace is created these
+rules are applied to see if the trace should be generated and logged.
+If a trace is disabled due to filtering then no trace data is generated.
+
+
+Code instrumentation
+--------------------
+
+The span lifecycle is controlled by:
+
+- `ctrace:start_span`
+- `ctrace:finish_span`
+- `ctrace:with_span`
+
+The instrumentation can add tags and logs to a span.
+
+Example of instrumentation:
+
+```
+ctrace:with_span('database.read', #{'db.name' => <<>>}, fun() ->
+    ctrace:tag(#{
+        peer => Peer,
+        'http.method' => Method,
+        nonce => Nonce,
+        'http.url' => Path,
+        'span.kind' => <<"server">>,
+        component => <<"couchdb.chttpd">>
+    }),
+    ctrace:log(#{
+        field0 => "value0"
+    }),
+
+    handle_request(HttpReq)
+end).
+```
+
+As you can see, the `ctrace:with_span/3` function receives a function which
+wraps the operation we want to trace:
+
+- `ctrace:tag/1` to add new tags to the span
+- `ctrace:log/1` to add a log event to the span
+
+There are some informative functions as well:
+
+- `ctrace:refs/0` - returns the references from the current span to other spans
+- `ctrace:operation_name/0` - returns the operation name for the current span
+- `ctrace:trace_id/0` - returns the trace id for the current span
+- `ctrace:span_id/0` - returns the span id for the current span
+
+Instrumentation guide
+---------------------
+
+- Start a root span at system boundaries
+  - httpd
+  - internal trigger (replication or compaction jobs)
+- Start a new child span when you cross layer boundaries
+- Start a new child span when you cross a node boundary
+- Extend `<app>_httpd_handlers:handler_info/1` as needed to
+  have operation ids. (We as a community might need to work on
+  naming conventions.)
+- Use the [span conventions](https://github.com/apache/couchdb-documentation/blob/master/rfcs/011-opentracing.md#conventions)
+  and the [OpenTracing semantic conventions](https://github.com/opentracing/specification/blob/master/semantic_conventions.md)
+- When in doubt, consult the OpenTracing spec
+  - [spec overview](https://github.com/opentracing/specification/blob/master/specification.md)
+  - [conventions](https://github.com/opentracing/specification/blob/master/semantic_conventions.md#standard-span-tags-and-log-fields)
+
+Configuration
+-------------
+
+Traces are configured using the standard CouchDB ini-file based configuration.
+There is a global toggle `[tracing] enabled = true | false` that switches
+tracing on or off completely. The `[tracing]` section also includes
+configuration for where to send trace data. There are two reporters which we
+support.
+
+The thrift-over-UDP reporter (the default) has the following configuration
+options:
+
+- protocol = udp
+- thrift_format = compact | binary
+- agent_host = 127.0.0.1
+- agent_port = 6831
+
+The thrift-over-HTTP reporter has the following options:
+
+- protocol = http
+- endpoint = http://127.0.0.1:14268
+
+An example `[tracing]` section:
+
+```ini
+[tracing]
+
+enabled = true
+thrift_format = compact ; compact | binary
+agent_host = 127.0.0.1
+agent_port = 6831
+app_name = couchdb ; Value to use for the `location.application` tag
+```
+
+In the `[tracing.filters]` section we can define a set of rules for
+whether to include a trace. Keys are the operation name of the root
+span and values are a simple DSL for whether to include the given
+span based on its tags. See below for a more thorough description
+of the DSL. The `all` key is special and is used when no other
+filter matches a given operation. If the `all` key is not present
+then ctrace behaves as if it were defined as `(#{}) -> false`. I.e.,
+any trace that doesn't have a configuration entry is not generated
+and logged.
+
+```ini
+[tracing.filters]
+; trace all events
+; all = (#{}) -> true
+; trace all events with X-B3-... headers
+; all = (#{external := External}) when External == true -> true
+; database-info.read = (#{'http.method' := Method}) when Method == 'GET' -> true
+; view.build = (#{'view.name' := Name}) when Name == "foo" -> 0.25
+```
+
+Filter DSL Description
+----------------------
+
+```
+<operation_name> = ( #{<[arguments]>} ) when <[conditions]> -> <[actions]>
+```
+
+Where:
+ - operation_name is the name of the root span
+ - arguments is a comma separated list of pairs of the form
+   `<tag_or_field_name> := <variable_name>`
+ - actions is a list which contains
+   - `report`
+ - conditions
+   - `<[condition]>`
+   - `| <[condition]> <[operator]> <[condition]>`
+ - condition:
+   - `<variable_name> <[operator]> <value>`
+     `| <[guard_function]>(<[variable_name]>)`
+ - `variable_name` - a capitalized (Erlang-style) variable name without
+   special characters
+ - guard_function: one of
+   - `is_atom`
+   - `is_float`
+   - `is_integer`
+   - `is_list`
+   - `is_number`
+   - `is_pid`
+   - `is_port`
+   - `is_reference`
+   - `is_tuple`
+   - `is_map`
+   - `is_binary`
+   - `is_function`
+   - `element` - `element(n, tuple)`
+   - `abs`
+   - `hd` - returns the head of a list
+   - `length`
+   - `map_get`
+   - `map_size`
+   - `round`
+   - `node`
+   - `size` - returns the size of a tuple or binary
+   - `bit_size` - returns the number of bits in a binary
+   - `byte_size` - returns the number of bytes in a binary
+   - `tl` - returns the tail of a list
+   - `trunc`
+   - `self`
+ - operator: one of
+   - `not`
+   - `and` - evaluates both expressions
+   - `andalso` - evaluates the second only when the first is true
+   - `or` - evaluates both expressions
+   - `orelse` - evaluates the second only when the first is false
+   - `xor`
+   - `+`
+   - `-`
+   - `*`
+   - `div`
+   - `rem`
+   - `band` - bitwise AND
+   - `bor` - bitwise OR
+   - `bxor` - bitwise XOR
+   - `bnot` - bitwise NOT
+   - `bsl` - arithmetic bitshift left
+   - `bsr` - arithmetic bitshift right
+   - `>`
+   - `>=`
+   - `<`
+   - `=<`
+   - `=:=`
+   - `==`
+   - `=/=`
+   - `/=` - not equal
+
+
+b3 propagation
+--------------
+
+In order to correlate spans across multiple systems, the information
+about the parent span can be passed via headers. Currently the chttpd
+application is responsible for extracting and parsing the headers.
+The ctrace application provides the following facilities to enable this
+use case:
+
+- the `{root, RootSpan}` option for `ctrace:start_span/2`
+- `ctrace:external_span/3` to convert references to a root span
+
+The span references can be set either via the `b3` header or via
+individual headers. When individual headers are used, the
+following set of headers is supported:
+
+- X-B3-TraceId (32 lower-hex characters)
+- X-B3-SpanId (16 lower-hex characters)
+  (has no effect if X-B3-TraceId is not set)
+- X-B3-ParentSpanId (16 lower-hex characters)
+  (has no effect if X-B3-TraceId is not set)
+
+Alternatively, a single `b3` header can be used. It has to be
+in the following format:
+
+b3={TraceId}-{SpanId}-{SamplingState}-{ParentSpanId}
+
+Where SamplingState is either `0` or `1`; however, we ignore the value.
+
+Note: We only support 128-bit TraceIds.
+
+Developing
+==========
+
+Here we provide a list of frequently used commands that are
+useful while working on this application.
+
+
+1. Run all tests
+```
+make setup-eunit
+make && ERL_LIBS=`pwd`/src BUILDDIR=`pwd` mix test --trace src/chttpd/test/exunit/ src/ctrace/test/exunit/
+```
+
+2. Run tests selectively
+```
+make && ERL_LIBS=`pwd`/src BUILDDIR=`pwd` mix test --trace src/chttpd/test/exunit/ctrace_context_test.exs:59
+```
+
+3. Re-run only failed tests
+```
+make && ERL_LIBS=`pwd`/src BUILDDIR=`pwd` mix test --failed --trace src/chttpd/test/exunit/ src/ctrace/test/exunit/
+```
+
+4. Running Jaeger in Docker
+```
+docker run -d --net fdb-core --name jaeger.local -p 6831:6831/udp -p 16686:16686 jaegertracing/all-in-one:1.14
+```
+
+If Docker isn't your cup of tea, the Jaeger project also provides
+prebuilt binaries that can be downloaded. On macOS we can easily
+set up a development Jaeger instance by running the prebuilt
+`jaeger-all-in-one` binary without any arguments.
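+
+5. Render the module generated for a filter definition
+
+This is an illustrative sketch rather than an established workflow:
+`ctrace_dsl:source/2` is exported for debugging, and the filter string
+below is only an example.
+```
+1> ctrace_dsl:source("all", "(#{}) -> true").
+```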
\ No newline at end of file diff --git a/src/ctrace/rebar.config b/src/ctrace/rebar.config new file mode 100644 index 000000000..362c8785e --- /dev/null +++ b/src/ctrace/rebar.config @@ -0,0 +1,14 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +{cover_enabled, true}. +{cover_print_enabled, true}. diff --git a/src/ctrace/src/ctrace.app.src b/src/ctrace/src/ctrace.app.src new file mode 100644 index 000000000..64f4fc5df --- /dev/null +++ b/src/ctrace/src/ctrace.app.src @@ -0,0 +1,27 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + + {application, ctrace, [ + {description, "Open tracer API for CouchDB"}, + {vsn, git}, + {registered, [ + ]}, + {applications, [ + kernel, + stdlib, + syntax_tools, + config, + jaeger_passage, + passage + ]}, + {mod, {ctrace_app, []}} +]}. diff --git a/src/ctrace/src/ctrace.erl b/src/ctrace/src/ctrace.erl new file mode 100644 index 000000000..5521901fd --- /dev/null +++ b/src/ctrace/src/ctrace.erl @@ -0,0 +1,361 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(ctrace). + +-vsn(1). + +-export([ + is_enabled/0, + + with_span/2, + with_span/3, + start_span/1, + start_span/2, + finish_span/0, + finish_span/1, + has_span/0, + external_span/3, + + tag/1, + log/1, + + tags/0, + refs/0, + operation_name/0, + trace_id/0, + span_id/0, + tracer/0, + context/0, + + match/2 +]). + + +-include_lib("couch/include/couch_db.hrl"). +-include_lib("passage/include/opentracing.hrl"). +-include("ctrace.hrl"). + + +-type operation() + :: atom() + | fun(). + +-type tags() + :: #{atom() => term()}. + +-type log_fields() + :: #{atom() => term()}. + +-type start_span_options() + :: [start_span_option()]. + +-type start_span_option() + :: {time, erlang:timespan()} + | {tags, tags()}. + +-type finish_span_options() + :: [finish_span_option()]. + +-type finish_span_option() + :: {time, erlang:timespan()}. + + +-spec is_enabled() -> boolean(). 
+ +is_enabled() -> + case get(?IS_ENABLED_KEY) of + undefined -> + Result = ctrace_config:is_enabled(), + put(?IS_ENABLED_KEY, Result), + Result; + IsEnabled -> + IsEnabled + end. + + +%% @equiv with_span(Operation, [], Fun) +-spec with_span( + Operation :: operation(), + Fun + ) -> Result when + Fun :: fun (() -> Result), + Result :: term(). + +with_span(Operation, Fun) -> + with_span(Operation, #{}, Fun). + +-spec with_span( + Operation :: operation(), + TagsOrOptions :: tags() | start_span_options(), + Fun + ) -> Result when + Fun :: fun (() -> Result), + Result :: term(). + +with_span(Operation, ExtraTags, Fun) when is_map(ExtraTags) -> + with_span(Operation, [{tags, ExtraTags}], Fun); + +with_span(Operation, Options, Fun) -> + try + start_span(Operation, Options), + Fun() + catch Type:Reason -> + Stack = erlang:get_stacktrace(), + log(#{ + ?LOG_FIELD_ERROR_KIND => Type, + ?LOG_FIELD_MESSAGE => Reason, + ?LOG_FIELD_STACK => Stack + }, [error]), + erlang:raise(Type, Reason, Stack) + after + finish_span() + end. + +-spec start_span( + Operation :: operation() + ) -> ok. + +start_span(Operation) -> + start_span(Operation, []). + +-spec start_span( + Operation :: operation(), + Options :: start_span_options() + ) -> ok. + +start_span(Operation, Options) -> + case is_enabled() of + true -> + do_start_span(Operation, Options); + false -> + ok + end. + +do_start_span(Fun, Options) when is_function(Fun) -> + start_span(fun_to_op(Fun), Options); + +do_start_span(OperationName, Options0) -> + Options1 = add_time(Options0), + case passage_pd:current_span() of + undefined -> + put(?ORIGIN_KEY, atom_to_binary(OperationName, utf8)), + Tags = case lists:keyfind(tags, 1, Options0) of + {tags, T} -> + T; + false -> + #{} + end, + case match(OperationName, Tags) of + true -> + Options = [ + {tracer, ?MAIN_TRACER} + | maybe_start_root(Options1) + ], + passage_pd:start_span(OperationName, Options); + false -> + ok + end; + Span -> + Options = add_tags([{child_of, Span} | Options1], #{ + origin => get(?ORIGIN_KEY) + }), + passage_pd:start_span(OperationName, Options) + end. + +-spec finish_span() -> ok. + +finish_span() -> + finish_span([]). + +-spec finish_span( + Options :: finish_span_options() + ) -> ok. + +finish_span(Options0) -> + Options = add_time(Options0), + passage_pd:finish_span(Options). + +-spec tag( + Tags :: tags() + ) -> ok. + +tag(Tags) -> + passage_pd:set_tags(Tags). + +-spec log( + Fields :: log_fields() | fun (() -> log_fields()) + ) -> ok. + +log(FieldsOrFun) -> + log(FieldsOrFun, []). + +log(FieldsOrFun, Options) -> + passage_pd:log(FieldsOrFun, Options). + +-spec tags() -> tags(). + +tags() -> + case passage_pd:current_span() of + undefined -> + undefined; + Span -> + passage_span:get_tags(Span) + end. + +-spec refs() -> passage:refs(). + +refs() -> + case passage_pd:current_span() of + undefined -> + undefined; + Span -> + passage_span:get_refs(Span) + end. + +-spec has_span() -> boolean(). + +has_span() -> + passage_pd:current_span() =/= undefined. + +-spec operation_name() -> atom(). + +operation_name() -> + case passage_pd:current_span() of + undefined -> + undefined; + Span -> + passage_span:get_operation_name(Span) + end. + +-spec trace_id() -> 0..16#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF. + +trace_id() -> + case passage_pd:current_span() of + undefined -> + undefined; + Span -> + Context = passage_span:get_context(Span), + jaeger_passage_span_context:get_trace_id(Context) + end. + +-spec span_id() -> 0..16#FFFFFFFFFFFFFFFF. 
+ +span_id() -> + case passage_pd:current_span() of + undefined -> + undefined; + Span -> + Context = passage_span:get_context(Span), + jaeger_passage_span_context:get_span_id(Context) + end. + +-spec tracer() -> passage:tracer_id(). + +tracer() -> + case passage_pd:current_span() of + undefined -> + undefined; + Span -> + passage_span:get_tracer(Span) + end. + +-spec context() -> passage_span_contest:context(). + +context() -> + case passage_pd:current_span() of + undefined -> + undefined; + Span -> + passage_span:get_context(Span) + end. + +-spec external_span( + TraceId :: passage:trace_id(), + SpanId :: undefined | passage:span_id(), + ParentSpanId :: undefined | passage:span_id() + ) -> passage:maybe_span(). + +external_span(TraceId, undefined, ParentSpanId) -> + external_span(TraceId, rand:uniform(16#FFFFFFFFFFFFFFFF), ParentSpanId); +external_span(TraceId, SpanId, undefined) -> + external_span(TraceId, SpanId, rand:uniform(16#FFFFFFFFFFFFFFFF)); +external_span(TraceId, SpanId, ParentSpanId) -> + IterFun = fun(Val) -> Val end, + Flags = <<0:32>>, + BaggageItems = <<0:32>>, + Binary = << + TraceId:128, + SpanId:64, + ParentSpanId:64, + Flags/binary, + BaggageItems/binary + >>, + State = {ok, <<"binary">>, Binary, error}, + passage:extract_span(?MAIN_TRACER, binary, IterFun, State). + + +match(OperationId, Tags) -> + OpMod = ctrace_config:filter_module_name(OperationId), + case erlang:function_exported(OpMod, match, 1) of + true -> + do_match(OpMod, Tags); + false -> + AllMod = ctrace_config:filter_module_name("all"), + case erlang:function_exported(AllMod, match, 1) of + true -> do_match(AllMod, Tags); + false -> false + end + end. + + +do_match(Mod, Tags) -> + case Mod:match(Tags) of + true -> + true; + false -> + false; + Rate when is_float(Rate) -> + rand:uniform() =< Rate + end. + + +add_tags(Options, ExtraTags) -> + case lists:keytake(tags, 1, Options) of + {value, {tags, T}, Opts} -> + [{tags, maps:merge(T, ExtraTags)} | Opts]; + false -> + [{tags, ExtraTags} | Options] + end. + +add_time(Options) -> + case lists:keymember(time, 1, Options) of + true -> + Options; + false -> + [{time, os:timestamp()} | Options] + end. + +maybe_start_root(Options) -> + case lists:keytake(root, 1, Options) of + {value, {root, Root}, NewOptions} -> + [{child_of, Root} | NewOptions]; + false -> + Options + end. + +fun_to_op(Fun) -> + {module, M} = erlang:fun_info(Fun, module), + {name, F} = erlang:fun_info(Fun, name), + {arity, A} = erlang:fun_info(Fun, arity), + Str = io_lib:format("~s:~s/~b", [M, F, A]), + list_to_atom(lists:flatten(Str)). diff --git a/src/ctrace/src/ctrace.hrl b/src/ctrace/src/ctrace.hrl new file mode 100644 index 000000000..3819bbd50 --- /dev/null +++ b/src/ctrace/src/ctrace.hrl @@ -0,0 +1,15 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-define(MAIN_TRACER, jaeger_passage_reporter). +-define(IS_ENABLED_KEY, ctrace_is_enabled). +-define(ORIGIN_KEY, ctrace_origin_key). 
diff --git a/src/ctrace/src/ctrace_app.erl b/src/ctrace/src/ctrace_app.erl new file mode 100644 index 000000000..c98b897e0 --- /dev/null +++ b/src/ctrace/src/ctrace_app.erl @@ -0,0 +1,26 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(ctrace_app). + +-behaviour(application). + +-export([ + start/2, + stop/1 +]). + +start(_StartType, _StartArgs) -> + ctrace_sup:start_link(). + +stop(_State) -> + ok. diff --git a/src/ctrace/src/ctrace_config.erl b/src/ctrace/src/ctrace_config.erl new file mode 100644 index 000000000..c63c77f1b --- /dev/null +++ b/src/ctrace/src/ctrace_config.erl @@ -0,0 +1,153 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(ctrace_config). + +-vsn(1). + +-behaviour(config_listener). + +-export([ + is_enabled/0, + update/0, + + filter_module_name/1 +]). + +-export([ + handle_config_change/5, + handle_config_terminate/3 +]). + +-include("ctrace.hrl"). + + +-spec is_enabled() -> boolean(). +is_enabled() -> + config:get_boolean("tracing", "enabled", false). + + +-spec update() -> ok. +update() -> + case is_enabled() of + true -> + maybe_start_main_tracer(?MAIN_TRACER), + + CompiledFilters = get_compiled_filters(), + + RemovedFilters = lists:foldl(fun({OperationId, FilterDef}, Acc) -> + case compile_filter(OperationId, FilterDef) of + true -> Acc -- [OperationId]; + false -> Acc + end + end, CompiledFilters, config:get("tracing.filters")), + + lists:foreach(fun(OperationId) -> + ModName = filter_module_name(OperationId), + code:delete(ModName), + code:purge(ModName) + end, RemovedFilters), + + case config:get("tracing.filters", "all") of + undefined -> compile_filter("all", "(#{}) -> false"); + _ -> ok + end; + + false -> + jaeger_passage:stop_tracer(?MAIN_TRACER) + end, + ok. + + +-spec filter_module_name(atom() | string()) -> atom(). +filter_module_name(OperationId) when is_atom(OperationId) -> + filter_module_name(atom_to_list(OperationId)); +filter_module_name(OperationId) -> + list_to_atom("ctrace_filter_" ++ OperationId). + + +handle_config_change("tracing", "enabled", _, _Persist, St) -> + update(), + {ok, St}; +handle_config_change("tracing.filters", _Key, _Val, _Persist, St) -> + update(), + {ok, St}; +handle_config_change(_Sec, _Key, _Val, _Persist, St) -> + {ok, St}. + +handle_config_terminate(_Server, _Reason, _State) -> + update(). + + +maybe_start_main_tracer(TracerId) -> + case passage_tracer_registry:get_reporter(TracerId) of + error -> + start_main_tracer(TracerId); + _ -> + true + end. 
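+
+% start_main_tracer/1 (below) builds the reporter configuration from the
+% [tracing] config section: thrift over UDP (agent_host/agent_port) by
+% default, or thrift over HTTP (endpoint), and starts the tracer with an
+% all-sampler wrapped in a queue length limit (max_queue_len) so that the
+% reporter queue cannot grow without bound.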
+ + +start_main_tracer(TracerId) -> + MaxQueueLen = config:get_integer("tracing", "max_queue_len", 1024), + Sampler = jaeger_passage_sampler_queue_limit:new( + passage_sampler_all:new(), TracerId, MaxQueueLen), + ServiceName = list_to_atom(config:get("tracing", "app_name", "couchdb")), + + ProtocolOptions = case config:get("tracing", "protocol", "udp") of + "udp" -> + [ + {thrift_format, list_to_atom( + config:get("tracing", "thrift_format", "compact"))}, + {agent_host, + config:get("tracing", "agent_host", "127.0.0.1")}, + {agent_port, + config:get_integer("tracing", "agent_port", 6831)}, + {protocol, udp}, + {default_service_name, ServiceName} + ]; + "http" ++ _ -> + [ + {endpoint, + config:get("tracing", "endpoint", "http://127.0.0.1:14268")}, + {protocol, http}, + {http_client, fun http_client/5}, + {default_service_name, ServiceName} + ] + end, + Options = [{default_service_name, ServiceName}|ProtocolOptions], + ok = jaeger_passage:start_tracer(TracerId, Sampler, Options). + +http_client(Endpoint, Method, Headers, Body, _ReporterOptions) -> + ibrowse:send_req(Endpoint, Headers, Method, Body, []). + +compile_filter(OperationId, FilterDef) -> + try + couch_log:info("Compiling filter : ~s", [OperationId]), + ctrace_dsl:compile(OperationId, FilterDef), + true + catch throw:{error, Reason} -> + couch_log:error("Cannot compile ~s :: ~s~n", [OperationId, Reason]), + false + end. + + +get_compiled_filters() -> + lists:foldl(fun({Mod, _Path}, Acc) -> + ModStr = atom_to_list(Mod), + case ModStr of + "ctrace_filter_" ++ OpName -> + [OpName | Acc]; + _ -> + Acc + end + end, [], code:all_loaded()). diff --git a/src/ctrace/src/ctrace_dsl.erl b/src/ctrace/src/ctrace_dsl.erl new file mode 100644 index 000000000..5e0b0f252 --- /dev/null +++ b/src/ctrace/src/ctrace_dsl.erl @@ -0,0 +1,106 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(ctrace_dsl). +-include_lib("syntax_tools/include/merl.hrl"). + +-export([ + compile/2, + + % Debug + source/2 +]). + + +-type ast() :: erl_syntax:syntaxTree(). + + +-spec compile(OperationId :: string(), FilterDef :: string()) -> ok. +compile(OperationId, FilterDef) -> + AST = parse_filter(OperationId, FilterDef), + merl:compile_and_load(AST), + ok. + + +-spec source(OperationId :: string(), FilterDef :: string()) -> string(). +source(OperationId, FilterDef) -> + AST = parse_filter(OperationId, FilterDef), + Options = [{paper, 160}, {ribbon, 80}], + erl_prettypr:format(erl_syntax:form_list(AST), Options). + + +-spec parse_filter(OperationId :: string(), FilterDef :: string()) -> [ast()]. 
+parse_filter(OperationId, FilterDef) -> + AST = merl:quote("match" ++ FilterDef ++ "."), + case AST of + ?Q("match(_@Args) when _@__@Guard -> _@Return.") + when erl_syntax:type(Args) == map_expr -> + validate_args(Args), + validate_return(Return), + generate(OperationId, Args, Guard, Return); + ?Q("match(_@Args) when _@__@Guard -> _@@_.") -> + fail("The only argument of the filter should be map"); + ?Q("match(_@@Args) when _@__@Guard -> _@@_.") -> + fail("The arity of the filter function should be 1"); + _ -> + fail("Unknown shape of a filter function") + end. + + +-spec validate_args(MapAST :: ast()) -> ok. +validate_args(MapAST) -> + %% Unfortunatelly merl doesn't seem to support maps + %% so we had to do it manually + lists:foldl(fun(AST, Bindings) -> + erl_syntax:type(AST) == map_field_exact + orelse fail("Only #{field := Var} syntax is supported in the header"), + NameAST = erl_syntax:map_field_exact_name(AST), + erl_syntax:type(NameAST) == atom + orelse fail("Only atoms are supported as field names in the header"), + Name = erl_syntax:atom_value(NameAST), + VarAST = erl_syntax:map_field_exact_value(AST), + erl_syntax:type(VarAST) == variable + orelse fail("Only capitalized names are supported as matching variables in the header"), + Var = erl_syntax:variable_name(VarAST), + maps:is_key(Var, Bindings) + andalso fail("'~s' variable is already in use", [Var]), + Bindings#{Var => Name} + end, #{}, erl_syntax:map_expr_fields(MapAST)). + + +-spec validate_return(Return :: [ast()]) -> ok. +validate_return(Return) -> + case Return of + ?Q("true") -> ok; + ?Q("false") -> ok; + ?Q("_@AST") when erl_syntax:type(AST) == float -> ok; + _ -> + fail("Unsupported return value '~s'", [erl_prettypr:format(Return)]) + end. + + +generate(OperationId, Args, Guard, Return) -> + ModuleName = ctrace_config:filter_module_name(OperationId), + Module = ?Q("-module('@ModuleName@')."), + Export = ?Q("-export([match/1])."), + Function = erl_syntax:function(merl:term(match), [ + ?Q("(_@Args) when _@__@Guard -> _@Return"), + ?Q("(_) -> false") + ]), + lists:flatten([Module, Export, Function]). + + +fail(Msg) -> + throw({error, Msg}). + +fail(Msg, Args) -> + throw({error, lists:flatten(io_lib:format(Msg, Args))}).
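+
+%% An illustrative sketch (not part of the implementation): a filter
+%% definition such as
+%%
+%%     view.build = (#{'view.name' := Name}) when Name == "foo" -> 0.25
+%%
+%% is compiled by compile/2 into roughly the following module:
+%%
+%%     -module('ctrace_filter_view.build').
+%%     -export([match/1]).
+%%     match(#{'view.name' := Name}) when Name == "foo" -> 0.25;
+%%     match(_) -> false.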
\ No newline at end of file diff --git a/src/ctrace/src/ctrace_sup.erl b/src/ctrace/src/ctrace_sup.erl new file mode 100644 index 000000000..70de3c586 --- /dev/null +++ b/src/ctrace/src/ctrace_sup.erl @@ -0,0 +1,41 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(ctrace_sup). +-behaviour(supervisor). +-vsn(1). + +-export([ + start_link/0, + init/1 +]). + +start_link() -> + ctrace_config:update(), + supervisor:start_link({local, ?MODULE}, ?MODULE, []). + +init([]) -> + Flags = #{ + strategy => one_for_one, + intensity => 5, + period => 10 + }, + Children = [ + #{ + id => config_listener_mon, + type => worker, + restart => permanent, + shutdown => 5000, + start => {config_listener_mon, start_link, [ctrace_config, nil]} + } + ], + {ok, {Flags, Children}}.
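+
+% Note: start_link/0 above calls ctrace_config:update/0 before starting the
+% supervision tree, so the tracer and filter modules reflect the current
+% configuration at boot; the config_listener_mon child then re-runs update/0
+% whenever the [tracing] or [tracing.filters] sections change.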
\ No newline at end of file diff --git a/src/ctrace/test/ctrace_config_test.erl b/src/ctrace/test/ctrace_config_test.erl new file mode 100644 index 000000000..0827013fd --- /dev/null +++ b/src/ctrace/test/ctrace_config_test.erl @@ -0,0 +1,153 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(ctrace_config_test). + +-include_lib("eunit/include/eunit.hrl"). +-include_lib("ctrace/src/ctrace.hrl"). + + +-define(TDEF(A), {atom_to_list(A), fun A/0}). + + +ctrace_config_test_() -> + { + "Test ctrace_config", + { + setup, + fun setup/0, + fun cleanup/1, + [ + ?TDEF(ensure_main_tracer_started), + ?TDEF(ensure_all_supported), + ?TDEF(handle_all_syntax_error_supported), + ?TDEF(ensure_filter_updated), + ?TDEF(ensure_filter_removed), + ?TDEF(ensure_bad_filter_ignored) + ] + } + }. + + +setup() -> + Ctx = test_util:start_couch([ctrace]), + + config_set("tracing", "enabled", "true"), + + Filter = "(#{method := M}) when M == get -> true", + config_set("tracing.filters", "base", Filter), + + ctrace_config:update(), + + Ctx. + + +cleanup(Ctx) -> + test_util:stop_couch(Ctx). + + +ensure_main_tracer_started() -> + ?assertMatch( + {ok, _}, + passage_tracer_registry:get_reporter(?MAIN_TRACER) + ). + + +ensure_all_supported() -> + config:delete("tracing.filters", "all", false), + test_util:wait_value(fun() -> + config:get("tracing.filters", "all") + end, undefined), + ctrace_config:update(), + + ?assertEqual(false, ctrace:match(bam, #{gee => whiz})), + + Filter = "(#{}) -> true", + config_set("tracing.filters", "all", Filter), + ctrace_config:update(), + + ?assertEqual(true, ctrace:match(bam, #{gee => whiz})). + + +handle_all_syntax_error_supported() -> + couch_log:error("XKCD: TEST START", []), + config:delete("tracing.filters", "all", false), + test_util:wait_value(fun() -> + config:get("tracing.filters", "all") + end, undefined), + ctrace_config:update(), + + ?assertEqual(false, ctrace:match(bam, #{gee => whiz})), + + Filter = "( -> true.", + config_set("tracing.filters", "all", Filter), + ctrace_config:update(), + + % If there's a syntax in the `all` handler + % then we default to not generating traces + ?assertEqual(false, ctrace:match(bam, #{gee => whiz})), + + couch_log:error("XKCD: TEST END", []), + config:delete("tracing.filters", "all", false). + + +ensure_filter_updated() -> + Filter1 = "(#{}) -> true", + config_set("tracing.filters", "bing", Filter1), + ctrace_config:update(), + + ?assertEqual(true, ctrace:match(bing, #{gee => whiz})), + + Filter2 = "(#{}) -> false", + config_set("tracing.filters", "bing", Filter2), + ctrace_config:update(), + + ?assertEqual(false, ctrace:match(bing, #{gee => whiz})). 
+ + +ensure_filter_removed() -> + Filter = "(#{}) -> true", + config_set("tracing.filters", "bango", Filter), + ctrace_config:update(), + + ?assertEqual(true, ctrace:match(bango, #{gee => whiz})), + + config:delete("tracing.filters", "bango", false), + test_util:wait_value(fun() -> + config:get("tracing.filters", "bango") + end, undefined), + ctrace_config:update(), + + FilterMod = ctrace_config:filter_module_name("bango"), + ?assertEqual(false, code:is_loaded(FilterMod)). + + +ensure_bad_filter_ignored() -> + Filter = "#foo stuff", + config_set("tracing.filters", "compile_error", Filter), + ctrace_config:update(), + + FilterMod = ctrace_config:filter_module_name("compile_error"), + ?assertEqual(false, code:is_loaded(FilterMod)), + + AllMod = ctrace_config:filter_module_name(all), + ?assertMatch({file, _}, code:is_loaded(AllMod)). + + +config_set(Section, Key, Value) -> + PrevValue = config:get(Section, Key), + if Value == PrevValue -> ok; true -> + config:set(Section, Key, Value, false), + test_util:wait_other_value(fun() -> + config:get(Section, Key) + end, PrevValue) + end. diff --git a/src/ctrace/test/ctrace_dsl_test.erl b/src/ctrace/test/ctrace_dsl_test.erl new file mode 100644 index 000000000..601e6cd17 --- /dev/null +++ b/src/ctrace/test/ctrace_dsl_test.erl @@ -0,0 +1,123 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(ctrace_dsl_test). + + +-include_lib("eunit/include/eunit.hrl"). + + +simple_parse_and_compile_test() -> + Filter = "(#{'http.method' := Method}) when Method == get -> 1.0", + ctrace_dsl:compile("foo", Filter), + ?assertEqual(1.0, run_filter("foo", #{'http.method' => get})), + ?assertEqual(false, run_filter("foo", #{'httpd.method' => put})). + + +empty_map_test() -> + Filter = "(#{}) -> true", + ctrace_dsl:compile("foo", Filter), + ?assertEqual(true, run_filter("foo", #{})), + ?assertEqual(true, run_filter("foo", #{foo => bar})), + ?assertEqual(false, run_filter("foo", nil)). + + +return_false_test() -> + Filter = "(#{}) -> false", + ctrace_dsl:compile("foo", Filter), + ?assertEqual(false, run_filter("foo", #{})), + ?assertEqual(false, run_filter("foo", nil)). + + +return_float_test() -> + Filter = "(#{}) -> 0.2", + ctrace_dsl:compile("foo", Filter), + ?assertEqual(0.2, run_filter("foo", #{})), + ?assertEqual(false, run_filter("foo", nil)). + + +bad_filter_body_is_list_test() -> + Filter = "(#{}) -> []", + Error = "Unsupported return value '[]'", + ?assertThrow({error, Error}, ctrace_dsl:compile("foo", Filter)). + + +bad_filter_body_has_calls_test() -> + Filter = "(#{}) -> [module:function()]", + Error = "Unsupported return value '[module:function()]'", + ?assertThrow({error, Error}, ctrace_dsl:compile("foo", Filter)). + + +bad_arg_list_too_few_test() -> + Filter = "() -> true", + Error = "The arity of the filter function should be 1", + ?assertThrow({error, Error}, ctrace_dsl:compile("foo", Filter)). 
+ + +bad_arg_list_too_many_test() -> + Filter = "(#{}, foo) -> true", + Error = "The arity of the filter function should be 1", + ?assertThrow({error, Error}, ctrace_dsl:compile("foo", Filter)). + + +bad_arg_type_test() -> + Filters = [ + "(atom) -> true", + "([atom]) -> true", + "(1) -> true", + "(1.0) -> true" + ], + Error = "The only argument of the filter should be map", + lists:foreach(fun(Filter) -> + ?assertThrow({error, Error}, ctrace_dsl:compile("foo", Filter)) + end, Filters). + + +bad_map_association_test() -> + Filter = "(#{foo => Var}) -> true", + Error = "Only #{field := Var} syntax is supported in the header", + ?assertThrow({error, Error}, ctrace_dsl:compile("foo", Filter)). + + +bad_field_variable_test() -> + Filter = "(#{Var := Val}) -> false", + Error = "Only atoms are supported as field names in the header", + ?assertThrow({error, Error}, ctrace_dsl:compile("foo", Filter)). + + +bad_field_match_test() -> + Filter = "(#{foo := 2}) -> true", + Error = "Only capitalized names are supported" + " as matching variables in the header", + ?assertThrow({error, Error}, ctrace_dsl:compile("foo", Filter)). + + +repeated_variable_test() -> + Filter = "(#{foo := Val, bar := Val}) -> true", + Error = "'Val' variable is already in use", + ?assertThrow({error, Error}, ctrace_dsl:compile("foo", Filter)). + + +code_coverage1_test() -> + Filter = "foo(#{}) -> bar", + Error = "Unknown shape of a filter function", + ?assertThrow({error, Error}, ctrace_dsl:compile("foo", Filter)). + + +code_coverage2_test() -> + Filter = "(#{}) -> true", + ?assertMatch([_ | _], ctrace_dsl:source("foo", Filter)). + + +run_filter(OperationId, Value) -> + ModName = ctrace_config:filter_module_name(OperationId), + ModName:match(Value). diff --git a/src/ctrace/test/ctrace_test.erl b/src/ctrace/test/ctrace_test.erl new file mode 100644 index 000000000..962f9aae3 --- /dev/null +++ b/src/ctrace/test/ctrace_test.erl @@ -0,0 +1,412 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(ctrace_test). + +-include_lib("eunit/include/eunit.hrl"). +-include_lib("ctrace/src/ctrace.hrl"). + + +-define(TDEF(A), {atom_to_list(A), fun A/0}). + + +ctrace_config_test_() -> + { + "Test ctrace", + { + setup, + fun setup/0, + fun cleanup/1, + [ + ?TDEF(is_enabled_cached), + ?TDEF(simple_with_span), + ?TDEF(with_span_exception), + ?TDEF(simple_start_finish_span), + ?TDEF(op_name_from_fun), + ?TDEF(skipped_when_disabled), + ?TDEF(include_or_skip_on_sampled), + ?TDEF(set_tags_on_start_span), + ?TDEF(set_time_on_start_span), + ?TDEF(skip_on_filtered), + ?TDEF(simple_child_span), + ?TDEF(update_tags), + ?TDEF(update_logs), + ?TDEF(current_span_getters), + ?TDEF(create_external_span), + ?TDEF(use_external_span) + ] + } + }. + + +setup() -> + Ctx = test_util:start_couch([ctrace]), + + config_set("tracing", "enabled", "true"), + + Filter = "(#{}) -> true", + config_set("tracing.filters", "all", Filter), + + ctrace_config:update(), + + MainReporter = passage_tracer_registry:get_reporter(?MAIN_TRACER), + + {MainReporter, Ctx}. 
+ + +cleanup({MainReporter, Ctx}) -> + passage_tracer_registry:set_reporter(?MAIN_TRACER, MainReporter), + test_util:stop_couch(Ctx). + + +is_enabled_cached() -> + erase(?IS_ENABLED_KEY), + Result = ctrace:is_enabled(), + ?assertEqual(Result, get(?IS_ENABLED_KEY)), + ?assert(is_boolean(Result)), + + % Fake override to test that we're using the cached value + put(?IS_ENABLED_KEY, not Result), + ?assertEqual(not Result, ctrace:is_enabled()), + + % Revert to original to not mess with other tests + put(?IS_ENABLED_KEY, Result). + + +simple_with_span() -> + set_self_reporter(), + + Result = ctrace:with_span(zing, fun() -> + a_result + end), + + ?assertEqual(a_result, Result), + + receive + {span, Span} -> + ?assertEqual(zing, passage_span:get_operation_name(Span)) + end. + + +with_span_exception() -> + set_self_reporter(), + + Result = try + ctrace:with_span(zab, fun() -> + throw(foo) + end) + catch T:R -> + {T, R} + end, + + ?assertEqual({throw, foo}, Result), + + receive + {span, Span} -> + ?assertEqual(zab, passage_span:get_operation_name(Span)), + ?assertMatch( + [ + {#{ + 'error.kind' := throw, + event := error, + message := foo, + stack := [_ | _] + }, _TimeStamp} + ], + passage_span:get_logs(Span) + ) + end. + + +simple_start_finish_span() -> + set_self_reporter(), + + ctrace:start_span(foo), + ctrace:finish_span(), + + receive + {span, Span} -> + ?assertEqual(foo, passage_span:get_operation_name(Span)) + end. + + +op_name_from_fun() -> + set_self_reporter(), + + ctrace:start_span(fun ctrace:match/2), + ctrace:finish_span(), + + receive + {span, Span} -> + OpName = passage_span:get_operation_name(Span), + ?assertEqual('ctrace:match/2', OpName) + end. + + +skipped_when_disabled() -> + set_self_reporter(), + + ?assert(not ctrace:has_span()), + ctrace:start_span(foo), + ?assert(ctrace:has_span()), + ctrace:finish_span(), + ?assert(not ctrace:has_span()), + receive {span, _Span} -> ok end, + + IsEnabled = get(?IS_ENABLED_KEY), + try + put(?IS_ENABLED_KEY, false), + + ?assert(not ctrace:has_span()), + ctrace:start_span(foo), + ?assert(not ctrace:has_span()), + ctrace:finish_span(), + ?assert(not ctrace:has_span()) + after + put(?IS_ENABLED_KEY, IsEnabled) + end. + + +set_tags_on_start_span() -> + set_self_reporter(), + + Tags = #{foo => bar}, + ctrace:start_span(bang, [{tags, Tags}]), + ctrace:finish_span(), + + receive + {span, Span} -> + ?assertEqual(bang, passage_span:get_operation_name(Span)), + ?assertEqual(#{foo => bar}, passage_span:get_tags(Span)) + end. + + +set_time_on_start_span() -> + set_self_reporter(), + + Time = os:timestamp(), + timer:sleep(100), + ctrace:start_span(bang, [{time, Time}]), + ctrace:finish_span(), + + receive + {span, Span} -> + ?assertEqual(Time, passage_span:get_start_time(Span)) + end. + + +skip_on_filtered() -> + set_self_reporter(), + + config_set("tracing.filters", "do_skip", "(#{}) -> false"), + ctrace_config:update(), + + ?assert(not ctrace:has_span()), + ctrace:start_span(do_skip), + ?assert(not ctrace:has_span()), + ctrace:finish_span(), + ?assert(not ctrace:has_span()). 
+ + +include_or_skip_on_sampled() -> + set_self_reporter(), + + config_set("tracing.filters", "sample", "(#{}) -> 0.0"), + ctrace_config:update(), + + ?assert(not ctrace:has_span()), + ctrace:start_span(sample), + ?assert(not ctrace:has_span()), + ctrace:finish_span(), + ?assert(not ctrace:has_span()), + + config_set("tracing.filters", "sample", "(#{}) -> 1.0"), + ctrace_config:update(), + + ?assert(not ctrace:has_span()), + ctrace:start_span(sample), + ?assert(ctrace:has_span()), + ctrace:finish_span(), + ?assert(not ctrace:has_span()), + + receive + {span, Span1} -> + ?assertEqual(sample, passage_span:get_operation_name(Span1)) + end, + + config_set("tracing.filters", "sample", "(#{}) -> 0.5"), + ctrace_config:update(), + + ?assert(not ctrace:has_span()), + ctrace:start_span(sample), + IsSampled = ctrace:has_span(), + ctrace:finish_span(), + ?assert(not ctrace:has_span()), + + if not IsSampled -> ok; true -> + receive + {span, Span2} -> + ?assertEqual( + sample, + passage_span:get_operation_name(Span2) + ) + end + end. + + +simple_child_span() -> + set_self_reporter(), + + ctrace:start_span(parent), + ctrace:start_span(child), + ctrace:finish_span(), + ctrace:finish_span(), + + receive + {span, CSpan} -> + ?assertEqual(child, passage_span:get_operation_name(CSpan)) + end, + + receive + {span, PSpan} -> + ?assertEqual(parent, passage_span:get_operation_name(PSpan)) + end. + + +update_tags() -> + set_self_reporter(), + + ctrace:start_span(foo, [{tags, #{foo => bar}}]), + ctrace:tag(#{bango => bongo}), + ctrace:finish_span(), + + receive + {span, Span} -> + ?assertEqual( + #{foo => bar, bango => bongo}, + passage_span:get_tags(Span) + ) + end. + + +update_logs() -> + set_self_reporter(), + + ctrace:start_span(foo), + ctrace:log(#{foo => bar}), + ctrace:finish_span(), + + receive + {span, Span1} -> + ?assertMatch( + [{#{foo := bar}, _TimeStamp}], + passage_span:get_logs(Span1) + ) + end, + + ctrace:start_span(foo), + ctrace:log(fun() -> + #{foo => baz} + end), + ctrace:finish_span(), + + receive + {span, Span2} -> + ?assertMatch( + [{#{foo := baz}, _TimeStamp}], + passage_span:get_logs(Span2) + ) + end. + + +current_span_getters() -> + ?assertEqual(false, ctrace:has_span()), + ?assertEqual(undefined, ctrace:tags()), + ?assertEqual(undefined, ctrace:refs()), + ?assertEqual(undefined, ctrace:operation_name()), + ?assertEqual(undefined, ctrace:trace_id()), + ?assertEqual(undefined, ctrace:span_id()), + ?assertEqual(undefined, ctrace:tracer()), + ?assertEqual(undefined, ctrace:context()), + + ctrace:start_span(parent), + ctrace:start_span(child, [{tags, #{foo => oof}}]), + + ?assertEqual(true, ctrace:has_span()), + ?assertEqual(#{foo => oof, origin => <<"parent">>}, ctrace:tags()), + ?assertMatch([{child_of, _} | _], ctrace:refs()), + ?assertEqual(child, ctrace:operation_name()), + ?assert(is_integer(ctrace:trace_id())), + ?assert(is_integer(ctrace:span_id())), + ?assertEqual(?MAIN_TRACER, ctrace:tracer()), + ?assertNotEqual(undefined, ctrace:context()), + + ctrace:finish_span(), + ctrace:finish_span(), + + receive + {span, CSpan} -> + ?assertEqual(child, passage_span:get_operation_name(CSpan)) + end, + + receive + {span, PSpan} -> + ?assertEqual(parent, passage_span:get_operation_name(PSpan)) + end. 
+ + +create_external_span() -> + Span1 = ctrace:external_span(1, 2, 3), + Ctx1 = passage_span:get_context(Span1), + ?assertEqual(1, jaeger_passage_span_context:get_trace_id(Ctx1)), + ?assertEqual(2, jaeger_passage_span_context:get_span_id(Ctx1)), + + Span2 = ctrace:external_span(42, undefined, undefined), + Ctx2 = passage_span:get_context(Span2), + ?assertEqual(42, jaeger_passage_span_context:get_trace_id(Ctx2)), + ?assert(is_integer(jaeger_passage_span_context:get_span_id(Ctx2))). + + +use_external_span() -> + Parent = ctrace:external_span(1, 2, 3), + + ?assert(not ctrace:has_span()), + ctrace:start_span(foo, [{root, Parent}]), + ?assert(ctrace:has_span()), + ctrace:finish_span(), + ?assert(not ctrace:has_span()), + + receive + {span, Span} -> + Ctx = passage_span:get_context(Span), + TraceId = jaeger_passage_span_context:get_trace_id(Ctx), + ?assertEqual(1, TraceId) + end. + + +config_set(Section, Key, Value) -> + PrevValue = config:get(Section, Key), + if Value == PrevValue -> ok; true -> + config:set(Section, Key, Value, false), + test_util:wait_other_value(fun() -> + config:get(Section, Key) + end, PrevValue) + end. + + +set_self_reporter() -> + SelfReporter = passage_reporter_process:new(self(), span), + passage_tracer_registry:set_reporter(?MAIN_TRACER, SelfReporter), + test_util:wait_value(fun() -> + {ok, Result} = passage_tracer_registry:get_reporter(?MAIN_TRACER), + Result + end, SelfReporter).
\ No newline at end of file diff --git a/src/ddoc_cache/src/ddoc_cache_entry.erl b/src/ddoc_cache/src/ddoc_cache_entry.erl index 4cc3d7e52..ed0311bbd 100644 --- a/src/ddoc_cache/src/ddoc_cache_entry.erl +++ b/src/ddoc_cache/src/ddoc_cache_entry.erl @@ -34,7 +34,8 @@ handle_call/3, handle_cast/2, handle_info/2, - code_change/3 + code_change/3, + format_status/2 ]). -export([ @@ -282,6 +283,24 @@ code_change(_, St, _) -> {ok, St}. +format_status(_Opt, [_PDict, State]) -> + #st{ + key = Key, + val = Val, + opener = Opener, + waiters = Waiters, + ts = TS, + accessed = Accepted + } = State, + [{data, [{"State", [ + {key, Key}, + {val, Val}, + {opener, Opener}, + {waiters, {length, length(Waiters)}}, + {ts, TS}, + {accessed, Accepted} + ]}]}]. + spawn_opener(Key) -> {Pid, _} = erlang:spawn_monitor(?MODULE, do_open, [Key]), Pid. diff --git a/src/ddoc_cache/src/ddoc_cache_entry_ddocid.erl b/src/ddoc_cache/src/ddoc_cache_entry_ddocid.erl index 5248469fb..7c3dc6787 100644 --- a/src/ddoc_cache/src/ddoc_cache_entry_ddocid.erl +++ b/src/ddoc_cache/src/ddoc_cache_entry_ddocid.erl @@ -33,7 +33,7 @@ ddocid({_, DDocId}) -> recover({DbName, DDocId}) -> - fabric:open_doc(DbName, DDocId, [ejson_body, ?ADMIN_CTX]). + fabric2_db:open_doc(DbName, DDocId, [ejson_body, ?ADMIN_CTX]). insert({DbName, DDocId}, {ok, #doc{revs = Revs} = DDoc}) -> diff --git a/src/ddoc_cache/src/ddoc_cache_entry_ddocid_rev.erl b/src/ddoc_cache/src/ddoc_cache_entry_ddocid_rev.erl index 868fa7789..38445af96 100644 --- a/src/ddoc_cache/src/ddoc_cache_entry_ddocid_rev.erl +++ b/src/ddoc_cache/src/ddoc_cache_entry_ddocid_rev.erl @@ -34,7 +34,7 @@ ddocid({_, DDocId, _}) -> recover({DbName, DDocId, Rev}) -> Opts = [ejson_body, ?ADMIN_CTX], - {ok, [Resp]} = fabric:open_revs(DbName, DDocId, [Rev], Opts), + {ok, [Resp]} = fabric2_db:open_doc_revs(DbName, DDocId, [Rev], Opts), Resp. diff --git a/src/dreyfus/src/dreyfus_fabric_cleanup.erl b/src/dreyfus/src/dreyfus_fabric_cleanup.erl index 2840a2f2d..681712748 100644 --- a/src/dreyfus/src/dreyfus_fabric_cleanup.erl +++ b/src/dreyfus/src/dreyfus_fabric_cleanup.erl @@ -30,12 +30,16 @@ go(DbName) -> ok. active_sigs(#doc{body={Fields}}=Doc) -> - {RawIndexes} = couch_util:get_value(<<"indexes">>, Fields, {[]}), - {IndexNames, _} = lists:unzip(RawIndexes), - [begin - {ok, Index} = dreyfus_index:design_doc_to_index(Doc, IndexName), - Index#index.sig - end || IndexName <- IndexNames]. + try + {RawIndexes} = couch_util:get_value(<<"indexes">>, Fields, {[]}), + {IndexNames, _} = lists:unzip(RawIndexes), + [begin + {ok, Index} = dreyfus_index:design_doc_to_index(Doc, IndexName), + Index#index.sig + end || IndexName <- IndexNames] + catch error:{badmatch, _Error} -> + [] + end. 
cleanup_local_purge_doc(DbName, ActiveSigs) -> {ok, BaseDir} = clouseau_rpc:get_root_dir(), diff --git a/src/dreyfus/src/dreyfus_httpd.erl b/src/dreyfus/src/dreyfus_httpd.erl index 346f5ede6..007dace8f 100644 --- a/src/dreyfus/src/dreyfus_httpd.erl +++ b/src/dreyfus/src/dreyfus_httpd.erl @@ -239,6 +239,8 @@ validate_index_query(counts, Value, Args) -> Args#index_query_args{counts=Value}; validate_index_query(ranges, Value, Args) -> Args#index_query_args{ranges=Value}; +validate_index_query(drilldown, [[_|_]|_] = Value, Args) -> + Args#index_query_args{drilldown=Value}; validate_index_query(drilldown, Value, Args) -> DrillDown = Args#index_query_args.drilldown, Args#index_query_args{drilldown=[Value|DrillDown]}; @@ -447,10 +449,15 @@ validate_search_restrictions(Db, DDoc, Args) -> q = Query, partition = Partition, grouping = Grouping, - limit = Limit + limit = Limit, + counts = Counts, + drilldown = Drilldown, + ranges = Ranges } = Args, #grouping{ - by = GroupBy + by = GroupBy, + limit = GroupLimit, + sort = GroupSort } = Grouping, case Query of @@ -496,9 +503,18 @@ validate_search_restrictions(Db, DDoc, Args) -> parse_non_negative_int_param("limit", Limit, "max_limit", MaxLimit) end, - case GroupBy /= nil andalso is_binary(Partition) of + DefaultArgs = #index_query_args{}, + + case is_binary(Partition) andalso ( + Counts /= DefaultArgs#index_query_args.counts + orelse Drilldown /= DefaultArgs#index_query_args.drilldown + orelse Ranges /= DefaultArgs#index_query_args.ranges + orelse GroupSort /= DefaultArgs#index_query_args.grouping#grouping.sort + orelse GroupBy /= DefaultArgs#index_query_args.grouping#grouping.by + orelse GroupLimit /= DefaultArgs#index_query_args.grouping#grouping.limit + ) of true -> - Msg5 = <<"`group_by` and `partition` are incompatible">>, + Msg5 = <<"`partition` and any of `drilldown`, `ranges`, `group_field`, `group_sort`, `group_limit` or `group_by` are incompatible">>, throw({bad_request, Msg5}); false -> ok diff --git a/src/dreyfus/src/dreyfus_index.erl b/src/dreyfus/src/dreyfus_index.erl index 2bf560f37..7236eb16b 100644 --- a/src/dreyfus/src/dreyfus_index.erl +++ b/src/dreyfus/src/dreyfus_index.erl @@ -29,7 +29,7 @@ % gen_server api. -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, - code_change/3]). + code_change/3, format_status/2]). % private definitions. -record(state, { @@ -244,6 +244,30 @@ terminate(_Reason, _State) -> code_change(_OldVsn, State, _Extra) -> {ok, State}. +format_status(_Opt, [_PDict, #state{index = #index{} = Index} = State]) -> + #index{ + ddoc_id=Id, + name=IndexName, + sig=Sig + } = Index, + IndexScrubbed = [{ + {ddoc_id, Id}, + {name, IndexName}, + {sig, Sig} + }], + Scrubbed = State#state{ + index = IndexScrubbed, + waiting_list = {length, length(State#state.waiting_list)} + }, + ?record_to_keyval(state, Scrubbed); + +format_status(_Opt, [_PDict, #state{} = State]) -> + Scrubbed = State#state{ + index = nil, + waiting_list = {length, length(State#state.waiting_list)} + }, + ?record_to_keyval(state, Scrubbed). + % private functions. 
open_index(DbName, #index{analyzer=Analyzer, sig=Sig}) -> diff --git a/src/dreyfus/test/elixir/test/partition_search_test.exs b/src/dreyfus/test/elixir/test/partition_search_test.exs index 19a915ad3..121995449 100644 --- a/src/dreyfus/test/elixir/test/partition_search_test.exs +++ b/src/dreyfus/test/elixir/test/partition_search_test.exs @@ -21,7 +21,7 @@ defmodule PartitionSearchTest do } end - resp = Couch.post("/#{db_name}/_bulk_docs", body: %{:docs => docs}, query: %{w: 3}) + resp = Couch.post("/#{db_name}/_bulk_docs", headers: ["Content-Type": "application/json"], body: %{:docs => docs}, query: %{w: 3}) assert resp.status_code in [201, 202] end @@ -166,7 +166,7 @@ defmodule PartitionSearchTest do resp = Couch.get(url, query: %{q: "some:field"}) assert resp.status_code == 200 ids = get_ids(resp) - assert ids == ["bar:1", "bar:5", "bar:9", "foo:2", "bar:3", "foo:4", "foo:6", "bar:7", "foo:8", "foo:10"] + assert Enum.sort(ids) == Enum.sort(["bar:1", "bar:5", "bar:9", "foo:2", "bar:3", "foo:4", "foo:6", "bar:7", "foo:8", "foo:10"]) end @tag :with_db @@ -179,7 +179,7 @@ defmodule PartitionSearchTest do resp = Couch.get(url, query: %{q: "some:field"}) assert resp.status_code == 200 ids = get_ids(resp) - assert ids == ["bar:1", "bar:5", "bar:9", "foo:2", "bar:3", "foo:4", "foo:6", "bar:7", "foo:8", "foo:10"] + assert Enum.sort(ids) == Enum.sort(["bar:1", "bar:5", "bar:9", "foo:2", "bar:3", "foo:4", "foo:6", "bar:7", "foo:8", "foo:10"]) end @tag :with_db @@ -192,7 +192,7 @@ defmodule PartitionSearchTest do resp = Couch.get(url, query: %{q: "some:field", limit: 3}) assert resp.status_code == 200 ids = get_ids(resp) - assert ids == ["bar:1", "bar:5", "bar:9"] + assert Enum.sort(ids) == Enum.sort(["bar:1", "bar:5", "bar:9"]) end @tag :with_db @@ -216,4 +216,32 @@ defmodule PartitionSearchTest do resp = Couch.post(url, body: %{q: "some:field", partition: "bar"}) assert resp.status_code == 400 end + + @tag :with_partitioned_db + test "restricted parameters are not allowed in query or body", context do + db_name = context[:db_name] + create_search_docs(db_name) + create_ddoc(db_name) + + body = %{q: "some:field", partition: "foo"} + + Enum.each( + [ + {:counts, "[\"type\"]"}, + {:group_field, "some"}, + {:ranges, :jiffy.encode(%{price: %{cheap: "[0 TO 100]"}})}, + {:drilldown, "[\"key\",\"a\"]"}, + ], + fn {key, value} -> + url = "/#{db_name}/_partition/foo/_design/library/_search/books" + bannedparam = Map.put(body, key, value) + get_resp = Couch.get(url, query: bannedparam) + %{:body => %{"reason" => get_reason}} = get_resp + assert Regex.match?(~r/are incompatible/, get_reason) + post_resp = Couch.post(url, body: bannedparam) + %{:body => %{"reason" => post_reason}} = post_resp + assert Regex.match?(~r/are incompatible/, post_reason) + end + ) + end end diff --git a/src/dreyfus/test/elixir/test/search_test.exs b/src/dreyfus/test/elixir/test/search_test.exs new file mode 100644 index 000000000..829b3395f --- /dev/null +++ b/src/dreyfus/test/elixir/test/search_test.exs @@ -0,0 +1,226 @@ +defmodule SearchTest do + use CouchTestCase + + @moduletag :search + + @moduledoc """ + Test search + """ + + def create_search_docs(db_name) do + resp = Couch.post("/#{db_name}/_bulk_docs", + headers: ["Content-Type": "application/json"], + body: %{:docs => [ + %{"item" => "apple", "place" => "kitchen", "state" => "new"}, + %{"item" => "banana", "place" => "kitchen", "state" => "new"}, + %{"item" => "carrot", "place" => "kitchen", "state" => "old"}, + %{"item" => "date", "place" => "lobby", "state" => "unknown"}, 
+ ]} + ) + assert resp.status_code in [201, 202] + end + + def create_ddoc(db_name, opts \\ %{}) do + default_ddoc = %{ + indexes: %{ + fruits: %{ + analyzer: %{name: "standard"}, + index: "function (doc) {\n index(\"item\", doc.item, {facet: true});\n index(\"place\", doc.place, {facet: true});\n index(\"state\", doc.state, {facet: true});\n}" + } + } + } + + ddoc = Enum.into(opts, default_ddoc) + + resp = Couch.put("/#{db_name}/_design/inventory", body: ddoc) + assert resp.status_code in [201, 202] + assert Map.has_key?(resp.body, "ok") == true + end + + def create_invalid_ddoc(db_name, opts \\ %{}) do + invalid_ddoc = %{ + :indexes => [ + %{"name" => "foo", "ddoc" => "bar", "type" => "text"}, + ] + } + + ddoc = Enum.into(opts, invalid_ddoc) + + resp = Couch.put("/#{db_name}/_design/search", body: ddoc) + assert resp.status_code in [201, 202] + assert Map.has_key?(resp.body, "ok") == true + end + + def get_items (resp) do + %{:body => %{"rows" => rows}} = resp + Enum.map(rows, fn row -> row["doc"]["item"] end) + end + + @tag :with_db + test "search returns all items for GET", context do + db_name = context[:db_name] + create_search_docs(db_name) + create_ddoc(db_name) + + url = "/#{db_name}/_design/inventory/_search/fruits" + resp = Couch.get(url, query: %{q: "*:*", include_docs: true}) + assert resp.status_code == 200 + ids = get_items(resp) + assert Enum.sort(ids) == Enum.sort(["apple", "banana", "carrot", "date"]) + end + + @tag :with_db + test "drilldown single key single value for GET", context do + db_name = context[:db_name] + create_search_docs(db_name) + create_ddoc(db_name) + + url = "/#{db_name}/_design/inventory/_search/fruits" + resp = Couch.get(url, query: %{q: "*:*", drilldown: :jiffy.encode(["place", "kitchen"]), include_docs: true}) + assert resp.status_code == 200 + ids = get_items(resp) + assert Enum.sort(ids) == Enum.sort(["apple", "banana", "carrot"]) + end + + @tag :with_db + test "drilldown single key multiple values for GET", context do + db_name = context[:db_name] + create_search_docs(db_name) + create_ddoc(db_name) + + url = "/#{db_name}/_design/inventory/_search/fruits" + resp = Couch.get(url, query: %{q: "*:*", drilldown: :jiffy.encode(["state", "new", "unknown"]), include_docs: true}) + assert resp.status_code == 200 + ids = get_items(resp) + assert Enum.sort(ids) == Enum.sort(["apple", "banana", "date"]) + end + + @tag :with_db + test "drilldown multiple keys single values for GET", context do + db_name = context[:db_name] + create_search_docs(db_name) + create_ddoc(db_name) + + url = "/#{db_name}/_design/inventory/_search/fruits" + resp = Couch.get(url, query: %{q: "*:*", drilldown: :jiffy.encode([["state", "old"], ["item", "apple"]]), include_docs: true}) + assert resp.status_code == 200 + ids = get_items(resp) + assert Enum.sort(ids) == [] + end + + @tag :with_db + test "drilldown multiple query definitions for GET", context do + db_name = context[:db_name] + create_search_docs(db_name) + create_ddoc(db_name) + + url = "/#{db_name}/_design/inventory/_search/fruits?q=*:*&drilldown=[\"state\",\"old\"]&drilldown=[\"item\",\"apple\"]&include_docs=true" + resp = Couch.get(url) + assert resp.status_code == 200 + ids = get_items(resp) + assert Enum.sort(ids) == [] + end + + + @tag :with_db + test "search returns all items for POST", context do + db_name = context[:db_name] + create_search_docs(db_name) + create_ddoc(db_name) + + url = "/#{db_name}/_design/inventory/_search/fruits" + resp = Couch.post(url, body: %{q: "*:*", include_docs: true}) + assert 
resp.status_code == 200 + ids = get_items(resp) + assert Enum.sort(ids) == Enum.sort(["apple", "banana", "carrot", "date"]) + end + + @tag :with_db + test "drilldown single key single value for POST", context do + db_name = context[:db_name] + create_search_docs(db_name) + create_ddoc(db_name) + + url = "/#{db_name}/_design/inventory/_search/fruits" + resp = Couch.post(url, body: %{query: "*:*", drilldown: ["place", "kitchen"], include_docs: true}) + assert resp.status_code == 200 + ids = get_items(resp) + assert Enum.sort(ids) == Enum.sort(["apple", "banana", "carrot"]) + end + + @tag :with_db + test "drilldown single key multiple values for POST", context do + db_name = context[:db_name] + create_search_docs(db_name) + create_ddoc(db_name) + + url = "/#{db_name}/_design/inventory/_search/fruits" + resp = Couch.post(url, body: %{query: "*:*", drilldown: ["state", "new", "unknown"], include_docs: true}) + assert resp.status_code == 200 + ids = get_items(resp) + assert Enum.sort(ids) == Enum.sort(["apple", "banana", "date"]) + end + + @tag :with_db + test "drilldown multiple keys single values for POST", context do + db_name = context[:db_name] + create_search_docs(db_name) + create_ddoc(db_name) + + url = "/#{db_name}/_design/inventory/_search/fruits" + resp = Couch.post(url, body: %{q: "*:*", drilldown: [["state", "old"], ["item", "apple"]], include_docs: true}) + assert resp.status_code == 200 + ids = get_items(resp) + assert Enum.sort(ids) == [] + end + + @tag :with_db + test "drilldown three keys single values for POST", context do + db_name = context[:db_name] + create_search_docs(db_name) + create_ddoc(db_name) + + url = "/#{db_name}/_design/inventory/_search/fruits" + resp = Couch.post(url, body: %{q: "*:*", drilldown: [["place", "kitchen"], ["state", "new"], ["item", "apple"]], include_docs: true}) + assert resp.status_code == 200 + ids = get_items(resp) + assert Enum.sort(ids) == ["apple"] + end + + @tag :with_db + test "drilldown multiple keys multiple values for POST", context do + db_name = context[:db_name] + create_search_docs(db_name) + create_ddoc(db_name) + + url = "/#{db_name}/_design/inventory/_search/fruits" + resp = Couch.post(url, body: %{q: "*:*", drilldown: [["state", "old", "new"], ["item", "apple"]], include_docs: true}) + assert resp.status_code == 200 + ids = get_items(resp) + assert Enum.sort(ids) == ["apple"] + end + + @tag :with_db + test "drilldown multiple query definitions for POST", context do + db_name = context[:db_name] + create_search_docs(db_name) + create_ddoc(db_name) + + url = "/#{db_name}/_design/inventory/_search/fruits" + resp = Couch.post(url, body: "{\"include_docs\": true, \"q\": \"*:*\", \"drilldown\": [\"state\", \"old\"], \"drilldown\": [\"item\", \"apple\"]}") + assert resp.status_code == 200 + ids = get_items(resp) + assert Enum.sort(ids) == ["apple"] + end + + @tag :with_db + test "clean up search index with invalid design document", context do + db_name = context[:db_name] + create_search_docs(db_name) + create_ddoc(db_name) + create_invalid_ddoc(db_name) + + resp = Couch.post("/#{db_name}/_search_cleanup") + assert resp.status_code in [201, 202] + end +end diff --git a/src/ebtree/.gitignore b/src/ebtree/.gitignore new file mode 100644 index 000000000..04f4f25d7 --- /dev/null +++ b/src/ebtree/.gitignore @@ -0,0 +1,3 @@ +.erlfdb/ +_build/ +rebar.lock diff --git a/src/ebtree/README.md b/src/ebtree/README.md new file mode 100644 index 000000000..9ce79a0c6 --- /dev/null +++ b/src/ebtree/README.md @@ -0,0 +1,19 @@ +A B+Tree (all values 
stored in leaves) with configurable order, where +all data is stored in FoundationDB. + +The tree is balanced at all times. A bidirectional linked list is +maintained between leaf nodes for efficient range queries in either +direction. You can pass in an fdb Db or an open Tx; the latter is vastly +more efficient for multiple inserts, so batch if you can. + +A reduction function can be specified; the B+Tree calculates and stores +intermediate reduction values on the inner nodes for performance. + +The FoundationDB keys start with a user-defined prefix and the opaque +node id. + +TODO + +1. Rewrite inner node ids (non-root, non-leaf) so we can safely cache + them outside of a transaction. (see "immutable" branch) +2. Chunkify large values over multiple rows? diff --git a/src/ebtree/rebar.config b/src/ebtree/rebar.config new file mode 100644 index 000000000..edf6725c8 --- /dev/null +++ b/src/ebtree/rebar.config @@ -0,0 +1,17 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +{erl_opts, [debug_info]}. +{cover_enabled, true}. +{deps, [ + {erlfdb, ".*", {git, "https://github.com/apache/couchdb-erlfdb", {tag, "v1.2.2"}}} +]}. diff --git a/src/ebtree/src/ebtree.app.src b/src/ebtree/src/ebtree.app.src new file mode 100644 index 000000000..d4966f6a5 --- /dev/null +++ b/src/ebtree/src/ebtree.app.src @@ -0,0 +1,27 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +{application, ebtree, + [{description, "An OTP library"}, + {vsn, git}, + {registered, []}, + {applications, + [kernel, + stdlib, + erlfdb + ]}, + {env,[]}, + {modules, []}, + + {licenses, ["Apache 2.0"]}, + {links, []} + ]}. diff --git a/src/ebtree/src/ebtree.erl b/src/ebtree/src/ebtree.erl new file mode 100644 index 000000000..97a820304 --- /dev/null +++ b/src/ebtree/src/ebtree.erl @@ -0,0 +1,1819 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(ebtree).
+ +-export([ + open/3, + open/4, + min/0, + max/0, + insert/4, + insert_multi/3, + delete/3, + lookup/3, + lookup_multi/3, + range/6, + reverse_range/6, + fold/4, + fold/5, + reduce/4, + reduce/5, + full_reduce/2, + group_reduce/7, + group_reduce/8, + validate_tree/2 +]). + +-record(node, { + id, + level = 0, + prev, + next, + members = [] %% [{Key0, Value0} | {FirstKey0, LastKey0, Pointer0, Reduction0}, ...] +}). + +-record(tree, { + prefix, + min, + max, + collate_fun, + reduce_fun, + encode_fun, + persist_fun, + cache_fun +}). + +-define(META, 0). +-define(META_ORDER, 0). + +-define(NODE, 1). +-define(NODE_ROOT_ID, <<0>>). + +-define(underflow(Tree, Node), Tree#tree.min > length(Node#node.members)). +-define(at_min(Tree, Node), Tree#tree.min == length(Node#node.members)). +-define(is_full(Tree, Node), Tree#tree.max == length(Node#node.members)). + +-ifdef(TEST). +-define(validate_node(Tree, Node), validate_node(Tree, Node)). +-else. +-define(validate_node(Tree, Node), ignore). +-endif. + +%% two special 1-bit bitstrings that cannot appear in valid keys. +-define(MIN, <<0:1>>). +-define(MAX, <<1:1>>). + + +%% @equiv open(Db, Prefix, Order, []) +-spec open(term(), binary(), pos_integer()) -> #tree{}. +open(Db, Prefix, Order) -> + open(Db, Prefix, Order, []). + + +%% @doc Open a new ebtree, initialising it if doesn't already exist. +%% @param Db An erlfdb database or transaction. +%% @param Prefix The key prefix applied to all ebtree keys. +%% @param Order The maximum number of items allowed in an ebtree node (must be an even number). Ignored +%% if ebtree is already initialised. +%% @param Options Supported options are {reduce_fun, Fun} and {collate_fun, Fun}. +%% @returns A data structure representing the ebtree, to be passed to all other functions. +-spec open(term(), binary(), pos_integer(), list()) -> #tree{}. +open(Db, Prefix, Order, Options) when is_binary(Prefix), is_integer(Order), Order > 2, Order rem 2 == 0 -> + ReduceFun = proplists:get_value(reduce_fun, Options, fun reduce_noop/2), + CollateFun = proplists:get_value(collate_fun, Options, fun collate_raw/2), + EncodeFun = proplists:get_value(encode_fun, Options, fun encode_erlang/3), + PersistFun = proplists:get_value(persist_fun, Options, fun simple_persist/3), + CacheFun = proplists:get_value(cache_fun, Options, fun cache_noop/2), + + Tree = #tree{ + prefix = Prefix, + reduce_fun = ReduceFun, + collate_fun = CollateFun, + encode_fun = EncodeFun, + persist_fun = PersistFun, + cache_fun = CacheFun + }, + + erlfdb:transactional(Db, fun(Tx) -> + case get_meta(Tx, Tree, ?META_ORDER) of + not_found -> + erlfdb:clear_range_startswith(Tx, Prefix), + set_meta(Tx, Tree, ?META_ORDER, Order), + set_node(Tx, Tree, #node{id = ?NODE_ROOT_ID}), + init_order(Tree, Order); + ActualOrder when is_integer(ActualOrder) -> + init_order(Tree, ActualOrder) + end + end). + + +%% @doc a special value guaranteed to be smaller than any value in an ebtree. +min() -> + ?MIN. + + +%% @doc a special value guaranteed to be larger than any value in an ebtree. +max() -> + ?MAX. + +%% @doc Lookup a specific key in the ebtree. +%% @param Db An erlfdb database or transaction. +%% @param Tree the ebtree. +%% @param Key the key to lookup +%% @returns A key-value tuple if found, false if not present in the ebtree. +-spec lookup(Db :: term(), Tree :: #tree{}, Key :: term()) -> + {Key :: term(), Value :: term()} | false. 
+lookup(Db, #tree{} = Tree, Key) -> + Fun = fun + ({visit, K, V}, _Acc) when K =:= Key -> + {stop, {K, V}}; + ({visit, K, _V}, Acc) -> + case collate(Tree, K, Key, [gt]) of + true -> + {stop, Acc}; + false -> + {ok, Acc} + end; + ({traverse, F, L, _R}, Acc) -> + case {collate(Tree, F, Key, [gt]), collate(Tree, Key, L, [lt, eq])} of + {true, _} -> + {stop, Acc}; + {false, true} -> + {ok, Acc}; + {false, false} -> + {skip, Acc} + end + end, + fold(Db, Tree, Fun, false, []). + + +%% @doc Lookup a list of keys in the ebtree. +%% @param Db An erlfdb database or transaction. +%% @param Tree the ebtree. +%% @param Keys the list of keys to lookup +%% @returns A list containing key/value tuples for keys that were found +-spec lookup_multi(Db :: term(), Tree :: #tree{}, Key :: [term()]) -> + [{Key :: term(), Value :: term()}]. +lookup_multi(Db, #tree{} = Tree, Keys) -> + FoldFun = fun lookup_multi_fold/2, + Acc = {Tree, sort_keys(Tree, Keys), []}, + {_, _, FoundKeys} = fold(Db, Tree, FoldFun, Acc, []), + FoundKeys. + + +lookup_multi_fold(_, {_, [], _} = Acc) -> + % No more keys to find + {stop, Acc}; + +lookup_multi_fold({visit, Key1, Value}, {Tree, [Key2 | Rest], Acc}) -> + {NewKeys, NewAcc} = case collate(Tree, Key1, Key2) of + lt -> + % Still looking for the next user key + {[Key2 | Rest], Acc}; + eq -> + % Found a requested key + {Rest, [{Key2, Value} | Acc]}; + gt -> + % The user key wasn't found so we drop it + {Rest, Acc} + end, + {ok, {Tree, NewKeys, NewAcc}}; + +lookup_multi_fold({traverse, FKey, LKey, R}, {Tree, [UKey | Rest], Acc}) -> + case collate(Tree, FKey, UKey, [gt]) of + true -> + % We've passed by our first user key + lookup_multi_fold({traverse, FKey, LKey, R}, {Tree, Rest, Acc}); + false -> + case collate(Tree, UKey, LKey, [lt, eq]) of + true -> + % Key might be in this range + {ok, {Tree, [UKey | Rest], Acc}}; + false -> + % Next key is not in range + {skip, {Tree, [UKey | Rest], Acc}} + end + end. + + +%% @equiv fold(Db, Tree, Fun, Acc, []) +fold(Db, #tree{} = Tree, Fun, Acc) -> + fold(Db, Tree, Fun, Acc, []). + + +%% @doc Custom traversal of the ebtree. +%% @param Db An erlfdb database or transaction. +%% @param Tree the ebtree. +%% @param Fun A callback function as nodes are loaded that directs the traversal. +%% @param Acc The initial accumulator. +%% @param Options Options that control how the fold is executed. +%% @returns the final accumulator. + +-type fold_args() :: + {visit, Key :: term(), Value :: term()} | + {traverse, First :: term(), Last :: term(), Reduction :: term()}. + +-type fold_option() :: [{dir, fwd | rev}]. + +-spec fold(Db, Tree, Fun, Acc0, Options) -> Acc1 when + Db :: term(), + Tree :: #tree{}, + Fun :: fun((fold_args(), Acc0) -> {ok | skip | stop, Acc1}), + Acc0 :: term(), + Options :: [fold_option()], + Acc1 :: term(). +fold(Db, #tree{} = Tree, Fun, Acc, Options) -> + {_, Reduce} = erlfdb:transactional(Db, fun(Tx) -> + Root = get_node(Tx, Tree, ?NODE_ROOT_ID), + fold(Db, Tree, Root, Fun, Acc, Options) + end), + Reduce. 
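A minimal caller-side sketch (not part of the patch) tying together open/4, insert/4, the lookup functions and fold/4 above; it uses the same erlfdb_util test helper as the eunit tests at the bottom of this module, and the helper name and prefix binary are illustrative only:

    % Illustrative helper, not in ebtree.erl: open a tree, write a few keys,
    % read them back, then walk everything with fold/4.
    basic_usage() ->
        Db = erlfdb_util:get_test_db([empty]),
        Tree = ebtree:open(Db, <<"example">>, 4),
        lists:foreach(fun(K) -> ebtree:insert(Db, Tree, K, K * 10) end, lists:seq(1, 8)),
        {3, 30} = ebtree:lookup(Db, Tree, 3),
        false = ebtree:lookup(Db, Tree, 99),
        %% lookup_multi returns only the keys that exist, most recently found first
        [{5, 50}, {2, 20}] = ebtree:lookup_multi(Db, Tree, [2, 5, 99]),
        %% fold/4 hands the callback {visit, K, V} for leaf entries and
        %% {traverse, First, Last, Reduction} for inner ranges; returning ok
        %% descends, skip prunes a subtree, stop aborts the traversal
        CollectKeys = fun
            ({visit, K, _V}, Acc) -> {ok, [K | Acc]};
            ({traverse, _F, _L, _R}, Acc) -> {ok, Acc}
        end,
        [1, 2, 3, 4, 5, 6, 7, 8] = lists:reverse(ebtree:fold(Db, Tree, CollectKeys, [])),
        ok.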
+ + +fold(Db, #tree{} = Tree, #node{} = Node, Fun, Acc, Options) -> + Dir = proplists:get_value(dir, Options, fwd), + Members = case Dir of + fwd -> Node#node.members; + rev -> lists:reverse(Node#node.members) + end, + fold(Db, #tree{} = Tree, Members, Fun, Acc, Options); + + +fold(_Db, #tree{} = _Tree, [], _Fun, Acc, _Options) -> + {ok, Acc}; + +fold(Db, #tree{} = Tree, [{K, V} | Rest], Fun, Acc0, Options) -> + case Fun({visit, K, V}, Acc0) of + {ok, Acc1} -> + fold(Db, Tree, Rest, Fun, Acc1, Options); + {stop, Acc1} -> + {stop, Acc1} + end; + +fold(Db, #tree{} = Tree, [{F, L, P, R} | Rest], Fun, Acc0, Options) -> + case Fun({traverse, F, L, R}, Acc0) of + {ok, Acc1} -> + Node = get_node(Db, Tree, P), + case fold(Db, Tree, Node, Fun, Acc1, Options) of + {ok, Acc2} -> + fold(Db, Tree, Rest, Fun, Acc2, Options); + {stop, Acc2} -> + {stop, Acc2} + end; + {skip, Acc1} -> + fold(Db, Tree, Rest, Fun, Acc1, Options); + {stop, Acc1} -> + {stop, Acc1} + end. + + +%% @doc Calculate the final reduce value for the whole ebtree. +%% @param Db An erlfdb database or transaction. +%% @param Tree the ebtree. +%% @returns the final reduce value +-spec full_reduce(Db :: term(), Tree :: #tree{}) -> term(). +full_reduce(Db, #tree{} = Tree) -> + Fun = fun + ({visit, K, V}, {MapAcc, ReduceAcc}) -> + {ok, {[{K, V} | MapAcc], ReduceAcc}}; + ({traverse, _F, _L, R}, {MapAcc, ReduceAcc}) -> + {skip, {MapAcc, [R | ReduceAcc]}} + end, + {MapValues, ReduceValues} = fold(Db, Tree, Fun, {[], []}, []), + do_reduce(Tree, MapValues, ReduceValues). + + +%% @equiv reduce(Db, Tree, StartKey, EndKey, []) +-spec reduce(Db :: term(), Tree :: #tree{}, StartKey :: term(), EndKey :: term()) -> term(). +reduce(Db, #tree{} = Tree, StartKey, EndKey) -> + reduce(Db, Tree, StartKey, EndKey, []). + +%% @doc Calculate the reduce value for all keys in the specified range. +%% @param Db An erlfdb database or transaction. +%% @param Tree The ebtree. +%% @param StartKey The beginning of the range +%% @param EndKey The end of the range +%% @returns the reduce value for the specified range +-spec reduce(Db :: term(), Tree :: #tree{}, StartKey :: term(), + EndKey :: term(), Options :: [reduce_option()]) -> term(). 
+reduce(Db, #tree{} = Tree, StartKey, EndKey, Options) -> + InclusiveStart = proplists:get_value(inclusive_start, Options, true), + InclusiveEnd = proplists:get_value(inclusive_end, Options, true), + + Fun = fun + ({visit, Key, Value}, {MapAcc, ReduceAcc}) -> + BeforeStart = collate(Tree, Key, StartKey, if InclusiveStart -> [lt]; true -> [lt, eq] end), + AfterEnd = collate(Tree, Key, EndKey, if InclusiveEnd -> [gt]; true -> [gt, eq] end), + InRange = collate(Tree, Key, StartKey, if InclusiveStart -> [gt, eq]; true -> [gt] end) + andalso collate(Tree, Key, EndKey, if InclusiveEnd -> [lt, eq]; true -> [lt] end), + if + BeforeStart -> + {ok, {MapAcc, ReduceAcc}}; + AfterEnd -> + {stop, {MapAcc, ReduceAcc}}; + InRange -> + {ok, {[{Key, Value} | MapAcc], ReduceAcc}} + end; + ({traverse, FirstKey, LastKey, Reduction}, {MapAcc, ReduceAcc}) -> + BeforeStart = collate(Tree, LastKey, StartKey, if InclusiveStart -> [lt]; true -> [lt, eq] end), + AfterEnd = collate(Tree, FirstKey, EndKey, if InclusiveEnd -> [gt]; true -> [gt, eq] end), + Whole = collate(Tree, FirstKey, StartKey, if InclusiveStart -> [gt, eq]; true -> [gt] end) + andalso collate(Tree, LastKey, EndKey, if InclusiveEnd -> [lt, eq]; true -> [lt] end), + if + BeforeStart -> + {skip, {MapAcc, ReduceAcc}}; + AfterEnd -> + {stop, {MapAcc, ReduceAcc}}; + Whole -> + {skip, {MapAcc, [Reduction | ReduceAcc]}}; + true -> + {ok, {MapAcc, ReduceAcc}} + end + end, + {MapValues, ReduceValues} = fold(Db, Tree, Fun, {[], []}, []), + do_reduce(Tree, MapValues, ReduceValues). + + +do_reduce(#tree{} = Tree, [], []) -> + reduce_values(Tree, [], false); + +do_reduce(#tree{} = Tree, [], ReduceValues) when is_list(ReduceValues) -> + reduce_values(Tree, ReduceValues, true); + +do_reduce(#tree{} = Tree, MapValues, ReduceValues) when is_list(MapValues), is_list(ReduceValues) -> + do_reduce(Tree, [], [reduce_values(Tree, MapValues, false) | ReduceValues]). + + +%% @equiv group_reduce(Db, Tree, StartKey, EndKey, GroupKeyFun, UserAccFun, UserAcc0, []) +-spec group_reduce( + Db :: term(), + Tree :: #tree{}, + StartKey :: term(), + EndKey :: term(), + GroupKeyFun :: fun((term()) -> group_key()), + UserAccFun :: fun(({group_key(), GroupValue :: term()}, Acc0 :: term()) -> Acc1 :: term()), + UserAcc0 :: term()) -> Acc1 :: term(). +group_reduce(Db, #tree{} = Tree, StartKey, EndKey, GroupKeyFun, UserAccFun, UserAcc0) -> + group_reduce(Db, Tree, StartKey, EndKey, GroupKeyFun, UserAccFun, UserAcc0, []). + + +%% @doc Calculate the reduce value for all groups in the specified range. +%% @param Db An erlfdb database or transaction. +%% @param Tree The ebtree. +%% @param StartKey The beginning of the range +%% @param EndKey The end of the range +%% @param GroupKeyFun A function that takes a key as a parameter and returns the group key. +%% @param UserAccFun A function called when a new group reduction is calculated and returns an acc. +%% @param UserAcc0 The initial accumulator. +%% @param Options Currently supported options are {dir, fwd | rev} +%% and {inclusive_start | inclusive_end, true | false} +%% @returns the final accumulator. +-type group_key() :: term(). + +-type reduce_option() :: [{inclusive_start, boolean()} | {inclusive_end, boolean()}]. + +-spec group_reduce( + Db :: term(), + Tree :: #tree{}, + StartKey :: term(), + EndKey :: term(), + GroupKeyFun :: fun((term()) -> group_key()), + UserAccFun :: fun(({group_key(), GroupValue :: term()}, Acc0 :: term()) -> Acc1 :: term()), + UserAcc0 :: term(), + Options :: [fold_option() | reduce_option()]) -> Acc1 :: term(). 
+group_reduce(Db, #tree{} = Tree, StartKey, EndKey, GroupKeyFun, UserAccFun, UserAcc0, Options) -> + Dir = proplists:get_value(dir, Options, fwd), + InclusiveStart = proplists:get_value(inclusive_start, Options, true), + InclusiveEnd = proplists:get_value(inclusive_end, Options, true), + NoGroupYet = ?MIN, + Fun = fun + ({visit, Key, Value}, {CurrentGroup, UserAcc, MapAcc, ReduceAcc}) -> + BeforeStart = collate(Tree, Key, StartKey, if InclusiveStart -> [lt]; true -> [lt, eq] end), + AfterEnd = collate(Tree, Key, EndKey, if InclusiveEnd -> [gt]; true -> [gt, eq] end), + InRange = + collate(Tree, Key, StartKey, if InclusiveStart -> [gt, eq]; true -> [gt] end) andalso + collate(Tree, Key, EndKey, if InclusiveEnd -> [lt, eq]; true -> [lt] end), + KeyGroup = GroupKeyFun(Key), + SameGroup = collate(Tree, CurrentGroup, KeyGroup, [eq]), + if + Dir == fwd andalso BeforeStart -> + {ok, {CurrentGroup, UserAcc, MapAcc, ReduceAcc}}; + Dir == rev andalso AfterEnd -> + {ok, {CurrentGroup, UserAcc, MapAcc, ReduceAcc}}; + Dir == fwd andalso AfterEnd -> + {stop, {CurrentGroup, UserAcc, MapAcc, ReduceAcc}}; + Dir == rev andalso BeforeStart -> + {stop, {CurrentGroup, UserAcc, MapAcc, ReduceAcc}}; + SameGroup -> + {ok, {CurrentGroup, UserAcc, [{Key, Value} | MapAcc], ReduceAcc}}; + InRange andalso CurrentGroup =:= NoGroupYet -> + {ok, {KeyGroup, UserAcc, [{Key, Value}], []}}; + InRange -> + %% implicit end of current group and start of a new one + GroupValue = do_reduce(Tree, MapAcc, ReduceAcc), + {ok, {KeyGroup, UserAccFun({CurrentGroup, GroupValue}, UserAcc), [{Key, Value}], []}} + end; + ({traverse, FirstKey, LastKey, Reduction}, {CurrentGroup, UserAcc, MapAcc, ReduceAcc}) -> + BeforeStart = collate(Tree, LastKey, StartKey, if InclusiveStart -> [lt]; true -> [lt, eq] end), + AfterEnd = collate(Tree, FirstKey, EndKey, if InclusiveEnd -> [gt]; true -> [gt, eq] end), + Whole = + collate(Tree, CurrentGroup, GroupKeyFun(FirstKey), [eq]) andalso + collate(Tree, CurrentGroup, GroupKeyFun(LastKey), [eq]), + FirstInRange = + collate(Tree, FirstKey, StartKey, if InclusiveStart -> [gt, eq]; true -> [gt] end) andalso + collate(Tree, FirstKey, EndKey, if InclusiveEnd -> [lt, eq]; true -> [lt] end), + LastInRange = + collate(Tree, LastKey, StartKey, if InclusiveStart -> [gt, eq]; true -> [gt] end) andalso + collate(Tree, LastKey, EndKey, if InclusiveEnd -> [lt, eq]; true -> [lt] end), + if + Dir == fwd andalso BeforeStart -> + {skip, {CurrentGroup, UserAcc, MapAcc, ReduceAcc}}; + Dir == rev andalso AfterEnd -> + {skip, {CurrentGroup, UserAcc, MapAcc, ReduceAcc}}; + Dir == fwd andalso AfterEnd -> + {stop, {CurrentGroup, UserAcc, MapAcc, ReduceAcc}}; + Dir == rev andalso BeforeStart -> + {stop, {CurrentGroup, UserAcc, MapAcc, ReduceAcc}}; + Whole andalso FirstInRange andalso LastInRange -> + {skip, {CurrentGroup, UserAcc, MapAcc, [Reduction | ReduceAcc]}}; + true -> + {ok, {CurrentGroup, UserAcc, MapAcc, ReduceAcc}} + end + end, + {CurrentGroup, UserAcc1, MapValues, ReduceValues} = fold(Db, Tree, Fun, {NoGroupYet, UserAcc0, [], []}, Options), + if + MapValues /= [] orelse ReduceValues /= [] -> + FinalGroup = do_reduce(Tree, MapValues, ReduceValues), + UserAccFun({CurrentGroup, FinalGroup}, UserAcc1); + true -> + UserAcc1 + end. + + +%% @doc Finds all key-value pairs for the specified range in forward order. +%% @param Db An erlfdb database or transaction. +%% @param Tree The ebtree. 
+%% @param StartKey The beginning of the range +%% @param EndKey The end of the range +%% @param AccFun A function that is called when a key-value pair is found, returning an accumulator. +%% @param Acc0 The initial accumulator +%% @returns the final accumulator +-spec range(Db :: term(), Tree :: #tree{}, StartKey :: term(), EndKey :: term(), + AccFun :: fun(), Acc0 :: term()) -> term(). +range(Db, #tree{} = Tree, StartKey, EndKey, AccFun, Acc0) -> + erlfdb:transactional(Db, fun(Tx) -> + range(Tx, Tree, get_node(Tx, Tree, ?NODE_ROOT_ID), StartKey, EndKey, AccFun, Acc0) + end). + + +range(_Tx, #tree{}, #node{id = ?NODE_ROOT_ID, members = []}, _StartKey, _EndKey, _AccFun, Acc0) -> + Acc0; + +range(Tx, #tree{} = Tree, #node{level = 0} = Node, StartKey, EndKey, AccFun, Acc0) -> + InRange = [{K, V} || {K, V} <- Node#node.members, + collate(Tree, StartKey, K, [lt, eq]), collate(Tree, K, EndKey, [lt, eq])], + Acc1 = AccFun(InRange, Acc0), + LastKey = last_key(Node), + case Node#node.next /= undefined andalso collate(Tree, LastKey, EndKey, [lt, eq]) of + true -> + range(Tx, Tree, get_node(Tx, Tree, Node#node.next), StartKey, EndKey, AccFun, Acc1); + false -> + Acc1 + end; + +range(Tx, #tree{} = Tree, #node{} = Node, StartKey, EndKey, AccFun, Acc) -> + ChildId = find_child_id(Tree, Node, StartKey), + range(Tx, Tree, get_node(Tx, Tree, ChildId), StartKey, EndKey, AccFun, Acc). + + +%% @doc Finds all key-value pairs for the specified range in reverse order. +%% @param Db An erlfdb database or transaction. +%% @param Tree The ebtree. +%% @param StartKey The beginning of the range +%% @param EndKey The end of the range +%% @param AccFun A function that is called when a key-value pair is found, returning an accumulator. +%% @param Acc0 The initial accumulator +%% @returns the final accumulator +-spec reverse_range(Db :: term(), Tree :: #tree{}, StartKey :: term(), EndKey :: term(), + AccFun :: fun(), Acc0 :: term()) -> term(). +reverse_range(Db, #tree{} = Tree, StartKey, EndKey, AccFun, Acc0) -> + erlfdb:transactional(Db, fun(Tx) -> + reverse_range(Tx, Tree, get_node(Tx, Tree, ?NODE_ROOT_ID), StartKey, EndKey, AccFun, Acc0) + end). + + +reverse_range(_Tx, #tree{}, #node{id = ?NODE_ROOT_ID, members = []}, _StartKey, _EndKey, _AccFun, Acc0) -> + Acc0; + +reverse_range(Tx, #tree{} = Tree, #node{level = 0} = Node, StartKey, EndKey, AccFun, Acc0) -> + InRange = [{K, V} || {K, V} <- Node#node.members, + collate(Tree, StartKey, K, [lt, eq]), collate(Tree, K, EndKey, [lt, eq])], + Acc1 = AccFun(lists:reverse(InRange), Acc0), + FirstKey = first_key(Node), + case Node#node.prev /= undefined andalso collate(Tree, StartKey, FirstKey, [lt, eq]) of + true -> + reverse_range(Tx, Tree, get_node(Tx, Tree, Node#node.prev), StartKey, EndKey, AccFun, Acc1); + false -> + Acc1 + end; + +reverse_range(Tx, #tree{} = Tree, #node{} = Node, StartKey, EndKey, AccFun, Acc) -> + ChildId = find_child_id(Tree, Node, EndKey), + reverse_range(Tx, Tree, get_node(Tx, Tree, ChildId), StartKey, EndKey, AccFun, Acc). + + +%% @doc Inserts or updates a value in the ebtree +%% @param Db An erlfdb database or transaction. +%% @param Tree The ebtree. +%% @param Key The key to store the value under. +%% @param Value The value to store. +%% @returns the tree. +-spec insert(Db :: term(), Tree :: #tree{}, Key :: term(), Value :: term()) -> #tree{}. 
+insert(_Db, #tree{} = _Tree, ?MIN, _Value) -> + erlang:error(min_not_allowed); + +insert(_Db, #tree{} = _Tree, ?MAX, _Value) -> + erlang:error(max_not_allowed); + +insert(Db, #tree{} = Tree, Key, Value) -> + erlfdb:transactional(Db, fun(Tx) -> + Root0 = get_node(Tx, Tree, ?NODE_ROOT_ID), + case ?is_full(Tree, Root0) of + true -> + OldRoot = Root0#node{id = new_node_id()}, + FirstKey = first_key(OldRoot), + LastKey = last_key(OldRoot), + Root1 = #node{ + id = ?NODE_ROOT_ID, + level = Root0#node.level + 1, + members = [{FirstKey, LastKey, OldRoot#node.id, []}]}, + {Root2, _, _} = split_child(Tx, Tree, Root1, OldRoot), + insert_nonfull(Tx, Tree, Root2, Key, Value); + false -> + insert_nonfull(Tx, Tree, Root0, Key, Value) + end + end), + Tree. + + +split_child(Tx, #tree{} = Tree, #node{} = Parent0, #node{} = Child) -> + {LeftMembers, RightMembers} = lists:split(Tree#tree.min, Child#node.members), + + LeftId = new_node_id(), + RightId = new_node_id(), + + LeftChild = remove_pointers_if_not_leaf(#node{ + id = LeftId, + level = Child#node.level, + prev = Child#node.prev, + next = RightId, + members = LeftMembers + }), + + RightChild = remove_pointers_if_not_leaf(#node{ + id = RightId, + level = Child#node.level, + prev = LeftId, + next = Child#node.next, + members = RightMembers + }), + + update_prev_neighbour(Tx, Tree, LeftChild), + update_next_neighbour(Tx, Tree, RightChild), + + %% adjust parent members + FirstLeftKey = first_key(LeftMembers), + LastLeftKey = last_key(LeftMembers), + FirstRightKey = first_key(RightMembers), + LastRightKey = last_key(RightMembers), + + %% adjust parent reductions + LeftReduction = reduce_node(Tree, LeftChild), + RightReduction = reduce_node(Tree, RightChild), + + Parent1 = Parent0#node{ + members = + umerge_members(Tree, Parent0#node.level, [{FirstLeftKey, LastLeftKey, LeftId, LeftReduction}], + umerge_members(Tree, Parent0#node.level, [{FirstRightKey, LastRightKey, RightId, RightReduction}], + lists:keydelete(Child#node.id, 3, Parent0#node.members))) + }, + Parent2 = new_node_id_if_cacheable(Tx, Tree, Parent0, Parent1), + clear_node(Tx, Tree, Child), + set_nodes(Tx, Tree, [LeftChild, RightChild, Parent2]), + {Parent2, LeftChild, RightChild}. + + +update_prev_neighbour(_Tx, #tree{} = _Tree, #node{prev = undefined} = _Node) -> + ok; + +update_prev_neighbour(Tx, #tree{} = Tree, #node{} = Node) -> + Left = get_node(Tx, Tree, Node#node.prev), + set_node(Tx, Tree, Left#node{next = Node#node.id}). + + +update_next_neighbour(_Tx, #tree{} = _Tree, #node{next = undefined} = _Node) -> + ok; + +update_next_neighbour(Tx, #tree{} = Tree, #node{} = Node) -> + Left = get_node(Tx, Tree, Node#node.next), + set_node(Tx, Tree, Left#node{prev = Node#node.id}). 
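Back at the public API: range/6 and reverse_range/6, defined a little further up, hand the AccFun the in-range {Key, Value} pairs one leaf node at a time. A small illustrative sketch, assuming a tree already populated with integer keys 1..8 as in the earlier sketch:

    % Illustrative helper: stream a key range in both directions.
    range_example(Db, Tree) ->
        Collect = fun(KVs, Acc) -> Acc ++ KVs end,
        %% both endpoints are inclusive
        Fwd = ebtree:range(Db, Tree, 3, 6, Collect, []),
        Rev = ebtree:reverse_range(Db, Tree, 3, 6, Collect, []),
        %% same pairs, opposite order
        Fwd = lists:reverse(Rev),
        %% ebtree:min()/max() are sentinels that sort below/above every real key,
        %% so they act as open-ended bounds
        All = ebtree:range(Db, Tree, ebtree:min(), ebtree:max(), Collect, []),
        {Fwd, length(All)}.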
+ + +insert_nonfull(Tx, #tree{} = Tree, #node{level = 0} = Node0, Key, Value) -> + Node1 = Node0#node{ + members = umerge_members(Tree, 0, [{Key, Value}], Node0#node.members) + }, + set_node(Tx, Tree, Node0, Node1), + {Node1#node.id, reduce_node(Tree, Node1)}; + +insert_nonfull(Tx, #tree{} = Tree, #node{} = Node0, Key, Value) -> + ChildId0 = find_child_id(Tree, Node0, Key), + Child0 = get_node(Tx, Tree, ChildId0), + {Node1, Child1} = case ?is_full(Tree, Child0) of + true -> + {Parent, LeftChild, RightChild} = split_child(Tx, Tree, Node0, Child0), + ChildId = find_child_id(Tree, Parent, Key), + Child = if + ChildId =:= LeftChild#node.id -> + LeftChild; + ChildId =:= RightChild#node.id -> + RightChild + end, + {Parent, Child}; + false -> + {Node0, Child0} + end, + ChildId1 = Child1#node.id, + {ChildId2, NewReduction} = insert_nonfull(Tx, Tree, Child1, Key, Value), + {CurrentFirstKey, CurrentLastKey, ChildId1, _OldReduction} = lists:keyfind(ChildId1, 3, Node1#node.members), + [NewFirstKey, _] = sort_keys(Tree, [Key, CurrentFirstKey]), + [_, NewLastKey] = sort_keys(Tree, [Key, CurrentLastKey]), + Node2 = Node1#node{ + members = lists:keyreplace(ChildId1, 3, Node1#node.members, + {NewFirstKey, NewLastKey, ChildId2, NewReduction}) + }, + Node3 = new_node_id_if_cacheable(Tx, Tree, Node0, Node2), + set_node(Tx, Tree, Node0, Node3), + {Node3#node.id, reduce_node(Tree, Node2)}. + + +%% @doc Inserts or updates multiple values in the ebtree +%% @param Db An erlfdb database or transaction. +%% @param Tree The ebtree. +%% @param KeyValues A list of two-tuples representing the key/values to insert +%% @returns the tree. +-spec insert_multi(Db :: term(), Tree :: #tree{}, KeyValues :: [{term(), term()}]) -> #tree{}. +insert_multi(_Db, #tree{} = Tree, []) -> + Tree; + +insert_multi(Db, #tree{} = Tree, KeyValues) when is_list(KeyValues) -> + % Sort our KeyValues so that we can insert in order + SortedKeyValues = usort_members(Tree, 0, KeyValues), + erlfdb:transactional(Db, fun(Tx) -> + Root0 = get_node(Tx, Tree, ?NODE_ROOT_ID), + Members = insert_multi(Tx, Tree, Root0, SortedKeyValues), + Root1 = grow_tree(Tx, Tree, Root0#node{members = Members}), + set_node(Tx, Tree, Root1) + end), + Tree. + + +insert_multi(Tx, #tree{} = Tree, #node{level = L} = Node, KeyValues) when L > 0 -> + ChildKVsPairs = assign_kvs(Tree, Node#node.members, KeyValues), + NewMembers = lists:flatmap(fun({{_F, _L, P, _R} = Child, KVs}) -> + case KVs of + [] -> + [Child]; + _ -> + ChildNode = get_node(Tx, Tree, P), + insert_multi(Tx, Tree, ChildNode, KVs) + end + end, ChildKVsPairs), + split_node_multi(Tx, Tree, Node#node{members = NewMembers}); + +insert_multi(Tx, #tree{} = Tree, #node{level = 0} = Node, KeyValues) -> + NewMembers = umerge_members(Tree, 0, KeyValues, Node#node.members), + split_node_multi(Tx, Tree, Node#node{members = NewMembers}). + + +assign_kvs(_Tree, [Child], KeyValues) -> + [{Child, KeyValues}]; + +assign_kvs(Tree, [{_F, L, _P, _R} = Child | RestChildren], KeyValues) -> + {KVsInChild, RestKVs} = lists:splitwith(fun({Key, _}) -> + collate(Tree, Key, L, [lt, eq]) + end, KeyValues), + [{Child, KVsInChild} | assign_kvs(Tree, RestChildren, RestKVs)]. + + +split_node_multi(Tx, Tree, Node) -> + NumMembers = length(Node#node.members), + % Not =< so that we don't leave full nodes + % in the tree after update. 
+ case NumMembers < Tree#tree.max of + true when Node#node.id == ?NODE_ROOT_ID -> + Node#node.members; + true -> + NewNode = case node_is_cacheable(Node) of + true -> + Node#node{id = new_node_id()}; + false -> + Node + end, + set_node(Tx, Tree, NewNode), + [to_member(Tree, NewNode)]; + false -> + clear_node(Tx, Tree, Node), + Nodes0 = create_nodes(Tx, Tree, Node), + Nodes1 = if Node#node.level > 0 -> Nodes0; true -> + Nodes2 = update_next_ptrs(Nodes0), + Nodes3 = update_prev_ptrs(Nodes2), + Nodes4 = set_first_prev_ptr(Tx, Tree, Node#node.prev, Nodes3), + set_last_next_ptr(Tx, Tree, Node#node.next, Nodes4) + end, + set_nodes(Tx, Tree, Nodes1), + [to_member(Tree, N) || N <- Nodes1] + end. + + +grow_tree(_Tx, _Tree, #node{level = 0, members = [{_, _} | _]} = Root) -> + Root; + +grow_tree(Tx, Tree, #node{level = 0, members = [{_, _, _, _} | _]} = Root) -> + grow_tree(Tx, Tree, Root#node{level = 1}); + +grow_tree(Tx, Tree, Root) -> + case length(Root#node.members) < Tree#tree.max of + true -> + Root; + false -> + NewMembers = split_node_multi(Tx, Tree, Root), + NewRoot = Root#node{ + level = Root#node.level + 1, + members = NewMembers + }, + grow_tree(Tx, Tree, NewRoot) + end. + + +to_member(Tree, Node) -> + FirstKey = first_key(Node#node.members), + LastKey = last_key(Node#node.members), + Reds = reduce_node(Tree, Node), + {FirstKey, LastKey, Node#node.id, Reds}. + + +create_nodes(Tx, #tree{} = Tree, Node) -> + case length(Node#node.members) >= Tree#tree.max of + true -> + {Members, Rest} = lists:split(Tree#tree.min, Node#node.members), + NewNode = #node{ + id = new_node_id(), + level = Node#node.level, + members = Members + }, + [NewNode | create_nodes(Tx, Tree, Node#node{members = Rest})]; + false -> + NewNode = #node{ + id = new_node_id(), + level = Node#node.level, + members = Node#node.members + }, + [NewNode] + end. + + +update_next_ptrs([_] = Nodes) -> + Nodes; + +update_next_ptrs([N1, N2 | Rest]) -> + [N1#node{next = N2#node.id} | update_next_ptrs([N2 | Rest])]. + + +update_prev_ptrs([_] = Nodes) -> + Nodes; + +update_prev_ptrs([N1, N2 | Rest]) -> + [N1 | update_prev_ptrs([N2#node{prev = N1#node.id} | Rest])]. + + +set_first_prev_ptr(Tx, Tree, Prev, [Node | Rest]) -> + NewNode = Node#node{prev = Prev}, + update_prev_neighbour(Tx, Tree, NewNode), + [NewNode | Rest]. + + +set_last_next_ptr(Tx, Tree, Next, [Node0]) -> + Node1 = Node0#node{next = Next}, + update_next_neighbour(Tx, Tree, Node1), + [Node1]; + +set_last_next_ptr(Tx, Tree, Next, [N | Rest]) -> + [N | set_last_next_ptr(Tx, Tree, Next, Rest)]. + + +%% @doc Deletes an entry from the ebtree +%% @param Db An erlfdb database or transaction. +%% @param Tree The ebtree. +%% @param Key The key of the entry to delete. +%% @returns the tree. +-spec delete(Db :: term(), Tree :: #tree{}, Key :: term()) -> #tree{}. +delete(Db, #tree{} = Tree, Key) -> + erlfdb:transactional(Db, fun(Tx) -> + Root0 = get_node(Tx, Tree, ?NODE_ROOT_ID), + case delete(Tx, Tree, Root0, Key) of + % if only one child, make it the new root. + #node{level = L, members = [_]} = Root1 when L > 0 -> + [{_, _, ChildId, _}] = Root1#node.members, + Root2 = get_node(Tx, Tree, ChildId), + clear_node(Tx, Tree, Root2), + set_node(Tx, Tree, Root2#node{id = ?NODE_ROOT_ID}); + Root1 -> + set_node(Tx, Tree, Root0, Root1) + end + end), + Tree. 
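Per the @doc comments, every Db argument also accepts an open erlfdb transaction, so related calls can share a single transaction; insert_multi/3 above is the more direct way to batch writes. A sketch (helper name and prefix are illustrative, not from the patch):

    % Illustrative helper: batch-load a tree, then delete a key.
    batch_example(Db) ->
        Tree = ebtree:open(Db, <<"batch">>, 4),
        KVs = [{N, N * N} || N <- lists:seq(1, 50)],
        %% insert_multi sorts the pairs and rebuilds the affected nodes in one pass
        ebtree:insert_multi(Db, Tree, KVs),
        {7, 49} = ebtree:lookup(Db, Tree, 7),
        %% individual inserts can also be grouped under one erlfdb transaction
        erlfdb:transactional(Db, fun(Tx) ->
            ebtree:insert(Tx, Tree, 51, 51 * 51),
            ebtree:insert(Tx, Tree, 52, 52 * 52)
        end),
        ebtree:delete(Db, Tree, 7),
        false = ebtree:lookup(Db, Tree, 7),
        ok.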
+ + +delete(_Tx, #tree{} = _Tree, #node{level = 0} = Node, Key) -> + Node#node{ + members = lists:keydelete(Key, 1, Node#node.members) + }; + +delete(Tx, #tree{} = Tree, #node{} = Parent0, Key) -> + ChildId0 = find_child_id(Tree, Parent0, Key), + Child0 = get_node(Tx, Tree, ChildId0), + Child1 = delete(Tx, Tree, Child0, Key), + case ?underflow(Tree, Child1) of + true -> + SiblingId = find_sibling_id(Tree, Parent0, ChildId0, Key), + Sibling = get_node(Tx, Tree, SiblingId), + NewNodes = case ?at_min(Tree, Sibling) of + true -> + Merged = merge(Tree, Child1, Sibling), + update_prev_neighbour(Tx, Tree, Merged), + update_next_neighbour(Tx, Tree, Merged), + [Merged]; + false -> + {Left, Right} = rebalance(Tree, Child1, Sibling), + update_prev_neighbour(Tx, Tree, Left), + update_next_neighbour(Tx, Tree, Right), + [Left, Right] + end, + + %% remove old members and insert new members + Members0 = Parent0#node.members, + Members1 = lists:keydelete(ChildId0, 3, Members0), + Members2 = lists:keydelete(Sibling#node.id, 3, Members1), + Members3 = lists:foldl(fun(N, Acc) -> + umerge_members(Tree, Parent0#node.level, + [{first_key(N), last_key(N), N#node.id, reduce_node(Tree, N)}], Acc) + end, Members2, NewNodes), + + Parent1 = Parent0#node{ + members = Members3 + }, + Parent2 = new_node_id_if_cacheable(Tx, Tree, Parent0, Parent1), + clear_nodes(Tx, Tree, [Child0, Sibling]), + set_nodes(Tx, Tree, NewNodes), + Parent2; + false -> + set_node(Tx, Tree, Child0, Child1), + {_OldFirstKey, _OldLastKey, ChildId0, _OldReduction} = lists:keyfind(ChildId0, 3, Parent0#node.members), + Parent1 = Parent0#node{ + members = lists:keyreplace(ChildId0, 3, Parent0#node.members, + {first_key(Child1), last_key(Child1), Child1#node.id, reduce_node(Tree, Child1)}) + }, + new_node_id_if_cacheable(Tx, Tree, Parent0, Parent1) + end. + + +merge(#tree{} = Tree, #node{level = Level} = Node1, #node{level = Level} = Node2) -> + [Left, Right] = sort_nodes(Tree, [Node1, Node2]), + + #node{ + id = new_node_id(), + level = Level, + prev = Left#node.prev, + next = Right#node.next, + members = lists:append(Left#node.members, Right#node.members) + }. + + +rebalance(#tree{} = Tree, #node{level = Level} = Node1, #node{level = Level} = Node2) -> + [Left0, Right0] = sort_nodes(Tree, [Node1, Node2]), + + Members = lists:append(Left0#node.members, Right0#node.members), + {LeftMembers, RightMembers} = lists:split(length(Members) div 2, Members), + + Left1Id = new_node_id(), + Right1Id = new_node_id(), + + Left1 = remove_pointers_if_not_leaf(Left0#node{ + id = Left1Id, + next = Right1Id, + members = LeftMembers + }), + Right1 = remove_pointers_if_not_leaf(Right0#node{ + id = Right1Id, + prev = Left1Id, + members = RightMembers + }), + {Left1, Right1}. + + +%% lookup functions + +find_child_id(#tree{} = Tree, #node{} = Node, Key) -> + element(3, find_child(Tree, Node, Key)). + + +find_sibling_id(#tree{} = Tree, #node{level = L} = Node0, Id, Key) when L > 0 -> + Node1 = Node0#node{members = lists:keydelete(Id, 3, Node0#node.members)}, + find_child_id(Tree, Node1, Key). + + +find_child(#tree{} = Tree, #node{level = L} = Node, Key) when L > 0 -> + find_child_int(Tree, Node#node.members, Key). + + +find_child_int(#tree{} = _Tree, [Child], _Key) -> + Child; + +find_child_int(#tree{} = Tree, [{_F, L, _P, _R} = Child| Rest], Key) -> + case collate(Tree, Key, L, [lt, eq]) of + true -> + Child; + false -> + find_child_int(Tree, Rest, Key) + end. 
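Stepping back from the deletion internals to reduce/5 defined further up: a count-style sketch of how the inclusive_start/inclusive_end options trim the endpoints. It mirrors count_reduce_test_ at the bottom of the module; the helper name and prefix are illustrative:

    % Illustrative helper: count keys in a sub-range.
    count_range_example(Db) ->
        CountFun = fun
            (KVs, false) -> length(KVs);    % reduce over raw {K, V} pairs
            (Rs, true) -> lists:sum(Rs)     % rereduce over stored reductions
        end,
        Tree = ebtree:open(Db, <<"counts">>, 4, [{reduce_fun, CountFun}]),
        lists:foreach(fun(N) -> ebtree:insert(Db, Tree, N, N) end, lists:seq(1, 100)),
        5 = ebtree:reduce(Db, Tree, 3, 7),                                  % 3,4,5,6,7
        4 = ebtree:reduce(Db, Tree, 3, 7, [{inclusive_start, false}]),      % 4,5,6,7
        3 = ebtree:reduce(Db, Tree, 3, 7, [{inclusive_start, false},
                                           {inclusive_end, false}]),        % 4,5,6
        100 = ebtree:full_reduce(Db, Tree),
        ok.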
+ + +%% metadata functions + +get_meta(Tx, #tree{} = Tree, MetaKey) -> + #tree{prefix = Prefix, encode_fun = EncodeFun} = Tree, + Key = meta_key(Prefix, MetaKey), + Future = erlfdb:get(Tx, Key), + case erlfdb:wait(Future) of + not_found -> + not_found; + Bin when is_binary(Bin) -> + EncodeFun(decode, Key, Bin) + end. + + +set_meta(Tx, #tree{} = Tree, MetaKey, MetaValue) -> + #tree{prefix = Prefix, encode_fun = EncodeFun} = Tree, + Key = meta_key(Prefix, MetaKey), + erlfdb:set( + Tx, + Key, + EncodeFun(encode, Key, MetaValue) + ). + + +meta_key(Prefix, MetaKey) when is_binary(Prefix) -> + erlfdb_tuple:pack({?META, MetaKey}, Prefix). + +%% node persistence functions + +get_node(Tx, #tree{} = Tree, Id) -> + case cache(Tree, get, Id) of + undefined -> + Key = node_key(Tree#tree.prefix, Id), + Value = persist(Tree, Tx, get, Key), + Node = decode_node(Tree, Id, Key, Value), + cache(Tree, set, [Id, Node]), + Node; + #node{} = Node -> + Node + end. + +clear_nodes(Tx, #tree{} = Tree, Nodes) -> + lists:foreach(fun(Node) -> + clear_node(Tx, Tree, Node) + end, Nodes). + + +clear_node(Tx, #tree{} = Tree, #node{} = Node) -> + Key = node_key(Tree#tree.prefix, Node#node.id), + cache(Tree, clear, Node#node.id), + persist(Tree, Tx, clear, Key). + + +set_nodes(Tx, #tree{} = Tree, Nodes) -> + lists:foreach(fun(Node) -> + set_node(Tx, Tree, Node) + end, Nodes). + + +set_node(_Tx, #tree{} = _Tree, #node{} = Same, #node{} = Same) -> + ok; + +set_node(Tx, #tree{} = Tree, #node{} = _From, #node{} = To) -> + set_node(Tx, Tree, To). + + +set_node(Tx, #tree{} = Tree, #node{} = Node) -> + ?validate_node(Tree, Node), + Key = node_key(Tree#tree.prefix, Node#node.id), + Value = encode_node(Tree, Key, Node), + cache(Tree, set, [Node#node.id, Node]), + persist(Tree, Tx, set, [Key, Value]). + + +node_key(Prefix, Id) when is_binary(Prefix), is_binary(Id) -> + erlfdb_tuple:pack({?NODE, Id}, Prefix). + + +%% @doc Walks the whole tree and checks it for consistency. +%% It also prints it to screen. +validate_tree(Db, #tree{} = Tree) -> + erlfdb:transactional(Db, fun(Tx) -> + Root = get_node(Db, Tree, ?NODE_ROOT_ID), + validate_tree(Tx, Tree, Root) + end). + + +validate_tree(_Tx, #tree{} = Tree, #node{level = 0} = Node) -> + print_node(Node), + validate_node(Tree, Node); + +validate_tree(Tx, #tree{} = Tree, #node{} = Node) -> + print_node(Node), + validate_node(Tree, Node), + validate_tree(Tx, Tree, Node#node.members); + +validate_tree(_Tx, #tree{} = _Tree, []) -> + ok; + +validate_tree(Tx, #tree{} = Tree, [{_F, _L, P, _R} | Rest]) -> + Node = get_node(Tx, Tree, P), + validate_tree(Tx, Tree, Node), + validate_tree(Tx, Tree, Rest). + + +validate_node(#tree{} = Tree, #node{} = Node) -> + NumKeys = length(Node#node.members), + IsLeaf = Node#node.level =:= 0, + IsRoot = ?NODE_ROOT_ID == Node#node.id, + OutOfOrder = Node#node.members /= sort_members(Tree, Node#node.level, Node#node.members), + Duplicates = Node#node.members /= usort_members(Tree, Node#node.level, Node#node.members), + if + Node#node.id == undefined -> + erlang:error({node_without_id, Node}); + not IsRoot andalso NumKeys < Tree#tree.min -> + erlang:error({too_few_keys, Node}); + NumKeys > Tree#tree.max -> + erlang:error({too_many_keys, Node}); + not IsLeaf andalso Node#node.prev /= undefined -> + erlang:error({non_leaf_with_prev, Node}); + not IsLeaf andalso Node#node.next /= undefined -> + erlang:error({non_leaf_with_next, Node}); + OutOfOrder -> + erlang:error({out_of_order, Node}); + Duplicates -> + erlang:error({duplicates, Node}); + true -> + ok + end. 
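One more caller-side sketch before the marshalling helpers below: group_reduce/8 produces one reduction per group key. This illustrative example (count-style reduce_fun, names not from the patch) groups tuple keys by their first element and finishes with the validate_tree/2 consistency check defined just above:

    % Illustrative helper: per-group counts over {Group, N} keys.
    group_example(Db) ->
        CountFun = fun
            (KVs, false) -> length(KVs);
            (Rs, true) -> lists:sum(Rs)
        end,
        Tree = ebtree:open(Db, <<"groups">>, 4, [{reduce_fun, CountFun}]),
        lists:foreach(fun(N) ->
            ebtree:insert(Db, Tree, {N rem 3, N}, N)
        end, lists:seq(1, 30)),
        GroupKeyFun = fun({Group, _N}) -> Group end,
        UserAccFun = fun({Group, Count}, Acc) -> [{Group, Count} | Acc] end,
        Grouped = ebtree:group_reduce(Db, Tree, ebtree:min(), ebtree:max(),
            GroupKeyFun, UserAccFun, []),
        %% ten keys fall into each of the three groups
        [{0, 10}, {1, 10}, {2, 10}] = lists:reverse(Grouped),
        %% walks the whole tree, checks invariants and prints the nodes
        ebtree:validate_tree(Db, Tree).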
+ + +%% data marshalling functions (encodes unnecesary fields as a NIL_REF) + +encode_node(#tree{} = Tree, Key, #node{prev = undefined} = Node) -> + encode_node(Tree, Key, Node#node{prev = []}); + +encode_node(#tree{} = Tree, Key, #node{next = undefined} = Node) -> + encode_node(Tree, Key, Node#node{next = []}); + +encode_node(#tree{} = Tree, Key, #node{} = Node) -> + #tree{encode_fun = EncodeFun} = Tree, + EncodeFun(encode, Key, Node#node{id = []}). + + +decode_node(#tree{} = Tree, Id, Key, Value) when is_binary(Value) -> + #tree{encode_fun = EncodeFun} = Tree, + Term = EncodeFun(decode, Key, Value), + decode_node(Id, Term). + + +decode_node(Id, #node{prev = []} = Node) -> + decode_node(Id, Node#node{prev = undefined}); + +decode_node(Id, #node{next = []} = Node) -> + decode_node(Id, Node#node{next = undefined}); + +decode_node(Id, #node{} = Node) -> + Node#node{id = Id}. + +%% built-in reduce functions. + +reduce_noop(_KVs, _Rereduce) -> + []. + + +reduce_node(#tree{} = Tree, #node{level = 0} = Node) -> + reduce_values(Tree, Node#node.members, false); + +reduce_node(#tree{} = Tree, #node{} = Node) -> + Rs = [R || {_F, _L, _P, R} <- Node#node.members], + reduce_values(Tree, Rs, true). + + +reduce_values(#tree{} = Tree, Values, Rereduce) when is_list(Values) -> + #tree{reduce_fun = ReduceFun} = Tree, + ReduceFun(Values, Rereduce). + + +%% collation functions + + +collate(#tree{} = _Tree, ?MIN, _B) -> + lt; + +collate(#tree{} = _Tree, _A, ?MIN) -> + gt; + +collate(#tree{} = _Tree, ?MAX, _B) -> + gt; + +collate(#tree{} = _Tree, _A, ?MAX) -> + lt; + +collate(#tree{} = Tree, A, B) -> + #tree{collate_fun = CollateFun} = Tree, + case CollateFun(A, B) of + lt -> lt; + eq -> eq; + gt -> gt; + _ -> error(invalid_collation_result) + end. + + +collate(#tree{} = Tree, A, B, Allowed) -> + lists:member(collate(Tree, A, B), Allowed). + + +umerge_members(#tree{} = Tree, Level, List1, List2) -> + Collate = fun + ({K1, _V1}, {K2, _V2}) when Level == 0 -> + collate(Tree, K1, K2); + ({_F1, L1, _V1, _R1}, {_F2, L2, _V2, _R2}) when Level > 0 -> + collate(Tree, L1, L2) + end, + umerge_members_int(Collate, List1, List2, []). + + +umerge_members_int(Collate, [], [H2 | T2], [HAcc | _] = Acc) -> + case Collate(H2, HAcc) of + lt -> erlang:error(unsorted_members); + eq -> lists:reverse(Acc, T2); + gt -> lists:reverse(Acc, [H2 | T2]) + end; +umerge_members_int(_Collate, List1, [], Acc) -> + lists:reverse(Acc, List1); +umerge_members_int(Collate, [H1 | T1], [H2 | T2], Acc) -> + case Collate(H1, H2) of + lt -> umerge_members_int(Collate, T1, [H2 | T2], [H1 | Acc]); + eq -> umerge_members_int(Collate, T1, T2, [H1 | Acc]); + gt -> umerge_members_int(Collate, [H1 | T1], T2, [H2 | Acc]) + end. + + +sort_keys(#tree{} = Tree, List) -> + CollateWrapper = fun + (K1, K2) -> + collate(Tree, K1, K2, [lt, eq]) + end, + lists:sort(CollateWrapper, List). + + +sort_nodes(#tree{} = Tree, List) -> + CollateWrapper = fun + (#node{} = N1, #node{} = N2) -> + collate(Tree, first_key(N1), first_key(N2), [lt, eq]) + end, + lists:sort(CollateWrapper, List). + + +sort_members(#tree{} = Tree, Level, List) -> + CollateWrapper = fun + ({K1, _V1}, {K2, _V2}) when Level == 0 -> + collate(Tree, K1, K2, [lt, eq]); + ({_F1, L1, _V1, _R1}, {_F2, L2, _V2, _R2}) when Level > 0 -> + collate(Tree, L1, L2, [lt, eq]) + end, + lists:sort(CollateWrapper, List). 
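The collate_fun option hooks into the comparisons above and must return lt, eq or gt for any two keys. An illustrative sketch of a reversed ordering, the same trick custom_collation_test/0 uses further down:

    % Illustrative helper: a tree sorted in descending key order.
    descending_example(Db) ->
        Reversed = fun
            (A, B) when A < B -> gt;
            (A, B) when A > B -> lt;
            (_, _) -> eq
        end,
        Tree = ebtree:open(Db, <<"desc">>, 4, [{collate_fun, Reversed}]),
        lists:foreach(fun(N) -> ebtree:insert(Db, Tree, N, N) end, lists:seq(1, 8)),
        %% with reversed collation, ranges run from the "largest" to the "smallest" key
        [{8, 8} | _] = ebtree:range(Db, Tree, 8, 1, fun(KVs, Acc) -> Acc ++ KVs end, []),
        ok.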
+ + +usort_members(#tree{} = Tree, Level, List) -> + CollateWrapper = fun + ({K1, _V1}, {K2, _V2}) when Level == 0 -> + collate(Tree, K1, K2, [lt, eq]); + ({_F1, L1, _V1, _R1}, {_F2, L2, _V2, _R2}) when Level > 0 -> + collate(Tree, L1, L2, [lt, eq]) + end, + lists:usort(CollateWrapper, List). + + +collate_raw(A, B) when A < B -> + lt; + +collate_raw(A, B) when A > B -> + gt; + +collate_raw(A, A) -> + eq. + + +%% encoding function + +encode_erlang(encode, _Key, Value) -> + term_to_binary(Value, [{minor_version, 2}]); + + +encode_erlang(decode, _Key, Value) -> + binary_to_term(Value, [safe]). + +%% persist function + +persist(#tree{} = Tree, Tx, Action, Args) -> + #tree{persist_fun = PersistFun} = Tree, + PersistFun(Tx, Action, Args). + + +simple_persist(Tx, set, [Key, Value]) -> + erlfdb:set(Tx, Key, Value); + +simple_persist(Tx, get, Key) -> + erlfdb:wait(erlfdb:get(Tx, Key)); + +simple_persist(Tx, clear, Key) -> + erlfdb:clear(Tx, Key). + + +%% cache functions + +cache_noop(set, _) -> + ok; +cache_noop(clear, _) -> + ok; +cache_noop(get, _) -> + undefined. + + +cache(#tree{} = Tree, set, [Id, #node{} = Node]) -> + #tree{cache_fun = CacheFun} = Tree, + case node_is_cacheable(Node) of + true -> + CacheFun(set, [Id, Node]); + false -> + ok + end; + +cache(#tree{} = Tree, clear, Id) -> + #tree{cache_fun = CacheFun} = Tree, + CacheFun(clear, Id); + +cache(#tree{} = _Tree, get, ?NODE_ROOT_ID) -> + undefined; + +cache(#tree{} = Tree, get, Id) -> + #tree{cache_fun = CacheFun} = Tree, + CacheFun(get, Id). + + +%% private functions + +init_order(#tree{} = Tree, Order) + when is_integer(Order), Order > 2, Order rem 2 == 0 -> + Tree#tree{ + min = Order div 2, + max = Order + }. + + +first_key(#node{} = Node) -> + first_key(Node#node.members); + +first_key(Members) when is_list(Members) -> + element(1, hd(Members)). + + +last_key(#node{} = Node) -> + last_key(Node#node.members); + +last_key(Members) when is_list(Members) -> + case lists:last(Members) of + {K, _V} -> + K; + {_F, L, _P, _R} -> + L + end. + + +new_node_id_if_cacheable(Tx, #tree{} = Tree, #node{} = Old, #node{} = New) -> + MembersChanged = Old#node.members /= New#node.members, + NodeIsCacheable = node_is_cacheable(New), + if + MembersChanged andalso NodeIsCacheable -> + clear_node(Tx, Tree, New), + New#node{id = new_node_id()}; + true -> + New + end. + + +node_is_cacheable(#node{id = ?NODE_ROOT_ID}) -> + false; + +node_is_cacheable(#node{level = 0}) -> + false; + +node_is_cacheable(#node{}) -> + true. + + +new_node_id() -> + crypto:strong_rand_bytes(16). + + +%% remove prev/next pointers for nonleaf nodes +remove_pointers_if_not_leaf(#node{level = 0} = Node) -> + Node; + +remove_pointers_if_not_leaf(#node{} = Node) -> + Node#node{prev = undefined, next = undefined}. + + + +print_node(#node{level = 0} = Node) -> + io:format("#node{id = ~s, level = ~w, prev = ~s, next = ~s, members = ~w}~n~n", + [b64(Node#node.id), Node#node.level, b64(Node#node.prev), b64(Node#node.next), + Node#node.members]); + +print_node(#node{} = Node) -> + io:format("#node{id = ~s, level = ~w, prev = ~s, next = ~s, members = ~s}~n~n", + [base64:encode(Node#node.id), Node#node.level, b64(Node#node.prev), b64(Node#node.next), + [io_lib:format("{~w, ~w, ~s, ~w}, ", [F, L, b64(P), R]) || {F, L, P, R} <- Node#node.members]]). + + +b64(undefined) -> + undefined; + +b64(Bin) -> + base64:encode(Bin). + +%% tests + +-ifdef(TEST). +-include_lib("eunit/include/eunit.hrl"). 
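cache_noop/2 above shows the shape of the cache_fun hook: it is called as (set, [Id, Node]), (clear, Id) and (get, Id), and get must return either undefined or the cached node. A purely hypothetical sketch, not in this patch, using the process dictionary; mind the README's caveat about caching nodes across transactions:

    % Hypothetical cache_fun: keep inner nodes in the process dictionary.
    pdict_cache(set, [Id, Node]) ->
        put({ebtree_cache, Id}, Node),
        ok;
    pdict_cache(clear, Id) ->
        erase({ebtree_cache, Id}),
        ok;
    pdict_cache(get, Id) ->
        case get({ebtree_cache, Id}) of
            undefined -> undefined;
            Node -> Node
        end.

    % Passed at open time:
    % Tree = ebtree:open(Db, Prefix, Order, [{cache_fun, fun pdict_cache/2}]).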
+ +reduce_sum(KVs, false) -> + {_, Vs} = lists:unzip(KVs), + lists:sum(Vs); + +reduce_sum(Rs, true) -> + lists:sum(Rs). + + +reduce_count(KVs, false) -> + length(KVs); + +reduce_count(Rs, true) -> + lists:sum(Rs). + + +reduce_stats(KVs, false) -> + {_, Vs} = lists:unzip(KVs), + { + lists:sum(Vs), + lists:min(Vs), + lists:max(Vs), + length(Vs), + lists:sum([V * V || V <- Vs]) + }; + +reduce_stats(Rs, true) -> + lists:foldl( + fun({Sum, Min, Max, Count, SumSqr}, + {SumAcc, MinAcc, MaxAcc, CountAcc, SumSqrAcc}) -> + { + Sum + SumAcc, + erlang:min(Min, MinAcc), + erlang:max(Max, MaxAcc), + Count + CountAcc, + SumSqr + SumSqrAcc + } end, hd(Rs), tl(Rs)). + + +collation_fun_test_() -> + Tree = #tree{collate_fun = fun collate_raw/2}, + [ + ?_test(?assertEqual(gt, collate(Tree, 4, 3))), + ?_test(?assertEqual(lt, collate(Tree, 3, 4))), + ?_test(?assertEqual(eq, collate(Tree, 3, 3))) + ]. + + +collate_validation_test() -> + Tree = #tree{collate_fun = fun(_A, _B) -> foo end}, + ?assertError(invalid_collation_result, collate(Tree, 1, 2)). + + +order_is_preserved_test() -> + Db = erlfdb_util:get_test_db([empty]), + open(Db, <<1,2,3>>, 4), + Tree = open(Db, <<1,2,3>>, 8), + ?assertEqual(4, Tree#tree.max). + + +min_not_allowed_test() -> + Db = erlfdb_util:get_test_db([empty]), + Tree = open(Db, <<1,2,3>>, 4), + ?assertError(min_not_allowed, ebtree:insert(Db, Tree, ebtree:min(), foo)). + + +max_not_allowed_test() -> + Db = erlfdb_util:get_test_db([empty]), + Tree = open(Db, <<1,2,3>>, 4), + ?assertError(max_not_allowed, ebtree:insert(Db, Tree, ebtree:max(), foo)). + + +lookup_test() -> + Db = erlfdb_util:get_test_db([empty]), + Tree = open(Db, <<1,2,3>>, 4), + Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, 16)])], + lists:foreach(fun(Key) -> insert(Db, Tree, Key, Key + 1) end, Keys), + lists:foreach(fun(Key) -> ?assertEqual({Key, Key + 1}, lookup(Db, Tree, Key)) end, Keys), + ?assertEqual(false, lookup(Db, Tree, 101)). + + +lookup_multi_test() -> + Db = erlfdb_util:get_test_db([empty]), + Tree = open(Db, <<1,2,3>>, 4), + Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, 16)])], + lists:foreach(fun(Key) -> insert(Db, Tree, Key, Key + 1) end, Keys), + validate_tree(Db, Tree), + ?assertEqual([{1, 2}], lookup_multi(Db, Tree, [1])), + ?assertEqual([{15, 16}, {2, 3}], lookup_multi(Db, Tree, [2, 15])), + ?assertEqual([{15, 16}, {4, 5}, {2, 3}], lookup_multi(Db, Tree, [2, 101, 15, 4, -3])). + + +insert_multi_test() -> + Db = erlfdb_util:get_test_db([empty]), + Tree = open(Db, <<1, 2, 3>>, 4), + AllKVs = lists:foldl(fun(_Seq, Acc) -> + KVs = [{rand:uniform(), rand:uniform()} || _ <- lists:seq(1, 16)], + insert_multi(Db, Tree, KVs), + KVs ++ Acc + end, [], lists:seq(1, 16)), + lists:foreach(fun({K, V}) -> + ?assertEqual({K, V}, lookup(Db, Tree, K)) + end, AllKVs), + validate_tree(Db, Tree). + + +delete_test() -> + Db = erlfdb_util:get_test_db([empty]), + Tree = open(Db, <<1,2,3>>, 4), + Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, 16)])], + lists:foreach(fun(Key) -> insert(Db, Tree, Key, Key + 1) end, Keys), + lists:foreach(fun(Key) -> ?assertEqual({Key, Key + 1}, lookup(Db, Tree, Key)) end, Keys), + lists:foreach(fun(Key) -> delete(Db, Tree, Key) end, Keys), + lists:foreach(fun(Key) -> ?assertEqual(false, lookup(Db, Tree, Key)) end, Keys). 
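reduce_stats/2 above carries {Sum, Min, Max, Count, SumSqr} tuples; a worked example of how its rereduce clause combines two partial ranges, ending in the {15, 1, 5, 5, 55} value asserted for keys 1..5 in stats_reduce_test_ below:

    %% Worked example for reduce_stats/2 (keys 1..5 split across two leaves):
    %% reduce_stats([{a, 1}, {b, 2}, {c, 3}], false) =:= {6, 1, 3, 3, 14}
    %% reduce_stats([{d, 4}, {e, 5}], false)         =:= {9, 4, 5, 2, 41}
    %% reduce_stats([{6, 1, 3, 3, 14}, {9, 4, 5, 2, 41}], true) =:= {15, 1, 5, 5, 55}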
+ + +range_after_delete_test() -> + Db = erlfdb_util:get_test_db([empty]), + Tree = open(Db, <<1,2,3>>, 4), + Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, 16)])], + lists:foreach(fun(Key) -> insert(Db, Tree, Key, Key + 1) end, Keys), + lists:foreach(fun(Key) -> ?assertEqual({Key, Key + 1}, lookup(Db, Tree, Key)) end, Keys), + lists:foreach(fun(Key) -> delete(Db, Tree, Key) end, lists:seq(1, 16, 2)), + ?assertEqual(8, range(Db, Tree, 1, 16, fun(E, A) -> length(E) + A end, 0)), + ?assertEqual(8, reverse_range(Db, Tree, 1, 16, fun(E, A) -> length(E) + A end, 0)). + + +full_reduce_empty_test() -> + Db = erlfdb_util:get_test_db([empty]), + Tree = open(Db, <<1,2,3>>, 4, [{reduce_fun, fun reduce_sum/2}]), + ?assertEqual(0, full_reduce(Db, Tree)). + + +full_reduce_test_() -> + Db = erlfdb_util:get_test_db([empty]), + Tree = open(Db, <<1,2,3>>, 4, [{reduce_fun, fun reduce_sum/2}]), + TestFun = fun(Max) -> + Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, Max)])], + lists:foreach(fun(Key) -> insert(Db, Tree, Key, Key) end, Keys), + ?assertEqual(round(Max * ((1 + Max) / 2)), full_reduce(Db, Tree)) + end, + [ + ?_test(TestFun(4)), + ?_test(TestFun(8)) + ]. + + +full_reduce_after_delete_test() -> + Db = erlfdb_util:get_test_db([empty]), + Tree = open(Db, <<1,2,3>>, 4, [{reduce_fun, fun reduce_sum/2}]), + Max = 16, + Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, Max)])], + lists:foreach(fun(Key) -> insert(Db, Tree, Key, Key) end, Keys), + ?assertEqual(round(Max * ((1 + Max) / 2)), full_reduce(Db, Tree)), + lists:foreach(fun(Key) -> delete(Db, Tree, Key) end, Keys), + ?assertEqual(0, full_reduce(Db, Tree)). + + +count_reduce_test_() -> + Db = erlfdb_util:get_test_db([empty]), + Tree = open(Db, <<1,2,3>>, 4, [{reduce_fun, fun reduce_count/2}]), + Max = 100, + Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, Max)])], + lists:foreach(fun(Key) -> insert(Db, Tree, Key, Key) end, Keys), + Expected = fun(S, E) -> E - S + 1 end, + [ + ?_test(?assertEqual(Expected(1, 5), reduce(Db, Tree, 1, 5))), + ?_test(?assertEqual(Expected(50, 60), reduce(Db, Tree, 50, 60))), + ?_test(?assertEqual(Expected(21, 83), reduce(Db, Tree, 21, 83))), + ?_test(?assertEqual(Expected(1, 1), reduce(Db, Tree, 1, 1))), + ?_test(?assertEqual(Expected(1, 100), reduce(Db, Tree, 0, 200))), + ?_test(?assertEqual(Expected(5, 7), reduce(Db, Tree, 5, 7))), + ?_test(?assertEqual(Expected(6, 7), reduce(Db, Tree, 5, 7, + [{inclusive_start, false}]))), + ?_test(?assertEqual(Expected(5, 6), reduce(Db, Tree, 5, 7, + [{inclusive_end, false}]))), + ?_test(?assertEqual(Expected(6, 6), reduce(Db, Tree, 5, 7, + [{inclusive_start, false}, {inclusive_end, false}]))) + ]. + +sum_reduce_test_() -> + Db = erlfdb_util:get_test_db([empty]), + Tree = open(Db, <<1,2,3>>, 4, [{reduce_fun, fun reduce_sum/2}]), + Max = 100, + Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, Max)])], + lists:foreach(fun(Key) -> insert(Db, Tree, Key, Key) end, Keys), + Expected = fun(S, E) -> lists:sum(lists:seq(S, E)) end, + [ + ?_test(?assertEqual(Expected(1, 5), reduce(Db, Tree, 1, 5))), + ?_test(?assertEqual(Expected(50, 60), reduce(Db, Tree, 50, 60))), + ?_test(?assertEqual(Expected(21, 83), reduce(Db, Tree, 21, 83))), + ?_test(?assertEqual(Expected(1, 1), reduce(Db, Tree, 1, 1))), + ?_test(?assertEqual(Expected(1, 100), reduce(Db, Tree, 0, 200))), + ?_test(?assertEqual(Expected(5, 7), reduce(Db, Tree, 5, 7))) + ]. 
+ + +stats_reduce_test_() -> + Db = erlfdb_util:get_test_db([empty]), + Tree = open(Db, <<1,2,3>>, 4, [{reduce_fun, fun reduce_stats/2}]), + Max = 100, + Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, Max)])], + lists:foreach(fun(Key) -> insert(Db, Tree, Key, Key) end, Keys), + [ + ?_test(?assertEqual({15,1,5,5,55}, reduce(Db, Tree, 1, 5))), + ?_test(?assertEqual({605,50,60,11,33385}, reduce(Db, Tree, 50, 60))), + ?_test(?assertEqual({3276,21,83,63,191184}, reduce(Db, Tree, 21, 83))), + ?_test(?assertEqual({1,1,1,1,1}, reduce(Db, Tree, 1, 1))), + ?_test(?assertEqual({5050,1,100,100,338350}, reduce(Db, Tree, 0, 200))), + ?_test(?assertEqual({18,5,7,3,110}, reduce(Db, Tree, 5, 7))) + ]. + + +group_reduce_level_test_() -> + Db = erlfdb_util:get_test_db([empty]), + Tree = open(Db, <<1,2,3>>, 4, [{reduce_fun, fun reduce_sum/2}]), + Max = 100, + Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, Max)])], + GroupKeyFun = fun(Key) -> lists:sublist(Key, 2) end, + UserAccFun = fun({K,V}, Acc) -> Acc ++ [{K, V}] end, + lists:foreach(fun(Key) -> insert(Db, Tree, [Key rem 4, Key rem 3, Key], Key) end, Keys), + [ + ?_test(?assertEqual([{[1, 0], 408}, {[1, 1], 441}, {[1, 2], 376}], + group_reduce(Db, Tree, [1], [2], GroupKeyFun, UserAccFun, []))), + + ?_test(?assertEqual([{[1, 0], 408}, {[1, 1], 441}, {[1, 2], 376}], + group_reduce(Db, Tree, [1], [2], GroupKeyFun, UserAccFun, [], [{dir, fwd}]))), + + ?_test(?assertEqual([{[1, 2], 376}, {[1, 1], 441}, {[1, 0], 408}], + group_reduce(Db, Tree, [1], [2], GroupKeyFun, UserAccFun, [], [{dir, rev}]))), + + ?_test(?assertEqual([{[0,0],432}, {[0,1],468}, {[0,2],400}, {[1,0],408}, {[1,1],441}, {[1,2],376}, + {[2,0],384}, {[2,1],416}, {[2,2],450}, {[3,0],459}, {[3,1],392}, {[3,2],424}], + group_reduce(Db, Tree, ebtree:min(), ebtree:max(), GroupKeyFun, UserAccFun, []))) + ]. + + +group_reduce_int_test_() -> + Db = erlfdb_util:get_test_db([empty]), + Tree = open(Db, <<1,2,3>>, 4, [{reduce_fun, fun reduce_count/2}]), + Max = 100, + Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, Max)])], + GroupKeyFun = fun(_Key) -> null end, + UserAccFun = fun({K,V}, Acc) -> Acc ++ [{K, V}] end, + lists:foreach(fun(Key) -> insert(Db, Tree, Key, Key) end, Keys), + [ + ?_test(?assertEqual([{null, 100}], group_reduce(Db, Tree, + ebtree:min(), ebtree:max(), GroupKeyFun, UserAccFun, []))), + ?_test(?assertEqual([{null, 99}], group_reduce(Db, Tree, 2, ebtree:max(), GroupKeyFun, UserAccFun, []))), + ?_test(?assertEqual([{null, 96}], group_reduce(Db, Tree, 3, 98, GroupKeyFun, UserAccFun, []))), + ?_test(?assertEqual([{null, 95}], group_reduce(Db, Tree, 3, 98, GroupKeyFun, UserAccFun, [], [{inclusive_start, false}]))), + ?_test(?assertEqual([{null, 95}], group_reduce(Db, Tree, 3, 98, GroupKeyFun, UserAccFun, [], [{inclusive_end, false}]))), + ?_test(?assertEqual([{null, 94}], group_reduce(Db, Tree, 3, 98, GroupKeyFun, UserAccFun, [], + [{inclusive_start, false}, {inclusive_end, false}]))) + ]. + + +raw_collation_test() -> + Db = erlfdb_util:get_test_db([empty]), + Tree = open(Db, <<1,2,3>>, 4), + insert(Db, Tree, null, null), + insert(Db, Tree, 1, 1), + ?assertEqual([{1, 1}, {null, null}], range(Db, Tree, 1, null, fun(E, A) -> A ++ E end, [])). 
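To make the expected sums in group_reduce_level_test_/0 above easier to verify: each inserted key is [Key rem 4, Key rem 3, Key] and GroupKeyFun keeps the first two elements, so a group such as [1, 0] collects every Key in 1..100 with Key rem 4 =:= 1 and Key rem 3 =:= 0, i.e. Key rem 12 =:= 9. A quick check of two of the expected group sums (illustrative only):

    %% Group [1, 0]: 9, 21, 33, 45, 57, 69, 81, 93 -> 408
    408 = lists:sum([K || K <- lists:seq(1, 100), K rem 4 =:= 1, K rem 3 =:= 0]),
    %% Group [1, 1]: 1, 13, 25, 37, 49, 61, 73, 85, 97 -> 441
    441 = lists:sum([K || K <- lists:seq(1, 100), K rem 4 =:= 1, K rem 3 =:= 1]).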
+ + +custom_collation_test() -> + Db = erlfdb_util:get_test_db([empty]), + CollateFun = fun(A, B) -> collate_raw(B, A) end, + Tree = open(Db, <<1,2,3>>, 4, [{collate_fun, CollateFun}]), + insert(Db, Tree, 1, 1), + insert(Db, Tree, 2, 2), + ?assertEqual([{2, 2}, {1, 1}], range(Db, Tree, 3, 0, fun(E, A) -> A ++ E end, [])). + + +empty_range_test() -> + Db = erlfdb_util:get_test_db([empty]), + Tree = open(Db, <<1, 2, 3>>, 10), + ?assertEqual( + blah, + range(Db, Tree, min(), max(), fun(_, A) -> A end, blah) + ). + + +range_test_() -> + {timeout, 1000, fun() -> + Db = erlfdb_util:get_test_db([empty]), + Max = 100, + Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, Max)])], + Tree = lists:foldl(fun(Key, T) -> insert(Db, T, Key, Key + 1) end, open(Db, <<1,2,3>>, 10), Keys), + lists:foreach( + fun(_) -> + [StartKey, EndKey] = lists:sort([rand:uniform(Max), rand:uniform(Max)]), + ?assertEqual([{K, K + 1} || K <- lists:seq(StartKey, EndKey)], + range(Db, Tree, StartKey, EndKey, fun(E, A) -> A ++ E end, []) + ) end, + lists:seq(1, 100)) + end}. + + +empty_reverse_range_test() -> + Db = erlfdb_util:get_test_db([empty]), + Tree = open(Db, <<1, 2, 3>>, 10), + ?assertEqual( + blah, + reverse_range(Db, Tree, min(), max(), fun(_, A) -> A end, blah) + ). + + +reverse_range_test_() -> + {timeout, 1000, fun() -> + Db = erlfdb_util:get_test_db([empty]), + Max = 100, + Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, Max)])], + Tree = lists:foldl(fun(Key, T) -> insert(Db, T, Key, Key + 1) end, open(Db, <<1,2,3>>, 8), Keys), + lists:foreach( + fun(_) -> + [StartKey, EndKey] = lists:sort([rand:uniform(Max), rand:uniform(Max)]), + ?assertEqual([{K, K + 1} || K <- lists:seq(EndKey, StartKey, -1)], + reverse_range(Db, Tree, StartKey, EndKey, fun(E, A) -> A ++ E end, []) + ) end, + lists:seq(1, 100)) + end}. + + +custom_collation_range_test_() -> + {timeout, 1000, fun() -> + Db = erlfdb_util:get_test_db([empty]), + Max = 100, + Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, Max)])], + CollateFun = fun(A, B) -> collate_raw(B, A) end, + Tree = open(Db, <<1,2,3>>, 6, [{collate_fun, CollateFun}]), + lists:foldl(fun(Key, T) -> insert(Db, T, Key, Key + 1) end, Tree, Keys), + lists:foreach( + fun(_) -> + [StartKey, EndKey] = sort_keys(Tree, [rand:uniform(Max), rand:uniform(Max)]), + Seq = if + StartKey < EndKey -> + lists:seq(StartKey, EndKey); + true -> + lists:seq(StartKey, EndKey, -1) + end, + ?assertEqual([{K, K + 1} || K <- Seq], + range(Db, Tree, StartKey, EndKey, fun(E, A) -> A ++ E end, []) + ) end, + lists:seq(1, 100)) + end}. + + +custom_collation_reverse_range_test_() -> + {timeout, 1000, fun() -> + Db = erlfdb_util:get_test_db([empty]), + Max = 100, + Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, Max)])], + CollateFun = fun(A, B) -> collate_raw(B, A) end, + Tree = open(Db, <<1,2,3>>, 6, [{collate_fun, CollateFun}]), + lists:foldl(fun(Key, T) -> insert(Db, T, Key, Key + 1) end, Tree, Keys), + lists:foreach( + fun(_) -> + [StartKey, EndKey] = sort_keys(Tree, [rand:uniform(Max), rand:uniform(Max)]), + Seq = if + StartKey < EndKey -> + lists:seq(StartKey, EndKey); + true -> + lists:seq(StartKey, EndKey, -1) + end, + ?assertEqual([{K, K + 1} || K <- lists:reverse(Seq)], + reverse_range(Db, Tree, StartKey, EndKey, fun(E, A) -> A ++ E end, []) + ) end, + lists:seq(1, 100)) + end}. 
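A note on the custom collation tests above: CollateFun simply swaps the arguments to collate_raw/2, so the tree orders keys descending, and range/reverse_range still take their start and end keys in the tree's collation order. That is why custom_collation_test/0 asks for range(Db, Tree, 3, 0, ...) and gets [{2, 2}, {1, 1}] back, and why the randomized tests order their endpoints with sort_keys/2 rather than lists:sort/1. A minimal sketch of the flipped comparison (illustrative only):

    CollateFun = fun(A, B) -> collate_raw(B, A) end,
    lt = collate_raw(1, 2),   % natural term order
    gt = CollateFun(1, 2).    % reversed: 1 now collates after 2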
+ + +validate_tree_test() -> + Db = erlfdb_util:get_test_db([empty]), + Tree = open(Db, <<1,2,3>>, 4), + [ebtree:insert(Db, Tree, I, I) || I <- lists:seq(1, 16)], + validate_tree(Db, Tree). + + +validate_node_test_() -> + [ + ?_test(?assertError({node_without_id, _}, validate_node( + #tree{}, #node{id = undefined}))), + ?_test(?assertError({too_few_keys, _}, validate_node( + #tree{collate_fun = fun collate_raw/2, min = 2}, + #node{id = 1, members = [{1, 1}]}))), + ?_test(?assertError({too_many_keys, _}, validate_node( + #tree{collate_fun = fun collate_raw/2, min = 2, max = 2}, + #node{id = 1, members = [{1, 1}, {2, 2}, {3, 3}]}))), + ?_test(?assertError({non_leaf_with_prev, _}, validate_node( + #tree{min = 0}, #node{id = 1, level = 1, prev = 1}))), + ?_test(?assertError({non_leaf_with_next, _}, validate_node( + #tree{min = 0}, #node{id = 1, level = 1, next = 1}))), + ?_test(?assertError({out_of_order, _}, validate_node( + #tree{min = 0, collate_fun = fun collate_raw/2}, + #node{id = 1, members = [{2, 2}, {1, 1}]}))), + ?_test(?assertError({duplicates, _}, validate_node( + #tree{min = 0, collate_fun = fun collate_raw/2}, + #node{id = 1, members = [{1, 1}, {1, 1}]}))) + ]. + + +cache_test_() -> + {spawn, [fun() -> + Db = erlfdb_util:get_test_db([empty]), + CacheFun = fun + (set, [Id, Node]) -> + erlang:put(Id, Node); + (clear, Id) -> + erlang:erase(Id); + (get, Id) -> + erlang:get(Id) + end, + Tree = open(Db, <<1,2,3>>, 4, [{cache_fun, CacheFun}]), + [ebtree:insert(Db, Tree, I, I) || I <- lists:seq(1, 16)], + ?assertEqual({1, 1}, ebtree:lookup(Db, Tree, 1)), + NodeCache = [V || {_K, V} <- erlang:get(), is_record(V, node)], + ?assertEqual(3, length(NodeCache)) + end]}. + + +umerge_members_test() -> + Tree = #tree{collate_fun = fun collate_raw/2}, + NewList = fun() -> + Raw = [{rand:uniform(100), rand:uniform()} || _ <- lists:seq(1, 100)], + lists:ukeysort(1, Raw) + end, + lists:foreach(fun(_) -> + A = NewList(), + B = NewList(), + Stdlib = lists:ukeymerge(1, A, B), + Custom = umerge_members(Tree, 0, A, B), + ?assertEqual(Stdlib, Custom) + end, lists:seq(1, 100)). + + +-endif. diff --git a/src/fabric/include/fabric2.hrl b/src/fabric/include/fabric2.hrl new file mode 100644 index 000000000..ebbb7c7c5 --- /dev/null +++ b/src/fabric/include/fabric2.hrl @@ -0,0 +1,88 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + + +-define(uint2bin(I), binary:encode_unsigned(I, little)). +-define(bin2uint(I), binary:decode_unsigned(I, little)). +-define(bin2int(V), binary_to_integer(V)). +-define(METADATA_VERSION_KEY, <<16#FF, "/metadataVersion">>). + +% Prefix Definitions + +% Layer Level: (LayerPrefix, X, ...) + +-define(CLUSTER_CONFIG, 0). +-define(ALL_DBS, 1). +-define(DB_HCA, 2). +-define(DELETED_DBS, 3). +-define(DBS, 15). +-define(EXPIRING_CACHE, 53). +-define(REPLICATION_IDS, 54). +-define(TX_IDS, 255). + +% Cluster Level: (LayerPrefix, ?CLUSTER_CONFIG, X, ...) + +-define(AEGIS, 0). + +% Database Level: (LayerPrefix, ?DBS, DbPrefix, X, ...) + +-define(DB_VERSION, 0). +-define(DB_CONFIG, 16). 
+-define(DB_STATS, 17). +-define(DB_ALL_DOCS, 18). +-define(DB_CHANGES, 19). +-define(DB_REVS, 20). +-define(DB_DOCS, 21). +-define(DB_LOCAL_DOCS, 22). +-define(DB_ATTS, 23). +-define(DB_VIEWS, 24). +-define(DB_LOCAL_DOC_BODIES, 25). +-define(DB_ATT_NAMES, 26). +-define(DB_SEARCH, 27). +-define(DB_AEGIS, 28). + + +% Versions + +% 0 - Initial implementation +% 1 - Added attachment hash +% 2 - Added size information + +-define(CURR_REV_FORMAT, 2). + +% 0 - Adding local doc versions + +-define(CURR_LDOC_FORMAT, 0). + +% 0 - Attachment storage version + +-define(CURR_ATT_STORAGE_VER, 0). + +% Misc constants + +-define(PDICT_DB_KEY, '$fabric_db_handle'). +-define(PDICT_LAYER_CACHE, '$fabric_layer_id'). +-define(PDICT_CHECKED_DB_IS_CURRENT, '$fabric_checked_db_is_current'). +-define(PDICT_CHECKED_MD_IS_CURRENT, '$fabric_checked_md_is_current'). +-define(PDICT_TX_ID_KEY, '$fabric_tx_id'). +-define(PDICT_TX_RES_KEY, '$fabric_tx_result'). +-define(PDICT_FOLD_ACC_STATE, '$fabric_fold_acc_state'). + +% Let's keep these in ascending order +-define(TRANSACTION_TOO_OLD, 1007). +-define(FUTURE_VERSION, 1009). +-define(COMMIT_UNKNOWN_RESULT, 1021). +-define(TRANSACTION_CANCELLED, 1025). +-define(TRANSACTION_TOO_LARGE, 2101). + + +-define(DEFAULT_BINARY_CHUNK_SIZE, 100000). diff --git a/src/fabric/src/fabric.app.src b/src/fabric/src/fabric.app.src index d7686ca1a..a7059fd10 100644 --- a/src/fabric/src/fabric.app.src +++ b/src/fabric/src/fabric.app.src @@ -13,15 +13,22 @@ {application, fabric, [ {description, "Routing and proxying layer for CouchDB cluster"}, {vsn, git}, - {registered, []}, + {mod, {fabric2_app, []}}, + {registered, [ + fabric_server + ]}, {applications, [ kernel, stdlib, config, + couch_epi, couch, + ctrace, rexi, mem3, couch_log, - couch_stats + couch_stats, + erlfdb, + aegis ]} ]}. diff --git a/src/fabric/src/fabric.erl b/src/fabric/src/fabric.erl index 27fa8c045..bb538e2db 100644 --- a/src/fabric/src/fabric.erl +++ b/src/fabric/src/fabric.erl @@ -668,53 +668,53 @@ set_namespace(NS, #mrargs{extra = Extra} = Args) -> get_view_sig_from_filename(FilePath) -> filename:basename(filename:basename(FilePath, ".view"), ".compact"). --ifdef(TEST). --include_lib("eunit/include/eunit.hrl"). - -update_doc_test_() -> - { - "Update doc tests", { - setup, fun setup/0, fun teardown/1, - fun(Ctx) -> [ - should_throw_conflict(Ctx) - ] end - } - }. - -should_throw_conflict(Doc) -> - ?_test(begin - ?assertThrow(conflict, update_doc(<<"test-db">>, Doc, [])) - end). - - -setup() -> - Doc = #doc{ - id = <<"test_doc">>, - revs = {3, [<<5,68,252,180,43,161,216,223,26,119,71,219,212,229, - 159,113>>]}, - body = {[{<<"foo">>,<<"asdf">>},{<<"author">>,<<"tom">>}]}, - atts = [], deleted = false, meta = [] - }, - ok = application:ensure_started(config), - ok = meck:expect(mem3, shards, fun(_, _) -> [] end), - ok = meck:expect(mem3, quorum, fun(_) -> 1 end), - ok = meck:expect(rexi, cast, fun(_, _) -> ok end), - ok = meck:expect(rexi_utils, recv, - fun(_, _, _, _, _, _) -> - {ok, {error, [{Doc, conflict}]}} - end), - ok = meck:expect(couch_util, reorder_results, - fun(_, [{_, Res}]) -> - [Res] - end), - ok = meck:expect(fabric_util, create_monitors, fun(_) -> ok end), - ok = meck:expect(rexi_monitor, stop, fun(_) -> ok end), - Doc. - - -teardown(_) -> - meck:unload(), - ok = application:stop(config). - - --endif. +%% -ifdef(TEST). +%% -include_lib("eunit/include/eunit.hrl"). 
+%% +%% update_doc_test_() -> +%% { +%% "Update doc tests", { +%% setup, fun setup/0, fun teardown/1, +%% fun(Ctx) -> [ +%% should_throw_conflict(Ctx) +%% ] end +%% } +%% }. +%% +%% should_throw_conflict(Doc) -> +%% ?_test(begin +%% ?assertThrow(conflict, update_doc(<<"test-db">>, Doc, [])) +%% end). +%% +%% +%% setup() -> +%% Doc = #doc{ +%% id = <<"test_doc">>, +%% revs = {3, [<<5,68,252,180,43,161,216,223,26,119,71,219,212,229, +%% 159,113>>]}, +%% body = {[{<<"foo">>,<<"asdf">>},{<<"author">>,<<"tom">>}]}, +%% atts = [], deleted = false, meta = [] +%% }, +%% ok = application:ensure_started(config), +%% ok = meck:expect(mem3, shards, fun(_, _) -> [] end), +%% ok = meck:expect(mem3, quorum, fun(_) -> 1 end), +%% ok = meck:expect(rexi, cast, fun(_, _) -> ok end), +%% ok = meck:expect(rexi_utils, recv, +%% fun(_, _, _, _, _, _) -> +%% {ok, {error, [{Doc, conflict}]}} +%% end), +%% ok = meck:expect(couch_util, reorder_results, +%% fun(_, [{_, Res}]) -> +%% [Res] +%% end), +%% ok = meck:expect(fabric_util, create_monitors, fun(_) -> ok end), +%% ok = meck:expect(rexi_monitor, stop, fun(_) -> ok end), +%% Doc. +%% +%% +%% teardown(_) -> +%% meck:unload(), +%% ok = application:stop(config). +%% +%% +%% -endif. diff --git a/src/fabric/src/fabric2_active_tasks.erl b/src/fabric/src/fabric2_active_tasks.erl new file mode 100644 index 000000000..e706ebaa4 --- /dev/null +++ b/src/fabric/src/fabric2_active_tasks.erl @@ -0,0 +1,52 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + + +-module(fabric2_active_tasks). + + +-export([ + get_active_tasks/0, + get_active_task_info/1, + + update_active_task_info/2 +]). + + +-define(ACTIVE_TASK_INFO, <<"active_task_info">>). + + +get_active_tasks() -> + couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(undefined), fun(JTx) -> + Types = couch_jobs:get_types(JTx), + lists:foldl(fun(Type, TaskAcc) -> + JobIds = couch_jobs:get_active_jobs_ids(JTx, Type), + Tasks = lists:filtermap(fun(JobId) -> + {ok, Data} = couch_jobs:get_job_data(JTx, Type, JobId), + case maps:get(?ACTIVE_TASK_INFO, Data, not_found) of + not_found -> false; + #{} = Map when map_size(Map) == 0 -> false; + #{} = Info -> {true, Info} + end + end, JobIds), + TaskAcc ++ Tasks + end, [], Types) + end). + + +get_active_task_info(JobData) -> + #{?ACTIVE_TASK_INFO:= ActiveTaskInfo} = JobData, + ActiveTaskInfo. + + +update_active_task_info(JobData, ActiveTaskInfo) -> + JobData#{?ACTIVE_TASK_INFO => ActiveTaskInfo}. diff --git a/src/fabric/src/fabric2_app.erl b/src/fabric/src/fabric2_app.erl new file mode 100644 index 000000000..da95acb53 --- /dev/null +++ b/src/fabric/src/fabric2_app.erl @@ -0,0 +1,32 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. 
You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(fabric2_app). +-behaviour(application). + + +-export([ + start/2, + stop/1 +]). + + +start(_Type, StartArgs) -> + fabric2_sup:start_link(StartArgs). + + +stop(_State) -> + case application:get_env(erlfdb, test_cluster_pid) of + {ok, Pid} -> Pid ! close; + _ -> ok + end, + ok. diff --git a/src/fabric/src/fabric2_db.erl b/src/fabric/src/fabric2_db.erl new file mode 100644 index 000000000..b3e510b2e --- /dev/null +++ b/src/fabric/src/fabric2_db.erl @@ -0,0 +1,2363 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(fabric2_db). + + +-export([ + create/2, + open/2, + delete/2, + undelete/4, + + list_dbs/0, + list_dbs/1, + list_dbs/3, + + list_dbs_info/0, + list_dbs_info/1, + list_dbs_info/3, + + list_deleted_dbs_info/0, + list_deleted_dbs_info/1, + list_deleted_dbs_info/3, + + check_is_admin/1, + check_is_member/1, + + name/1, + get_after_doc_read_fun/1, + get_before_doc_update_fun/1, + get_committed_update_seq/1, + get_compacted_seq/1, + get_compactor_pid/1, + get_db_info/1, + %% get_partition_info/2, + get_del_doc_count/1, + get_doc_count/1, + get_doc_count/2, + %% get_epochs/1, + %% get_filepath/1, + get_instance_start_time/1, + get_pid/1, + get_revs_limit/1, + get_revs_limit/2, + get_security/1, + get_security/2, + get_update_seq/1, + get_user_ctx/1, + get_uuid/1, + %% get_purge_seq/1, + %% get_oldest_purge_seq/1, + %% get_purge_infos_limit/1, + + is_clustered/1, + is_db/1, + is_partitioned/1, + is_system_db/1, + is_system_db_name/1, + is_replicator_db/1, + is_users_db/1, + + set_revs_limit/2, + %% set_purge_infos_limit/2, + set_security/2, + set_user_ctx/2, + + ensure_full_commit/1, + ensure_full_commit/2, + + %% load_validation_funs/1, + %% reload_validation_funs/1, + + open_doc/2, + open_doc/3, + open_doc_revs/4, + %% open_doc_int/3, + get_doc_info/2, + get_full_doc_info/2, + get_full_doc_infos/2, + get_missing_revs/2, + get_design_docs/1, + %% get_purge_infos/2, + + %% get_minimum_purge_seq/1, + %% purge_client_exists/3, + + validate_docid/1, + %% doc_from_json_obj_validate/2, + + update_doc/2, + update_doc/3, + update_docs/2, + update_docs/3, + %% delete_doc/3, + + %% purge_docs/2, + %% purge_docs/3, + + read_attachment/3, + write_attachment/3, + + fold_docs/3, + fold_docs/4, + fold_docs/5, + fold_design_docs/4, + fold_local_docs/4, + fold_changes/4, + fold_changes/5, + %% count_changes_since/2, + %% fold_purge_infos/4, + %% fold_purge_infos/5, + + %% calculate_start_seq/3, + %% owner_of/2, + + %% start_compact/1, + %% cancel_compact/1, + %% wait_for_compaction/1, + %% wait_for_compaction/2, + + dbname_suffix/1, + normalize_dbname/1, + validate_dbname/1, + + %% 
make_doc/5, + new_revid/2, + + apply_open_doc_opts/3 +]). + + +-include_lib("couch/include/couch_db.hrl"). +-include("fabric2.hrl"). + + +% Default max database name length is based on CouchDb < 4.x compatibility. See +% default.ini entry for additional information. +-define(DEFAULT_MAX_DATABASE_NAME_LENGTH, 238). + +-define(DBNAME_REGEX, + "^[a-z][a-z0-9\\_\\$()\\+\\-\\/]*" % use the stock CouchDB regex + "(\\.[0-9]{10,})?$" % but allow an optional shard timestamp at the end +). + +-define(FIRST_DDOC_KEY, <<"_design/">>). +-define(LAST_DDOC_KEY, <<"_design0">>). + +-define(RETURN(Term), throw({?MODULE, Term})). + +-define(DEFAULT_UPDATE_DOCS_BATCH_SIZE, 2500000). + + +-record(bacc, { + db, + docs, + batch_size, + options, + rev_futures, + seen, + results +}). + + +create(DbName, Options) -> + case validate_dbname(DbName) of + ok -> + Result = fabric2_fdb:transactional(DbName, Options, fun(TxDb) -> + case fabric2_fdb:exists(TxDb) of + true -> + {error, file_exists}; + false -> + fabric2_fdb:create(TxDb, Options) + end + end), + % We cache outside of the transaction so that we're sure + % that the transaction was committed. + case Result of + #{} = Db0 -> + Db1 = maybe_add_sys_db_callbacks(Db0), + ok = fabric2_server:store(Db1), + fabric2_db_plugin:after_db_create(DbName, get_uuid(Db1)), + {ok, Db1#{tx := undefined}}; + Error -> + Error + end; + Error -> + Error + end. + + +open(DbName, Options) -> + UUID = fabric2_util:get_value(uuid, Options), + case fabric2_server:fetch(DbName, UUID) of + #{} = Db -> + Db1 = maybe_set_user_ctx(Db, Options), + Db2 = maybe_set_interactive(Db1, Options), + {ok, require_member_check(Db2)}; + undefined -> + Result = fabric2_fdb:transactional(DbName, Options, fun(TxDb) -> + fabric2_fdb:open(TxDb, Options) + end), + % Cache outside the transaction retry loop + case Result of + #{} = Db0 -> + Db1 = maybe_add_sys_db_callbacks(Db0), + ok = fabric2_server:store(Db1), + Db2 = Db1#{tx := undefined}, + {ok, require_member_check(Db2)}; + Error -> + Error + end + end. + + +delete(DbName, Options) -> + % Delete doesn't check user_ctx, that's done at the HTTP API level + % here we just care to get the `database_does_not_exist` error thrown + Options1 = lists:keystore(user_ctx, 1, Options, ?ADMIN_CTX), + case lists:keyfind(deleted_at, 1, Options1) of + {deleted_at, TimeStamp} -> + fabric2_fdb:transactional(DbName, Options1, fun(TxDb) -> + fabric2_fdb:remove_deleted_db(TxDb, TimeStamp) + end); + false -> + {ok, Db} = open(DbName, Options1), + Resp = fabric2_fdb:transactional(Db, fun(TxDb) -> + fabric2_fdb:delete(TxDb) + end), + if Resp /= ok -> Resp; true -> + fabric2_db_plugin:after_db_delete(DbName, get_uuid(Db)), + fabric2_server:remove(DbName) + end + end. + + +undelete(DbName, TgtDbName, TimeStamp, Options) -> + case validate_dbname(TgtDbName) of + ok -> + Resp = fabric2_fdb:transactional(DbName, Options, + fun(TxDb) -> + fabric2_fdb:undelete(TxDb, TgtDbName, TimeStamp) + end + ), + if Resp /= ok -> ok; true -> + {ok, Db} = open(TgtDbName, Options), + fabric2_db_plugin:after_db_create(TgtDbName, get_uuid(Db)) + end, + Resp; + Error -> + Error + end. + + +list_dbs() -> + list_dbs([]). + + +list_dbs(Options) -> + Callback = fun(DbName, Acc) -> [DbName | Acc] end, + DbNames = fabric2_fdb:transactional(fun(Tx) -> + fabric2_fdb:list_dbs(Tx, Callback, [], Options) + end), + lists:reverse(DbNames). 
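A minimal calling sketch for the create/open/delete API defined above (illustrative only: ?ADMIN_CTX is the {user_ctx, ...} macro from couch_db.hrl, and the plain ok result of delete/2 is an assumption, since the success value comes from fabric2_server:remove/1, which is not shown in this hunk):

    {ok, Db} = fabric2_db:create(<<"db1">>, [?ADMIN_CTX]),   % {error, file_exists} if it already exists
    {ok, _Db2} = fabric2_db:open(<<"db1">>, [?ADMIN_CTX]),   % served from the fabric2_server cache when possible
    ok = fabric2_db:delete(<<"db1">>, []).                   % with {deleted_at, TS} this removes an already-deleted instance instead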
+ + +list_dbs(UserFun, UserAcc0, Options) -> + FoldFun = fun + (DbName, Acc) -> maybe_stop(UserFun({row, [{id, DbName}]}, Acc)) + end, + fabric2_fdb:transactional(fun(Tx) -> + try + UserAcc1 = maybe_stop(UserFun({meta, []}, UserAcc0)), + UserAcc2 = fabric2_fdb:list_dbs( + Tx, + FoldFun, + UserAcc1, + Options + ), + {ok, maybe_stop(UserFun(complete, UserAcc2))} + catch throw:{stop, FinalUserAcc} -> + {ok, FinalUserAcc} + end + end). + + +list_dbs_info() -> + list_dbs_info([]). + + +list_dbs_info(Options) -> + Callback = fun(Value, Acc) -> + NewAcc = case Value of + {meta, _} -> Acc; + {row, DbInfo} -> [DbInfo | Acc]; + complete -> Acc + end, + {ok, NewAcc} + end, + {ok, DbInfos} = list_dbs_info(Callback, [], Options), + {ok, lists:reverse(DbInfos)}. + + +list_dbs_info(UserFun, UserAcc0, Options) -> + FoldFun = fun(DbName, InfoFuture, {FutureQ, Count, Acc}) -> + NewFutureQ = queue:in({DbName, InfoFuture}, FutureQ), + drain_info_futures(NewFutureQ, Count + 1, UserFun, Acc) + end, + fabric2_fdb:transactional(fun(Tx) -> + try + UserAcc1 = maybe_stop(UserFun({meta, []}, UserAcc0)), + InitAcc = {queue:new(), 0, UserAcc1}, + {FinalFutureQ, _, UserAcc2} = fabric2_fdb:list_dbs_info( + Tx, + FoldFun, + InitAcc, + Options + ), + UserAcc3 = drain_all_info_futures(FinalFutureQ, UserFun, UserAcc2), + {ok, maybe_stop(UserFun(complete, UserAcc3))} + catch throw:{stop, FinalUserAcc} -> + {ok, FinalUserAcc} + end + end). + + +list_deleted_dbs_info() -> + list_deleted_dbs_info([]). + + +list_deleted_dbs_info(Options) -> + Callback = fun(Value, Acc) -> + NewAcc = case Value of + {meta, _} -> Acc; + {row, DbInfo} -> [DbInfo | Acc]; + complete -> Acc + end, + {ok, NewAcc} + end, + {ok, DbInfos} = list_deleted_dbs_info(Callback, [], Options), + {ok, lists:reverse(DbInfos)}. + + +list_deleted_dbs_info(UserFun, UserAcc0, Options0) -> + Dir = fabric2_util:get_value(dir, Options0, fwd), + StartKey0 = fabric2_util:get_value(start_key, Options0), + EndKey0 = fabric2_util:get_value(end_key, Options0), + + {FirstBinary, LastBinary} = case Dir of + fwd -> {<<>>, <<255>>}; + rev -> {<<255>>, <<>>} + end, + + StartKey1 = case StartKey0 of + undefined -> + {FirstBinary}; + DbName0 when is_binary(DbName0) -> + {DbName0, FirstBinary}; + [DbName0, TimeStamp0] when is_binary(DbName0), is_binary(TimeStamp0) -> + {DbName0, TimeStamp0}; + BadStartKey -> + erlang:error({invalid_start_key, BadStartKey}) + end, + EndKey1 = case EndKey0 of + undefined -> + {LastBinary}; + DbName1 when is_binary(DbName1) -> + {DbName1, LastBinary}; + [DbName1, TimeStamp1] when is_binary(DbName1), is_binary(TimeStamp1) -> + {DbName1, TimeStamp1}; + BadEndKey -> + erlang:error({invalid_end_key, BadEndKey}) + end, + + Options1 = Options0 -- [{start_key, StartKey0}, {end_key, EndKey0}], + Options2 = [ + {start_key, StartKey1}, + {end_key, EndKey1}, + {wrap_keys, false} + ] ++ Options1, + + FoldFun = fun(DbName, TimeStamp, InfoFuture, {FutureQ, Count, Acc}) -> + NewFutureQ = queue:in({DbName, TimeStamp, InfoFuture}, FutureQ), + drain_deleted_info_futures(NewFutureQ, Count + 1, UserFun, Acc) + end, + fabric2_fdb:transactional(fun(Tx) -> + try + UserAcc1 = maybe_stop(UserFun({meta, []}, UserAcc0)), + InitAcc = {queue:new(), 0, UserAcc1}, + {FinalFutureQ, _, UserAcc2} = fabric2_fdb:list_deleted_dbs_info( + Tx, + FoldFun, + InitAcc, + Options2 + ), + UserAcc3 = drain_all_deleted_info_futures( + FinalFutureQ, + UserFun, + UserAcc2 + ), + {ok, maybe_stop(UserFun(complete, UserAcc3))} + catch throw:{stop, FinalUserAcc} -> + {ok, FinalUserAcc} + end + end). 
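list_dbs/3, list_dbs_info/3 and list_deleted_dbs_info/3 above stream results through a user callback that sees {meta, _}, {row, _} and complete events, mirroring the internal Callback used by list_dbs_info/1. A sketch of a caller-supplied callback (illustrative only; the {ok, Acc} return shape follows the in-file callbacks, and returning {stop, Acc} to cut the fold short is an assumption based on the maybe_stop/throw:{stop, _} handling, since maybe_stop/1 itself is not shown here):

    Callback = fun
        ({meta, _}, Acc) -> {ok, Acc};
        ({row, DbInfo}, Acc) -> {ok, [DbInfo | Acc]};
        (complete, Acc) -> {ok, lists:reverse(Acc)}
    end,
    {ok, AllDbInfos} = fabric2_db:list_dbs_info(Callback, [], []).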
+ + +is_admin(Db, {SecProps}) when is_list(SecProps) -> + case fabric2_db_plugin:check_is_admin(Db) of + true -> + true; + false -> + UserCtx = get_user_ctx(Db), + {Admins} = get_admins(SecProps), + is_authorized(Admins, UserCtx) + end. + + +check_is_admin(Db) -> + check_is_admin(Db, get_security(Db)). + + +check_is_admin(Db, SecDoc) -> + case is_admin(Db, SecDoc) of + true -> + ok; + false -> + UserCtx = get_user_ctx(Db), + Reason = <<"You are not a db or server admin.">>, + throw_security_error(UserCtx, Reason) + end. + + +check_is_member(Db) -> + check_is_member(Db, get_security(Db)). + + +check_is_member(Db, SecDoc) -> + case is_member(Db, SecDoc) of + true -> + ok; + false -> + UserCtx = get_user_ctx(Db), + throw_security_error(UserCtx) + end. + + +require_admin_check(#{} = Db) -> + Db#{security_fun := fun check_is_admin/2}. + + +require_member_check(#{} = Db) -> + Db#{security_fun := fun check_is_member/2}. + + +name(#{name := DbName}) -> + DbName. + + +get_after_doc_read_fun(#{after_doc_read := AfterDocRead}) -> + AfterDocRead. + + +get_before_doc_update_fun(#{before_doc_update := BeforeDocUpdate}) -> + BeforeDocUpdate. + +get_committed_update_seq(#{} = Db) -> + get_update_seq(Db). + + +get_compacted_seq(#{} = Db) -> + get_update_seq(Db). + + +get_compactor_pid(#{} = _Db) -> + nil. + + +get_db_info(#{} = Db) -> + DbProps = fabric2_fdb:transactional(Db, fun(TxDb) -> + fabric2_fdb:get_info(TxDb) + end), + {ok, make_db_info(name(Db), DbProps)}. + + +get_del_doc_count(#{} = Db) -> + get_doc_count(Db, <<"doc_del_count">>). + + +get_doc_count(Db) -> + get_doc_count(Db, <<"doc_count">>). + + +get_doc_count(Db, undefined) -> + get_doc_count(Db, <<"doc_count">>); + +get_doc_count(Db, <<"_all_docs">>) -> + get_doc_count(Db, <<"doc_count">>); + +get_doc_count(DbName, <<"_design">>) -> + get_doc_count(DbName, <<"doc_design_count">>); + +get_doc_count(DbName, <<"_local">>) -> + get_doc_count(DbName, <<"doc_local_count">>); + +get_doc_count(Db, Key) -> + fabric2_fdb:transactional(Db, fun(TxDb) -> + fabric2_fdb:get_stat(TxDb, Key) + end). + + +get_instance_start_time(#{}) -> + 0. + + +get_pid(#{}) -> + nil. + + +get_revs_limit(#{} = Db) -> + get_revs_limit(Db, []). + + +get_revs_limit(#{} = Db, Opts) -> + CurrentDb = get_cached_db(Db, Opts), + maps:get(revs_limit, CurrentDb). + + +get_security(#{} = Db) -> + get_security(Db, []). + + +get_security(#{} = Db, Opts) -> + CurrentDb = get_cached_db(Db, Opts), + maps:get(security_doc, CurrentDb). + + +get_update_seq(#{} = Db) -> + fabric2_fdb:transactional(Db, fun(TxDb) -> + fabric2_fdb:get_last_change(TxDb) + end). + + +get_user_ctx(#{user_ctx := UserCtx}) -> + UserCtx. + + +get_uuid(#{uuid := UUID}) -> + UUID. + + +is_clustered(#{}) -> + false. + + +is_db(#{name := _}) -> + true; +is_db(_) -> + false. + + +is_partitioned(#{}) -> + false. + + +is_system_db(#{name := DbName}) -> + is_system_db_name(DbName). + + +is_system_db_name(DbName) when is_list(DbName) -> + is_system_db_name(?l2b(DbName)); +is_system_db_name(DbName) when is_binary(DbName) -> + Suffix = filename:basename(DbName), + case {filename:dirname(DbName), lists:member(Suffix, ?SYSTEM_DATABASES)} of + {<<".">>, Result} -> Result; + {_Prefix, false} -> false; + {Prefix, true} -> + ReOpts = [{capture,none}, dollar_endonly], + re:run(Prefix, ?DBNAME_REGEX, ReOpts) == match + end. + + +is_replicator_db(#{name := DbName}) -> + is_replicator_db(DbName); + +is_replicator_db(DbName) when is_binary(DbName) -> + fabric2_util:dbname_ends_with(DbName, <<"_replicator">>). 
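The admin and member checks above operate on the usual {proplist} security object, whose <<"admins">> and <<"members">> sections each carry <<"names">> and <<"roles">> lists (see get_admins/1, get_members/1 and is_authorized/2 later in this file). A sketch of the object shape and of installing it with set_security/2, defined just below (illustrative only; StaffDb stands for a handle whose user_ctx carries the "staff" role):

    SecObj = {[
        {<<"admins">>,  {[{<<"names">>, [<<"bob">>]}, {<<"roles">>, []}]}},
        {<<"members">>, {[{<<"names">>, []}, {<<"roles">>, [<<"staff">>]}]}}
    ]},
    fabric2_db:set_security(Db, SecObj),
    %% check_is_member/1 now succeeds for admins and for any user_ctx with the
    %% "staff" role, and throws an unauthorized/forbidden security error otherwise.
    ok = fabric2_db:check_is_member(StaffDb).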
+ + +is_users_db(#{name := DbName}) -> + is_users_db(DbName); + +is_users_db(DbName) when is_binary(DbName) -> + AuthenticationDb = config:get("chttpd_auth", "authentication_db"), + CfgUsersSuffix = config:get("couchdb", "users_db_suffix", "_users"), + + IsAuthCache = if AuthenticationDb == undefined -> false; true -> + DbName == ?l2b(AuthenticationDb) + end, + IsCfgUsersDb = fabric2_util:dbname_ends_with(DbName, ?l2b(CfgUsersSuffix)), + IsGlobalUsersDb = fabric2_util:dbname_ends_with(DbName, <<"_users">>), + + IsAuthCache orelse IsCfgUsersDb orelse IsGlobalUsersDb. + + +set_revs_limit(#{} = Db0, RevsLimit) when is_integer(RevsLimit) -> + Db1 = require_admin_check(Db0), + Resp = fabric2_fdb:transactional(Db1, fun(TxDb) -> + fabric2_fdb:set_config(TxDb, revs_limit, RevsLimit) + end), + case Resp of + {ok, #{} = Db2} -> fabric2_server:store(Db2); + Err -> Err + end. + + +set_security(#{} = Db0, Security) -> + Db1 = require_admin_check(Db0), + ok = fabric2_util:validate_security_object(Security), + Resp = fabric2_fdb:transactional(Db1, fun(TxDb) -> + fabric2_fdb:set_config(TxDb, security_doc, Security) + end), + case Resp of + {ok, #{} = Db2} -> fabric2_server:store(Db2); + Err -> Err + end. + + +set_user_ctx(#{} = Db, UserCtx) -> + Db#{user_ctx := UserCtx}. + + +ensure_full_commit(#{}) -> + {ok, 0}. + + +ensure_full_commit(#{}, _Timeout) -> + {ok, 0}. + + +open_doc(#{} = Db, DocId) -> + open_doc(Db, DocId, []). + + +open_doc(#{} = Db, <<?LOCAL_DOC_PREFIX, _/binary>> = DocId, _Options) -> + fabric2_fdb:transactional(Db, fun(TxDb) -> + case fabric2_fdb:get_local_doc(TxDb, DocId) of + #doc{} = Doc -> {ok, Doc}; + Else -> Else + end + end); + +open_doc(#{} = Db, DocId, Options) -> + NeedsTreeOpts = [revs_info, conflicts, deleted_conflicts], + NeedsTree = (Options -- NeedsTreeOpts /= Options), + OpenDeleted = lists:member(deleted, Options), + fabric2_fdb:transactional(Db, fun(TxDb) -> + Revs = case NeedsTree of + true -> fabric2_fdb:get_all_revs(TxDb, DocId); + false -> fabric2_fdb:get_winning_revs(TxDb, DocId, 1) + end, + if Revs == [] -> {not_found, missing}; true -> + #{winner := true} = RI = lists:last(Revs), + case fabric2_fdb:get_doc_body(TxDb, DocId, RI) of + #doc{deleted = true} when not OpenDeleted -> + {not_found, deleted}; + #doc{} = Doc -> + apply_open_doc_opts(Doc, Revs, Options); + Else -> + Else + end + end + end). + + +open_doc_revs(Db, DocId, Revs, Options) -> + Latest = lists:member(latest, Options), + fabric2_fdb:transactional(Db, fun(TxDb) -> + AllRevInfos = fabric2_fdb:get_all_revs(TxDb, DocId), + RevTree = lists:foldl(fun(RI, TreeAcc) -> + RIPath = fabric2_util:revinfo_to_path(RI), + {Merged, _} = couch_key_tree:merge(TreeAcc, RIPath), + Merged + end, [], AllRevInfos), + {Found, Missing} = case Revs of + all -> + {couch_key_tree:get_all_leafs(RevTree), []}; + _ when Latest -> + couch_key_tree:get_key_leafs(RevTree, Revs); + _ -> + couch_key_tree:get(RevTree, Revs) + end, + Docs = lists:map(fun({Value, {Pos, [Rev | RevPath]}}) -> + case Value of + ?REV_MISSING -> + % We have the rev in our list but know nothing about it + {{not_found, missing}, {Pos, Rev}}; + _ -> + RevInfo = #{ + rev_id => {Pos, Rev}, + rev_path => RevPath + }, + case fabric2_fdb:get_doc_body(TxDb, DocId, RevInfo) of + #doc{} = Doc -> + apply_open_doc_opts(Doc, AllRevInfos, Options); + Else -> + {Else, {Pos, Rev}} + end + end + end, Found), + MissingDocs = [{{not_found, missing}, MRev} || MRev <- Missing], + {ok, Docs ++ MissingDocs} + end). 
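A short sketch of the open_doc/2,3 behaviour implemented above (illustrative only; DeletedId stands for the id of a document whose winning revision is deleted):

    {not_found, missing} = fabric2_db:open_doc(Db, <<"no-such-doc">>),
    %% Deleted winners are hidden unless the `deleted` option is passed:
    {not_found, deleted} = fabric2_db:open_doc(Db, DeletedId),
    {ok, #doc{deleted = true}} = fabric2_db:open_doc(Db, DeletedId, [deleted]),
    %% Tree-shaped metadata is only computed when revs_info, conflicts or
    %% deleted_conflicts is requested (the NeedsTree branch above):
    {ok, #doc{meta = Meta}} = fabric2_db:open_doc(Db, <<"existing-doc">>, [revs_info, conflicts]),
    {revs_info, _Pos, _RevsInfo} = lists:keyfind(revs_info, 1, Meta).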
+ + +get_doc_info(Db, DocId) -> + case get_full_doc_info(Db, DocId) of + not_found -> not_found; + FDI -> couch_doc:to_doc_info(FDI) + end. + + +get_full_doc_info(Db, DocId) -> + RevInfos = fabric2_fdb:transactional(Db, fun(TxDb) -> + fabric2_fdb:get_all_revs(TxDb, DocId) + end), + if RevInfos == [] -> not_found; true -> + #{winner := true} = Winner = lists:last(RevInfos), + RevTree = lists:foldl(fun(RI, TreeAcc) -> + RIPath = fabric2_util:revinfo_to_path(RI), + {Merged, _} = couch_key_tree:merge(TreeAcc, RIPath), + Merged + end, [], RevInfos), + #full_doc_info{ + id = DocId, + update_seq = fabric2_fdb:vs_to_seq(maps:get(sequence, Winner)), + deleted = maps:get(deleted, Winner), + rev_tree = RevTree + } + end. + + +get_full_doc_infos(Db, DocIds) -> + fabric2_fdb:transactional(Db, fun(TxDb) -> + lists:map(fun(DocId) -> + get_full_doc_info(TxDb, DocId) + end, DocIds) + end). + + +get_missing_revs(Db, JsonIdRevs) -> + IdRevs = [idrevs(IdR) || IdR <- JsonIdRevs], + AllRevInfos = fabric2_fdb:transactional(Db, fun(TxDb) -> + lists:foldl(fun({Id, _Revs}, Acc) -> + case maps:is_key(Id, Acc) of + true -> + Acc; + false -> + RevInfos = fabric2_fdb:get_all_revs(TxDb, Id), + Acc#{Id => RevInfos} + end + end, #{}, IdRevs) + end), + AllMissing = lists:flatmap(fun({Id, Revs}) -> + #{Id := RevInfos} = AllRevInfos, + Missing = try + lists:foldl(fun(RevInfo, RevAcc) -> + if RevAcc /= [] -> ok; true -> + throw(all_found) + end, + filter_found_revs(RevInfo, RevAcc) + end, Revs, RevInfos) + catch throw:all_found -> + [] + end, + if Missing == [] -> []; true -> + PossibleAncestors = find_possible_ancestors(RevInfos, Missing), + [{Id, Missing, PossibleAncestors}] + end + end, IdRevs), + {ok, AllMissing}. + + +get_design_docs(Db) -> + fabric2_fdb:transactional(Db, fun(TxDb) -> + #{ + db_prefix := DbPrefix + } = TxDb, + + Prefix = erlfdb_tuple:pack({?DB_ALL_DOCS}, DbPrefix), + Options = set_design_doc_keys([]), + FoldFun = fun({Key, Val}, Acc) -> + {DocId} = erlfdb_tuple:unpack(Key, Prefix), + RevId = erlfdb_tuple:unpack(Val), + Rev = #{ + rev_id => RevId, + rev_path => [] + }, + Future = fabric2_fdb:get_doc_body_future(TxDb, DocId, Rev), + [{DocId, Rev, Future} | Acc] + end, + Futures = fabric2_fdb:fold_range(TxDb, Prefix, FoldFun, [], Options), + + % Using foldl instead of map means that the design + % docs come out in sorted order. + lists:foldl(fun({DocId, Rev, Future}, Acc) -> + [fabric2_fdb:get_doc_body_wait(TxDb, DocId, Rev, Future) | Acc] + end, [], Futures) + end). 
+ + +validate_docid(<<"">>) -> + throw({illegal_docid, <<"Document id must not be empty">>}); +validate_docid(<<"_design/">>) -> + throw({illegal_docid, <<"Illegal document id `_design/`">>}); +validate_docid(<<"_local/">>) -> + throw({illegal_docid, <<"Illegal document id `_local/`">>}); +validate_docid(Id) when is_binary(Id) -> + MaxLen = case config:get("couchdb", "max_document_id_length", "infinity") of + "infinity" -> infinity; + IntegerVal -> list_to_integer(IntegerVal) + end, + case MaxLen > 0 andalso byte_size(Id) > MaxLen of + true -> throw({illegal_docid, <<"Document id is too long">>}); + false -> ok + end, + case couch_util:validate_utf8(Id) of + false -> throw({illegal_docid, <<"Document id must be valid UTF-8">>}); + true -> ok + end, + case Id of + <<?DESIGN_DOC_PREFIX, _/binary>> -> ok; + <<?LOCAL_DOC_PREFIX, _/binary>> -> ok; + <<"_", _/binary>> -> + case fabric2_db_plugin:validate_docid(Id) of + true -> + ok; + false -> + throw( + {illegal_docid, + <<"Only reserved document ids may start with underscore.">>}) + end; + _Else -> ok + end; +validate_docid(Id) -> + couch_log:debug("Document id is not a string: ~p", [Id]), + throw({illegal_docid, <<"Document id must be a string">>}). + + +update_doc(Db, Doc) -> + update_doc(Db, Doc, []). + + +update_doc(Db, Doc, Options) -> + case update_docs(Db, [Doc], Options) of + {ok, [{ok, NewRev}]} -> + {ok, NewRev}; + {ok, [{{_Id, _Rev}, Error}]} -> + throw(Error); + {error, [{{_Id, _Rev}, Error}]} -> + throw(Error); + {error, [Error]} -> + throw(Error); + {ok, []} -> + % replication success + {Pos, [RevId | _]} = Doc#doc.revs, + {ok, {Pos, RevId}} + end. + + +update_docs(Db, Docs) -> + update_docs(Db, Docs, []). + + +update_docs(Db, Docs0, Options) -> + Docs1 = apply_before_doc_update(Db, Docs0, Options), + try + validate_atomic_update(Docs0, lists:member(all_or_nothing, Options)), + + Resps0 = batch_update_docs(Db, Docs1, Options), + + % Notify index builder + fabric2_index:db_updated(name(Db)), + + % Convert errors + Resps1 = lists:map(fun(Resp) -> + case Resp of + {#doc{} = Doc, Error} -> + #doc{ + id = DocId, + revs = Revs + } = Doc, + RevId = case Revs of + {RevPos, [Rev | _]} -> {RevPos, Rev}; + {0, []} -> {0, <<>>}; + Else -> Else + end, + {{DocId, RevId}, Error}; + Else -> + Else + end + end, Resps0), + case is_replicated(Options) of + true -> + {ok, lists:flatmap(fun(R) -> + case R of + {ok, []} -> []; + {{_, _}, {ok, []}} -> []; + Else -> [Else] + end + end, Resps1)}; + false -> + {ok, Resps1} + end + catch throw:{aborted, Errors} -> + {aborted, Errors} + end. + + +read_attachment(Db, DocId, AttId) -> + fabric2_fdb:transactional(Db, fun(TxDb) -> + fabric2_fdb:read_attachment(TxDb, DocId, AttId) + end). + + +write_attachment(Db, DocId, Att) -> + Data = couch_att:fetch(data, Att), + Encoding = couch_att:fetch(encoding, Att), + {ok, AttId} = fabric2_fdb:write_attachment(Db, DocId, Data, Encoding), + couch_att:store(data, {loc, Db, DocId, AttId}, Att). + + +fold_docs(Db, UserFun, UserAcc) -> + fold_docs(Db, UserFun, UserAcc, []). 
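An interactive-update sketch against update_doc/2,3 defined above (illustrative only; the #doc record comes from couch_db.hrl, which this module includes, and note that per the clauses above a conflict is thrown rather than returned):

    Doc0 = #doc{id = <<"doc1">>, body = {[{<<"value">>, 1}]}},
    {ok, {1, Rev1}} = fabric2_db:update_doc(Db, Doc0),
    %% Updating again without supplying the current revision throws:
    try fabric2_db:update_doc(Db, Doc0) catch throw:conflict -> conflict end,
    %% Supplying the winning revision succeeds and bumps the position:
    Doc1 = Doc0#doc{revs = {1, [Rev1]}, body = {[{<<"value">>, 2}]}},
    {ok, {2, _Rev2}} = fabric2_db:update_doc(Db, Doc1).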
+ + +fold_docs(Db, UserFun, UserAcc0, Options) -> + fabric2_fdb:transactional(Db, fun(TxDb) -> + try + #{ + db_prefix := DbPrefix + } = TxDb, + + Prefix = erlfdb_tuple:pack({?DB_ALL_DOCS}, DbPrefix), + Meta = get_all_docs_meta(TxDb, Options), + + UserAcc1 = maybe_stop(UserFun({meta, Meta}, UserAcc0)), + + UserAcc2 = fabric2_fdb:fold_range(TxDb, Prefix, fun({K, V}, Acc) -> + {DocId} = erlfdb_tuple:unpack(K, Prefix), + RevId = erlfdb_tuple:unpack(V), + Row0 = [ + {id, DocId}, + {key, DocId}, + {value, {[{rev, couch_doc:rev_to_str(RevId)}]}} + ], + + DocOpts = couch_util:get_value(doc_opts, Options, []), + OpenOpts = [deleted | DocOpts], + + Row1 = case lists:keyfind(include_docs, 1, Options) of + {include_docs, true} -> + Row0 ++ open_json_doc(TxDb, DocId, OpenOpts, DocOpts); + _ -> + Row0 + end, + + maybe_stop(UserFun({row, Row1}, Acc)) + end, UserAcc1, Options), + + {ok, maybe_stop(UserFun(complete, UserAcc2))} + catch throw:{stop, FinalUserAcc} -> + {ok, FinalUserAcc} + end + end). + + +fold_docs(Db, DocIds, UserFun, UserAcc0, Options) -> + fabric2_fdb:transactional(Db, fun(TxDb) -> + try + NeedsTreeOpts = [revs_info, conflicts, deleted_conflicts], + NeedsTree = (Options -- NeedsTreeOpts /= Options), + + InitAcc = #{ + revs_q => queue:new(), + revs_count => 0, + body_q => queue:new(), + body_count => 0, + doc_opts => Options, + user_acc => UserAcc0, + user_fun => UserFun + }, + + FinalAcc1 = lists:foldl(fun(DocId, Acc) -> + #{ + revs_q := RevsQ, + revs_count := RevsCount + } = Acc, + Future = fold_docs_get_revs(TxDb, DocId, NeedsTree), + NewAcc = Acc#{ + revs_q := queue:in({DocId, Future}, RevsQ), + revs_count := RevsCount + 1 + }, + drain_fold_docs_revs_futures(TxDb, NewAcc) + end, InitAcc, DocIds), + + FinalAcc2 = drain_all_fold_docs_revs_futures(TxDb, FinalAcc1), + FinalAcc3 = drain_all_fold_docs_body_futures(TxDb, FinalAcc2), + + #{ + user_acc := FinalUserAcc + } = FinalAcc3, + {ok, FinalUserAcc} + + catch throw:{stop, StopUserAcc} -> + {ok, StopUserAcc} + end + end). + + + + +fold_design_docs(Db, UserFun, UserAcc0, Options1) -> + Options2 = set_design_doc_keys(Options1), + fold_docs(Db, UserFun, UserAcc0, Options2). + + +fold_local_docs(Db, UserFun, UserAcc0, Options0) -> + % This is mostly for testing and sanity checking. When calling from a test + % namespace will be automatically set. We also assert when called from the + % API the correct namespace was set + Options = case lists:keyfind(namespace, 1, Options0) of + {namespace, <<"_local">>} -> Options0; + false -> [{namespace, <<"_local">>} | Options0] + end, + fabric2_fdb:transactional(Db, fun(TxDb) -> + try + #{ + db_prefix := DbPrefix + } = TxDb, + + Prefix = erlfdb_tuple:pack({?DB_LOCAL_DOCS}, DbPrefix), + Meta = get_all_docs_meta(TxDb, Options), + + UserAcc1 = maybe_stop(UserFun({meta, Meta}, UserAcc0)), + + UserAcc2 = fabric2_fdb:fold_range(TxDb, Prefix, fun({K, V}, Acc) -> + {DocId} = erlfdb_tuple:unpack(K, Prefix), + Rev = fabric2_fdb:get_local_doc_rev(TxDb, DocId, V), + maybe_stop(UserFun({row, [ + {id, DocId}, + {key, DocId}, + {value, {[{rev, couch_doc:rev_to_str({0, Rev})}]}} + ]}, Acc)) + end, UserAcc1, Options), + + {ok, maybe_stop(UserFun(complete, UserAcc2))} + catch throw:{stop, FinalUserAcc} -> + {ok, FinalUserAcc} + end + end). + + +fold_changes(Db, SinceSeq, UserFun, UserAcc) -> + fold_changes(Db, SinceSeq, UserFun, UserAcc, []). 
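fold_changes/4 above (implemented by fold_changes/5 just below) hands each change to the user fun as a map with id, sequence, rev_id and deleted keys. A calling sketch (illustrative only; starting from sequence 0 and the {ok, Acc}/{stop, Acc} callback returns are assumptions, since get_since_seq/3 and maybe_stop/1 are not shown in this hunk):

    ChangesFun = fun(#{id := Id, sequence := Seq, deleted := Del}, Acc) ->
        {ok, [{Id, Seq, Del} | Acc]}
    end,
    {ok, Changes} = fabric2_db:fold_changes(Db, 0, ChangesFun, []),
    %% Options accepted by fold_changes/5 include {dir, rev} and {restart_tx, boolean()}.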
+ + +fold_changes(Db, SinceSeq, UserFun, UserAcc, Options) -> + fabric2_fdb:transactional(Db, fun(TxDb) -> + try + #{ + db_prefix := DbPrefix + } = TxDb, + + Prefix = erlfdb_tuple:pack({?DB_CHANGES}, DbPrefix), + + Dir = case fabric2_util:get_value(dir, Options, fwd) of + rev -> rev; + _ -> fwd + end, + + RestartTx = case fabric2_util:get_value(restart_tx, Options) of + undefined -> [{restart_tx, true}]; + _AlreadySet -> [] + end, + + StartKey = get_since_seq(TxDb, Dir, SinceSeq), + EndKey = case Dir of + rev -> fabric2_util:seq_zero_vs(); + _ -> fabric2_util:seq_max_vs() + end, + FoldOpts = [ + {start_key, StartKey}, + {end_key, EndKey} + ] ++ RestartTx ++ Options, + + {ok, fabric2_fdb:fold_range(TxDb, Prefix, fun({K, V}, Acc) -> + {SeqVS} = erlfdb_tuple:unpack(K, Prefix), + {DocId, Deleted, RevId} = erlfdb_tuple:unpack(V), + + Change = #{ + id => DocId, + sequence => fabric2_fdb:vs_to_seq(SeqVS), + rev_id => RevId, + deleted => Deleted + }, + + maybe_stop(UserFun(Change, Acc)) + end, UserAcc, FoldOpts)} + catch throw:{stop, FinalUserAcc} -> + {ok, FinalUserAcc} + end + end). + + +dbname_suffix(DbName) -> + filename:basename(normalize_dbname(DbName)). + + +normalize_dbname(DbName) -> + % Remove in the final cleanup. We don't need to handle shards prefix or + % remove .couch suffixes anymore. Keep it for now to pass all the existing + % tests. + couch_db:normalize_dbname(DbName). + + +validate_dbname(DbName) when is_list(DbName) -> + validate_dbname(?l2b(DbName)); + +validate_dbname(DbName) when is_binary(DbName) -> + Normalized = normalize_dbname(DbName), + fabric2_db_plugin:validate_dbname( + DbName, Normalized, fun validate_dbname_int/2). + +validate_dbname_int(DbName, Normalized) when is_binary(DbName) -> + case validate_dbname_length(DbName) of + ok -> validate_dbname_pat(DbName, Normalized); + {error, _} = Error -> Error + end. + + +validate_dbname_length(DbName) -> + MaxLength = config:get_integer("couchdb", "max_database_name_length", + ?DEFAULT_MAX_DATABASE_NAME_LENGTH), + case byte_size(DbName) =< MaxLength of + true -> ok; + false -> {error, {database_name_too_long, DbName}} + end. + + +validate_dbname_pat(DbName, Normalized) -> + DbNoExt = couch_util:drop_dot_couch_ext(DbName), + case re:run(DbNoExt, ?DBNAME_REGEX, [{capture,none}, dollar_endonly]) of + match -> + ok; + nomatch -> + case is_system_db_name(Normalized) of + true -> ok; + false -> {error, {illegal_database_name, DbName}} + end + end. + + +maybe_add_sys_db_callbacks(Db) -> + IsReplicatorDb = is_replicator_db(Db), + IsUsersDb = is_users_db(Db), + + {BDU, ADR} = if + IsReplicatorDb -> + { + fun couch_replicator_docs:before_doc_update/3, + fun couch_replicator_docs:after_doc_read/2 + }; + IsUsersDb -> + { + fun fabric2_users_db:before_doc_update/3, + fun fabric2_users_db:after_doc_read/2 + }; + true -> + {undefined, undefined} + end, + + Db#{ + before_doc_update := BDU, + after_doc_read := ADR + }. + + +make_db_info(DbName, Props) -> + BaseProps = [ + {cluster, {[{n, 0}, {q, 0}, {r, 0}, {w, 0}]}}, + {compact_running, false}, + {data_size, 0}, + {db_name, DbName}, + {disk_format_version, 0}, + {disk_size, 0}, + {instance_start_time, <<"0">>}, + {purge_seq, 0} + ], + + lists:foldl(fun({Key, Val}, Acc) -> + lists:keystore(Key, 1, Acc, {Key, Val}) + end, BaseProps, Props). 
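A sketch of validate_dbname/1 from earlier in this hunk (illustrative only, assuming no validate_dbname plugin overrides the default check and the default 238-byte length limit is in effect):

    ok = fabric2_db:validate_dbname(<<"accounting/2024_q1">>),
    %% Names must start with a lowercase letter per ?DBNAME_REGEX:
    {error, {illegal_database_name, _}} = fabric2_db:validate_dbname(<<"0day">>),
    %% Over-long names are rejected before the pattern check:
    TooLong = binary:copy(<<"a">>, 300),
    {error, {database_name_too_long, _}} = fabric2_db:validate_dbname(TooLong).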
+ + +drain_info_futures(FutureQ, Count, _UserFun, Acc) when Count < 100 -> + {FutureQ, Count, Acc}; + +drain_info_futures(FutureQ, Count, UserFun, Acc) when Count >= 100 -> + {{value, {DbName, Future}}, RestQ} = queue:out(FutureQ), + InfoProps = fabric2_fdb:get_info_wait(Future), + DbInfo = make_db_info(DbName, InfoProps), + NewAcc = maybe_stop(UserFun({row, DbInfo}, Acc)), + {RestQ, Count - 1, NewAcc}. + + +drain_all_info_futures(FutureQ, UserFun, Acc) -> + case queue:out(FutureQ) of + {{value, {DbName, Future}}, RestQ} -> + InfoProps = fabric2_fdb:get_info_wait(Future), + DbInfo = make_db_info(DbName, InfoProps), + NewAcc = maybe_stop(UserFun({row, DbInfo}, Acc)), + drain_all_info_futures(RestQ, UserFun, NewAcc); + {empty, _} -> + Acc + end. + + +drain_deleted_info_futures(FutureQ, Count, _UserFun, Acc) when Count < 100 -> + {FutureQ, Count, Acc}; + +drain_deleted_info_futures(FutureQ, Count, UserFun, Acc) when Count >= 100 -> + {{value, {DbName, TimeStamp, Future}}, RestQ} = queue:out(FutureQ), + BaseProps = fabric2_fdb:get_info_wait(Future), + DeletedProps = BaseProps ++ [ + {deleted, true}, + {timestamp, TimeStamp} + ], + DbInfo = make_db_info(DbName, DeletedProps), + NewAcc = maybe_stop(UserFun({row, DbInfo}, Acc)), + {RestQ, Count - 1, NewAcc}. + + +drain_all_deleted_info_futures(FutureQ, UserFun, Acc) -> + case queue:out(FutureQ) of + {{value, {DbName, TimeStamp, Future}}, RestQ} -> + BaseProps = fabric2_fdb:get_info_wait(Future), + DeletedProps = BaseProps ++ [ + {deleted, true}, + {timestamp, TimeStamp} + ], + DbInfo = make_db_info(DbName, DeletedProps), + NewAcc = maybe_stop(UserFun({row, DbInfo}, Acc)), + drain_all_deleted_info_futures(RestQ, UserFun, NewAcc); + {empty, _} -> + Acc + end. + + +fold_docs_get_revs(Db, <<?LOCAL_DOC_PREFIX, _/binary>> = DocId, _) -> + fabric2_fdb:get_local_doc_rev_future(Db, DocId); + +fold_docs_get_revs(Db, DocId, true) -> + fabric2_fdb:get_all_revs_future(Db, DocId); + +fold_docs_get_revs(Db, DocId, false) -> + fabric2_fdb:get_winning_revs_future(Db, DocId, 1). + + +fold_docs_get_revs_wait(_Db, <<?LOCAL_DOC_PREFIX, _/binary>>, RevsFuture) -> + Rev = fabric2_fdb:get_local_doc_rev_wait(RevsFuture), + [Rev]; + +fold_docs_get_revs_wait(Db, _DocId, RevsFuture) -> + fabric2_fdb:get_revs_wait(Db, RevsFuture). + + +fold_docs_get_doc_body_future(Db, <<?LOCAL_DOC_PREFIX, _/binary>> = DocId, + [Rev]) -> + fabric2_fdb:get_local_doc_body_future(Db, DocId, Rev); + +fold_docs_get_doc_body_future(Db, DocId, Revs) -> + Winner = get_rev_winner(Revs), + fabric2_fdb:get_doc_body_future(Db, DocId, Winner). + + +fold_docs_get_doc_body_wait(Db, <<?LOCAL_DOC_PREFIX, _/binary>> = DocId, [Rev], + _DocOpts, BodyFuture) -> + case fabric2_fdb:get_local_doc_body_wait(Db, DocId, Rev, BodyFuture) of + {not_found, missing} -> {not_found, missing}; + Doc -> {ok, Doc} + end; + +fold_docs_get_doc_body_wait(Db, DocId, Revs, DocOpts, BodyFuture) -> + RevInfo = get_rev_winner(Revs), + Base = fabric2_fdb:get_doc_body_wait(Db, DocId, RevInfo, + BodyFuture), + apply_open_doc_opts(Base, Revs, DocOpts). + + +drain_fold_docs_revs_futures(_TxDb, #{revs_count := C} = Acc) when C < 100 -> + Acc; +drain_fold_docs_revs_futures(TxDb, Acc) -> + drain_one_fold_docs_revs_future(TxDb, Acc). + + +drain_all_fold_docs_revs_futures(_TxDb, #{revs_count := C} = Acc) when C =< 0 -> + Acc; +drain_all_fold_docs_revs_futures(TxDb, #{revs_count := C} = Acc) when C > 0 -> + NewAcc = drain_one_fold_docs_revs_future(TxDb, Acc), + drain_all_fold_docs_revs_futures(TxDb, NewAcc). 
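%% The drain_* helpers above and below implement a simple pipelining pattern:
%% each queue accumulates up to 100 outstanding erlfdb futures (the hard-coded
%% guard in the Count/revs_count/body_count clauses) before the oldest future
%% is forced, so revision lookups and document body reads overlap with the
%% requests that are still being issued instead of being awaited one at a time.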
+ + +drain_one_fold_docs_revs_future(TxDb, Acc) -> + #{ + revs_q := RevsQ, + revs_count := RevsCount, + body_q := BodyQ, + body_count := BodyCount + } = Acc, + {{value, {DocId, RevsFuture}}, RestRevsQ} = queue:out(RevsQ), + + Revs = fold_docs_get_revs_wait(TxDb, DocId, RevsFuture), + DocFuture = case Revs of + [] -> + {DocId, [], not_found}; + [_ | _] -> + BodyFuture = fold_docs_get_doc_body_future(TxDb, DocId, Revs), + {DocId, Revs, BodyFuture} + end, + NewAcc = Acc#{ + revs_q := RestRevsQ, + revs_count := RevsCount - 1, + body_q := queue:in(DocFuture, BodyQ), + body_count := BodyCount + 1 + }, + drain_fold_docs_body_futures(TxDb, NewAcc). + + +drain_fold_docs_body_futures(_TxDb, #{body_count := C} = Acc) when C < 100 -> + Acc; +drain_fold_docs_body_futures(TxDb, Acc) -> + drain_one_fold_docs_body_future(TxDb, Acc). + + +drain_all_fold_docs_body_futures(_TxDb, #{body_count := C} = Acc) when C =< 0 -> + Acc; +drain_all_fold_docs_body_futures(TxDb, #{body_count := C} = Acc) when C > 0 -> + NewAcc = drain_one_fold_docs_body_future(TxDb, Acc), + drain_all_fold_docs_body_futures(TxDb, NewAcc). + + +drain_one_fold_docs_body_future(TxDb, Acc) -> + #{ + body_q := BodyQ, + body_count := BodyCount, + doc_opts := DocOpts, + user_fun := UserFun, + user_acc := UserAcc + } = Acc, + {{value, {DocId, Revs, BodyFuture}}, RestBodyQ} = queue:out(BodyQ), + Doc = case BodyFuture of + not_found -> + {not_found, missing}; + _ -> + fold_docs_get_doc_body_wait(TxDb, DocId, Revs, DocOpts, BodyFuture) + end, + NewUserAcc = maybe_stop(UserFun(DocId, Doc, UserAcc)), + Acc#{ + body_q := RestBodyQ, + body_count := BodyCount - 1, + user_acc := NewUserAcc + }. + + +get_rev_winner(Revs) -> + [Winner] = lists:filter(fun(Rev) -> + maps:get(winner, Rev) + end, Revs), + Winner. + + +new_revid(Db, Doc) -> + #doc{ + id = DocId, + body = Body, + revs = {OldStart, OldRevs}, + atts = Atts, + deleted = Deleted + } = Doc, + + {NewAtts, AttSigInfo} = lists:mapfoldl(fun(Att, Acc) -> + [Name, Type, Data, Md5] = couch_att:fetch([name, type, data, md5], Att), + case Data of + {loc, _, _, _} -> + {Att, [{Name, Type, Md5} | Acc]}; + _ -> + Att1 = couch_att:flush(Db, DocId, Att), + Att2 = couch_att:store(revpos, OldStart + 1, Att1), + {Att2, [{Name, Type, couch_att:fetch(md5, Att2)} | Acc]} + end + end, [], Atts), + + Rev = case length(Atts) == length(AttSigInfo) of + true -> + OldRev = case OldRevs of [] -> 0; [OldRev0 | _] -> OldRev0 end, + SigTerm = [Deleted, OldStart, OldRev, Body, AttSigInfo], + couch_hash:md5_hash(term_to_binary(SigTerm, [{minor_version, 1}])); + false -> + erlang:error(missing_att_info) + end, + + Doc#doc{ + revs = {OldStart + 1, [Rev | OldRevs]}, + atts = NewAtts + }. + + +get_all_docs_meta(TxDb, Options) -> + NS = couch_util:get_value(namespace, Options), + DocCount = get_doc_count(TxDb, NS), + case lists:keyfind(update_seq, 1, Options) of + {_, true} -> + UpdateSeq = fabric2_db:get_update_seq(TxDb), + [{update_seq, UpdateSeq}]; + _ -> + [] + end ++ [{total, DocCount}, {offset, null}]. + + +maybe_set_interactive(#{} = Db, Options) -> + Interactive = fabric2_util:get_value(interactive, Options, false), + Db#{interactive := Interactive}. + + +maybe_set_user_ctx(Db, Options) -> + case fabric2_util:get_value(user_ctx, Options) of + #user_ctx{} = UserCtx -> + set_user_ctx(Db, UserCtx); + undefined -> + Db + end. 
+ + +is_member(Db, {SecProps}) when is_list(SecProps) -> + case is_admin(Db, {SecProps}) of + true -> + true; + false -> + case is_public_db(SecProps) of + true -> + true; + false -> + {Members} = get_members(SecProps), + UserCtx = get_user_ctx(Db), + is_authorized(Members, UserCtx) + end + end. + + +is_authorized(Group, UserCtx) -> + #user_ctx{ + name = UserName, + roles = UserRoles + } = UserCtx, + Names = fabric2_util:get_value(<<"names">>, Group, []), + Roles = fabric2_util:get_value(<<"roles">>, Group, []), + case check_security(roles, UserRoles, [<<"_admin">> | Roles]) of + true -> + true; + false -> + check_security(names, UserName, Names) + end. + + +check_security(roles, [], _) -> + false; +check_security(roles, UserRoles, Roles) -> + UserRolesSet = ordsets:from_list(UserRoles), + RolesSet = ordsets:from_list(Roles), + not ordsets:is_disjoint(UserRolesSet, RolesSet); +check_security(names, _, []) -> + false; +check_security(names, null, _) -> + false; +check_security(names, UserName, Names) -> + lists:member(UserName, Names). + + +throw_security_error(#user_ctx{name = null} = UserCtx) -> + Reason = <<"You are not authorized to access this db.">>, + throw_security_error(UserCtx, Reason); +throw_security_error(#user_ctx{name = _} = UserCtx) -> + Reason = <<"You are not allowed to access this db.">>, + throw_security_error(UserCtx, Reason). + + +throw_security_error(#user_ctx{} = UserCtx, Reason) -> + Error = security_error_type(UserCtx), + throw({Error, Reason}). + + +security_error_type(#user_ctx{name = null}) -> + unauthorized; +security_error_type(#user_ctx{name = _}) -> + forbidden. + + +is_public_db(SecProps) -> + {Members} = get_members(SecProps), + Names = fabric2_util:get_value(<<"names">>, Members, []), + Roles = fabric2_util:get_value(<<"roles">>, Members, []), + Names =:= [] andalso Roles =:= []. + + +get_admins(SecProps) -> + fabric2_util:get_value(<<"admins">>, SecProps, {[]}). + + +get_members(SecProps) -> + % we fallback to readers here for backwards compatibility + case fabric2_util:get_value(<<"members">>, SecProps) of + undefined -> + fabric2_util:get_value(<<"readers">>, SecProps, {[]}); + Members -> + Members + end. + + +apply_open_doc_opts(Doc0, Revs, Options) -> + IncludeRevsInfo = lists:member(revs_info, Options), + IncludeConflicts = lists:member(conflicts, Options), + IncludeDelConflicts = lists:member(deleted_conflicts, Options), + IncludeLocalSeq = lists:member(local_seq, Options), + + % This revs_info becomes fairly useless now that we're + % not keeping old document bodies around... 
+ Meta1 = if not IncludeRevsInfo -> []; true -> + {Pos, [Rev | RevPath]} = Doc0#doc.revs, + RevPathMissing = lists:map(fun(R) -> {R, missing} end, RevPath), + [{revs_info, Pos, [{Rev, available} | RevPathMissing]}] + end, + + Meta2 = if not IncludeConflicts -> []; true -> + Conflicts = [RI || RI = #{winner := false, deleted := false} <- Revs], + if Conflicts == [] -> []; true -> + ConflictRevs = [maps:get(rev_id, RI) || RI <- Conflicts], + [{conflicts, ConflictRevs}] + end + end, + + Meta3 = if not IncludeDelConflicts -> []; true -> + DelConflicts = [RI || RI = #{winner := false, deleted := true} <- Revs], + if DelConflicts == [] -> []; true -> + DelConflictRevs = [maps:get(rev_id, RI) || RI <- DelConflicts], + [{deleted_conflicts, DelConflictRevs}] + end + end, + + Meta4 = if not IncludeLocalSeq -> []; true -> + #{winner := true, sequence := SeqVS} = lists:last(Revs), + [{local_seq, fabric2_fdb:vs_to_seq(SeqVS)}] + end, + + Doc1 = case lists:keyfind(atts_since, 1, Options) of + {_, PossibleAncestors} -> + #doc{ + revs = DocRevs, + atts = Atts0 + } = Doc0, + RevPos = find_ancestor_rev_pos(DocRevs, PossibleAncestors), + Atts1 = lists:map(fun(Att) -> + [AttPos, Data] = couch_att:fetch([revpos, data], Att), + if AttPos > RevPos -> couch_att:store(data, Data, Att); + true -> couch_att:store(data, stub, Att) + end + end, Atts0), + Doc0#doc{atts = Atts1}; + false -> + Doc0 + end, + + {ok, Doc1#doc{meta = Meta1 ++ Meta2 ++ Meta3 ++ Meta4}}. + + +find_ancestor_rev_pos({_, []}, _PossibleAncestors) -> + 0; +find_ancestor_rev_pos(_DocRevs, []) -> + 0; +find_ancestor_rev_pos({RevPos, [RevId | Rest]}, AttsSinceRevs) -> + case lists:member({RevPos, RevId}, AttsSinceRevs) of + true -> RevPos; + false -> find_ancestor_rev_pos({RevPos - 1, Rest}, AttsSinceRevs) + end. + + +filter_found_revs(RevInfo, Revs) -> + #{ + rev_id := {Pos, Rev}, + rev_path := RevPath + } = RevInfo, + FullRevPath = [Rev | RevPath], + lists:flatmap(fun({FindPos, FindRev} = RevIdToFind) -> + if FindPos > Pos -> [RevIdToFind]; true -> + % Add 1 because lists:nth is 1 based + Idx = Pos - FindPos + 1, + case Idx > length(FullRevPath) of + true -> + [RevIdToFind]; + false -> + case lists:nth(Idx, FullRevPath) == FindRev of + true -> []; + false -> [RevIdToFind] + end + end + end + end, Revs). + + +find_possible_ancestors(RevInfos, MissingRevs) -> + % Find any revinfos that are possible ancestors + % of the missing revs. A possible ancestor is + % any rev that has a start position less than + % any missing revision. Stated alternatively, + % find any revinfo that could theoretically + % extended to be one or more of the missing + % revisions. + % + % Since we are looking at any missing revision + % we can just compare against the maximum missing + % start position. + MaxMissingPos = case MissingRevs of + [] -> 0; + [_ | _] -> lists:max([Start || {Start, _Rev} <- MissingRevs]) + end, + lists:flatmap(fun(RevInfo) -> + #{rev_id := {RevPos, _} = RevId} = RevInfo, + case RevPos < MaxMissingPos of + true -> [RevId]; + false -> [] + end + end, RevInfos). + + +apply_before_doc_update(Db, Docs, Options) -> + UpdateType = case lists:member(replicated_changes, Options) of + true -> replicated_changes; + false -> interactive_edit + end, + lists:map(fun(Doc) -> + fabric2_db_plugin:before_doc_update(Db, Doc, UpdateType) + end, Docs). 
+ + +update_doc_int(#{} = Db, #doc{} = Doc, Options) -> + IsLocal = case Doc#doc.id of + <<?LOCAL_DOC_PREFIX, _/binary>> -> true; + _ -> false + end, + try + case {IsLocal, is_replicated(Options)} of + {false, false} -> update_doc_interactive(Db, Doc, Options); + {false, true} -> update_doc_replicated(Db, Doc, Options); + {true, _} -> update_local_doc(Db, Doc, Options) + end + catch throw:{?MODULE, Return} -> + Return + end. + + +batch_update_docs(Db, Docs, Options) -> + BAcc = #bacc{ + db = Db, + docs = Docs, + batch_size = get_batch_size(Options), + options = Options, + rev_futures = #{}, + seen = [], + results = [] + }, + #bacc{results = Res} = batch_update_docs(BAcc), + lists:reverse(Res). + + +batch_update_docs(#bacc{docs = []} = BAcc) -> + BAcc; + +batch_update_docs(#bacc{db = Db} = BAcc) -> + #bacc{ + db = Db, + docs = Docs, + options = Options + } = BAcc, + + BAccTx2 = fabric2_fdb:transactional(Db, fun(TxDb) -> + BAccTx = BAcc#bacc{db = TxDb}, + case is_replicated(Options) of + false -> + Tagged = tag_docs(Docs), + RevFutures = get_winning_rev_futures(TxDb, Tagged), + BAccTx1 = BAccTx#bacc{ + docs = Tagged, + rev_futures = RevFutures + }, + batch_update_interactive_tx(BAccTx1); + true -> + BAccTx1 = batch_update_replicated_tx(BAccTx), + % For replicated updates reset `seen` after every transaction + BAccTx1#bacc{seen = []} + end + end), + + % Clean up after the transaction ends so we can recurse with a clean state + maps:map(fun(Tag, RangeFuture) when is_reference(Tag) -> + ok = erlfdb:cancel(RangeFuture, [flush]) + end, BAccTx2#bacc.rev_futures), + + BAcc1 = BAccTx2#bacc{ + db = Db, + rev_futures = #{} + }, + + batch_update_docs(BAcc1). + + +batch_update_interactive_tx(#bacc{docs = []} = BAcc) -> + BAcc; + +batch_update_interactive_tx(#bacc{} = BAcc) -> + #bacc{ + db = TxDb, + docs = [Doc | Docs], + options = Options, + batch_size = MaxSize, + rev_futures = RevFutures, + seen = Seen, + results = Results + } = BAcc, + {Res, Seen1} = try + update_docs_interactive(TxDb, Doc, Options, RevFutures, Seen) + catch throw:{?MODULE, Return} -> + {Return, Seen} + end, + BAcc1 = BAcc#bacc{ + docs = Docs, + results = [Res | Results], + seen = Seen1 + }, + case fabric2_fdb:get_approximate_tx_size(TxDb) > MaxSize of + true -> BAcc1; + false -> batch_update_interactive_tx(BAcc1) + end. + + +batch_update_replicated_tx(#bacc{docs = []} = BAcc) -> + BAcc; + +batch_update_replicated_tx(#bacc{} = BAcc) -> + #bacc{ + db = TxDb, + docs = [Doc | Docs], + options = Options, + batch_size = MaxSize, + seen = Seen, + results = Results + } = BAcc, + case lists:member(Doc#doc.id, Seen) of + true -> + % If we already updated this doc in the current transaction, wait + % till the next transaction to update it again. + BAcc; + false -> + Res = update_doc_int(TxDb, Doc, Options), + BAcc1 = BAcc#bacc{ + docs = Docs, + results = [Res | Results], + seen = [Doc#doc.id | Seen] + }, + case fabric2_fdb:get_approximate_tx_size(TxDb) > MaxSize of + true -> BAcc1; + false -> batch_update_replicated_tx(BAcc1) + end + end. 
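%% A hedged sketch (standalone helper, not used by the code above) of the
%% batching policy implemented by batch_update_interactive_tx/1 and
%% batch_update_replicated_tx/1: docs keep going into the current
%% transaction until the accumulated size passes a threshold, after which
%% the remaining docs roll over into the next transaction.
batch_by_size(Docs, MaxSize, SizeFun) ->
    split_batches(Docs, MaxSize, SizeFun, 0, []).

split_batches([], _Max, _SizeFun, _Size, Current) ->
    [lists:reverse(Current)];
split_batches([Doc | Rest], Max, SizeFun, Size, Current) ->
    Size1 = Size + SizeFun(Doc),
    case Size1 > Max of
        true ->
            % Threshold crossed: close this batch and start a fresh one
            [lists:reverse([Doc | Current]) |
                split_batches(Rest, Max, SizeFun, 0, [])];
        false ->
            split_batches(Rest, Max, SizeFun, Size1, [Doc | Current])
    end.
%% Example: batch_by_size([a, b, c], 5, fun(_) -> 3 end) => [[a, b], [c]].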
+ + +update_docs_interactive(Db, #doc{id = <<?LOCAL_DOC_PREFIX, _/binary>>} = Doc, + Options, _Futures, SeenIds) -> + {update_local_doc(Db, Doc, Options), SeenIds}; + +update_docs_interactive(Db, Doc, Options, Futures, SeenIds) -> + case lists:member(Doc#doc.id, SeenIds) of + true -> + {conflict, SeenIds}; + false -> + Future = maps:get(doc_tag(Doc), Futures), + case update_doc_interactive(Db, Doc, Future, Options) of + {ok, _} = Resp -> + {Resp, [Doc#doc.id | SeenIds]}; + _ = Resp -> + {Resp, SeenIds} + end + end. + + +update_doc_interactive(Db, Doc0, Options) -> + % Get the current winning revision. This is needed + % regardless of which branch we're updating. The extra + % revision we're grabbing is an optimization to + % save us a round trip if we end up deleting + % the winning revision branch. + NumRevs = if Doc0#doc.deleted -> 2; true -> 1 end, + Future = fabric2_fdb:get_winning_revs_future(Db, Doc0#doc.id, NumRevs), + update_doc_interactive(Db, Doc0, Future, Options). + + +update_doc_interactive(Db, Doc0, Future, _Options) -> + RevInfos = fabric2_fdb:get_revs_wait(Db, Future), + {Winner, SecondPlace} = case RevInfos of + [] -> {not_found, not_found}; + [WRI] -> {WRI, not_found}; + [WRI, SPRI] -> {WRI, SPRI} + end, + WinnerRevId = case Winner of + not_found -> + {0, <<>>}; + _ -> + case maps:get(deleted, Winner) of + true -> {0, <<>>}; + false -> maps:get(rev_id, Winner) + end + end, + + % Check that a revision was specified if required + Doc0RevId = doc_to_revid(Doc0), + HasRev = Doc0RevId =/= {0, <<>>}, + if HasRev orelse WinnerRevId == {0, <<>>} -> ok; true -> + ?RETURN({Doc0, conflict}) + end, + + % Allow inserting new deleted documents. Only works when the document has + % never existed to match CouchDB 3.x + case not HasRev andalso Doc0#doc.deleted andalso is_map(Winner) of + true -> ?RETURN({Doc0, conflict}); + false -> ok + end, + + % Get the target revision to update + Target = case Doc0RevId == WinnerRevId of + true -> + Winner; + false -> + case fabric2_fdb:get_non_deleted_rev(Db, Doc0#doc.id, Doc0RevId) of + #{deleted := false} = Target0 -> + Target0; + not_found -> + % Either a missing revision or a deleted + % revision. Either way a conflict. Note + % that we get not_found for a deleted revision + % because we only check for the non-deleted + % key in fdb + ?RETURN({Doc0, conflict}) + end + end, + + Doc1 = case Winner of + #{deleted := true} when not Doc0#doc.deleted -> + % When recreating a deleted document we want to extend + % the winning revision branch rather than create a + % new branch. If we did not do this we could be + % recreating into a state that previously existed. 
+ Doc0#doc{revs = fabric2_util:revinfo_to_revs(Winner)}; + #{} -> + % Otherwise we're extending the target's revision + % history with this update + Doc0#doc{revs = fabric2_util:revinfo_to_revs(Target)}; + not_found -> + % Creating a new doc means our revs start empty + Doc0 + end, + + % Validate the doc update and create the + % new revinfo map + Doc2 = prep_and_validate(Db, Doc1, Target), + + Doc3 = new_revid(Db, Doc2), + + #doc{ + deleted = NewDeleted, + revs = {NewRevPos, [NewRev | NewRevPath]}, + atts = Atts + } = Doc4 = stem_revisions(Db, Doc3), + + NewRevInfo = #{ + winner => undefined, + exists => false, + deleted => NewDeleted, + rev_id => {NewRevPos, NewRev}, + rev_path => NewRevPath, + sequence => undefined, + branch_count => undefined, + att_hash => fabric2_util:hash_atts(Atts), + rev_size => fabric2_util:rev_size(Doc4) + }, + + % Gather the list of possible winnig revisions + Possible = case Target == Winner of + true when not Doc4#doc.deleted -> + [NewRevInfo]; + true when Doc4#doc.deleted -> + case SecondPlace of + #{} -> [NewRevInfo, SecondPlace]; + not_found -> [NewRevInfo] + end; + false -> + [NewRevInfo, Winner] + end, + + % Sort the rev infos such that the winner is first + {NewWinner0, NonWinner} = case fabric2_util:sort_revinfos(Possible) of + [W] -> {W, not_found}; + [W, NW] -> {W, NW} + end, + + BranchCount = case Winner of + not_found -> 1; + #{branch_count := BC} -> BC + end, + NewWinner = NewWinner0#{branch_count := BranchCount}, + ToUpdate = if NonWinner == not_found -> []; true -> [NonWinner] end, + ToRemove = if Target == not_found -> []; true -> [Target] end, + + ok = fabric2_fdb:write_doc( + Db, + Doc4, + NewWinner, + Winner, + ToUpdate, + ToRemove + ), + + {ok, {NewRevPos, NewRev}}. + + +update_doc_replicated(Db, Doc0, _Options) -> + #doc{ + id = DocId, + deleted = Deleted, + revs = {RevPos, [Rev | RevPath]} + } = Doc0, + + DocRevInfo0 = #{ + winner => undefined, + exists => false, + deleted => Deleted, + rev_id => {RevPos, Rev}, + rev_path => RevPath, + sequence => undefined, + branch_count => undefined, + att_hash => <<>>, + rev_size => null + }, + + AllRevInfos = fabric2_fdb:get_all_revs(Db, DocId), + + RevTree = lists:foldl(fun(RI, TreeAcc) -> + RIPath = fabric2_util:revinfo_to_path(RI), + {Merged, _} = couch_key_tree:merge(TreeAcc, RIPath), + Merged + end, [], AllRevInfos), + + DocRevPath = fabric2_util:revinfo_to_path(DocRevInfo0), + + {NewTree, Status} = couch_key_tree:merge(RevTree, DocRevPath), + if Status /= internal_node -> ok; true -> + % We already know this revision so nothing + % left to do. + ?RETURN({Doc0, {ok, []}}) + end, + + % Its possible to have a replication with fewer than $revs_limit + % revisions which extends an existing branch. To avoid + % losing revision history we extract the new node from the + % tree and use the combined path after stemming. + {[{_, {RevPos, UnstemmedRevs}}], []} + = couch_key_tree:get(NewTree, [{RevPos, Rev}]), + + Doc1 = stem_revisions(Db, Doc0#doc{revs = {RevPos, UnstemmedRevs}}), + + {RevPos, [Rev | NewRevPath]} = Doc1#doc.revs, + DocRevInfo1 = DocRevInfo0#{rev_path := NewRevPath}, + + % Find any previous revision we knew about for + % validation and attachment handling. 
+ AllLeafsFull = couch_key_tree:get_all_leafs_full(NewTree), + LeafPath = get_leaf_path(RevPos, Rev, AllLeafsFull), + PrevRevInfo = find_prev_revinfo(RevPos, LeafPath), + Doc2 = prep_and_validate(Db, Doc1, PrevRevInfo), + Doc3 = flush_doc_atts(Db, Doc2), + DocRevInfo2 = DocRevInfo1#{ + atts_hash => fabric2_util:hash_atts(Doc3#doc.atts), + rev_size => fabric2_util:rev_size(Doc3) + }, + + % Possible winners are the previous winner and + % the new DocRevInfo + Winner = case fabric2_util:sort_revinfos(AllRevInfos) of + [#{winner := true} = WRI | _] -> WRI; + [] -> not_found + end, + {NewWinner0, NonWinner} = case Winner == PrevRevInfo of + true -> + {DocRevInfo2, not_found}; + false -> + [W, NW] = fabric2_util:sort_revinfos([Winner, DocRevInfo2]), + {W, NW} + end, + + NewWinner = NewWinner0#{branch_count := length(AllLeafsFull)}, + ToUpdate = if NonWinner == not_found -> []; true -> [NonWinner] end, + ToRemove = if PrevRevInfo == not_found -> []; true -> [PrevRevInfo] end, + + ok = fabric2_fdb:write_doc( + Db, + Doc3, + NewWinner, + Winner, + ToUpdate, + ToRemove + ), + + {ok, []}. + + +update_local_doc(Db, Doc0, _Options) -> + Doc1 = case increment_local_doc_rev(Doc0) of + {ok, Updated} -> Updated; + {error, Error} -> ?RETURN({Doc0, Error}) + end, + + ok = fabric2_fdb:write_local_doc(Db, Doc1), + + #doc{revs = {0, [Rev]}} = Doc1, + {ok, {0, integer_to_binary(Rev)}}. + + +flush_doc_atts(Db, Doc) -> + #doc{ + id = DocId, + atts = Atts + } = Doc, + NewAtts = lists:map(fun(Att) -> + case couch_att:fetch(data, Att) of + {loc, _, _, _} -> + Att; + _ -> + couch_att:flush(Db, DocId, Att) + end + end, Atts), + Doc#doc{atts = NewAtts}. + + +get_winning_rev_futures(Db, Docs) -> + lists:foldl(fun(Doc, Acc) -> + #doc{ + id = DocId, + deleted = Deleted + } = Doc, + IsLocal = case DocId of + <<?LOCAL_DOC_PREFIX, _/binary>> -> true; + _ -> false + end, + if IsLocal -> Acc; true -> + NumRevs = if Deleted -> 2; true -> 1 end, + Future = fabric2_fdb:get_winning_revs_future(Db, DocId, NumRevs), + DocTag = doc_tag(Doc), + Acc#{DocTag => Future} + end + end, #{}, Docs). + + +prep_and_validate(Db, NewDoc, PrevRevInfo) -> + HasStubs = couch_doc:has_stubs(NewDoc), + HasVDUs = [] /= maps:get(validate_doc_update_funs, Db), + IsDDoc = case NewDoc#doc.id of + <<?DESIGN_DOC_PREFIX, _/binary>> -> true; + _ -> false + end, + + WasDeleted = case PrevRevInfo of + not_found -> false; + #{deleted := D} -> D + end, + + PrevDoc = case HasStubs orelse (HasVDUs and not IsDDoc) of + true when PrevRevInfo /= not_found, not WasDeleted -> + case fabric2_fdb:get_doc_body(Db, NewDoc#doc.id, PrevRevInfo) of + #doc{} = PDoc -> PDoc; + {not_found, _} -> nil + end; + _ -> + nil + end, + + MergedDoc = if not HasStubs -> NewDoc; true -> + % This will throw an error if we have any + % attachment stubs missing data + couch_doc:merge_stubs(NewDoc, PrevDoc) + end, + check_duplicate_attachments(MergedDoc), + validate_doc_update(Db, MergedDoc, PrevDoc), + MergedDoc. 
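%% Hedged restatement (standalone helper, not part of the patch) of the
%% "fetch the previous body?" rule in prep_and_validate/3 above: the old
%% body is only needed when there are attachment stubs to merge, or when
%% VDU functions must run against a non-design doc, and only if a previous,
%% non-deleted revision actually exists.
needs_prev_body(HasStubs, HasVDUs, IsDDoc, PrevRevInfo, WasDeleted) ->
    (HasStubs orelse (HasVDUs andalso not IsDDoc))
        andalso PrevRevInfo =/= not_found
        andalso not WasDeleted.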
+ + +validate_doc_update(Db, #doc{id = <<"_design/", _/binary>>} = Doc, _) -> + case catch check_is_admin(Db) of + ok -> validate_ddoc(Db, Doc); + Error -> ?RETURN({Doc, Error}) + end; +validate_doc_update(Db, Doc, PrevDoc) -> + #{ + security_doc := Security, + validate_doc_update_funs := VDUs + } = Db, + Fun = fun() -> + JsonCtx = fabric2_util:user_ctx_to_json(Db), + lists:map(fun(VDU) -> + try + case VDU(Doc, PrevDoc, JsonCtx, Security) of + ok -> ok; + Error1 -> throw(Error1) + end + catch throw:Error2 -> + ?RETURN({Doc, Error2}) + end + end, VDUs) + end, + Stat = [couchdb, query_server, vdu_process_time], + if VDUs == [] -> ok; true -> + couch_stats:update_histogram(Stat, Fun) + end. + + +validate_ddoc(Db, DDoc) -> + try + ok = couch_index_server:validate(Db, couch_doc:with_ejson_body(DDoc)) + catch + throw:{invalid_design_doc, Reason} -> + throw({bad_request, invalid_design_doc, Reason}); + throw:{compilation_error, Reason} -> + throw({bad_request, compilation_error, Reason}); + throw:Error -> + ?RETURN({DDoc, Error}) + end. + + +validate_atomic_update(_, false) -> + ok; +validate_atomic_update(AllDocs, true) -> + % TODO actually perform the validation. This requires some hackery, we need + % to basically extract the prep_and_validate_updates function from couch_db + % and only run that, without actually writing in case of a success. + Error = {not_implemented, <<"all_or_nothing is not supported">>}, + PreCommitFailures = lists:map(fun(#doc{id=Id, revs = {Pos,Revs}}) -> + case Revs of [] -> RevId = <<>>; [RevId|_] -> ok end, + {{Id, {Pos, RevId}}, Error} + end, AllDocs), + throw({aborted, PreCommitFailures}). + + +check_duplicate_attachments(#doc{atts = Atts}) -> + lists:foldl(fun(Att, Names) -> + Name = couch_att:fetch(name, Att), + case ordsets:is_element(Name, Names) of + true -> throw({bad_request, <<"Duplicate attachments">>}); + false -> ordsets:add_element(Name, Names) + end + end, ordsets:new(), Atts). + + +get_since_seq(Db, rev, <<>>) -> + get_since_seq(Db, rev, now); + +get_since_seq(_Db, _Dir, Seq) when Seq == <<>>; Seq == <<"0">>; Seq == 0-> + fabric2_util:seq_zero_vs(); + +get_since_seq(Db, Dir, Seq) when Seq == now; Seq == <<"now">> -> + CurrSeq = fabric2_fdb:get_last_change(Db), + get_since_seq(Db, Dir, CurrSeq); + +get_since_seq(_Db, _Dir, Seq) when is_binary(Seq), size(Seq) == 24 -> + fabric2_fdb:next_vs(fabric2_fdb:seq_to_vs(Seq)); + +get_since_seq(Db, Dir, List) when is_list(List) -> + get_since_seq(Db, Dir, list_to_binary(List)); + +get_since_seq(_Db, _Dir, Seq) -> + erlang:error({invalid_since_seq, Seq}). + + +get_leaf_path(Pos, Rev, [{Pos, [{Rev, _RevInfo} | LeafPath]} | _]) -> + LeafPath; +get_leaf_path(Pos, Rev, [_WrongLeaf | RestLeafs]) -> + get_leaf_path(Pos, Rev, RestLeafs). + + +find_prev_revinfo(_Pos, []) -> + not_found; +find_prev_revinfo(Pos, [{_Rev, ?REV_MISSING} | RestPath]) -> + find_prev_revinfo(Pos - 1, RestPath); +find_prev_revinfo(_Pos, [{_Rev, #{} = RevInfo} | _]) -> + RevInfo. + + +increment_local_doc_rev(#doc{deleted = true} = Doc) -> + {ok, Doc#doc{revs = {0, [0]}}}; +increment_local_doc_rev(#doc{revs = {0, []}} = Doc) -> + {ok, Doc#doc{revs = {0, [1]}}}; +increment_local_doc_rev(#doc{revs = {0, [RevStr | _]}} = Doc) -> + try + PrevRev = binary_to_integer(RevStr), + {ok, Doc#doc{revs = {0, [PrevRev + 1]}}} + catch error:badarg -> + {error, <<"Invalid rev format">>} + end; +increment_local_doc_rev(#doc{}) -> + {error, <<"Invalid rev format">>}. 
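%% Quick examples (an EUnit-style sketch, not part of the patch; assumes the
%% #doc record from couch_db.hrl is in scope) of the local doc rev rules
%% implemented above: local revs are plain integers stored as binaries and
%% bumped on every write.
increment_local_doc_rev_examples() ->
    {ok, #doc{revs = {0, [1]}}} =
        increment_local_doc_rev(#doc{revs = {0, []}}),
    {ok, #doc{revs = {0, [6]}}} =
        increment_local_doc_rev(#doc{revs = {0, [<<"5">>]}}),
    {error, <<"Invalid rev format">>} =
        increment_local_doc_rev(#doc{revs = {0, [<<"x">>]}}),
    ok.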
+ + +doc_to_revid(#doc{revs = Revs}) -> + case Revs of + {0, []} -> {0, <<>>}; + {RevPos, [Rev | _]} -> {RevPos, Rev} + end. + + +tag_docs([]) -> + []; +tag_docs([#doc{meta = Meta} = Doc | Rest]) -> + Meta1 = lists:keystore(ref, 1, Meta, {ref, make_ref()}), + NewDoc = Doc#doc{meta = Meta1}, + [NewDoc | tag_docs(Rest)]. + + +doc_tag(#doc{meta = Meta}) -> + fabric2_util:get_value(ref, Meta). + + +idrevs({Id, Revs}) when is_list(Revs) -> + {docid(Id), [rev(R) || R <- Revs]}. + + +docid(DocId) when is_list(DocId) -> + list_to_binary(DocId); +docid(DocId) -> + DocId. + + +rev(Rev) when is_list(Rev); is_binary(Rev) -> + couch_doc:parse_rev(Rev); +rev({Seq, Hash} = Rev) when is_integer(Seq), is_binary(Hash) -> + Rev. + + +maybe_stop({ok, Acc}) -> + Acc; +maybe_stop({stop, Acc}) -> + throw({stop, Acc}). + + +set_design_doc_keys(Options1) -> + Dir = couch_util:get_value(dir, Options1, fwd), + Options2 = set_design_doc_start_key(Options1, Dir), + set_design_doc_end_key(Options2, Dir). + + +set_design_doc_start_key(Options, fwd) -> + Key1 = couch_util:get_value(start_key, Options, ?FIRST_DDOC_KEY), + Key2 = max(Key1, ?FIRST_DDOC_KEY), + lists:keystore(start_key, 1, Options, {start_key, Key2}); + +set_design_doc_start_key(Options, rev) -> + Key1 = couch_util:get_value(start_key, Options, ?LAST_DDOC_KEY), + Key2 = min(Key1, ?LAST_DDOC_KEY), + lists:keystore(start_key, 1, Options, {start_key, Key2}). + + +set_design_doc_end_key(Options, fwd) -> + case couch_util:get_value(end_key_gt, Options) of + undefined -> + Key1 = couch_util:get_value(end_key, Options, ?LAST_DDOC_KEY), + Key2 = min(Key1, ?LAST_DDOC_KEY), + lists:keystore(end_key, 1, Options, {end_key, Key2}); + EKeyGT -> + Key2 = min(EKeyGT, ?LAST_DDOC_KEY), + lists:keystore(end_key_gt, 1, Options, {end_key_gt, Key2}) + end; + +set_design_doc_end_key(Options, rev) -> + case couch_util:get_value(end_key_gt, Options) of + undefined -> + Key1 = couch_util:get_value(end_key, Options, ?FIRST_DDOC_KEY), + Key2 = max(Key1, ?FIRST_DDOC_KEY), + lists:keystore(end_key, 1, Options, {end_key, Key2}); + EKeyGT -> + Key2 = max(EKeyGT, ?FIRST_DDOC_KEY), + lists:keystore(end_key_gt, 1, Options, {end_key_gt, Key2}) + end. + + +stem_revisions(#{} = Db, #doc{} = Doc) -> + #{revs_limit := RevsLimit} = Db, + #doc{revs = {RevPos, Revs}} = Doc, + case RevPos >= RevsLimit of + true -> Doc#doc{revs = {RevPos, lists:sublist(Revs, RevsLimit)}}; + false -> Doc + end. + + +open_json_doc(Db, DocId, OpenOpts, DocOpts) -> + case fabric2_db:open_doc(Db, DocId, OpenOpts) of + {not_found, missing} -> + []; + {ok, #doc{deleted = true}} -> + [{doc, null}]; + {ok, #doc{} = Doc} -> + [{doc, couch_doc:to_json_obj(Doc, DocOpts)}] + end. + + +get_cached_db(#{} = Db, Opts) when is_list(Opts) -> + MaxAge = fabric2_util:get_value(max_age, Opts, 0), + Now = erlang:monotonic_time(millisecond), + Age = Now - maps:get(check_current_ts, Db), + case Age < MaxAge of + true -> + Db; + false -> + fabric2_fdb:transactional(Db, fun(TxDb) -> + fabric2_fdb:ensure_current(TxDb) + end) + end. + + +is_replicated(Options) when is_list(Options) -> + lists:member(replicated_changes, Options). + + +get_batch_size(Options) -> + case fabric2_util:get_value(batch_size, Options) of + undefined -> + config:get_integer("fabric", "update_docs_batch_size", + ?DEFAULT_UPDATE_DOCS_BATCH_SIZE); + Val when is_integer(Val) -> + Val + end. 
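%% Worked example (illustrative, not part of the patch) of revision stemming
%% as implemented in stem_revisions/2 above: with revs_limit = 2 only the
%% two most recent rev hashes are kept, while the revision start position
%% stays the same.
stem_revisions_example() ->
    Db = #{revs_limit => 2},
    Doc = #doc{revs = {4, [<<"d">>, <<"c">>, <<"b">>, <<"a">>]}},
    #doc{revs = {4, [<<"d">>, <<"c">>]}} = stem_revisions(Db, Doc),
    ok.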
diff --git a/src/fabric/src/fabric2_db_expiration.erl b/src/fabric/src/fabric2_db_expiration.erl new file mode 100644 index 000000000..92f22e749 --- /dev/null +++ b/src/fabric/src/fabric2_db_expiration.erl @@ -0,0 +1,246 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(fabric2_db_expiration). + + +-behaviour(gen_server). + + +-export([ + start_link/0, + cleanup/1, + process_expirations/2 +]). + +-export([ + init/1, + handle_call/3, + handle_cast/2, + handle_info/2, + terminate/2, + code_change/3 +]). + + +-include_lib("couch/include/couch_db.hrl"). +-include_lib("fabric/include/fabric2.hrl"). + +-define(JOB_TYPE, <<"db_expiration">>). +-define(JOB_ID, <<"db_expiration_job">>). +-define(DEFAULT_JOB_Version, 1). +-define(DEFAULT_RETENTION_SEC, 172800). % 48 hours +-define(DEFAULT_SCHEDULE_SEC, 3600). % 1 hour +-define(ERROR_RESCHEDULE_SEC, 5). +-define(CHECK_ENABLED_SEC, 2). +-define(JOB_TIMEOUT_SEC, 30). + + +-record(st, { + job +}). + + +start_link() -> + gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). + + +init(_) -> + process_flag(trap_exit, true), + {ok, #st{job = undefined}, 0}. + + +terminate(_M, _St) -> + ok. + + +handle_call(Msg, _From, St) -> + {stop, {bad_call, Msg}, {bad_call, Msg}, St}. + + +handle_cast(Msg, St) -> + {stop, {bad_cast, Msg}, St}. + + +handle_info(timeout, #st{job = undefined} = St) -> + ok = wait_for_couch_jobs_app(), + ok = couch_jobs:set_type_timeout(?JOB_TYPE, ?JOB_TIMEOUT_SEC), + ok = maybe_add_job(), + Pid = spawn_link(?MODULE, cleanup, [is_enabled()]), + {noreply, St#st{job = Pid}}; + +handle_info({'EXIT', Pid, Exit}, #st{job = Pid} = St) -> + case Exit of + normal -> ok; + Error -> couch_log:error("~p : job error ~p", [?MODULE, Error]) + end, + NewPid = spawn_link(?MODULE, cleanup, [is_enabled()]), + {noreply, St#st{job = NewPid}}; + +handle_info(Msg, St) -> + {stop, {bad_info, Msg}, St}. + + +code_change(_OldVsn, St, _Extra) -> + {ok, St}. + + +wait_for_couch_jobs_app() -> + % Because of a circular dependency between couch_jobs and fabric apps, wait + % for couch_jobs to initialize before continuing. If we refactor the + % commits FDB utilities out we can remove this bit of code. + case lists:keysearch(couch_jobs, 1, application:which_applications()) of + {value, {couch_jobs, _, _}} -> + ok; + false -> + timer:sleep(100), + wait_for_couch_jobs_app() + end. + + +maybe_add_job() -> + case couch_jobs:get_job_data(undefined, ?JOB_TYPE, job_id()) of + {error, not_found} -> + Now = erlang:system_time(second), + ok = couch_jobs:add(undefined, ?JOB_TYPE, job_id(), #{}, Now); + {ok, _JobData} -> + ok + end. 
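%% Hedged sketch of the same wait-until-started pattern used by
%% wait_for_couch_jobs_app/0 above, generalized over the application name
%% (illustrative only; the code above hardcodes couch_jobs).
wait_for_app(App) ->
    case lists:keymember(App, 1, application:which_applications()) of
        true ->
            ok;
        false ->
            timer:sleep(100),
            wait_for_app(App)
    end.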
+ + +cleanup(false) -> + timer:sleep(?CHECK_ENABLED_SEC * 1000), + exit(normal); + +cleanup(true) -> + Now = erlang:system_time(second), + ScheduleSec = schedule_sec(), + Opts = #{max_sched_time => Now + min(ScheduleSec div 3, 15)}, + case couch_jobs:accept(?JOB_TYPE, Opts) of + {ok, Job, Data} -> + try + {ok, Job1, Data1} = ?MODULE:process_expirations(Job, Data), + ok = resubmit_job(Job1, Data1, schedule_sec()) + catch + _Tag:Error -> + Stack = erlang:get_stacktrace(), + couch_log:error("~p : processing error ~p ~p ~p", + [?MODULE, Job, Error, Stack]), + ok = resubmit_job(Job, Data, ?ERROR_RESCHEDULE_SEC), + exit({job_error, Error, Stack}) + end; + {error, not_found} -> + timer:sleep(?CHECK_ENABLED_SEC * 1000), + ?MODULE:cleanup(is_enabled()) + end. + + +resubmit_job(Job, Data, After) -> + Now = erlang:system_time(second), + SchedTime = Now + After, + couch_jobs_fdb:tx(couch_jobs_fdb:get_jtx(), fun(JTx) -> + {ok, Job1} = couch_jobs:resubmit(JTx, Job, SchedTime), + ok = couch_jobs:finish(JTx, Job1, Data) + end), + ok. + + +process_expirations(#{} = Job, #{} = Data) -> + Start = now_sec(), + Callback = fun(Value, LastUpdateAt) -> + case Value of + {meta, _} -> ok; + {row, DbInfo} -> process_row(DbInfo); + complete -> ok + end, + {ok, maybe_report_progress(Job, LastUpdateAt)} + end, + {ok, _Infos} = fabric2_db:list_deleted_dbs_info( + Callback, + Start, + [{restart_tx, true}] + ), + {ok, Job, Data}. + + +process_row(DbInfo) -> + DbName = proplists:get_value(db_name, DbInfo), + TimeStamp = proplists:get_value(timestamp, DbInfo), + Now = now_sec(), + Retention = retention_sec(), + Since = Now - Retention, + case Since >= timestamp_to_sec(TimeStamp) of + true -> + couch_log:notice("Permanently deleting ~s database with" + " timestamp ~s", [DbName, TimeStamp]), + ok = fabric2_db:delete(DbName, [{deleted_at, TimeStamp}]); + false -> + ok + end. + + +maybe_report_progress(Job, LastUpdateAt) -> + % Update periodically the job so it doesn't expire + Now = now_sec(), + Progress = #{ + <<"processed_at">> => Now + + }, + case (Now - LastUpdateAt) > (?JOB_TIMEOUT_SEC div 2) of + true -> + couch_jobs:update(undefined, Job, Progress), + Now; + false -> + LastUpdateAt + end. + + +job_id() -> + JobVersion = job_version(), + <<?JOB_ID/binary, "-", JobVersion:16/integer>>. + + +now_sec() -> + Now = os:timestamp(), + Nowish = calendar:now_to_universal_time(Now), + calendar:datetime_to_gregorian_seconds(Nowish). + + +timestamp_to_sec(TimeStamp) -> + <<Year:4/binary, "-", Month:2/binary, "-", Day:2/binary, + "T", + Hour:2/binary, ":", Minutes:2/binary, ":", Second:2/binary, + "Z">> = TimeStamp, + + calendar:datetime_to_gregorian_seconds( + {{?bin2int(Year), ?bin2int(Month), ?bin2int(Day)}, + {?bin2int(Hour), ?bin2int(Minutes), ?bin2int(Second)}} + ). + + +is_enabled() -> + config:get_boolean("couchdb", "db_expiration_enabled", false). + + +job_version() -> + config:get_integer("couchdb", "db_expiration_job_version", + ?DEFAULT_JOB_Version). + + +retention_sec() -> + config:get_integer("couchdb", "db_expiration_retention_sec", + ?DEFAULT_RETENTION_SEC). + + +schedule_sec() -> + config:get_integer("couchdb", "db_expiration_schedule_sec", + ?DEFAULT_SCHEDULE_SEC). diff --git a/src/fabric/src/fabric2_db_plugin.erl b/src/fabric/src/fabric2_db_plugin.erl new file mode 100644 index 000000000..095b94cf4 --- /dev/null +++ b/src/fabric/src/fabric2_db_plugin.erl @@ -0,0 +1,112 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. 
You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(fabric2_db_plugin). + +-export([ + validate_dbname/3, + after_db_create/2, + after_db_delete/2, + before_doc_update/3, + after_doc_write/6, + after_doc_read/2, + validate_docid/1, + check_is_admin/1, + is_valid_purge_client/2 +]). + + +-include_lib("couch/include/couch_db.hrl"). + + +-define(SERVICE_ID, fabric2_db). + + +%% ------------------------------------------------------------------ +%% API Function Definitions +%% ------------------------------------------------------------------ + +validate_dbname(DbName, Normalized, Default) -> + maybe_handle(validate_dbname, [DbName, Normalized], Default). + + +after_db_create(DbName, DbUUID) when is_binary(DbName), is_binary(DbUUID) -> + with_pipe(after_db_create, [DbName, DbUUID]). + + +after_db_delete(DbName, DbUUID) when is_binary(DbName), is_binary(DbUUID) -> + with_pipe(after_db_delete, [DbName, DbUUID]). + + +before_doc_update(_, #doc{id = <<?LOCAL_DOC_PREFIX, _/binary>>} = Doc, _) -> + Doc; + +before_doc_update(Db, Doc0, UpdateType) -> + Fun = fabric2_db:get_before_doc_update_fun(Db), + case with_pipe(before_doc_update, [Doc0, Db, UpdateType]) of + [Doc1, _Db, UpdateType1] when is_function(Fun) -> + Fun(Doc1, Db, UpdateType1); + [Doc1, _Db, _UpdateType] -> + Doc1 + end. + + +after_doc_write(Db, Doc, NewWinner, OldWinner, NewRevId, Seq)-> + with_pipe(after_doc_write, [Db, Doc, NewWinner, OldWinner, NewRevId, Seq]). + + +after_doc_read(Db, Doc0) -> + Fun = fabric2_db:get_after_doc_read_fun(Db), + case with_pipe(after_doc_read, [Doc0, Db]) of + [Doc1, _Db] when is_function(Fun) -> Fun(Doc1, Db); + [Doc1, _Db] -> Doc1 + end. + + +validate_docid(Id) -> + Handle = couch_epi:get_handle(?SERVICE_ID), + %% callbacks return true only if it specifically allow the given Id + couch_epi:any(Handle, ?SERVICE_ID, validate_docid, [Id], []). + + +check_is_admin(Db) -> + Handle = couch_epi:get_handle(?SERVICE_ID), + %% callbacks return true only if it specifically allow the given Id + couch_epi:any(Handle, ?SERVICE_ID, check_is_admin, [Db], []). + + +is_valid_purge_client(DbName, Props) -> + Handle = couch_epi:get_handle(?SERVICE_ID), + %% callbacks return true only if it specifically allow the given Id + couch_epi:any(Handle, ?SERVICE_ID, is_valid_purge_client, [DbName, Props], []). + +%% ------------------------------------------------------------------ +%% Internal Function Definitions +%% ------------------------------------------------------------------ + +with_pipe(Func, Args) -> + do_apply(Func, Args, [pipe]). + +do_apply(Func, Args, Opts) -> + Handle = couch_epi:get_handle(?SERVICE_ID), + couch_epi:apply(Handle, ?SERVICE_ID, Func, Args, Opts). + +maybe_handle(Func, Args, Default) -> + Handle = couch_epi:get_handle(?SERVICE_ID), + case couch_epi:decide(Handle, ?SERVICE_ID, Func, Args, []) of + no_decision when is_function(Default) -> + apply(Default, Args); + no_decision -> + Default; + {decided, Result} -> + Result + end. 
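%% Illustrative restatement (standalone helper, not part of the patch) of
%% the decision flow in maybe_handle/3 above: a plugin may answer with
%% {decided, Result}; otherwise the caller-supplied default is used, either
%% as a plain value or as a fun applied to the original arguments.
resolve_decision(no_decision, Default, Args) when is_function(Default) ->
    apply(Default, Args);
resolve_decision(no_decision, Default, _Args) ->
    Default;
resolve_decision({decided, Result}, _Default, _Args) ->
    Result.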
diff --git a/src/fabric/src/fabric2_epi.erl b/src/fabric/src/fabric2_epi.erl new file mode 100644 index 000000000..f73eeb0d2 --- /dev/null +++ b/src/fabric/src/fabric2_epi.erl @@ -0,0 +1,48 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(fabric2_epi). + +-behaviour(couch_epi_plugin). + +-export([ + app/0, + providers/0, + services/0, + data_subscriptions/0, + data_providers/0, + processes/0, + notify/3 +]). + +app() -> + fabric. + +providers() -> + []. + +services() -> + [ + {fabric2_db, fabric2_db_plugin} + ]. + +data_subscriptions() -> + []. + +data_providers() -> + []. + +processes() -> + []. + +notify(_Key, _Old, _New) -> + ok. diff --git a/src/fabric/src/fabric2_events.erl b/src/fabric/src/fabric2_events.erl new file mode 100644 index 000000000..e1198243a --- /dev/null +++ b/src/fabric/src/fabric2_events.erl @@ -0,0 +1,102 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(fabric2_events). + + +-export([ + link_listener/4, + stop_listener/1 +]). + +-export([ + init/2, + poll/1 +]). + + +-include_lib("couch/include/couch_db.hrl"). + + +link_listener(Mod, Fun, Acc, Options) -> + State = #{ + dbname => fabric2_util:get_value(dbname, Options), + uuid => fabric2_util:get_value(uuid, Options, undefined), + timeout => fabric2_util:get_value(timeout, Options, 1000), + mod => Mod, + callback => Fun, + acc => Acc + }, + Pid = spawn_link(?MODULE, init, [self(), State]), + receive + {Pid, initialized} -> ok + end, + {ok, Pid}. + + +stop_listener(Pid) -> + Pid ! stop_listening. + + +init(Parent, #{dbname := DbName} = State) -> + {ok, Db} = fabric2_db:open(DbName, [?ADMIN_CTX]), + Since = fabric2_db:get_update_seq(Db), + erlang:monitor(process, Parent), + Parent ! {self(), initialized}, + poll(State#{since => Since}). 
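%% Hedged usage sketch (handle_event/3 is a hypothetical callback, not part
%% of the patch): a listener that counts update notifications for one
%% database until it is deleted. Per poll/1 below, the callback is invoked
%% as Mod:Fun(DbName, updated | deleted, Acc) and returns {ok, NewAcc} to
%% keep listening or {stop, Acc} to stop.
%%
%%   {ok, Pid} = fabric2_events:link_listener(?MODULE, handle_event, 0,
%%       [{dbname, <<"mydb">>}, {timeout, 5000}]),
%%   ...
%%   fabric2_events:stop_listener(Pid).

handle_event(DbName, updated, Count) ->
    couch_log:info("~s updated, ~p updates seen so far", [DbName, Count + 1]),
    {ok, Count + 1};
handle_event(DbName, deleted, Count) ->
    couch_log:info("~s deleted after ~p updates", [DbName, Count]),
    {stop, Count}.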
+ + +poll(#{} = State) -> + #{ + dbname := DbName, + uuid := DbUUID, + timeout := Timeout, + since := Since, + mod := Mod, + callback := Fun, + acc := Acc + } = State, + {Resp, NewSince} = try + Opts = [?ADMIN_CTX, {uuid, DbUUID}], + case fabric2_db:open(DbName, Opts) of + {ok, Db} -> + case fabric2_db:get_update_seq(Db) of + Since -> + {{ok, Acc}, Since}; + Other -> + {Mod:Fun(DbName, updated, Acc), Other} + end; + Error -> + exit(Error) + end + catch error:database_does_not_exist -> + Mod:Fun(DbName, deleted, Acc), + {{stop, ok}, Since} + end, + receive + stop_listening -> + ok; + {'DOWN', _, _, _, _} -> + ok + after 0 -> + case Resp of + {ok, NewAcc} -> + timer:sleep(Timeout), + NewState = State#{ + since := NewSince, + acc := NewAcc + }, + ?MODULE:poll(NewState); + {stop, _} -> + ok + end + end. diff --git a/src/fabric/src/fabric2_fdb.erl b/src/fabric/src/fabric2_fdb.erl new file mode 100644 index 000000000..36fa451ab --- /dev/null +++ b/src/fabric/src/fabric2_fdb.erl @@ -0,0 +1,2085 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(fabric2_fdb). + + +-export([ + transactional/1, + transactional/3, + transactional/2, + + create/2, + open/2, + ensure_current/1, + delete/1, + undelete/3, + remove_deleted_db/2, + exists/1, + + get_dir/1, + + list_dbs/4, + list_dbs_info/4, + list_deleted_dbs_info/4, + + get_info/1, + get_info_future/2, + get_info_wait/1, + set_config/3, + + get_stat/2, + incr_stat/3, + incr_stat/4, + + get_all_revs/2, + get_all_revs_future/2, + get_winning_revs/3, + get_winning_revs_future/3, + get_revs_wait/2, + get_non_deleted_rev/3, + + get_doc_body/3, + get_doc_body_future/3, + get_doc_body_wait/4, + + get_local_doc_rev_future/2, + get_local_doc_rev_wait/1, + get_local_doc_body_future/3, + get_local_doc_body_wait/4, + get_local_doc/2, + get_local_doc_rev/3, + + write_doc/6, + write_local_doc/2, + + read_attachment/3, + write_attachment/4, + + get_last_change/1, + + fold_range/5, + + vs_to_seq/1, + seq_to_vs/1, + next_vs/1, + + new_versionstamp/1, + + get_approximate_tx_size/1, + + chunkify_binary/1, + chunkify_binary/2, + + debug_cluster/0, + debug_cluster/2 +]). + + +-include_lib("couch/include/couch_db.hrl"). +-include("fabric2.hrl"). + + +-define(MAX_FOLD_RANGE_RETRIES, 3). + + +-record(fold_acc, { + db, + restart_tx, + start_key, + end_key, + limit, + skip, + retries, + base_opts, + user_fun, + user_acc +}). + +-record(info_future, { + tx, + db_prefix, + changes_future, + meta_future, + uuid_future, + retries = 0 +}). + + +transactional(Fun) -> + do_transaction(Fun, undefined). + + +transactional(DbName, Options, Fun) when is_binary(DbName) -> + with_span(Fun, #{'db.name' => DbName}, fun() -> + transactional(fun(Tx) -> + Fun(init_db(Tx, DbName, Options)) + end) + end). 
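%% Hedged usage sketch (key and value are illustrative): run a fun inside a
%% FoundationDB transaction via the helper above and read back what was
%% just written. Everything inside the fun commits atomically or not at all.
transactional_roundtrip_example() ->
    fabric2_fdb:transactional(fun(Tx) ->
        erlfdb:set(Tx, <<"example-key">>, <<"example-value">>),
        erlfdb:wait(erlfdb:get(Tx, <<"example-key">>))
    end).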
+ + +transactional(#{tx := undefined} = Db, Fun) -> + DbName = maps:get(name, Db, undefined), + try + Db1 = refresh(Db), + Reopen = maps:get(reopen, Db1, false), + Db2 = maps:remove(reopen, Db1), + LayerPrefix = case Reopen of + true -> undefined; + false -> maps:get(layer_prefix, Db2) + end, + with_span(Fun, #{'db.name' => DbName}, fun() -> + do_transaction(fun(Tx) -> + case Reopen of + true -> Fun(reopen(Db2#{tx => Tx})); + false -> Fun(Db2#{tx => Tx}) + end + end, LayerPrefix) + end) + catch throw:{?MODULE, reopen} -> + with_span('db.reopen', #{'db.name' => DbName}, fun() -> + transactional(Db#{reopen => true}, Fun) + end) + end; + +transactional(#{tx := {erlfdb_transaction, _}} = Db, Fun) -> + DbName = maps:get(name, Db, undefined), + with_span(Fun, #{'db.name' => DbName}, fun() -> + Fun(Db) + end). + + +do_transaction(Fun, LayerPrefix) when is_function(Fun, 1) -> + Db = get_db_handle(), + try + erlfdb:transactional(Db, fun(Tx) -> + case get(erlfdb_trace) of + Name when is_binary(Name) -> + UId = erlang:unique_integer([positive]), + UIdBin = integer_to_binary(UId, 36), + TxId = <<Name/binary, "_", UIdBin/binary>>, + erlfdb:set_option(Tx, transaction_logging_enable, TxId); + _ -> + ok + end, + case is_transaction_applied(Tx) of + true -> + get_previous_transaction_result(); + false -> + execute_transaction(Tx, Fun, LayerPrefix) + end + end) + after + clear_transaction() + end. + + +create(#{} = Db0, Options) -> + #{ + name := DbName, + tx := Tx, + layer_prefix := LayerPrefix + } = Db1 = ensure_current(Db0, false), + + DbKey = erlfdb_tuple:pack({?ALL_DBS, DbName}, LayerPrefix), + HCA = erlfdb_hca:create(erlfdb_tuple:pack({?DB_HCA}, LayerPrefix)), + AllocPrefix = erlfdb_hca:allocate(HCA, Tx), + DbPrefix = erlfdb_tuple:pack({?DBS, AllocPrefix}, LayerPrefix), + erlfdb:set(Tx, DbKey, DbPrefix), + + % This key is responsible for telling us when something in + % the database cache (i.e., fabric2_server's ets table) has + % changed and requires re-loading. This currently includes + % revs_limit and validate_doc_update functions. There's + % no order to versioning here. Its just a value that changes + % that is used in the ensure_current check. 
+ DbVersionKey = erlfdb_tuple:pack({?DB_VERSION}, DbPrefix), + DbVersion = fabric2_util:uuid(), + erlfdb:set(Tx, DbVersionKey, DbVersion), + + UUID = fabric2_util:uuid(), + + Defaults = [ + {?DB_CONFIG, <<"uuid">>, UUID}, + {?DB_CONFIG, <<"revs_limit">>, ?uint2bin(1000)}, + {?DB_CONFIG, <<"security_doc">>, <<"{}">>}, + {?DB_STATS, <<"doc_count">>, ?uint2bin(0)}, + {?DB_STATS, <<"doc_del_count">>, ?uint2bin(0)}, + {?DB_STATS, <<"doc_design_count">>, ?uint2bin(0)}, + {?DB_STATS, <<"doc_local_count">>, ?uint2bin(0)}, + {?DB_STATS, <<"sizes">>, <<"external">>, ?uint2bin(2)}, + {?DB_STATS, <<"sizes">>, <<"views">>, ?uint2bin(0)} + ], + lists:foreach(fun + ({P, K, V}) -> + Key = erlfdb_tuple:pack({P, K}, DbPrefix), + erlfdb:set(Tx, Key, V); + ({P, S, K, V}) -> + Key = erlfdb_tuple:pack({P, S, K}, DbPrefix), + erlfdb:set(Tx, Key, V) + end, Defaults), + + UserCtx = fabric2_util:get_value(user_ctx, Options, #user_ctx{}), + Options1 = lists:keydelete(user_ctx, 1, Options), + + Db2 = Db1#{ + uuid => UUID, + db_prefix => DbPrefix, + db_version => DbVersion, + + revs_limit => 1000, + security_doc => {[]}, + user_ctx => UserCtx, + check_current_ts => erlang:monotonic_time(millisecond), + + validate_doc_update_funs => [], + before_doc_update => undefined, + after_doc_read => undefined, + % All other db things as we add features, + + db_options => Options1, + interactive => false + }, + aegis:init_db(Db2, Options). + + +open(#{} = Db0, Options) -> + #{ + name := DbName, + tx := Tx, + layer_prefix := LayerPrefix + } = Db1 = ensure_current(Db0, false), + + DbKey = erlfdb_tuple:pack({?ALL_DBS, DbName}, LayerPrefix), + DbPrefix = case erlfdb:wait(erlfdb:get(Tx, DbKey)) of + Bin when is_binary(Bin) -> Bin; + not_found -> erlang:error(database_does_not_exist) + end, + + DbVersionKey = erlfdb_tuple:pack({?DB_VERSION}, DbPrefix), + DbVersion = erlfdb:wait(erlfdb:get(Tx, DbVersionKey)), + + UserCtx = fabric2_util:get_value(user_ctx, Options, #user_ctx{}), + Options1 = lists:keydelete(user_ctx, 1, Options), + + UUID = fabric2_util:get_value(uuid, Options1), + Options2 = lists:keydelete(uuid, 1, Options1), + + Interactive = fabric2_util:get_value(interactive, Options2, false), + Options3 = lists:keydelete(interactive, 1, Options2), + + Db2 = Db1#{ + db_prefix => DbPrefix, + db_version => DbVersion, + + uuid => <<>>, + revs_limit => 1000, + security_doc => {[]}, + + user_ctx => UserCtx, + check_current_ts => erlang:monotonic_time(millisecond), + + % Place holders until we implement these + % bits. + validate_doc_update_funs => [], + before_doc_update => undefined, + after_doc_read => undefined, + + db_options => Options3, + interactive => Interactive + }, + + Db3 = load_config(Db2), + Db4 = aegis:open_db(Db3), + + case {UUID, Db4} of + {undefined, _} -> ok; + {<<_/binary>>, #{uuid := UUID}} -> ok; + {<<_/binary>>, #{uuid := _}} -> erlang:error(database_does_not_exist) + end, + + load_validate_doc_funs(Db4). 
+ + +% Match on `name` in the function head since some non-fabric2 db +% objects might not have names and so they don't get cached +refresh(#{tx := undefined, name := DbName} = Db) -> + #{ + uuid := UUID, + md_version := OldVer + } = Db, + + case fabric2_server:fetch(DbName, UUID) of + % Relying on these assumptions about the `md_version` value: + % - It is bumped every time `db_version` is bumped + % - Is a versionstamp, so we can check which one is newer + % - If it is `not_found`, it would sort less than a binary value + #{md_version := Ver} = Db1 when Ver > OldVer -> + Db1#{ + user_ctx := maps:get(user_ctx, Db), + security_fun := maps:get(security_fun, Db), + interactive := maps:get(interactive, Db) + }; + _ -> + Db + end; + +refresh(#{} = Db) -> + Db. + + + +reopen(#{} = OldDb) -> + require_transaction(OldDb), + #{ + tx := Tx, + name := DbName, + uuid := UUID, + db_options := Options, + user_ctx := UserCtx, + security_fun := SecurityFun, + interactive := Interactive + } = OldDb, + Options1 = lists:keystore(user_ctx, 1, Options, {user_ctx, UserCtx}), + NewDb = open(init_db(Tx, DbName, Options1), Options1), + + % Check if database was re-created + case {Interactive, maps:get(uuid, NewDb)} of + {true, _} -> ok; + {false, UUID} -> ok; + {false, _OtherUUID} -> error(database_does_not_exist) + end, + + NewDb#{security_fun := SecurityFun, interactive := Interactive}. + + +delete(#{} = Db) -> + DoRecovery = fabric2_util:do_recovery(), + case DoRecovery of + true -> soft_delete_db(Db); + false -> hard_delete_db(Db) + end. + + +undelete(#{} = Db0, TgtDbName, TimeStamp) -> + #{ + name := DbName, + tx := Tx, + layer_prefix := LayerPrefix + } = ensure_current(Db0, false), + DbKey = erlfdb_tuple:pack({?ALL_DBS, TgtDbName}, LayerPrefix), + case erlfdb:wait(erlfdb:get(Tx, DbKey)) of + Bin when is_binary(Bin) -> + file_exists; + not_found -> + DeletedDbTupleKey = { + ?DELETED_DBS, + DbName, + TimeStamp + }, + DeleteDbKey = erlfdb_tuple:pack(DeletedDbTupleKey, LayerPrefix), + case erlfdb:wait(erlfdb:get(Tx, DeleteDbKey)) of + not_found -> + not_found; + DbPrefix -> + erlfdb:set(Tx, DbKey, DbPrefix), + erlfdb:clear(Tx, DeleteDbKey), + bump_db_version(#{ + tx => Tx, + db_prefix => DbPrefix + }), + ok + end + end. + + +remove_deleted_db(#{} = Db0, TimeStamp) -> + #{ + name := DbName, + tx := Tx, + layer_prefix := LayerPrefix + } = ensure_current(Db0, false), + + DeletedDbTupleKey = { + ?DELETED_DBS, + DbName, + TimeStamp + }, + DeletedDbKey = erlfdb_tuple:pack(DeletedDbTupleKey, LayerPrefix), + case erlfdb:wait(erlfdb:get(Tx, DeletedDbKey)) of + not_found -> + not_found; + DbPrefix -> + erlfdb:clear(Tx, DeletedDbKey), + erlfdb:clear_range_startswith(Tx, DbPrefix), + bump_db_version(#{ + tx => Tx, + db_prefix => DbPrefix + }), + ok + end. + + +exists(#{name := DbName} = Db) when is_binary(DbName) -> + #{ + tx := Tx, + layer_prefix := LayerPrefix + } = ensure_current(Db, false), + + DbKey = erlfdb_tuple:pack({?ALL_DBS, DbName}, LayerPrefix), + case erlfdb:wait(erlfdb:get(Tx, DbKey)) of + Bin when is_binary(Bin) -> true; + not_found -> false + end. + + +get_dir(Tx) -> + Root = erlfdb_directory:root(), + Dir = fabric2_server:fdb_directory(), + CouchDB = erlfdb_directory:create_or_open(Tx, Root, Dir), + erlfdb_directory:get_name(CouchDB). 
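%% Tiny check (illustrative, not part of the patch) of the term-ordering
%% assumption called out in refresh/1 above: atoms sort before binaries in
%% Erlang, so a `not_found` md_version always compares as older than any
%% versionstamp binary.
md_version_ordering_example() ->
    true = not_found < <<0:112>>,
    ok.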
+ + +list_dbs(Tx, Callback, AccIn, Options0) -> + Options = case fabric2_util:get_value(restart_tx, Options0) of + undefined -> [{restart_tx, true} | Options0]; + _AlreadySet -> Options0 + end, + LayerPrefix = get_dir(Tx), + Prefix = erlfdb_tuple:pack({?ALL_DBS}, LayerPrefix), + fold_range({tx, Tx}, Prefix, fun({K, _V}, Acc) -> + {DbName} = erlfdb_tuple:unpack(K, Prefix), + Callback(DbName, Acc) + end, AccIn, Options). + + +list_dbs_info(Tx, Callback, AccIn, Options0) -> + Options = case fabric2_util:get_value(restart_tx, Options0) of + undefined -> [{restart_tx, true} | Options0]; + _AlreadySet -> Options0 + end, + LayerPrefix = get_dir(Tx), + Prefix = erlfdb_tuple:pack({?ALL_DBS}, LayerPrefix), + fold_range({tx, Tx}, Prefix, fun({DbNameKey, DbPrefix}, Acc) -> + {DbName} = erlfdb_tuple:unpack(DbNameKey, Prefix), + InfoFuture = get_info_future(Tx, DbPrefix), + Callback(DbName, InfoFuture, Acc) + end, AccIn, Options). + + +list_deleted_dbs_info(Tx, Callback, AccIn, Options0) -> + Options = case fabric2_util:get_value(restart_tx, Options0) of + undefined -> [{restart_tx, true} | Options0]; + _AlreadySet -> Options0 + end, + LayerPrefix = get_dir(Tx), + Prefix = erlfdb_tuple:pack({?DELETED_DBS}, LayerPrefix), + fold_range({tx, Tx}, Prefix, fun({DbKey, DbPrefix}, Acc) -> + {DbName, TimeStamp} = erlfdb_tuple:unpack(DbKey, Prefix), + InfoFuture = get_info_future(Tx, DbPrefix), + Callback(DbName, TimeStamp, InfoFuture, Acc) + end, AccIn, Options). + + +get_info(#{} = Db) -> + #{ + tx := Tx, + db_prefix := DbPrefix + } = ensure_current(Db), + get_info_wait(get_info_future(Tx, DbPrefix)). + + +get_info_future(Tx, DbPrefix) -> + {CStart, CEnd} = erlfdb_tuple:range({?DB_CHANGES}, DbPrefix), + ChangesFuture = erlfdb:get_range(Tx, CStart, CEnd, [ + {streaming_mode, exact}, + {limit, 1}, + {reverse, true} + ]), + + UUIDKey = erlfdb_tuple:pack({?DB_CONFIG, <<"uuid">>}, DbPrefix), + UUIDFuture = erlfdb:get(Tx, UUIDKey), + + StatsPrefix = erlfdb_tuple:pack({?DB_STATS}, DbPrefix), + MetaFuture = erlfdb:get_range_startswith(Tx, StatsPrefix), + + % Save the tx object only if it's read-only as we might retry to get the + % future again after the tx was reset + SaveTx = case erlfdb:get_writes_allowed(Tx) of + true -> undefined; + false -> Tx + end, + + #info_future{ + tx = SaveTx, + db_prefix = DbPrefix, + changes_future = ChangesFuture, + meta_future = MetaFuture, + uuid_future = UUIDFuture + }. + + +get_info_wait(#info_future{tx = Tx, retries = Retries} = Future) + when Tx =:= undefined orelse Retries >= 2 -> + get_info_wait_int(Future); + +get_info_wait(#info_future{tx = Tx, retries = Retries} = Future) -> + try + get_info_wait_int(Future) + catch + error:{erlfdb_error, ?TRANSACTION_CANCELLED} -> + Future1 = get_info_future(Tx, Future#info_future.db_prefix), + get_info_wait(Future1#info_future{retries = Retries + 1}); + error:{erlfdb_error, ?TRANSACTION_TOO_OLD} -> + ok = erlfdb:reset(Tx), + Future1 = get_info_future(Tx, Future#info_future.db_prefix), + get_info_wait(Future1#info_future{retries = Retries + 1}) + end. + + +load_config(#{} = Db) -> + #{ + tx := Tx, + db_prefix := DbPrefix + } = Db, + + {Start, End} = erlfdb_tuple:range({?DB_CONFIG}, DbPrefix), + Future = erlfdb:get_range(Tx, Start, End), + + lists:foldl(fun({K, V}, DbAcc) -> + {?DB_CONFIG, Key} = erlfdb_tuple:unpack(K, DbPrefix), + case Key of + <<"uuid">> -> DbAcc#{uuid := V}; + <<"revs_limit">> -> DbAcc#{revs_limit := ?bin2uint(V)}; + <<"security_doc">> -> DbAcc#{security_doc := ?JSON_DECODE(V)} + end + end, Db, erlfdb:wait(Future)). 
+ + +set_config(#{} = Db0, Key, Val) when is_atom(Key) -> + #{ + tx := Tx, + db_prefix := DbPrefix + } = Db = ensure_current(Db0), + {BinKey, BinVal} = case Key of + uuid -> {<<"uuid">>, Val}; + revs_limit -> {<<"revs_limit">>, ?uint2bin(max(1, Val))}; + security_doc -> {<<"security_doc">>, ?JSON_ENCODE(Val)} + end, + DbKey = erlfdb_tuple:pack({?DB_CONFIG, BinKey}, DbPrefix), + erlfdb:set(Tx, DbKey, BinVal), + {ok, DbVersion} = bump_db_version(Db), + {ok, Db#{db_version := DbVersion, Key := Val}}. + + +get_stat(#{} = Db, StatKey) -> + #{ + tx := Tx, + db_prefix := DbPrefix + } = ensure_current(Db), + + Key = erlfdb_tuple:pack({?DB_STATS, StatKey}, DbPrefix), + + % Might need to figure out some sort of type + % system here. Uints are because stats are all + % atomic op adds for the moment. + ?bin2uint(erlfdb:wait(erlfdb:get(Tx, Key))). + + +incr_stat(_Db, _StatKey, 0) -> + ok; + +incr_stat(#{} = Db, StatKey, Increment) when is_integer(Increment) -> + #{ + tx := Tx, + db_prefix := DbPrefix + } = ensure_current(Db), + + Key = erlfdb_tuple:pack({?DB_STATS, StatKey}, DbPrefix), + erlfdb:add(Tx, Key, Increment). + + +incr_stat(_Db, _Section, _Key, 0) -> + ok; + +incr_stat(#{} = Db, Section, Key, Increment) when is_integer(Increment) -> + #{ + tx := Tx, + db_prefix := DbPrefix + } = ensure_current(Db), + + BinKey = erlfdb_tuple:pack({?DB_STATS, Section, Key}, DbPrefix), + erlfdb:add(Tx, BinKey, Increment). + + +get_all_revs(#{} = Db, DocId) -> + DbName = maps:get(name, Db, undefined), + with_span('db.get_all_revs', #{'db.name' => DbName, 'doc.id' => DocId}, fun() -> + Future = get_all_revs_future(Db, DocId), + get_revs_wait(Db, Future) + end). + + +get_all_revs_future(#{} = Db, DocId) -> + Options = [{streaming_mode, want_all}], + get_revs_future(Db, DocId, Options). + + +get_winning_revs(Db, DocId, NumRevs) -> + DbName = maps:get(name, Db, undefined), + with_span('db.get_winning_revs', #{'db.name' => DbName, 'doc.id' => DocId}, fun() -> + Future = get_winning_revs_future(Db, DocId, NumRevs), + get_revs_wait(Db, Future) + end). + + +get_winning_revs_future(#{} = Db, DocId, NumRevs) -> + Options = [{reverse, true}, {limit, NumRevs}], + get_revs_future(Db, DocId, Options). + + +get_revs_future(#{} = Db, DocId, Options) -> + #{ + tx := Tx, + db_prefix := DbPrefix + } = ensure_current(Db), + + {StartKey, EndKey} = erlfdb_tuple:range({?DB_REVS, DocId}, DbPrefix), + erlfdb:fold_range_future(Tx, StartKey, EndKey, Options). + + +get_revs_wait(#{} = Db, RangeFuture) -> + #{ + tx := Tx, + db_prefix := DbPrefix + } = ensure_current(Db), + + RevRows = erlfdb:fold_range_wait(Tx, RangeFuture, fun({K, V}, Acc) -> + Key = erlfdb_tuple:unpack(K, DbPrefix), + Val = erlfdb_tuple:unpack(V), + [fdb_to_revinfo(Key, Val) | Acc] + end, []), + lists:reverse(RevRows). + + +get_non_deleted_rev(#{} = Db, DocId, RevId) -> + #{ + tx := Tx, + db_prefix := DbPrefix + } = ensure_current(Db), + + {RevPos, Rev} = RevId, + + BaseKey = {?DB_REVS, DocId, true, RevPos, Rev}, + Key = erlfdb_tuple:pack(BaseKey, DbPrefix), + case erlfdb:wait(erlfdb:get(Tx, Key)) of + not_found -> + not_found; + Val -> + fdb_to_revinfo(BaseKey, erlfdb_tuple:unpack(Val)) + end. + + +get_doc_body(Db, DocId, RevInfo) -> + DbName = maps:get(name, Db, undefined), + with_span('db.get_doc_body', #{'db.name' => DbName, 'doc.id' => DocId}, fun() -> + Future = get_doc_body_future(Db, DocId, RevInfo), + get_doc_body_wait(Db, DocId, RevInfo, Future) + end). 
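%% Hedged usage sketch for set_config/3 above: bump the revs_limit of an
%% open database handle inside a transaction (Db is assumed to be a db map
%% as produced by open/2; TxDb is the handle passed in by transactional/2).
set_revs_limit_example(Db) ->
    fabric2_fdb:transactional(Db, fun(TxDb) ->
        {ok, TxDb1} = fabric2_fdb:set_config(TxDb, revs_limit, 500),
        TxDb1
    end).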
+ + +get_doc_body_future(#{} = Db, DocId, RevInfo) -> + #{ + tx := Tx, + db_prefix := DbPrefix + } = ensure_current(Db), + + #{ + rev_id := {RevPos, Rev} + } = RevInfo, + + Key = {?DB_DOCS, DocId, RevPos, Rev}, + {StartKey, EndKey} = erlfdb_tuple:range(Key, DbPrefix), + erlfdb:fold_range_future(Tx, StartKey, EndKey, []). + + +get_doc_body_wait(#{} = Db0, DocId, RevInfo, Future) -> + #{ + tx := Tx + } = Db = ensure_current(Db0), + + #{ + rev_id := {RevPos, Rev}, + rev_path := RevPath + } = RevInfo, + + FoldFun = aegis:wrap_fold_fun(Db, fun({_K, V}, Acc) -> + [V | Acc] + end), + RevBodyRows = erlfdb:fold_range_wait(Tx, Future, FoldFun, []), + BodyRows = lists:reverse(RevBodyRows), + + fdb_to_doc(Db, DocId, RevPos, [Rev | RevPath], BodyRows). + + +get_local_doc_rev_future(Db, DocId) -> + #{ + tx := Tx, + db_prefix := DbPrefix + } = ensure_current(Db), + + Key = erlfdb_tuple:pack({?DB_LOCAL_DOCS, DocId}, DbPrefix), + erlfdb:get(Tx, Key). + + +get_local_doc_rev_wait(Future) -> + erlfdb:wait(Future). + + +get_local_doc_body_future(#{} = Db, DocId, _Rev) -> + #{ + tx := Tx, + db_prefix := DbPrefix + } = ensure_current(Db), + + Prefix = erlfdb_tuple:pack({?DB_LOCAL_DOC_BODIES, DocId}, DbPrefix), + erlfdb:get_range_startswith(Tx, Prefix). + + +get_local_doc_body_wait(#{} = Db0, DocId, Rev, Future) -> + Db = ensure_current(Db0), + + {_, Chunks} = lists:unzip(aegis:decrypt(Db, erlfdb:wait(Future))), + fdb_to_local_doc(Db, DocId, Rev, Chunks). + + +get_local_doc(#{} = Db, <<?LOCAL_DOC_PREFIX, _/binary>> = DocId) -> + RevFuture = get_local_doc_rev_future(Db, DocId), + Rev = get_local_doc_rev_wait(RevFuture), + + BodyFuture = get_local_doc_body_future(Db, DocId, Rev), + get_local_doc_body_wait(Db, DocId, Rev, BodyFuture). + + +get_local_doc_rev(_Db0, <<?LOCAL_DOC_PREFIX, _/binary>> = DocId, Val) -> + case Val of + <<255, RevBin/binary>> -> + % Versioned local docs + try + case erlfdb_tuple:unpack(RevBin) of + {?CURR_LDOC_FORMAT, Rev, _Size} -> Rev + end + catch _:_ -> + erlang:error({invalid_local_doc_rev, DocId, Val}) + end; + <<131, _/binary>> -> + % Compatibility clause for an older encoding format + try binary_to_term(Val, [safe]) of + {Rev, _} -> Rev; + _ -> erlang:error({invalid_local_doc_rev, DocId, Val}) + catch + error:badarg -> + erlang:error({invalid_local_doc_rev, DocId, Val}) + end; + <<_/binary>> -> + try binary_to_integer(Val) of + IntVal when IntVal >= 0 -> + Val; + _ -> + erlang:error({invalid_local_doc_rev, DocId, Val}) + catch + error:badarg -> + erlang:error({invalid_local_doc_rev, DocId, Val}) + end + end. + + +write_doc(#{} = Db0, Doc, NewWinner0, OldWinner, ToUpdate, ToRemove) -> + #{ + tx := Tx, + db_prefix := DbPrefix + } = Db = ensure_current(Db0), + + #doc{ + id = DocId, + deleted = Deleted, + atts = Atts + } = Doc, + + % Doc body + + ok = write_doc_body(Db, Doc), + + % Attachment bookkeeping + + % If a document's attachments have changed we have to scan + % for any attachments that may need to be deleted. The check + % for `>= 2` is a bit subtle. The important point is that + % one of the revisions will be from the new document so we + % have to find at least one more beyond that to assert that + % the attachments have not changed. 
+ AttHash = fabric2_util:hash_atts(Atts), + RevsToCheck = [NewWinner0] ++ ToUpdate ++ ToRemove, + AttHashCount = lists:foldl(fun(Att, Count) -> + #{att_hash := RevAttHash} = Att, + case RevAttHash == AttHash of + true -> Count + 1; + false -> Count + end + end, 0, RevsToCheck), + if + AttHashCount == length(RevsToCheck) -> + ok; + AttHashCount >= 2 -> + ok; + true -> + cleanup_attachments(Db, DocId, Doc, ToRemove) + end, + + % Revision tree + + NewWinner = NewWinner0#{ + winner := true + }, + NewRevId = maps:get(rev_id, NewWinner), + + {WKey, WVal, WinnerVS} = revinfo_to_fdb(Tx, DbPrefix, DocId, NewWinner), + ok = erlfdb:set_versionstamped_value(Tx, WKey, WVal), + + lists:foreach(fun(RI0) -> + RI = RI0#{winner := false}, + {K, V, undefined} = revinfo_to_fdb(Tx, DbPrefix, DocId, RI), + ok = erlfdb:set(Tx, K, V) + end, ToUpdate), + + lists:foreach(fun(RI0) -> + RI = RI0#{winner := false}, + {K, _, undefined} = revinfo_to_fdb(Tx, DbPrefix, DocId, RI), + ok = erlfdb:clear(Tx, K), + ok = clear_doc_body(Db, DocId, RI0) + end, ToRemove), + + % _all_docs + + UpdateStatus = case {OldWinner, NewWinner} of + {not_found, #{deleted := false}} -> + created; + {not_found, #{deleted := true}} -> + replicate_deleted; + {#{deleted := true}, #{deleted := false}} -> + recreated; + {#{deleted := false}, #{deleted := false}} -> + updated; + {#{deleted := false}, #{deleted := true}} -> + deleted; + {#{deleted := true}, #{deleted := true}} -> + ignore + end, + + case UpdateStatus of + replicate_deleted -> + ok; + ignore -> + ok; + deleted -> + ADKey = erlfdb_tuple:pack({?DB_ALL_DOCS, DocId}, DbPrefix), + ok = erlfdb:clear(Tx, ADKey); + _ -> + ADKey = erlfdb_tuple:pack({?DB_ALL_DOCS, DocId}, DbPrefix), + ADVal = erlfdb_tuple:pack(NewRevId), + ok = erlfdb:set(Tx, ADKey, ADVal) + end, + + % _changes + + if OldWinner == not_found -> ok; true -> + OldSeq = maps:get(sequence, OldWinner), + OldSeqKey = erlfdb_tuple:pack({?DB_CHANGES, OldSeq}, DbPrefix), + erlfdb:clear(Tx, OldSeqKey) + end, + + NewSeqKey = erlfdb_tuple:pack_vs({?DB_CHANGES, WinnerVS}, DbPrefix), + NewSeqVal = erlfdb_tuple:pack({DocId, Deleted, NewRevId}), + erlfdb:set_versionstamped_key(Tx, NewSeqKey, NewSeqVal), + + % Bump db version on design doc changes + + IsDDoc = case Doc#doc.id of + <<?DESIGN_DOC_PREFIX, _/binary>> -> true; + _ -> false + end, + + if not IsDDoc -> ok; true -> + bump_db_version(Db) + end, + + % Update our document counts + + case UpdateStatus of + created -> + if not IsDDoc -> ok; true -> + incr_stat(Db, <<"doc_design_count">>, 1) + end, + incr_stat(Db, <<"doc_count">>, 1); + recreated -> + if not IsDDoc -> ok; true -> + incr_stat(Db, <<"doc_design_count">>, 1) + end, + incr_stat(Db, <<"doc_count">>, 1), + incr_stat(Db, <<"doc_del_count">>, -1); + replicate_deleted -> + incr_stat(Db, <<"doc_del_count">>, 1); + ignore -> + ok; + deleted -> + if not IsDDoc -> ok; true -> + incr_stat(Db, <<"doc_design_count">>, -1) + end, + incr_stat(Db, <<"doc_count">>, -1), + incr_stat(Db, <<"doc_del_count">>, 1); + updated -> + ok + end, + + fabric2_db_plugin:after_doc_write(Db, Doc, NewWinner, OldWinner, + NewRevId, WinnerVS), + + % Update database size + AddSize = sum_add_rev_sizes([NewWinner | ToUpdate]), + RemSize = sum_rem_rev_sizes(ToRemove), + incr_stat(Db, <<"sizes">>, <<"external">>, AddSize - RemSize), + + ok. 
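%% Illustrative restatement of the update-status classification used above,
%% keyed on whether the old winner existed / was deleted and whether the new
%% winner is deleted. This is what drives the doc_count, doc_del_count and
%% doc_design_count bookkeeping.
update_status(not_found, false) -> created;
update_status(not_found, true) -> replicate_deleted;
update_status(true, false) -> recreated;
update_status(false, false) -> updated;
update_status(false, true) -> deleted;
update_status(true, true) -> ignore.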
+ + +write_local_doc(#{} = Db0, Doc) -> + #{ + tx := Tx, + db_prefix := DbPrefix + } = Db = ensure_current(Db0), + + Id = Doc#doc.id, + + {LDocKey, LDocVal, NewSize, Rows} = local_doc_to_fdb(Db, Doc), + + {WasDeleted, PrevSize} = case erlfdb:wait(erlfdb:get(Tx, LDocKey)) of + <<255, RevBin/binary>> -> + case erlfdb_tuple:unpack(RevBin) of + {?CURR_LDOC_FORMAT, _Rev, Size} -> + {false, Size} + end; + <<_/binary>> -> + {false, 0}; + not_found -> + {true, 0} + end, + + BPrefix = erlfdb_tuple:pack({?DB_LOCAL_DOC_BODIES, Id}, DbPrefix), + + case Doc#doc.deleted of + true -> + erlfdb:clear(Tx, LDocKey), + erlfdb:clear_range_startswith(Tx, BPrefix); + false -> + erlfdb:set(Tx, LDocKey, LDocVal), + % Make sure to clear the whole range, in case there was a larger + % document body there before. + erlfdb:clear_range_startswith(Tx, BPrefix), + lists:foreach(fun({K, V}) -> + erlfdb:set(Tx, K, aegis:encrypt(Db, K, V)) + end, Rows) + end, + + case {WasDeleted, Doc#doc.deleted} of + {true, false} -> + incr_stat(Db, <<"doc_local_count">>, 1); + {false, true} -> + incr_stat(Db, <<"doc_local_count">>, -1); + _ -> + ok + end, + + incr_stat(Db, <<"sizes">>, <<"external">>, NewSize - PrevSize), + + ok. + + +read_attachment(#{} = Db, DocId, AttId) -> + #{ + tx := Tx, + db_prefix := DbPrefix + } = ensure_current(Db), + + AttKey = erlfdb_tuple:pack({?DB_ATTS, DocId, AttId}, DbPrefix), + Data = case erlfdb:wait(erlfdb:get_range_startswith(Tx, AttKey)) of + not_found -> + throw({not_found, missing}); + KVs -> + {_, Chunks} = lists:unzip(aegis:decrypt(Db, KVs)), + iolist_to_binary(Chunks) + end, + + IdKey = erlfdb_tuple:pack({?DB_ATT_NAMES, DocId, AttId}, DbPrefix), + case erlfdb:wait(erlfdb:get(Tx, IdKey)) of + <<>> -> + Data; % Old format, before CURR_ATT_STORAGE_VER = 0 + <<_/binary>> = InfoBin -> + {?CURR_ATT_STORAGE_VER, Compressed} = erlfdb_tuple:unpack(InfoBin), + case Compressed of + true -> binary_to_term(Data, [safe]); + false -> Data + end + end. + + +write_attachment(#{} = Db, DocId, Data, Encoding) + when is_binary(Data), is_atom(Encoding) -> + #{ + tx := Tx, + db_prefix := DbPrefix + } = ensure_current(Db), + + AttId = fabric2_util:uuid(), + + {Data1, Compressed} = case Encoding of + gzip -> + {Data, false}; + _ -> + Opts = [{minor_version, 1}, {compressed, 6}], + CompressedData = term_to_binary(Data, Opts), + case size(CompressedData) < Data of + true -> {CompressedData, true}; + false -> {Data, false} + end + end, + + IdKey = erlfdb_tuple:pack({?DB_ATT_NAMES, DocId, AttId}, DbPrefix), + InfoVal = erlfdb_tuple:pack({?CURR_ATT_STORAGE_VER, Compressed}), + ok = erlfdb:set(Tx, IdKey, InfoVal), + + Chunks = chunkify_binary(Data1), + + lists:foldl(fun(Chunk, ChunkId) -> + AttKey = erlfdb_tuple:pack({?DB_ATTS, DocId, AttId, ChunkId}, DbPrefix), + ok = erlfdb:set(Tx, AttKey, aegis:encrypt(Db, AttKey, Chunk)), + ChunkId + 1 + end, 0, Chunks), + {ok, AttId}. + + +get_last_change(#{} = Db) -> + #{ + tx := Tx, + db_prefix := DbPrefix + } = ensure_current(Db), + + {Start, End} = erlfdb_tuple:range({?DB_CHANGES}, DbPrefix), + Options = [{limit, 1}, {reverse, true}], + case erlfdb:get_range(Tx, Start, End, Options) of + [] -> + vs_to_seq(fabric2_util:seq_zero_vs()); + [{K, _V}] -> + {?DB_CHANGES, SeqVS} = erlfdb_tuple:unpack(K, DbPrefix), + vs_to_seq(SeqVS) + end. 
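%% Hedged sketch (names are illustrative) of a byte-size based compression
%% decision for attachment data: store the term_to_binary-compressed form
%% only when it is actually smaller than the plain binary. Note that the
%% comparison must be between size(Compressed) and size(Plain); comparing a
%% size against the binary itself would always prefer the compressed form,
%% because numbers sort before binaries in Erlang term order.
prefer_compressed(Plain) when is_binary(Plain) ->
    Opts = [{minor_version, 1}, {compressed, 6}],
    Compressed = term_to_binary(Plain, Opts),
    case size(Compressed) < size(Plain) of
        true -> {Compressed, true};
        false -> {Plain, false}
    end.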
+ + +fold_range(TxOrDb, RangePrefix, UserFun, UserAcc, Options) -> + {Db, Tx} = case TxOrDb of + {tx, TxObj} -> + {undefined, TxObj}; + #{} = DbObj -> + DbObj1 = #{tx := TxObj} = ensure_current(DbObj), + {DbObj1, TxObj} + end, + % FoundationDB treats a limit 0 of as unlimited so we guard against it + case fabric2_util:get_value(limit, Options) of 0 -> UserAcc; _ -> + FAcc = get_fold_acc(Db, RangePrefix, UserFun, UserAcc, Options), + try + fold_range(Tx, FAcc) + after + erase(?PDICT_FOLD_ACC_STATE) + end + end. + + +fold_range(Tx, FAcc) -> + #fold_acc{ + start_key = Start, + end_key = End, + limit = Limit, + base_opts = BaseOpts, + restart_tx = DoRestart + } = FAcc, + case DoRestart of false -> ok; true -> + ok = erlfdb:set_option(Tx, disallow_writes) + end, + Opts = [{limit, Limit} | BaseOpts], + Callback = fun fold_range_cb/2, + try + #fold_acc{ + user_acc = FinalUserAcc + } = erlfdb:fold_range(Tx, Start, End, Callback, FAcc, Opts), + FinalUserAcc + catch error:{erlfdb_error, ?TRANSACTION_TOO_OLD} when DoRestart -> + % Possibly handle cluster_version_changed and future_version as well to + % continue iteration instead fallback to transactional and retrying + % from the beginning which is bound to fail when streaming data out to a + % socket. + fold_range(Tx, restart_fold(Tx, FAcc)) + end. + + +vs_to_seq(VS) when is_tuple(VS) -> + % 51 is the versionstamp type tag + <<51:8, SeqBin:12/binary>> = erlfdb_tuple:pack({VS}), + fabric2_util:to_hex(SeqBin). + + +seq_to_vs(Seq) when is_binary(Seq) -> + Seq1 = fabric2_util:from_hex(Seq), + % 51 is the versionstamp type tag + Seq2 = <<51:8, Seq1/binary>>, + {VS} = erlfdb_tuple:unpack(Seq2), + VS. + + +next_vs({versionstamp, VS, Batch, TxId}) -> + {V, B, T} = case TxId =< 65535 of + true -> + {VS, Batch, TxId + 1}; + false -> + case Batch =< 65535 of + true -> + {VS, Batch + 1, 0}; + false -> + {VS + 1, 0, 0} + end + end, + {versionstamp, V, B, T}. + + +new_versionstamp(Tx) -> + TxId = erlfdb:get_next_tx_id(Tx), + {versionstamp, 16#FFFFFFFFFFFFFFFF, 16#FFFF, TxId}. + + +get_approximate_tx_size(#{} = TxDb) -> + require_transaction(TxDb), + #{tx := Tx} = TxDb, + erlfdb:wait(erlfdb:get_approximate_size(Tx)). + + +chunkify_binary(Data) -> + chunkify_binary(Data, binary_chunk_size()). + + +chunkify_binary(Data, Size) -> + case Data of + <<>> -> + []; + <<Head:Size/binary, Rest/binary>> -> + [Head | chunkify_binary(Rest, Size)]; + <<_/binary>> when size(Data) < Size -> + [Data] + end. + + +debug_cluster() -> + debug_cluster(<<>>, <<16#FE, 16#FF, 16#FF>>). + + +debug_cluster(Start, End) -> + transactional(fun(Tx) -> + lists:foreach(fun({Key, Val}) -> + io:format(standard_error, "~s => ~s~n", [ + string:pad(erlfdb_util:repr(Key), 60), + erlfdb_util:repr(Val) + ]) + end, erlfdb:get_range(Tx, Start, End)) + end). + + +init_db(Tx, DbName, Options) -> + Prefix = get_dir(Tx), + Version = erlfdb:wait(erlfdb:get(Tx, ?METADATA_VERSION_KEY)), + #{ + name => DbName, + tx => Tx, + layer_prefix => Prefix, + md_version => Version, + + security_fun => undefined, + db_options => Options + }. 
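The update sequence exposed to clients is just the hex form of the 12 versionstamp bytes behind tuple-encoding type tag 51. A pure-Erlang approximation for illustration, assuming the usual FoundationDB layout of an 8-byte commit version, 2-byte batch and 2-byte transaction id, all big-endian (the real code goes through erlfdb_tuple:pack/1 and fabric2_util:to_hex/1):

-module(seq_encode_sketch).
-export([seq/1]).

% 12 big-endian bytes, lowercase-hex encoded into a 24-character string;
% e.g. the zero versionstamp becomes a run of 24 '0' characters.
seq({versionstamp, VS, Batch, TxId}) ->
    Bin = <<VS:64/big, Batch:16/big, TxId:16/big>>,
    iolist_to_binary([io_lib:format("~2.16.0b", [B]) || <<B:8>> <= Bin]).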
+ + +load_validate_doc_funs(#{} = Db) -> + FoldFun = fun + ({row, Row}, Acc) -> + DDocInfo = #{id => fabric2_util:get_value(id, Row)}, + {ok, [DDocInfo | Acc]}; + (_, Acc) -> + {ok, Acc} + end, + + Options = [ + {start_key, <<"_design/">>}, + {end_key, <<"_design0">>} + ], + + {ok, Infos1} = fabric2_db:fold_docs(Db, FoldFun, [], Options), + + Infos2 = lists:map(fun(Info) -> + #{ + id := DDocId = <<"_design/", _/binary>> + } = Info, + Info#{ + rev_info => get_winning_revs_future(Db, DDocId, 1) + } + end, Infos1), + + Infos3 = lists:flatmap(fun(Info) -> + #{ + id := DDocId, + rev_info := RevInfoFuture + } = Info, + [RevInfo] = get_revs_wait(Db, RevInfoFuture), + #{deleted := Deleted} = RevInfo, + if Deleted -> []; true -> + [Info#{ + rev_info := RevInfo, + body => get_doc_body_future(Db, DDocId, RevInfo) + }] + end + end, Infos2), + + VDUs = lists:flatmap(fun(Info) -> + #{ + id := DDocId, + rev_info := RevInfo, + body := BodyFuture + } = Info, + #doc{} = Doc = get_doc_body_wait(Db, DDocId, RevInfo, BodyFuture), + case couch_doc:get_validate_doc_fun(Doc) of + nil -> []; + Fun -> [Fun] + end + end, Infos3), + + Db#{ + validate_doc_update_funs := VDUs + }. + + +bump_metadata_version(Tx) -> + % The 14 zero bytes is pulled from the PR for adding the + % metadata version key. Not sure why 14 bytes when version + % stamps are only 80, but whatever for now. + erlfdb:set_versionstamped_value(Tx, ?METADATA_VERSION_KEY, <<0:112>>). + + +check_metadata_version(#{} = Db) -> + #{ + tx := Tx, + md_version := Version + } = Db, + + AlreadyChecked = get(?PDICT_CHECKED_MD_IS_CURRENT), + if AlreadyChecked == true -> {current, Db}; true -> + case erlfdb:wait(erlfdb:get_ss(Tx, ?METADATA_VERSION_KEY)) of + Version -> + put(?PDICT_CHECKED_MD_IS_CURRENT, true), + % We want to set a read conflict on the db version as we'd want + % to conflict with any writes to this particular db. However + % during db creation db prefix might not exist yet so we don't + % add a read-conflict on it then. + case maps:get(db_prefix, Db, not_found) of + not_found -> + ok; + <<_/binary>> = DbPrefix -> + DbVerKey = erlfdb_tuple:pack({?DB_VERSION}, DbPrefix), + erlfdb:add_read_conflict_key(Tx, DbVerKey) + end, + {current, Db}; + NewVersion -> + {stale, Db#{md_version := NewVersion}} + end + end. + + +bump_db_version(#{} = Db) -> + #{ + tx := Tx, + db_prefix := DbPrefix + } = Db, + + DbVersionKey = erlfdb_tuple:pack({?DB_VERSION}, DbPrefix), + DbVersion = fabric2_util:uuid(), + ok = erlfdb:set(Tx, DbVersionKey, DbVersion), + ok = bump_metadata_version(Tx), + {ok, DbVersion}. + + +check_db_version(#{} = Db, CheckDbVersion) -> + #{ + tx := Tx, + db_prefix := DbPrefix, + db_version := DbVersion + } = Db, + + AlreadyChecked = get(?PDICT_CHECKED_DB_IS_CURRENT), + if not CheckDbVersion orelse AlreadyChecked == true -> current; true -> + DbVersionKey = erlfdb_tuple:pack({?DB_VERSION}, DbPrefix), + case erlfdb:wait(erlfdb:get(Tx, DbVersionKey)) of + DbVersion -> + put(?PDICT_CHECKED_DB_IS_CURRENT, true), + current; + _NewDBVersion -> + stale + end + end. 
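check_metadata_version/1 only hits FoundationDB once per transaction; afterwards a process-dictionary flag short-circuits the check. The same "check at most once per transaction" shape, reduced to a standalone helper (names are illustrative; in the module above the flag is ?PDICT_CHECKED_MD_IS_CURRENT and it is cleared when a new transaction starts):

-module(once_per_tx_sketch).
-export([check_once/2, reset/1]).

% Run CheckFun at most once per transaction in this process. Later calls
% in the same transaction return `current` without re-reading.
check_once(Key, CheckFun) when is_function(CheckFun, 0) ->
    case get(Key) of
        true ->
            current;
        _ ->
            Result = CheckFun(),
            case Result of
                current -> put(Key, true);
                _ -> ok
            end,
            Result
    end.

% Called from the transaction wrapper so the next transaction re-checks.
reset(Key) ->
    erase(Key),
    ok.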
+ + +soft_delete_db(Db) -> + #{ + name := DbName, + tx := Tx, + layer_prefix := LayerPrefix, + db_prefix := DbPrefix + } = ensure_current(Db), + + DbKey = erlfdb_tuple:pack({?ALL_DBS, DbName}, LayerPrefix), + Timestamp = list_to_binary(fabric2_util:iso8601_timestamp()), + DeletedDbKeyTuple = {?DELETED_DBS, DbName, Timestamp}, + DeletedDbKey = erlfdb_tuple:pack(DeletedDbKeyTuple, LayerPrefix), + case erlfdb:wait(erlfdb:get(Tx, DeletedDbKey)) of + not_found -> + erlfdb:set(Tx, DeletedDbKey, DbPrefix), + erlfdb:clear(Tx, DbKey), + bump_db_version(Db), + ok; + _Val -> + {deletion_frequency_exceeded, DbName} + end. + + +hard_delete_db(Db) -> + #{ + name := DbName, + tx := Tx, + layer_prefix := LayerPrefix, + db_prefix := DbPrefix + } = ensure_current(Db), + + DbKey = erlfdb_tuple:pack({?ALL_DBS, DbName}, LayerPrefix), + + erlfdb:clear(Tx, DbKey), + erlfdb:clear_range_startswith(Tx, DbPrefix), + bump_metadata_version(Tx), + ok. + + +write_doc_body(#{} = Db0, #doc{} = Doc) -> + #{ + tx := Tx + } = Db = ensure_current(Db0), + + Rows = doc_to_fdb(Db, Doc), + lists:foreach(fun({Key, Value}) -> + ok = erlfdb:set(Tx, Key, aegis:encrypt(Db, Key, Value)) + end, Rows). + + +clear_doc_body(_Db, _DocId, not_found) -> + % No old body to clear + ok; + +clear_doc_body(#{} = Db, DocId, #{} = RevInfo) -> + #{ + tx := Tx, + db_prefix := DbPrefix + } = Db, + + #{ + rev_id := {RevPos, Rev} + } = RevInfo, + + BaseKey = {?DB_DOCS, DocId, RevPos, Rev}, + {StartKey, EndKey} = erlfdb_tuple:range(BaseKey, DbPrefix), + ok = erlfdb:clear_range(Tx, StartKey, EndKey). + + +cleanup_attachments(Db, DocId, NewDoc, ToRemove) -> + #{ + tx := Tx, + db_prefix := DbPrefix + } = Db, + + RemoveRevs = lists:map(fun(#{rev_id := RevId}) -> RevId end, ToRemove), + + % Gather all known document revisions + {ok, DiskDocs} = fabric2_db:open_doc_revs(Db, DocId, all, []), + AllDocs = [{ok, NewDoc} | DiskDocs], + + % Get referenced attachment ids + ActiveIdSet = lists:foldl(fun({ok, Doc}, Acc) -> + #doc{ + revs = {Pos, [Rev | _]} + } = Doc, + case lists:member({Pos, Rev}, RemoveRevs) of + true -> + Acc; + false -> + lists:foldl(fun(Att, InnerAcc) -> + {loc, _Db, _DocId, AttId} = couch_att:fetch(data, Att), + sets:add_element(AttId, InnerAcc) + end, Acc, Doc#doc.atts) + end + end, sets:new(), AllDocs), + + AttPrefix = erlfdb_tuple:pack({?DB_ATT_NAMES, DocId}, DbPrefix), + Options = [{streaming_mode, want_all}], + Future = erlfdb:get_range_startswith(Tx, AttPrefix, Options), + + ExistingIdSet = lists:foldl(fun({K, _}, Acc) -> + {?DB_ATT_NAMES, DocId, AttId} = erlfdb_tuple:unpack(K, DbPrefix), + sets:add_element(AttId, Acc) + end, sets:new(), erlfdb:wait(Future)), + + AttsToRemove = sets:subtract(ExistingIdSet, ActiveIdSet), + + lists:foreach(fun(AttId) -> + IdKey = erlfdb_tuple:pack({?DB_ATT_NAMES, DocId, AttId}, DbPrefix), + erlfdb:clear(Tx, IdKey), + + ChunkKey = erlfdb_tuple:pack({?DB_ATTS, DocId, AttId}, DbPrefix), + erlfdb:clear_range_startswith(Tx, ChunkKey) + end, sets:to_list(AttsToRemove)). 
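cleanup_attachments/4 is a straightforward set difference: the attachment ids that exist on disk, minus the ids still referenced by a surviving revision, are the ones to clear. The core of that computation in isolation (illustrative names):

-module(att_gc_sketch).
-export([unreferenced/2]).

% ExistingIds come from the ?DB_ATT_NAMES range, ActiveIds from walking
% all live revisions; whatever is left over is safe to delete.
unreferenced(ExistingIds, ActiveIds) ->
    Existing = sets:from_list(ExistingIds),
    Active = sets:from_list(ActiveIds),
    lists:sort(sets:to_list(sets:subtract(Existing, Active))).

For example, unreferenced([a, b, c], [b]) returns [a, c].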
+ + +revinfo_to_fdb(Tx, DbPrefix, DocId, #{winner := true} = RevId) -> + #{ + deleted := Deleted, + rev_id := {RevPos, Rev}, + rev_path := RevPath, + branch_count := BranchCount, + att_hash := AttHash, + rev_size := RevSize + } = RevId, + VS = new_versionstamp(Tx), + Key = {?DB_REVS, DocId, not Deleted, RevPos, Rev}, + Val = { + ?CURR_REV_FORMAT, + VS, + BranchCount, + list_to_tuple(RevPath), + AttHash, + RevSize + }, + KBin = erlfdb_tuple:pack(Key, DbPrefix), + VBin = erlfdb_tuple:pack_vs(Val), + {KBin, VBin, VS}; + +revinfo_to_fdb(_Tx, DbPrefix, DocId, #{} = RevId) -> + #{ + deleted := Deleted, + rev_id := {RevPos, Rev}, + rev_path := RevPath, + att_hash := AttHash, + rev_size := RevSize + } = RevId, + Key = {?DB_REVS, DocId, not Deleted, RevPos, Rev}, + Val = {?CURR_REV_FORMAT, list_to_tuple(RevPath), AttHash, RevSize}, + KBin = erlfdb_tuple:pack(Key, DbPrefix), + VBin = erlfdb_tuple:pack(Val), + {KBin, VBin, undefined}. + + +fdb_to_revinfo(Key, {?CURR_REV_FORMAT, _, _, _, _, _} = Val) -> + {?DB_REVS, _DocId, NotDeleted, RevPos, Rev} = Key, + {_RevFormat, Sequence, BranchCount, RevPath, AttHash, RevSize} = Val, + #{ + winner => true, + exists => true, + deleted => not NotDeleted, + rev_id => {RevPos, Rev}, + rev_path => tuple_to_list(RevPath), + sequence => Sequence, + branch_count => BranchCount, + att_hash => AttHash, + rev_size => RevSize + }; + +fdb_to_revinfo(Key, {?CURR_REV_FORMAT, _, _, _} = Val) -> + {?DB_REVS, _DocId, NotDeleted, RevPos, Rev} = Key, + {_RevFormat, RevPath, AttHash, RevSize} = Val, + #{ + winner => false, + exists => true, + deleted => not NotDeleted, + rev_id => {RevPos, Rev}, + rev_path => tuple_to_list(RevPath), + sequence => undefined, + branch_count => undefined, + att_hash => AttHash, + rev_size => RevSize + }; + +fdb_to_revinfo(Key, {0, Seq, BCount, RPath}) -> + Val = {1, Seq, BCount, RPath, <<>>}, + fdb_to_revinfo(Key, Val); + +fdb_to_revinfo(Key, {0, RPath}) -> + Val = {1, RPath, <<>>}, + fdb_to_revinfo(Key, Val); + +fdb_to_revinfo(Key, {1, Seq, BCount, RPath, AttHash}) -> + % Don't forget to change ?CURR_REV_FORMAT to 2 here when it increments + Val = {?CURR_REV_FORMAT, Seq, BCount, RPath, AttHash, 0}, + fdb_to_revinfo(Key, Val); + +fdb_to_revinfo(Key, {1, RPath, AttHash}) -> + % Don't forget to change ?CURR_REV_FORMAT to 2 here when it increments + Val = {?CURR_REV_FORMAT, RPath, AttHash, 0}, + fdb_to_revinfo(Key, Val). + + +doc_to_fdb(Db, #doc{} = Doc) -> + #{ + db_prefix := DbPrefix + } = Db, + + #doc{ + id = Id, + revs = {Start, [Rev | _]}, + body = Body, + atts = Atts, + deleted = Deleted + } = Doc, + + DiskAtts = lists:map(fun couch_att:to_disk_term/1, Atts), + + Opts = [{minor_version, 1}, {compressed, 6}], + Value = term_to_binary({Body, DiskAtts, Deleted}, Opts), + Chunks = chunkify_binary(Value), + + {Rows, _} = lists:mapfoldl(fun(Chunk, ChunkId) -> + Key = erlfdb_tuple:pack({?DB_DOCS, Id, Start, Rev, ChunkId}, DbPrefix), + {{Key, Chunk}, ChunkId + 1} + end, 0, Chunks), + + Rows. + + +fdb_to_doc(_Db, _DocId, _Pos, _Path, []) -> + {not_found, missing}; + +fdb_to_doc(Db, DocId, Pos, Path, BinRows) when is_list(BinRows) -> + Bin = iolist_to_binary(BinRows), + {Body, DiskAtts, Deleted} = binary_to_term(Bin, [safe]), + Atts = lists:map(fun(Att) -> + couch_att:from_disk_term(Db, DocId, Att) + end, DiskAtts), + Doc0 = #doc{ + id = DocId, + revs = {Pos, Path}, + body = Body, + atts = Atts, + deleted = Deleted + }, + + case Db of + #{after_doc_read := undefined} -> Doc0; + #{after_doc_read := ADR} -> ADR(Doc0, Db) + end. 
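fdb_to_revinfo/2 upgrades old value formats lazily: each legacy clause pads the missing fields with defaults and re-dispatches, so only the clauses for the current format ever have to know the full layout. The same pattern reduced to a toy format chain (format numbers and defaults here are illustrative, not the real ?CURR_REV_FORMAT history):

-module(rev_format_sketch).
-export([upgrade/1]).

% Format 0 predates attachment hashes; format 1 predates per-revision
% sizes; format 2 plays the role of "current" in this sketch.
upgrade({0, Seq, BranchCount, RevPath}) ->
    upgrade({1, Seq, BranchCount, RevPath, <<>>});
upgrade({1, Seq, BranchCount, RevPath, AttHash}) ->
    upgrade({2, Seq, BranchCount, RevPath, AttHash, 0});
upgrade({2, _Seq, _BranchCount, _RevPath, _AttHash, _RevSize} = Current) ->
    Current.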
+ + +local_doc_to_fdb(Db, #doc{} = Doc) -> + #{ + db_prefix := DbPrefix + } = Db, + + #doc{ + id = Id, + revs = {0, [Rev]}, + body = Body + } = Doc, + + Key = erlfdb_tuple:pack({?DB_LOCAL_DOCS, Id}, DbPrefix), + + StoreRev = case Rev of + _ when is_integer(Rev) -> integer_to_binary(Rev); + _ when is_binary(Rev) -> Rev + end, + + BVal = term_to_binary(Body, [{minor_version, 1}, {compressed, 6}]), + {Rows, _} = lists:mapfoldl(fun(Chunk, ChunkId) -> + K = erlfdb_tuple:pack({?DB_LOCAL_DOC_BODIES, Id, ChunkId}, DbPrefix), + {{K, Chunk}, ChunkId + 1} + end, 0, chunkify_binary(BVal)), + + NewSize = fabric2_util:ldoc_size(Doc), + RawValue = erlfdb_tuple:pack({?CURR_LDOC_FORMAT, StoreRev, NewSize}), + + % Prefix our tuple encoding to make upgrades easier + Value = <<255, RawValue/binary>>, + + {Key, Value, NewSize, Rows}. + + +fdb_to_local_doc(_Db, _DocId, not_found, []) -> + {not_found, missing}; + +fdb_to_local_doc(_Db, DocId, <<131, _/binary>> = Val, []) -> + % This is an upgrade clause for the old encoding. We allow reading the old + % value and will perform an upgrade of the storage format on an update. + {Rev, Body} = binary_to_term(Val, [safe]), + #doc{ + id = DocId, + revs = {0, [Rev]}, + deleted = false, + body = Body + }; + +fdb_to_local_doc(_Db, DocId, <<255, RevBin/binary>>, Rows) when is_list(Rows) -> + Rev = case erlfdb_tuple:unpack(RevBin) of + {?CURR_LDOC_FORMAT, Rev0, _Size} -> Rev0 + end, + + BodyBin = iolist_to_binary(Rows), + Body = binary_to_term(BodyBin, [safe]), + + #doc{ + id = DocId, + revs = {0, [Rev]}, + deleted = false, + body = Body + }; + +fdb_to_local_doc(Db, DocId, RawRev, Rows) -> + BaseRev = erlfdb_tuple:pack({?CURR_LDOC_FORMAT, RawRev, 0}), + Rev = <<255, BaseRev/binary>>, + fdb_to_local_doc(Db, DocId, Rev, Rows). + + +sum_add_rev_sizes(RevInfos) -> + lists:foldl(fun(RI, Acc) -> + #{ + exists := Exists, + rev_size := Size + } = RI, + case Exists of + true -> Acc; + false -> Size + Acc + end + end, 0, RevInfos). + + +sum_rem_rev_sizes(RevInfos) -> + lists:foldl(fun(RI, Acc) -> + #{ + exists := true, + rev_size := Size + } = RI, + Size + Acc + end, 0, RevInfos). + + +get_fold_acc(Db, RangePrefix, UserCallback, UserAcc, Options) + when is_map(Db) orelse Db =:= undefined -> + + Reverse = case fabric2_util:get_value(dir, Options) of + rev -> true; + _ -> false + end, + + StartKey0 = fabric2_util:get_value(start_key, Options), + EndKeyGt = fabric2_util:get_value(end_key_gt, Options), + EndKey0 = fabric2_util:get_value(end_key, Options, EndKeyGt), + InclusiveEnd = EndKeyGt == undefined, + WrapKeys = fabric2_util:get_value(wrap_keys, Options) /= false, + + % CouchDB swaps the key meanings based on the direction + % of the fold. FoundationDB does not so we have to + % swap back here. 
+ {StartKey1, EndKey1} = case Reverse of + false -> {StartKey0, EndKey0}; + true -> {EndKey0, StartKey0} + end, + + % Set the maximum bounds for the start and endkey + StartKey2 = case StartKey1 of + undefined -> + <<RangePrefix/binary, 16#00>>; + SK2 when not WrapKeys -> + erlfdb_tuple:pack(SK2, RangePrefix); + SK2 -> + erlfdb_tuple:pack({SK2}, RangePrefix) + end, + + EndKey2 = case EndKey1 of + undefined -> + <<RangePrefix/binary, 16#FF>>; + EK2 when Reverse andalso not WrapKeys -> + PackedEK = erlfdb_tuple:pack(EK2, RangePrefix), + <<PackedEK/binary, 16#FF>>; + EK2 when Reverse -> + PackedEK = erlfdb_tuple:pack({EK2}, RangePrefix), + <<PackedEK/binary, 16#FF>>; + EK2 when not WrapKeys -> + erlfdb_tuple:pack(EK2, RangePrefix); + EK2 -> + erlfdb_tuple:pack({EK2}, RangePrefix) + end, + + % FoundationDB ranges are applied as SK <= key < EK + % By default, CouchDB is SK <= key <= EK with the + % optional inclusive_end=false option changing that + % to SK <= key < EK. Also, remember that CouchDB + % swaps the meaning of SK and EK based on direction. + % + % Thus we have this wonderful bit of logic to account + % for all of those combinations. + + StartKey3 = case {Reverse, InclusiveEnd} of + {true, false} -> + erlfdb_key:first_greater_than(StartKey2); + _ -> + StartKey2 + end, + + EndKey3 = case {Reverse, InclusiveEnd} of + {false, true} when EndKey0 /= undefined -> + erlfdb_key:first_greater_than(EndKey2); + {true, _} -> + erlfdb_key:first_greater_than(EndKey2); + _ -> + EndKey2 + end, + + Skip = case fabric2_util:get_value(skip, Options) of + S when is_integer(S), S >= 0 -> S; + _ -> 0 + end, + + Limit = case fabric2_util:get_value(limit, Options) of + L when is_integer(L), L >= 0 -> L + Skip; + undefined -> 0 + end, + + TargetBytes = case fabric2_util:get_value(target_bytes, Options) of + T when is_integer(T), T >= 0 -> [{target_bytes, T}]; + undefined -> [] + end, + + StreamingMode = case fabric2_util:get_value(streaming_mode, Options) of + undefined -> []; + Name when is_atom(Name) -> [{streaming_mode, Name}] + end, + + Snapshot = case fabric2_util:get_value(snapshot, Options) of + undefined -> []; + B when is_boolean(B) -> [{snapshot, B}] + end, + + BaseOpts = [{reverse, Reverse}] + ++ TargetBytes + ++ StreamingMode + ++ Snapshot, + + RestartTx = fabric2_util:get_value(restart_tx, Options, false), + + #fold_acc{ + db = Db, + start_key = StartKey3, + end_key = EndKey3, + skip = Skip, + limit = Limit, + retries = 0, + base_opts = BaseOpts, + restart_tx = RestartTx, + user_fun = UserCallback, + user_acc = UserAcc + }. + + +fold_range_cb({K, V}, #fold_acc{} = Acc) -> + #fold_acc{ + skip = Skip, + limit = Limit, + user_fun = UserFun, + user_acc = UserAcc, + base_opts = Opts + } = Acc, + Acc1 = case Skip =:= 0 of + true -> + UserAcc1 = UserFun({K, V}, UserAcc), + Acc#fold_acc{limit = max(0, Limit - 1), user_acc = UserAcc1}; + false -> + Acc#fold_acc{skip = Skip - 1, limit = Limit - 1} + end, + Acc2 = case fabric2_util:get_value(reverse, Opts, false) of + true -> Acc1#fold_acc{end_key = erlfdb_key:last_less_or_equal(K)}; + false -> Acc1#fold_acc{start_key = erlfdb_key:first_greater_than(K)} + end, + put(?PDICT_FOLD_ACC_STATE, Acc2), + Acc2. 
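The bound juggling in get_fold_acc/5 implements CouchDB's range semantics on top of FoundationDB's half-open ranges: inclusive start, inclusive end unless end_key_gt / inclusive_end=false is used, and start/end swapping meaning for descending folds. A list-based model of those semantics, useful for reasoning about the combinations (illustrative module; both bounds are assumed to be given):

-module(fold_bounds_sketch).
-export([select/4]).

select(SortedKeys, StartKey, EndKey, Opts) ->
    Reverse = proplists:get_value(dir, Opts, fwd) =:= rev,
    InclusiveEnd = proplists:get_value(inclusive_end, Opts, true),
    % In a reverse fold the user's start key is the high bound and the
    % end key is the low bound.
    {Lo, Hi} = case Reverse of
        false -> {StartKey, EndKey};
        true -> {EndKey, StartKey}
    end,
    InRange = fun(K) ->
        AboveLo = case {Reverse, InclusiveEnd} of
            {true, false} -> K > Lo;   % reverse: end key is an exclusive low bound
            _ -> K >= Lo
        end,
        BelowHi = case {Reverse, InclusiveEnd} of
            {false, false} -> K < Hi;  % forward: end key is an exclusive high bound
            _ -> K =< Hi
        end,
        AboveLo andalso BelowHi
    end,
    Selected = [K || K <- SortedKeys, InRange(K)],
    case Reverse of
        true -> lists:reverse(Selected);
        false -> Selected
    end.

For instance, select([1,2,3,4,5], 4, 2, [{dir, rev}, {inclusive_end, false}]) yields [4, 3].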
+ + +restart_fold(Tx, #fold_acc{} = Acc) -> + erase(?PDICT_CHECKED_MD_IS_CURRENT), + + ok = erlfdb:reset(Tx), + + case {erase(?PDICT_FOLD_ACC_STATE), Acc#fold_acc.retries} of + {#fold_acc{db = Db} = Acc1, _} -> + Acc1#fold_acc{db = check_db_instance(Db), retries = 0}; + {undefined, Retries} when Retries < ?MAX_FOLD_RANGE_RETRIES -> + Db = check_db_instance(Acc#fold_acc.db), + Acc#fold_acc{db = Db, retries = Retries + 1}; + {undefined, _} -> + error(fold_range_not_progressing) + end. + + +get_db_handle() -> + case get(?PDICT_DB_KEY) of + undefined -> + {ok, Db} = application:get_env(fabric, db), + put(?PDICT_DB_KEY, Db), + Db; + Db -> + Db + end. + + +require_transaction(#{tx := {erlfdb_transaction, _}} = _Db) -> + ok; +require_transaction(#{} = _Db) -> + erlang:error(transaction_required). + + +ensure_current(Db) -> + ensure_current(Db, true). + + +ensure_current(#{} = Db0, CheckDbVersion) -> + require_transaction(Db0), + Db3 = case check_metadata_version(Db0) of + {current, Db1} -> + Db1; + {stale, Db1} -> + case check_db_version(Db1, CheckDbVersion) of + current -> + % If db version is current, update cache with the latest + % metadata so other requests can immediately see the + % refreshed db handle. + Now = erlang:monotonic_time(millisecond), + Db2 = Db1#{check_current_ts := Now}, + fabric2_server:maybe_update(Db2), + Db2; + stale -> + fabric2_server:maybe_remove(Db1), + throw({?MODULE, reopen}) + end + end, + case maps:get(security_fun, Db3) of + SecurityFun when is_function(SecurityFun, 2) -> + #{security_doc := SecDoc} = Db3, + ok = SecurityFun(Db3, SecDoc), + Db3#{security_fun := undefined}; + undefined -> + Db3 + end. + + +check_db_instance(undefined) -> + undefined; + +check_db_instance(#{} = Db) -> + require_transaction(Db), + case check_metadata_version(Db) of + {current, Db1} -> + Db1; + {stale, Db1} -> + #{ + tx := Tx, + uuid := UUID, + db_prefix := DbPrefix + } = Db1, + UUIDKey = erlfdb_tuple:pack({?DB_CONFIG, <<"uuid">>}, DbPrefix), + case erlfdb:wait(erlfdb:get(Tx, UUIDKey)) of + UUID -> Db1; + _ -> error(database_does_not_exist) + end + end. + + +is_transaction_applied(Tx) -> + is_commit_unknown_result() + andalso has_transaction_id() + andalso transaction_id_exists(Tx). + + +get_previous_transaction_result() -> + get(?PDICT_TX_RES_KEY). + + +execute_transaction(Tx, Fun, LayerPrefix) -> + put(?PDICT_CHECKED_MD_IS_CURRENT, false), + put(?PDICT_CHECKED_DB_IS_CURRENT, false), + Result = Fun(Tx), + case erlfdb:is_read_only(Tx) of + true -> + ok; + false -> + erlfdb:set(Tx, get_transaction_id(Tx, LayerPrefix), <<>>), + put(?PDICT_TX_RES_KEY, Result) + end, + Result. + + +clear_transaction() -> + fabric2_txids:remove(get(?PDICT_TX_ID_KEY)), + erase(?PDICT_CHECKED_DB_IS_CURRENT), + erase(?PDICT_CHECKED_MD_IS_CURRENT), + erase(?PDICT_TX_ID_KEY), + erase(?PDICT_TX_RES_KEY). + + +is_commit_unknown_result() -> + erlfdb:get_last_error() == ?COMMIT_UNKNOWN_RESULT. + + +has_transaction_id() -> + is_binary(get(?PDICT_TX_ID_KEY)). + + +transaction_id_exists(Tx) -> + erlfdb:wait(erlfdb:get(Tx, get(?PDICT_TX_ID_KEY))) == <<>>. + + +get_transaction_id(Tx, LayerPrefix) -> + case get(?PDICT_TX_ID_KEY) of + undefined -> + TxId = fabric2_txids:create(Tx, LayerPrefix), + put(?PDICT_TX_ID_KEY, TxId), + TxId; + TxId when is_binary(TxId) -> + TxId + end. 
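The transaction-id machinery above gives retried read-write transactions at-most-once semantics: every transaction that writes also sets a unique marker key, and when a commit fails with commit_unknown_result the retry checks whether the marker is present before re-running any side effects. A reduced sketch of that flow (run/3, the pdict key and the hard-coded error code are illustrative stand-ins; the real code keeps the result in ?PDICT_TX_RES_KEY and the constant in ?COMMIT_UNKNOWN_RESULT):

-module(txid_sketch).
-export([run/3]).

% Assumed here to be FoundationDB's commit_unknown_result error code.
-define(COMMIT_UNKNOWN_RESULT, 1021).

run(Tx, MarkerKey, Fun) ->
    Applied = erlfdb:get_last_error() == ?COMMIT_UNKNOWN_RESULT
        andalso erlfdb:wait(erlfdb:get(Tx, MarkerKey)) == <<>>,
    case Applied of
        true ->
            % The previous attempt did commit; reuse its cached result.
            get(previous_result);
        false ->
            Result = Fun(Tx),
            erlfdb:set(Tx, MarkerKey, <<>>),
            put(previous_result, Result),
            Result
    end.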
+ + +with_span(Operation, ExtraTags, Fun) -> + case ctrace:has_span() of + true -> + Tags = maps:merge(#{ + 'span.kind' => <<"client">>, + component => <<"couchdb.fabric">>, + 'db.instance' => fabric2_server:fdb_cluster(), + 'db.namespace' => fabric2_server:fdb_directory(), + 'db.type' => <<"fdb">>, + nonce => get(nonce), + pid => self() + }, ExtraTags), + ctrace:with_span(Operation, Tags, Fun); + false -> + Fun() + end. + + +get_info_wait_int(#info_future{} = InfoFuture) -> + #info_future{ + db_prefix = DbPrefix, + changes_future = ChangesFuture, + uuid_future = UUIDFuture, + meta_future = MetaFuture + } = InfoFuture, + + RawSeq = case erlfdb:wait(ChangesFuture) of + [] -> + vs_to_seq(fabric2_util:seq_zero_vs()); + [{SeqKey, _}] -> + {?DB_CHANGES, SeqVS} = erlfdb_tuple:unpack(SeqKey, DbPrefix), + vs_to_seq(SeqVS) + end, + CProp = {update_seq, RawSeq}, + + UUIDProp = {uuid, erlfdb:wait(UUIDFuture)}, + + MProps = lists:foldl(fun({K, V}, Acc) -> + case erlfdb_tuple:unpack(K, DbPrefix) of + {?DB_STATS, <<"doc_count">>} -> + [{doc_count, ?bin2uint(V)} | Acc]; + {?DB_STATS, <<"doc_del_count">>} -> + [{doc_del_count, ?bin2uint(V)} | Acc]; + {?DB_STATS, <<"sizes">>, Name} -> + Val = ?bin2uint(V), + {_, {Sizes}} = lists:keyfind(sizes, 1, Acc), + NewSizes = lists:keystore(Name, 1, Sizes, {Name, Val}), + lists:keystore(sizes, 1, Acc, {sizes, {NewSizes}}); + {?DB_STATS, _} -> + Acc + end + end, [{sizes, {[]}}], erlfdb:wait(MetaFuture)), + + [CProp, UUIDProp | MProps]. + + +binary_chunk_size() -> + config:get_integer( + "fabric", "binary_chunk_size", ?DEFAULT_BINARY_CHUNK_SIZE). + + +-ifdef(TEST). +-include_lib("eunit/include/eunit.hrl"). + +fdb_to_revinfo_version_compatibility_test() -> + DocId = <<"doc_id">>, + FirstRevFormat = 0, + RevPos = 1, + Rev = <<60,84,174,140,210,120,192,18,100,148,9,181,129,165,248,92>>, + RevPath = {}, + NotDeleted = true, + Sequence = {versionstamp, 10873034897377, 0, 0}, + BranchCount = 1, + + KeyWinner = {?DB_REVS, DocId, NotDeleted, RevPos, Rev}, + ValWinner = {FirstRevFormat, Sequence, BranchCount, RevPath}, + ExpectedWinner = expected( + true, BranchCount, NotDeleted, RevPos, Rev, RevPath, Sequence), + ?assertEqual(ExpectedWinner, fdb_to_revinfo(KeyWinner, ValWinner)), + + KeyLoser = {?DB_REVS, DocId, NotDeleted, RevPos, Rev}, + ValLoser = {FirstRevFormat, RevPath}, + ExpectedLoser = expected( + false, undefined, NotDeleted, RevPos, Rev, RevPath, undefined), + ?assertEqual(ExpectedLoser, fdb_to_revinfo(KeyLoser, ValLoser)), + ok. + + +expected(Winner, BranchCount, NotDeleted, RevPos, Rev, RevPath, Sequence) -> + #{ + att_hash => <<>>, + branch_count => BranchCount, + deleted => not NotDeleted, + exists => true, + rev_id => {RevPos, Rev}, + rev_path => tuple_to_list(RevPath), + rev_size => 0, + sequence => Sequence, + winner => Winner + }. + + +-endif. diff --git a/src/fabric/src/fabric2_index.erl b/src/fabric/src/fabric2_index.erl new file mode 100644 index 000000000..25c31a8c8 --- /dev/null +++ b/src/fabric/src/fabric2_index.erl @@ -0,0 +1,241 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(fabric2_index). + + +-behaviour(gen_server). + + +-export([ + register_index/1, + db_updated/1, + cleanup/1, + start_link/0 +]). + +-export([ + init/1, + terminate/2, + handle_call/3, + handle_cast/2, + handle_info/2, + code_change/3 +]). + + +-include_lib("couch/include/couch_db.hrl"). + + +-callback build_indices(Db :: map(), DDocs :: list(#doc{})) -> + [{ok, JobId::binary()} | {error, any()}]. + +-callback cleanup_indices(Db :: map(), DDocs :: list(#doc{})) -> + [ok | {error, any()}]. + + +-define(SHARDS, 32). +-define(DEFAULT_DELAY_MSEC, 60000). +-define(DEFAULT_RESOLUTION_MSEC, 10000). + + +register_index(Mod) when is_atom(Mod) -> + Indices = lists:usort([Mod | registrations()]), + application:set_env(fabric, indices, Indices). + + +db_updated(DbName) when is_binary(DbName) -> + Table = table(erlang:phash2(DbName) rem ?SHARDS), + ets:insert_new(Table, {DbName, now_msec()}). + + +cleanup(Db) -> + try + fabric2_fdb:transactional(Db, fun(TxDb) -> + DDocs = fabric2_db:get_design_docs(Db), + cleanup_indices(TxDb, DDocs) + end) + catch + error:database_does_not_exist -> + ok; + Tag:Reason -> + Stack = erlang:get_stacktrace(), + DbName = fabric2_db:name(Db), + LogMsg = "~p failed to cleanup indices for `~s` ~p:~p ~p", + couch_log:error(LogMsg, [?MODULE, DbName, Tag, Reason, Stack]) + end. + + +start_link() -> + gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). + + +init(_) -> + lists:foreach(fun(T) -> + spawn_link(fun() -> process_loop(T) end) + end, create_tables()), + {ok, nil}. + + +terminate(_M, _St) -> + ok. + + +handle_call(Msg, _From, St) -> + {stop, {bad_call, Msg}, {bad_call, Msg}, St}. + + +handle_cast(Msg, St) -> + {stop, {bad_cast, Msg}, St}. + + +handle_info(Msg, St) -> + {stop, {bad_info, Msg}, St}. + + +code_change(_OldVsn, St, _Extra) -> + {ok, St}. + + +create_tables() -> + Opts = [ + named_table, + public, + {write_concurrency, true}, + {read_concurrency, true} + ], + Tables = [table(N) || N <- lists:seq(0, ?SHARDS - 1)], + [ets:new(T, Opts) || T <- Tables]. + + +table(Id) when is_integer(Id), Id >= 0 andalso Id < ?SHARDS -> + list_to_atom("fabric2_index_" ++ integer_to_list(Id)). + + +process_loop(Table) -> + Now = now_msec(), + Delay = delay_msec(), + Since = Now - Delay, + case is_enabled() of + true -> + process_updates(Table, Since), + clean_stale(Table, Since); + false -> + clean_stale(Table, Now) + end, + Resolution = resolution_msec(), + Jitter = rand:uniform(1 + Resolution div 2), + timer:sleep(Resolution + Jitter), + process_loop(Table). + + +clean_stale(Table, Since) -> + Head = {'_', '$1'}, + Guard = {'<', '$1', Since}, + % Monotonic is not strictly monotonic, so we process items using `=<` but + % clean with `<` in case there was an update with the same timestamp after + % we started processing already at that timestamp. + ets:select_delete(Table, [{Head, [Guard], [true]}]). + + +process_updates(Table, Since) -> + Head = {'$1', '$2'}, + Guard = {'=<', '$2', Since}, + case ets:select(Table, [{Head, [Guard], ['$1']}], 25) of + '$end_of_table' -> ok; + {Match, Cont} -> process_updates_iter(Match, Cont) + end. 
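db_updated/1 spreads dirty-database notifications across ?SHARDS ETS tables keyed by a hash of the name, and ets:insert_new/2 keeps the oldest pending timestamp so a busy database cannot postpone its indexing forever. The routing and the "due for processing" query in isolation (illustrative module; the real tables are created in create_tables/0 above):

-module(shard_table_sketch).
-export([new/1, mark_dirty/3, dirty_before/2]).

new(NumShards) ->
    Opts = [named_table, public, {write_concurrency, true}],
    [ets:new(table(N), Opts) || N <- lists:seq(0, NumShards - 1)].

mark_dirty(DbName, NowMsec, NumShards) when is_binary(DbName) ->
    Table = table(erlang:phash2(DbName) rem NumShards),
    % insert_new/2 is a no-op if an older entry is already pending.
    ets:insert_new(Table, {DbName, NowMsec}).

dirty_before(Table, SinceMsec) ->
    % Names of databases whose pending timestamp is at or before Since.
    ets:select(Table, [{{'$1', '$2'}, [{'=<', '$2', SinceMsec}], ['$1']}]).

table(Id) ->
    list_to_atom("shard_table_sketch_" ++ integer_to_list(Id)).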
+ + +process_updates_iter([], Cont) -> + case ets:select(Cont) of + '$end_of_table' -> ok; + {Match, Cont1} -> process_updates_iter(Match, Cont1) + end; + +process_updates_iter([Db | Rest], Cont) -> + try + process_db(Db) + catch + error:database_does_not_exist -> + ok; + Tag:Reason -> + Stack = erlang:get_stacktrace(), + LogMsg = "~p failed to build indices for `~s` ~p:~p ~p", + couch_log:error(LogMsg, [?MODULE, Db, Tag, Reason, Stack]) + end, + process_updates_iter(Rest, Cont). + + +process_db(DbName) when is_binary(DbName) -> + {ok, Db} = fabric2_db:open(DbName, [?ADMIN_CTX]), + fabric2_fdb:transactional(Db, fun(TxDb) -> + DDocs1 = fabric2_db:get_design_docs(TxDb), + DDocs2 = lists:filter(fun should_update/1, DDocs1), + DDocs3 = shuffle(DDocs2), + build_indices(TxDb, DDocs3), + case auto_cleanup() of + true -> cleanup_indices(TxDb, DDocs1); + false -> ok + end + end). + + +build_indices(_TxDb, []) -> + []; + +build_indices(TxDb, DDocs) -> + lists:flatmap(fun(Mod) -> + Mod:build_indices(TxDb, DDocs) + end, registrations()). + + +cleanup_indices(TxDb, DDocs) -> + lists:foreach(fun(Mod) -> + Mod:cleanup_indices(TxDb, DDocs) + end, registrations()). + + +registrations() -> + application:get_env(fabric, indices, []). + + +should_update(#doc{body = {Props}}) -> + couch_util:get_value(<<"autoupdate">>, Props, true). + + +shuffle(Items) -> + Tagged = [{rand:uniform(), I} || I <- Items], + Sorted = lists:sort(Tagged), + [I || {_T, I} <- Sorted]. + + +now_msec() -> + erlang:monotonic_time(millisecond). + + +is_enabled() -> + config:get_boolean("fabric", "index_updater_enabled", true). + + +delay_msec() -> + config:get_integer("fabric", "index_updater_delay_msec", + ?DEFAULT_DELAY_MSEC). + + +resolution_msec() -> + config:get_integer("fabric", "index_updater_resolution_msec", + ?DEFAULT_RESOLUTION_MSEC). + + +auto_cleanup() -> + config:get_boolean("fabric", "index_updater_remove_old_indices", false). diff --git a/src/fabric/src/fabric2_node_types.erl b/src/fabric/src/fabric2_node_types.erl new file mode 100644 index 000000000..110f04d15 --- /dev/null +++ b/src/fabric/src/fabric2_node_types.erl @@ -0,0 +1,52 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(fabric2_node_types). + + +-export([ + is_type/1 +]). + + +is_type(Type) when is_atom(Type) -> + case {from_os_env(Type), from_app_env(Type)} of + {V, _} when is_boolean(V) -> + V; + {undefined, V} when is_boolean(V) -> + V; + {undefined, undefined} -> + % When not defined anywhere assume `true`, that is by default a + % node will perform all the background tasks + true + end. + + +from_os_env(Type) when is_atom(Type) -> + StrType = erlang:atom_to_list(Type), + StrTypeUpper = string:to_upper(StrType), + case os:getenv("COUCHDB_NODE_TYPE_" ++ StrTypeUpper) of + false -> + undefined; + Str when is_list(Str) -> + case string:to_lower(Str) of + "false" -> false; + _ -> true + end + end. 
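is_type/1 resolves a node type with a simple precedence: an explicit OS environment toggle wins, then the fabric application environment, and with neither set the node defaults to running every background task. The precedence on its own, with the two lookups passed in as already-resolved values (illustrative module):

-module(node_type_sketch).
-export([resolve/2]).

resolve(OsEnv, _AppEnv) when is_boolean(OsEnv) -> OsEnv;
resolve(undefined, AppEnv) when is_boolean(AppEnv) -> AppEnv;
resolve(undefined, undefined) -> true.

So resolve(false, true) is false (the environment variable wins) and resolve(undefined, undefined) is true.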
+ + +from_app_env(Type) when is_atom(Type) -> + case application:get_env(fabric, node_types) of + undefined -> undefined; + {ok, Props} when is_list(Props) -> proplists:get_value(Type, Props) + end. diff --git a/src/fabric/src/fabric2_server.erl b/src/fabric/src/fabric2_server.erl new file mode 100644 index 000000000..be674b10e --- /dev/null +++ b/src/fabric/src/fabric2_server.erl @@ -0,0 +1,276 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(fabric2_server). +-behaviour(gen_server). +-vsn(1). + + +-export([ + start_link/0, + + fetch/2, + + store/1, + maybe_update/1, + + remove/1, + maybe_remove/1, + + fdb_directory/0, + fdb_cluster/0 +]). + + +-export([ + init/1, + terminate/2, + handle_call/3, + handle_cast/2, + handle_info/2, + code_change/3 +]). + + +-include_lib("couch/include/couch_db.hrl"). + + +-define(CLUSTER_FILE, "/usr/local/etc/foundationdb/fdb.cluster"). +-define(FDB_DIRECTORY, fdb_directory). +-define(FDB_CLUSTER, fdb_cluster). +-define(DEFAULT_FDB_DIRECTORY, <<"couchdb">>). +-define(TX_OPTIONS_SECTION, "fdb_tx_options"). +-define(RELISTEN_DELAY, 1000). + +-define(DEFAULT_TIMEOUT_MSEC, "60000"). +-define(DEFAULT_RETRY_LIMIT, "100"). + +-define(TX_OPTIONS, #{ + machine_id => {binary, undefined}, + datacenter_id => {binary, undefined}, + transaction_logging_max_field_length => {integer, undefined}, + timeout => {integer, ?DEFAULT_TIMEOUT_MSEC}, + retry_limit => {integer, ?DEFAULT_RETRY_LIMIT}, + max_retry_delay => {integer, undefined}, + size_limit => {integer, undefined} +}). + + +start_link() -> + gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). + + +fetch(DbName, UUID) when is_binary(DbName) -> + case {UUID, ets:lookup(?MODULE, DbName)} of + {_, []} -> undefined; + {undefined, [{DbName, _UUID, _, #{} = Db}]} -> Db; + {<<_/binary>>, [{DbName, UUID, _, #{} = Db}]} -> Db; + {<<_/binary>>, [{DbName, _UUID, _, #{} = _Db}]} -> undefined + end. + + +store(#{name := DbName} = Db0) when is_binary(DbName) -> + #{ + uuid := UUID, + md_version := MDVer + } = Db0, + Db1 = sanitize(Db0), + case ets:insert_new(?MODULE, {DbName, UUID, MDVer, Db1}) of + true -> ok; + false -> maybe_update(Db1) + end, + ok. + + +maybe_update(#{name := DbName} = Db0) when is_binary(DbName) -> + #{ + uuid := UUID, + md_version := MDVer + } = Db0, + Db1 = sanitize(Db0), + Head = {DbName, UUID, '$1', '_'}, + Guard = {'=<', '$1', MDVer}, + Body = {DbName, UUID, MDVer, {const, Db1}}, + try + 1 =:= ets:select_replace(?MODULE, [{Head, [Guard], [{Body}]}]) + catch + error:badarg -> + false + end. + + +remove(DbName) when is_binary(DbName) -> + true = ets:delete(?MODULE, DbName), + ok. + + +maybe_remove(#{name := DbName} = Db) when is_binary(DbName) -> + #{ + uuid := UUID, + md_version := MDVer + } = Db, + Head = {DbName, UUID, '$1', '_'}, + Guard = {'=<', '$1', MDVer}, + 1 =:= ets:select_delete(?MODULE, [{Head, [Guard], [true]}]). 
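store/1 and maybe_update/1 make the ETS db-handle cache a last-writer-wins register ordered by metadata version: an entry is only replaced when the incoming md_version is at least as new, and the whole compare-and-swap happens atomically inside ets:select_replace/2. The same pattern on a reduced 3-tuple entry (illustrative table and key names; keys are assumed to be binaries or atoms as in the real cache):

-module(cache_cas_sketch).
-export([init/0, put_if_newer/3, get_cached/1]).

init() ->
    ets:new(cache_cas_sketch, [named_table, public, {read_concurrency, true}]).

put_if_newer(Key, Version, Value) ->
    case ets:insert_new(cache_cas_sketch, {Key, Version, Value}) of
        true ->
            true;
        false ->
            Head = {Key, '$1', '_'},
            Guard = {'=<', '$1', Version},
            Body = {{Key, Version, {const, Value}}},
            % Replaces the row only if the stored version is older or equal.
            1 =:= ets:select_replace(cache_cas_sketch, [{Head, [Guard], [Body]}])
    end.

get_cached(Key) ->
    case ets:lookup(cache_cas_sketch, Key) of
        [{Key, _Version, Value}] -> Value;
        [] -> undefined
    end.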
+ + +init(_) -> + ets:new(?MODULE, [ + public, + named_table, + {read_concurrency, true}, + {write_concurrency, true} + ]), + {Cluster, Db} = get_db_and_cluster([empty]), + application:set_env(fabric, ?FDB_CLUSTER, Cluster), + application:set_env(fabric, db, Db), + + Dir = case config:get("fabric", "fdb_directory") of + Val when is_list(Val), length(Val) > 0 -> + [?l2b(Val)]; + _ -> + [?DEFAULT_FDB_DIRECTORY] + end, + application:set_env(fabric, ?FDB_DIRECTORY, Dir), + config:subscribe_for_changes([?TX_OPTIONS_SECTION]), + {ok, nil}. + + +terminate(_, _St) -> + ok. + + +handle_call(Msg, _From, St) -> + {stop, {bad_call, Msg}, {bad_call, Msg}, St}. + + +handle_cast(Msg, St) -> + {stop, {bad_cast, Msg}, St}. + + +handle_info({config_change, ?TX_OPTIONS_SECTION, _K, deleted, _}, St) -> + % Since we don't know the exact default values to reset the options + % to we recreate the db handle instead which will start with a default + % handle and re-apply all the options + {_Cluster, NewDb} = get_db_and_cluster([]), + application:set_env(fabric, db, NewDb), + {noreply, St}; + +handle_info({config_change, ?TX_OPTIONS_SECTION, K, V, _}, St) -> + {ok, Db} = application:get_env(fabric, db), + apply_tx_options(Db, [{K, V}]), + {noreply, St}; + +handle_info({gen_event_EXIT, _Handler, _Reason}, St) -> + erlang:send_after(?RELISTEN_DELAY, self(), restart_config_listener), + {noreply, St}; + +handle_info(restart_config_listener, St) -> + config:subscribe_for_changes([?TX_OPTIONS_SECTION]), + {noreply, St}; + +handle_info(Msg, St) -> + {stop, {bad_info, Msg}, St}. + + +code_change(_OldVsn, St, _Extra) -> + {ok, St}. + + +fdb_directory() -> + get_env(?FDB_DIRECTORY). + +fdb_cluster() -> + get_env(?FDB_CLUSTER). + +get_env(Key) -> + case get(Key) of + undefined -> + case application:get_env(fabric, Key) of + undefined -> + erlang:error(fabric_application_not_started); + {ok, Value} -> + put(Key, Value), + Value + end; + Value -> + Value + end. + + +get_db_and_cluster(EunitDbOpts) -> + {Cluster, Db} = case application:get_env(fabric, eunit_run) of + {ok, true} -> + {<<"eunit_test">>, erlfdb_util:get_test_db(EunitDbOpts)}; + undefined -> + ClusterFileStr = config:get("erlfdb", "cluster_file", ?CLUSTER_FILE), + {ok, ConnectionStr} = file:read_file(ClusterFileStr), + DbHandle = erlfdb:open(iolist_to_binary(ClusterFileStr)), + {string:trim(ConnectionStr), DbHandle} + end, + apply_tx_options(Db, config:get(?TX_OPTIONS_SECTION)), + {Cluster, Db}. + + +apply_tx_options(Db, Cfg) -> + maps:map(fun(Option, {Type, Default}) -> + case lists:keyfind(atom_to_list(Option), 1, Cfg) of + false -> + case Default of + undefined -> ok; + _Defined -> apply_tx_option(Db, Option, Default, Type) + end; + {_K, Val} -> + apply_tx_option(Db, Option, Val, Type) + end + end, ?TX_OPTIONS). + + +apply_tx_option(Db, Option, Val, integer) -> + try + set_option(Db, Option, list_to_integer(Val)) + catch + error:badarg -> + Msg = "~p : Invalid integer tx option ~p = ~p", + couch_log:error(Msg, [?MODULE, Option, Val]) + end; + +apply_tx_option(Db, Option, Val, binary) -> + BinVal = list_to_binary(Val), + case size(BinVal) < 16 of + true -> + set_option(Db, Option, BinVal); + false -> + Msg = "~p : String tx option ~p is larger than 16 bytes", + couch_log:error(Msg, [?MODULE, Option]) + end. + + +set_option(Db, Option, Val) -> + try + erlfdb:set_option(Db, Option, Val) + catch + % This could happen if the option is not supported by erlfdb or + % fdbsever. 
+ error:badarg -> + Msg = "~p : Could not set fdb tx option ~p = ~p", + couch_log:error(Msg, [?MODULE, Option, Val]) + end. + + +sanitize(#{} = Db) -> + Db#{ + tx := undefined, + user_ctx := #user_ctx{}, + security_fun := undefined, + interactive := false + }. diff --git a/src/fabric/src/fabric2_sup.erl b/src/fabric/src/fabric2_sup.erl new file mode 100644 index 000000000..874a8c240 --- /dev/null +++ b/src/fabric/src/fabric2_sup.erl @@ -0,0 +1,69 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(fabric2_sup). +-behaviour(supervisor). +-vsn(1). + + +-export([ + start_link/1 +]). + +-export([ + init/1 +]). + + +start_link(Args) -> + supervisor:start_link({local, ?MODULE}, ?MODULE, Args). + + +init([]) -> + config:enable_feature(fdb), + Flags = {rest_for_one, 1, 5}, + Children = [ + { + fabric2_server, + {fabric2_server, start_link, []}, + permanent, + 5000, + worker, + [fabric2_server] + }, + { + fabric2_txids, + {fabric2_txids, start_link, []}, + permanent, + 5000, + worker, + [fabric2_server] + }, + { + fabric2_index, + {fabric2_index, start_link, []}, + permanent, + 5000, + worker, + [fabric2_index] + }, + { + fabric2_db_expiration, + {fabric2_db_expiration, start_link, []}, + permanent, + 5000, + worker, + [fabric2_db_expiration] + } + ], + ChildrenWithEpi = couch_epi:register_service(fabric2_epi, Children), + {ok, {Flags, ChildrenWithEpi}}. diff --git a/src/fabric/src/fabric2_txids.erl b/src/fabric/src/fabric2_txids.erl new file mode 100644 index 000000000..285e342ed --- /dev/null +++ b/src/fabric/src/fabric2_txids.erl @@ -0,0 +1,153 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(fabric2_txids). +-behaviour(gen_server). +-vsn(1). + + +-export([ + start_link/0, + create/2, + remove/1 +]). + + +-export([ + init/1, + terminate/2, + handle_call/3, + handle_cast/2, + handle_info/2, + code_change/3, + format_status/2 +]). + + +-include("fabric2.hrl"). + + +-define(ONE_HOUR, 3600000000). +-define(MAX_TX_IDS, 1000). + + +start_link() -> + gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). + + +create(Tx, undefined) -> + Prefix = fabric2_fdb:get_dir(Tx), + create(Tx, Prefix); + +create(_Tx, LayerPrefix) -> + {Mega, Secs, Micro} = os:timestamp(), + Key = {?TX_IDS, Mega, Secs, Micro, fabric2_util:uuid()}, + erlfdb_tuple:pack(Key, LayerPrefix). + + +remove(TxId) when is_binary(TxId) -> + gen_server:cast(?MODULE, {remove, TxId}); + +remove(undefined) -> + ok. + + + +init(_) -> + {ok, #{ + last_sweep => os:timestamp(), + txids => [] + }}. 
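create/2 embeds os:timestamp() in every transaction-id key, so the keys sort chronologically under the FDB tuple encoding and the periodic sweep further below can clear everything older than the previous sweep with a single range delete. The chronological predicate in plain Erlang (illustrative names; the real keys also carry the layer prefix and a random uuid):

-module(txid_sweep_sketch).
-export([key/0, older_than/2]).

key() ->
    {Mega, Secs, Micro} = os:timestamp(),
    {txid, Mega, Secs, Micro, crypto:strong_rand_bytes(16)}.

% Keys created before the last sweep timestamp; in FDB this set is just
% the range up to the packed {?TX_IDS, Mega, Secs, Micro} prefix.
older_than(Keys, SweepTS) ->
    lists:filter(fun({txid, M, S, U, _Id}) -> {M, S, U} < SweepTS end, Keys).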
+ + +terminate(_, #{txids := TxIds}) -> + if TxIds == [] -> ok; true -> + fabric2_fdb:transactional(fun(Tx) -> + lists:foreach(fun(TxId) -> + erlfdb:clear(Tx, TxId) + end, TxIds) + end) + end, + ok. + + +handle_call(Msg, _From, St) -> + {stop, {bad_call, Msg}, {bad_call, Msg}, St}. + + +handle_cast({remove, TxId}, St) -> + #{ + last_sweep := LastSweep, + txids := TxIds + } = St, + + NewTxIds = [TxId | TxIds], + NewSt = St#{txids := NewTxIds}, + + NeedsSweep = timer:now_diff(os:timestamp(), LastSweep) > ?ONE_HOUR, + + case NeedsSweep orelse length(NewTxIds) >= ?MAX_TX_IDS of + true -> + {noreply, clean(NewSt, NeedsSweep)}; + false -> + {noreply, NewSt} + end. + + +handle_info(Msg, St) -> + {stop, {bad_info, Msg}, St}. + + +code_change(_OldVsn, St, _Extra) -> + {ok, St}. + + +format_status(_Opt, [_PDict, State]) -> + #{ + txids := TxIds + } = State, + Scrubbed = State#{ + txids => {length, length(TxIds)} + }, + [{data, [{"State", + Scrubbed + }]}]. + + +clean(St, NeedsSweep) -> + #{ + last_sweep := LastSweep, + txids := TxIds + } = St, + fabric2_fdb:transactional(fun(Tx) -> + lists:foreach(fun(TxId) -> + erlfdb:clear(Tx, TxId) + end, TxIds), + case NeedsSweep of + true -> + sweep(Tx, LastSweep), + St#{ + last_sweep := os:timestamp(), + txids := [] + }; + false -> + St#{txids := []} + end + end). + + +sweep(Tx, {Mega, Secs, Micro}) -> + Prefix = fabric2_fdb:get_dir(Tx), + StartKey = erlfdb_tuple:pack({?TX_IDS}, Prefix), + EndKey = erlfdb_tuple:pack({?TX_IDS, Mega, Secs, Micro}, Prefix), + erlfdb:set_option(Tx, next_write_no_write_conflict_range), + erlfdb:clear_range(Tx, StartKey, EndKey). diff --git a/src/fabric/src/fabric2_users_db.erl b/src/fabric/src/fabric2_users_db.erl new file mode 100644 index 000000000..9a8a462c3 --- /dev/null +++ b/src/fabric/src/fabric2_users_db.erl @@ -0,0 +1,144 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(fabric2_users_db). + +-export([ + before_doc_update/3, + after_doc_read/2, + strip_non_public_fields/1 +]). + +-include_lib("couch/include/couch_db.hrl"). + +-define(NAME, <<"name">>). +-define(PASSWORD, <<"password">>). +-define(DERIVED_KEY, <<"derived_key">>). +-define(PASSWORD_SCHEME, <<"password_scheme">>). +-define(SIMPLE, <<"simple">>). +-define(PASSWORD_SHA, <<"password_sha">>). +-define(PBKDF2, <<"pbkdf2">>). +-define(ITERATIONS, <<"iterations">>). +-define(SALT, <<"salt">>). +-define(replace(L, K, V), lists:keystore(K, 1, L, {K, V})). + +-define( + DDOCS_ADMIN_ONLY, + <<"Only administrators can view design docs in the users database.">> +). + +% If the request's userCtx identifies an admin +% -> save_doc (see below) +% +% If the request's userCtx.name is null: +% -> save_doc +% // this is an anonymous user registering a new document +% // in case a user doc with the same id already exists, the anonymous +% // user will get a regular doc update conflict. 
+% If the request's userCtx.name doesn't match the doc's name +% -> 404 // Not Found +% Else +% -> save_doc +before_doc_update(Doc, Db, _UpdateType) -> + #user_ctx{name = Name} = fabric2_db:get_user_ctx(Db), + DocName = get_doc_name(Doc), + case (catch fabric2_db:check_is_admin(Db)) of + ok -> + save_doc(Doc); + _ when Name =:= DocName orelse Name =:= null -> + save_doc(Doc); + _ -> + throw(not_found) + end. + +% If newDoc.password == null || newDoc.password == undefined: +% -> +% noop +% Else -> // calculate password hash server side +% newDoc.password_sha = hash_pw(newDoc.password + salt) +% newDoc.salt = salt +% newDoc.password = null +save_doc(#doc{body={Body}} = Doc) -> + %% Support both schemes to smooth migration from legacy scheme + Scheme = config:get("couch_httpd_auth", "password_scheme", "pbkdf2"), + case {fabric2_util:get_value(?PASSWORD, Body), Scheme} of + {null, _} -> % server admins don't have a user-db password entry + Doc; + {undefined, _} -> + Doc; + {ClearPassword, "simple"} -> % deprecated + Salt = couch_uuids:random(), + PasswordSha = couch_passwords:simple(ClearPassword, Salt), + Body0 = ?replace(Body, ?PASSWORD_SCHEME, ?SIMPLE), + Body1 = ?replace(Body0, ?SALT, Salt), + Body2 = ?replace(Body1, ?PASSWORD_SHA, PasswordSha), + Body3 = proplists:delete(?PASSWORD, Body2), + Doc#doc{body={Body3}}; + {ClearPassword, "pbkdf2"} -> + Iterations = list_to_integer(config:get("couch_httpd_auth", "iterations", "1000")), + Salt = couch_uuids:random(), + DerivedKey = couch_passwords:pbkdf2(ClearPassword, Salt, Iterations), + Body0 = ?replace(Body, ?PASSWORD_SCHEME, ?PBKDF2), + Body1 = ?replace(Body0, ?ITERATIONS, Iterations), + Body2 = ?replace(Body1, ?DERIVED_KEY, DerivedKey), + Body3 = ?replace(Body2, ?SALT, Salt), + Body4 = proplists:delete(?PASSWORD, Body3), + Doc#doc{body={Body4}}; + {_ClearPassword, Scheme} -> + couch_log:error("[couch_httpd_auth] password_scheme value of '~p' is invalid.", [Scheme]), + throw({forbidden, "Server cannot hash passwords at this time."}) + end. + + +% If the doc is a design doc +% If the request's userCtx identifies an admin +% -> return doc +% Else +% -> 403 // Forbidden +% If the request's userCtx identifies an admin +% -> return doc +% If the request's userCtx.name doesn't match the doc's name +% -> 404 // Not Found +% Else +% -> return doc +after_doc_read(#doc{id = <<?DESIGN_DOC_PREFIX, _/binary>>} = Doc, Db) -> + case (catch fabric2_db:check_is_admin(Db)) of + ok -> Doc; + _ -> throw({forbidden, ?DDOCS_ADMIN_ONLY}) + end; +after_doc_read(Doc, Db) -> + #user_ctx{name = Name} = fabric2_db:get_user_ctx(Db), + DocName = get_doc_name(Doc), + case (catch fabric2_db:check_is_admin(Db)) of + ok -> + Doc; + _ when Name =:= DocName -> + Doc; + _ -> + Doc1 = strip_non_public_fields(Doc), + case Doc1 of + #doc{body={[]}} -> throw(not_found); + _ -> Doc1 + end + end. + + +get_doc_name(#doc{id= <<"org.couchdb.user:", Name/binary>>}) -> + Name; +get_doc_name(_) -> + undefined. + + +strip_non_public_fields(#doc{body={Props}}=Doc) -> + PublicFields = config:get("couch_httpd_auth", "public_fields", ""), + Public = re:split(PublicFields, "\\s*,\\s*", [{return, binary}]), + Doc#doc{body={[{K, V} || {K, V} <- Props, lists:member(K, Public)]}}. 
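strip_non_public_fields/1 reduces a user doc to whatever the public_fields setting whitelists: the comma-separated config value is split into binaries and every other property is dropped, which is also why an empty setting strips the body entirely and after_doc_read then answers not_found. The filter on its own, with the config string passed in to keep it self-contained (illustrative module name):

-module(public_fields_sketch).
-export([strip/2]).

strip(Props, PublicFieldsCsv) when is_list(Props) ->
    Public = re:split(PublicFieldsCsv, "\\s*,\\s*", [{return, binary}]),
    [{K, V} || {K, V} <- Props, lists:member(K, Public)].

For example, strip([{<<"name">>, <<"bob">>}, {<<"derived_key">>, <<"abc">>}], "name,roles") keeps only the name field.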
diff --git a/src/fabric/src/fabric2_util.erl b/src/fabric/src/fabric2_util.erl new file mode 100644 index 000000000..136762b34 --- /dev/null +++ b/src/fabric/src/fabric2_util.erl @@ -0,0 +1,405 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(fabric2_util). + + +-export([ + revinfo_to_revs/1, + revinfo_to_path/1, + sort_revinfos/1, + rev_size/1, + ldoc_size/1, + + seq_zero_vs/0, + seq_max_vs/0, + + user_ctx_to_json/1, + + validate_security_object/1, + + hash_atts/1, + + dbname_ends_with/2, + + get_value/2, + get_value/3, + to_hex/1, + from_hex/1, + uuid/0, + + encode_all_doc_key/1, + all_docs_view_opts/1, + + iso8601_timestamp/0, + now/1, + do_recovery/0, + + pmap/2, + pmap/3 +]). + + +-include_lib("couch/include/couch_db.hrl"). +-include_lib("couch_mrview/include/couch_mrview.hrl"). + + +revinfo_to_revs(RevInfo) -> + #{ + rev_id := {RevPos, Rev}, + rev_path := RevPath + } = RevInfo, + {RevPos, [Rev | RevPath]}. + + +revinfo_to_path(RevInfo) -> + #{ + rev_id := {RevPos, Rev}, + rev_path := RevPath + } = RevInfo, + Revs = lists:reverse(RevPath, [Rev]), + Path = revinfo_to_path(RevInfo, Revs), + {RevPos - length(Revs) + 1, Path}. + + +revinfo_to_path(RevInfo, [Rev]) -> + {Rev, RevInfo, []}; + +revinfo_to_path(RevInfo, [Rev | Rest]) -> + {Rev, ?REV_MISSING, [revinfo_to_path(RevInfo, Rest)]}. + + +sort_revinfos(RevInfos) -> + CmpFun = fun(A, B) -> rev_sort_key(A) > rev_sort_key(B) end, + lists:sort(CmpFun, RevInfos). + + +rev_sort_key(#{} = RevInfo) -> + #{ + deleted := Deleted, + rev_id := {RevPos, Rev} + } = RevInfo, + {not Deleted, RevPos, Rev}. + + +rev_size(#doc{} = Doc) -> + #doc{ + id = Id, + revs = Revs, + body = Body, + atts = Atts + } = Doc, + + {Start, Rev} = case Revs of + {0, []} -> {0, <<>>}; + {N, [RevId | _]} -> {N, RevId} + end, + + lists:sum([ + size(Id), + size(erlfdb_tuple:pack({Start})), + size(Rev), + 1, % FDB tuple encoding of booleans for deleted flag is 1 byte + couch_ejson_size:encoded_size(Body), + lists:foldl(fun(Att, Acc) -> + couch_att:external_size(Att) + Acc + end, 0, Atts) + ]). + + +ldoc_size(#doc{id = <<"_local/", _/binary>>} = Doc) -> + #doc{ + id = Id, + revs = {0, [Rev]}, + deleted = Deleted, + body = Body + } = Doc, + + StoreRev = case Rev of + _ when is_integer(Rev) -> integer_to_binary(Rev); + _ when is_binary(Rev) -> Rev + end, + + case Deleted of + true -> + 0; + false -> + lists:sum([ + size(Id), + size(StoreRev), + couch_ejson_size:encoded_size(Body) + ]) + end. + + +seq_zero_vs() -> + {versionstamp, 0, 0, 0}. + + +seq_max_vs() -> + {versionstamp, 18446744073709551615, 65535, 65535}. + + +user_ctx_to_json(Db) -> + UserCtx = fabric2_db:get_user_ctx(Db), + {[ + {<<"db">>, fabric2_db:name(Db)}, + {<<"name">>, UserCtx#user_ctx.name}, + {<<"roles">>, UserCtx#user_ctx.roles} + ]}. 
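sort_revinfos/1 orders revisions by {not Deleted, RevPos, Rev} descending, so any live revision beats any deleted one, and among those the highest position (and then the lexicographically largest rev hash) wins. A small self-checking demo of that ordering (illustrative module):

-module(rev_order_sketch).
-export([demo/0]).

demo() ->
    Revs = [
        #{deleted => true,  rev_id => {4, <<"b">>}},
        #{deleted => false, rev_id => {3, <<"a">>}},
        #{deleted => false, rev_id => {3, <<"c">>}}
    ],
    SortKey = fun(#{deleted := D, rev_id := {Pos, Rev}}) -> {not D, Pos, Rev} end,
    Sorted = lists:sort(fun(A, B) -> SortKey(A) > SortKey(B) end, Revs),
    % The deleted revision loses even though its position is higher.
    [{3, <<"c">>}, {3, <<"a">>}, {4, <<"b">>}] =
        [RevId || #{rev_id := RevId} <- Sorted],
    ok.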
+ + +validate_security_object({SecProps}) -> + Admins = get_value(<<"admins">>, SecProps, {[]}), + ok = validate_names_and_roles(Admins), + + % we fallback to readers here for backwards compatibility + Readers = get_value(<<"readers">>, SecProps, {[]}), + Members = get_value(<<"members">>, SecProps, Readers), + ok = validate_names_and_roles(Members). + + +validate_names_and_roles({Props}) when is_list(Props) -> + validate_json_list_of_strings(<<"names">>, Props), + validate_json_list_of_strings(<<"roles">>, Props); +validate_names_and_roles(_) -> + throw("admins or members must be a JSON list of strings"). + + +validate_json_list_of_strings(Member, Props) -> + case get_value(Member, Props, []) of + Values when is_list(Values) -> + NonBinary = lists:filter(fun(V) -> not is_binary(V) end, Values), + if NonBinary == [] -> ok; true -> + MemberStr = binary_to_list(Member), + throw(MemberStr ++ " must be a JSON list of strings") + end; + _ -> + MemberStr = binary_to_list(Member), + throw(MemberStr ++ " must be a JSON list of strings") + end. + + +hash_atts([]) -> + <<>>; + +hash_atts(Atts) -> + SortedAtts = lists:sort(fun(A, B) -> + couch_att:fetch(name, A) =< couch_att:fetch(name, B) + end, Atts), + Md5St = lists:foldl(fun(Att, Acc) -> + {loc, _Db, _DocId, AttId} = couch_att:fetch(data, Att), + couch_hash:md5_hash_update(Acc, AttId) + end, couch_hash:md5_hash_init(), SortedAtts), + couch_hash:md5_hash_final(Md5St). + + +dbname_ends_with(#{} = Db, Suffix) -> + dbname_ends_with(fabric2_db:name(Db), Suffix); + +dbname_ends_with(DbName, Suffix) when is_binary(DbName), is_binary(Suffix) -> + Suffix == filename:basename(DbName). + + +get_value(Key, List) -> + get_value(Key, List, undefined). + + +get_value(Key, List, Default) -> + case lists:keysearch(Key, 1, List) of + {value, {Key,Value}} -> + Value; + false -> + Default + end. + + +to_hex(Bin) -> + list_to_binary(to_hex_int(Bin)). + + +to_hex_int(<<>>) -> + []; +to_hex_int(<<Hi:4, Lo:4, Rest/binary>>) -> + [nibble_to_hex(Hi), nibble_to_hex(Lo) | to_hex(Rest)]. + + +nibble_to_hex(I) -> + case I of + 0 -> $0; + 1 -> $1; + 2 -> $2; + 3 -> $3; + 4 -> $4; + 5 -> $5; + 6 -> $6; + 7 -> $7; + 8 -> $8; + 9 -> $9; + 10 -> $a; + 11 -> $b; + 12 -> $c; + 13 -> $d; + 14 -> $e; + 15 -> $f + end. + + +from_hex(Bin) -> + iolist_to_binary(from_hex_int(Bin)). + + +from_hex_int(<<>>) -> + []; +from_hex_int(<<Hi:8, Lo:8, RestBinary/binary>>) -> + HiNib = hex_to_nibble(Hi), + LoNib = hex_to_nibble(Lo), + [<<HiNib:4, LoNib:4>> | from_hex_int(RestBinary)]; +from_hex_int(<<BadHex/binary>>) -> + erlang:error({invalid_hex, BadHex}). + + +hex_to_nibble(N) -> + case N of + $0 -> 0; + $1 -> 1; + $2 -> 2; + $3 -> 3; + $4 -> 4; + $5 -> 5; + $6 -> 6; + $7 -> 7; + $8 -> 8; + $9 -> 9; + $a -> 10; + $A -> 10; + $b -> 11; + $B -> 11; + $c -> 12; + $C -> 12; + $d -> 13; + $D -> 13; + $e -> 14; + $E -> 14; + $f -> 15; + $F -> 15; + _ -> erlang:error({invalid_hex, N}) + end. + + +uuid() -> + to_hex(crypto:strong_rand_bytes(16)). + + +encode_all_doc_key(B) when is_binary(B) -> B; +encode_all_doc_key(Term) when Term < <<>> -> <<>>; +encode_all_doc_key(_) -> <<255>>. 
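encode_all_doc_key/1 leans on Erlang's cross-type term ordering to clamp non-string _all_docs keys: binaries pass through, any term that sorts before the empty binary collapses to the low bound <<>>, and whatever is left falls through to the high sentinel <<255>>. A few worked values, assuming the fabric application code is loaded (the bit-string case is contrived, only to show the last clause firing):

-module(all_doc_key_sketch).
-export([demo/0]).

demo() ->
    <<"doc-1">> = fabric2_util:encode_all_doc_key(<<"doc-1">>),
    <<>> = fabric2_util:encode_all_doc_key(null),       % atoms sort before binaries
    <<>> = fabric2_util:encode_all_doc_key(0),          % numbers sort before everything
    <<>> = fabric2_util:encode_all_doc_key({[]}),       % ejson objects are tuples
    <<255>> = fabric2_util:encode_all_doc_key(<<1:1>>), % non-binary bitstring
    ok.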
+ + +all_docs_view_opts(#mrargs{} = Args) -> + NS = couch_util:get_value(namespace, Args#mrargs.extra), + StartKey = case Args#mrargs.start_key of + undefined -> Args#mrargs.start_key_docid; + SKey -> SKey + end, + EndKey = case Args#mrargs.end_key of + undefined -> Args#mrargs.end_key_docid; + EKey -> EKey + end, + StartKeyOpts = case StartKey of + undefined -> []; + _ -> [{start_key, encode_all_doc_key(StartKey)}] + end, + EndKeyOpts = case {EndKey, Args#mrargs.inclusive_end} of + {undefined, _} -> []; + {_, false} -> [{end_key_gt, encode_all_doc_key(EndKey)}]; + {_, true} -> [{end_key, encode_all_doc_key(EndKey)}] + end, + + DocOpts = case Args#mrargs.conflicts of + true -> [conflicts | Args#mrargs.doc_options]; + _ -> Args#mrargs.doc_options + end, + + [ + {dir, Args#mrargs.direction}, + {limit, Args#mrargs.limit}, + {skip, Args#mrargs.skip}, + {update_seq, Args#mrargs.update_seq}, + {namespace, NS}, + {include_docs, Args#mrargs.include_docs}, + {doc_opts, DocOpts} + ] ++ StartKeyOpts ++ EndKeyOpts. + + +iso8601_timestamp() -> + Now = os:timestamp(), + {{Year, Month, Date}, {Hour, Minute, Second}} = + calendar:now_to_datetime(Now), + Format = "~4.10.0B-~2.10.0B-~2.10.0BT~2.10.0B:~2.10.0B:~2.10.0BZ", + io_lib:format(Format, [Year, Month, Date, Hour, Minute, Second]). + + +now(ms) -> + {Mega, Sec, Micro} = os:timestamp(), + (Mega * 1000000 + Sec) * 1000 + round(Micro / 1000); +now(sec) -> + now(ms) div 1000. + + +do_recovery() -> + config:get_boolean("couchdb", + "enable_database_recovery", false). + + +pmap(Fun, Args) -> + pmap(Fun, Args, []). + + +pmap(Fun, Args, Opts) -> + Refs = lists:map(fun(Arg) -> + {_, Ref} = spawn_monitor(fun() -> exit(pmap_exec(Fun, Arg)) end), + Ref + end, Args), + Timeout = fabric2_util:get_value(timeout, Opts, 5000), + lists:map(fun(Ref) -> + receive + {'DOWN', Ref, _, _, {'$res', Res}} -> + Res; + {'DOWN', Ref, _, _, {'$err', Tag, Reason, Stack}} -> + erlang:raise(Tag, Reason, Stack) + after Timeout -> + error({pmap_timeout, Timeout}) + end + end, Refs). + + +% OTP_RELEASE is defined in OTP 21+ only +-ifdef(OTP_RELEASE). + +pmap_exec(Fun, Arg) -> + try + {'$res', Fun(Arg)} + catch Tag:Reason:Stack -> + {'$err', Tag, Reason, Stack} + end. + +-else. + +pmap_exec(Fun, Arg) -> + try + {'$res', Fun(Arg)} + catch Tag:Reason -> + {'$err', Tag, Reason, erlang:get_stacktrace()} + end. + +-endif. diff --git a/src/fabric/src/fabric_db_create.erl b/src/fabric/src/fabric_db_create.erl index 03fabb4ea..a2833e6aa 100644 --- a/src/fabric/src/fabric_db_create.erl +++ b/src/fabric/src/fabric_db_create.erl @@ -185,44 +185,44 @@ make_document([#shard{dbname=DbName}|_] = Shards, Suffix, Options) -> db_exists(DbName) -> is_list(catch mem3:shards(DbName)). --ifdef(TEST). --include_lib("eunit/include/eunit.hrl"). - -db_exists_test_() -> - { - setup, - fun setup_all/0, - fun teardown_all/1, - [ - fun db_exists_for_existing_db/0, - fun db_exists_for_missing_db/0 - ] - }. - - -setup_all() -> - meck:new(mem3). - - -teardown_all(_) -> - meck:unload(). - - -db_exists_for_existing_db() -> - Mock = fun(DbName) when is_binary(DbName) -> - [#shard{dbname = DbName, range = [0,100]}] - end, - ok = meck:expect(mem3, shards, Mock), - ?assertEqual(true, db_exists(<<"foobar">>)), - ?assertEqual(true, meck:validate(mem3)). - - -db_exists_for_missing_db() -> - Mock = fun(DbName) -> - erlang:error(database_does_not_exist, DbName) - end, - ok = meck:expect(mem3, shards, Mock), - ?assertEqual(false, db_exists(<<"foobar">>)), - ?assertEqual(false, meck:validate(mem3)). - --endif. +%% -ifdef(TEST). 
+%% -include_lib("eunit/include/eunit.hrl"). +%% +%% db_exists_test_() -> +%% { +%% setup, +%% fun setup_all/0, +%% fun teardown_all/1, +%% [ +%% fun db_exists_for_existing_db/0, +%% fun db_exists_for_missing_db/0 +%% ] +%% }. +%% +%% +%% setup_all() -> +%% meck:new(mem3). +%% +%% +%% teardown_all(_) -> +%% meck:unload(). +%% +%% +%% db_exists_for_existing_db() -> +%% Mock = fun(DbName) when is_binary(DbName) -> +%% [#shard{dbname = DbName, range = [0,100]}] +%% end, +%% ok = meck:expect(mem3, shards, Mock), +%% ?assertEqual(true, db_exists(<<"foobar">>)), +%% ?assertEqual(true, meck:validate(mem3)). +%% +%% +%% db_exists_for_missing_db() -> +%% Mock = fun(DbName) -> +%% erlang:error(database_does_not_exist, DbName) +%% end, +%% ok = meck:expect(mem3, shards, Mock), +%% ?assertEqual(false, db_exists(<<"foobar">>)), +%% ?assertEqual(false, meck:validate(mem3)). +%% +%% -endif. diff --git a/src/fabric/src/fabric_db_info.erl b/src/fabric/src/fabric_db_info.erl index 40da678e5..6c7d2d177 100644 --- a/src/fabric/src/fabric_db_info.erl +++ b/src/fabric/src/fabric_db_info.erl @@ -138,34 +138,34 @@ get_cluster_info(Shards) -> {ok, [{q, Q}, {n, N}, {w, WR}, {r, WR}]}. --ifdef(TEST). --include_lib("eunit/include/eunit.hrl"). - -get_cluster_info_test_() -> - { - setup, - fun setup/0, - fun get_cluster_info_test_generator/1 - }. - - -setup() -> - Quorums = [1, 2, 3], - Shards = [1, 3, 5, 8, 12, 24], - [{N, Q} || N <- Quorums, Q <- Shards]. - -get_cluster_info_test_generator([]) -> - []; -get_cluster_info_test_generator([{N, Q} | Rest]) -> - {generator, - fun() -> - Nodes = lists:seq(1, 8), - Shards = mem3_util:create_partition_map(<<"foo">>, N, Q, Nodes), - {ok, Info} = get_cluster_info(Shards), - [ - ?_assertEqual(N, couch_util:get_value(n, Info)), - ?_assertEqual(Q, couch_util:get_value(q, Info)) - ] ++ get_cluster_info_test_generator(Rest) - end}. - --endif. +%% -ifdef(TEST). +%% -include_lib("eunit/include/eunit.hrl"). +%% +%% get_cluster_info_test_() -> +%% { +%% setup, +%% fun setup/0, +%% fun get_cluster_info_test_generator/1 +%% }. +%% +%% +%% setup() -> +%% Quorums = [1, 2, 3], +%% Shards = [1, 3, 5, 8, 12, 24], +%% [{N, Q} || N <- Quorums, Q <- Shards]. +%% +%% get_cluster_info_test_generator([]) -> +%% []; +%% get_cluster_info_test_generator([{N, Q} | Rest]) -> +%% {generator, +%% fun() -> +%% Nodes = lists:seq(1, 8), +%% Shards = mem3_util:create_partition_map(<<"foo">>, N, Q, Nodes), +%% {ok, Info} = get_cluster_info(Shards), +%% [ +%% ?_assertEqual(N, couch_util:get_value(n, Info)), +%% ?_assertEqual(Q, couch_util:get_value(q, Info)) +%% ] ++ get_cluster_info_test_generator(Rest) +%% end}. +%% +%% -endif. diff --git a/src/fabric/src/fabric_doc_open.erl b/src/fabric/src/fabric_doc_open.erl index 8ef604b60..fe3a79a1f 100644 --- a/src/fabric/src/fabric_doc_open.erl +++ b/src/fabric/src/fabric_doc_open.erl @@ -182,429 +182,429 @@ format_reply(Else, _) -> Else. --ifdef(TEST). --include_lib("eunit/include/eunit.hrl"). - --define(MECK_MODS, [ - couch_log, - couch_stats, - fabric, - fabric_util, - mem3, - rexi, - rexi_monitor -]). - - -setup_all() -> - meck:new(?MECK_MODS, [passthrough]). - - -teardown_all(_) -> - meck:unload(). - - -setup() -> - meck:reset(?MECK_MODS). - - -teardown(_) -> - ok. 
- - -open_doc_test_() -> - { - setup, - fun setup_all/0, - fun teardown_all/1, - { - foreach, - fun setup/0, - fun teardown/1, - [ - t_is_r_met(), - t_handle_message_down(), - t_handle_message_exit(), - t_handle_message_reply(), - t_store_node_revs(), - t_read_repair(), - t_handle_response_quorum_met(), - t_get_doc_info() - ] - } - }. - - -t_is_r_met() -> - ?_test(begin - Workers0 = [], - Workers1 = [nil], - Workers2 = [nil, nil], - - SuccessCases = [ - {{true, foo}, [fabric_util:kv(foo, 2)], 2}, - {{true, foo}, [fabric_util:kv(foo, 3)], 2}, - {{true, foo}, [fabric_util:kv(foo, 1)], 1}, - {{true, foo}, [fabric_util:kv(foo, 2), fabric_util:kv(bar, 1)], 2}, - {{true, bar}, [fabric_util:kv(bar, 1), fabric_util:kv(bar, 2)], 2}, - {{true, bar}, [fabric_util:kv(bar, 2), fabric_util:kv(foo, 1)], 2} - ], - lists:foreach(fun({Expect, Replies, Q}) -> - ?assertEqual(Expect, is_r_met(Workers0, Replies, Q)) - end, SuccessCases), - - WaitForMoreCases = [ - {[fabric_util:kv(foo, 1)], 2}, - {[fabric_util:kv(foo, 2)], 3}, - {[fabric_util:kv(foo, 1), fabric_util:kv(bar, 1)], 2} - ], - lists:foreach(fun({Replies, Q}) -> - ?assertEqual(wait_for_more, is_r_met(Workers2, Replies, Q)) - end, WaitForMoreCases), - - FailureCases = [ - {Workers0, [fabric_util:kv(foo, 1)], 2}, - {Workers1, [fabric_util:kv(foo, 1)], 2}, - {Workers1, [fabric_util:kv(foo, 1), fabric_util:kv(bar, 1)], 2}, - {Workers1, [fabric_util:kv(foo, 2)], 3} - ], - lists:foreach(fun({Workers, Replies, Q}) -> - ?assertEqual(no_more_workers, is_r_met(Workers, Replies, Q)) - end, FailureCases) - end). - - -t_handle_message_down() -> - Node0 = 'foo@localhost', - Node1 = 'bar@localhost', - Down0 = {rexi_DOWN, nil, {nil, Node0}, nil}, - Down1 = {rexi_DOWN, nil, {nil, Node1}, nil}, - Workers0 = [#shard{node=Node0} || _ <- [a, b]], - Worker1 = #shard{node=Node1}, - Workers1 = Workers0 ++ [Worker1], - - ?_test(begin - % Stop when no more workers are left - ?assertEqual( - {stop, #acc{workers=[]}}, - handle_message(Down0, nil, #acc{workers=Workers0}) - ), - - % Continue when we have more workers - ?assertEqual( - {ok, #acc{workers=[Worker1]}}, - handle_message(Down0, nil, #acc{workers=Workers1}) - ), - - % A second DOWN removes the remaining workers - ?assertEqual( - {stop, #acc{workers=[]}}, - handle_message(Down1, nil, #acc{workers=[Worker1]}) - ) - end). - - -t_handle_message_exit() -> - Exit = {rexi_EXIT, nil}, - Worker0 = #shard{ref=erlang:make_ref()}, - Worker1 = #shard{ref=erlang:make_ref()}, - - ?_test(begin - % Only removes the specified worker - ?assertEqual( - {ok, #acc{workers=[Worker1]}}, - handle_message(Exit, Worker0, #acc{workers=[Worker0, Worker1]}) - ), - - ?assertEqual( - {ok, #acc{workers=[Worker0]}}, - handle_message(Exit, Worker1, #acc{workers=[Worker0, Worker1]}) - ), - - % We bail if it was the last worker - ?assertEqual( - {stop, #acc{workers=[]}}, - handle_message(Exit, Worker0, #acc{workers=[Worker0]}) - ) - end). 
- - -t_handle_message_reply() -> - Worker0 = #shard{ref=erlang:make_ref()}, - Worker1 = #shard{ref=erlang:make_ref()}, - Worker2 = #shard{ref=erlang:make_ref()}, - Workers = [Worker0, Worker1, Worker2], - Acc0 = #acc{workers=Workers, r=2, replies=[]}, - - ?_test(begin - meck:expect(rexi, kill_all, fun(_) -> ok end), - - % Test that we continue when we haven't met R yet - ?assertMatch( - {ok, #acc{ - workers=[Worker0, Worker1], - replies=[{foo, {foo, 1}}] - }}, - handle_message(foo, Worker2, Acc0) - ), - - ?assertMatch( - {ok, #acc{ - workers=[Worker0, Worker1], - replies=[{bar, {bar, 1}}, {foo, {foo, 1}}] - }}, - handle_message(bar, Worker2, Acc0#acc{ - replies=[{foo, {foo, 1}}] - }) - ), - - % Test that we don't get a quorum when R isn't met. q_reply - % isn't set and state remains unchanged and {stop, NewAcc} - % is returned. Bit subtle on the assertions here. - - ?assertMatch( - {stop, #acc{workers=[], replies=[{foo, {foo, 1}}]}}, - handle_message(foo, Worker0, Acc0#acc{workers=[Worker0]}) - ), - - ?assertMatch( - {stop, #acc{ - workers=[], - replies=[{bar, {bar, 1}}, {foo, {foo, 1}}] - }}, - handle_message(bar, Worker0, Acc0#acc{ - workers=[Worker0], - replies=[{foo, {foo, 1}}] - }) - ), - - % Check that when R is met we stop with a new state and - % a q_reply. - - ?assertMatch( - {stop, #acc{ - workers=[], - replies=[{foo, {foo, 2}}], - state=r_met, - q_reply=foo - }}, - handle_message(foo, Worker1, Acc0#acc{ - workers=[Worker0, Worker1], - replies=[{foo, {foo, 1}}] - }) - ), - - ?assertEqual( - {stop, #acc{ - workers=[], - r=1, - replies=[{foo, {foo, 1}}], - state=r_met, - q_reply=foo - }}, - handle_message(foo, Worker0, Acc0#acc{r=1}) - ), - - ?assertMatch( - {stop, #acc{ - workers=[], - replies=[{bar, {bar, 1}}, {foo, {foo, 2}}], - state=r_met, - q_reply=foo - }}, - handle_message(foo, Worker0, Acc0#acc{ - workers=[Worker0], - replies=[{bar, {bar, 1}}, {foo, {foo, 1}}] - }) - ) - end). 
- - -t_store_node_revs() -> - W1 = #shard{node = w1, ref = erlang:make_ref()}, - W2 = #shard{node = w2, ref = erlang:make_ref()}, - W3 = #shard{node = w3, ref = erlang:make_ref()}, - Foo1 = {ok, #doc{id = <<"bar">>, revs = {1, [<<"foo">>]}}}, - Foo2 = {ok, #doc{id = <<"bar">>, revs = {2, [<<"foo2">>, <<"foo">>]}}}, - NFM = {not_found, missing}, - - InitAcc = #acc{workers = [W1, W2, W3], replies = [], r = 2}, - - ?_test(begin - meck:expect(rexi, kill_all, fun(_) -> ok end), - - % Simple case - {ok, #acc{node_revs = NodeRevs1}} = handle_message(Foo1, W1, InitAcc), - ?assertEqual([{w1, [{1, <<"foo">>}]}], NodeRevs1), - - % Make sure we only hold the head rev - {ok, #acc{node_revs = NodeRevs2}} = handle_message(Foo2, W1, InitAcc), - ?assertEqual([{w1, [{2, <<"foo2">>}]}], NodeRevs2), - - % Make sure we don't capture anything on error - {ok, #acc{node_revs = NodeRevs3}} = handle_message(NFM, W1, InitAcc), - ?assertEqual([], NodeRevs3), - - % Make sure we accumulate node revs - Acc1 = InitAcc#acc{node_revs = [{w1, [{1, <<"foo">>}]}]}, - {ok, #acc{node_revs = NodeRevs4}} = handle_message(Foo2, W2, Acc1), - ?assertEqual( - [{w2, [{2, <<"foo2">>}]}, {w1, [{1, <<"foo">>}]}], - NodeRevs4 - ), - - % Make sure rexi_DOWN doesn't modify node_revs - Down = {rexi_DOWN, nil, {nil, w1}, nil}, - {ok, #acc{node_revs = NodeRevs5}} = handle_message(Down, W2, Acc1), - ?assertEqual([{w1, [{1, <<"foo">>}]}], NodeRevs5), - - % Make sure rexi_EXIT doesn't modify node_revs - Exit = {rexi_EXIT, reason}, - {ok, #acc{node_revs = NodeRevs6}} = handle_message(Exit, W2, Acc1), - ?assertEqual([{w1, [{1, <<"foo">>}]}], NodeRevs6), - - % Make sure an error doesn't remove any node revs - {ok, #acc{node_revs = NodeRevs7}} = handle_message(NFM, W2, Acc1), - ?assertEqual([{w1, [{1, <<"foo">>}]}], NodeRevs7), - - % Make sure we have all of our node_revs when meeting - % quorum - {ok, Acc2} = handle_message(Foo1, W1, InitAcc), - {ok, Acc3} = handle_message(Foo2, W2, Acc2), - {stop, Acc4} = handle_message(NFM, W3, Acc3), - ?assertEqual( - [{w2, [{2, <<"foo2">>}]}, {w1, [{1, <<"foo">>}]}], - Acc4#acc.node_revs - ) - end). - - -t_read_repair() -> - Foo1 = {ok, #doc{revs = {1,[<<"foo">>]}}}, - Foo2 = {ok, #doc{revs = {2,[<<"foo2">>,<<"foo">>]}}}, - NFM = {not_found, missing}, - - ?_test(begin - meck:expect(couch_log, notice, fun(_, _) -> ok end), - meck:expect(couch_stats, increment_counter, fun(_) -> ok end), - - % Test when we have actual doc data to repair - meck:expect(fabric, update_docs, fun(_, [_], _) -> {ok, []} end), - Acc0 = #acc{ - dbname = <<"name">>, - replies = [fabric_util:kv(Foo1,1)] - }, - ?assertEqual(Foo1, read_repair(Acc0)), - - meck:expect(fabric, update_docs, fun(_, [_, _], _) -> {ok, []} end), - Acc1 = #acc{ - dbname = <<"name">>, - replies = [fabric_util:kv(Foo1,1), fabric_util:kv(Foo2,1)] - }, - ?assertEqual(Foo2, read_repair(Acc1)), - - % Test when we have nothing but errors - Acc2 = #acc{replies=[fabric_util:kv(NFM, 1)]}, - ?assertEqual(NFM, read_repair(Acc2)), - - Acc3 = #acc{replies=[fabric_util:kv(NFM,1), fabric_util:kv(foo,2)]}, - ?assertEqual(NFM, read_repair(Acc3)), - - Acc4 = #acc{replies=[fabric_util:kv(foo,1), fabric_util:kv(bar,1)]}, - ?assertEqual(bar, read_repair(Acc4)) - end). 
- - -t_handle_response_quorum_met() -> - Foo1 = {ok, #doc{revs = {1,[<<"foo">>]}}}, - Foo2 = {ok, #doc{revs = {2,[<<"foo2">>,<<"foo">>]}}}, - Bar1 = {ok, #doc{revs = {1,[<<"bar">>]}}}, - - ?_test(begin - meck:expect(couch_log, notice, fun(_, _) -> ok end), - meck:expect(fabric, update_docs, fun(_, _, _) -> {ok, []} end), - meck:expect(couch_stats, increment_counter, fun(_) -> ok end), - - BasicOkAcc = #acc{ - state=r_met, - replies=[fabric_util:kv(Foo1,2)], - q_reply=Foo1 - }, - ?assertEqual(Foo1, handle_response(BasicOkAcc)), - - WithAncestorsAcc = #acc{ - state=r_met, - replies=[fabric_util:kv(Foo1,1), fabric_util:kv(Foo2,2)], - q_reply=Foo2 - }, - ?assertEqual(Foo2, handle_response(WithAncestorsAcc)), - - % This also checks when the quorum isn't the most recent - % revision. - DeeperWinsAcc = #acc{ - state=r_met, - replies=[fabric_util:kv(Foo1,2), fabric_util:kv(Foo2,1)], - q_reply=Foo1 - }, - ?assertEqual(Foo2, handle_response(DeeperWinsAcc)), - - % Check that we return the proper doc based on rev - % (ie, pos is equal) - BiggerRevWinsAcc = #acc{ - state=r_met, - replies=[fabric_util:kv(Foo1,1), fabric_util:kv(Bar1,2)], - q_reply=Bar1 - }, - ?assertEqual(Foo1, handle_response(BiggerRevWinsAcc)) - - % r_not_met is a proxy to read_repair so we rely on - % read_repair_test for those conditions. - end). - - -t_get_doc_info() -> - ?_test(begin - meck:expect(fabric, update_docs, fun(_, _, _) -> {ok, []} end), - meck:expect(couch_stats, increment_counter, fun(_) -> ok end), - meck:expect(fabric_util, submit_jobs, fun(_, _, _) -> ok end), - meck:expect(fabric_util, create_monitors, fun(_) -> ok end), - meck:expect(rexi_monitor, stop, fun(_) -> ok end), - meck:expect(mem3, shards, fun(_, _) -> ok end), - meck:expect(mem3, n, fun(_) -> 3 end), - meck:expect(mem3, quorum, fun(_) -> 2 end), - - meck:expect(fabric_util, recv, fun(_, _, _, _) -> - {ok, #acc{state = r_not_met}} - end), - Rsp1 = fabric_doc_open:go("test", "one", [doc_info]), - ?assertEqual({error, quorum_not_met}, Rsp1), - - Rsp2 = fabric_doc_open:go("test", "one", [{doc_info, full}]), - ?assertEqual({error, quorum_not_met}, Rsp2), - - meck:expect(fabric_util, recv, fun(_, _, _, _) -> - {ok, #acc{state = r_met, q_reply = not_found}} - end), - MissingRsp1 = fabric_doc_open:go("test", "one", [doc_info]), - ?assertEqual({not_found, missing}, MissingRsp1), - MissingRsp2 = fabric_doc_open:go("test", "one", [{doc_info, full}]), - ?assertEqual({not_found, missing}, MissingRsp2), - - meck:expect(fabric_util, recv, fun(_, _, _, _) -> - A = #doc_info{}, - {ok, #acc{state = r_met, q_reply = {ok, A}}} - end), - {ok, Rec1} = fabric_doc_open:go("test", "one", [doc_info]), - ?assert(is_record(Rec1, doc_info)), - - meck:expect(fabric_util, recv, fun(_, _, _, _) -> - A = #full_doc_info{deleted = true}, - {ok, #acc{state = r_met, q_reply = {ok, A}}} - end), - Rsp3 = fabric_doc_open:go("test", "one", [{doc_info, full}]), - ?assertEqual({not_found, deleted}, Rsp3), - {ok, Rec2} = fabric_doc_open:go("test", "one", [{doc_info, full},deleted]), - ?assert(is_record(Rec2, full_doc_info)) - end). - --endif. +%% -ifdef(TEST). +%% -include_lib("eunit/include/eunit.hrl"). +%% +%% -define(MECK_MODS, [ +%% couch_log, +%% couch_stats, +%% fabric, +%% fabric_util, +%% mem3, +%% rexi, +%% rexi_monitor +%% ]). +%% +%% +%% setup_all() -> +%% meck:new(?MECK_MODS, [passthrough]). +%% +%% +%% teardown_all(_) -> +%% meck:unload(). +%% +%% +%% setup() -> +%% meck:reset(?MECK_MODS). +%% +%% +%% teardown(_) -> +%% ok. 
+%% +%% +%% open_doc_test_() -> +%% { +%% setup, +%% fun setup_all/0, +%% fun teardown_all/1, +%% { +%% foreach, +%% fun setup/0, +%% fun teardown/1, +%% [ +%% t_is_r_met(), +%% t_handle_message_down(), +%% t_handle_message_exit(), +%% t_handle_message_reply(), +%% t_store_node_revs(), +%% t_read_repair(), +%% t_handle_response_quorum_met(), +%% t_get_doc_info() +%% ] +%% } +%% }. +%% +%% +%% t_is_r_met() -> +%% ?_test(begin +%% Workers0 = [], +%% Workers1 = [nil], +%% Workers2 = [nil, nil], +%% +%% SuccessCases = [ +%% {{true, foo}, [fabric_util:kv(foo, 2)], 2}, +%% {{true, foo}, [fabric_util:kv(foo, 3)], 2}, +%% {{true, foo}, [fabric_util:kv(foo, 1)], 1}, +%% {{true, foo}, [fabric_util:kv(foo, 2), fabric_util:kv(bar, 1)], 2}, +%% {{true, bar}, [fabric_util:kv(bar, 1), fabric_util:kv(bar, 2)], 2}, +%% {{true, bar}, [fabric_util:kv(bar, 2), fabric_util:kv(foo, 1)], 2} +%% ], +%% lists:foreach(fun({Expect, Replies, Q}) -> +%% ?assertEqual(Expect, is_r_met(Workers0, Replies, Q)) +%% end, SuccessCases), +%% +%% WaitForMoreCases = [ +%% {[fabric_util:kv(foo, 1)], 2}, +%% {[fabric_util:kv(foo, 2)], 3}, +%% {[fabric_util:kv(foo, 1), fabric_util:kv(bar, 1)], 2} +%% ], +%% lists:foreach(fun({Replies, Q}) -> +%% ?assertEqual(wait_for_more, is_r_met(Workers2, Replies, Q)) +%% end, WaitForMoreCases), +%% +%% FailureCases = [ +%% {Workers0, [fabric_util:kv(foo, 1)], 2}, +%% {Workers1, [fabric_util:kv(foo, 1)], 2}, +%% {Workers1, [fabric_util:kv(foo, 1), fabric_util:kv(bar, 1)], 2}, +%% {Workers1, [fabric_util:kv(foo, 2)], 3} +%% ], +%% lists:foreach(fun({Workers, Replies, Q}) -> +%% ?assertEqual(no_more_workers, is_r_met(Workers, Replies, Q)) +%% end, FailureCases) +%% end). +%% +%% +%% t_handle_message_down() -> +%% Node0 = 'foo@localhost', +%% Node1 = 'bar@localhost', +%% Down0 = {rexi_DOWN, nil, {nil, Node0}, nil}, +%% Down1 = {rexi_DOWN, nil, {nil, Node1}, nil}, +%% Workers0 = [#shard{node=Node0} || _ <- [a, b]], +%% Worker1 = #shard{node=Node1}, +%% Workers1 = Workers0 ++ [Worker1], +%% +%% ?_test(begin +%% % Stop when no more workers are left +%% ?assertEqual( +%% {stop, #acc{workers=[]}}, +%% handle_message(Down0, nil, #acc{workers=Workers0}) +%% ), +%% +%% % Continue when we have more workers +%% ?assertEqual( +%% {ok, #acc{workers=[Worker1]}}, +%% handle_message(Down0, nil, #acc{workers=Workers1}) +%% ), +%% +%% % A second DOWN removes the remaining workers +%% ?assertEqual( +%% {stop, #acc{workers=[]}}, +%% handle_message(Down1, nil, #acc{workers=[Worker1]}) +%% ) +%% end). +%% +%% +%% t_handle_message_exit() -> +%% Exit = {rexi_EXIT, nil}, +%% Worker0 = #shard{ref=erlang:make_ref()}, +%% Worker1 = #shard{ref=erlang:make_ref()}, +%% +%% ?_test(begin +%% % Only removes the specified worker +%% ?assertEqual( +%% {ok, #acc{workers=[Worker1]}}, +%% handle_message(Exit, Worker0, #acc{workers=[Worker0, Worker1]}) +%% ), +%% +%% ?assertEqual( +%% {ok, #acc{workers=[Worker0]}}, +%% handle_message(Exit, Worker1, #acc{workers=[Worker0, Worker1]}) +%% ), +%% +%% % We bail if it was the last worker +%% ?assertEqual( +%% {stop, #acc{workers=[]}}, +%% handle_message(Exit, Worker0, #acc{workers=[Worker0]}) +%% ) +%% end). 
+%% +%% +%% t_handle_message_reply() -> +%% Worker0 = #shard{ref=erlang:make_ref()}, +%% Worker1 = #shard{ref=erlang:make_ref()}, +%% Worker2 = #shard{ref=erlang:make_ref()}, +%% Workers = [Worker0, Worker1, Worker2], +%% Acc0 = #acc{workers=Workers, r=2, replies=[]}, +%% +%% ?_test(begin +%% meck:expect(rexi, kill_all, fun(_) -> ok end), +%% +%% % Test that we continue when we haven't met R yet +%% ?assertMatch( +%% {ok, #acc{ +%% workers=[Worker0, Worker1], +%% replies=[{foo, {foo, 1}}] +%% }}, +%% handle_message(foo, Worker2, Acc0) +%% ), +%% +%% ?assertMatch( +%% {ok, #acc{ +%% workers=[Worker0, Worker1], +%% replies=[{bar, {bar, 1}}, {foo, {foo, 1}}] +%% }}, +%% handle_message(bar, Worker2, Acc0#acc{ +%% replies=[{foo, {foo, 1}}] +%% }) +%% ), +%% +%% % Test that we don't get a quorum when R isn't met. q_reply +%% % isn't set and state remains unchanged and {stop, NewAcc} +%% % is returned. Bit subtle on the assertions here. +%% +%% ?assertMatch( +%% {stop, #acc{workers=[], replies=[{foo, {foo, 1}}]}}, +%% handle_message(foo, Worker0, Acc0#acc{workers=[Worker0]}) +%% ), +%% +%% ?assertMatch( +%% {stop, #acc{ +%% workers=[], +%% replies=[{bar, {bar, 1}}, {foo, {foo, 1}}] +%% }}, +%% handle_message(bar, Worker0, Acc0#acc{ +%% workers=[Worker0], +%% replies=[{foo, {foo, 1}}] +%% }) +%% ), +%% +%% % Check that when R is met we stop with a new state and +%% % a q_reply. +%% +%% ?assertMatch( +%% {stop, #acc{ +%% workers=[], +%% replies=[{foo, {foo, 2}}], +%% state=r_met, +%% q_reply=foo +%% }}, +%% handle_message(foo, Worker1, Acc0#acc{ +%% workers=[Worker0, Worker1], +%% replies=[{foo, {foo, 1}}] +%% }) +%% ), +%% +%% ?assertEqual( +%% {stop, #acc{ +%% workers=[], +%% r=1, +%% replies=[{foo, {foo, 1}}], +%% state=r_met, +%% q_reply=foo +%% }}, +%% handle_message(foo, Worker0, Acc0#acc{r=1}) +%% ), +%% +%% ?assertMatch( +%% {stop, #acc{ +%% workers=[], +%% replies=[{bar, {bar, 1}}, {foo, {foo, 2}}], +%% state=r_met, +%% q_reply=foo +%% }}, +%% handle_message(foo, Worker0, Acc0#acc{ +%% workers=[Worker0], +%% replies=[{bar, {bar, 1}}, {foo, {foo, 1}}] +%% }) +%% ) +%% end). 
+%% +%% +%% t_store_node_revs() -> +%% W1 = #shard{node = w1, ref = erlang:make_ref()}, +%% W2 = #shard{node = w2, ref = erlang:make_ref()}, +%% W3 = #shard{node = w3, ref = erlang:make_ref()}, +%% Foo1 = {ok, #doc{id = <<"bar">>, revs = {1, [<<"foo">>]}}}, +%% Foo2 = {ok, #doc{id = <<"bar">>, revs = {2, [<<"foo2">>, <<"foo">>]}}}, +%% NFM = {not_found, missing}, +%% +%% InitAcc = #acc{workers = [W1, W2, W3], replies = [], r = 2}, +%% +%% ?_test(begin +%% meck:expect(rexi, kill_all, fun(_) -> ok end), +%% +%% % Simple case +%% {ok, #acc{node_revs = NodeRevs1}} = handle_message(Foo1, W1, InitAcc), +%% ?assertEqual([{w1, [{1, <<"foo">>}]}], NodeRevs1), +%% +%% % Make sure we only hold the head rev +%% {ok, #acc{node_revs = NodeRevs2}} = handle_message(Foo2, W1, InitAcc), +%% ?assertEqual([{w1, [{2, <<"foo2">>}]}], NodeRevs2), +%% +%% % Make sure we don't capture anything on error +%% {ok, #acc{node_revs = NodeRevs3}} = handle_message(NFM, W1, InitAcc), +%% ?assertEqual([], NodeRevs3), +%% +%% % Make sure we accumulate node revs +%% Acc1 = InitAcc#acc{node_revs = [{w1, [{1, <<"foo">>}]}]}, +%% {ok, #acc{node_revs = NodeRevs4}} = handle_message(Foo2, W2, Acc1), +%% ?assertEqual( +%% [{w2, [{2, <<"foo2">>}]}, {w1, [{1, <<"foo">>}]}], +%% NodeRevs4 +%% ), +%% +%% % Make sure rexi_DOWN doesn't modify node_revs +%% Down = {rexi_DOWN, nil, {nil, w1}, nil}, +%% {ok, #acc{node_revs = NodeRevs5}} = handle_message(Down, W2, Acc1), +%% ?assertEqual([{w1, [{1, <<"foo">>}]}], NodeRevs5), +%% +%% % Make sure rexi_EXIT doesn't modify node_revs +%% Exit = {rexi_EXIT, reason}, +%% {ok, #acc{node_revs = NodeRevs6}} = handle_message(Exit, W2, Acc1), +%% ?assertEqual([{w1, [{1, <<"foo">>}]}], NodeRevs6), +%% +%% % Make sure an error doesn't remove any node revs +%% {ok, #acc{node_revs = NodeRevs7}} = handle_message(NFM, W2, Acc1), +%% ?assertEqual([{w1, [{1, <<"foo">>}]}], NodeRevs7), +%% +%% % Make sure we have all of our node_revs when meeting +%% % quorum +%% {ok, Acc2} = handle_message(Foo1, W1, InitAcc), +%% {ok, Acc3} = handle_message(Foo2, W2, Acc2), +%% {stop, Acc4} = handle_message(NFM, W3, Acc3), +%% ?assertEqual( +%% [{w2, [{2, <<"foo2">>}]}, {w1, [{1, <<"foo">>}]}], +%% Acc4#acc.node_revs +%% ) +%% end). +%% +%% +%% t_read_repair() -> +%% Foo1 = {ok, #doc{revs = {1,[<<"foo">>]}}}, +%% Foo2 = {ok, #doc{revs = {2,[<<"foo2">>,<<"foo">>]}}}, +%% NFM = {not_found, missing}, +%% +%% ?_test(begin +%% meck:expect(couch_log, notice, fun(_, _) -> ok end), +%% meck:expect(couch_stats, increment_counter, fun(_) -> ok end), +%% +%% % Test when we have actual doc data to repair +%% meck:expect(fabric, update_docs, fun(_, [_], _) -> {ok, []} end), +%% Acc0 = #acc{ +%% dbname = <<"name">>, +%% replies = [fabric_util:kv(Foo1,1)] +%% }, +%% ?assertEqual(Foo1, read_repair(Acc0)), +%% +%% meck:expect(fabric, update_docs, fun(_, [_, _], _) -> {ok, []} end), +%% Acc1 = #acc{ +%% dbname = <<"name">>, +%% replies = [fabric_util:kv(Foo1,1), fabric_util:kv(Foo2,1)] +%% }, +%% ?assertEqual(Foo2, read_repair(Acc1)), +%% +%% % Test when we have nothing but errors +%% Acc2 = #acc{replies=[fabric_util:kv(NFM, 1)]}, +%% ?assertEqual(NFM, read_repair(Acc2)), +%% +%% Acc3 = #acc{replies=[fabric_util:kv(NFM,1), fabric_util:kv(foo,2)]}, +%% ?assertEqual(NFM, read_repair(Acc3)), +%% +%% Acc4 = #acc{replies=[fabric_util:kv(foo,1), fabric_util:kv(bar,1)]}, +%% ?assertEqual(bar, read_repair(Acc4)) +%% end). 
+%% +%% +%% t_handle_response_quorum_met() -> +%% Foo1 = {ok, #doc{revs = {1,[<<"foo">>]}}}, +%% Foo2 = {ok, #doc{revs = {2,[<<"foo2">>,<<"foo">>]}}}, +%% Bar1 = {ok, #doc{revs = {1,[<<"bar">>]}}}, +%% +%% ?_test(begin +%% meck:expect(couch_log, notice, fun(_, _) -> ok end), +%% meck:expect(fabric, update_docs, fun(_, _, _) -> {ok, []} end), +%% meck:expect(couch_stats, increment_counter, fun(_) -> ok end), +%% +%% BasicOkAcc = #acc{ +%% state=r_met, +%% replies=[fabric_util:kv(Foo1,2)], +%% q_reply=Foo1 +%% }, +%% ?assertEqual(Foo1, handle_response(BasicOkAcc)), +%% +%% WithAncestorsAcc = #acc{ +%% state=r_met, +%% replies=[fabric_util:kv(Foo1,1), fabric_util:kv(Foo2,2)], +%% q_reply=Foo2 +%% }, +%% ?assertEqual(Foo2, handle_response(WithAncestorsAcc)), +%% +%% % This also checks when the quorum isn't the most recent +%% % revision. +%% DeeperWinsAcc = #acc{ +%% state=r_met, +%% replies=[fabric_util:kv(Foo1,2), fabric_util:kv(Foo2,1)], +%% q_reply=Foo1 +%% }, +%% ?assertEqual(Foo2, handle_response(DeeperWinsAcc)), +%% +%% % Check that we return the proper doc based on rev +%% % (ie, pos is equal) +%% BiggerRevWinsAcc = #acc{ +%% state=r_met, +%% replies=[fabric_util:kv(Foo1,1), fabric_util:kv(Bar1,2)], +%% q_reply=Bar1 +%% }, +%% ?assertEqual(Foo1, handle_response(BiggerRevWinsAcc)) +%% +%% % r_not_met is a proxy to read_repair so we rely on +%% % read_repair_test for those conditions. +%% end). +%% +%% +%% t_get_doc_info() -> +%% ?_test(begin +%% meck:expect(fabric, update_docs, fun(_, _, _) -> {ok, []} end), +%% meck:expect(couch_stats, increment_counter, fun(_) -> ok end), +%% meck:expect(fabric_util, submit_jobs, fun(_, _, _) -> ok end), +%% meck:expect(fabric_util, create_monitors, fun(_) -> ok end), +%% meck:expect(rexi_monitor, stop, fun(_) -> ok end), +%% meck:expect(mem3, shards, fun(_, _) -> ok end), +%% meck:expect(mem3, n, fun(_) -> 3 end), +%% meck:expect(mem3, quorum, fun(_) -> 2 end), +%% +%% meck:expect(fabric_util, recv, fun(_, _, _, _) -> +%% {ok, #acc{state = r_not_met}} +%% end), +%% Rsp1 = fabric_doc_open:go("test", "one", [doc_info]), +%% ?assertEqual({error, quorum_not_met}, Rsp1), +%% +%% Rsp2 = fabric_doc_open:go("test", "one", [{doc_info, full}]), +%% ?assertEqual({error, quorum_not_met}, Rsp2), +%% +%% meck:expect(fabric_util, recv, fun(_, _, _, _) -> +%% {ok, #acc{state = r_met, q_reply = not_found}} +%% end), +%% MissingRsp1 = fabric_doc_open:go("test", "one", [doc_info]), +%% ?assertEqual({not_found, missing}, MissingRsp1), +%% MissingRsp2 = fabric_doc_open:go("test", "one", [{doc_info, full}]), +%% ?assertEqual({not_found, missing}, MissingRsp2), +%% +%% meck:expect(fabric_util, recv, fun(_, _, _, _) -> +%% A = #doc_info{}, +%% {ok, #acc{state = r_met, q_reply = {ok, A}}} +%% end), +%% {ok, Rec1} = fabric_doc_open:go("test", "one", [doc_info]), +%% ?assert(is_record(Rec1, doc_info)), +%% +%% meck:expect(fabric_util, recv, fun(_, _, _, _) -> +%% A = #full_doc_info{deleted = true}, +%% {ok, #acc{state = r_met, q_reply = {ok, A}}} +%% end), +%% Rsp3 = fabric_doc_open:go("test", "one", [{doc_info, full}]), +%% ?assertEqual({not_found, deleted}, Rsp3), +%% {ok, Rec2} = fabric_doc_open:go("test", "one", [{doc_info, full},deleted]), +%% ?assert(is_record(Rec2, full_doc_info)) +%% end). +%% +%% -endif. 
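The commented-out t_is_r_met cases above spell out the read-quorum rule exercised by fabric_doc_open: a reply wins once R workers have returned it, the coordinator keeps waiting while enough workers remain, and it gives up otherwise. Here is a minimal illustrative sketch of that decision; it is not the module's real is_r_met/3, and the {Key, {Reply, Count}} reply shape is assumed from the fabric_util:kv/2 calls in those tests.

    is_r_met_sketch(Workers, Replies, R) ->
        case [Reply || {_Key, {Reply, Count}} <- Replies, Count >= R] of
            [QuorumReply | _] ->
                {true, QuorumReply};    % some reply already has R matching responses
            [] when length(Workers) > 1 ->
                wait_for_more;          % enough outstanding workers, quorum still possible
            [] ->
                no_more_workers         % too few workers left to ever reach R
        end.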
diff --git a/src/fabric/src/fabric_doc_open_revs.erl b/src/fabric/src/fabric_doc_open_revs.erl index 3d7b9dc3c..aa7f53e9b 100644 --- a/src/fabric/src/fabric_doc_open_revs.erl +++ b/src/fabric/src/fabric_doc_open_revs.erl @@ -313,487 +313,487 @@ collapse_duplicate_revs_int([Reply | Rest]) -> [Reply | collapse_duplicate_revs(Rest)]. --ifdef(TEST). --include_lib("eunit/include/eunit.hrl"). - - -setup_all() -> - config:start_link([]), - meck:new([fabric, couch_stats, couch_log]), - meck:new(fabric_util, [passthrough]), - meck:expect(fabric, update_docs, fun(_, _, _) -> {ok, nil} end), - meck:expect(couch_stats, increment_counter, fun(_) -> ok end), - meck:expect(couch_log, notice, fun(_, _) -> ok end), - meck:expect(fabric_util, cleanup, fun(_) -> ok end). - - - -teardown_all(_) -> - meck:unload(), - config:stop(). - - -setup() -> - meck:reset([ - couch_log, - couch_stats, - fabric, - fabric_util - ]). - - -teardown(_) -> - ok. - - -state0(Revs, Latest) -> - #state{ - worker_count = 3, - workers = - [#shard{node='node1'}, #shard{node='node2'}, #shard{node='node3'}], - r = 2, - revs = Revs, - latest = Latest - }. - - -revs() -> [{1,<<"foo">>}, {1,<<"bar">>}, {1,<<"baz">>}]. - - -foo1() -> {ok, #doc{revs = {1, [<<"foo">>]}}}. -foo2() -> {ok, #doc{revs = {2, [<<"foo2">>, <<"foo">>]}}}. -foo2stemmed() -> {ok, #doc{revs = {2, [<<"foo2">>]}}}. -fooNF() -> {{not_found, missing}, {1,<<"foo">>}}. -foo2NF() -> {{not_found, missing}, {2, <<"foo2">>}}. -bar1() -> {ok, #doc{revs = {1, [<<"bar">>]}}}. -barNF() -> {{not_found, missing}, {1,<<"bar">>}}. -bazNF() -> {{not_found, missing}, {1,<<"baz">>}}. -baz1() -> {ok, #doc{revs = {1, [<<"baz">>]}}}. - - - -open_doc_revs_test_() -> - { - setup, - fun setup_all/0, - fun teardown_all/1, - { - foreach, - fun setup/0, - fun teardown/1, - [ - check_empty_response_not_quorum(), - check_basic_response(), - check_finish_quorum(), - check_finish_quorum_newer(), - check_no_quorum_on_second(), - check_done_on_third(), - check_specific_revs_first_msg(), - check_revs_done_on_agreement(), - check_latest_true(), - check_ancestor_counted_in_quorum(), - check_not_found_counts_for_descendant(), - check_worker_error_skipped(), - check_quorum_only_counts_valid_responses(), - check_empty_list_when_no_workers_reply(), - check_node_rev_stored(), - check_node_rev_store_head_only(), - check_node_rev_store_multiple(), - check_node_rev_dont_store_errors(), - check_node_rev_store_non_errors(), - check_node_rev_store_concatenate(), - check_node_rev_store_concantenate_multiple(), - check_node_rev_unmodified_on_down_or_exit(), - check_not_found_replies_are_removed_when_doc_found(), - check_not_found_returned_when_one_of_docs_not_found(), - check_not_found_returned_when_doc_not_found(), - check_longer_rev_list_returned(), - check_longer_rev_list_not_combined(), - check_not_found_removed_and_longer_rev_list() - ] - } - }. - - -% Tests for revs=all - - -check_empty_response_not_quorum() -> - % Simple smoke test that we don't think we're - % done with a first empty response - W1 = #shard{node='node1'}, - W2 = #shard{node='node2'}, - W3 = #shard{node='node3'}, - ?_assertMatch( - {ok, #state{workers = [W2, W3]}}, - handle_message({ok, []}, W1, state0(all, false)) - ). - - -check_basic_response() -> - % Check that we've handle a response - W1 = #shard{node='node1'}, - W2 = #shard{node='node2'}, - W3 = #shard{node='node3'}, - ?_assertMatch( - {ok, #state{reply_count = 1, workers = [W2, W3]}}, - handle_message({ok, [foo1(), bar1()]}, W1, state0(all, false)) - ). 
- - -check_finish_quorum() -> - % Two messages with the same revisions means we're done - ?_test(begin - W1 = #shard{node='node1'}, - W2 = #shard{node='node2'}, - S0 = state0(all, false), - {ok, S1} = handle_message({ok, [foo1(), bar1()]}, W1, S0), - Expect = {stop, [bar1(), foo1()]}, - ?assertEqual(Expect, handle_message({ok, [foo1(), bar1()]}, W2, S1)) - end). - - -check_finish_quorum_newer() -> - % We count a descendant of a revision for quorum so - % foo1 should count for foo2 which means we're finished. - % We also validate that read_repair was triggered. - ?_test(begin - W1 = #shard{node='node1'}, - W2 = #shard{node='node2'}, - S0 = state0(all, false), - {ok, S1} = handle_message({ok, [foo1(), bar1()]}, W1, S0), - Expect = {stop, [bar1(), foo2()]}, - ok = meck:reset(fabric), - ?assertEqual(Expect, handle_message({ok, [foo2(), bar1()]}, W2, S1)), - ok = meck:wait(fabric, update_docs, '_', 5000), - ?assertMatch( - [{_, {fabric, update_docs, [_, _, _]}, _}], - meck:history(fabric) - ) - end). - - -check_no_quorum_on_second() -> - % Quorum not yet met for the foo revision so we - % would wait for w3 - ?_test(begin - W1 = #shard{node='node1'}, - W2 = #shard{node='node2'}, - W3 = #shard{node='node3'}, - S0 = state0(all, false), - {ok, S1} = handle_message({ok, [foo1(), bar1()]}, W1, S0), - ?assertMatch( - {ok, #state{workers = [W3]}}, - handle_message({ok, [bar1()]}, W2, S1) - ) - end). - - -check_done_on_third() -> - % The third message of three means we're done no matter - % what. Every revision seen in this pattern should be - % included. - ?_test(begin - W1 = #shard{node='node1'}, - W2 = #shard{node='node2'}, - W3 = #shard{node='node3'}, - S0 = state0(all, false), - {ok, S1} = handle_message({ok, [foo1(), bar1()]}, W1, S0), - {ok, S2} = handle_message({ok, [bar1()]}, W2, S1), - Expect = {stop, [bar1(), foo1()]}, - ?assertEqual(Expect, handle_message({ok, [bar1()]}, W3, S2)) - end). - - -% Tests for a specific list of revs - - -check_specific_revs_first_msg() -> - ?_test(begin - W1 = #shard{node='node1'}, - W2 = #shard{node='node2'}, - W3 = #shard{node='node3'}, - S0 = state0(revs(), false), - ?assertMatch( - {ok, #state{reply_count = 1, workers = [W2, W3]}}, - handle_message({ok, [foo1(), bar1(), bazNF()]}, W1, S0) - ) - end). - - -check_revs_done_on_agreement() -> - ?_test(begin - W1 = #shard{node='node1'}, - W2 = #shard{node='node2'}, - S0 = state0(revs(), false), - Msg = {ok, [foo1(), bar1(), bazNF()]}, - {ok, S1} = handle_message(Msg, W1, S0), - Expect = {stop, [bar1(), foo1(), bazNF()]}, - ?assertEqual(Expect, handle_message(Msg, W2, S1)) - end). - - -check_latest_true() -> - ?_test(begin - W1 = #shard{node='node1'}, - W2 = #shard{node='node2'}, - S0 = state0(revs(), true), - Msg1 = {ok, [foo2(), bar1(), bazNF()]}, - Msg2 = {ok, [foo2(), bar1(), bazNF()]}, - {ok, S1} = handle_message(Msg1, W1, S0), - Expect = {stop, [bar1(), foo2(), bazNF()]}, - ?assertEqual(Expect, handle_message(Msg2, W2, S1)) - end). - - -check_ancestor_counted_in_quorum() -> - ?_test(begin - W1 = #shard{node='node1'}, - W2 = #shard{node='node2'}, - S0 = state0(revs(), true), - Msg1 = {ok, [foo1(), bar1(), bazNF()]}, - Msg2 = {ok, [foo2(), bar1(), bazNF()]}, - Expect = {stop, [bar1(), foo2(), bazNF()]}, - - % Older first - {ok, S1} = handle_message(Msg1, W1, S0), - ?assertEqual(Expect, handle_message(Msg2, W2, S1)), - - % Newer first - {ok, S2} = handle_message(Msg2, W2, S0), - ?assertEqual(Expect, handle_message(Msg1, W1, S2)) - end). 
- - -check_not_found_counts_for_descendant() -> - ?_test(begin - W1 = #shard{node='node1'}, - W2 = #shard{node='node2'}, - S0 = state0(revs(), true), - Msg1 = {ok, [foo1(), bar1(), bazNF()]}, - Msg2 = {ok, [foo1(), bar1(), baz1()]}, - Expect = {stop, [bar1(), baz1(), foo1()]}, - - % not_found first - {ok, S1} = handle_message(Msg1, W1, S0), - ?assertEqual(Expect, handle_message(Msg2, W2, S1)), - - % not_found second - {ok, S2} = handle_message(Msg2, W2, S0), - ?assertEqual(Expect, handle_message(Msg1, W1, S2)) - end). - - -check_worker_error_skipped() -> - ?_test(begin - W1 = #shard{node='node1'}, - W2 = #shard{node='node2'}, - W3 = #shard{node='node3'}, - S0 = state0(revs(), true), - Msg1 = {ok, [foo1(), bar1(), baz1()]}, - Msg2 = {rexi_EXIT, reason}, - Msg3 = {ok, [foo1(), bar1(), baz1()]}, - Expect = {stop, [bar1(), baz1(), foo1()]}, - - {ok, S1} = handle_message(Msg1, W1, S0), - {ok, S2} = handle_message(Msg2, W2, S1), - ?assertEqual(Expect, handle_message(Msg3, W3, S2)) - end). - - -check_quorum_only_counts_valid_responses() -> - ?_test(begin - W1 = #shard{node='node1'}, - W2 = #shard{node='node2'}, - W3 = #shard{node='node3'}, - S0 = state0(revs(), true), - Msg1 = {rexi_EXIT, reason}, - Msg2 = {rexi_EXIT, reason}, - Msg3 = {ok, [foo1(), bar1(), baz1()]}, - Expect = {stop, [bar1(), baz1(), foo1()]}, - - {ok, S1} = handle_message(Msg1, W1, S0), - {ok, S2} = handle_message(Msg2, W2, S1), - ?assertEqual(Expect, handle_message(Msg3, W3, S2)) - end). - - -check_empty_list_when_no_workers_reply() -> - ?_test(begin - W1 = #shard{node='node1'}, - W2 = #shard{node='node2'}, - W3 = #shard{node='node3'}, - S0 = state0(revs(), true), - Msg1 = {rexi_EXIT, reason}, - Msg2 = {rexi_EXIT, reason}, - Msg3 = {rexi_DOWN, nodedown, {nil, node()}, nil}, - Expect = {stop, all_workers_died}, - - {ok, S1} = handle_message(Msg1, W1, S0), - {ok, S2} = handle_message(Msg2, W2, S1), - ?assertEqual(Expect, handle_message(Msg3, W3, S2)) - end). - - -check_node_rev_stored() -> - ?_test(begin - W1 = #shard{node = node1}, - S0 = state0([], true), - - {ok, S1} = handle_message({ok, [foo1()]}, W1, S0), - ?assertEqual([{node1, [{1, <<"foo">>}]}], S1#state.node_revs) - end). - - -check_node_rev_store_head_only() -> - ?_test(begin - W1 = #shard{node = node1}, - S0 = state0([], true), - - {ok, S1} = handle_message({ok, [foo2()]}, W1, S0), - ?assertEqual([{node1, [{2, <<"foo2">>}]}], S1#state.node_revs) - end). - - -check_node_rev_store_multiple() -> - ?_test(begin - W1 = #shard{node = node1}, - S0 = state0([], true), - - {ok, S1} = handle_message({ok, [foo1(), foo2()]}, W1, S0), - ?assertEqual( - [{node1, [{2, <<"foo2">>}, {1, <<"foo">>}]}], - S1#state.node_revs - ) - end). - - -check_node_rev_dont_store_errors() -> - ?_test(begin - W1 = #shard{node = node1}, - S0 = state0([], true), - - {ok, S1} = handle_message({ok, [barNF()]}, W1, S0), - ?assertEqual([], S1#state.node_revs) - end). - - -check_node_rev_store_non_errors() -> - ?_test(begin - W1 = #shard{node = node1}, - S0 = state0([], true), - - {ok, S1} = handle_message({ok, [foo1(), barNF()]}, W1, S0), - ?assertEqual([{node1, [{1, <<"foo">>}]}], S1#state.node_revs) - end). - - -check_node_rev_store_concatenate() -> - ?_test(begin - W2 = #shard{node = node2}, - S0 = state0([], true), - S1 = S0#state{node_revs = [{node1, [{1, <<"foo">>}]}]}, - - {ok, S2} = handle_message({ok, [foo2()]}, W2, S1), - ?assertEqual( - [{node2, [{2, <<"foo2">>}]}, {node1, [{1, <<"foo">>}]}], - S2#state.node_revs - ) - end). 
- - -check_node_rev_store_concantenate_multiple() -> - ?_test(begin - W2 = #shard{node = node2}, - S0 = state0([], true), - S1 = S0#state{node_revs = [{node1, [{1, <<"foo">>}]}]}, - - {ok, S2} = handle_message({ok, [foo2(), bar1()]}, W2, S1), - ?assertEqual( - [ - {node2, [{1, <<"bar">>}, {2, <<"foo2">>}]}, - {node1, [{1, <<"foo">>}]} - ], - S2#state.node_revs - ) - end). - - -check_node_rev_unmodified_on_down_or_exit() -> - ?_test(begin - W2 = #shard{node = node2}, - S0 = state0([], true), - S1 = S0#state{node_revs = [{node1, [{1, <<"foo">>}]}]}, - - Down = {rexi_DOWN, nodedown, {nil, node()}, nil}, - {ok, S2} = handle_message(Down, W2, S1), - ?assertEqual( - [{node1, [{1, <<"foo">>}]}], - S2#state.node_revs - ), - - Exit = {rexi_EXIT, reason}, - {ok, S3} = handle_message(Exit, W2, S1), - ?assertEqual( - [{node1, [{1, <<"foo">>}]}], - S3#state.node_revs - ) - end). - - -check_not_found_replies_are_removed_when_doc_found() -> - ?_test(begin - Replies = replies_to_dict([foo1(), bar1(), fooNF()]), - Expect = [bar1(), foo1()], - ?assertEqual(Expect, dict_format_replies(Replies)) - end). - -check_not_found_returned_when_one_of_docs_not_found() -> - ?_test(begin - Replies = replies_to_dict([foo1(), foo2(), barNF()]), - Expect = [foo1(), foo2(), barNF()], - ?assertEqual(Expect, dict_format_replies(Replies)) - end). - -check_not_found_returned_when_doc_not_found() -> - ?_test(begin - Replies = replies_to_dict([fooNF(), barNF(), bazNF()]), - Expect = [barNF(), bazNF(), fooNF()], - ?assertEqual(Expect, dict_format_replies(Replies)) - end). - -check_longer_rev_list_returned() -> - ?_test(begin - Replies = replies_to_dict([foo2(), foo2stemmed()]), - Expect = [foo2()], - ?assertEqual(2, length(Replies)), - ?assertEqual(Expect, dict_format_replies(Replies)) - end). - -check_longer_rev_list_not_combined() -> - ?_test(begin - Replies = replies_to_dict([foo2(), foo2stemmed(), bar1()]), - Expect = [bar1(), foo2()], - ?assertEqual(3, length(Replies)), - ?assertEqual(Expect, dict_format_replies(Replies)) - end). - -check_not_found_removed_and_longer_rev_list() -> - ?_test(begin - Replies = replies_to_dict([foo2(), foo2stemmed(), foo2NF()]), - Expect = [foo2()], - ?assertEqual(3, length(Replies)), - ?assertEqual(Expect, dict_format_replies(Replies)) - end). - - -replies_to_dict(Replies) -> - [reply_to_element(R) || R <- Replies]. - -reply_to_element({ok, #doc{revs = Revs}} = Reply) -> - {_, [Rev | _]} = Revs, - {{Rev, Revs}, {Reply, 1}}; -reply_to_element(Reply) -> - {Reply, {Reply, 1}}. - --endif. +%% -ifdef(TEST). +%% -include_lib("eunit/include/eunit.hrl"). +%% +%% +%% setup_all() -> +%% config:start_link([]), +%% meck:new([fabric, couch_stats, couch_log]), +%% meck:new(fabric_util, [passthrough]), +%% meck:expect(fabric, update_docs, fun(_, _, _) -> {ok, nil} end), +%% meck:expect(couch_stats, increment_counter, fun(_) -> ok end), +%% meck:expect(couch_log, notice, fun(_, _) -> ok end), +%% meck:expect(fabric_util, cleanup, fun(_) -> ok end). +%% +%% +%% +%% teardown_all(_) -> +%% meck:unload(), +%% config:stop(). +%% +%% +%% setup() -> +%% meck:reset([ +%% couch_log, +%% couch_stats, +%% fabric, +%% fabric_util +%% ]). +%% +%% +%% teardown(_) -> +%% ok. +%% +%% +%% state0(Revs, Latest) -> +%% #state{ +%% worker_count = 3, +%% workers = +%% [#shard{node='node1'}, #shard{node='node2'}, #shard{node='node3'}], +%% r = 2, +%% revs = Revs, +%% latest = Latest +%% }. +%% +%% +%% revs() -> [{1,<<"foo">>}, {1,<<"bar">>}, {1,<<"baz">>}]. +%% +%% +%% foo1() -> {ok, #doc{revs = {1, [<<"foo">>]}}}. 
+%% foo2() -> {ok, #doc{revs = {2, [<<"foo2">>, <<"foo">>]}}}. +%% foo2stemmed() -> {ok, #doc{revs = {2, [<<"foo2">>]}}}. +%% fooNF() -> {{not_found, missing}, {1,<<"foo">>}}. +%% foo2NF() -> {{not_found, missing}, {2, <<"foo2">>}}. +%% bar1() -> {ok, #doc{revs = {1, [<<"bar">>]}}}. +%% barNF() -> {{not_found, missing}, {1,<<"bar">>}}. +%% bazNF() -> {{not_found, missing}, {1,<<"baz">>}}. +%% baz1() -> {ok, #doc{revs = {1, [<<"baz">>]}}}. +%% +%% +%% +%% open_doc_revs_test_() -> +%% { +%% setup, +%% fun setup_all/0, +%% fun teardown_all/1, +%% { +%% foreach, +%% fun setup/0, +%% fun teardown/1, +%% [ +%% check_empty_response_not_quorum(), +%% check_basic_response(), +%% check_finish_quorum(), +%% check_finish_quorum_newer(), +%% check_no_quorum_on_second(), +%% check_done_on_third(), +%% check_specific_revs_first_msg(), +%% check_revs_done_on_agreement(), +%% check_latest_true(), +%% check_ancestor_counted_in_quorum(), +%% check_not_found_counts_for_descendant(), +%% check_worker_error_skipped(), +%% check_quorum_only_counts_valid_responses(), +%% check_empty_list_when_no_workers_reply(), +%% check_node_rev_stored(), +%% check_node_rev_store_head_only(), +%% check_node_rev_store_multiple(), +%% check_node_rev_dont_store_errors(), +%% check_node_rev_store_non_errors(), +%% check_node_rev_store_concatenate(), +%% check_node_rev_store_concantenate_multiple(), +%% check_node_rev_unmodified_on_down_or_exit(), +%% check_not_found_replies_are_removed_when_doc_found(), +%% check_not_found_returned_when_one_of_docs_not_found(), +%% check_not_found_returned_when_doc_not_found(), +%% check_longer_rev_list_returned(), +%% check_longer_rev_list_not_combined(), +%% check_not_found_removed_and_longer_rev_list() +%% ] +%% } +%% }. +%% +%% +%% % Tests for revs=all +%% +%% +%% check_empty_response_not_quorum() -> +%% % Simple smoke test that we don't think we're +%% % done with a first empty response +%% W1 = #shard{node='node1'}, +%% W2 = #shard{node='node2'}, +%% W3 = #shard{node='node3'}, +%% ?_assertMatch( +%% {ok, #state{workers = [W2, W3]}}, +%% handle_message({ok, []}, W1, state0(all, false)) +%% ). +%% +%% +%% check_basic_response() -> +%% % Check that we've handle a response +%% W1 = #shard{node='node1'}, +%% W2 = #shard{node='node2'}, +%% W3 = #shard{node='node3'}, +%% ?_assertMatch( +%% {ok, #state{reply_count = 1, workers = [W2, W3]}}, +%% handle_message({ok, [foo1(), bar1()]}, W1, state0(all, false)) +%% ). +%% +%% +%% check_finish_quorum() -> +%% % Two messages with the same revisions means we're done +%% ?_test(begin +%% W1 = #shard{node='node1'}, +%% W2 = #shard{node='node2'}, +%% S0 = state0(all, false), +%% {ok, S1} = handle_message({ok, [foo1(), bar1()]}, W1, S0), +%% Expect = {stop, [bar1(), foo1()]}, +%% ?assertEqual(Expect, handle_message({ok, [foo1(), bar1()]}, W2, S1)) +%% end). +%% +%% +%% check_finish_quorum_newer() -> +%% % We count a descendant of a revision for quorum so +%% % foo1 should count for foo2 which means we're finished. +%% % We also validate that read_repair was triggered. +%% ?_test(begin +%% W1 = #shard{node='node1'}, +%% W2 = #shard{node='node2'}, +%% S0 = state0(all, false), +%% {ok, S1} = handle_message({ok, [foo1(), bar1()]}, W1, S0), +%% Expect = {stop, [bar1(), foo2()]}, +%% ok = meck:reset(fabric), +%% ?assertEqual(Expect, handle_message({ok, [foo2(), bar1()]}, W2, S1)), +%% ok = meck:wait(fabric, update_docs, '_', 5000), +%% ?assertMatch( +%% [{_, {fabric, update_docs, [_, _, _]}, _}], +%% meck:history(fabric) +%% ) +%% end). 
+%% +%% +%% check_no_quorum_on_second() -> +%% % Quorum not yet met for the foo revision so we +%% % would wait for w3 +%% ?_test(begin +%% W1 = #shard{node='node1'}, +%% W2 = #shard{node='node2'}, +%% W3 = #shard{node='node3'}, +%% S0 = state0(all, false), +%% {ok, S1} = handle_message({ok, [foo1(), bar1()]}, W1, S0), +%% ?assertMatch( +%% {ok, #state{workers = [W3]}}, +%% handle_message({ok, [bar1()]}, W2, S1) +%% ) +%% end). +%% +%% +%% check_done_on_third() -> +%% % The third message of three means we're done no matter +%% % what. Every revision seen in this pattern should be +%% % included. +%% ?_test(begin +%% W1 = #shard{node='node1'}, +%% W2 = #shard{node='node2'}, +%% W3 = #shard{node='node3'}, +%% S0 = state0(all, false), +%% {ok, S1} = handle_message({ok, [foo1(), bar1()]}, W1, S0), +%% {ok, S2} = handle_message({ok, [bar1()]}, W2, S1), +%% Expect = {stop, [bar1(), foo1()]}, +%% ?assertEqual(Expect, handle_message({ok, [bar1()]}, W3, S2)) +%% end). +%% +%% +%% % Tests for a specific list of revs +%% +%% +%% check_specific_revs_first_msg() -> +%% ?_test(begin +%% W1 = #shard{node='node1'}, +%% W2 = #shard{node='node2'}, +%% W3 = #shard{node='node3'}, +%% S0 = state0(revs(), false), +%% ?assertMatch( +%% {ok, #state{reply_count = 1, workers = [W2, W3]}}, +%% handle_message({ok, [foo1(), bar1(), bazNF()]}, W1, S0) +%% ) +%% end). +%% +%% +%% check_revs_done_on_agreement() -> +%% ?_test(begin +%% W1 = #shard{node='node1'}, +%% W2 = #shard{node='node2'}, +%% S0 = state0(revs(), false), +%% Msg = {ok, [foo1(), bar1(), bazNF()]}, +%% {ok, S1} = handle_message(Msg, W1, S0), +%% Expect = {stop, [bar1(), foo1(), bazNF()]}, +%% ?assertEqual(Expect, handle_message(Msg, W2, S1)) +%% end). +%% +%% +%% check_latest_true() -> +%% ?_test(begin +%% W1 = #shard{node='node1'}, +%% W2 = #shard{node='node2'}, +%% S0 = state0(revs(), true), +%% Msg1 = {ok, [foo2(), bar1(), bazNF()]}, +%% Msg2 = {ok, [foo2(), bar1(), bazNF()]}, +%% {ok, S1} = handle_message(Msg1, W1, S0), +%% Expect = {stop, [bar1(), foo2(), bazNF()]}, +%% ?assertEqual(Expect, handle_message(Msg2, W2, S1)) +%% end). +%% +%% +%% check_ancestor_counted_in_quorum() -> +%% ?_test(begin +%% W1 = #shard{node='node1'}, +%% W2 = #shard{node='node2'}, +%% S0 = state0(revs(), true), +%% Msg1 = {ok, [foo1(), bar1(), bazNF()]}, +%% Msg2 = {ok, [foo2(), bar1(), bazNF()]}, +%% Expect = {stop, [bar1(), foo2(), bazNF()]}, +%% +%% % Older first +%% {ok, S1} = handle_message(Msg1, W1, S0), +%% ?assertEqual(Expect, handle_message(Msg2, W2, S1)), +%% +%% % Newer first +%% {ok, S2} = handle_message(Msg2, W2, S0), +%% ?assertEqual(Expect, handle_message(Msg1, W1, S2)) +%% end). +%% +%% +%% check_not_found_counts_for_descendant() -> +%% ?_test(begin +%% W1 = #shard{node='node1'}, +%% W2 = #shard{node='node2'}, +%% S0 = state0(revs(), true), +%% Msg1 = {ok, [foo1(), bar1(), bazNF()]}, +%% Msg2 = {ok, [foo1(), bar1(), baz1()]}, +%% Expect = {stop, [bar1(), baz1(), foo1()]}, +%% +%% % not_found first +%% {ok, S1} = handle_message(Msg1, W1, S0), +%% ?assertEqual(Expect, handle_message(Msg2, W2, S1)), +%% +%% % not_found second +%% {ok, S2} = handle_message(Msg2, W2, S0), +%% ?assertEqual(Expect, handle_message(Msg1, W1, S2)) +%% end). 
+%% +%% +%% check_worker_error_skipped() -> +%% ?_test(begin +%% W1 = #shard{node='node1'}, +%% W2 = #shard{node='node2'}, +%% W3 = #shard{node='node3'}, +%% S0 = state0(revs(), true), +%% Msg1 = {ok, [foo1(), bar1(), baz1()]}, +%% Msg2 = {rexi_EXIT, reason}, +%% Msg3 = {ok, [foo1(), bar1(), baz1()]}, +%% Expect = {stop, [bar1(), baz1(), foo1()]}, +%% +%% {ok, S1} = handle_message(Msg1, W1, S0), +%% {ok, S2} = handle_message(Msg2, W2, S1), +%% ?assertEqual(Expect, handle_message(Msg3, W3, S2)) +%% end). +%% +%% +%% check_quorum_only_counts_valid_responses() -> +%% ?_test(begin +%% W1 = #shard{node='node1'}, +%% W2 = #shard{node='node2'}, +%% W3 = #shard{node='node3'}, +%% S0 = state0(revs(), true), +%% Msg1 = {rexi_EXIT, reason}, +%% Msg2 = {rexi_EXIT, reason}, +%% Msg3 = {ok, [foo1(), bar1(), baz1()]}, +%% Expect = {stop, [bar1(), baz1(), foo1()]}, +%% +%% {ok, S1} = handle_message(Msg1, W1, S0), +%% {ok, S2} = handle_message(Msg2, W2, S1), +%% ?assertEqual(Expect, handle_message(Msg3, W3, S2)) +%% end). +%% +%% +%% check_empty_list_when_no_workers_reply() -> +%% ?_test(begin +%% W1 = #shard{node='node1'}, +%% W2 = #shard{node='node2'}, +%% W3 = #shard{node='node3'}, +%% S0 = state0(revs(), true), +%% Msg1 = {rexi_EXIT, reason}, +%% Msg2 = {rexi_EXIT, reason}, +%% Msg3 = {rexi_DOWN, nodedown, {nil, node()}, nil}, +%% Expect = {stop, all_workers_died}, +%% +%% {ok, S1} = handle_message(Msg1, W1, S0), +%% {ok, S2} = handle_message(Msg2, W2, S1), +%% ?assertEqual(Expect, handle_message(Msg3, W3, S2)) +%% end). +%% +%% +%% check_node_rev_stored() -> +%% ?_test(begin +%% W1 = #shard{node = node1}, +%% S0 = state0([], true), +%% +%% {ok, S1} = handle_message({ok, [foo1()]}, W1, S0), +%% ?assertEqual([{node1, [{1, <<"foo">>}]}], S1#state.node_revs) +%% end). +%% +%% +%% check_node_rev_store_head_only() -> +%% ?_test(begin +%% W1 = #shard{node = node1}, +%% S0 = state0([], true), +%% +%% {ok, S1} = handle_message({ok, [foo2()]}, W1, S0), +%% ?assertEqual([{node1, [{2, <<"foo2">>}]}], S1#state.node_revs) +%% end). +%% +%% +%% check_node_rev_store_multiple() -> +%% ?_test(begin +%% W1 = #shard{node = node1}, +%% S0 = state0([], true), +%% +%% {ok, S1} = handle_message({ok, [foo1(), foo2()]}, W1, S0), +%% ?assertEqual( +%% [{node1, [{2, <<"foo2">>}, {1, <<"foo">>}]}], +%% S1#state.node_revs +%% ) +%% end). +%% +%% +%% check_node_rev_dont_store_errors() -> +%% ?_test(begin +%% W1 = #shard{node = node1}, +%% S0 = state0([], true), +%% +%% {ok, S1} = handle_message({ok, [barNF()]}, W1, S0), +%% ?assertEqual([], S1#state.node_revs) +%% end). +%% +%% +%% check_node_rev_store_non_errors() -> +%% ?_test(begin +%% W1 = #shard{node = node1}, +%% S0 = state0([], true), +%% +%% {ok, S1} = handle_message({ok, [foo1(), barNF()]}, W1, S0), +%% ?assertEqual([{node1, [{1, <<"foo">>}]}], S1#state.node_revs) +%% end). +%% +%% +%% check_node_rev_store_concatenate() -> +%% ?_test(begin +%% W2 = #shard{node = node2}, +%% S0 = state0([], true), +%% S1 = S0#state{node_revs = [{node1, [{1, <<"foo">>}]}]}, +%% +%% {ok, S2} = handle_message({ok, [foo2()]}, W2, S1), +%% ?assertEqual( +%% [{node2, [{2, <<"foo2">>}]}, {node1, [{1, <<"foo">>}]}], +%% S2#state.node_revs +%% ) +%% end). 
+%% +%% +%% check_node_rev_store_concantenate_multiple() -> +%% ?_test(begin +%% W2 = #shard{node = node2}, +%% S0 = state0([], true), +%% S1 = S0#state{node_revs = [{node1, [{1, <<"foo">>}]}]}, +%% +%% {ok, S2} = handle_message({ok, [foo2(), bar1()]}, W2, S1), +%% ?assertEqual( +%% [ +%% {node2, [{1, <<"bar">>}, {2, <<"foo2">>}]}, +%% {node1, [{1, <<"foo">>}]} +%% ], +%% S2#state.node_revs +%% ) +%% end). +%% +%% +%% check_node_rev_unmodified_on_down_or_exit() -> +%% ?_test(begin +%% W2 = #shard{node = node2}, +%% S0 = state0([], true), +%% S1 = S0#state{node_revs = [{node1, [{1, <<"foo">>}]}]}, +%% +%% Down = {rexi_DOWN, nodedown, {nil, node()}, nil}, +%% {ok, S2} = handle_message(Down, W2, S1), +%% ?assertEqual( +%% [{node1, [{1, <<"foo">>}]}], +%% S2#state.node_revs +%% ), +%% +%% Exit = {rexi_EXIT, reason}, +%% {ok, S3} = handle_message(Exit, W2, S1), +%% ?assertEqual( +%% [{node1, [{1, <<"foo">>}]}], +%% S3#state.node_revs +%% ) +%% end). +%% +%% +%% check_not_found_replies_are_removed_when_doc_found() -> +%% ?_test(begin +%% Replies = replies_to_dict([foo1(), bar1(), fooNF()]), +%% Expect = [bar1(), foo1()], +%% ?assertEqual(Expect, dict_format_replies(Replies)) +%% end). +%% +%% check_not_found_returned_when_one_of_docs_not_found() -> +%% ?_test(begin +%% Replies = replies_to_dict([foo1(), foo2(), barNF()]), +%% Expect = [foo1(), foo2(), barNF()], +%% ?assertEqual(Expect, dict_format_replies(Replies)) +%% end). +%% +%% check_not_found_returned_when_doc_not_found() -> +%% ?_test(begin +%% Replies = replies_to_dict([fooNF(), barNF(), bazNF()]), +%% Expect = [barNF(), bazNF(), fooNF()], +%% ?assertEqual(Expect, dict_format_replies(Replies)) +%% end). +%% +%% check_longer_rev_list_returned() -> +%% ?_test(begin +%% Replies = replies_to_dict([foo2(), foo2stemmed()]), +%% Expect = [foo2()], +%% ?assertEqual(2, length(Replies)), +%% ?assertEqual(Expect, dict_format_replies(Replies)) +%% end). +%% +%% check_longer_rev_list_not_combined() -> +%% ?_test(begin +%% Replies = replies_to_dict([foo2(), foo2stemmed(), bar1()]), +%% Expect = [bar1(), foo2()], +%% ?assertEqual(3, length(Replies)), +%% ?assertEqual(Expect, dict_format_replies(Replies)) +%% end). +%% +%% check_not_found_removed_and_longer_rev_list() -> +%% ?_test(begin +%% Replies = replies_to_dict([foo2(), foo2stemmed(), foo2NF()]), +%% Expect = [foo2()], +%% ?assertEqual(3, length(Replies)), +%% ?assertEqual(Expect, dict_format_replies(Replies)) +%% end). +%% +%% +%% replies_to_dict(Replies) -> +%% [reply_to_element(R) || R <- Replies]. +%% +%% reply_to_element({ok, #doc{revs = Revs}} = Reply) -> +%% {_, [Rev | _]} = Revs, +%% {{Rev, Revs}, {Reply, 1}}; +%% reply_to_element(Reply) -> +%% {Reply, {Reply, 1}}. +%% +%% -endif. diff --git a/src/fabric/src/fabric_doc_purge.erl b/src/fabric/src/fabric_doc_purge.erl index 3492f88c5..bda9039ba 100644 --- a/src/fabric/src/fabric_doc_purge.erl +++ b/src/fabric/src/fabric_doc_purge.erl @@ -224,348 +224,348 @@ has_quorum(Resps, Count, W) -> end. --ifdef(TEST). --include_lib("eunit/include/eunit.hrl"). - -purge_test_() -> - { - setup, - fun setup/0, - fun teardown/1, - [ - t_w2_ok(), - t_w3_ok(), - - t_w2_mixed_accepted(), - t_w3_mixed_accepted(), - - t_w2_exit1_ok(), - t_w2_exit2_accepted(), - t_w2_exit3_error(), - - t_w4_accepted(), - - t_mixed_ok_accepted(), - t_mixed_errors() - ] - }. - - -setup() -> - meck:new(couch_log), - meck:expect(couch_log, warning, fun(_, _) -> ok end), - meck:expect(couch_log, notice, fun(_, _) -> ok end). - - -teardown(_) -> - meck:unload(). 
- - -t_w2_ok() -> - ?_test(begin - Acc0 = create_init_acc(2), - Msg = {ok, [{ok, [{1, <<"foo">>}]}, {ok, [{2, <<"bar">>}]}]}, - - {ok, Acc1} = handle_message(Msg, worker(1, Acc0), Acc0), - ?assertEqual(2, length(Acc1#acc.worker_uuids)), - check_quorum(Acc1, false), - - {stop, Acc2} = handle_message(Msg, worker(2, Acc0), Acc1), - ?assertEqual(1, length(Acc2#acc.worker_uuids)), - check_quorum(Acc2, true), - - Expect = [{ok, [{1, <<"foo">>}]}, {ok, [{2, <<"bar">>}]}], - Resps = format_resps([<<"uuid1">>, <<"uuid2">>], Acc2), - ?assertEqual(Expect, Resps), - ?assertEqual(ok, resp_health(Resps)) - end). - - -t_w3_ok() -> - ?_test(begin - Acc0 = create_init_acc(3), - Msg = {ok, [{ok, [{1, <<"foo">>}]}, {ok, [{2, <<"bar">>}]}]}, - - {ok, Acc1} = handle_message(Msg, worker(1, Acc0), Acc0), - check_quorum(Acc1, false), - - {ok, Acc2} = handle_message(Msg, worker(2, Acc0), Acc1), - ?assertEqual(1, length(Acc2#acc.worker_uuids)), - check_quorum(Acc2, false), - - {stop, Acc3} = handle_message(Msg, worker(3, Acc0), Acc2), - ?assertEqual(0, length(Acc3#acc.worker_uuids)), - check_quorum(Acc3, true), - - Expect = [{ok, [{1, <<"foo">>}]}, {ok, [{2, <<"bar">>}]}], - Resps = format_resps([<<"uuid1">>, <<"uuid2">>], Acc3), - ?assertEqual(Expect, Resps), - ?assertEqual(ok, resp_health(Resps)) - end). - - -t_w2_mixed_accepted() -> - ?_test(begin - Acc0 = create_init_acc(2), - Msg1 = {ok, [{ok, [{1, <<"foo1">>}]}, {ok, [{2, <<"bar1">>}]}]}, - Msg2 = {ok, [{ok, [{1, <<"foo2">>}]}, {ok, [{2, <<"bar2">>}]}]}, - - {ok, Acc1} = handle_message(Msg1, worker(1, Acc0), Acc0), - ?assertEqual(2, length(Acc1#acc.worker_uuids)), - check_quorum(Acc1, false), - - {ok, Acc2} = handle_message(Msg2, worker(2, Acc0), Acc1), - ?assertEqual(1, length(Acc2#acc.worker_uuids)), - check_quorum(Acc2, false), - - {stop, Acc3} = handle_message(Msg1, worker(3, Acc0), Acc2), - ?assertEqual(0, length(Acc3#acc.worker_uuids)), - check_quorum(Acc3, true), - - Expect = [ - {accepted, [{1, <<"foo1">>}, {1, <<"foo2">>}]}, - {accepted, [{2, <<"bar1">>}, {2, <<"bar2">>}]} - ], - Resps = format_resps([<<"uuid1">>, <<"uuid2">>], Acc2), - ?assertEqual(Expect, Resps), - ?assertEqual(accepted, resp_health(Resps)) - end). - - -t_w3_mixed_accepted() -> - ?_test(begin - Acc0 = create_init_acc(3), - Msg1 = {ok, [{ok, [{1, <<"foo1">>}]}, {ok, [{2, <<"bar1">>}]}]}, - Msg2 = {ok, [{ok, [{1, <<"foo2">>}]}, {ok, [{2, <<"bar2">>}]}]}, - - {ok, Acc1} = handle_message(Msg1, worker(1, Acc0), Acc0), - ?assertEqual(2, length(Acc1#acc.worker_uuids)), - check_quorum(Acc1, false), - - {ok, Acc2} = handle_message(Msg2, worker(2, Acc0), Acc1), - ?assertEqual(1, length(Acc2#acc.worker_uuids)), - check_quorum(Acc2, false), - - {stop, Acc3} = handle_message(Msg2, worker(3, Acc0), Acc2), - ?assertEqual(0, length(Acc3#acc.worker_uuids)), - check_quorum(Acc3, true), - - Expect = [ - {accepted, [{1, <<"foo1">>}, {1, <<"foo2">>}]}, - {accepted, [{2, <<"bar1">>}, {2, <<"bar2">>}]} - ], - Resps = format_resps([<<"uuid1">>, <<"uuid2">>], Acc2), - ?assertEqual(Expect, Resps), - ?assertEqual(accepted, resp_health(Resps)) - end). 
- - -t_w2_exit1_ok() -> - ?_test(begin - Acc0 = create_init_acc(2), - Msg = {ok, [{ok, [{1, <<"foo">>}]}, {ok, [{2, <<"bar">>}]}]}, - ExitMsg = {rexi_EXIT, blargh}, - - {ok, Acc1} = handle_message(Msg, worker(1, Acc0), Acc0), - ?assertEqual(2, length(Acc1#acc.worker_uuids)), - check_quorum(Acc1, false), - - {ok, Acc2} = handle_message(ExitMsg, worker(2, Acc0), Acc1), - ?assertEqual(1, length(Acc2#acc.worker_uuids)), - check_quorum(Acc2, false), - - {stop, Acc3} = handle_message(Msg, worker(3, Acc0), Acc2), - ?assertEqual(0, length(Acc3#acc.worker_uuids)), - check_quorum(Acc3, true), - - Expect = [{ok, [{1, <<"foo">>}]}, {ok, [{2, <<"bar">>}]}], - Resps = format_resps([<<"uuid1">>, <<"uuid2">>], Acc3), - ?assertEqual(Expect, Resps), - ?assertEqual(ok, resp_health(Resps)) - end). - - -t_w2_exit2_accepted() -> - ?_test(begin - Acc0 = create_init_acc(2), - Msg = {ok, [{ok, [{1, <<"foo">>}]}, {ok, [{2, <<"bar">>}]}]}, - ExitMsg = {rexi_EXIT, blargh}, - - {ok, Acc1} = handle_message(Msg, worker(1, Acc0), Acc0), - ?assertEqual(2, length(Acc1#acc.worker_uuids)), - check_quorum(Acc1, false), - - {ok, Acc2} = handle_message(ExitMsg, worker(2, Acc0), Acc1), - ?assertEqual(1, length(Acc2#acc.worker_uuids)), - check_quorum(Acc2, false), - - {stop, Acc3} = handle_message(ExitMsg, worker(3, Acc0), Acc2), - ?assertEqual(0, length(Acc3#acc.worker_uuids)), - check_quorum(Acc3, true), - - Expect = [{accepted, [{1, <<"foo">>}]}, {accepted, [{2, <<"bar">>}]}], - Resps = format_resps([<<"uuid1">>, <<"uuid2">>], Acc3), - ?assertEqual(Expect, Resps), - ?assertEqual(accepted, resp_health(Resps)) - end). - - -t_w2_exit3_error() -> - ?_test(begin - Acc0 = create_init_acc(2), - ExitMsg = {rexi_EXIT, blargh}, - - {ok, Acc1} = handle_message(ExitMsg, worker(1, Acc0), Acc0), - ?assertEqual(2, length(Acc1#acc.worker_uuids)), - check_quorum(Acc1, false), - - {ok, Acc2} = handle_message(ExitMsg, worker(2, Acc0), Acc1), - ?assertEqual(1, length(Acc2#acc.worker_uuids)), - check_quorum(Acc2, false), - - {stop, Acc3} = handle_message(ExitMsg, worker(3, Acc0), Acc2), - ?assertEqual(0, length(Acc3#acc.worker_uuids)), - check_quorum(Acc3, true), - - Expect = [ - {error, internal_server_error}, - {error, internal_server_error} - ], - Resps = format_resps([<<"uuid1">>, <<"uuid2">>], Acc3), - ?assertEqual(Expect, Resps), - ?assertEqual(error, resp_health(Resps)) - end). - - -t_w4_accepted() -> - % Make sure we return when all workers have responded - % rather than wait around for a timeout if a user asks - % for a qourum with more than the available number of - % shards. - ?_test(begin - Acc0 = create_init_acc(4), - Msg = {ok, [{ok, [{1, <<"foo">>}]}, {ok, [{2, <<"bar">>}]}]}, - - {ok, Acc1} = handle_message(Msg, worker(1, Acc0), Acc0), - ?assertEqual(2, length(Acc1#acc.worker_uuids)), - check_quorum(Acc1, false), - - {ok, Acc2} = handle_message(Msg, worker(2, Acc0), Acc1), - ?assertEqual(1, length(Acc2#acc.worker_uuids)), - check_quorum(Acc2, false), - - {stop, Acc3} = handle_message(Msg, worker(3, Acc0), Acc2), - ?assertEqual(0, length(Acc3#acc.worker_uuids)), - check_quorum(Acc3, true), - - Expect = [{accepted, [{1, <<"foo">>}]}, {accepted, [{2, <<"bar">>}]}], - Resps = format_resps([<<"uuid1">>, <<"uuid2">>], Acc3), - ?assertEqual(Expect, Resps), - ?assertEqual(accepted, resp_health(Resps)) - end). 
- - -t_mixed_ok_accepted() -> - ?_test(begin - WorkerUUIDs = [ - {#shard{node = a, range = [1, 2]}, [<<"uuid1">>]}, - {#shard{node = b, range = [1, 2]}, [<<"uuid1">>]}, - {#shard{node = c, range = [1, 2]}, [<<"uuid1">>]}, - - {#shard{node = a, range = [3, 4]}, [<<"uuid2">>]}, - {#shard{node = b, range = [3, 4]}, [<<"uuid2">>]}, - {#shard{node = c, range = [3, 4]}, [<<"uuid2">>]} - ], - - Acc0 = #acc{ - worker_uuids = WorkerUUIDs, - resps = dict:from_list([{<<"uuid1">>, []}, {<<"uuid2">>, []}]), - uuid_counts = dict:from_list([{<<"uuid1">>, 3}, {<<"uuid2">>, 3}]), - w = 2 - }, - - Msg1 = {ok, [{ok, [{1, <<"foo">>}]}]}, - Msg2 = {ok, [{ok, [{2, <<"bar">>}]}]}, - ExitMsg = {rexi_EXIT, blargh}, - - {ok, Acc1} = handle_message(Msg1, worker(1, Acc0), Acc0), - {ok, Acc2} = handle_message(Msg1, worker(2, Acc0), Acc1), - {ok, Acc3} = handle_message(ExitMsg, worker(4, Acc0), Acc2), - {ok, Acc4} = handle_message(ExitMsg, worker(5, Acc0), Acc3), - {stop, Acc5} = handle_message(Msg2, worker(6, Acc0), Acc4), - - Expect = [{ok, [{1, <<"foo">>}]}, {accepted, [{2, <<"bar">>}]}], - Resps = format_resps([<<"uuid1">>, <<"uuid2">>], Acc5), - ?assertEqual(Expect, Resps), - ?assertEqual(accepted, resp_health(Resps)) - end). - - -t_mixed_errors() -> - ?_test(begin - WorkerUUIDs = [ - {#shard{node = a, range = [1, 2]}, [<<"uuid1">>]}, - {#shard{node = b, range = [1, 2]}, [<<"uuid1">>]}, - {#shard{node = c, range = [1, 2]}, [<<"uuid1">>]}, - - {#shard{node = a, range = [3, 4]}, [<<"uuid2">>]}, - {#shard{node = b, range = [3, 4]}, [<<"uuid2">>]}, - {#shard{node = c, range = [3, 4]}, [<<"uuid2">>]} - ], - - Acc0 = #acc{ - worker_uuids = WorkerUUIDs, - resps = dict:from_list([{<<"uuid1">>, []}, {<<"uuid2">>, []}]), - uuid_counts = dict:from_list([{<<"uuid1">>, 3}, {<<"uuid2">>, 3}]), - w = 2 - }, - - Msg = {ok, [{ok, [{1, <<"foo">>}]}]}, - ExitMsg = {rexi_EXIT, blargh}, - - {ok, Acc1} = handle_message(Msg, worker(1, Acc0), Acc0), - {ok, Acc2} = handle_message(Msg, worker(2, Acc0), Acc1), - {ok, Acc3} = handle_message(ExitMsg, worker(4, Acc0), Acc2), - {ok, Acc4} = handle_message(ExitMsg, worker(5, Acc0), Acc3), - {stop, Acc5} = handle_message(ExitMsg, worker(6, Acc0), Acc4), - - Expect = [{ok, [{1, <<"foo">>}]}, {error, internal_server_error}], - Resps = format_resps([<<"uuid1">>, <<"uuid2">>], Acc5), - ?assertEqual(Expect, Resps), - ?assertEqual(error, resp_health(Resps)) - end). - - -create_init_acc(W) -> - UUID1 = <<"uuid1">>, - UUID2 = <<"uuid2">>, - - Nodes = [node1, node2, node3], - Shards = mem3_util:create_partition_map(<<"foo">>, 3, 1, Nodes), - - % Create our worker_uuids. We're relying on the fact that - % we're using a fake Q=1 db so we don't have to worry - % about any hashing here. - WorkerUUIDs = lists:map(fun(Shard) -> - {Shard#shard{ref = erlang:make_ref()}, [UUID1, UUID2]} - end, Shards), - - #acc{ - worker_uuids = WorkerUUIDs, - resps = dict:from_list([{UUID1, []}, {UUID2, []}]), - uuid_counts = dict:from_list([{UUID1, 3}, {UUID2, 3}]), - w = W - }. - - -worker(N, #acc{worker_uuids = WorkerUUIDs}) -> - {Worker, _} = lists:nth(N, WorkerUUIDs), - Worker. - - -check_quorum(Acc, Expect) -> - dict:fold(fun(_Shard, Resps, _) -> - ?assertEqual(Expect, has_quorum(Resps, 3, Acc#acc.w)) - end, nil, Acc#acc.resps). - --endif. +%% -ifdef(TEST). +%% -include_lib("eunit/include/eunit.hrl"). 
+%% +%% purge_test_() -> +%% { +%% setup, +%% fun setup/0, +%% fun teardown/1, +%% [ +%% t_w2_ok(), +%% t_w3_ok(), +%% +%% t_w2_mixed_accepted(), +%% t_w3_mixed_accepted(), +%% +%% t_w2_exit1_ok(), +%% t_w2_exit2_accepted(), +%% t_w2_exit3_error(), +%% +%% t_w4_accepted(), +%% +%% t_mixed_ok_accepted(), +%% t_mixed_errors() +%% ] +%% }. +%% +%% +%% setup() -> +%% meck:new(couch_log), +%% meck:expect(couch_log, warning, fun(_, _) -> ok end), +%% meck:expect(couch_log, notice, fun(_, _) -> ok end). +%% +%% +%% teardown(_) -> +%% meck:unload(). +%% +%% +%% t_w2_ok() -> +%% ?_test(begin +%% Acc0 = create_init_acc(2), +%% Msg = {ok, [{ok, [{1, <<"foo">>}]}, {ok, [{2, <<"bar">>}]}]}, +%% +%% {ok, Acc1} = handle_message(Msg, worker(1, Acc0), Acc0), +%% ?assertEqual(2, length(Acc1#acc.worker_uuids)), +%% check_quorum(Acc1, false), +%% +%% {stop, Acc2} = handle_message(Msg, worker(2, Acc0), Acc1), +%% ?assertEqual(1, length(Acc2#acc.worker_uuids)), +%% check_quorum(Acc2, true), +%% +%% Expect = [{ok, [{1, <<"foo">>}]}, {ok, [{2, <<"bar">>}]}], +%% Resps = format_resps([<<"uuid1">>, <<"uuid2">>], Acc2), +%% ?assertEqual(Expect, Resps), +%% ?assertEqual(ok, resp_health(Resps)) +%% end). +%% +%% +%% t_w3_ok() -> +%% ?_test(begin +%% Acc0 = create_init_acc(3), +%% Msg = {ok, [{ok, [{1, <<"foo">>}]}, {ok, [{2, <<"bar">>}]}]}, +%% +%% {ok, Acc1} = handle_message(Msg, worker(1, Acc0), Acc0), +%% check_quorum(Acc1, false), +%% +%% {ok, Acc2} = handle_message(Msg, worker(2, Acc0), Acc1), +%% ?assertEqual(1, length(Acc2#acc.worker_uuids)), +%% check_quorum(Acc2, false), +%% +%% {stop, Acc3} = handle_message(Msg, worker(3, Acc0), Acc2), +%% ?assertEqual(0, length(Acc3#acc.worker_uuids)), +%% check_quorum(Acc3, true), +%% +%% Expect = [{ok, [{1, <<"foo">>}]}, {ok, [{2, <<"bar">>}]}], +%% Resps = format_resps([<<"uuid1">>, <<"uuid2">>], Acc3), +%% ?assertEqual(Expect, Resps), +%% ?assertEqual(ok, resp_health(Resps)) +%% end). +%% +%% +%% t_w2_mixed_accepted() -> +%% ?_test(begin +%% Acc0 = create_init_acc(2), +%% Msg1 = {ok, [{ok, [{1, <<"foo1">>}]}, {ok, [{2, <<"bar1">>}]}]}, +%% Msg2 = {ok, [{ok, [{1, <<"foo2">>}]}, {ok, [{2, <<"bar2">>}]}]}, +%% +%% {ok, Acc1} = handle_message(Msg1, worker(1, Acc0), Acc0), +%% ?assertEqual(2, length(Acc1#acc.worker_uuids)), +%% check_quorum(Acc1, false), +%% +%% {ok, Acc2} = handle_message(Msg2, worker(2, Acc0), Acc1), +%% ?assertEqual(1, length(Acc2#acc.worker_uuids)), +%% check_quorum(Acc2, false), +%% +%% {stop, Acc3} = handle_message(Msg1, worker(3, Acc0), Acc2), +%% ?assertEqual(0, length(Acc3#acc.worker_uuids)), +%% check_quorum(Acc3, true), +%% +%% Expect = [ +%% {accepted, [{1, <<"foo1">>}, {1, <<"foo2">>}]}, +%% {accepted, [{2, <<"bar1">>}, {2, <<"bar2">>}]} +%% ], +%% Resps = format_resps([<<"uuid1">>, <<"uuid2">>], Acc2), +%% ?assertEqual(Expect, Resps), +%% ?assertEqual(accepted, resp_health(Resps)) +%% end). 
+%% +%% +%% t_w3_mixed_accepted() -> +%% ?_test(begin +%% Acc0 = create_init_acc(3), +%% Msg1 = {ok, [{ok, [{1, <<"foo1">>}]}, {ok, [{2, <<"bar1">>}]}]}, +%% Msg2 = {ok, [{ok, [{1, <<"foo2">>}]}, {ok, [{2, <<"bar2">>}]}]}, +%% +%% {ok, Acc1} = handle_message(Msg1, worker(1, Acc0), Acc0), +%% ?assertEqual(2, length(Acc1#acc.worker_uuids)), +%% check_quorum(Acc1, false), +%% +%% {ok, Acc2} = handle_message(Msg2, worker(2, Acc0), Acc1), +%% ?assertEqual(1, length(Acc2#acc.worker_uuids)), +%% check_quorum(Acc2, false), +%% +%% {stop, Acc3} = handle_message(Msg2, worker(3, Acc0), Acc2), +%% ?assertEqual(0, length(Acc3#acc.worker_uuids)), +%% check_quorum(Acc3, true), +%% +%% Expect = [ +%% {accepted, [{1, <<"foo1">>}, {1, <<"foo2">>}]}, +%% {accepted, [{2, <<"bar1">>}, {2, <<"bar2">>}]} +%% ], +%% Resps = format_resps([<<"uuid1">>, <<"uuid2">>], Acc2), +%% ?assertEqual(Expect, Resps), +%% ?assertEqual(accepted, resp_health(Resps)) +%% end). +%% +%% +%% t_w2_exit1_ok() -> +%% ?_test(begin +%% Acc0 = create_init_acc(2), +%% Msg = {ok, [{ok, [{1, <<"foo">>}]}, {ok, [{2, <<"bar">>}]}]}, +%% ExitMsg = {rexi_EXIT, blargh}, +%% +%% {ok, Acc1} = handle_message(Msg, worker(1, Acc0), Acc0), +%% ?assertEqual(2, length(Acc1#acc.worker_uuids)), +%% check_quorum(Acc1, false), +%% +%% {ok, Acc2} = handle_message(ExitMsg, worker(2, Acc0), Acc1), +%% ?assertEqual(1, length(Acc2#acc.worker_uuids)), +%% check_quorum(Acc2, false), +%% +%% {stop, Acc3} = handle_message(Msg, worker(3, Acc0), Acc2), +%% ?assertEqual(0, length(Acc3#acc.worker_uuids)), +%% check_quorum(Acc3, true), +%% +%% Expect = [{ok, [{1, <<"foo">>}]}, {ok, [{2, <<"bar">>}]}], +%% Resps = format_resps([<<"uuid1">>, <<"uuid2">>], Acc3), +%% ?assertEqual(Expect, Resps), +%% ?assertEqual(ok, resp_health(Resps)) +%% end). +%% +%% +%% t_w2_exit2_accepted() -> +%% ?_test(begin +%% Acc0 = create_init_acc(2), +%% Msg = {ok, [{ok, [{1, <<"foo">>}]}, {ok, [{2, <<"bar">>}]}]}, +%% ExitMsg = {rexi_EXIT, blargh}, +%% +%% {ok, Acc1} = handle_message(Msg, worker(1, Acc0), Acc0), +%% ?assertEqual(2, length(Acc1#acc.worker_uuids)), +%% check_quorum(Acc1, false), +%% +%% {ok, Acc2} = handle_message(ExitMsg, worker(2, Acc0), Acc1), +%% ?assertEqual(1, length(Acc2#acc.worker_uuids)), +%% check_quorum(Acc2, false), +%% +%% {stop, Acc3} = handle_message(ExitMsg, worker(3, Acc0), Acc2), +%% ?assertEqual(0, length(Acc3#acc.worker_uuids)), +%% check_quorum(Acc3, true), +%% +%% Expect = [{accepted, [{1, <<"foo">>}]}, {accepted, [{2, <<"bar">>}]}], +%% Resps = format_resps([<<"uuid1">>, <<"uuid2">>], Acc3), +%% ?assertEqual(Expect, Resps), +%% ?assertEqual(accepted, resp_health(Resps)) +%% end). +%% +%% +%% t_w2_exit3_error() -> +%% ?_test(begin +%% Acc0 = create_init_acc(2), +%% ExitMsg = {rexi_EXIT, blargh}, +%% +%% {ok, Acc1} = handle_message(ExitMsg, worker(1, Acc0), Acc0), +%% ?assertEqual(2, length(Acc1#acc.worker_uuids)), +%% check_quorum(Acc1, false), +%% +%% {ok, Acc2} = handle_message(ExitMsg, worker(2, Acc0), Acc1), +%% ?assertEqual(1, length(Acc2#acc.worker_uuids)), +%% check_quorum(Acc2, false), +%% +%% {stop, Acc3} = handle_message(ExitMsg, worker(3, Acc0), Acc2), +%% ?assertEqual(0, length(Acc3#acc.worker_uuids)), +%% check_quorum(Acc3, true), +%% +%% Expect = [ +%% {error, internal_server_error}, +%% {error, internal_server_error} +%% ], +%% Resps = format_resps([<<"uuid1">>, <<"uuid2">>], Acc3), +%% ?assertEqual(Expect, Resps), +%% ?assertEqual(error, resp_health(Resps)) +%% end). 
+%% +%% +%% t_w4_accepted() -> +%% % Make sure we return when all workers have responded +%% % rather than wait around for a timeout if a user asks +%% % for a qourum with more than the available number of +%% % shards. +%% ?_test(begin +%% Acc0 = create_init_acc(4), +%% Msg = {ok, [{ok, [{1, <<"foo">>}]}, {ok, [{2, <<"bar">>}]}]}, +%% +%% {ok, Acc1} = handle_message(Msg, worker(1, Acc0), Acc0), +%% ?assertEqual(2, length(Acc1#acc.worker_uuids)), +%% check_quorum(Acc1, false), +%% +%% {ok, Acc2} = handle_message(Msg, worker(2, Acc0), Acc1), +%% ?assertEqual(1, length(Acc2#acc.worker_uuids)), +%% check_quorum(Acc2, false), +%% +%% {stop, Acc3} = handle_message(Msg, worker(3, Acc0), Acc2), +%% ?assertEqual(0, length(Acc3#acc.worker_uuids)), +%% check_quorum(Acc3, true), +%% +%% Expect = [{accepted, [{1, <<"foo">>}]}, {accepted, [{2, <<"bar">>}]}], +%% Resps = format_resps([<<"uuid1">>, <<"uuid2">>], Acc3), +%% ?assertEqual(Expect, Resps), +%% ?assertEqual(accepted, resp_health(Resps)) +%% end). +%% +%% +%% t_mixed_ok_accepted() -> +%% ?_test(begin +%% WorkerUUIDs = [ +%% {#shard{node = a, range = [1, 2]}, [<<"uuid1">>]}, +%% {#shard{node = b, range = [1, 2]}, [<<"uuid1">>]}, +%% {#shard{node = c, range = [1, 2]}, [<<"uuid1">>]}, +%% +%% {#shard{node = a, range = [3, 4]}, [<<"uuid2">>]}, +%% {#shard{node = b, range = [3, 4]}, [<<"uuid2">>]}, +%% {#shard{node = c, range = [3, 4]}, [<<"uuid2">>]} +%% ], +%% +%% Acc0 = #acc{ +%% worker_uuids = WorkerUUIDs, +%% resps = dict:from_list([{<<"uuid1">>, []}, {<<"uuid2">>, []}]), +%% uuid_counts = dict:from_list([{<<"uuid1">>, 3}, {<<"uuid2">>, 3}]), +%% w = 2 +%% }, +%% +%% Msg1 = {ok, [{ok, [{1, <<"foo">>}]}]}, +%% Msg2 = {ok, [{ok, [{2, <<"bar">>}]}]}, +%% ExitMsg = {rexi_EXIT, blargh}, +%% +%% {ok, Acc1} = handle_message(Msg1, worker(1, Acc0), Acc0), +%% {ok, Acc2} = handle_message(Msg1, worker(2, Acc0), Acc1), +%% {ok, Acc3} = handle_message(ExitMsg, worker(4, Acc0), Acc2), +%% {ok, Acc4} = handle_message(ExitMsg, worker(5, Acc0), Acc3), +%% {stop, Acc5} = handle_message(Msg2, worker(6, Acc0), Acc4), +%% +%% Expect = [{ok, [{1, <<"foo">>}]}, {accepted, [{2, <<"bar">>}]}], +%% Resps = format_resps([<<"uuid1">>, <<"uuid2">>], Acc5), +%% ?assertEqual(Expect, Resps), +%% ?assertEqual(accepted, resp_health(Resps)) +%% end). +%% +%% +%% t_mixed_errors() -> +%% ?_test(begin +%% WorkerUUIDs = [ +%% {#shard{node = a, range = [1, 2]}, [<<"uuid1">>]}, +%% {#shard{node = b, range = [1, 2]}, [<<"uuid1">>]}, +%% {#shard{node = c, range = [1, 2]}, [<<"uuid1">>]}, +%% +%% {#shard{node = a, range = [3, 4]}, [<<"uuid2">>]}, +%% {#shard{node = b, range = [3, 4]}, [<<"uuid2">>]}, +%% {#shard{node = c, range = [3, 4]}, [<<"uuid2">>]} +%% ], +%% +%% Acc0 = #acc{ +%% worker_uuids = WorkerUUIDs, +%% resps = dict:from_list([{<<"uuid1">>, []}, {<<"uuid2">>, []}]), +%% uuid_counts = dict:from_list([{<<"uuid1">>, 3}, {<<"uuid2">>, 3}]), +%% w = 2 +%% }, +%% +%% Msg = {ok, [{ok, [{1, <<"foo">>}]}]}, +%% ExitMsg = {rexi_EXIT, blargh}, +%% +%% {ok, Acc1} = handle_message(Msg, worker(1, Acc0), Acc0), +%% {ok, Acc2} = handle_message(Msg, worker(2, Acc0), Acc1), +%% {ok, Acc3} = handle_message(ExitMsg, worker(4, Acc0), Acc2), +%% {ok, Acc4} = handle_message(ExitMsg, worker(5, Acc0), Acc3), +%% {stop, Acc5} = handle_message(ExitMsg, worker(6, Acc0), Acc4), +%% +%% Expect = [{ok, [{1, <<"foo">>}]}, {error, internal_server_error}], +%% Resps = format_resps([<<"uuid1">>, <<"uuid2">>], Acc5), +%% ?assertEqual(Expect, Resps), +%% ?assertEqual(error, resp_health(Resps)) +%% end). 
+%% +%% +%% create_init_acc(W) -> +%% UUID1 = <<"uuid1">>, +%% UUID2 = <<"uuid2">>, +%% +%% Nodes = [node1, node2, node3], +%% Shards = mem3_util:create_partition_map(<<"foo">>, 3, 1, Nodes), +%% +%% % Create our worker_uuids. We're relying on the fact that +%% % we're using a fake Q=1 db so we don't have to worry +%% % about any hashing here. +%% WorkerUUIDs = lists:map(fun(Shard) -> +%% {Shard#shard{ref = erlang:make_ref()}, [UUID1, UUID2]} +%% end, Shards), +%% +%% #acc{ +%% worker_uuids = WorkerUUIDs, +%% resps = dict:from_list([{UUID1, []}, {UUID2, []}]), +%% uuid_counts = dict:from_list([{UUID1, 3}, {UUID2, 3}]), +%% w = W +%% }. +%% +%% +%% worker(N, #acc{worker_uuids = WorkerUUIDs}) -> +%% {Worker, _} = lists:nth(N, WorkerUUIDs), +%% Worker. +%% +%% +%% check_quorum(Acc, Expect) -> +%% dict:fold(fun(_Shard, Resps, _) -> +%% ?assertEqual(Expect, has_quorum(Resps, 3, Acc#acc.w)) +%% end, nil, Acc#acc.resps). +%% +%% -endif. diff --git a/src/fabric/src/fabric_doc_update.erl b/src/fabric/src/fabric_doc_update.erl index 69babc14b..d670e3ccf 100644 --- a/src/fabric/src/fabric_doc_update.erl +++ b/src/fabric/src/fabric_doc_update.erl @@ -220,158 +220,158 @@ validate_atomic_update(_DbName, AllDocs, true) -> throw({aborted, PreCommitFailures}). --ifdef(TEST). --include_lib("eunit/include/eunit.hrl"). - - -setup_all() -> - meck:new([couch_log, couch_stats]), - meck:expect(couch_log, warning, fun(_,_) -> ok end), - meck:expect(couch_stats, increment_counter, fun(_) -> ok end). - - -teardown_all(_) -> - meck:unload(). - - -doc_update_test_() -> - { - setup, - fun setup_all/0, - fun teardown_all/1, - [ - fun doc_update1/0, - fun doc_update2/0, - fun doc_update3/0 - ] - }. - - -% eunits -doc_update1() -> - Doc1 = #doc{revs = {1,[<<"foo">>]}}, - Doc2 = #doc{revs = {1,[<<"bar">>]}}, - Docs = [Doc1], - Docs2 = [Doc2, Doc1], - Dict = dict:from_list([{Doc,[]} || Doc <- Docs]), - Dict2 = dict:from_list([{Doc,[]} || Doc <- Docs2]), - - Shards = - mem3_util:create_partition_map("foo",3,1,["node1","node2","node3"]), - GroupedDocs = group_docs_by_shard_hack(<<"foo">>,Shards,Docs), - - - % test for W = 2 - AccW2 = {length(Shards), length(Docs), list_to_integer("2"), GroupedDocs, - Dict}, - - {ok,{WaitingCountW2_1,_,_,_,_}=AccW2_1} = - handle_message({ok, [{ok, Doc1}]},hd(Shards),AccW2), - ?assertEqual(WaitingCountW2_1,2), - {stop, FinalReplyW2 } = - handle_message({ok, [{ok, Doc1}]},lists:nth(2,Shards),AccW2_1), - ?assertEqual({ok, [{Doc1, {ok,Doc1}}]},FinalReplyW2), - - % test for W = 3 - AccW3 = {length(Shards), length(Docs), list_to_integer("3"), GroupedDocs, - Dict}, - - {ok,{WaitingCountW3_1,_,_,_,_}=AccW3_1} = - handle_message({ok, [{ok, Doc1}]},hd(Shards),AccW3), - ?assertEqual(WaitingCountW3_1,2), - - {ok,{WaitingCountW3_2,_,_,_,_}=AccW3_2} = - handle_message({ok, [{ok, Doc1}]},lists:nth(2,Shards),AccW3_1), - ?assertEqual(WaitingCountW3_2,1), - - {stop, FinalReplyW3 } = - handle_message({ok, [{ok, Doc1}]},lists:nth(3,Shards),AccW3_2), - ?assertEqual({ok, [{Doc1, {ok,Doc1}}]},FinalReplyW3), - - % test w quorum > # shards, which should fail immediately - - Shards2 = mem3_util:create_partition_map("foo",1,1,["node1"]), - GroupedDocs2 = group_docs_by_shard_hack(<<"foo">>,Shards2,Docs), - - AccW4 = - {length(Shards2), length(Docs), list_to_integer("2"), GroupedDocs2, Dict}, - Bool = - case handle_message({ok, [{ok, Doc1}]},hd(Shards2),AccW4) of - {stop, _Reply} -> - true; - _ -> false - end, - ?assertEqual(Bool,true), - - % Docs with no replies should end up as {error, internal_server_error} - SA1 = 
#shard{node=a, range=1}, - SB1 = #shard{node=b, range=1}, - SA2 = #shard{node=a, range=2}, - SB2 = #shard{node=b, range=2}, - GroupedDocs3 = [{SA1,[Doc1]}, {SB1,[Doc1]}, {SA2,[Doc2]}, {SB2,[Doc2]}], - StW5_0 = {length(GroupedDocs3), length(Docs2), 2, GroupedDocs3, Dict2}, - {ok, StW5_1} = handle_message({ok, [{ok, "A"}]}, SA1, StW5_0), - {ok, StW5_2} = handle_message({rexi_EXIT, nil}, SB1, StW5_1), - {ok, StW5_3} = handle_message({rexi_EXIT, nil}, SA2, StW5_2), - {stop, ReplyW5} = handle_message({rexi_EXIT, nil}, SB2, StW5_3), - ?assertEqual( - {error, [{Doc1,{accepted,"A"}},{Doc2,{error,internal_server_error}}]}, - ReplyW5 - ). - -doc_update2() -> - Doc1 = #doc{revs = {1,[<<"foo">>]}}, - Doc2 = #doc{revs = {1,[<<"bar">>]}}, - Docs = [Doc2, Doc1], - Shards = - mem3_util:create_partition_map("foo",3,1,["node1","node2","node3"]), - GroupedDocs = group_docs_by_shard_hack(<<"foo">>,Shards,Docs), - Acc0 = {length(Shards), length(Docs), list_to_integer("2"), GroupedDocs, - dict:from_list([{Doc,[]} || Doc <- Docs])}, - - {ok,{WaitingCount1,_,_,_,_}=Acc1} = - handle_message({ok, [{ok, Doc1},{ok, Doc2}]},hd(Shards),Acc0), - ?assertEqual(WaitingCount1,2), - - {ok,{WaitingCount2,_,_,_,_}=Acc2} = - handle_message({rexi_EXIT, 1},lists:nth(2,Shards),Acc1), - ?assertEqual(WaitingCount2,1), - - {stop, Reply} = - handle_message({rexi_EXIT, 1},lists:nth(3,Shards),Acc2), - - ?assertEqual({accepted, [{Doc1,{accepted,Doc2}}, {Doc2,{accepted,Doc1}}]}, - Reply). - -doc_update3() -> - Doc1 = #doc{revs = {1,[<<"foo">>]}}, - Doc2 = #doc{revs = {1,[<<"bar">>]}}, - Docs = [Doc2, Doc1], - Shards = - mem3_util:create_partition_map("foo",3,1,["node1","node2","node3"]), - GroupedDocs = group_docs_by_shard_hack(<<"foo">>,Shards,Docs), - Acc0 = {length(Shards), length(Docs), list_to_integer("2"), GroupedDocs, - dict:from_list([{Doc,[]} || Doc <- Docs])}, - - {ok,{WaitingCount1,_,_,_,_}=Acc1} = - handle_message({ok, [{ok, Doc1},{ok, Doc2}]},hd(Shards),Acc0), - ?assertEqual(WaitingCount1,2), - - {ok,{WaitingCount2,_,_,_,_}=Acc2} = - handle_message({rexi_EXIT, 1},lists:nth(2,Shards),Acc1), - ?assertEqual(WaitingCount2,1), - - {stop, Reply} = - handle_message({ok, [{ok, Doc1},{ok, Doc2}]},lists:nth(3,Shards),Acc2), - - ?assertEqual({ok, [{Doc1, {ok, Doc2}},{Doc2, {ok,Doc1}}]},Reply). - -% needed for testing to avoid having to start the mem3 application -group_docs_by_shard_hack(_DbName, Shards, Docs) -> - dict:to_list(lists:foldl(fun(#doc{id=_Id} = Doc, D0) -> - lists:foldl(fun(Shard, D1) -> - dict:append(Shard, Doc, D1) - end, D0, Shards) - end, dict:new(), Docs)). - --endif. +%% -ifdef(TEST). +%% -include_lib("eunit/include/eunit.hrl"). +%% +%% +%% setup_all() -> +%% meck:new([couch_log, couch_stats]), +%% meck:expect(couch_log, warning, fun(_,_) -> ok end), +%% meck:expect(couch_stats, increment_counter, fun(_) -> ok end). +%% +%% +%% teardown_all(_) -> +%% meck:unload(). +%% +%% +%% doc_update_test_() -> +%% { +%% setup, +%% fun setup_all/0, +%% fun teardown_all/1, +%% [ +%% fun doc_update1/0, +%% fun doc_update2/0, +%% fun doc_update3/0 +%% ] +%% }. 
+%% +%% +%% % eunits +%% doc_update1() -> +%% Doc1 = #doc{revs = {1,[<<"foo">>]}}, +%% Doc2 = #doc{revs = {1,[<<"bar">>]}}, +%% Docs = [Doc1], +%% Docs2 = [Doc2, Doc1], +%% Dict = dict:from_list([{Doc,[]} || Doc <- Docs]), +%% Dict2 = dict:from_list([{Doc,[]} || Doc <- Docs2]), +%% +%% Shards = +%% mem3_util:create_partition_map("foo",3,1,["node1","node2","node3"]), +%% GroupedDocs = group_docs_by_shard_hack(<<"foo">>,Shards,Docs), +%% +%% +%% % test for W = 2 +%% AccW2 = {length(Shards), length(Docs), list_to_integer("2"), GroupedDocs, +%% Dict}, +%% +%% {ok,{WaitingCountW2_1,_,_,_,_}=AccW2_1} = +%% handle_message({ok, [{ok, Doc1}]},hd(Shards),AccW2), +%% ?assertEqual(WaitingCountW2_1,2), +%% {stop, FinalReplyW2 } = +%% handle_message({ok, [{ok, Doc1}]},lists:nth(2,Shards),AccW2_1), +%% ?assertEqual({ok, [{Doc1, {ok,Doc1}}]},FinalReplyW2), +%% +%% % test for W = 3 +%% AccW3 = {length(Shards), length(Docs), list_to_integer("3"), GroupedDocs, +%% Dict}, +%% +%% {ok,{WaitingCountW3_1,_,_,_,_}=AccW3_1} = +%% handle_message({ok, [{ok, Doc1}]},hd(Shards),AccW3), +%% ?assertEqual(WaitingCountW3_1,2), +%% +%% {ok,{WaitingCountW3_2,_,_,_,_}=AccW3_2} = +%% handle_message({ok, [{ok, Doc1}]},lists:nth(2,Shards),AccW3_1), +%% ?assertEqual(WaitingCountW3_2,1), +%% +%% {stop, FinalReplyW3 } = +%% handle_message({ok, [{ok, Doc1}]},lists:nth(3,Shards),AccW3_2), +%% ?assertEqual({ok, [{Doc1, {ok,Doc1}}]},FinalReplyW3), +%% +%% % test w quorum > # shards, which should fail immediately +%% +%% Shards2 = mem3_util:create_partition_map("foo",1,1,["node1"]), +%% GroupedDocs2 = group_docs_by_shard_hack(<<"foo">>,Shards2,Docs), +%% +%% AccW4 = +%% {length(Shards2), length(Docs), list_to_integer("2"), GroupedDocs2, Dict}, +%% Bool = +%% case handle_message({ok, [{ok, Doc1}]},hd(Shards2),AccW4) of +%% {stop, _Reply} -> +%% true; +%% _ -> false +%% end, +%% ?assertEqual(Bool,true), +%% +%% % Docs with no replies should end up as {error, internal_server_error} +%% SA1 = #shard{node=a, range=1}, +%% SB1 = #shard{node=b, range=1}, +%% SA2 = #shard{node=a, range=2}, +%% SB2 = #shard{node=b, range=2}, +%% GroupedDocs3 = [{SA1,[Doc1]}, {SB1,[Doc1]}, {SA2,[Doc2]}, {SB2,[Doc2]}], +%% StW5_0 = {length(GroupedDocs3), length(Docs2), 2, GroupedDocs3, Dict2}, +%% {ok, StW5_1} = handle_message({ok, [{ok, "A"}]}, SA1, StW5_0), +%% {ok, StW5_2} = handle_message({rexi_EXIT, nil}, SB1, StW5_1), +%% {ok, StW5_3} = handle_message({rexi_EXIT, nil}, SA2, StW5_2), +%% {stop, ReplyW5} = handle_message({rexi_EXIT, nil}, SB2, StW5_3), +%% ?assertEqual( +%% {error, [{Doc1,{accepted,"A"}},{Doc2,{error,internal_server_error}}]}, +%% ReplyW5 +%% ). +%% +%% doc_update2() -> +%% Doc1 = #doc{revs = {1,[<<"foo">>]}}, +%% Doc2 = #doc{revs = {1,[<<"bar">>]}}, +%% Docs = [Doc2, Doc1], +%% Shards = +%% mem3_util:create_partition_map("foo",3,1,["node1","node2","node3"]), +%% GroupedDocs = group_docs_by_shard_hack(<<"foo">>,Shards,Docs), +%% Acc0 = {length(Shards), length(Docs), list_to_integer("2"), GroupedDocs, +%% dict:from_list([{Doc,[]} || Doc <- Docs])}, +%% +%% {ok,{WaitingCount1,_,_,_,_}=Acc1} = +%% handle_message({ok, [{ok, Doc1},{ok, Doc2}]},hd(Shards),Acc0), +%% ?assertEqual(WaitingCount1,2), +%% +%% {ok,{WaitingCount2,_,_,_,_}=Acc2} = +%% handle_message({rexi_EXIT, 1},lists:nth(2,Shards),Acc1), +%% ?assertEqual(WaitingCount2,1), +%% +%% {stop, Reply} = +%% handle_message({rexi_EXIT, 1},lists:nth(3,Shards),Acc2), +%% +%% ?assertEqual({accepted, [{Doc1,{accepted,Doc2}}, {Doc2,{accepted,Doc1}}]}, +%% Reply). 
+%% +%% doc_update3() -> +%% Doc1 = #doc{revs = {1,[<<"foo">>]}}, +%% Doc2 = #doc{revs = {1,[<<"bar">>]}}, +%% Docs = [Doc2, Doc1], +%% Shards = +%% mem3_util:create_partition_map("foo",3,1,["node1","node2","node3"]), +%% GroupedDocs = group_docs_by_shard_hack(<<"foo">>,Shards,Docs), +%% Acc0 = {length(Shards), length(Docs), list_to_integer("2"), GroupedDocs, +%% dict:from_list([{Doc,[]} || Doc <- Docs])}, +%% +%% {ok,{WaitingCount1,_,_,_,_}=Acc1} = +%% handle_message({ok, [{ok, Doc1},{ok, Doc2}]},hd(Shards),Acc0), +%% ?assertEqual(WaitingCount1,2), +%% +%% {ok,{WaitingCount2,_,_,_,_}=Acc2} = +%% handle_message({rexi_EXIT, 1},lists:nth(2,Shards),Acc1), +%% ?assertEqual(WaitingCount2,1), +%% +%% {stop, Reply} = +%% handle_message({ok, [{ok, Doc1},{ok, Doc2}]},lists:nth(3,Shards),Acc2), +%% +%% ?assertEqual({ok, [{Doc1, {ok, Doc2}},{Doc2, {ok,Doc1}}]},Reply). +%% +%% % needed for testing to avoid having to start the mem3 application +%% group_docs_by_shard_hack(_DbName, Shards, Docs) -> +%% dict:to_list(lists:foldl(fun(#doc{id=_Id} = Doc, D0) -> +%% lists:foldl(fun(Shard, D1) -> +%% dict:append(Shard, Doc, D1) +%% end, D0, Shards) +%% end, dict:new(), Docs)). +%% +%% -endif. diff --git a/src/fabric/src/fabric_rpc.erl b/src/fabric/src/fabric_rpc.erl index 7b688b2b9..6fdc76595 100644 --- a/src/fabric/src/fabric_rpc.erl +++ b/src/fabric/src/fabric_rpc.erl @@ -439,7 +439,7 @@ get_node_seqs(Db, Nodes) -> get_or_create_db(DbName, Options) -> - couch_db:open_int(DbName, [{create_if_missing, true} | Options]). + mem3_util:get_or_create_db(DbName, Options). get_view_cb(#mrargs{extra = Options}) -> @@ -515,7 +515,8 @@ changes_enumerator(DocInfo, Acc) -> [] -> ChangesRow = {no_pass, [ {pending, Pending-1}, - {seq, Seq}]}; + {seq, {Seq, uuid(Db), couch_db:owner_of(Epochs, Seq)}} + ]}; Results -> Opts = if Conflicts -> [conflicts | DocOptions]; true -> DocOptions end, ChangesRow = {change, [ @@ -642,22 +643,22 @@ uuid(Db) -> uuid_prefix_len() -> list_to_integer(config:get("fabric", "uuid_prefix_len", "7")). --ifdef(TEST). --include_lib("eunit/include/eunit.hrl"). - -maybe_filtered_json_doc_no_filter_test() -> - Body = {[{<<"a">>, 1}]}, - Doc = #doc{id = <<"1">>, revs = {1, [<<"r1">>]}, body = Body}, - {JDocProps} = maybe_filtered_json_doc(Doc, [], x), - ExpectedProps = [{<<"_id">>, <<"1">>}, {<<"_rev">>, <<"1-r1">>}, {<<"a">>, 1}], - ?assertEqual(lists:keysort(1, JDocProps), ExpectedProps). - -maybe_filtered_json_doc_with_filter_test() -> - Body = {[{<<"a">>, 1}]}, - Doc = #doc{id = <<"1">>, revs = {1, [<<"r1">>]}, body = Body}, - Fields = [<<"a">>, <<"nonexistent">>], - Filter = {selector, main_only, {some_selector, Fields}}, - {JDocProps} = maybe_filtered_json_doc(Doc, [], Filter), - ?assertEqual(JDocProps, [{<<"a">>, 1}]). - --endif. +%% -ifdef(TEST). +%% -include_lib("eunit/include/eunit.hrl"). +%% +%% maybe_filtered_json_doc_no_filter_test() -> +%% Body = {[{<<"a">>, 1}]}, +%% Doc = #doc{id = <<"1">>, revs = {1, [<<"r1">>]}, body = Body}, +%% {JDocProps} = maybe_filtered_json_doc(Doc, [], x), +%% ExpectedProps = [{<<"_id">>, <<"1">>}, {<<"_rev">>, <<"1-r1">>}, {<<"a">>, 1}], +%% ?assertEqual(lists:keysort(1, JDocProps), ExpectedProps). +%% +%% maybe_filtered_json_doc_with_filter_test() -> +%% Body = {[{<<"a">>, 1}]}, +%% Doc = #doc{id = <<"1">>, revs = {1, [<<"r1">>]}, body = Body}, +%% Fields = [<<"a">>, <<"nonexistent">>], +%% Filter = {selector, main_only, {some_selector, Fields}}, +%% {JDocProps} = maybe_filtered_json_doc(Doc, [], Filter), +%% ?assertEqual(JDocProps, [{<<"a">>, 1}]). 
+%% +%% -endif. diff --git a/src/fabric/src/fabric_streams.erl b/src/fabric/src/fabric_streams.erl index 59c8b8a6b..98e285081 100644 --- a/src/fabric/src/fabric_streams.erl +++ b/src/fabric/src/fabric_streams.erl @@ -192,82 +192,83 @@ add_worker_to_cleaner(CoordinatorPid, Worker) -> --ifdef(TEST). --include_lib("eunit/include/eunit.hrl"). - -worker_cleaner_test_() -> - { - "Fabric spawn_worker_cleaner test", { - setup, fun setup/0, fun teardown/1, - fun(_) -> [ - should_clean_workers(), - does_not_fire_if_cleanup_called(), - should_clean_additional_worker_too() - ] end - } - }. - - -should_clean_workers() -> - ?_test(begin - meck:reset(rexi), - erase(?WORKER_CLEANER), - Workers = [ - #shard{node = 'n1', ref = make_ref()}, - #shard{node = 'n2', ref = make_ref()} - ], - {Coord, _} = spawn_monitor(fun() -> receive die -> ok end end), - Cleaner = spawn_worker_cleaner(Coord, Workers), - Ref = erlang:monitor(process, Cleaner), - Coord ! die, - receive {'DOWN', Ref, _, Cleaner, _} -> ok end, - ?assertEqual(1, meck:num_calls(rexi, kill_all, 1)) - end). - - -does_not_fire_if_cleanup_called() -> - ?_test(begin - meck:reset(rexi), - erase(?WORKER_CLEANER), - Workers = [ - #shard{node = 'n1', ref = make_ref()}, - #shard{node = 'n2', ref = make_ref()} - ], - {Coord, _} = spawn_monitor(fun() -> receive die -> ok end end), - Cleaner = spawn_worker_cleaner(Coord, Workers), - Ref = erlang:monitor(process, Cleaner), - cleanup(Workers), - Coord ! die, - receive {'DOWN', Ref, _, _, _} -> ok end, - % 2 calls would be from cleanup/1 function. If cleanup process fired - % too it would have been 4 calls total. - ?assertEqual(1, meck:num_calls(rexi, kill_all, 1)) - end). - - -should_clean_additional_worker_too() -> - ?_test(begin - meck:reset(rexi), - erase(?WORKER_CLEANER), - Workers = [ - #shard{node = 'n1', ref = make_ref()} - ], - {Coord, _} = spawn_monitor(fun() -> receive die -> ok end end), - Cleaner = spawn_worker_cleaner(Coord, Workers), - add_worker_to_cleaner(Coord, #shard{node = 'n2', ref = make_ref()}), - Ref = erlang:monitor(process, Cleaner), - Coord ! die, - receive {'DOWN', Ref, _, Cleaner, _} -> ok end, - ?assertEqual(1, meck:num_calls(rexi, kill_all, 1)) - end). - - -setup() -> - ok = meck:expect(rexi, kill_all, fun(_) -> ok end). - - -teardown(_) -> - meck:unload(). - --endif. +%% -ifdef(TEST). +%% +%% -include_lib("eunit/include/eunit.hrl"). +%% +%% worker_cleaner_test_() -> +%% { +%% "Fabric spawn_worker_cleaner test", { +%% setup, fun setup/0, fun teardown/1, +%% fun(_) -> [ +%% should_clean_workers(), +%% does_not_fire_if_cleanup_called(), +%% should_clean_additional_worker_too() +%% ] end +%% } +%% }. +%% +%% +%% should_clean_workers() -> +%% ?_test(begin +%% meck:reset(rexi), +%% erase(?WORKER_CLEANER), +%% Workers = [ +%% #shard{node = 'n1', ref = make_ref()}, +%% #shard{node = 'n2', ref = make_ref()} +%% ], +%% {Coord, _} = spawn_monitor(fun() -> receive die -> ok end end), +%% Cleaner = spawn_worker_cleaner(Coord, Workers), +%% Ref = erlang:monitor(process, Cleaner), +%% Coord ! die, +%% receive {'DOWN', Ref, _, Cleaner, _} -> ok end, +%% ?assertEqual(1, meck:num_calls(rexi, kill_all, 1)) +%% end). 
+%% +%% +%% does_not_fire_if_cleanup_called() -> +%% ?_test(begin +%% meck:reset(rexi), +%% erase(?WORKER_CLEANER), +%% Workers = [ +%% #shard{node = 'n1', ref = make_ref()}, +%% #shard{node = 'n2', ref = make_ref()} +%% ], +%% {Coord, _} = spawn_monitor(fun() -> receive die -> ok end end), +%% Cleaner = spawn_worker_cleaner(Coord, Workers), +%% Ref = erlang:monitor(process, Cleaner), +%% cleanup(Workers), +%% Coord ! die, +%% receive {'DOWN', Ref, _, _, _} -> ok end, +%% % 2 calls would be from cleanup/1 function. If cleanup process fired +%% % too it would have been 4 calls total. +%% ?assertEqual(1, meck:num_calls(rexi, kill_all, 1)) +%% end). +%% +%% +%% should_clean_additional_worker_too() -> +%% ?_test(begin +%% meck:reset(rexi), +%% erase(?WORKER_CLEANER), +%% Workers = [ +%% #shard{node = 'n1', ref = make_ref()} +%% ], +%% {Coord, _} = spawn_monitor(fun() -> receive die -> ok end end), +%% Cleaner = spawn_worker_cleaner(Coord, Workers), +%% add_worker_to_cleaner(Coord, #shard{node = 'n2', ref = make_ref()}), +%% Ref = erlang:monitor(process, Cleaner), +%% Coord ! die, +%% receive {'DOWN', Ref, _, Cleaner, _} -> ok end, +%% ?assertEqual(1, meck:num_calls(rexi, kill_all, 1)) +%% end). +%% +%% +%% setup() -> +%% ok = meck:expect(rexi, kill_all, fun(_) -> ok end). +%% +%% +%% teardown(_) -> +%% meck:unload(). +%% +%% -endif. diff --git a/src/fabric/src/fabric_util.erl b/src/fabric/src/fabric_util.erl index 8aa14e73a..1c1ee80b7 100644 --- a/src/fabric/src/fabric_util.erl +++ b/src/fabric/src/fabric_util.erl @@ -192,30 +192,30 @@ create_monitors(Shards) -> ]), rexi_monitor:start(MonRefs). -%% verify only id and rev are used in key. -update_counter_test() -> - Reply = {ok, #doc{id = <<"id">>, revs = <<"rev">>, - body = <<"body">>, atts = <<"atts">>}}, - ?assertEqual([{{<<"id">>,<<"rev">>}, {Reply, 1}}], - update_counter(Reply, 1, [])). - -remove_ancestors_test() -> - Foo1 = {ok, #doc{revs = {1, [<<"foo">>]}}}, - Foo2 = {ok, #doc{revs = {2, [<<"foo2">>, <<"foo">>]}}}, - Bar1 = {ok, #doc{revs = {1, [<<"bar">>]}}}, - Bar2 = {not_found, {1,<<"bar">>}}, - ?assertEqual( - [kv(Bar1,1), kv(Foo1,1)], - remove_ancestors([kv(Bar1,1), kv(Foo1,1)], []) - ), - ?assertEqual( - [kv(Bar1,1), kv(Foo2,2)], - remove_ancestors([kv(Bar1,1), kv(Foo1,1), kv(Foo2,1)], []) - ), - ?assertEqual( - [kv(Bar1,2)], - remove_ancestors([kv(Bar2,1), kv(Bar1,1)], []) - ). +%% %% verify only id and rev are used in key. +%% update_counter_test() -> +%% Reply = {ok, #doc{id = <<"id">>, revs = <<"rev">>, +%% body = <<"body">>, atts = <<"atts">>}}, +%% ?assertEqual([{{<<"id">>,<<"rev">>}, {Reply, 1}}], +%% update_counter(Reply, 1, [])). +%% +%% remove_ancestors_test() -> +%% Foo1 = {ok, #doc{revs = {1, [<<"foo">>]}}}, +%% Foo2 = {ok, #doc{revs = {2, [<<"foo2">>, <<"foo">>]}}}, +%% Bar1 = {ok, #doc{revs = {1, [<<"bar">>]}}}, +%% Bar2 = {not_found, {1,<<"bar">>}}, +%% ?assertEqual( +%% [kv(Bar1,1), kv(Foo1,1)], +%% remove_ancestors([kv(Bar1,1), kv(Foo1,1)], []) +%% ), +%% ?assertEqual( +%% [kv(Bar1,1), kv(Foo2,2)], +%% remove_ancestors([kv(Bar1,1), kv(Foo1,1), kv(Foo2,1)], []) +%% ), +%% ?assertEqual( +%% [kv(Bar1,2)], +%% remove_ancestors([kv(Bar2,1), kv(Bar1,1)], []) +%% ). is_replicator_db(DbName) -> path_ends_with(DbName, <<"_replicator">>). diff --git a/src/fabric/src/fabric_view.erl b/src/fabric/src/fabric_view.erl index 425f864c4..6c33e1e32 100644 --- a/src/fabric/src/fabric_view.erl +++ b/src/fabric/src/fabric_view.erl @@ -413,66 +413,66 @@ remove_finalizer(Args) -> couch_mrview_util:set_extra(Args, finalizer, null). 
-remove_overlapping_shards_test() -> - Cb = undefined, - - Shards = mk_cnts([[0, 10], [11, 20], [21, ?RING_END]], 3), - - % Simple (exact) overlap - Shard1 = mk_shard("node-3", [11, 20]), - Shards1 = fabric_dict:store(Shard1, nil, Shards), - R1 = remove_overlapping_shards(Shard1, Shards1, Cb), - ?assertEqual([{0, 10}, {11, 20}, {21, ?RING_END}], - fabric_util:worker_ranges(R1)), - ?assert(fabric_dict:is_key(Shard1, R1)), - - % Split overlap (shard overlap multiple workers) - Shard2 = mk_shard("node-3", [0, 20]), - Shards2 = fabric_dict:store(Shard2, nil, Shards), - R2 = remove_overlapping_shards(Shard2, Shards2, Cb), - ?assertEqual([{0, 20}, {21, ?RING_END}], - fabric_util:worker_ranges(R2)), - ?assert(fabric_dict:is_key(Shard2, R2)). - - -get_shard_replacements_test() -> - Unused = [mk_shard(N, [B, E]) || {N, B, E} <- [ - {"n1", 11, 20}, {"n1", 21, ?RING_END}, - {"n2", 0, 4}, {"n2", 5, 10}, {"n2", 11, 20}, - {"n3", 0, 21, ?RING_END} - ]], - Used = [mk_shard(N, [B, E]) || {N, B, E} <- [ - {"n2", 21, ?RING_END}, - {"n3", 0, 10}, {"n3", 11, 20} - ]], - Res = lists:sort(get_shard_replacements_int(Unused, Used)), - % Notice that [0, 10] range can be replaced by spawning the [0, 4] and [5, - % 10] workers on n1 - Expect = [ - {[0, 10], [mk_shard("n2", [0, 4]), mk_shard("n2", [5, 10])]}, - {[11, 20], [mk_shard("n1", [11, 20]), mk_shard("n2", [11, 20])]}, - {[21, ?RING_END], [mk_shard("n1", [21, ?RING_END])]} - ], - ?assertEqual(Expect, Res). - - -mk_cnts(Ranges, NoNodes) -> - orddict:from_list([{Shard,nil} - || Shard <- - lists:flatten(lists:map( - fun(Range) -> - mk_shards(NoNodes,Range,[]) - end, Ranges))] - ). - -mk_shards(0,_Range,Shards) -> - Shards; -mk_shards(NoNodes,Range,Shards) -> - Name ="node-" ++ integer_to_list(NoNodes), - mk_shards(NoNodes-1,Range, [mk_shard(Name, Range) | Shards]). - - -mk_shard(Name, Range) -> - Node = list_to_atom(Name), - BName = list_to_binary(Name), - #shard{name = BName, node = Node, range = Range}. +%% remove_overlapping_shards_test() -> +%% Cb = undefined, +%% +%% Shards = mk_cnts([[0, 10], [11, 20], [21, ?RING_END]], 3), +%% +%% % Simple (exact) overlap +%% Shard1 = mk_shard("node-3", [11, 20]), +%% Shards1 = fabric_dict:store(Shard1, nil, Shards), +%% R1 = remove_overlapping_shards(Shard1, Shards1, Cb), +%% ?assertEqual([{0, 10}, {11, 20}, {21, ?RING_END}], +%% fabric_util:worker_ranges(R1)), +%% ?assert(fabric_dict:is_key(Shard1, R1)), +%% +%% % Split overlap (shard overlap multiple workers) +%% Shard2 = mk_shard("node-3", [0, 20]), +%% Shards2 = fabric_dict:store(Shard2, nil, Shards), +%% R2 = remove_overlapping_shards(Shard2, Shards2, Cb), +%% ?assertEqual([{0, 20}, {21, ?RING_END}], +%% fabric_util:worker_ranges(R2)), +%% ?assert(fabric_dict:is_key(Shard2, R2)). +%% +%% +%% get_shard_replacements_test() -> +%% Unused = [mk_shard(N, [B, E]) || {N, B, E} <- [ +%% {"n1", 11, 20}, {"n1", 21, ?RING_END}, +%% {"n2", 0, 4}, {"n2", 5, 10}, {"n2", 11, 20}, +%% {"n3", 0, 21, ?RING_END} +%% ]], +%% Used = [mk_shard(N, [B, E]) || {N, B, E} <- [ +%% {"n2", 21, ?RING_END}, +%% {"n3", 0, 10}, {"n3", 11, 20} +%% ]], +%% Res = lists:sort(get_shard_replacements_int(Unused, Used)), +%% % Notice that [0, 10] range can be replaced by spawning the [0, 4] and [5, +%% % 10] workers on n1 +%% Expect = [ +%% {[0, 10], [mk_shard("n2", [0, 4]), mk_shard("n2", [5, 10])]}, +%% {[11, 20], [mk_shard("n1", [11, 20]), mk_shard("n2", [11, 20])]}, +%% {[21, ?RING_END], [mk_shard("n1", [21, ?RING_END])]} +%% ], +%% ?assertEqual(Expect, Res). 
+%% +%% +%% mk_cnts(Ranges, NoNodes) -> +%% orddict:from_list([{Shard,nil} +%% || Shard <- +%% lists:flatten(lists:map( +%% fun(Range) -> +%% mk_shards(NoNodes,Range,[]) +%% end, Ranges))] +%% ). +%% +%% mk_shards(0,_Range,Shards) -> +%% Shards; +%% mk_shards(NoNodes,Range,Shards) -> +%% Name ="node-" ++ integer_to_list(NoNodes), +%% mk_shards(NoNodes-1,Range, [mk_shard(Name, Range) | Shards]). +%% +%% +%% mk_shard(Name, Range) -> +%% Node = list_to_atom(Name), +%% BName = list_to_binary(Name), +%% #shard{name = BName, node = Node, range = Range}. diff --git a/src/fabric/src/fabric_view_changes.erl b/src/fabric/src/fabric_view_changes.erl index febbd3169..3f684a3cc 100644 --- a/src/fabric/src/fabric_view_changes.erl +++ b/src/fabric/src/fabric_view_changes.erl @@ -637,184 +637,184 @@ increment_changes_epoch() -> application:set_env(fabric, changes_epoch, os:timestamp()). -unpack_seq_setup() -> - meck:new(mem3), - meck:new(fabric_view), - meck:expect(mem3, get_shard, fun(_, _, _) -> {ok, #shard{}} end), - meck:expect(fabric_ring, is_progress_possible, fun(_) -> true end), - ok. - - -unpack_seqs_test_() -> - { - setup, - fun unpack_seq_setup/0, - fun (_) -> meck:unload() end, - [ - t_unpack_seqs() - ] - }. - - -t_unpack_seqs() -> - ?_test(begin - % BigCouch 0.3 style. - assert_shards("23423-g1AAAAE7eJzLYWBg4MhgTmHgS0ktM3QwND" - "LXMwBCwxygOFMiQ5L8____sxIZcKlIUgCSSfZgRUw4FTmAFMWDFTHiVJQAUlSPX1Ee" - "C5BkaABSQHXzsxKZ8StcAFG4H4_bIAoPQBTeJ2j1A4hCUJBkAQC7U1NA"), - - % BigCouch 0.4 style. - assert_shards([23423,<<"g1AAAAE7eJzLYWBg4MhgTmHgS0ktM3QwND" - "LXMwBCwxygOFMiQ5L8____sxIZcKlIUgCSSfZgRUw4FTmAFMWDFTHiVJQAUlSPX1Ee" - "C5BkaABSQHXzsxKZ8StcAFG4H4_bIAoPQBTeJ2j1A4hCUJBkAQC7U1NA">>]), - - % BigCouch 0.4 style (as string). - assert_shards("[23423,\"g1AAAAE7eJzLYWBg4MhgTmHgS0ktM3QwND" - "LXMwBCwxygOFMiQ5L8____sxIZcKlIUgCSSfZgRUw4FTmAFMWDFTHiVJQAUlSPX1Ee" - "C5BkaABSQHXzsxKZ8StcAFG4H4_bIAoPQBTeJ2j1A4hCUJBkAQC7U1NA\"]"), - assert_shards("[23423 ,\"g1AAAAE7eJzLYWBg4MhgTmHgS0ktM3QwND" - "LXMwBCwxygOFMiQ5L8____sxIZcKlIUgCSSfZgRUw4FTmAFMWDFTHiVJQAUlSPX1Ee" - "C5BkaABSQHXzsxKZ8StcAFG4H4_bIAoPQBTeJ2j1A4hCUJBkAQC7U1NA\"]"), - assert_shards("[23423, \"g1AAAAE7eJzLYWBg4MhgTmHgS0ktM3QwND" - "LXMwBCwxygOFMiQ5L8____sxIZcKlIUgCSSfZgRUw4FTmAFMWDFTHiVJQAUlSPX1Ee" - "C5BkaABSQHXzsxKZ8StcAFG4H4_bIAoPQBTeJ2j1A4hCUJBkAQC7U1NA\"]"), - assert_shards("[23423 , \"g1AAAAE7eJzLYWBg4MhgTmHgS0ktM3QwND" - "LXMwBCwxygOFMiQ5L8____sxIZcKlIUgCSSfZgRUw4FTmAFMWDFTHiVJQAUlSPX1Ee" - "C5BkaABSQHXzsxKZ8StcAFG4H4_bIAoPQBTeJ2j1A4hCUJBkAQC7U1NA\"]"), - - % with internal hypen - assert_shards("651-g1AAAAE7eJzLYWBg4MhgTmHgS0ktM3QwNDLXMwBCwxygOFMiQ" - "5L8____sxJTcalIUgCSSfZgReE4FTmAFMWDFYXgVJQAUlQPVuSKS1EeC5BkaABSQHXz8" - "VgJUbgAonB_VqIPfoUHIArvE7T6AUQh0I1-WQAzp1XB"), - assert_shards([651,"g1AAAAE7eJzLYWBg4MhgTmHgS0ktM3QwNDLXMwBCwxygOFMiQ" - "5L8____sxJTcalIUgCSSfZgReE4FTmAFMWDFYXgVJQAUlQPVuSKS1EeC5BkaABSQHXz8" - "VgJUbgAonB_VqIPfoUHIArvE7T6AUQh0I1-WQAzp1XB"]), - - % CouchDB 1.2 style - assert_shards("\"23423-g1AAAAE7eJzLYWBg4MhgTmHgS0ktM3QwND" - "LXMwBCwxygOFMiQ5L8____sxIZcKlIUgCSSfZgRUw4FTmAFMWDFTHiVJQAUlSPX1Ee" - "C5BkaABSQHXzsxKZ8StcAFG4H4_bIAoPQBTeJ2j1A4hCUJBkAQC7U1NA\"") - end). - - -assert_shards(Packed) -> - ?assertMatch([{#shard{},_}|_], unpack_seqs(Packed, <<"foo">>)). - - -find_replacements_test() -> - % None of the workers are in the live list of shard but there is a - % replacement on n3 for the full range. It should get picked instead of - % the two smaller one on n2. 
- Workers1 = mk_workers([{"n1", 0, 10}, {"n2", 11, ?RING_END}]), - AllShards1 = [ - mk_shard("n1", 11, ?RING_END), - mk_shard("n2", 0, 4), - mk_shard("n2", 5, 10), - mk_shard("n3", 0, ?RING_END) - ], - {WorkersRes1, Dead1, Reps1} = find_replacements(Workers1, AllShards1), - ?assertEqual([], WorkersRes1), - ?assertEqual(Workers1, Dead1), - ?assertEqual([mk_shard("n3", 0, ?RING_END)], Reps1), - - % None of the workers are in the live list of shards and there is a - % split replacement from n2 (range [0, 10] replaced with [0, 4], [5, 10]) - Workers2 = mk_workers([{"n1", 0, 10}, {"n2", 11, ?RING_END}]), - AllShards2 = [ - mk_shard("n1", 11, ?RING_END), - mk_shard("n2", 0, 4), - mk_shard("n2", 5, 10) - ], - {WorkersRes2, Dead2, Reps2} = find_replacements(Workers2, AllShards2), - ?assertEqual([], WorkersRes2), - ?assertEqual(Workers2, Dead2), - ?assertEqual([ - mk_shard("n1", 11, ?RING_END), - mk_shard("n2", 0, 4), - mk_shard("n2", 5, 10) - ], lists:sort(Reps2)), - - % One worker is available and one needs to be replaced. Replacement will be - % from two split shards - Workers3 = mk_workers([{"n1", 0, 10}, {"n2", 11, ?RING_END}]), - AllShards3 = [ - mk_shard("n1", 11, ?RING_END), - mk_shard("n2", 0, 4), - mk_shard("n2", 5, 10), - mk_shard("n2", 11, ?RING_END) - ], - {WorkersRes3, Dead3, Reps3} = find_replacements(Workers3, AllShards3), - ?assertEqual(mk_workers([{"n2", 11, ?RING_END}]), WorkersRes3), - ?assertEqual(mk_workers([{"n1", 0, 10}]), Dead3), - ?assertEqual([ - mk_shard("n2", 0, 4), - mk_shard("n2", 5, 10) - ], lists:sort(Reps3)), - - % All workers are available. Make sure they are not killed even if there is - % a longer (single) shard to replace them. - Workers4 = mk_workers([{"n1", 0, 10}, {"n1", 11, ?RING_END}]), - AllShards4 = [ - mk_shard("n1", 0, 10), - mk_shard("n1", 11, ?RING_END), - mk_shard("n2", 0, 4), - mk_shard("n2", 5, 10), - mk_shard("n3", 0, ?RING_END) - ], - {WorkersRes4, Dead4, Reps4} = find_replacements(Workers4, AllShards4), - ?assertEqual(Workers4, WorkersRes4), - ?assertEqual([], Dead4), - ?assertEqual([], Reps4). - - -mk_workers(NodesRanges) -> - mk_workers(NodesRanges, nil). - -mk_workers(NodesRanges, Val) -> - orddict:from_list([{mk_shard(N, B, E), Val} || {N, B, E} <- NodesRanges]). - - -mk_shard(Name, B, E) -> - Node = list_to_atom(Name), - BName = list_to_binary(Name), - #shard{name = BName, node = Node, range = [B, E]}. - - -find_split_shard_replacements_test() -> - % One worker is can be replaced and one can't - Dead1 = mk_workers([{"n1", 0, 10}, {"n2", 11, ?RING_END}], 42), - Shards1 = [ - mk_shard("n1", 0, 4), - mk_shard("n1", 5, 10), - mk_shard("n3", 11, ?RING_END) - ], - {Workers1, ShardsLeft1} = find_split_shard_replacements(Dead1, Shards1), - ?assertEqual(mk_workers([{"n1", 0, 4}, {"n1", 5, 10}], 42), Workers1), - ?assertEqual([mk_shard("n3", 11, ?RING_END)], ShardsLeft1), - - % All workers can be replaced - one by 1 shard, another by 3 smaller shards - Dead2 = mk_workers([{"n1", 0, 10}, {"n2", 11, ?RING_END}], 42), - Shards2 = [ - mk_shard("n1", 0, 10), - mk_shard("n2", 11, 12), - mk_shard("n2", 13, 14), - mk_shard("n2", 15, ?RING_END) - ], - {Workers2, ShardsLeft2} = find_split_shard_replacements(Dead2, Shards2), - ?assertEqual(mk_workers([ - {"n1", 0, 10}, - {"n2", 11, 12}, - {"n2", 13, 14}, - {"n2", 15, ?RING_END} - ], 42), Workers2), - ?assertEqual([], ShardsLeft2), - - % No workers can be replaced. 
Ranges match but they are on different nodes - Dead3 = mk_workers([{"n1", 0, 10}, {"n2", 11, ?RING_END}], 42), - Shards3 = [ - mk_shard("n2", 0, 10), - mk_shard("n3", 11, ?RING_END) - ], - {Workers3, ShardsLeft3} = find_split_shard_replacements(Dead3, Shards3), - ?assertEqual([], Workers3), - ?assertEqual(Shards3, ShardsLeft3). +%% unpack_seq_setup() -> +%% meck:new(mem3), +%% meck:new(fabric_view), +%% meck:expect(mem3, get_shard, fun(_, _, _) -> {ok, #shard{}} end), +%% meck:expect(fabric_ring, is_progress_possible, fun(_) -> true end), +%% ok. +%% +%% +%% unpack_seqs_test_() -> +%% { +%% setup, +%% fun unpack_seq_setup/0, +%% fun (_) -> meck:unload() end, +%% [ +%% t_unpack_seqs() +%% ] +%% }. +%% +%% +%% t_unpack_seqs() -> +%% ?_test(begin +%% % BigCouch 0.3 style. +%% assert_shards("23423-g1AAAAE7eJzLYWBg4MhgTmHgS0ktM3QwND" +%% "LXMwBCwxygOFMiQ5L8____sxIZcKlIUgCSSfZgRUw4FTmAFMWDFTHiVJQAUlSPX1Ee" +%% "C5BkaABSQHXzsxKZ8StcAFG4H4_bIAoPQBTeJ2j1A4hCUJBkAQC7U1NA"), +%% +%% % BigCouch 0.4 style. +%% assert_shards([23423,<<"g1AAAAE7eJzLYWBg4MhgTmHgS0ktM3QwND" +%% "LXMwBCwxygOFMiQ5L8____sxIZcKlIUgCSSfZgRUw4FTmAFMWDFTHiVJQAUlSPX1Ee" +%% "C5BkaABSQHXzsxKZ8StcAFG4H4_bIAoPQBTeJ2j1A4hCUJBkAQC7U1NA">>]), +%% +%% % BigCouch 0.4 style (as string). +%% assert_shards("[23423,\"g1AAAAE7eJzLYWBg4MhgTmHgS0ktM3QwND" +%% "LXMwBCwxygOFMiQ5L8____sxIZcKlIUgCSSfZgRUw4FTmAFMWDFTHiVJQAUlSPX1Ee" +%% "C5BkaABSQHXzsxKZ8StcAFG4H4_bIAoPQBTeJ2j1A4hCUJBkAQC7U1NA\"]"), +%% assert_shards("[23423 ,\"g1AAAAE7eJzLYWBg4MhgTmHgS0ktM3QwND" +%% "LXMwBCwxygOFMiQ5L8____sxIZcKlIUgCSSfZgRUw4FTmAFMWDFTHiVJQAUlSPX1Ee" +%% "C5BkaABSQHXzsxKZ8StcAFG4H4_bIAoPQBTeJ2j1A4hCUJBkAQC7U1NA\"]"), +%% assert_shards("[23423, \"g1AAAAE7eJzLYWBg4MhgTmHgS0ktM3QwND" +%% "LXMwBCwxygOFMiQ5L8____sxIZcKlIUgCSSfZgRUw4FTmAFMWDFTHiVJQAUlSPX1Ee" +%% "C5BkaABSQHXzsxKZ8StcAFG4H4_bIAoPQBTeJ2j1A4hCUJBkAQC7U1NA\"]"), +%% assert_shards("[23423 , \"g1AAAAE7eJzLYWBg4MhgTmHgS0ktM3QwND" +%% "LXMwBCwxygOFMiQ5L8____sxIZcKlIUgCSSfZgRUw4FTmAFMWDFTHiVJQAUlSPX1Ee" +%% "C5BkaABSQHXzsxKZ8StcAFG4H4_bIAoPQBTeJ2j1A4hCUJBkAQC7U1NA\"]"), +%% +%% % with internal hypen +%% assert_shards("651-g1AAAAE7eJzLYWBg4MhgTmHgS0ktM3QwNDLXMwBCwxygOFMiQ" +%% "5L8____sxJTcalIUgCSSfZgReE4FTmAFMWDFYXgVJQAUlQPVuSKS1EeC5BkaABSQHXz8" +%% "VgJUbgAonB_VqIPfoUHIArvE7T6AUQh0I1-WQAzp1XB"), +%% assert_shards([651,"g1AAAAE7eJzLYWBg4MhgTmHgS0ktM3QwNDLXMwBCwxygOFMiQ" +%% "5L8____sxJTcalIUgCSSfZgReE4FTmAFMWDFYXgVJQAUlQPVuSKS1EeC5BkaABSQHXz8" +%% "VgJUbgAonB_VqIPfoUHIArvE7T6AUQh0I1-WQAzp1XB"]), +%% +%% % CouchDB 1.2 style +%% assert_shards("\"23423-g1AAAAE7eJzLYWBg4MhgTmHgS0ktM3QwND" +%% "LXMwBCwxygOFMiQ5L8____sxIZcKlIUgCSSfZgRUw4FTmAFMWDFTHiVJQAUlSPX1Ee" +%% "C5BkaABSQHXzsxKZ8StcAFG4H4_bIAoPQBTeJ2j1A4hCUJBkAQC7U1NA\"") +%% end). +%% +%% +%% assert_shards(Packed) -> +%% ?assertMatch([{#shard{},_}|_], unpack_seqs(Packed, <<"foo">>)). +%% +%% +%% find_replacements_test() -> +%% % None of the workers are in the live list of shard but there is a +%% % replacement on n3 for the full range. It should get picked instead of +%% % the two smaller one on n2. 
+%% Workers1 = mk_workers([{"n1", 0, 10}, {"n2", 11, ?RING_END}]), +%% AllShards1 = [ +%% mk_shard("n1", 11, ?RING_END), +%% mk_shard("n2", 0, 4), +%% mk_shard("n2", 5, 10), +%% mk_shard("n3", 0, ?RING_END) +%% ], +%% {WorkersRes1, Dead1, Reps1} = find_replacements(Workers1, AllShards1), +%% ?assertEqual([], WorkersRes1), +%% ?assertEqual(Workers1, Dead1), +%% ?assertEqual([mk_shard("n3", 0, ?RING_END)], Reps1), +%% +%% % None of the workers are in the live list of shards and there is a +%% % split replacement from n2 (range [0, 10] replaced with [0, 4], [5, 10]) +%% Workers2 = mk_workers([{"n1", 0, 10}, {"n2", 11, ?RING_END}]), +%% AllShards2 = [ +%% mk_shard("n1", 11, ?RING_END), +%% mk_shard("n2", 0, 4), +%% mk_shard("n2", 5, 10) +%% ], +%% {WorkersRes2, Dead2, Reps2} = find_replacements(Workers2, AllShards2), +%% ?assertEqual([], WorkersRes2), +%% ?assertEqual(Workers2, Dead2), +%% ?assertEqual([ +%% mk_shard("n1", 11, ?RING_END), +%% mk_shard("n2", 0, 4), +%% mk_shard("n2", 5, 10) +%% ], lists:sort(Reps2)), +%% +%% % One worker is available and one needs to be replaced. Replacement will be +%% % from two split shards +%% Workers3 = mk_workers([{"n1", 0, 10}, {"n2", 11, ?RING_END}]), +%% AllShards3 = [ +%% mk_shard("n1", 11, ?RING_END), +%% mk_shard("n2", 0, 4), +%% mk_shard("n2", 5, 10), +%% mk_shard("n2", 11, ?RING_END) +%% ], +%% {WorkersRes3, Dead3, Reps3} = find_replacements(Workers3, AllShards3), +%% ?assertEqual(mk_workers([{"n2", 11, ?RING_END}]), WorkersRes3), +%% ?assertEqual(mk_workers([{"n1", 0, 10}]), Dead3), +%% ?assertEqual([ +%% mk_shard("n2", 0, 4), +%% mk_shard("n2", 5, 10) +%% ], lists:sort(Reps3)), +%% +%% % All workers are available. Make sure they are not killed even if there is +%% % a longer (single) shard to replace them. +%% Workers4 = mk_workers([{"n1", 0, 10}, {"n1", 11, ?RING_END}]), +%% AllShards4 = [ +%% mk_shard("n1", 0, 10), +%% mk_shard("n1", 11, ?RING_END), +%% mk_shard("n2", 0, 4), +%% mk_shard("n2", 5, 10), +%% mk_shard("n3", 0, ?RING_END) +%% ], +%% {WorkersRes4, Dead4, Reps4} = find_replacements(Workers4, AllShards4), +%% ?assertEqual(Workers4, WorkersRes4), +%% ?assertEqual([], Dead4), +%% ?assertEqual([], Reps4). +%% +%% +%% mk_workers(NodesRanges) -> +%% mk_workers(NodesRanges, nil). +%% +%% mk_workers(NodesRanges, Val) -> +%% orddict:from_list([{mk_shard(N, B, E), Val} || {N, B, E} <- NodesRanges]). +%% +%% +%% mk_shard(Name, B, E) -> +%% Node = list_to_atom(Name), +%% BName = list_to_binary(Name), +%% #shard{name = BName, node = Node, range = [B, E]}. 
+%% +%% +%% find_split_shard_replacements_test() -> +%% % One worker is can be replaced and one can't +%% Dead1 = mk_workers([{"n1", 0, 10}, {"n2", 11, ?RING_END}], 42), +%% Shards1 = [ +%% mk_shard("n1", 0, 4), +%% mk_shard("n1", 5, 10), +%% mk_shard("n3", 11, ?RING_END) +%% ], +%% {Workers1, ShardsLeft1} = find_split_shard_replacements(Dead1, Shards1), +%% ?assertEqual(mk_workers([{"n1", 0, 4}, {"n1", 5, 10}], 42), Workers1), +%% ?assertEqual([mk_shard("n3", 11, ?RING_END)], ShardsLeft1), +%% +%% % All workers can be replaced - one by 1 shard, another by 3 smaller shards +%% Dead2 = mk_workers([{"n1", 0, 10}, {"n2", 11, ?RING_END}], 42), +%% Shards2 = [ +%% mk_shard("n1", 0, 10), +%% mk_shard("n2", 11, 12), +%% mk_shard("n2", 13, 14), +%% mk_shard("n2", 15, ?RING_END) +%% ], +%% {Workers2, ShardsLeft2} = find_split_shard_replacements(Dead2, Shards2), +%% ?assertEqual(mk_workers([ +%% {"n1", 0, 10}, +%% {"n2", 11, 12}, +%% {"n2", 13, 14}, +%% {"n2", 15, ?RING_END} +%% ], 42), Workers2), +%% ?assertEqual([], ShardsLeft2), +%% +%% % No workers can be replaced. Ranges match but they are on different nodes +%% Dead3 = mk_workers([{"n1", 0, 10}, {"n2", 11, ?RING_END}], 42), +%% Shards3 = [ +%% mk_shard("n2", 0, 10), +%% mk_shard("n3", 11, ?RING_END) +%% ], +%% {Workers3, ShardsLeft3} = find_split_shard_replacements(Dead3, Shards3), +%% ?assertEqual([], Workers3), +%% ?assertEqual(Shards3, ShardsLeft3). diff --git a/src/fabric/test/eunit/fabric_rpc_purge_tests.erl b/src/fabric/test/eunit/fabric_rpc_purge_tests.erl deleted file mode 100644 index 6db6a70aa..000000000 --- a/src/fabric/test/eunit/fabric_rpc_purge_tests.erl +++ /dev/null @@ -1,307 +0,0 @@ -% Licensed under the Apache License, Version 2.0 (the "License"); you may not -% use this file except in compliance with the License. You may obtain a copy of -% the License at -% -% http://www.apache.org/licenses/LICENSE-2.0 -% -% Unless required by applicable law or agreed to in writing, software -% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -% License for the specific language governing permissions and limitations under -% the License. - --module(fabric_rpc_purge_tests). - - --include_lib("couch/include/couch_eunit.hrl"). --include_lib("couch/include/couch_db.hrl"). - - --define(TDEF(A), {A, fun A/1}). - -% TODO: Add tests: -% - filter some updates -% - allow for an update that was filtered by a node -% - ignore lagging nodes - -main_test_() -> - { - setup, - spawn, - fun setup_all/0, - fun teardown_all/1, - [ - { - foreach, - fun setup_no_purge/0, - fun teardown_no_purge/1, - lists:map(fun wrap/1, [ - ?TDEF(t_no_purge_no_filter) - ]) - }, - { - foreach, - fun setup_single_purge/0, - fun teardown_single_purge/1, - lists:map(fun wrap/1, [ - ?TDEF(t_filter), - ?TDEF(t_filter_unknown_node), - ?TDEF(t_filter_local_node), - ?TDEF(t_no_filter_old_node), - ?TDEF(t_no_filter_different_node), - ?TDEF(t_no_filter_after_repl) - ]) - }, - { - foreach, - fun setup_multi_purge/0, - fun teardown_multi_purge/1, - lists:map(fun wrap/1, [ - ?TDEF(t_filter), - ?TDEF(t_filter_unknown_node), - ?TDEF(t_filter_local_node), - ?TDEF(t_no_filter_old_node), - ?TDEF(t_no_filter_different_node), - ?TDEF(t_no_filter_after_repl) - ]) - } - ] - }. - - -setup_all() -> - test_util:start_couch(). - - -teardown_all(Ctx) -> - test_util:stop_couch(Ctx). - - -setup_no_purge() -> - {ok, Db} = create_db(), - populate_db(Db), - couch_db:name(Db). 
- - -teardown_no_purge(DbName) -> - ok = couch_server:delete(DbName, []). - - -setup_single_purge() -> - DbName = setup_no_purge(), - DocId = <<"0003">>, - {ok, OldDoc} = open_doc(DbName, DocId), - purge_doc(DbName, DocId), - {DbName, DocId, OldDoc, 1}. - - -teardown_single_purge({DbName, _, _, _}) -> - teardown_no_purge(DbName). - - -setup_multi_purge() -> - DbName = setup_no_purge(), - DocId = <<"0003">>, - {ok, OldDoc} = open_doc(DbName, DocId), - lists:foreach(fun(I) -> - PDocId = iolist_to_binary(io_lib:format("~4..0b", [I])), - purge_doc(DbName, PDocId) - end, lists:seq(1, 5)), - {DbName, DocId, OldDoc, 3}. - - -teardown_multi_purge(Ctx) -> - teardown_single_purge(Ctx). - - -t_no_purge_no_filter(DbName) -> - DocId = <<"0003">>, - - {ok, OldDoc} = open_doc(DbName, DocId), - NewDoc = create_update(OldDoc, 2), - - rpc_update_doc(DbName, NewDoc), - - {ok, CurrDoc} = open_doc(DbName, DocId), - ?assert(CurrDoc /= OldDoc), - ?assert(CurrDoc == NewDoc). - - -t_filter({DbName, DocId, OldDoc, _PSeq}) -> - ?assertEqual({not_found, missing}, open_doc(DbName, DocId)), - create_purge_checkpoint(DbName, 0), - - rpc_update_doc(DbName, OldDoc), - - ?assertEqual({not_found, missing}, open_doc(DbName, DocId)). - - -t_filter_unknown_node({DbName, DocId, OldDoc, _PSeq}) -> - % Unknown nodes are assumed to start at PurgeSeq = 0 - ?assertEqual({not_found, missing}, open_doc(DbName, DocId)), - create_purge_checkpoint(DbName, 0), - - {Pos, [Rev | _]} = OldDoc#doc.revs, - RROpt = {read_repair, [{'blargh@127.0.0.1', [{Pos, Rev}]}]}, - rpc_update_doc(DbName, OldDoc, [RROpt]), - - ?assertEqual({not_found, missing}, open_doc(DbName, DocId)). - - -t_no_filter_old_node({DbName, DocId, OldDoc, PSeq}) -> - ?assertEqual({not_found, missing}, open_doc(DbName, DocId)), - create_purge_checkpoint(DbName, PSeq), - - % The random UUID is to generate a badarg exception when - % we try and convert it to an existing atom. - create_purge_checkpoint(DbName, 0, couch_uuids:random()), - - rpc_update_doc(DbName, OldDoc), - - ?assertEqual({ok, OldDoc}, open_doc(DbName, DocId)). - - -t_no_filter_different_node({DbName, DocId, OldDoc, PSeq}) -> - ?assertEqual({not_found, missing}, open_doc(DbName, DocId)), - create_purge_checkpoint(DbName, PSeq), - - % Create a valid purge for a different node - TgtNode = list_to_binary(atom_to_list('notfoo@127.0.0.1')), - create_purge_checkpoint(DbName, 0, TgtNode), - - rpc_update_doc(DbName, OldDoc), - - ?assertEqual({ok, OldDoc}, open_doc(DbName, DocId)). - - -t_filter_local_node({DbName, DocId, OldDoc, PSeq}) -> - ?assertEqual({not_found, missing}, open_doc(DbName, DocId)), - create_purge_checkpoint(DbName, PSeq), - - % Create a valid purge for a different node - TgtNode = list_to_binary(atom_to_list('notfoo@127.0.0.1')), - create_purge_checkpoint(DbName, 0, TgtNode), - - % Add a local node rev to the list of node revs. It should - % be filtered out - {Pos, [Rev | _]} = OldDoc#doc.revs, - RROpts = [{read_repair, [ - {tgt_node(), [{Pos, Rev}]}, - {node(), [{1, <<"123">>}]} - ]}], - rpc_update_doc(DbName, OldDoc, RROpts), - - ?assertEqual({ok, OldDoc}, open_doc(DbName, DocId)). - - -t_no_filter_after_repl({DbName, DocId, OldDoc, PSeq}) -> - ?assertEqual({not_found, missing}, open_doc(DbName, DocId)), - create_purge_checkpoint(DbName, PSeq), - - rpc_update_doc(DbName, OldDoc), - - ?assertEqual({ok, OldDoc}, open_doc(DbName, DocId)). - - -wrap({Name, Fun}) -> - fun(Arg) -> - {timeout, 60, {atom_to_list(Name), fun() -> - process_flag(trap_exit, true), - Fun(Arg) - end}} - end. 
- - -create_db() -> - DbName = ?tempdb(), - couch_db:create(DbName, [?ADMIN_CTX]). - - -populate_db(Db) -> - Docs = lists:map(fun(Idx) -> - DocId = lists:flatten(io_lib:format("~4..0b", [Idx])), - #doc{ - id = list_to_binary(DocId), - body = {[{<<"int">>, Idx}, {<<"vsn">>, 2}]} - } - end, lists:seq(1, 100)), - {ok, _} = couch_db:update_docs(Db, Docs). - - -open_doc(DbName, DocId) -> - couch_util:with_db(DbName, fun(Db) -> - couch_db:open_doc(Db, DocId, []) - end). - - -create_update(Doc, NewVsn) -> - #doc{ - id = DocId, - revs = {Pos, [Rev | _] = Revs}, - body = {Props} - } = Doc, - NewProps = lists:keyreplace(<<"vsn">>, 1, Props, {<<"vsn">>, NewVsn}), - NewRev = couch_hash:md5_hash(term_to_binary({DocId, Rev, {NewProps}})), - Doc#doc{ - revs = {Pos + 1, [NewRev | Revs]}, - body = {NewProps} - }. - - -purge_doc(DbName, DocId) -> - {ok, Doc} = open_doc(DbName, DocId), - {Pos, [Rev | _]} = Doc#doc.revs, - PInfo = {couch_uuids:random(), DocId, [{Pos, Rev}]}, - Resp = couch_util:with_db(DbName, fun(Db) -> - couch_db:purge_docs(Db, [PInfo], []) - end), - ?assertEqual({ok, [{ok, [{Pos, Rev}]}]}, Resp). - - -create_purge_checkpoint(DbName, PurgeSeq) -> - create_purge_checkpoint(DbName, PurgeSeq, tgt_node_bin()). - - -create_purge_checkpoint(DbName, PurgeSeq, TgtNode) when is_binary(TgtNode) -> - Resp = couch_util:with_db(DbName, fun(Db) -> - SrcUUID = couch_db:get_uuid(Db), - TgtUUID = couch_uuids:random(), - CPDoc = #doc{ - id = mem3_rep:make_purge_id(SrcUUID, TgtUUID), - body = {[ - {<<"target_node">>, TgtNode}, - {<<"purge_seq">>, PurgeSeq} - ]} - }, - couch_db:update_docs(Db, [CPDoc], []) - end), - ?assertMatch({ok, [_]}, Resp). - - -rpc_update_doc(DbName, Doc) -> - {Pos, [Rev | _]} = Doc#doc.revs, - RROpt = {read_repair, [{tgt_node(), [{Pos, Rev}]}]}, - rpc_update_doc(DbName, Doc, [RROpt]). - - -rpc_update_doc(DbName, Doc, Opts) -> - Ref = erlang:make_ref(), - put(rexi_from, {self(), Ref}), - fabric_rpc:update_docs(DbName, [Doc], Opts), - Reply = test_util:wait(fun() -> - receive - {Ref, Reply} -> - Reply - after 0 -> - wait - end - end), - ?assertEqual({ok, []}, Reply). - - -tgt_node() -> - 'foo@127.0.0.1'. - - -tgt_node_bin() -> - iolist_to_binary(atom_to_list(tgt_node())). diff --git a/src/fabric/test/eunit/fabric_rpc_tests.erl b/src/fabric/test/eunit/fabric_rpc_tests.erl new file mode 100644 index 000000000..b94caf659 --- /dev/null +++ b/src/fabric/test/eunit/fabric_rpc_tests.erl @@ -0,0 +1,181 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(fabric_rpc_tests). + + +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("couch/include/couch_db.hrl"). + + +-define(TDEF(A), {A, fun A/1}). 
+ + +main_test_() -> + { + setup, + spawn, + fun setup_all/0, + fun teardown_all/1, + [ + { + foreach, + fun setup_no_db_or_config/0, + fun teardown_db/1, + lists:map(fun wrap/1, [ + ?TDEF(t_no_config_non_shard_db_create_succeeds) + ]) + }, + { + foreach, + fun setup_shard/0, + fun teardown_noop/1, + lists:map(fun wrap/1, [ + ?TDEF(t_no_db), + ?TDEF(t_no_config_db_create_fails_for_shard), + ?TDEF(t_no_config_db_create_fails_for_shard_rpc) + ]) + }, + { + foreach, + fun setup_shard/0, + fun teardown_db/1, + lists:map(fun wrap/1, [ + ?TDEF(t_db_create_with_config) + ]) + } + + ] + }. + + +setup_all() -> + test_util:start_couch([rexi, mem3, fabric]). + + +teardown_all(Ctx) -> + test_util:stop_couch(Ctx). + + +setup_no_db_or_config() -> + ?tempdb(). + + +setup_shard() -> + ?tempshard(). + + +teardown_noop(_DbName) -> + ok. + +teardown_db(DbName) -> + ok = couch_server:delete(DbName, []). + + +wrap({Name, Fun}) -> + fun(Arg) -> + {timeout, 60, {atom_to_list(Name), fun() -> + process_flag(trap_exit, true), + Fun(Arg) + end}} + end. + + +t_no_db(DbName) -> + ?assertEqual({not_found, no_db_file}, couch_db:open_int(DbName, [?ADMIN_CTX])). + + +t_no_config_non_shard_db_create_succeeds(DbName) -> + ?assertEqual({not_found, no_db_file}, couch_db:open_int(DbName, [?ADMIN_CTX])), + ?assertEqual(DbName, mem3:dbname(DbName)), + ?assertMatch({ok, _}, mem3_util:get_or_create_db(DbName, [?ADMIN_CTX])). + + +t_no_config_db_create_fails_for_shard(DbName) -> + ?assertEqual({not_found, no_db_file}, couch_db:open_int(DbName, [?ADMIN_CTX])), + ?assertException(throw, {error, missing_target}, mem3_util:get_or_create_db(DbName, [?ADMIN_CTX])). + + +t_no_config_db_create_fails_for_shard_rpc(DbName) -> + ?assertEqual({not_found, no_db_file}, couch_db:open_int(DbName, [?ADMIN_CTX])), + ?assertException(throw, {error, missing_target}, mem3_util:get_or_create_db(DbName, [?ADMIN_CTX])), + MFA = {fabric_rpc, get_db_info, [DbName]}, + Ref = rexi:cast(node(), self(), MFA), + Resp = receive + Resp0 -> Resp0 + end, + ?assertMatch({Ref, {'rexi_EXIT', {{error, missing_target}, _}}}, Resp). + + +t_db_create_with_config(DbName) -> + MDbName = mem3:dbname(DbName), + DbDoc = #doc{id = MDbName, body = test_db_doc()}, + + ?assertEqual({not_found, no_db_file}, couch_db:open_int(DbName, [?ADMIN_CTX])), + + %% Write the dbs db config + couch_util:with_db(mem3_sync:shards_db(), fun(Db) -> + ?assertEqual({not_found, missing}, couch_db:open_doc(Db, MDbName, [ejson_body])), + ?assertMatch({ok, _}, couch_db:update_docs(Db, [DbDoc])) + end), + + %% Test get_or_create_db loads the properties as expected + couch_util:with_db(mem3_sync:shards_db(), fun(Db) -> + ?assertMatch({ok, _}, couch_db:open_doc(Db, MDbName, [ejson_body])), + ?assertEqual({not_found, no_db_file}, couch_db:open_int(DbName, [?ADMIN_CTX])), + Resp = mem3_util:get_or_create_db(DbName, [?ADMIN_CTX]), + ?assertMatch({ok, _}, Resp), + {ok, LDb} = Resp, + + {Body} = test_db_doc(), + DbProps = mem3_util:get_shard_opts(Body), + {Props} = case couch_db_engine:get_props(LDb) of + undefined -> {[]}; + Else -> {Else} + end, + %% We don't normally store the default engine name + EngineProps = case couch_db_engine:get_engine(LDb) of + couch_bt_engine -> + []; + EngineName -> + [{engine, EngineName}] + end, + ?assertEqual([{props, Props} | EngineProps], DbProps) + end). 
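+
+% A short sketch of the round trip exercised above (assumed semantics, based
+% on the assertions rather than on mem3 internals): mem3_util:get_shard_opts/1
+% is expected to turn the document's "props" member into shard open options,
+% so for test_db_doc() below it should include roughly
+%
+%     [{props, [{partitioned, true}, {hash, [couch_partition, hash, []]}]}]
+%
+% and mem3_util:get_or_create_db/2 is expected to persist those props so that
+% couch_db_engine:get_props/1 reads the same list back from the newly created
+% shard db.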
+
+
+test_db_doc() ->
+    {[
+        {<<"shard_suffix">>, ".1584997648"},
+        {<<"changelog">>, [
+            [<<"add">>, <<"00000000-7fffffff">>, <<"node1@127.0.0.1">>],
+            [<<"add">>, <<"00000000-7fffffff">>, <<"node2@127.0.0.1">>],
+            [<<"add">>, <<"00000000-7fffffff">>, <<"node3@127.0.0.1">>],
+            [<<"add">>, <<"80000000-ffffffff">>, <<"node1@127.0.0.1">>],
+            [<<"add">>, <<"80000000-ffffffff">>, <<"node2@127.0.0.1">>],
+            [<<"add">>, <<"80000000-ffffffff">>, <<"node3@127.0.0.1">>]
+        ]},
+        {<<"by_node">>, {[
+            {<<"node1@127.0.0.1">>, [<<"00000000-7fffffff">>, <<"80000000-ffffffff">>]},
+            {<<"node2@127.0.0.1">>, [<<"00000000-7fffffff">>, <<"80000000-ffffffff">>]},
+            {<<"node3@127.0.0.1">>, [<<"00000000-7fffffff">>, <<"80000000-ffffffff">>]}
+        ]}},
+        {<<"by_range">>, {[
+            {<<"00000000-7fffffff">>, [<<"node1@127.0.0.1">>, <<"node2@127.0.0.1">>, <<"node3@127.0.0.1">>]},
+            {<<"80000000-ffffffff">>, [<<"node1@127.0.0.1">>, <<"node2@127.0.0.1">>, <<"node3@127.0.0.1">>]}
+        ]}},
+        {<<"props">>, {[
+            {partitioned, true},
+            {hash, [couch_partition, hash, []]}
+        ]}}
+    ]}.
+
diff --git a/src/fabric/test/fabric2_active_tasks_tests.erl b/src/fabric/test/fabric2_active_tasks_tests.erl
new file mode 100644
index 000000000..891450027
--- /dev/null
+++ b/src/fabric/test/fabric2_active_tasks_tests.erl
@@ -0,0 +1,120 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-module(fabric2_active_tasks_tests).
+
+
+-include_lib("couch/include/couch_eunit.hrl").
+-include_lib("eunit/include/eunit.hrl").
+-include_lib("couch/include/couch_db.hrl").
+-include("fabric2_test.hrl").
+
+
+-define(JOB_TYPE, <<"fabric2_active_tasks_tests_type">>).
+-define(JOB_ID, <<"job_id">>).
+
+
+active_tasks_test_() ->
+    {
+        "Test fabric2 active tasks",
+        {
+            setup,
+            fun setup_all/0,
+            fun cleanup_all/1,
+            {
+                foreach,
+                fun setup/0,
+                fun cleanup/1,
+                [
+                    ?TDEF_FE(no_active_tasks_defined),
+                    ?TDEF_FE(empty_map_info),
+                    ?TDEF_FE(can_read_active_tasks),
+                    ?TDEF_FE(only_running_tasks_appear)
+                ]
+            }
+        }
+    }.
+
+
+setup_all() ->
+    Ctx = test_util:start_couch([fabric, couch_jobs]),
+    couch_jobs:set_type_timeout(?JOB_TYPE, 5000),
+    meck:new(couch_jobs, [passthrough]),
+    meck:expect(couch_jobs, get_types, 1, [?JOB_TYPE]),
+    Ctx.
+
+
+cleanup_all(Ctx) ->
+    meck:unload(),
+    test_util:stop_couch(Ctx).
+
+
+setup() ->
+    ok = couch_jobs:add(undefined, ?JOB_TYPE, ?JOB_ID, #{}),
+    ok.
+
+
+cleanup(_) ->
+    meck:reset(couch_jobs),
+    couch_jobs:remove(undefined, ?JOB_TYPE, ?JOB_ID).
+
+
+no_active_tasks_defined(_) ->
+    {ok, Job1, #{}} = couch_jobs:accept(?JOB_TYPE),
+    ?assertEqual([], fabric2_active_tasks:get_active_tasks()),
+    ok = couch_jobs:finish(undefined, Job1).
+
+
+empty_map_info(_) ->
+    {ok, Job1, Data} = couch_jobs:accept(?JOB_TYPE),
+
+    Data1 = fabric2_active_tasks:update_active_task_info(Data, #{}),
+    {ok, Job2} = couch_jobs:update(undefined, Job1, Data1),
+    ?assertEqual([], fabric2_active_tasks:get_active_tasks()),
+    ok = couch_jobs:finish(undefined, Job2).
+ + +can_read_active_tasks(_) -> + {ok, Job1, Data} = couch_jobs:accept(?JOB_TYPE), + + Info = #{<<"x">> => 1}, + Data1 = fabric2_active_tasks:update_active_task_info(Data, Info), + {ok, Job2} = couch_jobs:update(undefined, Job1, Data1), + ?assertEqual([#{<<"x">> => 1}], fabric2_active_tasks:get_active_tasks()), + + Info1 = fabric2_active_tasks:get_active_task_info(Data1), + Info2 = Info1#{<<"y">> => 2}, + Data2 = fabric2_active_tasks:update_active_task_info(Data1, Info2), + {ok, Job3} = couch_jobs:update(undefined, Job2, Data2), + ?assertEqual([#{<<"x">> => 1, <<"y">> => 2}], + fabric2_active_tasks:get_active_tasks()), + ok = couch_jobs:finish(undefined, Job3). + + +only_running_tasks_appear(_) -> + {ok, Job1, Data} = couch_jobs:accept(?JOB_TYPE), + + Info = #{<<"x">> => 1}, + Data1 = fabric2_active_tasks:update_active_task_info(Data, Info), + {ok, Job2} = couch_jobs:update(undefined, Job1, Data1), + + ?assertEqual([#{<<"x">> => 1}], fabric2_active_tasks:get_active_tasks()), + {ok, _} = couch_jobs:resubmit(undefined, Job2), + + ok = couch_jobs:finish(undefined, Job2), + + ?assertEqual([], fabric2_active_tasks:get_active_tasks()), + {ok, Job3, #{}} = couch_jobs:accept(?JOB_TYPE), + ?assertEqual([#{<<"x">> => 1}], fabric2_active_tasks:get_active_tasks()), + + ok = couch_jobs:finish(undefined, Job3), + ?assertEqual([], fabric2_active_tasks:get_active_tasks()). diff --git a/src/fabric/test/fabric2_changes_fold_tests.erl b/src/fabric/test/fabric2_changes_fold_tests.erl new file mode 100644 index 000000000..8541d973c --- /dev/null +++ b/src/fabric/test/fabric2_changes_fold_tests.erl @@ -0,0 +1,241 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(fabric2_changes_fold_tests). + + +-include_lib("couch/include/couch_db.hrl"). +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("eunit/include/eunit.hrl"). +-include("fabric2_test.hrl"). + + +-define(DOC_COUNT, 25). + + +changes_fold_test_() -> + { + "Test changes fold operations", + { + setup, + fun setup_all/0, + fun teardown_all/1, + { + foreach, + fun setup/0, + fun cleanup/1, + [ + ?TDEF_FE(fold_changes_basic), + ?TDEF_FE(fold_changes_since_now), + ?TDEF_FE(fold_changes_since_seq), + ?TDEF_FE(fold_changes_basic_rev), + ?TDEF_FE(fold_changes_since_now_rev), + ?TDEF_FE(fold_changes_since_seq_rev), + ?TDEF_FE(fold_changes_basic_tx_too_old), + ?TDEF_FE(fold_changes_reverse_tx_too_old), + ?TDEF_FE(fold_changes_tx_too_old_with_single_row_emits), + ?TDEF_FE(fold_changes_since_seq_tx_too_old), + ?TDEF_FE(fold_changes_not_progressing) + ] + } + } + }. + + +setup_all() -> + Ctx = test_util:start_couch([fabric]), + meck:new(erlfdb, [passthrough]), + Ctx. + + +teardown_all(Ctx) -> + meck:unload(), + test_util:stop_couch(Ctx). 
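+
+% The tests below drive fabric2_test_util:tx_too_old_setup_errors(UserFunSpec,
+% FoldSpec). Judging by how it is called in this module, each spec is either an
+% error count N or a tuple {Successes, Errors} meaning "allow that many
+% successful calls, then fail that many times with a transaction-too-old style
+% error"; the first spec applies to the user fold fun and the second to the
+% underlying fold_range call. For example, a call such as
+%
+%     fabric2_test_util:tx_too_old_setup_errors({1, 1}, {1, 2})
+%
+% lets the user fun emit one row and then fail once, and lets fold_range
+% succeed once and then fail twice. The exact semantics live in
+% fabric2_test_util, so treat this reading as an assumption.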
+ + +setup() -> + fabric2_test_util:tx_too_old_mock_erlfdb(), + {ok, Db} = fabric2_db:create(?tempdb(), [{user_ctx, ?ADMIN_USER}]), + Rows = lists:map(fun(Val) -> + DocId = fabric2_util:uuid(), + Doc = #doc{ + id = DocId, + body = {[{<<"value">>, Val}]} + }, + {ok, RevId} = fabric2_db:update_doc(Db, Doc, []), + UpdateSeq = fabric2_db:get_update_seq(Db), + #{ + id => DocId, + sequence => UpdateSeq, + deleted => false, + rev_id => RevId + } + end, lists:seq(1, ?DOC_COUNT)), + {Db, Rows}. + + +cleanup({Db, _DocIdRevs}) -> + fabric2_test_util:tx_too_old_reset_errors(), + ok = fabric2_db:delete(fabric2_db:name(Db), []). + + +fold_changes_basic({Db, DocRows}) -> + ?assertEqual(lists:reverse(DocRows), changes(Db)). + + +fold_changes_since_now({Db, _}) -> + ?assertEqual([], changes(Db, now, [])). + + +fold_changes_since_seq({_, []}) -> + ok; + +fold_changes_since_seq({Db, [Row | RestRows]}) -> + #{sequence := Since} = Row, + ?assertEqual(lists:reverse(RestRows), changes(Db, Since, [])), + fold_changes_since_seq({Db, RestRows}). + + +fold_changes_basic_rev({Db, _}) -> + ?assertEqual([], changes(Db, 0, [{dir, rev}])). + + +fold_changes_since_now_rev({Db, DocRows}) -> + ?assertEqual(DocRows, changes(Db, now, [{dir, rev}])). + + +fold_changes_since_seq_rev({_, []}) -> + ok; + +fold_changes_since_seq_rev({Db, DocRows}) -> + #{sequence := Since} = lists:last(DocRows), + Opts = [{dir, rev}], + ?assertEqual(DocRows, changes(Db, Since, Opts)), + RestRows = lists:sublist(DocRows, length(DocRows) - 1), + fold_changes_since_seq_rev({Db, RestRows}). + + +fold_changes_basic_tx_too_old({Db, DocRows0}) -> + DocRows = lists:reverse(DocRows0), + + fabric2_test_util:tx_too_old_setup_errors(0, 1), + ?assertEqual(DocRows, changes(Db)), + + fabric2_test_util:tx_too_old_setup_errors(1, 0), + ?assertEqual(DocRows, changes(Db)), + + % Blow up in user fun but after emitting one row successfully. + fabric2_test_util:tx_too_old_setup_errors({1, 1}, 0), + ?assertEqual(DocRows, changes(Db)), + + % Blow up before last document + fabric2_test_util:tx_too_old_setup_errors({?DOC_COUNT - 1, 1}, 0), + ?assertEqual(DocRows, changes(Db)), + + % Emit one value, then blow up in user function and then blow up twice in + % fold_range. But it is not enough to stop the iteration. + fabric2_test_util:tx_too_old_setup_errors({1, 1}, {1, 2}), + ?assertEqual(DocRows, changes(Db)). + + +fold_changes_reverse_tx_too_old({Db, DocRows}) -> + Opts = [{dir, rev}], + + fabric2_test_util:tx_too_old_setup_errors(0, 1), + ?assertEqual([], changes(Db, 0, Opts)), + + fabric2_test_util:tx_too_old_setup_errors(1, 0), + ?assertEqual([], changes(Db, 0, Opts)), + + fabric2_test_util:tx_too_old_setup_errors(1, 0), + ?assertEqual(DocRows, changes(Db, now, Opts)), + + fabric2_test_util:tx_too_old_setup_errors(1, 0), + ?assertEqual(DocRows, changes(Db, now, Opts)), + + % Blow up in user fun but after emitting one row successfully. + fabric2_test_util:tx_too_old_setup_errors({1, 1}, 0), + ?assertEqual(DocRows, changes(Db, now, Opts)), + + % Blow up before last document + fabric2_test_util:tx_too_old_setup_errors({?DOC_COUNT - 1, 1}, 0), + ?assertEqual(DocRows, changes(Db, now, Opts)), + + % Emit value, blow up in user function, and twice in fold_range + fabric2_test_util:tx_too_old_setup_errors({1, 1}, {1, 2}), + ?assertEqual(DocRows, changes(Db, now, Opts)). 
+
+
+fold_changes_tx_too_old_with_single_row_emits({Db, DocRows0}) ->
+    % This test does a few basic operations while forcing the erlfdb range
+    % fold to emit a single row at a time, so it has to use continuations,
+    % while also inducing tx errors
+    Opts = [{target_bytes, 1}],
+    DocRows = lists:reverse(DocRows0),
+
+    fabric2_test_util:tx_too_old_setup_errors(0, 1),
+    ?assertEqual(DocRows, changes(Db, 0, Opts)),
+
+    fabric2_test_util:tx_too_old_setup_errors(1, 0),
+    ?assertEqual(DocRows, changes(Db, 0, Opts)),
+
+    % Blow up in user fun but after emitting one row successfully.
+    fabric2_test_util:tx_too_old_setup_errors({1, 1}, 0),
+    ?assertEqual(DocRows, changes(Db, 0, Opts)),
+
+    % Blow up before last document
+    fabric2_test_util:tx_too_old_setup_errors({?DOC_COUNT - 1, 1}, 0),
+    ?assertEqual(DocRows, changes(Db, 0, Opts)).
+
+
+fold_changes_since_seq_tx_too_old({Db, Rows}) ->
+    % Blow up after a successful emit, then twice in the range fold call.
+    % Re-use the existing fold_changes_since_seq/1 test function.
+    fabric2_test_util:tx_too_old_setup_errors({1, 1}, {1, 2}),
+    fold_changes_since_seq({Db, Rows}).
+
+
+fold_changes_not_progressing({Db, _}) ->
+    % Fail in first fold range call.
+    fabric2_test_util:tx_too_old_setup_errors(5, 0),
+    ?assertError(fold_range_not_progressing, changes(Db)),
+
+    % Fail in first user fun call.
+    fabric2_test_util:tx_too_old_setup_errors(0, 5),
+    ?assertError(fold_range_not_progressing, changes(Db)),
+
+    % Blow up in last user fun call
+    fabric2_test_util:tx_too_old_setup_errors({?DOC_COUNT - 1, 5}, 0),
+    ?assertError(fold_range_not_progressing, changes(Db)),
+
+    % Blow up in user function after one success.
+    fabric2_test_util:tx_too_old_setup_errors({1, 5}, 0),
+    ?assertError(fold_range_not_progressing, changes(Db)),
+
+    % Emit value, blow up in user function, then keep blowing up in fold_range.
+    fabric2_test_util:tx_too_old_setup_errors({1, 1}, {1, 4}),
+    ?assertError(fold_range_not_progressing, changes(Db)).
+
+
+fold_fun(#{} = Change, Acc) ->
+    fabric2_test_util:tx_too_old_raise_in_user_fun(),
+    {ok, [Change | Acc]}.
+
+
+changes(Db) ->
+    changes(Db, 0, []).
+
+
+changes(Db, Since, Opts) ->
+    {ok, Rows} = fabric2_db:fold_changes(Db, Since, fun fold_fun/2, [], Opts),
+    Rows.
diff --git a/src/fabric/test/fabric2_db_crud_tests.erl b/src/fabric/test/fabric2_db_crud_tests.erl
new file mode 100644
index 000000000..3d90c65b5
--- /dev/null
+++ b/src/fabric/test/fabric2_db_crud_tests.erl
@@ -0,0 +1,750 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-module(fabric2_db_crud_tests).
+
+
+-include_lib("fabric/include/fabric2.hrl").
+-include_lib("couch/include/couch_eunit.hrl").
+-include_lib("eunit/include/eunit.hrl").
+-include("fabric2_test.hrl").
+
+
+-define(PDICT_RAISE_IN_ERLFDB_WAIT, '$pdict_raise_in_erlfdb_wait').
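+
+% ?TDEF_FE comes from fabric2_test.hrl; it is assumed to wrap a test function
+% name into a foreach fixture entry, roughly along the lines of
+%
+%     {atom_to_list(Name), fun(Arg) -> {timeout, Timeout, ?_test(Name(Arg))} end}
+%
+% so each ?TDEF_FE(name) below runs name/1 with the value returned by setup/0,
+% with an optional per-test timeout as the second macro argument. The exact
+% expansion lives in fabric2_test.hrl; this is only a reading of how it is used.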
+ + +crud_test_() -> + { + "Test database CRUD operations", + { + setup, + fun setup_all/0, + fun teardown_all/1, + { + foreach, + fun setup/0, + fun cleanup/1, + [ + ?TDEF_FE(create_db), + ?TDEF_FE(open_db), + ?TDEF_FE(delete_db), + ?TDEF_FE(recreate_db), + ?TDEF_FE(recreate_db_interactive), + ?TDEF_FE(recreate_db_non_interactive), + ?TDEF_FE(undelete_db), + ?TDEF_FE(remove_deleted_db), + ?TDEF_FE(scheduled_remove_deleted_db, 15), + ?TDEF_FE(scheduled_remove_deleted_dbs, 15), + ?TDEF_FE(old_db_handle), + ?TDEF_FE(list_dbs), + ?TDEF_FE(list_dbs_user_fun), + ?TDEF_FE(list_dbs_user_fun_partial), + ?TDEF_FE(list_dbs_info), + ?TDEF_FE(list_dbs_info_partial), + ?TDEF_FE(list_dbs_tx_too_old), + ?TDEF_FE(list_dbs_info_tx_too_old, 15), + ?TDEF_FE(list_deleted_dbs_info), + ?TDEF_FE(list_deleted_dbs_info_user_fun), + ?TDEF_FE(list_deleted_dbs_info_user_fun_partial), + ?TDEF_FE(list_deleted_dbs_info_with_timestamps), + ?TDEF_FE(get_info_wait_retry_on_tx_too_old), + ?TDEF_FE(get_info_wait_retry_on_tx_abort) + ] + } + } + }. + + +scheduled_db_remove_error_test_() -> + { + "Test scheduled database remove operations", + { + setup, + fun setup_all/0, + fun teardown_all/1, + { + foreach, + fun setup/0, + fun cleanup/1, + [ + ?TDEF_FE(scheduled_remove_deleted_dbs_with_error) + ] + } + } + }. + + +setup_all() -> + meck:new(config, [passthrough]), + meck:expect(config, get_integer, fun + ("couchdb", "db_expiration_schedule_sec", _) -> 2; + ("couchdb", "db_expiration_retention_sec", _) -> 0; + (_, _, Default) -> Default + end), + Ctx = test_util:start_couch([fabric, couch_jobs]), + meck:new(erlfdb, [passthrough]), + meck:new(fabric2_db_expiration, [passthrough]), + Ctx. + + +teardown_all(Ctx) -> + meck:unload(), + test_util:stop_couch(Ctx). + + +setup() -> + fabric2_test_util:tx_too_old_mock_erlfdb(). + + +cleanup(_) -> + ok = config:set("couchdb", "db_expiration_enabled", "false", false), + ok = config:set("couchdb", "enable_database_recovery", "false", false), + fabric2_test_util:tx_too_old_reset_errors(), + reset_fail_erfdb_wait(), + meck:reset([fabric2_db_expiration]), + meck:reset([config]), + meck:reset([erlfdb]). + + +create_db(_) -> + DbName = ?tempdb(), + ?assertMatch({ok, _}, fabric2_db:create(DbName, [])), + ?assertEqual(true, ets:member(fabric2_server, DbName)), + ?assertEqual({error, file_exists}, fabric2_db:create(DbName, [])). + + +open_db(_) -> + DbName = ?tempdb(), + ?assertError(database_does_not_exist, fabric2_db:open(DbName, [])), + + ?assertMatch({ok, _}, fabric2_db:create(DbName, [])), + ?assertEqual(true, ets:member(fabric2_server, DbName)), + + % Opening the cached version + ?assertMatch({ok, _}, fabric2_db:open(DbName, [])), + + % Remove from cache and re-open + true = ets:delete(fabric2_server, DbName), + ?assertMatch({ok, _}, fabric2_db:open(DbName, [])). + + +delete_db(_) -> + DbName = ?tempdb(), + ?assertError(database_does_not_exist, fabric2_db:delete(DbName, [])), + + ?assertMatch({ok, _}, fabric2_db:create(DbName, [])), + ?assertEqual(true, ets:member(fabric2_server, DbName)), + + ?assertEqual(ok, fabric2_db:delete(DbName, [])), + ?assertEqual(false, ets:member(fabric2_server, DbName)), + + ?assertError(database_does_not_exist, fabric2_db:open(DbName, [])). 
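+
+% The ets:member(fabric2_server, DbName) assertions above rely on
+% fabric2_server keeping an ETS cache of open database handles keyed by
+% database name: create/2 and open/2 are expected to populate the cache and
+% delete/2 to evict the entry, which is why a successful delete is observable
+% as the key disappearing from the table.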
+ + +recreate_db(_) -> + DbName = ?tempdb(), + ?assertMatch({ok, _}, fabric2_db:create(DbName, [])), + + {ok, Db1} = fabric2_db:open(DbName, []), + + ?assertEqual(ok, fabric2_db:delete(DbName, [])), + ?assertMatch({ok, _}, fabric2_db:create(DbName, [])), + + ?assertError(database_does_not_exist, fabric2_db:get_db_info(Db1)), + + ?assertEqual(ok, fabric2_db:delete(DbName, [])), + ?assertMatch({ok, _}, fabric2_db:create(DbName, [])), + + {ok, Db2} = fabric2_db:open(DbName, []), + + CurOpts = [{uuid, fabric2_db:get_uuid(Db2)}], + ?assertMatch({ok, #{}}, fabric2_db:open(DbName, CurOpts)), + + % Remove from cache to force it to open through fabric2_fdb:open + fabric2_server:remove(DbName), + ?assertMatch({ok, #{}}, fabric2_db:open(DbName, CurOpts)), + + BadOpts = [{uuid, fabric2_util:uuid()}], + ?assertError(database_does_not_exist, fabric2_db:open(DbName, BadOpts)), + + % Remove from cache to force it to open through fabric2_fdb:open + fabric2_server:remove(DbName), + ?assertError(database_does_not_exist, fabric2_db:open(DbName, BadOpts)). + + +recreate_db_interactive(_) -> + DbName = ?tempdb(), + ?assertMatch({ok, _}, fabric2_db:create(DbName, [])), + + {ok, Db1} = fabric2_db:open(DbName, [{interactive, true}]), + + ?assertEqual(ok, fabric2_db:delete(DbName, [])), + ?assertMatch({ok, _}, fabric2_db:create(DbName, [])), + + ?assertMatch({ok, _}, fabric2_db:get_db_info(Db1)). + + +recreate_db_non_interactive(_) -> + % This is also the default case, but we check that parsing the `false` open + % value works correctly. + DbName = ?tempdb(), + ?assertMatch({ok, _}, fabric2_db:create(DbName, [])), + + {ok, Db1} = fabric2_db:open(DbName, [{interactive, false}]), + + ?assertEqual(ok, fabric2_db:delete(DbName, [])), + ?assertMatch({ok, _}, fabric2_db:create(DbName, [])), + + ?assertError(database_does_not_exist, fabric2_db:get_db_info(Db1)). + + +undelete_db(_) -> + DbName = ?tempdb(), + ?assertError(database_does_not_exist, fabric2_db:delete(DbName, [])), + + ?assertMatch({ok, _}, fabric2_db:create(DbName, [])), + ?assertEqual(true, ets:member(fabric2_server, DbName)), + + ok = config:set("couchdb", "enable_database_recovery", "true", false), + ?assertEqual(ok, fabric2_db:delete(DbName, [])), + ?assertEqual(false, ets:member(fabric2_server, DbName)), + + + {ok, Infos} = fabric2_db:list_deleted_dbs_info(), + [DeletedDbInfo] = [Info || Info <- Infos, + DbName == proplists:get_value(db_name, Info) + ], + Timestamp = proplists:get_value(timestamp, DeletedDbInfo), + + OldTS = <<"2020-01-01T12:00:00Z">>, + ?assertEqual(not_found, fabric2_db:undelete(DbName, DbName, OldTS, [])), + BadDbName = <<"bad_dbname">>, + ?assertEqual(not_found, + fabric2_db:undelete(BadDbName, BadDbName, Timestamp, [])), + + ok = fabric2_db:undelete(DbName, DbName, Timestamp, []), + {ok, AllDbInfos} = fabric2_db:list_dbs_info(), + ?assert(is_db_info_member(DbName, AllDbInfos)). 
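+
+% A condensed sketch of the soft-delete/undelete flow exercised above, using
+% only calls that appear in this test (argument details otherwise assumed):
+%
+%     ok = config:set("couchdb", "enable_database_recovery", "true", false),
+%     ok = fabric2_db:delete(DbName, []),
+%     {ok, Infos} = fabric2_db:list_deleted_dbs_info(),
+%     % each entry carries at least db_name and a timestamp; pass the
+%     % timestamp back to restore that particular deleted instance:
+%     ok = fabric2_db:undelete(DbName, DbName, Timestamp, []).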
+ + +remove_deleted_db(_) -> + DbName = ?tempdb(), + ?assertError(database_does_not_exist, fabric2_db:delete(DbName, [])), + + ?assertMatch({ok, _}, fabric2_db:create(DbName, [])), + ?assertEqual(true, ets:member(fabric2_server, DbName)), + + ok = config:set("couchdb", "enable_database_recovery", "true", false), + ?assertEqual(ok, fabric2_db:delete(DbName, [])), + ?assertEqual(false, ets:member(fabric2_server, DbName)), + + {ok, Infos} = fabric2_db:list_deleted_dbs_info(), + [DeletedDbInfo] = [Info || Info <- Infos, + DbName == proplists:get_value(db_name, Info) + ], + Timestamp = proplists:get_value(timestamp, DeletedDbInfo), + OldTS = <<"2020-01-01T12:00:00Z">>, + ?assertEqual(not_found, + fabric2_db:delete(DbName, [{deleted_at, OldTS}])), + BadDbName = <<"bad_dbname">>, + ?assertEqual(not_found, + fabric2_db:delete(BadDbName, [{deleted_at, Timestamp}])), + + ok = fabric2_db:delete(DbName, [{deleted_at, Timestamp}]), + {ok, Infos2} = fabric2_db:list_deleted_dbs_info(), + DeletedDbs = [proplists:get_value(db_name, Info) || Info <- Infos2], + ?assert(not lists:member(DbName, DeletedDbs)). + + +scheduled_remove_deleted_db(_) -> + ok = config:set("couchdb", "db_expiration_enabled", "true", false), + ok = config:set("couchdb", "enable_database_recovery", "true", false), + DbName = ?tempdb(), + ?assertError(database_does_not_exist, fabric2_db:delete(DbName, [])), + + ?assertMatch({ok, _}, fabric2_db:create(DbName, [])), + ?assertEqual(true, ets:member(fabric2_server, DbName)), + + ?assertEqual(ok, fabric2_db:delete(DbName, [])), + ?assertEqual(false, ets:member(fabric2_server, DbName)), + + meck:reset(fabric2_db_expiration), + meck:wait(fabric2_db_expiration, process_expirations, '_', 7000), + + ?assertEqual(ok, test_util:wait(fun() -> + {ok, Infos} = fabric2_db:list_deleted_dbs_info(), + DeletedDbs = [proplists:get_value(db_name, Info) || Info <- Infos], + case lists:member(DbName, DeletedDbs) of + true -> wait; + false -> ok + end + end)). + + +scheduled_remove_deleted_dbs(_) -> + ok = config:set("couchdb", "db_expiration_enabled", "true", false), + ok = config:set("couchdb", "db_expiration_batch", "2", false), + ok = config:set("couchdb", "enable_database_recovery", "true", false), + DbNameList = [create_and_delete_db() || _I <- lists:seq(1, 5)], + meck:reset(fabric2_db_expiration), + meck:wait(fabric2_db_expiration, process_expirations, '_', 7000), + + {ok, Infos} = fabric2_db:list_deleted_dbs_info(), + DeletedDbs = [proplists:get_value(db_name, Info) || Info <- Infos], + lists:map(fun(DbName) -> + ?assert(not lists:member(DbName, DeletedDbs)) + end, DbNameList). + + +scheduled_remove_deleted_dbs_with_error(_) -> + meck:expect(fabric2_db_expiration, process_expirations, fun(_, _) -> + throw(process_expirations_error) + end), + + {Pid, Ref} = spawn_monitor(fun() -> + fabric2_db_expiration:cleanup(true) + end), + receive + {'DOWN', Ref, process, Pid, Error} -> + ?assertMatch({job_error, process_expirations_error, _}, Error) + end, + JobType = <<"db_expiration">>, + JobId = <<"db_expiration_job">>, + FQJobId = <<JobId/binary, "-", 1:16/integer>>, + + ?assertMatch({ok, _}, couch_jobs:get_job_data(undefined, JobType, FQJobId)), + {ok, JobState} = couch_jobs:get_job_state(undefined, JobType, FQJobId), + ?assert(lists:member(JobState, [pending, running])). 
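+
+% Note on the FQJobId construction above: <<JobId/binary, "-", 1:16/integer>>
+% appends a big-endian 16-bit integer to the job id, so it evaluates to
+% <<"db_expiration_job-", 0, 1>>. The "-<N>" suffix is assumed to identify the
+% per-node expiration worker instance; the byte layout itself is plain Erlang
+% bit syntax.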
+ + +old_db_handle(_) -> + % db hard deleted + DbName1 = ?tempdb(), + ?assertError(database_does_not_exist, fabric2_db:delete(DbName1, [])), + ?assertMatch({ok, _}, fabric2_db:create(DbName1, [])), + {ok, Db1} = fabric2_db:open(DbName1, []), + ?assertMatch({ok, _}, fabric2_db:get_db_info(Db1)), + ?assertEqual(ok, fabric2_db:delete(DbName1, [])), + ?assertError(database_does_not_exist, fabric2_db:get_db_info(Db1)), + + % db soft deleted + DbName2 = ?tempdb(), + ?assertError(database_does_not_exist, fabric2_db:delete(DbName2, [])), + ?assertMatch({ok, _}, fabric2_db:create(DbName2, [])), + {ok, Db2} = fabric2_db:open(DbName2, []), + ?assertMatch({ok, _}, fabric2_db:get_db_info(Db2)), + ok = config:set("couchdb", "enable_database_recovery", "true", false), + ?assertEqual(ok, fabric2_db:delete(DbName2, [])), + ?assertError(database_does_not_exist, fabric2_db:get_db_info(Db2)), + + % db soft deleted and re-created + DbName3 = ?tempdb(), + ?assertError(database_does_not_exist, fabric2_db:delete(DbName3, [])), + ?assertMatch({ok, _}, fabric2_db:create(DbName3, [])), + {ok, Db3} = fabric2_db:open(DbName3, []), + ?assertMatch({ok, _}, fabric2_db:get_db_info(Db3)), + ok = config:set("couchdb", "enable_database_recovery", "true", false), + ?assertEqual(ok, fabric2_db:delete(DbName3, [])), + ?assertMatch({ok, _}, fabric2_db:create(DbName3, [])), + ?assertError(database_does_not_exist, fabric2_db:get_db_info(Db3)), + + % db soft deleted and undeleted + DbName4 = ?tempdb(), + ?assertError(database_does_not_exist, fabric2_db:delete(DbName4, [])), + ?assertMatch({ok, _}, fabric2_db:create(DbName4, [])), + {ok, Db4} = fabric2_db:open(DbName4, []), + ?assertMatch({ok, _}, fabric2_db:get_db_info(Db4)), + ok = config:set("couchdb", "enable_database_recovery", "true", false), + ?assertEqual(ok, fabric2_db:delete(DbName4, [])), + {ok, Infos} = fabric2_db:list_deleted_dbs_info(), + [DeletedDbInfo] = [Info || Info <- Infos, + DbName4 == proplists:get_value(db_name, Info) + ], + Timestamp = proplists:get_value(timestamp, DeletedDbInfo), + ok = fabric2_db:undelete(DbName4, DbName4, Timestamp, []), + ?assertMatch({ok, _}, fabric2_db:get_db_info(Db4)), + + % db hard deleted and re-created + DbName5 = ?tempdb(), + ?assertError(database_does_not_exist, fabric2_db:delete(DbName5, [])), + ?assertMatch({ok, _}, fabric2_db:create(DbName5, [])), + {ok, Db5} = fabric2_db:open(DbName5, []), + ?assertMatch({ok, _}, fabric2_db:get_db_info(Db5)), + ok = config:set("couchdb", "enable_database_recovery", "false", false), + ?assertEqual(ok, fabric2_db:delete(DbName5, [])), + ?assertMatch({ok, _}, fabric2_db:create(DbName5, [])), + ?assertError(database_does_not_exist, fabric2_db:get_db_info(Db5)). + + +list_dbs(_) -> + DbName = ?tempdb(), + AllDbs1 = fabric2_db:list_dbs(), + + ?assert(is_list(AllDbs1)), + ?assert(not lists:member(DbName, AllDbs1)), + + ?assertMatch({ok, _}, fabric2_db:create(DbName, [])), + AllDbs2 = fabric2_db:list_dbs(), + ?assert(lists:member(DbName, AllDbs2)), + + ?assertEqual(ok, fabric2_db:delete(DbName, [])), + AllDbs3 = fabric2_db:list_dbs(), + ?assert(not lists:member(DbName, AllDbs3)). + + +list_dbs_user_fun(_) -> + ?assertMatch({ok, _}, fabric2_db:create(?tempdb(), [])), + + UserFun = fun(Row, Acc) -> {ok, [Row | Acc]} end, + {ok, UserAcc} = fabric2_db:list_dbs(UserFun, [], []), + + Base = lists:foldl(fun(DbName, Acc) -> + [{row, [{id, DbName}]} | Acc] + end, [{meta, []}], fabric2_db:list_dbs()), + Expect = lists:reverse(Base, [complete]), + + ?assertEqual(Expect, lists:reverse(UserAcc)). 
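+
+% As the assertions above show, the user fun passed to fabric2_db:list_dbs/3
+% sees {meta, Props} first, then one {row, [{id, DbName}]} per database, and
+% finally the atom complete; returning {ok, Acc} continues the fold, while
+% {stop, Acc} (used in the next test) ends it early after the rows seen so far.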
+
+
+list_dbs_user_fun_partial(_) ->
+    UserFun = fun(Row, Acc) -> {stop, [Row | Acc]} end,
+    {ok, UserAcc} = fabric2_db:list_dbs(UserFun, [], []),
+    ?assertEqual([{meta, []}], UserAcc).
+
+
+list_dbs_info(_) ->
+    DbName = ?tempdb(),
+    {ok, AllDbInfos1} = fabric2_db:list_dbs_info(),
+
+    ?assert(is_list(AllDbInfos1)),
+    ?assert(not is_db_info_member(DbName, AllDbInfos1)),
+
+    ?assertMatch({ok, _}, fabric2_db:create(DbName, [])),
+    {ok, AllDbInfos2} = fabric2_db:list_dbs_info(),
+    ?assert(is_db_info_member(DbName, AllDbInfos2)),
+
+    ?assertEqual(ok, fabric2_db:delete(DbName, [])),
+    {ok, AllDbInfos3} = fabric2_db:list_dbs_info(),
+    ?assert(not is_db_info_member(DbName, AllDbInfos3)).
+
+
+list_dbs_info_partial(_) ->
+    UserFun = fun(Row, Acc) -> {stop, [Row | Acc]} end,
+    {ok, UserAcc} = fabric2_db:list_dbs_info(UserFun, [], []),
+    ?assertEqual([{meta, []}], UserAcc).
+
+
+list_dbs_tx_too_old(_) ->
+    DbName1 = ?tempdb(),
+    DbName2 = ?tempdb(),
+    ?assertMatch({ok, _}, fabric2_db:create(DbName1, [])),
+    ?assertMatch({ok, _}, fabric2_db:create(DbName2, [])),
+
+    UserFun = fun(Row, Acc) ->
+        fabric2_test_util:tx_too_old_raise_in_user_fun(),
+        {ok, [Row | Acc]}
+    end,
+
+    % Get the expected output without any transactions timing out
+    Dbs = fabric2_db:list_dbs(UserFun, [], []),
+
+    % Blow up in fold range
+    fabric2_test_util:tx_too_old_setup_errors(0, 1),
+    ?assertEqual(Dbs, fabric2_db:list_dbs(UserFun, [], [])),
+
+    % Blow up in fold_range after emitting one row
+    fabric2_test_util:tx_too_old_setup_errors(0, {1, 1}),
+    ?assertEqual(Dbs, fabric2_db:list_dbs(UserFun, [], [])),
+
+    % Blow up in user fun
+    fabric2_test_util:tx_too_old_setup_errors(1, 0),
+    ?assertEqual(Dbs, fabric2_db:list_dbs(UserFun, [], [])),
+
+    % Blow up in user fun after emitting one row
+    fabric2_test_util:tx_too_old_setup_errors({1, 1}, 0),
+    ?assertEqual(Dbs, fabric2_db:list_dbs(UserFun, [], [])),
+
+    % Blow up in user fun and fold range
+    fabric2_test_util:tx_too_old_setup_errors(1, {1, 1}),
+    ?assertEqual(Dbs, fabric2_db:list_dbs(UserFun, [], [])),
+
+    ok = fabric2_db:delete(DbName1, []),
+    ok = fabric2_db:delete(DbName2, []).
+ + +list_dbs_info_tx_too_old(_) -> + % list_dbs_info uses a queue of 100 futures to fetch db infos in parallel + % so create more than 100 dbs so make sure we have 100+ dbs in our test + + DbCount = 101, + DbNames = fabric2_util:pmap(fun(_) -> + DbName = ?tempdb(), + ?assertMatch({ok, _}, fabric2_db:create(DbName, [])), + DbName + end, lists:seq(1, DbCount)), + + UserFun = fun(Row, Acc) -> + fabric2_test_util:tx_too_old_raise_in_user_fun(), + {ok, [Row | Acc]} + end, + + % This is the expected return with no tx timeouts + {ok, DbInfos} = fabric2_db:list_dbs_info(UserFun, [], []), + + % Blow up in fold range on the first call + fabric2_test_util:tx_too_old_setup_errors(0, 1), + ?assertEqual({ok, DbInfos}, fabric2_db:list_dbs_info(UserFun, [], [])), + + % Blow up in fold_range after emitting one row + fabric2_test_util:tx_too_old_setup_errors(0, {1, 1}), + ?assertEqual({ok, DbInfos}, fabric2_db:list_dbs_info(UserFun, [], [])), + + % Blow up in fold_range after emitting 99 rows + fabric2_test_util:tx_too_old_setup_errors(0, {DbCount - 2, 1}), + ?assertEqual({ok, DbInfos}, fabric2_db:list_dbs_info(UserFun, [], [])), + + % Blow up in fold_range after emitting 100 rows + fabric2_test_util:tx_too_old_setup_errors(0, {DbCount - 1, 1}), + ?assertEqual({ok, DbInfos}, fabric2_db:list_dbs_info(UserFun, [], [])), + + % Blow up in user fun + fabric2_test_util:tx_too_old_setup_errors(1, 0), + ?assertEqual({ok, DbInfos}, fabric2_db:list_dbs_info(UserFun, [], [])), + + % Blow up in user fun after emitting one row + fabric2_test_util:tx_too_old_setup_errors({1, 1}, 0), + ?assertEqual({ok, DbInfos}, fabric2_db:list_dbs_info(UserFun, [], [])), + + % Blow up in user fun after emitting 99 rows + fabric2_test_util:tx_too_old_setup_errors({DbCount - 2, 1}, 0), + ?assertEqual({ok, DbInfos}, fabric2_db:list_dbs_info(UserFun, [], [])), + + % Blow up in user fun after emitting 100 rows + fabric2_test_util:tx_too_old_setup_errors({DbCount - 1, 1}, 0), + ?assertEqual({ok, DbInfos}, fabric2_db:list_dbs_info(UserFun, [], [])), + + % Blow up in in user fun and fold range + fabric2_test_util:tx_too_old_setup_errors(1, {1, 1}), + ?assertEqual({ok, DbInfos}, fabric2_db:list_dbs_info(UserFun, [], [])), + + fabric2_util:pmap(fun(DbName) -> + ?assertEqual(ok, fabric2_db:delete(DbName, [])) + end, DbNames). + + +list_deleted_dbs_info(_) -> + DbName = ?tempdb(), + AllDbs1 = fabric2_db:list_dbs(), + + ?assert(is_list(AllDbs1)), + ?assert(not lists:member(DbName, AllDbs1)), + + ?assertMatch({ok, _}, fabric2_db:create(DbName, [])), + AllDbs2 = fabric2_db:list_dbs(), + ?assert(lists:member(DbName, AllDbs2)), + + ok = config:set("couchdb", "enable_database_recovery", "true", false), + ?assertEqual(ok, fabric2_db:delete(DbName, [])), + + AllDbs3 = fabric2_db:list_dbs(), + ?assert(not lists:member(DbName, AllDbs3)), + {ok, DeletedDbsInfo} = fabric2_db:list_deleted_dbs_info(), + DeletedDbs4 = get_deleted_dbs(DeletedDbsInfo), + ?assert(lists:member(DbName, DeletedDbs4)). + + +list_deleted_dbs_info_user_fun(_) -> + DbName = ?tempdb(), + ?assertMatch({ok, _}, fabric2_db:create(DbName, [])), + ?assertEqual(ok, fabric2_db:delete(DbName, [])), + + UserFun = fun(Row, Acc) -> {ok, [Row | Acc]} end, + {ok, UserAcc} = fabric2_db:list_deleted_dbs_info(UserFun, [], []), + {ok, DeletedDbsInfo} = fabric2_db:list_deleted_dbs_info(), + + Base = lists:foldl(fun(DbInfo, Acc) -> + [{row, DbInfo} | Acc] + end, [{meta, []}], DeletedDbsInfo), + Expect = lists:reverse(Base, [complete]), + + ?assertEqual(Expect, lists:reverse(UserAcc)). 
+
+
+list_deleted_dbs_info_user_fun_partial(_) ->
+    UserFun = fun(Row, Acc) -> {stop, [Row | Acc]} end,
+    {ok, UserAcc} = fabric2_db:list_deleted_dbs_info(UserFun, [], []),
+    ?assertEqual([{meta, []}], UserAcc).
+
+
+list_deleted_dbs_info_with_timestamps(_) ->
+    ok = config:set("couchdb", "enable_database_recovery", "true", false),
+
+    % Cycle our database three times to get multiple entries
+    DbName = ?tempdb(),
+    ?assertMatch({ok, _}, fabric2_db:create(DbName, [])),
+    ?assertEqual(ok, fabric2_db:delete(DbName, [])),
+    timer:sleep(1100),
+    ?assertMatch({ok, _}, fabric2_db:create(DbName, [])),
+    ?assertEqual(ok, fabric2_db:delete(DbName, [])),
+    timer:sleep(1100),
+    ?assertMatch({ok, _}, fabric2_db:create(DbName, [])),
+    ?assertEqual(ok, fabric2_db:delete(DbName, [])),
+
+    UserFun = fun(Row, Acc) ->
+        case Row of
+            {row, Info} -> {ok, [Info | Acc]};
+            _ -> {ok, Acc}
+        end
+    end,
+
+    Options1 = [{start_key, DbName}, {end_key, <<DbName/binary, 255>>}],
+    {ok, Infos1} = fabric2_db:list_deleted_dbs_info(UserFun, [], Options1),
+    TimeStamps1 = [fabric2_util:get_value(timestamp, Info) || Info <- Infos1],
+    ?assertEqual(3, length(TimeStamps1)),
+
+    [FirstTS, MiddleTS, LastTS] = lists:sort(TimeStamps1),
+
+    % Check we can skip over the FirstTS
+    Options2 = [{start_key, [DbName, MiddleTS]}, {end_key, [DbName, LastTS]}],
+    {ok, Infos2} = fabric2_db:list_deleted_dbs_info(UserFun, [], Options2),
+    TimeStamps2 = [fabric2_util:get_value(timestamp, Info) || Info <- Infos2],
+    ?assertEqual(2, length(TimeStamps2)),
+    ?assertEqual([LastTS, MiddleTS], TimeStamps2), % because foldl reverses
+
+    % Check we can end before LastTS
+    Options3 = [{start_key, DbName}, {end_key, [DbName, MiddleTS]}],
+    {ok, Infos3} = fabric2_db:list_deleted_dbs_info(UserFun, [], Options3),
+    TimeStamps3 = [fabric2_util:get_value(timestamp, Info) || Info <- Infos3],
+    ?assertEqual([MiddleTS, FirstTS], TimeStamps3),
+
+    % Check that {dir, rev} works without timestamps
+    Options4 = [{start_key, DbName}, {end_key, DbName}, {dir, rev}],
+    {ok, Infos4} = fabric2_db:list_deleted_dbs_info(UserFun, [], Options4),
+    TimeStamps4 = [fabric2_util:get_value(timestamp, Info) || Info <- Infos4],
+    ?assertEqual([FirstTS, MiddleTS, LastTS], TimeStamps4),
+
+    % Check that reverse with keys returns correctly
+    Options5 = [
+        {start_key, [DbName, MiddleTS]},
+        {end_key, [DbName, FirstTS]},
+        {dir, rev}
+    ],
+    {ok, Infos5} = fabric2_db:list_deleted_dbs_info(UserFun, [], Options5),
+    TimeStamps5 = [fabric2_util:get_value(timestamp, Info) || Info <- Infos5],
+    ?assertEqual([FirstTS, MiddleTS], TimeStamps5).
+
+
+get_info_wait_retry_on_tx_too_old(_) ->
+    DbName = ?tempdb(),
+    ?assertMatch({ok, _}, fabric2_db:create(DbName, [])),
+
+    {ok, Db} = fabric2_db:open(DbName, []),
+
+    fabric2_fdb:transactional(Db, fun(TxDb) ->
+        #{
+            tx := Tx,
+            db_prefix := DbPrefix
+        } = TxDb,
+
+        % Simulate being in a list_dbs_info callback
+        ok = erlfdb:set_option(Tx, disallow_writes),
+
+        InfoF = fabric2_fdb:get_info_future(Tx, DbPrefix),
+        {info_future, _, _, ChangesF, _, _, _} = InfoF,
+
+        raise_in_erlfdb_wait(ChangesF, {erlfdb_error, 1007}, 3),
+        ?assertError({erlfdb_error, 1007}, fabric2_fdb:get_info_wait(InfoF)),
+
+        raise_in_erlfdb_wait(ChangesF, {erlfdb_error, 1007}, 2),
+        ?assertMatch([{_, _} | _], fabric2_fdb:get_info_wait(InfoF)),
+
+        ?assertEqual(ok, fabric2_db:delete(DbName, []))
+    end).
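+
+% 1007 is FoundationDB's transaction_too_old error code and 1025 (used in the
+% next test) is transaction_cancelled; raise_in_erlfdb_wait/3, defined near
+% the bottom of this module, mecks erlfdb:wait/1 so that this specific future
+% fails with the given error a fixed number of times before falling through
+% to the real implementation.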
+ + +get_info_wait_retry_on_tx_abort(_)-> + DbName = ?tempdb(), + ?assertMatch({ok, _}, fabric2_db:create(DbName, [])), + + {ok, Db} = fabric2_db:open(DbName, []), + + fabric2_fdb:transactional(Db, fun(TxDb) -> + #{ + tx := Tx, + db_prefix := DbPrefix + } = TxDb, + + % Simulate being in a list_dbs_info callback + ok = erlfdb:set_option(Tx, disallow_writes), + + InfoF = fabric2_fdb:get_info_future(Tx, DbPrefix), + {info_future, _, _, ChangesF, _, _, _} = InfoF, + + raise_in_erlfdb_wait(ChangesF, {erlfdb_error, 1025}, 3), + ?assertError({erlfdb_error, 1025}, fabric2_fdb:get_info_wait(InfoF)), + + raise_in_erlfdb_wait(ChangesF, {erlfdb_error, 1025}, 2), + ?assertMatch([{_, _} | _], fabric2_fdb:get_info_wait(InfoF)), + + ?assertEqual(ok, fabric2_db:delete(DbName, [])) + end). + + +reset_fail_erfdb_wait() -> + erase(?PDICT_RAISE_IN_ERLFDB_WAIT), + meck:expect(erlfdb, wait, fun(F) -> meck:passthrough([F]) end). + + +raise_in_erlfdb_wait(Future, Error, Count) -> + put(?PDICT_RAISE_IN_ERLFDB_WAIT, Count), + meck:expect(erlfdb, wait, fun + (F) when F =:= Future -> + case get(?PDICT_RAISE_IN_ERLFDB_WAIT) of + N when is_integer(N), N > 0 -> + put(?PDICT_RAISE_IN_ERLFDB_WAIT, N - 1), + error(Error); + _ -> + meck:passthrough([F]) + end; + (F) -> + meck:passthrough([F]) + end). + + +is_db_info_member(_, []) -> + false; + +is_db_info_member(DbName, [DbInfo | RestInfos]) -> + case lists:keyfind(db_name, 1, DbInfo) of + {db_name, DbName} -> + true; + _E -> + is_db_info_member(DbName, RestInfos) + end. + +get_deleted_dbs(DeletedDbInfos) -> + lists:foldl(fun(DbInfo, Acc) -> + DbName = fabric2_util:get_value(db_name, DbInfo), + [DbName | Acc] + end, [], DeletedDbInfos). + + +create_and_delete_db() -> + DbName = ?tempdb(), + ?assertError(database_does_not_exist, fabric2_db:delete(DbName, [])), + + ?assertMatch({ok, _}, fabric2_db:create(DbName, [])), + ?assertEqual(true, ets:member(fabric2_server, DbName)), + + ?assertEqual(ok, fabric2_db:delete(DbName, [])), + ?assertEqual(false, ets:member(fabric2_server, DbName)), + DbName. diff --git a/src/fabric/test/fabric2_db_fold_doc_docids_tests.erl b/src/fabric/test/fabric2_db_fold_doc_docids_tests.erl new file mode 100644 index 000000000..b55da5363 --- /dev/null +++ b/src/fabric/test/fabric2_db_fold_doc_docids_tests.erl @@ -0,0 +1,150 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(fabric2_db_fold_doc_docids_tests). + + +-include_lib("couch/include/couch_db.hrl"). +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("eunit/include/eunit.hrl"). +-include("fabric2_test.hrl"). + +doc_fold_test_() -> + { + "Test document fold operations", + { + setup, + fun setup_all/0, + fun teardown_all/1, + { + foreach, + fun setup/0, + fun cleanup/1, + [ + ?TDEF_FE(fold_docs_simple), + ?TDEF_FE(fold_docs_lots), + ?TDEF_FE(fold_docs_local), + ?TDEF_FE(fold_docs_mixed) +] + } + } + }. + + +setup_all() -> + test_util:start_couch([fabric]). + + +teardown_all(Ctx) -> + test_util:stop_couch(Ctx). 
+ + +setup() -> + {ok, Db} = fabric2_db:create(?tempdb(), [{user_ctx, ?ADMIN_USER}]), + Db. + + +cleanup(Db) -> + ok = fabric2_db:delete(fabric2_db:name(Db), []). + + +fold_docs_simple(Db) -> + Docs = create_docs(Db, 10), + run_fold(Db, Docs). + + +fold_docs_lots(Db) -> + Docs = create_docs(Db, 110), + run_fold(Db, Docs). + + +fold_docs_local(Db) -> + Docs = create_local_docs(Db, 10), + run_fold(Db, Docs). + + +fold_docs_mixed(Db) -> + Docs = create_mixed_docs(Db, 200), + run_fold(Db, Docs). + + +run_fold(Db, Docs) -> + SortedIds = get_ids(Docs), + Ids = shuffle(SortedIds), + Returned = fabric2_fdb:transactional(Db, fun (TxDb) -> + fold_docs_return_ids(TxDb, Ids) + end), + ?assertEqual(Returned, Ids). + + +fold_docs_return_ids(TxDb, Ids) -> + CB = fun(DocId, _Doc, Acc) -> + {ok, Acc ++ [DocId]} + end, + {ok, Acc} = fabric2_db:fold_docs(TxDb, Ids, CB, [], []), + Acc. + +get_ids(Docs) -> + lists:map(fun (#doc{id = Id}) -> Id end, Docs). + + +create_mixed_docs(Db, Size) -> + fabric2_fdb:transactional(Db, fun (TxDb) -> + Docs = lists:map(fun (Id) -> + case Id rem 3 == 0 of + true -> create_local_doc(Id); + false -> create_doc(Id) + end + end, lists:seq(0, Size)), + {ok, _} = fabric2_db:update_docs(TxDb, Docs), + Docs + end). + + +create_local_docs(Db, Size) -> + fabric2_fdb:transactional(Db, fun (TxDb) -> + Docs = lists:map(fun (Id) -> + create_local_doc(Id) + end, lists:seq(0, Size)), + {ok, _} = fabric2_db:update_docs(TxDb, Docs), + Docs + end). + + +create_docs(Db, Size) -> + fabric2_fdb:transactional(Db, fun (TxDb) -> + Docs = lists:map(fun (Id) -> + create_doc(Id) + end, lists:seq(0, Size)), + {ok, _} = fabric2_db:update_docs(TxDb, Docs), + Docs + end). + + +create_doc(Id) -> + couch_doc:from_json_obj({[ + {<<"_id">>, list_to_binary([<<"doc-">>, integer_to_binary(Id)])}, + {<<"value">>, 1} + ]}). + + +create_local_doc(Id) -> + couch_doc:from_json_obj({[ + {<<"_id">>, list_to_binary([<<"_local/doc-">>, integer_to_binary(Id)])}, + {<<"value">>, 1} + ]}). + + +shuffle(List) when is_list(List) -> + Tagged = [{rand:uniform(), Item} || Item <- List], + {_, Randomized} = lists:unzip(lists:sort(Tagged)), + Randomized. diff --git a/src/fabric/test/fabric2_db_misc_tests.erl b/src/fabric/test/fabric2_db_misc_tests.erl new file mode 100644 index 000000000..23532144d --- /dev/null +++ b/src/fabric/test/fabric2_db_misc_tests.erl @@ -0,0 +1,445 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(fabric2_db_misc_tests). + + +% Used in events_listener test +-export([ + event_listener_callback/3 +]). + + +-include_lib("couch/include/couch_db.hrl"). +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("eunit/include/eunit.hrl"). +-include("fabric2.hrl"). +-include("fabric2_test.hrl"). 
+ + +misc_test_() -> + { + "Test database miscellaney", + { + setup, + fun setup/0, + fun cleanup/1, + with([ + ?TDEF(empty_db_info), + ?TDEF(accessors), + ?TDEF(set_revs_limit), + ?TDEF(set_security), + ?TDEF(get_security_cached), + ?TDEF(is_system_db), + ?TDEF(validate_dbname), + ?TDEF(validate_doc_ids), + ?TDEF(get_doc_info), + ?TDEF(get_doc_info_not_found), + ?TDEF(get_full_doc_info), + ?TDEF(get_full_doc_info_not_found), + ?TDEF(get_full_doc_infos), + ?TDEF(ensure_full_commit), + ?TDEF(metadata_bump), + ?TDEF(db_version_bump), + ?TDEF(db_cache_doesnt_evict_newer_handles), + ?TDEF(events_listener) + ]) + } + }. + + +setup() -> + Ctx = test_util:start_couch([fabric]), + DbName = ?tempdb(), + {ok, Db} = fabric2_db:create(DbName, [{user_ctx, ?ADMIN_USER}]), + {DbName, Db, Ctx}. + + +cleanup({_DbName, Db, Ctx}) -> + meck:unload(), + ok = fabric2_db:delete(fabric2_db:name(Db), []), + test_util:stop_couch(Ctx). + + +empty_db_info({DbName, Db, _}) -> + {ok, Info} = fabric2_db:get_db_info(Db), + ?assertEqual(DbName, fabric2_util:get_value(db_name, Info)), + ?assertEqual(0, fabric2_util:get_value(doc_count, Info)), + ?assertEqual(0, fabric2_util:get_value(doc_del_count, Info)), + ?assert(is_binary(fabric2_util:get_value(update_seq, Info))), + InfoUUID = fabric2_util:get_value(uuid, Info), + UUID = fabric2_db:get_uuid(Db), + ?assertEqual(UUID, InfoUUID). + + +accessors({DbName, Db, _}) -> + SeqZero = fabric2_fdb:vs_to_seq(fabric2_util:seq_zero_vs()), + ?assertEqual(DbName, fabric2_db:name(Db)), + ?assertEqual(0, fabric2_db:get_instance_start_time(Db)), + ?assertEqual(nil, fabric2_db:get_pid(Db)), + ?assertEqual(undefined, fabric2_db:get_before_doc_update_fun(Db)), + ?assertEqual(undefined, fabric2_db:get_after_doc_read_fun(Db)), + ?assertEqual(SeqZero, fabric2_db:get_committed_update_seq(Db)), + ?assertEqual(SeqZero, fabric2_db:get_compacted_seq(Db)), + ?assertEqual(SeqZero, fabric2_db:get_update_seq(Db)), + ?assertEqual(nil, fabric2_db:get_compactor_pid(Db)), + ?assertEqual(1000, fabric2_db:get_revs_limit(Db)), + ?assertMatch(<<_:32/binary>>, fabric2_db:get_uuid(Db)), + ?assertEqual(true, fabric2_db:is_db(Db)), + ?assertEqual(false, fabric2_db:is_db(#{})), + ?assertEqual(false, fabric2_db:is_partitioned(Db)), + ?assertEqual(false, fabric2_db:is_clustered(Db)). + + +set_revs_limit({DbName, Db, _}) -> + ?assertEqual(ok, fabric2_db:set_revs_limit(Db, 500)), + {ok, Db2} = fabric2_db:open(DbName, []), + ?assertEqual(500, fabric2_db:get_revs_limit(Db2)). + + +set_security({DbName, Db, _}) -> + SecObj = {[ + {<<"admins">>, {[ + {<<"names">>, []}, + {<<"roles">>, []} + ]}} + ]}, + ?assertEqual(ok, fabric2_db:set_security(Db, SecObj)), + {ok, Db2} = fabric2_db:open(DbName, []), + ?assertEqual(SecObj, fabric2_db:get_security(Db2)). + + +get_security_cached({DbName, Db, _}) -> + OldSecObj = fabric2_db:get_security(Db), + SecObj = {[ + {<<"admins">>, {[ + {<<"names">>, [<<"foo1">>]}, + {<<"roles">>, []} + ]}} + ]}, + + % Set directly so we don't auto-update the local cache + {ok, Db1} = fabric2_db:open(DbName, [?ADMIN_CTX]), + ?assertMatch({ok, #{}}, fabric2_fdb:transactional(Db1, fun(TxDb) -> + fabric2_fdb:set_config(TxDb, security_doc, SecObj) + end)), + + {ok, Db2} = fabric2_db:open(DbName, [?ADMIN_CTX]), + ?assertEqual(OldSecObj, fabric2_db:get_security(Db2, [{max_age, 1000}])), + + timer:sleep(100), + ?assertEqual(SecObj, fabric2_db:get_security(Db2, [{max_age, 50}])), + + ?assertEqual(ok, fabric2_db:set_security(Db2, OldSecObj)). 
+ + +is_system_db({DbName, Db, _}) -> + ?assertEqual(false, fabric2_db:is_system_db(Db)), + ?assertEqual(false, fabric2_db:is_system_db_name("foo")), + ?assertEqual(false, fabric2_db:is_system_db_name(DbName)), + ?assertEqual(true, fabric2_db:is_system_db_name(<<"_replicator">>)), + ?assertEqual(true, fabric2_db:is_system_db_name("_replicator")), + ?assertEqual(true, fabric2_db:is_system_db_name(<<"foo/_replicator">>)), + ?assertEqual(false, fabric2_db:is_system_db_name(<<"f.o/_replicator">>)), + ?assertEqual(false, fabric2_db:is_system_db_name(<<"foo/bar">>)). + + +validate_dbname(_) -> + Tests = [ + {ok, <<"foo">>}, + {ok, "foo"}, + {ok, <<"_replicator">>}, + {error, illegal_database_name, <<"Foo">>}, + {error, illegal_database_name, <<"foo|bar">>}, + {error, illegal_database_name, <<"Foo">>}, + {error, database_name_too_long, << + "0123456789012345678901234567890123456789" + "0123456789012345678901234567890123456789" + "0123456789012345678901234567890123456789" + "0123456789012345678901234567890123456789" + "0123456789012345678901234567890123456789" + "0123456789012345678901234567890123456789" + >>} + ], + CheckFun = fun + ({ok, DbName}) -> + ?assertEqual(ok, fabric2_db:validate_dbname(DbName)); + ({error, Reason, DbName}) -> + Expect = {error, {Reason, DbName}}, + ?assertEqual(Expect, fabric2_db:validate_dbname(DbName)) + end, + try + % Don't allow epi plugins to interfere with test results + meck:new(couch_epi, [passthrough]), + meck:expect(couch_epi, decide, 5, no_decision), + lists:foreach(CheckFun, Tests) + after + % Unload within the test to minimize interference with other tests + meck:unload() + end. + + +validate_doc_ids(_) -> + % Basic test with default max infinity length + ?assertEqual(ok, fabric2_db:validate_docid(<<"foo">>)), + + Tests = [ + {ok, <<"_local/foo">>}, + {ok, <<"_design/foo">>}, + {ok, <<"0123456789012345">>}, + {illegal_docid, <<"">>}, + {illegal_docid, <<"_design/">>}, + {illegal_docid, <<"_local/">>}, + {illegal_docid, <<"01234567890123456">>}, + {illegal_docid, <<16#FF>>}, + {illegal_docid, <<"_bad">>}, + {illegal_docid, null} + ], + CheckFun = fun + ({ok, DocId}) -> + ?assertEqual(ok, fabric2_db:validate_docid(DocId)); + ({illegal_docid, DocId}) -> + ?assertThrow({illegal_docid, _}, fabric2_db:validate_docid(DocId)) + end, + + try + meck:new(config, [passthrough]), + meck:expect( + config, + get, + ["couchdb", "max_document_id_length", "infinity"], + "16" + ), + lists:foreach(CheckFun, Tests), + + % Check that fabric2_db_plugin can't allow for + % underscore prefixed dbs + meck:new(fabric2_db_plugin, [passthrough]), + meck:expect(fabric2_db_plugin, validate_docid, ['_'], true), + ?assertEqual(ok, fabric2_db:validate_docid(<<"_wheee">>)) + after + % Unloading within the test as the config mock + % interferes with the db version bump test. + meck:unload() + end. + + +get_doc_info({_, Db, _}) -> + DocId = couch_uuids:random(), + InsertDoc = #doc{ + id = DocId, + body = {[{<<"foo">>, true}]} + }, + {ok, {Pos, Rev}} = fabric2_db:update_doc(Db, InsertDoc, []), + + DI = fabric2_db:get_doc_info(Db, DocId), + ?assert(is_record(DI, doc_info)), + #doc_info{ + id = DIDocId, + high_seq = HighSeq, + revs = Revs + } = DI, + + ?assertEqual(DocId, DIDocId), + ?assert(is_binary(HighSeq)), + ?assertMatch([#rev_info{}], Revs), + + [#rev_info{ + rev = DIRev, + seq = Seq, + deleted = Deleted, + body_sp = BodySp + }] = Revs, + + ?assertEqual({Pos, Rev}, DIRev), + ?assert(is_binary(Seq)), + ?assert(not Deleted), + ?assertMatch(undefined, BodySp). 
+
+
+get_doc_info_not_found({_, Db, _}) ->
+    DocId = couch_uuids:random(),
+    ?assertEqual(not_found, fabric2_db:get_doc_info(Db, DocId)).
+
+
+get_full_doc_info({_, Db, _}) ->
+    DocId = couch_uuids:random(),
+    InsertDoc = #doc{
+        id = DocId,
+        body = {[{<<"foo">>, true}]}
+    },
+    {ok, {Pos, Rev}} = fabric2_db:update_doc(Db, InsertDoc, []),
+    FDI = fabric2_db:get_full_doc_info(Db, DocId),
+
+    ?assert(is_record(FDI, full_doc_info)),
+    #full_doc_info{
+        id = FDIDocId,
+        update_seq = UpdateSeq,
+        deleted = Deleted,
+        rev_tree = RevTree,
+        sizes = SizeInfo
+    } = FDI,
+
+    ?assertEqual(DocId, FDIDocId),
+    ?assert(is_binary(UpdateSeq)),
+    ?assert(not Deleted),
+    ?assertMatch([{Pos, {Rev, _, []}}], RevTree),
+    ?assertEqual(#size_info{}, SizeInfo).
+
+
+get_full_doc_info_not_found({_, Db, _}) ->
+    DocId = couch_uuids:random(),
+    ?assertEqual(not_found, fabric2_db:get_full_doc_info(Db, DocId)).
+
+
+get_full_doc_infos({_, Db, _}) ->
+    DocIds = lists:map(fun(_) ->
+        DocId = couch_uuids:random(),
+        Doc = #doc{id = DocId},
+        {ok, _} = fabric2_db:update_doc(Db, Doc, []),
+        DocId
+    end, lists:seq(1, 5)),
+
+    FDIs = fabric2_db:get_full_doc_infos(Db, DocIds),
+    lists:zipwith(fun(DocId, FDI) ->
+        ?assertEqual(DocId, FDI#full_doc_info.id)
+    end, DocIds, FDIs).
+
+
+ensure_full_commit({_, Db, _}) ->
+    ?assertEqual({ok, 0}, fabric2_db:ensure_full_commit(Db)),
+    ?assertEqual({ok, 0}, fabric2_db:ensure_full_commit(Db, 5)).
+
+
+metadata_bump({DbName, _, _}) ->
+    % Call open again here to make sure we have a version in the cache
+    % as we'll be checking if that version gets its metadata bumped
+    {ok, Db} = fabric2_db:open(DbName, [{user_ctx, ?ADMIN_USER}]),
+
+    % Emulate a remote client bumping the metadata version
+    {ok, Fdb} = application:get_env(fabric, db),
+    erlfdb:transactional(Fdb, fun(Tx) ->
+        erlfdb:set_versionstamped_value(Tx, ?METADATA_VERSION_KEY, <<0:112>>)
+    end),
+    NewMDVersion = erlfdb:transactional(Fdb, fun(Tx) ->
+        erlfdb:wait(erlfdb:get(Tx, ?METADATA_VERSION_KEY))
+    end),
+
+    % Save the timestamp before ensure_current/1 is called
+    TsBeforeEnsureCurrent = erlang:monotonic_time(millisecond),
+
+    % Perform a random operation which calls ensure_current
+    {ok, _} = fabric2_db:get_db_info(Db),
+
+    % Check that the db handle in the cache got the new metadata version
+    % and that check_current_ts was updated
+    CachedDb = fabric2_server:fetch(DbName, undefined),
+    ?assertMatch(#{
+        md_version := NewMDVersion,
+        check_current_ts := Ts
+    } when Ts >= TsBeforeEnsureCurrent, CachedDb).
+
+
+db_version_bump({DbName, _, _}) ->
+    % Call open again here to make sure we have a version in the cache
+    % as we'll be checking if that version gets its metadata bumped
+    {ok, Db} = fabric2_db:open(DbName, [{user_ctx, ?ADMIN_USER}]),
+
+    % Emulate a remote client bumping db version.
We don't go through the + % regular db open + update security doc or something like that to make sure + % we don't touch the local cache + #{db_prefix := DbPrefix} = Db, + DbVersionKey = erlfdb_tuple:pack({?DB_VERSION}, DbPrefix), + {ok, Fdb} = application:get_env(fabric, db), + NewDbVersion = fabric2_util:uuid(), + erlfdb:transactional(Fdb, fun(Tx) -> + erlfdb:set(Tx, DbVersionKey, NewDbVersion), + erlfdb:set_versionstamped_value(Tx, ?METADATA_VERSION_KEY, <<0:112>>) + end), + + % Perform a random operation which calls ensure_current + {ok, _} = fabric2_db:get_db_info(Db), + + % After previous operation, the cache should have been cleared + ?assertMatch(undefined, fabric2_server:fetch(DbName, undefined)), + + % Call open again and check that we have the latest db version + {ok, Db2} = fabric2_db:open(DbName, [{user_ctx, ?ADMIN_USER}]), + + % Check that db handle in the cache got the new metadata version + ?assertMatch(#{db_version := NewDbVersion}, Db2). + + +db_cache_doesnt_evict_newer_handles({DbName, _, _}) -> + {ok, Db} = fabric2_db:open(DbName, [{user_ctx, ?ADMIN_USER}]), + CachedDb = fabric2_server:fetch(DbName, undefined), + + StaleDb = Db#{md_version := <<0>>}, + + ok = fabric2_server:store(StaleDb), + ?assertEqual(CachedDb, fabric2_server:fetch(DbName, undefined)), + + ?assert(not fabric2_server:maybe_update(StaleDb)), + ?assertEqual(CachedDb, fabric2_server:fetch(DbName, undefined)), + + ?assert(not fabric2_server:maybe_remove(StaleDb)), + ?assertEqual(CachedDb, fabric2_server:fetch(DbName, undefined)). + + +events_listener({DbName, Db, _}) -> + Opts = [ + {dbname, DbName}, + {uuid, fabric2_db:get_uuid(Db)}, + {timeout, 100} + ], + + Fun = event_listener_callback, + {ok, Pid} = fabric2_events:link_listener(?MODULE, Fun, self(), Opts), + unlink(Pid), + Ref = monitor(process, Pid), + + NextEvent = fun(Timeout) -> + receive + {Pid, Evt} when is_pid(Pid) -> Evt; + {'DOWN', Ref, _, _, normal} -> exited_normal + after Timeout -> + timeout + end + end, + + Doc1 = #doc{id = couch_uuids:random()}, + {ok, _} = fabric2_db:update_doc(Db, Doc1, []), + ?assertEqual(updated, NextEvent(1000)), + + % Just one update, then expect a timeout + ?assertEqual(timeout, NextEvent(500)), + + Doc2 = #doc{id = couch_uuids:random()}, + {ok, _} = fabric2_db:update_doc(Db, Doc2, []), + ?assertEqual(updated, NextEvent(1000)), + + % Process is still alive + ?assert(is_process_alive(Pid)), + + % Recreate db + ok = fabric2_db:delete(DbName, [?ADMIN_CTX]), + {ok, _} = fabric2_db:create(DbName, [?ADMIN_CTX]), + ?assertEqual(deleted, NextEvent(1000)), + + % After db is deleted or re-created listener should die + ?assertEqual(exited_normal, NextEvent(1000)). + + +% Callback for event_listener function +event_listener_callback(_DbName, Event, TestPid) -> + TestPid ! {self(), Event}, + {ok, TestPid}. diff --git a/src/fabric/test/fabric2_db_security_tests.erl b/src/fabric/test/fabric2_db_security_tests.erl new file mode 100644 index 000000000..3d7167a00 --- /dev/null +++ b/src/fabric/test/fabric2_db_security_tests.erl @@ -0,0 +1,219 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(fabric2_db_security_tests). + + +-include_lib("couch/include/couch_db.hrl"). +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("eunit/include/eunit.hrl"). +-include("fabric2_test.hrl"). + + +security_test_() -> + { + "Test database security operations", + { + setup, + fun setup/0, + fun cleanup/1, + with([ + ?TDEF(check_is_admin), + ?TDEF(check_is_not_admin), + ?TDEF(check_is_admin_role), + ?TDEF(check_is_not_admin_role), + ?TDEF(check_is_member_name), + ?TDEF(check_is_not_member_name), + ?TDEF(check_is_member_role), + ?TDEF(check_is_not_member_role), + ?TDEF(check_admin_is_member), + ?TDEF(check_is_member_of_public_db), + ?TDEF(check_set_user_ctx), + ?TDEF(check_forbidden), + ?TDEF(check_fail_no_opts), + ?TDEF(check_fail_name_null), + ?TDEF(check_forbidden_with_interactive_reopen) + ]) + } + }. + + +setup() -> + Ctx = test_util:start_couch([fabric]), + DbName = ?tempdb(), + PubDbName = ?tempdb(), + {ok, Db1} = fabric2_db:create(DbName, [?ADMIN_CTX]), + ok = set_test_security(Db1), + {ok, _} = fabric2_db:create(PubDbName, [?ADMIN_CTX]), + {DbName, PubDbName, Ctx}. + + +cleanup({DbName, PubDbName, Ctx}) -> + ok = fabric2_db:delete(DbName, []), + ok = fabric2_db:delete(PubDbName, []), + test_util:stop_couch(Ctx). + + +set_test_security(Db) -> + SecProps = {[ + {<<"admins">>, {[ + {<<"names">>, [<<"admin_name1">>, <<"admin_name2">>]}, + {<<"roles">>, [<<"admin_role1">>, <<"admin_role2">>]} + ]}}, + {<<"members">>, {[ + {<<"names">>, [<<"member_name1">>, <<"member_name2">>]}, + {<<"roles">>, [<<"member_role1">>, <<"member_role2">>]} + ]}} + ]}, + ok = fabric2_db:set_security(Db, SecProps). + + +check_is_admin({DbName, _, _}) -> + UserCtx = #user_ctx{name = <<"admin_name1">>}, + {ok, Db} = fabric2_db:open(DbName, [{user_ctx, UserCtx}]), + ?assertEqual(ok, fabric2_db:check_is_admin(Db)). + + +check_is_not_admin({DbName, _, _}) -> + {ok, Db1} = fabric2_db:open(DbName, [{user_ctx, #user_ctx{}}]), + ?assertThrow( + {unauthorized, <<"You are not authorized", _/binary>>}, + fabric2_db:check_is_admin(Db1) + ), + + UserCtx = #user_ctx{name = <<"member_name1">>}, + {ok, Db2} = fabric2_db:open(DbName, [{user_ctx, UserCtx}]), + ?assertThrow( + {forbidden, <<"You are not a db or server admin.">>}, + fabric2_db:check_is_admin(Db2) + ). + + +check_is_admin_role({DbName, _, _}) -> + UserCtx = #user_ctx{roles = [<<"admin_role1">>]}, + {ok, Db} = fabric2_db:open(DbName, [{user_ctx, UserCtx}]), + ?assertEqual(ok, fabric2_db:check_is_admin(Db)). + + +check_is_not_admin_role({DbName, _, _}) -> + UserCtx = #user_ctx{ + name = <<"member_name1">>, + roles = [<<"member_role1">>] + }, + {ok, Db} = fabric2_db:open(DbName, [{user_ctx, UserCtx}]), + ?assertThrow( + {forbidden, <<"You are not a db or server admin.">>}, + fabric2_db:check_is_admin(Db) + ). + + +check_is_member_name({DbName, _, _}) -> + UserCtx = #user_ctx{name = <<"member_name1">>}, + {ok, Db} = fabric2_db:open(DbName, [{user_ctx, UserCtx}]), + ?assertEqual(ok, fabric2_db:check_is_member(Db)). 
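A condensed sketch of the flow the admin/member cases above walk through, using the same fixture names installed by set_test_security/1 (nothing outside this module is assumed):

% Open with a member context: the member check passes, the admin check throws.
member_but_not_admin(DbName) ->
    UserCtx = #user_ctx{name = <<"member_name1">>, roles = [<<"member_role1">>]},
    {ok, Db} = fabric2_db:open(DbName, [{user_ctx, UserCtx}]),
    ok = fabric2_db:check_is_member(Db),
    try
        fabric2_db:check_is_admin(Db),
        admin
    catch
        throw:{forbidden, _} -> member_only
    end.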
+ + +check_is_not_member_name({DbName, _, _}) -> + {ok, Db1} = fabric2_db:open(DbName, [{user_ctx, #user_ctx{}}]), + ?assertThrow( + {unauthorized, <<"You are not authorized", _/binary>>}, + fabric2_db:check_is_member(Db1) + ), + + UserCtx = #user_ctx{name = <<"foo">>}, + {ok, Db2} = fabric2_db:open(DbName, [{user_ctx, UserCtx}]), + ?assertThrow( + {forbidden, <<"You are not allowed to access", _/binary>>}, + fabric2_db:check_is_member(Db2) + ). + + +check_is_member_role({DbName, _, _}) -> + UserCtx = #user_ctx{name = <<"foo">>, roles = [<<"member_role1">>]}, + {ok, Db} = fabric2_db:open(DbName, [{user_ctx, UserCtx}]), + ?assertEqual(ok, fabric2_db:check_is_member(Db)). + + +check_is_not_member_role({DbName, _, _}) -> + UserCtx = #user_ctx{name = <<"foo">>, roles = [<<"bar">>]}, + {ok, Db} = fabric2_db:open(DbName, [{user_ctx, UserCtx}]), + ?assertThrow( + {forbidden, <<"You are not allowed to access", _/binary>>}, + fabric2_db:check_is_member(Db) + ). + + +check_admin_is_member({DbName, _, _}) -> + UserCtx = #user_ctx{name = <<"admin_name1">>}, + {ok, Db} = fabric2_db:open(DbName, [{user_ctx, UserCtx}]), + ?assertEqual(ok, fabric2_db:check_is_member(Db)). + + +check_is_member_of_public_db({_, PubDbName, _}) -> + {ok, Db1} = fabric2_db:open(PubDbName, [{user_ctx, #user_ctx{}}]), + ?assertEqual(ok, fabric2_db:check_is_member(Db1)), + + UserCtx = #user_ctx{name = <<"foo">>, roles = [<<"bar">>]}, + {ok, Db2} = fabric2_db:open(PubDbName, [{user_ctx, UserCtx}]), + ?assertEqual(ok, fabric2_db:check_is_member(Db2)). + + +check_set_user_ctx({DbName, _, _}) -> + UserCtx = #user_ctx{name = <<"foo">>, roles = [<<"admin_role1">>]}, + {ok, Db1} = fabric2_db:open(DbName, [{user_ctx, UserCtx}]), + ?assertEqual(UserCtx, fabric2_db:get_user_ctx(Db1)). + + +check_forbidden({DbName, _, _}) -> + UserCtx = #user_ctx{name = <<"foo">>, roles = [<<"bar">>]}, + {ok, Db} = fabric2_db:open(DbName, [{user_ctx, UserCtx}]), + ?assertThrow({forbidden, _}, fabric2_db:get_db_info(Db)). + + +check_fail_no_opts({DbName, _, _}) -> + {ok, Db} = fabric2_db:open(DbName, []), + ?assertThrow({unauthorized, _}, fabric2_db:get_db_info(Db)). + + +check_fail_name_null({DbName, _, _}) -> + UserCtx = #user_ctx{name = null}, + {ok, Db} = fabric2_db:open(DbName, [{user_ctx, UserCtx}]), + ?assertThrow({unauthorized, _}, fabric2_db:get_db_info(Db)). + + +check_forbidden_with_interactive_reopen({DbName, _, _}) -> + UserCtx = #user_ctx{name = <<"foo">>}, + Options = [{user_ctx, UserCtx}, {interactive, true}], + + {ok, Db1} = fabric2_db:open(DbName, Options), + + % Verify foo is forbidden by default + ?assertThrow({forbidden, _}, fabric2_db:get_db_info(Db1)), + + % Allow foo + {ok, Db2} = fabric2_db:open(DbName, [?ADMIN_CTX]), + AllowFoo = {[ + {<<"members">>, {[ + {<<"names">>, [<<"foo">>]} + ]}} + ]}, + ok = fabric2_db:set_security(Db2, AllowFoo), + + ?assertMatch({ok, _}, fabric2_db:get_db_info(Db1)), + + % Recreate test db instance with the default security + ok = fabric2_db:delete(DbName, [?ADMIN_CTX]), + {ok, Db3} = fabric2_db:create(DbName, [?ADMIN_CTX]), + ok = set_test_security(Db3), + + % Original handle is forbidden to again + ?assertThrow({forbidden, _}, fabric2_db:get_db_info(Db1)). 
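The interactive-reopen case above hinges on the {interactive, true} open option: a handle opened that way re-validates the database, and therefore picks up the latest security object, each time it is used, which is why the same Db1 reference flips from forbidden to allowed and back as the security document changes underneath it. A minimal sketch of opening such a handle (option names taken from the test above):

open_interactive(DbName) ->
    % Later operations on this handle re-check the current security doc
    % rather than the one in effect when the handle was created.
    fabric2_db:open(DbName, [
        {user_ctx, #user_ctx{name = <<"foo">>}},
        {interactive, true}
    ]).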
diff --git a/src/fabric/test/fabric2_db_size_tests.erl b/src/fabric/test/fabric2_db_size_tests.erl new file mode 100644 index 000000000..0bb9c7a8e --- /dev/null +++ b/src/fabric/test/fabric2_db_size_tests.erl @@ -0,0 +1,918 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(fabric2_db_size_tests). + +-export([ + random_body/0 +]). + +-include_lib("couch/include/couch_db.hrl"). +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("eunit/include/eunit.hrl"). +-include("fabric2_test.hrl"). + + +db_size_test_() -> + { + "Test database size calculations", + { + setup, + fun setup/0, + fun cleanup/1, + with([ + ?TDEF(new_doc), + ?TDEF(replicate_new_doc), + ?TDEF(edit_doc), + ?TDEF(delete_doc), + ?TDEF(create_conflict), + ?TDEF(replicate_new_winner), + ?TDEF(replicate_deep_deleted), + ?TDEF(delete_winning_revision), + ?TDEF(delete_conflict_revision), + ?TDEF(replicate_existing_revision), + ?TDEF(replicate_shared_history), + ?TDEF(create_doc_with_attachment), + ?TDEF(add_attachment_in_update), + ?TDEF(add_second_attachment), + ?TDEF(delete_attachment), + ?TDEF(delete_one_attachment), + ?TDEF(delete_all_attachments), + ?TDEF(re_add_attachment), + ?TDEF(update_and_remove_attachment), + ?TDEF(replicate_new_doc_with_attachment), + ?TDEF(replicate_remove_attachment), + ?TDEF(replicate_stub_attachment), + ?TDEF(replicate_stub_and_new_attachment), + ?TDEF(replicate_new_att_to_winner), + ?TDEF(replicate_change_att_to_winner), + ?TDEF(replicate_rem_att_from_winner), + ?TDEF(replicate_stub_to_winner), + ?TDEF(replicate_new_att_to_conflict), + ?TDEF(replicate_change_att_to_conflict), + ?TDEF(replicate_rem_att_from_conflict), + ?TDEF(replicate_stub_to_conflict), + ?TDEF(create_local_doc), + ?TDEF(update_local_doc), + ?TDEF(delete_local_doc), + ?TDEF(recreate_local_doc) + ]) + } + }. + + +setup() -> + Ctx = test_util:start_couch([fabric]), + {ok, Db} = fabric2_db:create(?tempdb(), [{user_ctx, ?ADMIN_USER}]), + {Db, Ctx}. + + +cleanup({Db, Ctx}) -> + ok = fabric2_db:delete(fabric2_db:name(Db), []), + test_util:stop_couch(Ctx). + + +new_doc({Db, _}) -> + check(Db, [ + {create, #{tgt => rev1}} + ]). + + +replicate_new_doc({Db, _}) -> + check(Db, [ + {replicate, #{tgt => rev1}} + ]). + + +edit_doc({Db, _}) -> + check(Db, [ + {create, #{tgt => rev1}}, + {update, #{src => rev1, tgt => rev2}} + ]). + + +delete_doc({Db, _}) -> + check(Db, [ + {create, #{tgt => rev1}}, + {delete, #{src => rev1, tgt => rev2}} + ]). + + +create_conflict({Db, _}) -> + check(Db, [ + {create, #{tgt => rev1}}, + {replicate, #{tgt => rev2}} + ]). + + +replicate_new_winner({Db, _}) -> + check(Db, [ + {create, #{tgt => rev1}}, + {replicate, #{tgt => rev2, depth => 3}} + ]). + + +replicate_deep_deleted({Db, _}) -> + check(Db, [ + {create, #{tgt => rev1, depth => 2}}, + {replicate, #{tgt => rev2, depth => 5, deleted => true}} + ]). + + +delete_winning_revision({Db, _}) -> + check(Db, [ + {create, #{tgt => rev1}}, + {replicate, #{tgt => rev2}}, + {delete, #{src => {winner, [rev1, rev2]}, tgt => rev3}} + ]). 
+ + +delete_conflict_revision({Db, _}) -> + check(Db, [ + {create, #{tgt => rev1}}, + {replicate, #{tgt => rev2}}, + {delete, #{src => {conflict, [rev1, rev2]}, tgt => rev3}} + ]). + + +replicate_existing_revision({Db, _}) -> + check(Db, [ + {create, #{tgt => rev1}}, + {replicate, #{src => rev1, tgt => rev2, depth => 0}} + ]). + + +replicate_shared_history({Db, _}) -> + check(Db, [ + {create, #{tgt => rev1, depth => 5}}, + {update, #{src => rev1, tgt => rev2, depth => 5}}, + {replicate, #{ + src => rev1, + src_exists => false, + tgt => rev3, + depth => 5 + }} + ]). + + +create_doc_with_attachment({Db, _}) -> + check(Db, [ + {mk_att, #{tgt => att1}}, + {create, #{tgt => rev1, atts => [att1]}} + ]). + + +add_attachment_in_update({Db, _}) -> + check(Db, [ + {mk_att, #{tgt => att1}}, + {create, #{tgt => rev1}}, + {update, #{src => rev1, tgt => rev2, atts => [att1]}} + ]). + + +add_second_attachment({Db, _}) -> + check(Db, [ + {mk_att, #{tgt => att1}}, + {mk_att, #{tgt => att2}}, + {create, #{tgt => rev1, atts => [att1]}}, + {update, #{src => rev1, tgt => rev2, atts => [att1, att2]}} + ]). + + +delete_attachment({Db, _}) -> + check(Db, [ + {mk_att, #{tgt => att1}}, + {create, #{tgt => rev1, atts => [att1]}}, + {update, #{src => rev1, tgt => rev2}} + ]). + + +delete_one_attachment({Db, _}) -> + check(Db, [ + {mk_att, #{tgt => att1}}, + {mk_att, #{tgt => att2}}, + {mk_att, #{tgt => att3, stub => att1, revpos => 1}}, + {create, #{tgt => rev1, atts => [att1, att2]}}, + {update, #{src => rev1, tgt => rev2, atts => [att3]}} + ]). + + +delete_all_attachments({Db, _}) -> + check(Db, [ + {mk_att, #{tgt => att1}}, + {mk_att, #{tgt => att2}}, + {create, #{tgt => rev1, atts => [att1, att2]}}, + {update, #{src => rev1, tgt => rev2, atts => []}} + ]). + + +re_add_attachment({Db, _}) -> + check(Db, [ + {mk_att, #{tgt => att1}}, + {create, #{tgt => rev1, atts => [att1]}}, + {update, #{src => rev1, tgt => rev2}}, + {update, #{src => rev2, tgt => rev3, atts => [att1]}} + ]). + + +update_and_remove_attachment({Db, _}) -> + check(Db, [ + {mk_att, #{tgt => att1}}, + {mk_att, #{tgt => att2}}, + {mk_att, #{tgt => att3, stub => att1, revpos => 1}}, + {mk_att, #{tgt => att4}}, + {create, #{tgt => rev1, atts => [att1, att2]}}, + {update, #{src => rev1, tgt => rev2, atts => [att3, att4]}} + ]). + + +replicate_new_doc_with_attachment({Db, _}) -> + check(Db, [ + {mk_att, #{tgt => att1}}, + {replicate, #{tgt => rev1, atts => [att1]}} + ]). + + +replicate_remove_attachment({Db, _}) -> + check(Db, [ + {mk_att, #{tgt => att1}}, + {create, #{tgt => rev1, atts => [att1]}}, + {replicate, #{src => rev1, tgt => rev2}} + ]). + + +replicate_stub_attachment({Db, _}) -> + check(Db, [ + {mk_att, #{tgt => att1}}, + {mk_att, #{tgt => att2, stub => att1, revpos => 1}}, + {create, #{tgt => rev1, atts => [att1]}}, + {replicate, #{src => rev1, tgt => rev2, atts => [att2]}} + ]). + + +replicate_stub_and_new_attachment({Db, _}) -> + check(Db, [ + {mk_att, #{tgt => att1}}, + {mk_att, #{tgt => att2, stub => att1, revpos => 1}}, + {mk_att, #{tgt => att3}}, + {create, #{tgt => rev1, atts => [att1]}}, + {replicate, #{src => rev1, tgt => rev2, atts => [att2, att3]}} + ]). + + +replicate_new_att_to_winner({Db, _}) -> + check(Db, [ + {mk_att, #{tgt => att1}}, + {create, #{tgt => rev1}}, + {replicate, #{tgt => rev2}}, + {replicate, #{ + src => {winner, [rev1, rev2]}, + tgt => rev3, + atts => [att1]} + } + ]). 
+ + +replicate_change_att_to_winner({Db, _}) -> + check(Db, [ + {mk_att, #{tgt => att1}}, + {mk_att, #{tgt => att2}}, + {create, #{tgt => rev1, atts => [att1]}}, + {replicate, #{tgt => rev2, atts => [att1]}}, + {replicate, #{ + src => {winner, [rev1, rev2]}, + tgt => rev3, + atts => [att2]} + } + ]). + + +replicate_rem_att_from_winner({Db, _}) -> + check(Db, [ + {mk_att, #{tgt => att1}}, + {create, #{tgt => rev1, atts => [att1]}}, + {replicate, #{tgt => rev2, atts => [att1]}}, + {replicate, #{src => {winner, [rev1, rev2]}, tgt => rev3}} + ]). + + +replicate_stub_to_winner({Db, _}) -> + check(Db, [ + {mk_att, #{tgt => att1}}, + {mk_att, #{tgt => att2, stub => att1, revpos => 1}}, + {create, #{tgt => rev1, atts => [att1]}}, + {replicate, #{tgt => rev2, atts => [att1]}}, + {replicate, #{ + src => {winner, [rev1, rev2]}, + tgt => rev3, + atts => [att2]}} + ]). + + +replicate_new_att_to_conflict({Db, _}) -> + check(Db, [ + {mk_att, #{tgt => att1}}, + {create, #{tgt => rev1}}, + {replicate, #{tgt => rev2}}, + {replicate, #{ + src => {conflict, [rev1, rev2]}, + tgt => rev3, + atts => [att1]} + } + ]). + + +replicate_change_att_to_conflict({Db, _}) -> + check(Db, [ + {mk_att, #{tgt => att1}}, + {mk_att, #{tgt => att2}}, + {create, #{tgt => rev1, atts => [att1]}}, + {replicate, #{tgt => rev2, atts => [att1]}}, + {replicate, #{ + src => {conflict, [rev1, rev2]}, + tgt => rev3, + atts => [att2]} + } + ]). + + +replicate_rem_att_from_conflict({Db, _}) -> + check(Db, [ + {mk_att, #{tgt => att1}}, + {create, #{tgt => rev1, atts => [att1]}}, + {replicate, #{tgt => rev2, atts => [att1]}}, + {replicate, #{src => {conflict, [rev1, rev2]}, tgt => rev3}} + ]). + + +replicate_stub_to_conflict({Db, _}) -> + check(Db, [ + {mk_att, #{tgt => att1}}, + {mk_att, #{tgt => att2, stub => att1, revpos => 1}}, + {create, #{tgt => rev1, atts => [att1]}}, + {replicate, #{tgt => rev2, atts => [att1]}}, + {replicate, #{ + src => {conflict, [rev1, rev2]}, + tgt => rev3, + atts => [att2]}} + ]). + + +create_local_doc({Db, _}) -> + check(Db, #{local => true}, [ + {create, #{tgt => rev1}} + ]). + + +update_local_doc({Db, _}) -> + check(Db, #{local => true}, [ + {create, #{tgt => rev1}}, + {update, #{src => rev1, tgt => rev2}} + ]). + + +delete_local_doc({Db, _}) -> + check(Db, #{local => true}, [ + {create, #{tgt => rev1}}, + {update, #{src => rev1, tgt => rev2}}, + {delete, #{src => rev2, tgt => rev3}} + ]). + + +recreate_local_doc({Db, _}) -> + check(Db, #{local => true}, [ + {create, #{tgt => rev1}}, + {update, #{src => rev1, tgt => rev2}}, + {delete, #{src => rev2, tgt => rev3}}, + {create, #{tgt => rev4}} + ]). + + +check(Db, Actions) -> + check(Db, #{}, Actions). + + +check(Db, CheckOpts, Actions) -> + DocId = case maps:get(local, CheckOpts, false) of + true -> + Base = couch_uuids:random(), + <<"_local/", Base/binary>>; + false -> + couch_uuids:random() + end, + InitSt = #{ + doc_id => DocId, + revs => #{}, + atts => #{}, + size => db_size(Db) + }, + lists:foldl(fun({Action, Opts}, StAcc) -> + case Action of + create -> create_doc(Db, Opts, StAcc); + update -> update_doc(Db, Opts, StAcc); + delete -> delete_doc(Db, Opts, StAcc); + replicate -> replicate_doc(Db, Opts, StAcc); + mk_att -> make_attachment(Opts, StAcc); + log_state -> log_state(Opts, StAcc) + end + end, InitSt, Actions). 
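The fold above is the entire harness: each size test is just a list of {Action, Opts} steps, where atoms such as rev1 or att1 name the revisions and attachments produced so far and src/tgt wire steps together. An illustrative scenario combining the edit_doc/1 and delete_doc/1 cases (not an additional test in the patch):

% Every step performs the operation, asserts that the change in the database's
% external size matches the change in the touched revision's size, and stores
% the resulting #doc{} in the state map under its tgt name.
example_scenario(Db) ->
    check(Db, [
        {create, #{tgt => rev1}},
        {update, #{src => rev1, tgt => rev2}},
        {delete, #{src => rev2, tgt => rev3}}
    ]).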
+ + +create_doc(Db, Opts, St) -> + #{ + doc_id := DocId, + revs := Revs, + atts := Atts, + size := InitDbSize + } = St, + + ?assert(maps:is_key(tgt, Opts)), + + Tgt = maps:get(tgt, Opts), + AttKeys = maps:get(atts, Opts, []), + Depth = maps:get(depth, Opts, 1), + + ?assert(not maps:is_key(Tgt, Revs)), + lists:foreach(fun(AttKey) -> + ?assert(maps:is_key(AttKey, Atts)) + end, AttKeys), + ?assert(Depth >= 1), + + AttRecords = lists:map(fun(AttKey) -> + maps:get(AttKey, Atts) + end, AttKeys), + + InitDoc = #doc{id = DocId}, + FinalDoc = lists:foldl(fun(Iter, Doc0) -> + #doc{ + revs = {_OldStart, OldRevs} + } = Doc1 = randomize_doc(Doc0), + Doc2 = if Iter < Depth -> Doc1; true -> + Doc1#doc{atts = AttRecords} + end, + {ok, {Pos, Rev}} = fabric2_db:update_doc(Db, Doc2), + Doc2#doc{revs = {Pos, [Rev | OldRevs]}} + end, InitDoc, lists:seq(1, Depth)), + + FinalDocSize = doc_size(FinalDoc), + FinalDbSize = db_size(Db), + + ?assertEqual(FinalDbSize - InitDbSize, FinalDocSize), + + store_rev(Db, St, FinalDbSize, Tgt, FinalDoc). + + +update_doc(Db, Opts, St) -> + #{ + doc_id := DocId, + revs := Revs, + atts := Atts, + size := InitDbSize + } = St, + + IsLocal = case DocId of + <<"_local/", _/binary>> -> true; + _ -> false + end, + + ?assert(maps:is_key(src, Opts)), + ?assert(maps:is_key(tgt, Opts)), + + Src = pick_rev(Revs, maps:get(src, Opts)), + Tgt = maps:get(tgt, Opts), + AttKeys = maps:get(atts, Opts, []), + Depth = maps:get(depth, Opts, 1), + + ?assert(maps:is_key(Src, Revs)), + ?assert(not maps:is_key(Tgt, Revs)), + lists:foreach(fun(AttKey) -> + ?assert(maps:is_key(AttKey, Atts)) + end, AttKeys), + ?assert(Depth >= 1), + + AttRecords = lists:map(fun(AttKey) -> + maps:get(AttKey, Atts) + end, AttKeys), + + InitDoc = maps:get(Src, Revs), + FinalDoc = lists:foldl(fun(Iter, Doc0) -> + #doc{ + revs = {_OldStart, OldRevs} + } = Doc1 = randomize_doc(Doc0), + Doc2 = if Iter < Depth -> Doc1; true -> + Doc1#doc{atts = AttRecords} + end, + {ok, {Pos, Rev}} = fabric2_db:update_doc(Db, Doc2), + case IsLocal of + true -> Doc2#doc{revs = {Pos, [Rev]}}; + false -> Doc2#doc{revs = {Pos, [Rev | OldRevs]}} + end + end, InitDoc, lists:seq(1, Depth)), + + InitDocSize = doc_size(InitDoc), + FinalDocSize = doc_size(FinalDoc), + FinalDbSize = db_size(Db), + + ?assertEqual(FinalDbSize - InitDbSize, FinalDocSize - InitDocSize), + + store_rev(Db, St, FinalDbSize, Tgt, FinalDoc). + + +delete_doc(Db, Opts, St) -> + #{ + doc_id := DocId, + revs := Revs, + size := InitDbSize + } = St, + + IsLocal = case DocId of + <<"_local/", _/binary>> -> true; + _ -> false + end, + + ?assert(maps:is_key(src, Opts)), + ?assert(maps:is_key(tgt, Opts)), + + Src = pick_rev(Revs, maps:get(src, Opts)), + Tgt = maps:get(tgt, Opts), + + ?assert(maps:is_key(Src, Revs)), + ?assert(not maps:is_key(Tgt, Revs)), + + InitDoc = maps:get(Src, Revs), + #doc{ + revs = {_OldStart, OldRevs} + } = UpdateDoc = randomize_deleted_doc(InitDoc), + + {ok, {Pos, Rev}} = fabric2_db:update_doc(Db, UpdateDoc), + + FinalDoc = case IsLocal of + true -> UpdateDoc#doc{revs = {Pos, [Rev]}}; + false -> UpdateDoc#doc{revs = {Pos, [Rev | OldRevs]}} + end, + + InitDocSize = doc_size(InitDoc), + FinalDocSize = doc_size(FinalDoc), + FinalDbSize = db_size(Db), + + ?assertEqual(FinalDbSize - InitDbSize, FinalDocSize - InitDocSize), + + store_rev(Db, St, FinalDbSize, Tgt, FinalDoc). 
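create_doc/3, update_doc/3 and delete_doc/3 all assert the same bookkeeping identity: the database's external size moves by exactly the size delta of the one revision that was written. With made-up numbers (purely illustrative, not values from any run):

size_delta_sketch() ->
    InitDbSize = 1000, FinalDbSize = 1030,   % hypothetical db sizes
    InitDocSize = 120, FinalDocSize = 150,   % hypothetical revision sizes
    % This is the relation the ?assertEqual calls above encode.
    true = (FinalDbSize - InitDbSize) =:= (FinalDocSize - InitDocSize).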
+ + +replicate_doc(Db, Opts, St) -> + #{ + doc_id := DocId, + revs := Revs, + atts := Atts, + size := InitDbSize + } = St, + + ?assert(maps:is_key(tgt, Opts)), + + Src = pick_rev(Revs, maps:get(src, Opts, undefined)), + SrcExists = maps:get(src_exists, Opts, true), + Tgt = maps:get(tgt, Opts), + Deleted = maps:get(deleted, Opts, false), + AttKeys = maps:get(atts, Opts, []), + Depth = maps:get(depth, Opts, 1), + + if Src == undefined -> ok; true -> + ?assert(maps:is_key(Src, Revs)) + end, + ?assert(not maps:is_key(Tgt, Revs)), + ?assert(is_boolean(Deleted)), + lists:foreach(fun(AttKey) -> + ?assert(maps:is_key(AttKey, Atts)) + end, AttKeys), + ?assert(Depth >= 0), + + if Depth > 0 -> ok; true -> + ?assert(length(AttKeys) == 0) + end, + + InitDoc = maps:get(Src, Revs, #doc{id = DocId}), + NewRevsDoc = lists:foldl(fun(_, Doc0) -> + #doc{ + revs = {RevStart, RevIds} + } = Doc0, + NewRev = crypto:strong_rand_bytes(16), + Doc0#doc{ + revs = {RevStart + 1, [NewRev | RevIds]} + } + end, InitDoc, lists:seq(1, Depth)), + + FinalDoc = if NewRevsDoc == InitDoc -> NewRevsDoc; true -> + UpdateDoc = case Deleted of + true -> randomize_deleted_doc(NewRevsDoc); + false -> randomize_doc(NewRevsDoc) + end, + #doc{ + revs = {RevPos, _} + } = UpdateDoc, + AttRecords = lists:map(fun(AttKey) -> + BaseAtt = maps:get(AttKey, Atts), + case couch_att:fetch(data, BaseAtt) of + stub -> BaseAtt; + <<_/binary>> -> couch_att:store(revpos, RevPos, BaseAtt) + end + end, AttKeys), + UpdateDoc#doc{atts = AttRecords} + end, + + try + {ok, _} = fabric2_db:update_doc(Db, FinalDoc, [replicated_changes]) + catch throw:{missing_stub, _} -> + log_state(#{}, St), + ?debugFmt("Replicated: ~p~n", [FinalDoc]), + ?assert(false) + end, + + InitDocSize = doc_size(InitDoc), + FinalDocSize = doc_size(FinalDoc), + FinalDbSize = db_size(Db), + + SizeChange = case {Src, SrcExists} of + {undefined, _} -> FinalDocSize; + {_, false} -> FinalDocSize; + {_, _} -> FinalDocSize - InitDocSize + end, + ?assertEqual(FinalDbSize - InitDbSize, SizeChange), + + store_rev(Db, St, FinalDbSize, Tgt, FinalDoc). + + +make_attachment(Opts, St) -> + #{ + atts := Atts + } = St, + + ?assert(maps:is_key(tgt, Opts)), + + Tgt = maps:get(tgt, Opts), + Stub = maps:get(stub, Opts, undefined), + RevPos = maps:get(revpos, Opts, undefined), + NameRaw = maps:get(name, Opts, undefined), + + ?assert(not maps:is_key(Tgt, Atts)), + if Stub == undefined -> ok; true -> + ?assert(maps:is_key(Stub, Atts)) + end, + ?assert(RevPos == undefined orelse RevPos >= 0), + + Name = if + NameRaw == undefined -> undefined; + is_atom(NameRaw) -> atom_to_binary(NameRaw, utf8); + is_binary(NameRaw) -> NameRaw; + is_list(NameRaw) -> list_to_binary(NameRaw) + end, + + Att0 = case Stub of + undefined -> + random_attachment(Name); + _ -> + SrcAtt = maps:get(Stub, Atts), + couch_att:store(data, stub, SrcAtt) + end, + Att1 = if RevPos == undefined -> Att0; true -> + couch_att:store(revpos, RevPos, Att0) + end, + + St#{atts := maps:put(Tgt, Att1, Atts)}. 
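make_attachment/2 is also where the stub mechanics used by the replicate_stub_* cases come from: a stub carries no data, only a revpos pointing at the revision that actually uploaded the bytes. A small sketch using the same couch_att calls, assuming Att is an attachment created at revision position 1:

% Turn an existing attachment into a stub referencing its revision 1 copy.
stub_of(Att) ->
    Stub0 = couch_att:store(data, stub, Att),
    Stub = couch_att:store(revpos, 1, Stub0),
    stub = couch_att:fetch(data, Stub),   % sanity check: the data really is a stub
    Stub.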
+ + +log_state(_Opts, St) -> + #{ + doc_id := DocId, + revs := Revs, + atts := Atts, + size := DbSize + } = St, + + ?debugFmt("~nDocId: ~p~n", [DocId]), + ?debugFmt("Db Size: ~p~n~n", [DbSize]), + + RevKeys = maps:keys(Revs), + lists:foreach(fun(RevKey) -> + #doc{ + id = RevDocId, + revs = {Pos, [Rev | RestRevs]}, + body = Body, + deleted = Deleted, + atts = DocAtts, + meta = Meta + } = Doc = maps:get(RevKey, Revs), + ?debugFmt("Doc: ~p (~p)~n", [RevKey, doc_size(Doc)]), + ?debugFmt("Id: ~p~n", [RevDocId]), + ?debugFmt("Rev: ~p ~w~n", [Pos, Rev]), + lists:foreach(fun(R) -> + ?debugFmt(" ~p~n", [R]) + end, RestRevs), + ?debugFmt("Deleted: ~p~n", [Deleted]), + ?debugFmt("Atts:~n", []), + lists:foreach(fun(Att) -> + ?debugFmt(" ~p~n", [Att]) + end, DocAtts), + ?debugFmt("Body: ~p~n", [Body]), + ?debugFmt("Meta: ~p~n", [Meta]), + ?debugFmt("~n", []) + end, lists:sort(RevKeys)), + + AttKeys = maps:keys(Atts), + ?debugFmt("~n~nAtts:~n", []), + lists:foreach(fun(AttKey) -> + Att = maps:get(AttKey, Atts), + ?debugFmt("Att: ~p (~p)~n", [AttKey, couch_att:external_size(Att)]), + ?debugFmt(" ~p~n", [Att]) + end, lists:sort(AttKeys)), + + St. + + +pick_rev(_Revs, Rev) when is_atom(Rev) -> + Rev; +pick_rev(Revs, {Op, RevList}) when Op == winner; Op == conflict -> + ChooseFrom = lists:map(fun(Rev) -> + #doc{ + revs = {S, [R | _]}, + deleted = Deleted + } = maps:get(Rev, Revs), + #{ + deleted => Deleted, + rev_id => {S, R}, + name => Rev + } + end, RevList), + Sorted = fabric2_util:sort_revinfos(ChooseFrom), + RetRev = case Op of + winner -> hd(Sorted); + conflict -> choose(tl(Sorted)) + end, + maps:get(name, RetRev). + + +store_rev(Db, St, DbSize, Tgt, #doc{id = <<"_local/", _/binary>>} = Doc) -> + DbDoc = case fabric2_db:open_doc(Db, Doc#doc.id) of + {ok, Found} -> Found; + {not_found, _} -> not_found + end, + store_rev(St, DbSize, Tgt, DbDoc); + +store_rev(Db, St, DbSize, Tgt, #doc{} = Doc) -> + #doc{ + id = DocId, + revs = {Pos, [Rev | _]} + } = Doc, + RevId = {Pos, Rev}, + {ok, [{ok, DbDoc}]} = fabric2_db:open_doc_revs(Db, DocId, [RevId], []), + store_rev(St, DbSize, Tgt, DbDoc). + + +store_rev(St, DbSize, Tgt, Doc) -> + #{ + revs := Revs + } = St, + ?assert(not maps:is_key(Tgt, Revs)), + St#{ + revs := maps:put(Tgt, Doc, Revs), + size := DbSize + }. + + +randomize_doc(#doc{} = Doc) -> + Doc#doc{ + deleted = false, + body = random_body() + }. + + +randomize_deleted_doc(Doc) -> + NewDoc = case rand:uniform() < 0.05 of + true -> randomize_doc(Doc); + false -> Doc#doc{body = {[]}} + end, + NewDoc#doc{deleted = true}. + + +db_size(Info) when is_list(Info) -> + {sizes, {Sizes}} = lists:keyfind(sizes, 1, Info), + {<<"external">>, External} = lists:keyfind(<<"external">>, 1, Sizes), + External; +db_size(Db) when is_map(Db) -> + {ok, Info} = fabric2_db:get_db_info(Db), + db_size(Info). + + +doc_size(#doc{id = <<"_local/", _/binary>>} = Doc) -> + fabric2_util:ldoc_size(Doc); +doc_size(#doc{} = Doc) -> + fabric2_util:rev_size(Doc). + + +-define(MAX_JSON_ELEMENTS, 5). +-define(MAX_STRING_LEN, 10). +-define(MAX_INT, 4294967296). + + +random_body() -> + Elems = rand:uniform(?MAX_JSON_ELEMENTS), + {Obj, _} = random_json_object(Elems), + Obj. + + +random_json(MaxElems) -> + case choose([object, array, terminal]) of + object -> random_json_object(MaxElems); + array -> random_json_array(MaxElems); + terminal -> {random_json_terminal(), MaxElems} + end. 
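pick_rev/2 above is what lets a step say src => {winner, [rev1, rev2]} or {conflict, [...]}: it ranks the named revisions with fabric2_util:sort_revinfos/1 (roughly: deleted branches lose, then the longer path wins, then the higher rev id) and returns either the head or one of the remaining entries. A usage sketch, assuming Revs is the state map built up by check/3:

% Resolve symbolic branch selectors the same way the replicate_* cases do.
pick_branches(Revs) ->
    Winner = pick_rev(Revs, {winner, [rev1, rev2]}),
    Loser = pick_rev(Revs, {conflict, [rev1, rev2]}),
    {Winner, Loser}.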
+ + +random_json_object(MaxElems) -> + NumKeys = rand:uniform(MaxElems + 1) - 1, + {Props, RemElems} = lists:mapfoldl(fun(_, Acc1) -> + {Value, Acc2} = random_json(Acc1), + {{random_json_string(), Value}, Acc2} + end, MaxElems - NumKeys, lists:seq(1, NumKeys)), + {{Props}, RemElems}. + + +random_json_array(MaxElems) -> + NumItems = rand:uniform(MaxElems + 1) - 1, + lists:mapfoldl(fun(_, Acc1) -> + random_json(Acc1) + end, MaxElems - NumItems, lists:seq(1, NumItems)). + + +random_json_terminal() -> + case choose([null, true, false, number, string]) of + null -> null; + true -> true; + false -> false; + number -> random_json_number(); + string -> random_json_string() + end. + + +random_json_number() -> + AbsValue = case choose([integer, double]) of + integer -> rand:uniform(?MAX_INT); + double -> rand:uniform() * rand:uniform() + end, + case choose([pos, neg]) of + pos -> AbsValue; + neg -> -1 * AbsValue + end. + + +random_json_string() -> + random_string(0, ?MAX_STRING_LEN). + + +random_attachment(undefined) -> + random_attachment(random_string(1, 32)); + +random_attachment(Name) when is_binary(Name) -> + Type = random_string(1, 32), + Data = random_string(1, 512), + Md5 = erlang:md5(Data), + couch_att:new([ + {name, Name}, + {type, Type}, + {att_len, size(Data)}, + {data, Data}, + {encoding, identity}, + {md5, Md5} + ]). + + +random_string(MinLen, MaxLen) -> + Alphabet = [ + $a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m, + $n, $o, $p, $q, $r, $s, $t, $u, $v, $w, $x, $y, $z, + $A, $B, $C, $D, $E, $F, $G, $H, $I, $J, $K, $L, $M, + $N, $O, $P, $Q, $R, $S, $T, $U, $V, $W, $Y, $X, $Z, + $1, $2, $3, $4, $5, $6, $7, $8, $9, $0, + $!, $@, $#, $$, $%, $^, $&, $*, $(, $), + $ , ${, $}, $[, $], $", $', $-, $_, $+, $=, $,, $., + $\x{1}, $\x{a2}, $\x{20ac}, $\x{10348} + ], + Len = MinLen + rand:uniform(MaxLen - MinLen) - 1, + Str = lists:map(fun(_) -> + choose(Alphabet) + end, lists:seq(1, Len)), + unicode:characters_to_binary(Str). + + +choose(Options) -> + Pos = rand:uniform(length(Options)), + lists:nth(Pos, Options). diff --git a/src/fabric/test/fabric2_dir_prefix_tests.erl b/src/fabric/test/fabric2_dir_prefix_tests.erl new file mode 100644 index 000000000..2943d6533 --- /dev/null +++ b/src/fabric/test/fabric2_dir_prefix_tests.erl @@ -0,0 +1,71 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(fabric2_dir_prefix_tests). + + +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("eunit/include/eunit.hrl"). +-include("fabric2_test.hrl"). + + +dir_prefix_test_() -> + { + "Test couchdb fdb directory prefix", + setup, + fun() -> + % erlfdb, rexi and mem3 are all dependent apps for fabric. We make + % sure to start them so when fabric is started during the test it + % already has its dependencies + test_util:start_couch([erlfdb, rexi, mem3, ctrace, fabric]) + end, + fun(Ctx) -> + config:delete("fabric", "fdb_directory"), + test_util:stop_couch(Ctx) + end, + with([ + ?TDEF(default_prefix, 15), + ?TDEF(custom_prefix, 15) + ]) + }. 
+ + +default_prefix(_) -> + erase(fdb_directory), + ok = config:delete("fabric", "fdb_directory", false), + ok = application:stop(fabric), + ok = application:start(fabric), + + ?assertEqual([<<"couchdb">>], fabric2_server:fdb_directory()), + + % Try again to test pdict caching code + ?assertEqual([<<"couchdb">>], fabric2_server:fdb_directory()), + + % Check that we can create dbs + DbName = ?tempdb(), + ?assertMatch({ok, _}, fabric2_db:create(DbName, [])). + + +custom_prefix(_) -> + erase(fdb_directory), + ok = config:set("fabric", "fdb_directory", "couchdb_foo", false), + ok = application:stop(fabric), + ok = application:start(fabric), + + ?assertEqual([<<"couchdb_foo">>], fabric2_server:fdb_directory()), + + % Try again to test pdict caching code + ?assertEqual([<<"couchdb_foo">>], fabric2_server:fdb_directory()), + + % Check that we can create dbs + DbName = ?tempdb(), + ?assertMatch({ok, _}, fabric2_db:create(DbName, [])). diff --git a/src/fabric/test/fabric2_doc_att_tests.erl b/src/fabric/test/fabric2_doc_att_tests.erl new file mode 100644 index 000000000..5d28b6da0 --- /dev/null +++ b/src/fabric/test/fabric2_doc_att_tests.erl @@ -0,0 +1,331 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(fabric2_doc_att_tests). + + +-include_lib("couch/include/couch_db.hrl"). +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("eunit/include/eunit.hrl"). +-include("fabric2.hrl"). +-include("fabric2_test.hrl"). + + +doc_crud_test_() -> + { + "Test document CRUD operations", + { + setup, + fun setup/0, + fun cleanup/1, + with([ + ?TDEF(create_att), + ?TDEF(create_att_already_compressed), + ?TDEF(delete_att), + ?TDEF(multiple_atts), + ?TDEF(delete_one_att), + ?TDEF(large_att), + ?TDEF(att_on_conflict_isolation) + ]) + } + }. + + +setup() -> + Ctx = test_util:start_couch([fabric]), + {ok, Db} = fabric2_db:create(?tempdb(), [{user_ctx, ?ADMIN_USER}]), + {Db, Ctx}. + + +cleanup({Db, Ctx}) -> + ok = fabric2_db:delete(fabric2_db:name(Db), []), + test_util:stop_couch(Ctx). 
+ + +create_att({Db, _}) -> + DocId = fabric2_util:uuid(), + Att1 = couch_att:new([ + {name, <<"foo.txt">>}, + {type, <<"application/octet-stream">>}, + {att_len, 6}, + {data, <<"foobar">>}, + {encoding, identity}, + {md5, <<>>} + ]), + Doc1 = #doc{ + id = DocId, + atts = [Att1] + }, + {ok, _} = fabric2_db:update_doc(Db, Doc1), + {ok, Doc2} = fabric2_db:open_doc(Db, DocId), + #doc{ + atts = [Att2] + } = Doc2, + {loc, _Db, DocId, AttId} = couch_att:fetch(data, Att2), + AttData = fabric2_db:read_attachment(Db, DocId, AttId), + ?assertEqual(<<"foobar">>, AttData), + + % Check that the raw keys exist + #{ + db_prefix := DbPrefix + } = Db, + IdKey = erlfdb_tuple:pack({?DB_ATT_NAMES, DocId, AttId}, DbPrefix), + AttKey = erlfdb_tuple:pack({?DB_ATTS, DocId, AttId}, DbPrefix), + + fabric2_fdb:transactional(fun(Tx) -> + IdVal = erlfdb:wait(erlfdb:get(Tx, IdKey)), + AttVals = erlfdb:wait(erlfdb:get_range_startswith(Tx, AttKey)), + + ?assertEqual(erlfdb_tuple:pack({0, true}), IdVal), + Opts = [{minor_version, 1}, {compressed, 6}], + Expect = term_to_binary(<<"foobar">>, Opts), + ?assertMatch([{_, Expect}], AttVals) + end). + + +create_att_already_compressed({Db, _}) -> + DocId = fabric2_util:uuid(), + Att1 = couch_att:new([ + {name, <<"foo.txt">>}, + {type, <<"application/octet-stream">>}, + {att_len, 6}, + {data, <<"foobar">>}, + {encoding, gzip}, + {md5, <<>>} + ]), + Doc1 = #doc{ + id = DocId, + atts = [Att1] + }, + {ok, _} = fabric2_db:update_doc(Db, Doc1), + {ok, Doc2} = fabric2_db:open_doc(Db, DocId), + #doc{ + atts = [Att2] + } = Doc2, + {loc, _Db, DocId, AttId} = couch_att:fetch(data, Att2), + AttData = fabric2_db:read_attachment(Db, DocId, AttId), + ?assertEqual(<<"foobar">>, AttData), + + % Check that the raw keys exist + #{ + db_prefix := DbPrefix + } = Db, + IdKey = erlfdb_tuple:pack({?DB_ATT_NAMES, DocId, AttId}, DbPrefix), + AttKey = erlfdb_tuple:pack({?DB_ATTS, DocId, AttId}, DbPrefix), + + fabric2_fdb:transactional(fun(Tx) -> + IdVal = erlfdb:wait(erlfdb:get(Tx, IdKey)), + AttVals = erlfdb:wait(erlfdb:get_range_startswith(Tx, AttKey)), + + ?assertEqual(erlfdb_tuple:pack({0, false}), IdVal), + ?assertMatch([{_, <<"foobar">>}], AttVals) + end). + + +delete_att({Db, _}) -> + DocId = fabric2_util:uuid(), + Att1 = couch_att:new([ + {name, <<"foo.txt">>}, + {type, <<"application/octet-stream">>}, + {att_len, 6}, + {data, <<"foobar">>}, + {encoding, identity}, + {md5, <<>>} + ]), + Doc1 = #doc{ + id = DocId, + atts = [Att1] + }, + {ok, _} = fabric2_db:update_doc(Db, Doc1), + {ok, Doc2} = fabric2_db:open_doc(Db, DocId), + #doc{ + atts = [Att2] + } = Doc2, + {loc, _Db, DocId, AttId} = couch_att:fetch(data, Att2), + + Doc3 = Doc2#doc{atts = []}, + {ok, _} = fabric2_db:update_doc(Db, Doc3), + + {ok, Doc4} = fabric2_db:open_doc(Db, DocId), + ?assertEqual([], Doc4#doc.atts), + + % Check that the raw keys were removed + #{ + db_prefix := DbPrefix + } = Db, + IdKey = erlfdb_tuple:pack({?DB_ATT_NAMES, DocId, AttId}, DbPrefix), + AttKey = erlfdb_tuple:pack({?DB_ATTS, DocId, AttId}, DbPrefix), + + fabric2_fdb:transactional(fun(Tx) -> + IdVal = erlfdb:wait(erlfdb:get(Tx, IdKey)), + AttVals = erlfdb:wait(erlfdb:get_range_startswith(Tx, AttKey)), + + ?assertEqual(not_found, IdVal), + ?assertMatch([], AttVals) + end). 
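The raw-key assertions in create_att/1 and create_att_already_compressed/1 describe the storage layout this suite relies on: per attachment there is one ?DB_ATT_NAMES row whose packed value records whether fabric compressed the data itself ({0, true} for identity input, {0, false} when the client already sent gzip), plus ?DB_ATTS rows holding the bytes. A sketch of fetching both, reusing only calls that appear above:

% Return {NameRow, DataRows} for one attachment; DbPrefix/DocId/AttId are
% obtained exactly as in create_att/1.
raw_att_rows(#{db_prefix := DbPrefix}, DocId, AttId) ->
    IdKey = erlfdb_tuple:pack({?DB_ATT_NAMES, DocId, AttId}, DbPrefix),
    AttKey = erlfdb_tuple:pack({?DB_ATTS, DocId, AttId}, DbPrefix),
    fabric2_fdb:transactional(fun(Tx) ->
        NameRow = erlfdb:wait(erlfdb:get(Tx, IdKey)),
        DataRows = erlfdb:wait(erlfdb:get_range_startswith(Tx, AttKey)),
        {NameRow, DataRows}
    end).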
+ + +multiple_atts({Db, _}) -> + DocId = fabric2_util:uuid(), + Atts = [ + mk_att(<<"foo.txt">>, <<"foobar">>), + mk_att(<<"bar.txt">>, <<"barfoo">>), + mk_att(<<"baz.png">>, <<"blargh">>) + ], + {ok, _} = create_doc(Db, DocId, Atts), + ?assertEqual( + #{ + <<"foo.txt">> => <<"foobar">>, + <<"bar.txt">> => <<"barfoo">>, + <<"baz.png">> => <<"blargh">> + }, + read_atts(Db, DocId) + ). + + +delete_one_att({Db, _}) -> + DocId = fabric2_util:uuid(), + Atts1 = [ + mk_att(<<"foo.txt">>, <<"foobar">>), + mk_att(<<"bar.txt">>, <<"barfoo">>), + mk_att(<<"baz.png">>, <<"blargh">>) + ], + {ok, RevId} = create_doc(Db, DocId, Atts1), + Atts2 = tl(Atts1), + {ok, _} = update_doc(Db, DocId, RevId, stubify(RevId, Atts2)), + ?assertEqual( + #{ + <<"bar.txt">> => <<"barfoo">>, + <<"baz.png">> => <<"blargh">> + }, + read_atts(Db, DocId) + ). + + +large_att({Db, _}) -> + DocId = fabric2_util:uuid(), + % Total size ~360,000 bytes + AttData = iolist_to_binary([ + <<"foobar">> || _ <- lists:seq(1, 60000) + ]), + Att1 = mk_att(<<"long.txt">>, AttData, gzip), + {ok, _} = create_doc(Db, DocId, [Att1]), + ?assertEqual(#{<<"long.txt">> => AttData}, read_atts(Db, DocId)), + + {ok, Doc} = fabric2_db:open_doc(Db, DocId), + #doc{atts = [Att2]} = Doc, + {loc, _Db, DocId, AttId} = couch_att:fetch(data, Att2), + + #{db_prefix := DbPrefix} = Db, + AttKey = erlfdb_tuple:pack({?DB_ATTS, DocId, AttId}, DbPrefix), + fabric2_fdb:transactional(fun(Tx) -> + AttVals = erlfdb:wait(erlfdb:get_range_startswith(Tx, AttKey)), + ?assertEqual(4, length(AttVals)) + end). + + +att_on_conflict_isolation({Db, _}) -> + DocId = fabric2_util:uuid(), + [PosRevA1, PosRevB1] = create_conflicts(Db, DocId, []), + Att = mk_att(<<"happy_goat.tiff">>, <<":D>">>), + {ok, PosRevA2} = update_doc(Db, DocId, PosRevA1, [Att]), + ?assertEqual( + #{<<"happy_goat.tiff">> => <<":D>">>}, + read_atts(Db, DocId, PosRevA2) + ), + ?assertEqual(#{}, read_atts(Db, DocId, PosRevB1)). + + +mk_att(Name, Data) -> + mk_att(Name, Data, identity). + + +mk_att(Name, Data, Encoding) -> + couch_att:new([ + {name, Name}, + {type, <<"application/octet-stream">>}, + {att_len, size(Data)}, + {data, Data}, + {encoding, Encoding}, + {md5, <<>>} + ]). + + +stubify(RevId, Atts) when is_list(Atts) -> + lists:map(fun(Att) -> + stubify(RevId, Att) + end, Atts); + +stubify({Pos, _Rev}, Att) -> + couch_att:store([ + {data, stub}, + {revpos, Pos} + ], Att). + + +create_doc(Db, DocId, Atts) -> + Doc = #doc{ + id = DocId, + atts = Atts + }, + fabric2_db:update_doc(Db, Doc). + + +update_doc(Db, DocId, {Pos, Rev}, Atts) -> + Doc = #doc{ + id = DocId, + revs = {Pos, [Rev]}, + atts = Atts + }, + fabric2_db:update_doc(Db, Doc). + + +create_conflicts(Db, DocId, Atts) -> + Base = #doc{ + id = DocId, + atts = Atts + }, + {ok, {_, Rev1} = PosRev} = fabric2_db:update_doc(Db, Base), + <<Rev2:16/binary, Rev3:16/binary>> = fabric2_util:uuid(), + Doc1 = #doc{ + id = DocId, + revs = {2, [Rev2, Rev1]}, + atts = stubify(PosRev, Atts) + }, + Doc2 = #doc{ + id = DocId, + revs = {2, [Rev3, Rev1]}, + atts = stubify(PosRev, Atts) + }, + {ok, _} = fabric2_db:update_doc(Db, Doc1, [replicated_changes]), + {ok, _} = fabric2_db:update_doc(Db, Doc2, [replicated_changes]), + lists:reverse(lists:sort([{2, Rev2}, {2, Rev3}])). + + +read_atts(Db, DocId) -> + {ok, #doc{atts = Atts}} = fabric2_db:open_doc(Db, DocId), + atts_to_map(Db, DocId, Atts). + + +read_atts(Db, DocId, PosRev) -> + {ok, Docs} = fabric2_db:open_doc_revs(Db, DocId, [PosRev], []), + [{ok, #doc{atts = Atts}}] = Docs, + atts_to_map(Db, DocId, Atts). 
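The magic number in large_att/1 comes from value-size limits: FoundationDB caps values at 100 KB, so attachment data is split into chunks (assumed here to be 100,000 bytes each; the constant is not part of this patch). 6 bytes x 60,000 = 360,000 bytes therefore lands in 4 rows, which is exactly what the range read asserts:

% ceil(DataSize / ChunkSize) rows are expected for a chunked attachment;
% expected_chunks(360000, 100000) =:= 4 matches the assertion in large_att/1.
expected_chunks(DataSize, ChunkSize) ->
    (DataSize + ChunkSize - 1) div ChunkSize.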
+ + +atts_to_map(Db, DocId, Atts) -> + lists:foldl(fun(Att, Acc) -> + [Name, Data] = couch_att:fetch([name, data], Att), + {loc, _Db, DocId, AttId} = Data, + AttBin = fabric2_db:read_attachment(Db, DocId, AttId), + maps:put(Name, AttBin, Acc) + end, #{}, Atts). diff --git a/src/fabric/test/fabric2_doc_count_tests.erl b/src/fabric/test/fabric2_doc_count_tests.erl new file mode 100644 index 000000000..7aaf288f4 --- /dev/null +++ b/src/fabric/test/fabric2_doc_count_tests.erl @@ -0,0 +1,278 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(fabric2_doc_count_tests). + + +-include_lib("couch/include/couch_db.hrl"). +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("eunit/include/eunit.hrl"). +-include("fabric2_test.hrl"). + + +-define(DOC_COUNT, 10). + + +doc_count_test_() -> + { + "Test document counting operations", + { + setup, + fun setup/0, + fun cleanup/1, + with([ + ?TDEF(normal_docs), + ?TDEF(replicated_docs), + ?TDEF(design_docs), + ?TDEF(local_docs) + ]) + } + }. + + +setup() -> + Ctx = test_util:start_couch([fabric]), + {ok, Db} = fabric2_db:create(?tempdb(), [{user_ctx, ?ADMIN_USER}]), + {Db, Ctx}. + + +cleanup({Db, Ctx}) -> + ok = fabric2_db:delete(fabric2_db:name(Db), []), + test_util:stop_couch(Ctx). + + +normal_docs({Db, _}) -> + {DocCount, DelDocCount, DDocCount, LDocCount} = get_doc_counts(Db), + + Docs1 = lists:map(fun(Id) -> + Doc = #doc{ + id = integer_to_binary(Id), + body = {[{<<"value">>, Id}]} + }, + {ok, {RevPos, Rev}} = fabric2_db:update_doc(Db, Doc, []), + Doc#doc{revs = {RevPos, [Rev]}} + end, lists:seq(1, ?DOC_COUNT)), + + check_doc_counts( + Db, + DocCount + ?DOC_COUNT, + DelDocCount, + DDocCount, + LDocCount + ), + + Docs2 = lists:map(fun(Doc) -> + {[{<<"value">>, V}]} = Doc#doc.body, + NewDoc = case V rem 2 of + 0 -> Doc#doc{deleted = true}; + 1 -> Doc + end, + {ok, {RevPos, Rev}} = fabric2_db:update_doc(Db, NewDoc, []), + NewDoc#doc{revs = {RevPos, [Rev]}} + end, Docs1), + + check_doc_counts( + Db, + DocCount + ?DOC_COUNT div 2, + DelDocCount + ?DOC_COUNT div 2, + DDocCount, + LDocCount + ), + + lists:map(fun(Doc) -> + case Doc#doc.deleted of + true -> + Undeleted = Doc#doc{ + revs = {0, []}, + deleted = false + }, + {ok, {RevPos, Rev}} = fabric2_db:update_doc(Db, Undeleted, []), + Undeleted#doc{revs = {RevPos, [Rev]}}; + false -> + Doc + end + end, Docs2), + + check_doc_counts( + Db, + DocCount + ?DOC_COUNT, + DelDocCount, + DDocCount, + LDocCount + ). + + +replicated_docs({Db, _}) -> + {DocCount, DelDocCount, DDocCount, LDocCount} = get_doc_counts(Db), + + Opts = [replicated_changes], + {R1, R2, R3} = {<<"r1">>, <<"r2">>, <<"r3">>}, + + % First case is a simple replicated update + Doc1 = #doc{id = <<"rd1">>, revs = {1, [R1]}}, + {ok, {1, R1}} = fabric2_db:update_doc(Db, Doc1, Opts), + check_doc_counts(Db, DocCount + 1, DelDocCount, DDocCount, LDocCount), + + % Here a deleted document is replicated into the db. Doc count should not + % change, only deleted doc count. 
+ Doc2 = #doc{id = <<"rd2">>, revs = {1, [R2]}, deleted = true}, + {ok, {1, R2}} = fabric2_db:update_doc(Db, Doc2, Opts), + check_doc_counts(Db, DocCount + 1, DelDocCount + 1, DDocCount, LDocCount), + + % Here we extended the deleted document's rev path but keep it deleted. + % Deleted doc count doesn't bumped since the document was already counted + % as deleted + Doc3 = #doc{id = <<"rd2">>, revs = {2, [R3, R2]}, deleted = true}, + {ok, {2, R3}} = fabric2_db:update_doc(Db, Doc3, Opts), + check_doc_counts(Db, DocCount + 1, DelDocCount + 1 , DDocCount, LDocCount). + + +design_docs({Db, _}) -> + {DocCount, DelDocCount, DDocCount, LDocCount} = get_doc_counts(Db), + + Docs1 = lists:map(fun(Id) -> + BinId = integer_to_binary(Id), + DDocId = <<?DESIGN_DOC_PREFIX, BinId/binary>>, + Doc = #doc{ + id = DDocId, + body = {[{<<"value">>, Id}]} + }, + {ok, {RevPos, Rev}} = fabric2_db:update_doc(Db, Doc, []), + Doc#doc{revs = {RevPos, [Rev]}} + end, lists:seq(1, ?DOC_COUNT)), + + check_doc_counts( + Db, + DocCount + ?DOC_COUNT, + DelDocCount, + DDocCount + ?DOC_COUNT, + LDocCount + ), + + Docs2 = lists:map(fun(Doc) -> + {[{<<"value">>, V}]} = Doc#doc.body, + NewDoc = case V rem 2 of + 0 -> Doc#doc{deleted = true}; + 1 -> Doc + end, + {ok, {RevPos, Rev}} = fabric2_db:update_doc(Db, NewDoc, []), + NewDoc#doc{revs = {RevPos, [Rev]}} + end, Docs1), + + check_doc_counts( + Db, + DocCount + ?DOC_COUNT div 2, + DelDocCount + ?DOC_COUNT div 2, + DDocCount + ?DOC_COUNT div 2, + LDocCount + ), + + lists:map(fun(Doc) -> + case Doc#doc.deleted of + true -> + Undeleted = Doc#doc{ + revs = {0, []}, + deleted = false + }, + {ok, {RevPos, Rev}} = fabric2_db:update_doc(Db, Undeleted, []), + Undeleted#doc{revs = {RevPos, [Rev]}}; + false -> + Doc + end + end, Docs2), + + check_doc_counts( + Db, + DocCount + ?DOC_COUNT, + DelDocCount, + DDocCount + ?DOC_COUNT, + LDocCount + ). + + +local_docs({Db, _}) -> + {DocCount, DelDocCount, DDocCount, LDocCount} = get_doc_counts(Db), + + Docs1 = lists:map(fun(Id) -> + BinId = integer_to_binary(Id), + LDocId = <<?LOCAL_DOC_PREFIX, BinId/binary>>, + Doc = #doc{ + id = LDocId, + body = {[{<<"value">>, Id}]} + }, + {ok, {RevPos, Rev}} = fabric2_db:update_doc(Db, Doc, []), + Doc#doc{revs = {RevPos, [Rev]}} + end, lists:seq(1, ?DOC_COUNT)), + + check_doc_counts( + Db, + DocCount, + DelDocCount, + DDocCount, + LDocCount + ?DOC_COUNT + ), + + Docs2 = lists:map(fun(Doc) -> + {[{<<"value">>, V}]} = Doc#doc.body, + NewDoc = case V rem 2 of + 0 -> Doc#doc{deleted = true}; + 1 -> Doc + end, + {ok, {RevPos, Rev}} = fabric2_db:update_doc(Db, NewDoc, []), + NewDoc#doc{revs = {RevPos, [Rev]}} + end, Docs1), + + check_doc_counts( + Db, + DocCount, + DelDocCount, + DDocCount, + LDocCount + ?DOC_COUNT div 2 + ), + + lists:map(fun(Doc) -> + case Doc#doc.deleted of + true -> + Undeleted = Doc#doc{ + revs = {0, []}, + deleted = false + }, + {ok, {RevPos, Rev}} = fabric2_db:update_doc(Db, Undeleted, []), + Undeleted#doc{revs = {RevPos, [Rev]}}; + false -> + Doc + end + end, Docs2), + + check_doc_counts( + Db, + DocCount, + DelDocCount, + DDocCount, + LDocCount + ?DOC_COUNT + ). + + +get_doc_counts(Db) -> + DocCount = fabric2_db:get_doc_count(Db), + DelDocCount = fabric2_db:get_del_doc_count(Db), + DDocCount = fabric2_db:get_doc_count(Db, <<"_design">>), + LDocCount = fabric2_db:get_doc_count(Db, <<"_local">>), + {DocCount, DelDocCount, DDocCount, LDocCount}. 
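get_doc_counts/1 reads the four counters every case compares against. The trajectory normal_docs/1 asserts, starting from whatever the db already held, can be written down directly (with ?DOC_COUNT = 10); design docs follow the same pattern while also moving the _design counter, and local docs only ever move the _local counter:

% Expected counter states for normal_docs/1, as {DocCount, DelDocCount}.
expected_counts({Docs, Del}) ->
    [
        {after_create_10, {Docs + 10, Del}},
        {after_delete_5, {Docs + 5, Del + 5}},
        {after_undelete_5, {Docs + 10, Del}}
    ].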
+ + +check_doc_counts(Db, DocCount, DelDocCount, DDocCount, LDocCount) -> + ?assertEqual(DocCount, fabric2_db:get_doc_count(Db)), + ?assertEqual(DelDocCount, fabric2_db:get_del_doc_count(Db)), + ?assertEqual(DocCount, fabric2_db:get_doc_count(Db, <<"_all_docs">>)), + ?assertEqual(DDocCount, fabric2_db:get_doc_count(Db, <<"_design">>)), + ?assertEqual(LDocCount, fabric2_db:get_doc_count(Db, <<"_local">>)). diff --git a/src/fabric/test/fabric2_doc_crud_tests.erl b/src/fabric/test/fabric2_doc_crud_tests.erl new file mode 100644 index 000000000..7a24b7d52 --- /dev/null +++ b/src/fabric/test/fabric2_doc_crud_tests.erl @@ -0,0 +1,1018 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(fabric2_doc_crud_tests). + + +-include_lib("couch/include/couch_db.hrl"). +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("eunit/include/eunit.hrl"). +-include("fabric2.hrl"). +-include("fabric2_test.hrl"). + + +doc_crud_test_() -> + { + "Test document CRUD operations", + { + setup, + fun setup/0, + fun cleanup/1, + with([ + ?TDEF(open_missing_doc), + ?TDEF(create_new_doc), + ?TDEF(create_ddoc_basic), + ?TDEF(create_ddoc_requires_admin), + ?TDEF(create_ddoc_requires_validation), + ?TDEF(create_ddoc_requires_compilation), + ?TDEF(can_create_a_partitioned_ddoc), + ?TDEF(update_doc_basic), + ?TDEF(update_ddoc_basic), + ?TDEF(update_doc_replicated), + ?TDEF(update_doc_replicated_add_conflict), + ?TDEF(update_doc_replicated_changes_winner), + ?TDEF(update_doc_replicated_extension), + ?TDEF(update_doc_replicate_existing_rev), + ?TDEF(update_winning_conflict_branch), + ?TDEF(update_non_winning_conflict_branch), + ?TDEF(delete_doc_basic), + ?TDEF(delete_changes_winner), + ?TDEF(recreate_doc_basic), + ?TDEF(conflict_on_create_new_with_rev), + ?TDEF(conflict_on_update_with_no_rev), + ?TDEF(allow_create_new_as_deleted), + ?TDEF(conflict_on_recreate_as_deleted), + ?TDEF(conflict_on_extend_deleted), + ?TDEF(open_doc_revs_basic), + ?TDEF(open_doc_revs_all), + ?TDEF(open_doc_revs_latest), + ?TDEF(get_missing_revs_basic), + ?TDEF(get_missing_revs_on_missing_doc), + ?TDEF(open_missing_local_doc), + ?TDEF(create_local_doc_basic), + ?TDEF(update_local_doc_basic), + ?TDEF(delete_local_doc_basic), + ?TDEF(recreate_local_doc), + ?TDEF(create_local_doc_bad_rev), + ?TDEF(create_local_doc_random_rev), + ?TDEF(create_a_large_local_doc), + ?TDEF(create_2_large_local_docs), + ?TDEF(local_doc_with_previous_encoding), + ?TDEF(before_doc_update_skips_local_docs), + ?TDEF(open_doc_opts) + ]) + } + }. + + +setup() -> + Ctx = test_util:start_couch([fabric]), + {ok, Db} = fabric2_db:create(?tempdb(), [{user_ctx, ?ADMIN_USER}]), + {Db, Ctx}. + + +cleanup({Db, Ctx}) -> + ok = fabric2_db:delete(fabric2_db:name(Db), []), + test_util:stop_couch(Ctx). + + +open_missing_doc({Db, _}) -> + ?assertEqual({not_found, missing}, fabric2_db:open_doc(Db, <<"foo">>)). 
+ + +create_new_doc({Db, _}) -> + Doc = #doc{ + id = fabric2_util:uuid(), + body = {[{<<"foo">>, <<"bar">>}]} + }, + {ok, {RevPos, Rev}} = fabric2_db:update_doc(Db, Doc), + NewDoc = Doc#doc{revs = {RevPos, [Rev]}}, + ?assertEqual({ok, NewDoc}, fabric2_db:open_doc(Db, Doc#doc.id)). + + +create_ddoc_basic({Db, _}) -> + UUID = fabric2_util:uuid(), + DDocId = <<"_design/", UUID/binary>>, + Doc = #doc{ + id = DDocId, + body = {[{<<"foo">>, <<"bar">>}]} + }, + {ok, {RevPos, Rev}} = fabric2_db:update_doc(Db, Doc), + NewDoc = Doc#doc{revs = {RevPos, [Rev]}}, + ?assertEqual({ok, NewDoc}, fabric2_db:open_doc(Db, Doc#doc.id)). + + +can_create_a_partitioned_ddoc({Db, _}) -> + UUID = fabric2_util:uuid(), + DDocId = <<"_design/", UUID/binary>>, + Doc = #doc{ + id = DDocId, + body = {[ + {<<"options">>, {[{<<"partitioned">>, true}]}}, + {<<"views">>, {[ + {<<"foo">>, {[ + {<<"map">>, <<"function(doc) {}">>} + ]}} + ]}} + ]} + }, + ?assertMatch({ok, {_, _}}, fabric2_db:update_doc(Db, Doc)). + + +create_ddoc_requires_admin({Db, _}) -> + Db2 = fabric2_db:set_user_ctx(Db, #user_ctx{}), + UUID = fabric2_util:uuid(), + DDocId = <<"_design/", UUID/binary>>, + Doc = #doc{ + id = DDocId, + body = {[{<<"foo">>, <<"bar">>}]} + }, + ?assertThrow({unauthorized, _}, fabric2_db:update_doc(Db2, Doc)). + + +create_ddoc_requires_validation({Db, _}) -> + UUID = fabric2_util:uuid(), + DDocId = <<"_design/", UUID/binary>>, + Doc = #doc{ + id = DDocId, + body = {[ + {<<"views">>, {[ + {<<"foo">>, {[ + {<<"map">>, <<"function(doc) {}">>}, + {<<"reduce">>, <<"_not_a_builtin_reduce">>} + ]}} + ]}} + ]} + }, + ?assertThrow( + {bad_request, invalid_design_doc, _}, + fabric2_db:update_doc(Db, Doc) + ). + + +create_ddoc_requires_compilation({Db, _}) -> + UUID = fabric2_util:uuid(), + DDocId = <<"_design/", UUID/binary>>, + Doc = #doc{ + id = DDocId, + body = {[ + {<<"language">>, <<"javascript">>}, + {<<"views">>, {[ + {<<"foo">>, {[ + {<<"map">>, <<"Hopefully this is invalid JavaScript">>} + ]}} + ]}} + ]} + }, + ?assertThrow( + {bad_request, compilation_error, _}, + fabric2_db:update_doc(Db, Doc) + ). + + +update_doc_basic({Db, _}) -> + Doc1 = #doc{ + id = fabric2_util:uuid(), + body = {[{<<"state">>, 1}]} + }, + {ok, {Pos1, Rev1}} = fabric2_db:update_doc(Db, Doc1), + Doc2 = Doc1#doc{ + revs = {Pos1, [Rev1]}, + body = {[{<<"state">>, 2}]} + }, + {ok, {Pos2, Rev2}} = fabric2_db:update_doc(Db, Doc2), + Doc3 = Doc2#doc{ + revs = {Pos2, [Rev2, Rev1]} + }, + ?assertEqual({ok, Doc3}, fabric2_db:open_doc(Db, Doc2#doc.id)). + + +update_ddoc_basic({Db, _}) -> + UUID = fabric2_util:uuid(), + DDocId = <<"_design/", UUID/binary>>, + Doc1 = #doc{ + id = DDocId, + body = {[{<<"state">>, 1}]} + }, + {ok, {Pos1, Rev1}} = fabric2_db:update_doc(Db, Doc1), + Doc2 = Doc1#doc{ + revs = {Pos1, [Rev1]}, + body = {[{<<"state">>, 2}]} + }, + {ok, {Pos2, Rev2}} = fabric2_db:update_doc(Db, Doc2), + Doc3 = Doc2#doc{ + revs = {Pos2, [Rev2, Rev1]} + }, + ?assertEqual({ok, Doc3}, fabric2_db:open_doc(Db, Doc2#doc.id)). + + +update_doc_replicated({Db, _}) -> + Doc = #doc{ + id = fabric2_util:uuid(), + revs = {2, [fabric2_util:uuid(), fabric2_util:uuid()]}, + body = {[{<<"foo">>, <<"bar">>}]} + }, + {ok, {2, _}} = fabric2_db:update_doc(Db, Doc, [replicated_changes]), + ?assertEqual({ok, Doc}, fabric2_db:open_doc(Db, Doc#doc.id)). 
+ + +update_doc_replicated_add_conflict({Db, _}) -> + [Rev1, Rev2, Rev3] = lists:sort([ + fabric2_util:uuid(), + fabric2_util:uuid(), + fabric2_util:uuid() + ]), + Doc1 = #doc{ + id = fabric2_util:uuid(), + revs = {2, [Rev3, Rev1]}, + body = {[{<<"foo">>, <<"bar">>}]} + }, + {ok, {2, _}} = fabric2_db:update_doc(Db, Doc1, [replicated_changes]), + ?assertEqual({ok, Doc1}, fabric2_db:open_doc(Db, Doc1#doc.id)), + Doc2 = Doc1#doc{ + revs = {2, [Rev2, Rev1]}, + body = {[{<<"bar">>, <<"foo">>}]} + }, + {ok, {2, _}} = fabric2_db:update_doc(Db, Doc2, [replicated_changes]), + ?assertEqual({ok, Doc1}, fabric2_db:open_doc(Db, Doc2#doc.id)). + + +update_doc_replicated_changes_winner({Db, _}) -> + [Rev1, Rev2, Rev3] = lists:sort([ + fabric2_util:uuid(), + fabric2_util:uuid(), + fabric2_util:uuid() + ]), + Doc1 = #doc{ + id = fabric2_util:uuid(), + revs = {2, [Rev2, Rev1]}, + body = {[{<<"foo">>, <<"bar">>}]} + }, + {ok, {2, _}} = fabric2_db:update_doc(Db, Doc1, [replicated_changes]), + ?assertEqual({ok, Doc1}, fabric2_db:open_doc(Db, Doc1#doc.id)), + Doc2 = Doc1#doc{ + revs = {2, [Rev3, Rev1]}, + body = {[{<<"bar">>, <<"foo">>}]} + }, + {ok, {2, _}} = fabric2_db:update_doc(Db, Doc2, [replicated_changes]), + ?assertEqual({ok, Doc2}, fabric2_db:open_doc(Db, Doc2#doc.id)). + + +update_doc_replicated_extension({Db, _}) -> + % No sort necessary and avoided on purpose to + % demonstrate that this is not sort dependent + Rev1 = fabric2_util:uuid(), + Rev2 = fabric2_util:uuid(), + Rev3 = fabric2_util:uuid(), + Rev4 = fabric2_util:uuid(), + Doc1 = #doc{ + id = fabric2_util:uuid(), + revs = {2, [Rev2, Rev1]}, + body = {[{<<"foo">>, <<"bar">>}]} + }, + {ok, {2, _}} = fabric2_db:update_doc(Db, Doc1, [replicated_changes]), + Doc2 = Doc1#doc{ + revs = {4, [Rev4, Rev3, Rev2]}, + body = {[{<<"bar">>, <<"foo">>}]} + }, + {ok, {4, _}} = fabric2_db:update_doc(Db, Doc2, [replicated_changes]), + {ok, Doc3} = fabric2_db:open_doc(Db, Doc2#doc.id), + ?assertEqual({4, [Rev4, Rev3, Rev2, Rev1]}, Doc3#doc.revs), + ?assertEqual(Doc2#doc{revs = undefined}, Doc3#doc{revs = undefined}). + + +update_doc_replicate_existing_rev({Db, _}) -> + Rev1 = fabric2_util:uuid(), + Rev2 = fabric2_util:uuid(), + Doc1 = #doc{ + id = fabric2_util:uuid(), + revs = {2, [Rev2, Rev1]}, + body = {[{<<"foo">>, <<"bar">>}]} + }, + {ok, {2, _}} = fabric2_db:update_doc(Db, Doc1, [replicated_changes]), + {ok, []} = fabric2_db:update_docs(Db, [Doc1], [replicated_changes]), + ?assertEqual({ok, Doc1}, fabric2_db:open_doc(Db, Doc1#doc.id)). + + +update_winning_conflict_branch({Db, _}) -> + [Rev1, Rev2, Rev3] = lists:sort([ + fabric2_util:uuid(), + fabric2_util:uuid(), + fabric2_util:uuid() + ]), + Doc1 = #doc{ + id = fabric2_util:uuid(), + revs = {2, [Rev3, Rev1]}, + body = {[{<<"foo">>, <<"bar">>}]} + }, + {ok, {2, _}} = fabric2_db:update_doc(Db, Doc1, [replicated_changes]), + Doc2 = Doc1#doc{ + revs = {2, [Rev2, Rev1]}, + body = {[{<<"bar">>, <<"foo">>}]} + }, + {ok, {2, _}} = fabric2_db:update_doc(Db, Doc2, [replicated_changes]), + % Update the winning branch + Doc3 = Doc1#doc{ + revs = {2, [Rev3, Rev1]}, + body = {[{<<"baz">>, 2}]} + }, + {ok, {3, Rev4}} = fabric2_db:update_doc(Db, Doc3), + {ok, Doc4} = fabric2_db:open_doc(Db, Doc3#doc.id), + % Assert we've got the correct winner + ?assertEqual({3, [Rev4, Rev3, Rev1]}, Doc4#doc.revs), + ?assertEqual(Doc3#doc{revs = undefined}, Doc4#doc{revs = undefined}). 
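All of the update_doc_replicated* cases build revision paths by hand: revs is {Start, [Newest, ..., Oldest]} and passing [replicated_changes] (the internal form of the replicator's new_edits=false) stores that exact branch instead of asking the server to mint a new rev. A minimal sketch of introducing a conflict this way, assuming a document that already has revision {1, Rev1}:

% Store an explicit second branch rooted at Rev1; whether it becomes the
% winner depends on how ConflictRev sorts against the existing leaf.
add_conflict_sketch(Db, DocId, Rev1) ->
    ConflictRev = fabric2_util:uuid(),
    Doc = #doc{
        id = DocId,
        revs = {2, [ConflictRev, Rev1]},
        body = {[{<<"conflict">>, true}]}
    },
    {ok, {2, _}} = fabric2_db:update_doc(Db, Doc, [replicated_changes]),
    {2, ConflictRev}.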
+ + +update_non_winning_conflict_branch({Db, _}) -> + [Rev1, Rev2, Rev3] = lists:sort([ + fabric2_util:uuid(), + fabric2_util:uuid(), + fabric2_util:uuid() + ]), + Doc1 = #doc{ + id = fabric2_util:uuid(), + revs = {2, [Rev3, Rev1]}, + body = {[{<<"foo">>, <<"bar">>}]} + }, + {ok, {2, _}} = fabric2_db:update_doc(Db, Doc1, [replicated_changes]), + Doc2 = Doc1#doc{ + revs = {2, [Rev2, Rev1]}, + body = {[{<<"bar">>, <<"foo">>}]} + }, + {ok, {2, _}} = fabric2_db:update_doc(Db, Doc2, [replicated_changes]), + % Update the non winning branch + Doc3 = Doc1#doc{ + revs = {2, [Rev2, Rev1]}, + body = {[{<<"baz">>, 2}]} + }, + {ok, {3, Rev4}} = fabric2_db:update_doc(Db, Doc3), + {ok, Doc4} = fabric2_db:open_doc(Db, Doc3#doc.id), + % Assert we've got the correct winner + ?assertEqual({3, [Rev4, Rev2, Rev1]}, Doc4#doc.revs), + ?assertEqual(Doc3#doc{revs = undefined}, Doc4#doc{revs = undefined}). + + +delete_doc_basic({Db, _}) -> + Doc1 = #doc{ + id = fabric2_util:uuid(), + body = {[{<<"state">>, 1}]} + }, + {ok, {Pos1, Rev1}} = fabric2_db:update_doc(Db, Doc1), + Doc2 = Doc1#doc{ + revs = {Pos1, [Rev1]}, + deleted = true, + body = {[{<<"state">>, 2}]} + }, + {ok, {Pos2, Rev2}} = fabric2_db:update_doc(Db, Doc2), + Doc3 = Doc2#doc{revs = {Pos2, [Rev2, Rev1]}}, + ?assertEqual({ok, Doc3}, fabric2_db:open_doc(Db, Doc2#doc.id, [deleted])). + + +delete_changes_winner({Db, _}) -> + [Rev1, Rev2, Rev3] = lists:sort([ + fabric2_util:uuid(), + fabric2_util:uuid(), + fabric2_util:uuid() + ]), + Doc1 = #doc{ + id = fabric2_util:uuid(), + revs = {2, [Rev3, Rev1]}, + body = {[{<<"foo">>, <<"bar">>}]} + }, + {ok, {2, _}} = fabric2_db:update_doc(Db, Doc1, [replicated_changes]), + Doc2 = Doc1#doc{ + revs = {2, [Rev2, Rev1]}, + body = {[{<<"bar">>, <<"foo">>}]} + }, + {ok, {2, _}} = fabric2_db:update_doc(Db, Doc2, [replicated_changes]), + % Delete the winning branch + Doc3 = Doc1#doc{ + revs = {2, [Rev3, Rev1]}, + deleted = true, + body = {[]} + }, + {ok, {3, _}} = fabric2_db:update_doc(Db, Doc3), + ?assertEqual({ok, Doc2}, fabric2_db:open_doc(Db, Doc3#doc.id)). + + +recreate_doc_basic({Db, _}) -> + Doc1 = #doc{ + id = fabric2_util:uuid(), + body = {[{<<"state">>, 1}]} + }, + {ok, {1, Rev1}} = fabric2_db:update_doc(Db, Doc1), + Doc2 = Doc1#doc{ + revs = {1, [Rev1]}, + deleted = true, + body = {[{<<"state">>, 2}]} + }, + {ok, {2, Rev2}} = fabric2_db:update_doc(Db, Doc2), + Doc3 = Doc1#doc{ + revs = {0, []}, + deleted = false, + body = {[{<<"state">>, 3}]} + }, + {ok, {3, Rev3}} = fabric2_db:update_doc(Db, Doc3), + {ok, Doc4} = fabric2_db:open_doc(Db, Doc3#doc.id), + ?assertEqual({3, [Rev3, Rev2, Rev1]}, Doc4#doc.revs), + ?assertEqual(Doc3#doc{revs = undefined}, Doc4#doc{revs = undefined}). + + +conflict_on_create_new_with_rev({Db, _}) -> + Doc = #doc{ + id = fabric2_util:uuid(), + revs = {1, [fabric2_util:uuid()]}, + body = {[{<<"foo">>, <<"bar">>}]} + }, + ?assertThrow(conflict, fabric2_db:update_doc(Db, Doc)). + + +conflict_on_update_with_no_rev({Db, _}) -> + Doc1 = #doc{ + id = fabric2_util:uuid(), + body = {[{<<"state">>, 1}]} + }, + {ok, _} = fabric2_db:update_doc(Db, Doc1), + Doc2 = Doc1#doc{ + revs = {0, []}, + body = {[{<<"state">>, 2}]} + }, + ?assertThrow(conflict, fabric2_db:update_doc(Db, Doc2)). 
+ + +allow_create_new_as_deleted({Db, _}) -> + Doc = #doc{ + id = fabric2_util:uuid(), + deleted = true, + body = {[{<<"foo">>, <<"bar">>}]} + }, + {ok, {1, Rev}} = fabric2_db:update_doc(Db, Doc), + ?assertEqual({not_found, deleted}, fabric2_db:open_doc(Db, Doc#doc.id)), + Doc1 = Doc#doc{ + revs = {1, [Rev]} + }, + ?assertEqual({ok, Doc1}, fabric2_db:open_doc(Db, Doc#doc.id, [deleted])), + % Only works when the document has never existed to match CouchDB 3.x + % behavior + ?assertThrow(conflict, fabric2_db:update_doc(Db, Doc)). + + +conflict_on_recreate_as_deleted({Db, _}) -> + Doc1 = #doc{ + id = fabric2_util:uuid(), + body = {[{<<"state">>, 1}]} + }, + {ok, {Pos1, Rev1}} = fabric2_db:update_doc(Db, Doc1), + Doc2 = Doc1#doc{ + revs = {Pos1, [Rev1]}, + deleted = true, + body = {[{<<"state">>, 2}]} + }, + {ok, _} = fabric2_db:update_doc(Db, Doc2), + Doc3 = Doc1#doc{ + revs = {0, []}, + deleted = true, + body = {[{<<"state">>, 3}]} + }, + ?assertThrow(conflict, fabric2_db:update_doc(Db, Doc3)). + + +conflict_on_extend_deleted({Db, _}) -> + Doc1 = #doc{ + id = fabric2_util:uuid(), + body = {[{<<"state">>, 1}]} + }, + {ok, {Pos1, Rev1}} = fabric2_db:update_doc(Db, Doc1), + Doc2 = Doc1#doc{ + revs = {Pos1, [Rev1]}, + deleted = true, + body = {[{<<"state">>, 2}]} + }, + {ok, {Pos2, Rev2}} = fabric2_db:update_doc(Db, Doc2), + Doc3 = Doc1#doc{ + revs = {Pos2, [Rev2]}, + deleted = false, + body = {[{<<"state">>, 3}]} + }, + ?assertThrow(conflict, fabric2_db:update_doc(Db, Doc3)). + + +open_doc_revs_basic({Db, _}) -> + [Rev1, Rev2, Rev3] = lists:sort([ + fabric2_util:uuid(), + fabric2_util:uuid(), + fabric2_util:uuid() + ]), + DocId = fabric2_util:uuid(), + Doc1 = #doc{ + id = DocId, + revs = {2, [Rev3, Rev1]}, + body = {[{<<"foo">>, <<"bar">>}]} + }, + {ok, {2, _}} = fabric2_db:update_doc(Db, Doc1, [replicated_changes]), + Doc2 = Doc1#doc{ + revs = {2, [Rev2, Rev1]}, + body = {[{<<"bar">>, <<"foo">>}]} + }, + {ok, {2, _}} = fabric2_db:update_doc(Db, Doc2, [replicated_changes]), + + {ok, [{ok, Doc3}]} = fabric2_db:open_doc_revs(Db, DocId, [{2, Rev3}], []), + ?assertEqual(Doc1, Doc3), + + {ok, [{ok, Doc4}]} = fabric2_db:open_doc_revs(Db, DocId, [{2, Rev2}], []), + ?assertEqual(Doc2, Doc4), + + Revs = [{2, Rev3}, {2, Rev2}, {1, Rev1}], + {ok, Docs} = fabric2_db:open_doc_revs(Db, DocId, Revs, []), + ?assert(length(Docs) == 3), + ?assert(lists:member({ok, Doc1}, Docs)), + ?assert(lists:member({ok, Doc2}, Docs)), + ?assert(lists:member({{not_found, missing}, {1, Rev1}}, Docs)), + + % Make sure crazy madeup revisions are accepted + MissingRevs = [{5, fabric2_util:uuid()}, {1, fabric2_util:uuid()}], + {ok, NFMissing} = fabric2_db:open_doc_revs(Db, DocId, MissingRevs, []), + ?assertEqual(2, length(NFMissing)), + lists:foreach(fun(MR) -> + ?assert(lists:member({{not_found, missing}, MR}, NFMissing)) + end, MissingRevs). + + +open_doc_revs_all({Db, _}) -> + [Rev1, Rev2, Rev3] = lists:sort([ + fabric2_util:uuid(), + fabric2_util:uuid(), + fabric2_util:uuid() + ]), + DocId = fabric2_util:uuid(), + Doc1 = #doc{ + id = DocId, + revs = {2, [Rev3, Rev1]}, + body = {[{<<"foo">>, <<"bar">>}]} + }, + {ok, {2, _}} = fabric2_db:update_doc(Db, Doc1, [replicated_changes]), + Doc2 = Doc1#doc{ + revs = {2, [Rev2, Rev1]}, + body = {[{<<"bar">>, <<"foo">>}]} + }, + {ok, {2, _}} = fabric2_db:update_doc(Db, Doc2, [replicated_changes]), + + {ok, Docs} = fabric2_db:open_doc_revs(Db, DocId, all, []), + ?assert(length(Docs) == 2), + ?assert(lists:member({ok, Doc1}, Docs)), + ?assert(lists:member({ok, Doc2}, Docs)). 
+ + +open_doc_revs_latest({Db, _}) -> + [Rev1, Rev2, Rev3] = lists:sort([ + fabric2_util:uuid(), + fabric2_util:uuid(), + fabric2_util:uuid() + ]), + DocId = fabric2_util:uuid(), + Doc1 = #doc{ + id = DocId, + revs = {2, [Rev3, Rev1]}, + body = {[{<<"foo">>, <<"bar">>}]} + }, + {ok, {2, _}} = fabric2_db:update_doc(Db, Doc1, [replicated_changes]), + Doc2 = Doc1#doc{ + revs = {2, [Rev2, Rev1]}, + body = {[{<<"bar">>, <<"foo">>}]} + }, + {ok, {2, _}} = fabric2_db:update_doc(Db, Doc2, [replicated_changes]), + + Opts = [latest], + {ok, [{ok, Doc3}]} = fabric2_db:open_doc_revs(Db, DocId, [{2, Rev3}], Opts), + ?assertEqual(Doc1, Doc3), + + {ok, Docs} = fabric2_db:open_doc_revs(Db, DocId, [{1, Rev1}], Opts), + ?assert(length(Docs) == 2), + ?assert(lists:member({ok, Doc1}, Docs)), + ?assert(lists:member({ok, Doc2}, Docs)). + + +get_missing_revs_basic({Db, _}) -> + [Rev1, Rev2, Rev3] = lists:sort([ + fabric2_util:uuid(), + fabric2_util:uuid(), + fabric2_util:uuid() + ]), + DocId = fabric2_util:uuid(), + Doc1 = #doc{ + id = DocId, + revs = {2, [Rev3, Rev1]}, + body = {[{<<"foo">>, <<"bar">>}]} + }, + {ok, {2, _}} = fabric2_db:update_doc(Db, Doc1, [replicated_changes]), + Doc2 = Doc1#doc{ + revs = {2, [Rev2, Rev1]}, + body = {[{<<"bar">>, <<"foo">>}]} + }, + {ok, {2, _}} = fabric2_db:update_doc(Db, Doc2, [replicated_changes]), + + % Check that we can find all revisions + AllRevs = [{1, Rev1}, {2, Rev2}, {2, Rev3}], + ?assertEqual( + {ok, []}, + fabric2_db:get_missing_revs(Db, [{DocId, AllRevs}]) + ), + + % Check that a missing revision is found with no possible ancestors + MissingRev = {2, fabric2_util:uuid()}, + ?assertEqual( + {ok, [{DocId, [MissingRev], []}]}, + fabric2_db:get_missing_revs(Db, [{DocId, [MissingRev]}]) + ), + + % Check that only a missing rev is returned + ?assertEqual( + {ok, [{DocId, [MissingRev], []}]}, + fabric2_db:get_missing_revs(Db, [{DocId, [MissingRev | AllRevs]}]) + ), + + % Check that we can find possible ancestors + MissingWithAncestors = {4, fabric2_util:uuid()}, + PossibleAncestors = [{2, Rev2}, {2, Rev3}], + ?assertEqual( + {ok, [{DocId, [MissingWithAncestors], PossibleAncestors}]}, + fabric2_db:get_missing_revs(Db, [{DocId, [MissingWithAncestors]}]) + ). + + +get_missing_revs_on_missing_doc({Db, _}) -> + Revs = lists:sort([ + couch_doc:rev_to_str({1, fabric2_util:uuid()}), + couch_doc:rev_to_str({2, fabric2_util:uuid()}), + couch_doc:rev_to_str({800, fabric2_util:uuid()}) + ]), + DocId = fabric2_util:uuid(), + {ok, Resp} = fabric2_db:get_missing_revs(Db, [{DocId, Revs}]), + ?assertMatch([{DocId, [_ | _], []}], Resp), + [{DocId, Missing, _}] = Resp, + MissingStrs = [couch_doc:rev_to_str(Rev) || Rev <- Missing], + ?assertEqual(Revs, lists:sort(MissingStrs)). + + +open_missing_local_doc({Db, _}) -> + ?assertEqual( + {not_found, missing}, + fabric2_db:open_doc(Db, <<"_local/foo">>, []) + ). + + +create_local_doc_basic({Db, _}) -> + UUID = fabric2_util:uuid(), + LDocId = <<?LOCAL_DOC_PREFIX, UUID/binary>>, + Doc1 = #doc{ + id = LDocId, + revs = {0, []}, + deleted = false, + body = {[{<<"ohai">>, <<"there">>}]} + }, + ?assertEqual({ok, {0, <<"1">>}}, fabric2_db:update_doc(Db, Doc1)), + {ok, Doc2} = fabric2_db:open_doc(Db, Doc1#doc.id, []), + ?assertEqual(Doc1#doc{revs = {0, [<<"1">>]}}, Doc2). 
+
+
+update_local_doc_basic({Db, _}) ->
+    UUID = fabric2_util:uuid(),
+    LDocId = <<?LOCAL_DOC_PREFIX, UUID/binary>>,
+    Doc1 = #doc{
+        id = LDocId,
+        revs = {0, []},
+        deleted = false,
+        body = {[{<<"ohai">>, <<"there">>}]}
+    },
+    ?assertEqual({ok, {0, <<"1">>}}, fabric2_db:update_doc(Db, Doc1)),
+    Doc2 = Doc1#doc{
+        revs = {0, [<<"1">>]},
+        body = {[{<<"whiz">>, <<"bang">>}]}
+    },
+    ?assertEqual({ok, {0, <<"2">>}}, fabric2_db:update_doc(Db, Doc2)),
+    {ok, Doc3} = fabric2_db:open_doc(Db, Doc1#doc.id, []),
+    ?assertEqual(Doc2#doc{revs = {0, [<<"2">>]}}, Doc3).
+
+
+delete_local_doc_basic({Db, _}) ->
+    UUID = fabric2_util:uuid(),
+    LDocId = <<?LOCAL_DOC_PREFIX, UUID/binary>>,
+    Doc1 = #doc{
+        id = LDocId,
+        revs = {0, []},
+        deleted = false,
+        body = {[{<<"ohai">>, <<"there">>}]}
+    },
+    ?assertEqual({ok, {0, <<"1">>}}, fabric2_db:update_doc(Db, Doc1)),
+    Doc2 = Doc1#doc{
+        revs = {0, [<<"1">>]},
+        deleted = true,
+        body = {[]}
+    },
+    ?assertEqual({ok, {0, <<"0">>}}, fabric2_db:update_doc(Db, Doc2)),
+    ?assertEqual(
+        {not_found, missing},
+        fabric2_db:open_doc(Db, LDocId)
+    ).
+
+
+recreate_local_doc({Db, _}) ->
+    UUID = fabric2_util:uuid(),
+    LDocId = <<?LOCAL_DOC_PREFIX, UUID/binary>>,
+    Doc1 = #doc{
+        id = LDocId,
+        revs = {0, []},
+        deleted = false,
+        body = {[{<<"ohai">>, <<"there">>}]}
+    },
+    ?assertEqual({ok, {0, <<"1">>}}, fabric2_db:update_doc(Db, Doc1)),
+    Doc2 = Doc1#doc{
+        revs = {0, [<<"1">>]},
+        deleted = true,
+        body = {[]}
+    },
+    ?assertEqual({ok, {0, <<"0">>}}, fabric2_db:update_doc(Db, Doc2)),
+    ?assertEqual(
+        {not_found, missing},
+        fabric2_db:open_doc(Db, LDocId)
+    ),
+
+    ?assertEqual({ok, {0, <<"1">>}}, fabric2_db:update_doc(Db, Doc1)),
+    {ok, Doc3} = fabric2_db:open_doc(Db, LDocId),
+    ?assertEqual(Doc1#doc{revs = {0, [<<"1">>]}}, Doc3).
+
+
+create_local_doc_bad_rev({Db, _}) ->
+    UUID = fabric2_util:uuid(),
+    LDocId = <<?LOCAL_DOC_PREFIX, UUID/binary>>,
+    Doc1 = #doc{
+        id = LDocId,
+        revs = {0, [<<"not a number">>]}
+    },
+    ?assertThrow(<<"Invalid rev format">>, fabric2_db:update_doc(Db, Doc1)),
+
+    Doc2 = Doc1#doc{
+        revs = bad_bad_rev_roy_brown
+    },
+    ?assertThrow(<<"Invalid rev format">>, fabric2_db:update_doc(Db, Doc2)).
+
+
+create_local_doc_random_rev({Db, _}) ->
+    % Local docs don't care what rev is passed
+    % as long as it's a number.
+    UUID = fabric2_util:uuid(),
+    LDocId = <<?LOCAL_DOC_PREFIX, UUID/binary>>,
+    Doc1 = #doc{
+        id = LDocId,
+        revs = {0, [<<"42">>]},
+        body = {[{<<"state">>, 1}]}
+    },
+    ?assertEqual({ok, {0, <<"43">>}}, fabric2_db:update_doc(Db, Doc1)),
+    {ok, Doc2} = fabric2_db:open_doc(Db, LDocId, []),
+    ?assertEqual(Doc1#doc{revs = {0, [<<"43">>]}}, Doc2),
+
+    Doc3 = Doc1#doc{
+        revs = {0, [<<"1234567890">>]},
+        body = {[{<<"state">>, 2}]}
+    },
+    ?assertEqual({ok, {0, <<"1234567891">>}}, fabric2_db:update_doc(Db, Doc3)),
+    {ok, Doc4} = fabric2_db:open_doc(Db, LDocId, []),
+    ?assertEqual(Doc3#doc{revs = {0, [<<"1234567891">>]}}, Doc4),
+
+    Doc5 = Doc1#doc{
+        revs = {0, [<<"1">>]},
+        body = {[{<<"state">>, 3}]}
+    },
+    ?assertEqual({ok, {0, <<"2">>}}, fabric2_db:update_doc(Db, Doc5)),
+    {ok, Doc6} = fabric2_db:open_doc(Db, LDocId, []),
+    ?assertEqual(Doc5#doc{revs = {0, [<<"2">>]}}, Doc6).
+
+
+create_a_large_local_doc({Db, _}) ->
+    UUID = fabric2_util:uuid(),
+    LDocId = <<?LOCAL_DOC_PREFIX, UUID/binary>>,
+    Body = << <<"x">> || _ <- lists:seq(1, 300000) >>,
+    Doc1 = #doc{
+        id = LDocId,
+        revs = {0, []},
+        body = Body
+    },
+    ?assertEqual({ok, {0, <<"1">>}}, fabric2_db:update_doc(Db, Doc1)),
+    {ok, Doc2} = fabric2_db:open_doc(Db, Doc1#doc.id, []),
+    ?assertEqual(Doc1#doc{revs = {0, [<<"1">>]}}, Doc2),
+
+    % Read via fold_local_docs
+    {ok, Result} = fabric2_db:fold_local_docs(Db, fun(Data, Acc) ->
+        case Data of
+            {row, [{id, DocId} | _]} when LDocId =:= DocId ->
+                {ok, [Data | Acc]};
+            _ ->
+                {ok, Acc}
+        end
+    end, [], []),
+    ?assertEqual([{row, [
+        {id, LDocId},
+        {key, LDocId},
+        {value, {[{rev, <<"0-1">>}]}}
+    ]}], Result).
+
+
+create_2_large_local_docs({Db, _}) ->
+    % Create a large doc then overwrite with a smaller one. The reason is to
+    % ensure the previous one correctly clears its range before writing the
+    % new smaller one in its place.
+    UUID = fabric2_util:uuid(),
+    LDocId = <<?LOCAL_DOC_PREFIX, UUID/binary>>,
+    Body1 = << <<"x">> || _ <- lists:seq(1, 400000) >>,
+    Body2 = << <<"y">> || _ <- lists:seq(1, 150000) >>,
+
+    Doc1 = #doc{
+        id = LDocId,
+        revs = {0, []},
+        body = Body1
+    },
+
+    ?assertEqual({ok, {0, <<"1">>}}, fabric2_db:update_doc(Db, Doc1)),
+
+    Doc2 = Doc1#doc{body = Body2},
+    ?assertEqual({ok, {0, <<"1">>}}, fabric2_db:update_doc(Db, Doc2)),
+
+    {ok, Doc3} = fabric2_db:open_doc(Db, LDocId, []),
+    ?assertEqual(Doc2#doc{revs = {0, [<<"1">>]}}, Doc3).
+
+
+local_doc_with_previous_encoding({Db, _}) ->
+    #{db_prefix := DbPrefix} = Db,
+
+    Id = <<"_local/old_doc">>,
+    Body = {[{<<"x">>, 5}]},
+    Rev = <<"1">>,
+    Key = erlfdb_tuple:pack({?DB_LOCAL_DOCS, Id}, DbPrefix),
+
+    fabric2_fdb:transactional(Db, fun(TxDb) ->
+        #{tx := Tx} = TxDb,
+        Term = term_to_binary({Rev, Body}, [{minor_version, 1}]),
+        ok = erlfdb:set(Tx, Key, Term)
+    end),
+
+    % Read old doc
+    {ok, Doc1} = fabric2_db:open_doc(Db, Id, []),
+    ?assertEqual({0, [<<"1">>]}, Doc1#doc.revs),
+    ?assertEqual({[{<<"x">>, 5}]}, Doc1#doc.body),
+
+    % Read via fold_local_docs.
+    {ok, Result} = fabric2_db:fold_local_docs(Db, fun(Data, Acc) ->
+        case Data of
+            {row, [{id, DocId} | _]} when Id =:= DocId ->
+                {ok, [Data | Acc]};
+            _ ->
+                {ok, Acc}
+        end
+    end, [], []),
+    ?assertEqual([{row, [
+        {id, Id},
+        {key, Id},
+        {value, {[{rev, <<"0-1">>}]}}
+    ]}], Result),
+
+    % Update doc
+    NewBody = {[{<<"y">>, 6}]},
+    Doc2 = Doc1#doc{body = NewBody},
+    ?assertEqual({ok, {0, <<"2">>}}, fabric2_db:update_doc(Db, Doc2)),
+    {ok, Doc3} = fabric2_db:open_doc(Db, Doc2#doc.id, []),
+    ?assertEqual({0, [<<"2">>]}, Doc3#doc.revs),
+    ?assertEqual(NewBody, Doc3#doc.body),
+
+    % Old doc now has only the rev number in it
+    <<255, OldDocBin/binary>> = fabric2_fdb:transactional(Db, fun(TxDb) ->
+        #{tx := Tx} = TxDb,
+        erlfdb:wait(erlfdb:get(Tx, Key))
+    end),
+    Unpacked = erlfdb_tuple:unpack(OldDocBin),
+    ?assertMatch({?CURR_LDOC_FORMAT, <<"2">>, _}, Unpacked).
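
local_doc_with_previous_encoding seeds a value in the legacy on-disk format and checks that the first update rewrites it in the current one. A rough sketch of the two shapes it distinguishes; the real layout lives in fabric2_fdb, and Extra below stands in for whatever trailing metadata the current format carries (the test only pattern matches it):

% legacy: the whole local doc in a single term_to_binary value
old_local_value(Rev, Body) ->
    term_to_binary({Rev, Body}, [{minor_version, 1}]).

% current: a 16#FF-prefixed erlfdb tuple tagged with ?CURR_LDOC_FORMAT; the
% prefix cannot collide with term_to_binary output, which always starts with
% byte 131, and the body itself is written out as separate chunked rows
new_local_value(Rev, Extra) ->
    <<255, (erlfdb_tuple:pack({?CURR_LDOC_FORMAT, Rev, Extra}))/binary>>.
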
+ + +before_doc_update_skips_local_docs({Db0, _}) -> + + BduFun = fun(Doc, _, _) -> + Doc#doc{body = {[<<"bdu_was_here">>, true]}} + end, + + Db = Db0#{before_doc_update := BduFun}, + + LDoc1 = #doc{id = <<"_local/ldoc1">>}, + Doc1 = #doc{id = <<"doc1">>}, + + ?assertMatch({ok, {_, _}}, fabric2_db:update_doc(Db, LDoc1)), + ?assertMatch({ok, {_, _}}, fabric2_db:update_doc(Db, Doc1)), + + {ok, LDoc2} = fabric2_db:open_doc(Db, LDoc1#doc.id), + {ok, Doc2} = fabric2_db:open_doc(Db, Doc1#doc.id), + + ?assertEqual({[]}, LDoc2#doc.body), + ?assertEqual({[<<"bdu_was_here">>, true]}, Doc2#doc.body). + + +open_doc_opts({Db, _}) -> + % Build out state so that we can exercise each doc + % open option. This requires a live revision with + % an attachment, a conflict, and a deleted conflict. + DocId = couch_uuids:random(), + Att1 = couch_att:new([ + {name, <<"foo.txt">>}, + {type, <<"application/octet-stream">>}, + {att_len, 6}, + {data, <<"foobar">>}, + {encoding, identity}, + {md5, <<>>} + ]), + Doc1A = #doc{ + id = DocId, + atts = [Att1] + }, + {ok, {Pos1, Rev1A}} = fabric2_db:update_doc(Db, Doc1A), + Att2 = couch_att:store([ + {data, stub}, + {revpos, 1} + ], Att1), + Doc1B = Doc1A#doc{ + revs = {Pos1, [Rev1A]}, + atts = [Att2] + }, + {ok, {Pos2, Rev1B}} = fabric2_db:update_doc(Db, Doc1B), + + Rev2 = crypto:strong_rand_bytes(16), + Rev3 = crypto:strong_rand_bytes(16), + Rev4 = crypto:strong_rand_bytes(16), + + % Create a live conflict + Doc2 = #doc{ + id = DocId, + revs = {1, [Rev2]} + }, + {ok, _} = fabric2_db:update_doc(Db, Doc2, [replicated_changes]), + + % Create a deleted conflict + Doc3 = #doc{ + id = DocId, + revs = {1, [Rev3]} + }, + {ok, _} = fabric2_db:update_doc(Db, Doc3, [replicated_changes]), + Doc4 = #doc{ + id = DocId, + revs = {2, [Rev4, Rev3]}, + deleted = true + }, + {ok, _} = fabric2_db:update_doc(Db, Doc4, [replicated_changes]), + + OpenOpts1 = [ + revs_info, + conflicts, + deleted_conflicts, + local_seq, + {atts_since, [{Pos1, Rev1A}]} + ], + {ok, OpenedDoc1} = fabric2_db:open_doc(Db, DocId, OpenOpts1), + + #doc{ + id = DocId, + revs = {2, [Rev1B, Rev1A]}, + atts = [Att3], + meta = Meta + } = OpenedDoc1, + ?assertEqual(stub, couch_att:fetch(data, Att3)), + ?assertEqual( + {revs_info, Pos2, [{Rev1B, available}, {Rev1A, missing}]}, + lists:keyfind(revs_info, 1, Meta) + ), + ?assertEqual( + {conflicts, [{1, Rev2}]}, + lists:keyfind(conflicts, 1, Meta) + ), + ?assertEqual( + {deleted_conflicts, [{2, Rev4}]}, + lists:keyfind(deleted_conflicts, 1, Meta) + ), + ?assertMatch({_, <<_/binary>>}, lists:keyfind(local_seq, 1, Meta)), + + % Empty atts_since list + {ok, OpenedDoc2} = fabric2_db:open_doc(Db, DocId, [{atts_since, []}]), + #doc{atts = [Att4]} = OpenedDoc2, + ?assertNotEqual(stub, couch_att:fetch(data, Att4)), + + % Missing ancestor + Rev5 = crypto:strong_rand_bytes(16), + OpenOpts2 = [{atts_since, [{5, Rev5}]}], + {ok, OpenedDoc3} = fabric2_db:open_doc(Db, DocId, OpenOpts2), + #doc{atts = [Att5]} = OpenedDoc3, + ?assertNotEqual(stub, couch_att:fetch(data, Att5)). + diff --git a/src/fabric/test/fabric2_doc_fold_tests.erl b/src/fabric/test/fabric2_doc_fold_tests.erl new file mode 100644 index 000000000..0695b450b --- /dev/null +++ b/src/fabric/test/fabric2_doc_fold_tests.erl @@ -0,0 +1,378 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. 
You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(fabric2_doc_fold_tests). + + +-include_lib("couch/include/couch_db.hrl"). +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("eunit/include/eunit.hrl"). +-include("fabric2_test.hrl"). + + +-define(DOC_COUNT, 50). + + +doc_fold_test_() -> + { + "Test document fold operations", + { + setup, + fun setup_all/0, + fun teardown_all/1, + { + foreach, + fun setup/0, + fun cleanup/1, + [ + ?TDEF_FE(fold_docs_basic), + ?TDEF_FE(fold_docs_rev), + ?TDEF_FE(fold_docs_with_start_key), + ?TDEF_FE(fold_docs_with_end_key), + ?TDEF_FE(fold_docs_with_both_keys_the_same), + ?TDEF_FE(fold_docs_with_different_keys, 10000), + ?TDEF_FE(fold_docs_with_limit), + ?TDEF_FE(fold_docs_with_skip), + ?TDEF_FE(fold_docs_with_skip_and_limit), + ?TDEF_FE(fold_docs_tx_too_old), + ?TDEF_FE(fold_docs_db_recreated) + ] + } + } + }. + + +setup_all() -> + test_util:start_couch([fabric]). + + +teardown_all(Ctx) -> + test_util:stop_couch(Ctx). + + +setup() -> + {ok, Db} = fabric2_db:create(?tempdb(), [{user_ctx, ?ADMIN_USER}]), + DocIdRevs = lists:map(fun(Val) -> + DocId = fabric2_util:uuid(), + Doc = #doc{ + id = DocId, + body = {[{<<"value">>, Val}]} + }, + {ok, Rev} = fabric2_db:update_doc(Db, Doc, []), + {DocId, {[{rev, couch_doc:rev_to_str(Rev)}]}} + end, lists:seq(1, ?DOC_COUNT)), + meck:new(erlfdb, [passthrough]), + fabric2_test_util:tx_too_old_mock_erlfdb(), + {Db, lists:sort(DocIdRevs)}. + + +cleanup({Db, _DocIdRevs}) -> + fabric2_test_util:tx_too_old_reset_errors(), + meck:unload(), + ok = fabric2_db:delete(fabric2_db:name(Db), []). + + +fold_docs_basic({Db, DocIdRevs}) -> + {ok, {?DOC_COUNT, Rows}} = fabric2_db:fold_docs(Db, fun fold_fun/2, []), + ?assertEqual(DocIdRevs, lists:reverse(Rows)). + + +fold_docs_rev({Db, DocIdRevs}) -> + Opts = [{dir, rev}], + {ok, {?DOC_COUNT, Rows}} = + fabric2_db:fold_docs(Db, fun fold_fun/2, [], Opts), + ?assertEqual(DocIdRevs, Rows). + + +fold_docs_with_start_key({Db, DocIdRevs}) -> + {StartKey, _} = hd(DocIdRevs), + Opts = [{start_key, StartKey}], + {ok, {?DOC_COUNT, Rows}} + = fabric2_db:fold_docs(Db, fun fold_fun/2, [], Opts), + ?assertEqual(DocIdRevs, lists:reverse(Rows)), + if length(DocIdRevs) == 1 -> ok; true -> + fold_docs_with_start_key({Db, tl(DocIdRevs)}) + end. + + +fold_docs_with_end_key({Db, DocIdRevs}) -> + RevDocIdRevs = lists:reverse(DocIdRevs), + {EndKey, _} = hd(RevDocIdRevs), + Opts = [{end_key, EndKey}], + {ok, {?DOC_COUNT, Rows}} = + fabric2_db:fold_docs(Db, fun fold_fun/2, [], Opts), + ?assertEqual(RevDocIdRevs, Rows), + if length(DocIdRevs) == 1 -> ok; true -> + fold_docs_with_end_key({Db, lists:reverse(tl(RevDocIdRevs))}) + end. + + +fold_docs_with_both_keys_the_same({Db, DocIdRevs}) -> + lists:foreach(fun({DocId, _} = Row) -> + check_all_combos(Db, DocId, DocId, [Row]) + end, DocIdRevs). + + +fold_docs_with_different_keys({Db, DocIdRevs}) -> + lists:foreach(fun(_) -> + {StartKey, EndKey, Rows} = pick_range(DocIdRevs), + check_all_combos(Db, StartKey, EndKey, Rows) + end, lists:seq(1, 500)). 
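
All of the folds above go through fold_fun/2, defined at the end of this module, which implements the three-clause callback protocol that fabric2_db:fold_docs/4 expects. For reference, a self-contained sketch that just collects doc ids (not part of the suite):

collect_doc_ids(Db) ->
    FoldFun = fun
        ({meta, _Meta}, Acc) -> {ok, Acc};
        ({row, Row}, Acc) -> {ok, [fabric2_util:get_value(id, Row) | Acc]};
        (complete, Acc) -> {ok, Acc}
    end,
    {ok, Ids} = fabric2_db:fold_docs(Db, FoldFun, [], []),
    {ok, lists:reverse(Ids)}.
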
+
+
+fold_docs_with_limit({Db, DocIdRevs}) ->
+    lists:foreach(fun(Limit) ->
+        Opts1 = [{limit, Limit}],
+        {ok, {?DOC_COUNT, Rows1}} =
+            fabric2_db:fold_docs(Db, fun fold_fun/2, [], Opts1),
+        ?assertEqual(lists:sublist(DocIdRevs, Limit), lists:reverse(Rows1)),
+
+        Opts2 = [{dir, rev} | Opts1],
+        {ok, {?DOC_COUNT, Rows2}} =
+            fabric2_db:fold_docs(Db, fun fold_fun/2, [], Opts2),
+        ?assertEqual(
+            lists:sublist(lists:reverse(DocIdRevs), Limit),
+            lists:reverse(Rows2)
+        )
+    end, lists:seq(0, 51)).
+
+
+fold_docs_with_skip({Db, DocIdRevs}) ->
+    lists:foreach(fun(Skip) ->
+        Opts1 = [{skip, Skip}],
+        {ok, {?DOC_COUNT, Rows1}} =
+            fabric2_db:fold_docs(Db, fun fold_fun/2, [], Opts1),
+        Expect1 = case Skip > length(DocIdRevs) of
+            true -> [];
+            false -> lists:nthtail(Skip, DocIdRevs)
+        end,
+        ?assertEqual(Expect1, lists:reverse(Rows1)),
+
+        Opts2 = [{dir, rev} | Opts1],
+        {ok, {?DOC_COUNT, Rows2}} =
+            fabric2_db:fold_docs(Db, fun fold_fun/2, [], Opts2),
+        Expect2 = case Skip > length(DocIdRevs) of
+            true -> [];
+            false -> lists:nthtail(Skip, lists:reverse(DocIdRevs))
+        end,
+        ?assertEqual(Expect2, lists:reverse(Rows2))
+    end, lists:seq(0, 51)).
+
+
+fold_docs_with_skip_and_limit({Db, DocIdRevs}) ->
+    lists:foreach(fun(_) ->
+        check_skip_and_limit(Db, [], DocIdRevs),
+        check_skip_and_limit(Db, [{dir, rev}], lists:reverse(DocIdRevs))
+    end, lists:seq(1, 100)).
+
+
+fold_docs_tx_too_old({Db, _DocIdRevs}) ->
+    {ok, Expected} = fabric2_db:fold_docs(Db, fun fold_fun/2, []),
+
+    FoldDocsFun = fun() ->
+        fabric2_db:fold_docs(Db, fun fold_fun/2, [], [{restart_tx, true}])
+    end,
+
+    % Blow up in fold range on the first call
+    fabric2_test_util:tx_too_old_setup_errors(0, 1),
+    ?assertEqual({ok, Expected}, FoldDocsFun()),
+
+    % Blow up in fold_range after emitting one row
+    fabric2_test_util:tx_too_old_setup_errors(0, {1, 1}),
+    ?assertEqual({ok, Expected}, FoldDocsFun()),
+
+    % Blow up in fold_range after emitting 48 rows
+    fabric2_test_util:tx_too_old_setup_errors(0, {?DOC_COUNT - 2, 1}),
+    ?assertEqual({ok, Expected}, FoldDocsFun()),
+
+    % Blow up in fold_range after emitting 49 rows
+    fabric2_test_util:tx_too_old_setup_errors(0, {?DOC_COUNT - 1, 1}),
+    ?assertEqual({ok, Expected}, FoldDocsFun()),
+
+    % Blow up in user fun
+    fabric2_test_util:tx_too_old_setup_errors(1, 0),
+    ?assertEqual({ok, Expected}, FoldDocsFun()),
+
+    % Blow up in user fun after emitting one row
+    fabric2_test_util:tx_too_old_setup_errors({1, 1}, 0),
+    ?assertEqual({ok, Expected}, FoldDocsFun()),
+
+    % Blow up in user fun after emitting 48 rows
+    fabric2_test_util:tx_too_old_setup_errors({?DOC_COUNT - 2, 1}, 0),
+    ?assertEqual({ok, Expected}, FoldDocsFun()),
+
+    % Blow up in user fun after emitting 49 rows
+    fabric2_test_util:tx_too_old_setup_errors({?DOC_COUNT - 1, 1}, 0),
+    ?assertEqual({ok, Expected}, FoldDocsFun()),
+
+    % Blow up in user fun and fold range
+    fabric2_test_util:tx_too_old_setup_errors(1, {1, 1}),
+    ?assertEqual({ok, Expected}, FoldDocsFun()).
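
The tx_too_old cases above drive fabric2_test_util:tx_too_old_setup_errors/2, where the first argument injects transaction_too_old errors into the user fun and the second into the underlying fold_range call; judging from the comments, a bare N means "fail N times straight away" and {Skip, N} means "let Skip rows through first, then fail N times". Restating one of the cases above with that reading (Expected and FoldDocsFun as bound in fold_docs_tx_too_old; this is an interpretation of the helper, not its documentation):

% let one row reach the user fun, then raise tx_too_old once there;
% leave the fold_range call itself untouched
fabric2_test_util:tx_too_old_setup_errors({1, 1}, 0),
% with {restart_tx, true} the fold restarts and still returns every row
?assertEqual({ok, Expected}, FoldDocsFun()).
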
+ + +fold_docs_db_recreated({Db, _DocIdRevs}) -> + DbName = fabric2_db:name(Db), + + RecreateDb = fun() -> + ok = fabric2_db:delete(DbName, []), + {ok, _} = fabric2_db:create(DbName, []) + end, + + FoldFun = fun + ({meta, _}, Acc) -> + {ok, Acc}; + ({row, Row}, Acc) -> + fabric2_test_util:tx_too_old_raise_in_user_fun(), + % After meta and one row emitted, recreate the db + case length(Acc) =:= 1 of + true -> RecreateDb(); + false -> ok + end, + {ok, [Row | Acc]}; + (complete, Acc) -> + {ok, Acc} + end, + % Blow up in user fun after emitting two rows + fabric2_test_util:tx_too_old_setup_errors({2, 1}, 0), + ?assertError(database_does_not_exist, fabric2_db:fold_docs(Db, FoldFun, + [], [{restart_tx, true}])). + + +check_all_combos(Db, StartKey, EndKey, Rows) -> + Opts1 = make_opts(fwd, StartKey, EndKey, true), + {ok, {?DOC_COUNT, Rows1}} = + fabric2_db:fold_docs(Db, fun fold_fun/2, [], Opts1), + ?assertEqual(lists:reverse(Rows), Rows1), + check_skip_and_limit(Db, Opts1, Rows), + + Opts2 = make_opts(fwd, StartKey, EndKey, false), + {ok, {?DOC_COUNT, Rows2}} = + fabric2_db:fold_docs(Db, fun fold_fun/2, [], Opts2), + Expect2 = if EndKey == undefined -> lists:reverse(Rows); true -> + lists:reverse(all_but_last(Rows)) + end, + ?assertEqual(Expect2, Rows2), + check_skip_and_limit(Db, Opts2, lists:reverse(Expect2)), + + Opts3 = make_opts(rev, StartKey, EndKey, true), + {ok, {?DOC_COUNT, Rows3}} = + fabric2_db:fold_docs(Db, fun fold_fun/2, [], Opts3), + ?assertEqual(Rows, Rows3), + check_skip_and_limit(Db, Opts3, lists:reverse(Rows)), + + Opts4 = make_opts(rev, StartKey, EndKey, false), + {ok, {?DOC_COUNT, Rows4}} = + fabric2_db:fold_docs(Db, fun fold_fun/2, [], Opts4), + Expect4 = if StartKey == undefined -> Rows; true -> + tl(Rows) + end, + ?assertEqual(Expect4, Rows4), + check_skip_and_limit(Db, Opts4, lists:reverse(Expect4)). + + +check_skip_and_limit(Db, Opts, []) -> + Skip = rand:uniform(?DOC_COUNT + 1) - 1, + Limit = rand:uniform(?DOC_COUNT + 1) - 1, + NewOpts = [{skip, Skip}, {limit, Limit} | Opts], + {ok, {?DOC_COUNT, OutRows}} = + fabric2_db:fold_docs(Db, fun fold_fun/2, [], NewOpts), + ?assertEqual([], OutRows); + +check_skip_and_limit(Db, Opts, Rows) -> + Skip = rand:uniform(length(Rows) + 1) - 1, + Limit = rand:uniform(?DOC_COUNT + 1 - Skip) - 1, + + ExpectRows = case Skip >= length(Rows) of + true -> + []; + false -> + lists:sublist(lists:nthtail(Skip, Rows), Limit) + end, + + SkipLimitOpts = [{skip, Skip}, {limit, Limit} | Opts], + {ok, {?DOC_COUNT, RevRows}} = + fabric2_db:fold_docs(Db, fun fold_fun/2, [], SkipLimitOpts), + OutRows = lists:reverse(RevRows), + ?assertEqual(ExpectRows, OutRows). + + +make_opts(fwd, StartKey, EndKey, InclusiveEnd) -> + DirOpts = case rand:uniform() =< 0.50 of + true -> [{dir, fwd}]; + false -> [] + end, + StartOpts = case StartKey of + undefined -> []; + <<_/binary>> -> [{start_key, StartKey}] + end, + EndOpts = case EndKey of + undefined -> []; + <<_/binary>> when InclusiveEnd -> [{end_key, EndKey}]; + <<_/binary>> -> [{end_key_gt, EndKey}] + end, + DirOpts ++ StartOpts ++ EndOpts; +make_opts(rev, StartKey, EndKey, InclusiveEnd) -> + BaseOpts = make_opts(fwd, EndKey, StartKey, InclusiveEnd), + [{dir, rev}] ++ BaseOpts -- [{dir, fwd}]. + + +all_but_last([]) -> + []; +all_but_last([_]) -> + []; +all_but_last(Rows) -> + lists:sublist(Rows, length(Rows) - 1). 
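
check_all_combos exercises every combination of direction and end-key inclusivity; the option names generated by make_opts/4 are the ones fabric2_db:fold_docs understands, with end_key being inclusive and end_key_gt exclusive. A small usage sketch outside the combinatorics, where StartDocId and EndDocId are placeholder bindings:

% ascending fold over StartDocId =< id < EndDocId
Opts = [{start_key, StartDocId}, {end_key_gt, EndDocId}],
{ok, {_Total, Rows}} = fabric2_db:fold_docs(Db, fun fold_fun/2, [], Opts).
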
+ + +pick_range(DocIdRevs) -> + {StartKey, StartRow, RestRows} = pick_start_key(DocIdRevs), + {EndKey, EndRow, RowsBetween} = pick_end_key(RestRows), + {StartKey, EndKey, StartRow ++ RowsBetween ++ EndRow}. + + +pick_start_key(Rows) -> + case rand:uniform() =< 0.1 of + true -> + {undefined, [], Rows}; + false -> + Idx = rand:uniform(length(Rows)), + {DocId, _} = Row = lists:nth(Idx, Rows), + {DocId, [Row], lists:nthtail(Idx, Rows)} + end. + + +pick_end_key([]) -> + {undefined, [], []}; + +pick_end_key(Rows) -> + case rand:uniform() =< 0.1 of + true -> + {undefined, [], Rows}; + false -> + Idx = rand:uniform(length(Rows)), + {DocId, _} = Row = lists:nth(Idx, Rows), + Tail = lists:nthtail(Idx, Rows), + {DocId, [Row], Rows -- [Row | Tail]} + end. + + +fold_fun({meta, Meta}, _Acc) -> + Total = fabric2_util:get_value(total, Meta), + {ok, {Total, []}}; +fold_fun({row, Row}, {Total, Rows}) -> + fabric2_test_util:tx_too_old_raise_in_user_fun(), + RowId = fabric2_util:get_value(id, Row), + RowId = fabric2_util:get_value(key, Row), + RowRev = fabric2_util:get_value(value, Row), + {ok, {Total, [{RowId, RowRev} | Rows]}}; +fold_fun(complete, Acc) -> + {ok, Acc}. diff --git a/src/fabric/test/fabric2_doc_size_tests.erl b/src/fabric/test/fabric2_doc_size_tests.erl new file mode 100644 index 000000000..1e3dca4f6 --- /dev/null +++ b/src/fabric/test/fabric2_doc_size_tests.erl @@ -0,0 +1,320 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(fabric2_doc_size_tests). + + +-include_lib("couch/include/couch_db.hrl"). +-include_lib("eunit/include/eunit.hrl"). + + +% Doc body size calculations +% ID: size(Doc#doc.id) +% Rev: size(erlfdb_tuple:encode(Start)) + size(Rev) % where Rev is usually 16 +% Deleted: 1 % (binary value is one byte) +% Body: couch_ejson_size:external_size(Body) % Where empty is {} which is 2) + + +-define(NUM_RANDOM_TESTS, 1000). + + +-define(DOC_IDS, [ + {0, <<>>}, + {1, <<"a">>}, + {3, <<"foo">>}, + {6, <<"foobar">>}, + {32, <<"af196ae095631b020eedf8f69303e336">>} +]). + +-define(REV_STARTS, [ + {1, 0}, + {2, 1}, + {2, 255}, + {3, 256}, + {3, 65535}, + {4, 65536}, + {4, 16777215}, + {5, 16777216}, + {5, 4294967295}, + {6, 4294967296}, + {6, 1099511627775}, + {7, 1099511627776}, + {7, 281474976710655}, + {8, 281474976710656}, + {8, 72057594037927935}, + {9, 72057594037927936}, + {9, 18446744073709551615}, + + % The jump from 9 to 11 bytes is because when we + % spill over into the bigint range of 9-255 + % bytes we have an extra byte that encodes the + % length of the bigint. + {11, 18446744073709551616} +]). + +-define(REVS, [ + {0, <<>>}, + {8, <<"foobarba">>}, + {16, <<"foobarbazbambang">>} +]). + +-define(DELETED, [ + {1, true}, + {1, false} +]). + +-define(BODIES, [ + {2, {[]}}, + {13, {[{<<"foo">>, <<"bar">>}]}}, + {28, {[{<<"b">>, <<"a">>}, {<<"c">>, [true, null, []]}]}} +]). + +-define(ATT_NAMES, [ + {5, <<"a.txt">>}, + {7, <<"foo.csv">>}, + {29, <<"a-longer-name-for-example.bat">>} +]). 
+ +-define(ATT_TYPES, [ + {24, <<"application/octet-stream">>}, + {10, <<"text/plain">>}, + {9, <<"image/png">>} +]). + +-define(ATT_BODIES, [ + {0, <<>>}, + {1, <<"g">>}, + {6, <<"foobar">>}, + {384, << + "xlasdjfsapoiewrposdlfadfuaducvwerwlkdsfljdfusfsd" + "xlasdjfsapoiewrposdlfadfuaducvwerwlkdsfljdfusfsd" + "xlasdjfsapoiewrposdlfadfuaducvwerwlkdsfljdfusfsd" + "xlasdjfsapoiewrposdlfadfuaducvwerwlkdsfljdfusfsd" + "xlasdjfsapoiewrposdlfadfuaducvwerwlkdsfljdfusfsd" + "xlasdjfsapoiewrposdlfadfuaducvwerwlkdsfljdfusfsd" + "xlasdjfsapoiewrposdlfadfuaducvwerwlkdsfljdfusfsd" + "xlasdjfsapoiewrposdlfadfuaducvwerwlkdsfljdfusfsd" + >>} +]). + +-define(LDOC_IDS, [ + {8, <<"_local/a">>}, + {10, <<"_local/foo">>}, + {13, <<"_local/foobar">>}, + {39, <<"_local/af196ae095631b020eedf8f69303e336">>} +]). + +-define(LDOC_REVS, [ + {1, <<"0">>}, + {2, <<"10">>}, + {3, <<"100">>}, + {4, <<"1000">>}, + {5, <<"10000">>}, + {6, <<"100000">>}, + {7, <<"1000000">>} +]). + + +empty_doc_test() -> + ?assertEqual(4, fabric2_util:rev_size(#doc{})). + + +docid_size_test() -> + lists:foreach(fun({Size, DocId}) -> + ?assertEqual(4 + Size, fabric2_util:rev_size(#doc{id = DocId})) + end, ?DOC_IDS). + + +rev_size_test() -> + lists:foreach(fun({StartSize, Start}) -> + lists:foreach(fun({RevSize, Rev}) -> + Doc = #doc{ + revs = {Start, [Rev]} + }, + ?assertEqual(3 + StartSize + RevSize, fabric2_util:rev_size(Doc)) + end, ?REVS) + end, ?REV_STARTS). + + +deleted_size_test() -> + lists:foreach(fun({Size, Deleted}) -> + ?assertEqual(3 + Size, fabric2_util:rev_size(#doc{deleted = Deleted})) + end, ?DELETED). + + +body_size_test() -> + lists:foreach(fun({Size, Body}) -> + ?assertEqual(2 + Size, fabric2_util:rev_size(#doc{body = Body})) + end, ?BODIES). + + +att_names_test() -> + lists:foreach(fun({Size, AttName}) -> + Att = mk_att(AttName, <<>>, <<>>, false), + Doc = #doc{atts = [Att]}, + ?assertEqual(4 + Size, fabric2_util:rev_size(Doc)) + end, ?ATT_NAMES). + + +att_types_test() -> + lists:foreach(fun({Size, AttType}) -> + Att = mk_att(<<"foo">>, AttType, <<>>, false), + Doc = #doc{atts = [Att]}, + ?assertEqual(7 + Size, fabric2_util:rev_size(Doc)) + end, ?ATT_TYPES). + + +att_bodies_test() -> + lists:foreach(fun({Size, AttBody}) -> + Att1 = mk_att(<<"foo">>, <<>>, AttBody, false), + Doc1 = #doc{atts = [Att1]}, + ?assertEqual(7 + Size, fabric2_util:rev_size(Doc1)), + + Att2 = mk_att(<<"foo">>, <<>>, AttBody, true), + Doc2 = #doc{atts = [Att2]}, + ?assertEqual(7 + 16 + Size, fabric2_util:rev_size(Doc2)) + end, ?ATT_BODIES). + + +local_doc_ids_test() -> + lists:foreach(fun({Size, LDocId}) -> + ?assertEqual(3 + Size, fabric2_util:ldoc_size(mk_ldoc(LDocId, 0))) + end, ?LDOC_IDS). + + +local_doc_revs_test() -> + lists:foreach(fun({Size, Rev}) -> + Doc = mk_ldoc(<<"_local/foo">>, Rev), + ?assertEqual(12 + Size, fabric2_util:ldoc_size(Doc)) + end, ?LDOC_REVS). + + +local_doc_bodies_test() -> + lists:foreach(fun({Size, Body}) -> + Doc = mk_ldoc(<<"_local/foo">>, 0, Body), + ?assertEqual(11 + Size, fabric2_util:ldoc_size(Doc)) + end, ?BODIES). + + +doc_combinatorics_test() -> + Elements = [ + {?DOC_IDS, fun(Doc, DocId) -> Doc#doc{id = DocId} end}, + {?REV_STARTS, fun(Doc, RevStart) -> + #doc{revs = {_, RevIds}} = Doc, + Doc#doc{revs = {RevStart, RevIds}} + end}, + {?REVS, fun(Doc, Rev) -> + #doc{revs = {Start, _}} = Doc, + Doc#doc{revs = {Start, [Rev]}} + end}, + {?DELETED, fun(Doc, Deleted) -> Doc#doc{deleted = Deleted} end}, + {?BODIES, fun(Doc, Body) -> Doc#doc{body = Body} end} + ], + doc_combine(Elements, 0, #doc{}). 
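
The per-field sizes asserted above are additive, so a composite case may help. Taking the byte counts from the defines (a 3-byte id, rev position 1 encoding to 2 bytes, a 16-byte rev id, 1 byte for the deleted flag, and the 13-byte EJSON body), the expected rev_size is 3 + 2 + 16 + 1 + 13 = 35:

% a worked instance of the additive size model tested above
SizedDoc = #doc{
    id = <<"foo">>,
    revs = {1, [<<"foobarbazbambang">>]},
    deleted = false,
    body = {[{<<"foo">>, <<"bar">>}]}
},
?assertEqual(35, fabric2_util:rev_size(SizedDoc)).
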
+ + +doc_combine([], TotalSize, Doc) -> + ?assertEqual(TotalSize, fabric2_util:rev_size(Doc)); + +doc_combine([{Elems, UpdateFun} | Rest], TotalSize, Doc) -> + lists:foreach(fun({Size, Elem}) -> + doc_combine(Rest, TotalSize + Size, UpdateFun(Doc, Elem)) + end, Elems). + + +local_doc_combinatorics_test() -> + Elements = [ + {?LDOC_IDS, fun(Doc, DocId) -> Doc#doc{id = DocId} end}, + {?LDOC_REVS, fun(Doc, Rev) -> Doc#doc{revs = {0, [Rev]}} end}, + {?BODIES, fun(Doc, Body) -> Doc#doc{body = Body} end} + ], + local_doc_combine(Elements, 0, #doc{}). + + +local_doc_combine([], TotalSize, Doc) -> + ?assertEqual(TotalSize, fabric2_util:ldoc_size(Doc)); + +local_doc_combine([{Elems, UpdateFun} | Rest], TotalSize, Doc) -> + lists:foreach(fun({Size, Elem}) -> + local_doc_combine(Rest, TotalSize + Size, UpdateFun(Doc, Elem)) + end, Elems). + + +random_docs_test() -> + lists:foreach(fun(_) -> + {DocIdSize, DocId} = choose(?DOC_IDS), + {RevStartSize, RevStart} = choose(?REV_STARTS), + {RevSize, Rev} = choose(?REVS), + {DeletedSize, Deleted} = choose(?DELETED), + {BodySize, Body} = choose(?BODIES), + NumAtts = choose([0, 1, 2, 5]), + {Atts, AttSize} = lists:mapfoldl(fun(_, Acc) -> + {S, A} = random_att(), + {A, Acc + S} + end, 0, lists:seq(1, NumAtts)), + Doc = #doc{ + id = DocId, + revs = {RevStart, [Rev]}, + deleted = Deleted, + body = Body, + atts = Atts + }, + Expect = lists:sum([ + DocIdSize, + RevStartSize, + RevSize, + DeletedSize, + BodySize, + AttSize + ]), + ?assertEqual(Expect, fabric2_util:rev_size(Doc)) + end, lists:seq(1, ?NUM_RANDOM_TESTS)). + + +random_att() -> + {NameSize, Name} = choose(?ATT_NAMES), + {TypeSize, Type} = choose(?ATT_TYPES), + {BodySize, Body} = choose(?ATT_BODIES), + {Md5Size, AddMd5} = choose([{0, false}, {16, true}]), + AttSize = lists:sum([NameSize, TypeSize, BodySize, Md5Size]), + {AttSize, mk_att(Name, Type, Body, AddMd5)}. + + +mk_att(Name, Type, Data, AddMd5) -> + Md5 = if not AddMd5 -> <<>>; true -> + erlang:md5(Data) + end, + couch_att:new([ + {name, Name}, + {type, Type}, + {att_len, size(Data)}, + {data, Data}, + {encoding, identity}, + {md5, Md5} + ]). + + +mk_ldoc(DocId, Rev) -> + mk_ldoc(DocId, Rev, {[]}). + + +mk_ldoc(DocId, Rev, Body) -> + #doc{ + id = DocId, + revs = {0, [Rev]}, + body = Body + }. + + +choose(Options) -> + Pos = rand:uniform(length(Options)), + lists:nth(Pos, Options). diff --git a/src/fabric/test/fabric2_fdb_tx_retry_tests.erl b/src/fabric/test/fabric2_fdb_tx_retry_tests.erl new file mode 100644 index 000000000..7fb0f21d0 --- /dev/null +++ b/src/fabric/test/fabric2_fdb_tx_retry_tests.erl @@ -0,0 +1,176 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(fabric2_fdb_tx_retry_tests). + + +-include_lib("eunit/include/eunit.hrl"). +-include("fabric2_test.hrl"). 
+ + +retry_test_() -> + { + setup, + fun setup/0, + fun cleanup/1, + with([ + ?TDEF(read_only_no_retry), + ?TDEF(read_only_commit_unknown_result), + ?TDEF(run_on_first_try), + ?TDEF(retry_when_commit_conflict), + ?TDEF(retry_when_txid_not_found), + ?TDEF(no_retry_when_txid_found) + ]) + }. + + +setup() -> + meck:new(erlfdb), + meck:new(fabric2_txids), + EnvSt = case application:get_env(fabric, db) of + {ok, Db} -> {ok, Db}; + undefined -> undefined + end, + application:set_env(fabric, db, not_a_real_db), + EnvSt. + + +cleanup(EnvSt) -> + case EnvSt of + {ok, Db} -> application:set_env(fabric, db, Db); + undefined -> application:unset_env(fabric, db) + end, + meck:unload(). + + +read_only_no_retry(_) -> + meck:expect(erlfdb, transactional, fun(_Db, UserFun) -> + UserFun(not_a_real_transaction) + end), + meck:expect(erlfdb, get_last_error, fun() -> 0 end), + meck:expect(erlfdb, get, fun(_, _) -> foo end), + meck:expect(erlfdb, is_read_only, fun(_) -> true end), + meck:expect(fabric2_txids, remove, fun(undefined) -> ok end), + + Result = fabric2_fdb:transactional(fun(Tx) -> + ?assertEqual(foo, erlfdb:get(Tx, bar)), + did_run + end), + + ?assertEqual(did_run, Result), + ?assert(meck:validate([erlfdb, fabric2_txids])). + + +read_only_commit_unknown_result(_) -> + % Not 100% certain that this would ever actually + % happen in the wild but might as well test that + % we don't blow up if it does. + meck:expect(erlfdb, transactional, fun(_Db, UserFun) -> + UserFun(not_a_real_transaction) + end), + meck:expect(erlfdb, get_last_error, fun() -> 1021 end), + meck:expect(erlfdb, get, fun(_, _) -> foo end), + meck:expect(erlfdb, is_read_only, fun(_) -> true end), + meck:expect(fabric2_txids, remove, fun(undefined) -> ok end), + + Result = fabric2_fdb:transactional(fun(Tx) -> + ?assertEqual(foo, erlfdb:get(Tx, bar)), + did_run + end), + + ?assertEqual(did_run, Result), + ?assert(meck:validate([erlfdb, fabric2_txids])). + + +run_on_first_try(_) -> + meck:expect(erlfdb, transactional, fun(_Db, UserFun) -> + UserFun(not_a_real_transaction) + end), + meck:expect(erlfdb, get_last_error, fun() -> undefined end), + meck:expect(erlfdb, clear, fun(_, _) -> ok end), + meck:expect(erlfdb, is_read_only, fun(_) -> false end), + meck:expect(fabric2_txids, create, fun(_, _) -> <<"a txid">> end), + meck:expect(erlfdb, set, fun(_, <<"a txid">>, <<>>) -> ok end), + meck:expect(fabric2_txids, remove, fun(<<"a txid">>) -> ok end), + + Result = fabric2_fdb:transactional(fun(Tx) -> + ?assertEqual(ok, erlfdb:clear(Tx, bang)), + did_run + end), + + ?assertEqual(did_run, Result), + ?assert(meck:validate([erlfdb, fabric2_txids])). + + +retry_when_commit_conflict(_) -> + meck:expect(erlfdb, transactional, fun(_Db, UserFun) -> + UserFun(not_a_real_transaction) + end), + meck:expect(erlfdb, get_last_error, fun() -> 1020 end), + meck:expect(erlfdb, clear, fun(_, _) -> ok end), + meck:expect(erlfdb, is_read_only, fun(_) -> false end), + meck:expect(fabric2_txids, create, fun(_, _) -> <<"a txid">> end), + meck:expect(erlfdb, set, fun(_, <<"a txid">>, <<>>) -> ok end), + meck:expect(fabric2_txids, remove, fun(<<"a txid">>) -> ok end), + + Result = fabric2_fdb:transactional(fun(Tx) -> + ?assertEqual(ok, erlfdb:clear(Tx, <<"foo">>)), + did_run + end), + + ?assertEqual(did_run, Result), + ?assert(meck:validate([erlfdb, fabric2_txids])). 
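
Taken together with the two txid cases that follow, these mocks pin down the retry rules the wrapper is expected to follow: a commit conflict (erlfdb error 1020) always retries, an unknown commit result (1021) retries only when the transaction id marker written before commit cannot be found, and read-only transactions never write a marker at all. A rough decision-table sketch of what the expectations imply; the real logic lives in fabric2_fdb:transactional/1 and is more involved than this:

% Before each attempt the wrapper asks whether the previous attempt might
% already have committed, and only skips the user fun (returning the cached
% '$fabric_tx_result') when the commit result was unknown and our txid
% marker survived.
run_user_fun_this_attempt(ReadOnly, LastError, TxIdFound) ->
    case {ReadOnly, LastError, TxIdFound} of
        {true, _, _} -> true;          % read-only txs never write a marker
        {false, 1021, true} -> false;  % previous commit actually applied
        _ -> true                      % first try, conflict (1020), or
                                       % unknown result with no marker
    end.
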
+ + +retry_when_txid_not_found(_) -> + meck:expect(erlfdb, transactional, fun(_Db, UserFun) -> + UserFun(not_a_real_transaction) + end), + meck:expect(erlfdb, get_last_error, fun() -> 1021 end), + meck:expect(erlfdb, get, fun(_, <<"a txid">>) -> future end), + meck:expect(erlfdb, wait, fun(future) -> not_found end), + meck:expect(erlfdb, clear, fun(_, _) -> ok end), + meck:expect(erlfdb, is_read_only, fun(_) -> false end), + meck:expect(erlfdb, set, fun(_, <<"a txid">>, <<>>) -> ok end), + meck:expect(fabric2_txids, remove, fun(<<"a txid">>) -> ok end), + + put('$fabric_tx_id', <<"a txid">>), + put('$fabric_tx_result', not_the_correct_result), + + Result = fabric2_fdb:transactional(fun(Tx) -> + ?assertEqual(ok, erlfdb:clear(Tx, <<"foo">>)), + yay_not_skipped + end), + + ?assertEqual(yay_not_skipped, Result), + ?assert(meck:validate([erlfdb, fabric2_txids])). + + +no_retry_when_txid_found(_) -> + meck:expect(erlfdb, transactional, fun(_Db, UserFun) -> + UserFun(not_a_real_transaction) + end), + meck:expect(erlfdb, get_last_error, fun() -> 1021 end), + meck:expect(erlfdb, get, fun(_, <<"a txid">>) -> future end), + meck:expect(erlfdb, wait, fun(future) -> <<>> end), + meck:expect(fabric2_txids, remove, fun(<<"a txid">>) -> ok end), + + put('$fabric_tx_id', <<"a txid">>), + put('$fabric_tx_result', did_not_run), + + Result = fabric2_fdb:transactional(fun(_Tx) -> + ?assert(false), + did_run + end), + + ?assertEqual(did_not_run, Result), + ?assert(meck:validate([erlfdb, fabric2_txids])).
\ No newline at end of file diff --git a/src/fabric/test/fabric2_get_design_docs_tests.erl b/src/fabric/test/fabric2_get_design_docs_tests.erl new file mode 100644 index 000000000..eb227835c --- /dev/null +++ b/src/fabric/test/fabric2_get_design_docs_tests.erl @@ -0,0 +1,138 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(fabric2_get_design_docs_tests). + + +-include_lib("couch/include/couch_db.hrl"). +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("eunit/include/eunit.hrl"). +-include("fabric2_test.hrl"). + + +get_design_docs_test_() -> + { + "Test get_design_docs", + { + setup, + fun setup_all/0, + fun cleanup_all/1, + { + foreach, + fun setup/0, + fun cleanup/1, + [ + ?TDEF_FE(empty_db), + ?TDEF_FE(get_one), + ?TDEF_FE(get_two), + ?TDEF_FE(get_many), + ?TDEF_FE(get_many_with_regular_docs), + ?TDEF_FE(dont_return_deleted_ddocs) + ] + } + } + }. + + +setup_all() -> + test_util:start_couch([fabric]). + + +cleanup_all(Ctx) -> + test_util:stop_couch(Ctx). + + +setup() -> + {ok, Db} = fabric2_db:create(?tempdb(), [{user_ctx, ?ADMIN_USER}]), + Db. + + +cleanup(Db) -> + ok = fabric2_db:delete(fabric2_db:name(Db), []). + + +empty_db(Db) -> + DDocs = fabric2_db:get_design_docs(Db), + ?assertEqual([], DDocs). + + +get_one(Db) -> + DDoc = create_ddoc(Db, <<"foo">>), + DDocs = fabric2_db:get_design_docs(Db), + ?assertEqual([DDoc], DDocs). + + +get_two(Db) -> + DDoc1 = create_ddoc(Db, <<"foo">>), + DDoc2 = create_ddoc(Db, <<"bar">>), + DDocs = fabric2_db:get_design_docs(Db), + % DDocs come back sorted + ?assertEqual([DDoc2, DDoc1], DDocs). + + +get_many(Db) -> + DDocsIn = lists:map(fun(Seq) -> + Id = io_lib:format("~2..0b", [Seq]), + create_ddoc(Db, iolist_to_binary(Id)) + end, lists:seq(1, 10)), + DDocsOut = fabric2_db:get_design_docs(Db), + ?assertEqual(DDocsIn, DDocsOut). + + +get_many_with_regular_docs(Db) -> + RegularIds = [ + <<"0">>, + <<"012aCb">>, + <<"Another_doc">>, + <<"Znother_doc">>, + <<"a_doc_as_well">>, + <<"zebra_doc">> + ], + lists:foreach(fun(DocId) -> + create_doc(Db, DocId) + end, RegularIds), + DDocsIn = lists:map(fun(Seq) -> + Id = io_lib:format("~2..0b", [Seq]), + create_ddoc(Db, iolist_to_binary(Id)) + end, lists:seq(1, 10)), + DDocsOut = fabric2_db:get_design_docs(Db), + ?assertEqual(DDocsIn, DDocsOut). + + +dont_return_deleted_ddocs(Db) -> + DDocsIn = lists:flatmap(fun(Seq) -> + Id = io_lib:format("~2..0b", [Seq]), + DDoc = create_ddoc(Db, iolist_to_binary(Id)), + case Seq rem 2 == 0 of + true -> + delete_ddoc(Db, DDoc), + []; + false -> + [DDoc] + end + end, lists:seq(1, 10)), + DDocsOut = fabric2_db:get_design_docs(Db), + ?assertEqual(DDocsIn, DDocsOut). + + +create_ddoc(Db, Id) -> + create_doc(Db, <<"_design/", Id/binary>>). + + +delete_ddoc(Db, DDoc) -> + {ok, _} = fabric2_db:update_doc(Db, DDoc#doc{deleted = true}). + + +create_doc(Db, Id) -> + Doc = #doc{id = Id}, + {ok, {Pos, Rev}} = fabric2_db:update_doc(Db, Doc), + Doc#doc{revs = {Pos, [Rev]}}. 
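
One detail worth noting in the get_design_docs tests above: the generated ids are zero-padded with io_lib:format("~2..0b", [Seq]) so that lexicographic doc id order matches the numeric sequence, which is what lets the suite compare DDocsIn against the sorted DDocsOut with a plain ?assertEqual. A one-line illustration:

% produces <<"01">>, <<"02">>, ..., <<"10">>, which sort as generated
[iolist_to_binary(io_lib:format("~2..0b", [Seq])) || Seq <- lists:seq(1, 10)].
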
diff --git a/src/fabric/test/fabric2_index_tests.erl b/src/fabric/test/fabric2_index_tests.erl
new file mode 100644
index 000000000..8a4acb77d
--- /dev/null
+++ b/src/fabric/test/fabric2_index_tests.erl
@@ -0,0 +1,304 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-module(fabric2_index_tests).
+
+
+-include_lib("couch/include/couch_eunit.hrl").
+-include_lib("eunit/include/eunit.hrl").
+-include_lib("couch/include/couch_db.hrl").
+-include("fabric2_test.hrl").
+
+
+% Should match fabric2_index define
+-define(SHARDS, 32).
+
+
+index_test_() ->
+    {
+        "Test fabric indexing module",
+        {
+            setup,
+            fun setup/0,
+            fun cleanup/1,
+            with([
+                ?TDEF(register_index_works),
+                ?TDEF(single_update),
+                ?TDEF(multiple_updates),
+                ?TDEF(skip_db_if_no_ddocs),
+                ?TDEF(ignore_deleted_dbs, 10),
+                ?TDEF(check_gen_server_messages)
+            ])
+        }
+    }.
+
+
+index_process_cleanup_test_() ->
+    {
+        "Test fabric process cleanup in indexing module",
+        {
+            foreach,
+            fun setup/0,
+            fun cleanup/1,
+            [
+                ?TDEF_FE(updater_processes_start, 15),
+                ?TDEF_FE(updater_processes_stop, 15),
+                ?TDEF_FE(indexing_can_be_disabled),
+                ?TDEF_FE(handle_indexer_blowing_up)
+            ]
+        }
+    }.
+
+
+setup() ->
+    meck:new(config, [passthrough]),
+    meck:expect(config, get_integer, fun
+        ("fabric", "index_updater_delay_msec", _) -> 200;
+        ("fabric", "index_updater_resolution_msec", _) -> 100;
+
+        (_, _, Default) -> Default
+    end),
+    meck:expect(config, get_boolean, fun
+        ("fabric", "index_updater_enabled", _) -> true;
+        (_, _, Default) -> Default
+    end),
+
+    Indices = application:get_env(fabric, indices, []),
+
+    Ctx = test_util:start_couch([fabric]),
+
+    % Db1 has a valid design doc, a deleted one and one with "autoupdate":false
+    {ok, Db1} = fabric2_db:create(?tempdb(), [?ADMIN_CTX]),
+    {_, _} = create_doc(Db1, <<"_design/doc1">>),
+
+    DDocId2 = <<"_design/doc2">>,
+    {DDocId2, {Pos, Rev}} = create_doc(Db1, DDocId2),
+    Delete2 = #doc{id = DDocId2, revs = {Pos, [Rev]}, deleted = true},
+    {ok, _} = fabric2_db:update_doc(Db1, Delete2),
+
+    NoAutoUpdate = {[{<<"autoupdate">>, false}]},
+    {_, _} = create_doc(Db1, <<"_design/doc3">>, NoAutoUpdate),
+
+    % Db2 doesn't have any design documents
+    {ok, Db2} = fabric2_db:create(?tempdb(), [?ADMIN_CTX]),
+
+    #{db1 => Db1, db2 => Db2, ctx => Ctx, indices => Indices}.
+
+
+cleanup(#{db1 := Db1, db2 := Db2, ctx := Ctx, indices := Indices}) ->
+    catch fabric2_db:delete(fabric2_db:name(Db1), []),
+    catch fabric2_db:delete(fabric2_db:name(Db2), []),
+
+    test_util:stop_couch(Ctx),
+    application:set_env(fabric, indices, Indices),
+
+    meck:unload().
+
+
+register_index_works(_) ->
+    reset_callbacks(),
+
+    Mod1 = fabric2_test_callback1,
+    fabric2_index:register_index(Mod1),
+    Indices1 = application:get_env(fabric, indices, []),
+    ?assertEqual([Mod1], Indices1),
+
+    Mod2 = fabric2_test_callback2,
+    fabric2_index:register_index(Mod2),
+    Indices2 = application:get_env(fabric, indices, []),
+    ?assertEqual(lists:sort([Mod1, Mod2]), lists:sort(Indices2)).
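
register_index_works only checks that callback modules get recorded in the fabric application env; the rest of the suite stubs the callback with meck:expect(Mod, build_indices, 2, []). A hypothetical callback module of that shape is sketched below; the argument names are an assumption, since the suite never inspects them, and my_index_callback is not a real module:

-module(my_index_callback).
-export([build_indices/2]).

% called by fabric2_index when a db with design docs has been updated;
% the tests only count calls, so returning an empty list is enough
build_indices(_Db, _DDocs) ->
    [].

% registered once at application start:
%   fabric2_index:register_index(my_index_callback).
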
+ + +single_update(#{db1 := Db}) -> + reset_callbacks(), + + Mod = fabric2_test_callback3, + setup_callback(Mod), + create_doc(Db), + + meck:wait(Mod, build_indices, 2, 2000), + ?assertEqual(1, meck:num_calls(Mod, build_indices, 2)). + + +multiple_updates(#{db1 := Db}) -> + reset_callbacks(), + + Mod = fabric2_test_callback4, + setup_callback(Mod), + create_docs(Db, 10), + + % should be called at least once + meck:wait(Mod, build_indices, 2, 2000), + + % Maybe called another time or two at most + timer:sleep(500), + ?assert(meck:num_calls(Mod, build_indices, 2) =< 3). + + +skip_db_if_no_ddocs(#{db2 := Db}) -> + reset_callbacks(), + + Mod = fabric2_test_callback5, + setup_callback(Mod), + create_doc(Db), + + timer:sleep(500), + ?assertEqual(0, meck:num_calls(Mod, build_indices, 2)). + + +ignore_deleted_dbs(#{}) -> + reset_callbacks(), + + Mod = fabric2_test_callback6, + setup_callback(Mod), + lists:foreach(fun(_) -> + RandomDbName = fabric2_util:uuid(), + fabric2_index:db_updated(RandomDbName) + end, lists:seq(1, 1000)), + + test_util:wait(fun() -> + case table_sizes() =:= 0 of + true -> ok; + false -> wait + end + end, 5000). + + +check_gen_server_messages(#{}) -> + CallExpect = {stop, {bad_call, foo}, {bad_call, foo}, baz}, + CastExpect = {stop, {bad_cast, foo}, bar}, + InfoExpect = {stop, {bad_info, foo}, bar}, + ?assertEqual(CallExpect, fabric2_index:handle_call(foo, bar, baz)), + ?assertEqual(CastExpect, fabric2_index:handle_cast(foo, bar)), + ?assertEqual(InfoExpect, fabric2_index:handle_info(foo, bar)), + ?assertEqual(ok, fabric2_index:terminate(shutdown, nil)), + ?assertEqual({ok, nil}, fabric2_index:code_change(v0, nil, extra)). + + +updater_processes_start(#{}) -> + Pid = whereis(fabric2_index), + ?assert(is_process_alive(Pid)), + lists:map(fun(N) -> + ?assertEqual(tid(N), ets:info(tid(N), name)) + end, lists:seq(0, ?SHARDS - 1)). + + +updater_processes_stop(#{}) -> + Refs = lists:map(fun(N) -> + Pid = ets:info(tid(N), owner), + ?assert(is_process_alive(Pid)), + monitor(process, Pid) + end, lists:seq(0, ?SHARDS - 1)), + + % We stop but don't restart fabric after this as we're running in a foreach + % test list where app restart happens after each test. + application:stop(fabric), + + lists:foreach(fun(Ref) -> + receive + {'DOWN', Ref, _, _, _} -> ok + after 5000 -> + ?assert(false) + end + end, Refs). + + +indexing_can_be_disabled(#{db1 := Db}) -> + meck:expect(config, get_boolean, fun + ("fabric", "index_updater_enabled", _) -> false; + (_, _, Default) -> Default + end), + + Mod = fabric2_test_callback7, + setup_callback(Mod), + + create_doc(Db), + timer:sleep(500), + ?assertEqual(0, meck:num_calls(Mod, build_indices, 2)), + + meck:expect(config, get_boolean, fun + ("fabric", "index_updater_enabled", _) -> true; + (_, _, Default) -> Default + end), + + create_doc(Db), + meck:wait(Mod, build_indices, 2, 2000). + + +handle_indexer_blowing_up(#{db1 := Db}) -> + Mod = fabric2_test_callback8, + setup_callback(Mod), + meck:expect(Mod, build_indices, fun(_, _) -> error(bad_index) end), + + MainPid = whereis(fabric2_index), + WPids1 = [ets:info(tid(N), owner) || N <- lists:seq(0, ?SHARDS - 1)], + + create_doc(Db), + meck:wait(Mod, build_indices, 2, 2000), + + ?assert(is_process_alive(MainPid)), + + WPids2 = [ets:info(tid(N), owner) || N <- lists:seq(0, ?SHARDS - 1)], + ?assertEqual(lists:sort(WPids1), lists:sort(WPids2)), + ?assert(lists:all(fun(Pid) -> is_process_alive(Pid) end, WPids2)). 
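
The process-cleanup tests above rely on the indexer keeping ?SHARDS ets tables named fabric2_index_0 through fabric2_index_31 (see tid/1 in the utilities below), each owned by a worker process, with fabric2_index:db_updated/1 recording a changed db name in one of them. How a name is bucketed is not visible from this suite; a plausible sketch, with the hashing choice being an assumption:

% pick the shard table a changed db name would be recorded in
bucket_for(DbName) ->
    Shard = erlang:phash2(DbName, ?SHARDS),
    list_to_existing_atom("fabric2_index_" ++ integer_to_list(Shard)).
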
+ + +% Utility functions + +setup_callback(Mod) -> + catch meck:unload(Mod), + meck:new(Mod, [non_strict]), + meck:expect(Mod, build_indices, 2, []), + fabric2_index:register_index(Mod). + + +reset_callbacks() -> + Mods = application:get_env(fabric, indices, []), + application:set_env(fabric, indices, []), + lists:foreach(fun(M) -> + catch meck:reset(M), + catch meck:unload(M) + end, Mods). + + +tid(Id) when is_integer(Id) -> + TableName = "fabric2_index_" ++ integer_to_list(Id), + list_to_existing_atom(TableName). + + +table_sizes() -> + Sizes = [ets:info(tid(N), size) || N <- lists:seq(0, ?SHARDS - 1)], + lists:sum(Sizes). + + +create_docs(Db, Count) -> + lists:map(fun(_) -> + {DocId, _RevStr} = create_doc(Db), + DocId + end, lists:seq(1, Count)). + + +create_doc(Db) -> + create_doc(Db, fabric2_util:uuid()). + + +create_doc(Db, DocId) -> + create_doc(Db, DocId, {[]}). + + +create_doc(Db, DocId, Body) -> + Doc = #doc{ + id = DocId, + body = Body + }, + {ok, {Pos, Rev}} = fabric2_db:update_doc(Db, Doc, []), + {DocId, {Pos, Rev}}. diff --git a/src/fabric/test/fabric2_local_doc_fold_tests.erl b/src/fabric/test/fabric2_local_doc_fold_tests.erl new file mode 100644 index 000000000..e3ff0eb21 --- /dev/null +++ b/src/fabric/test/fabric2_local_doc_fold_tests.erl @@ -0,0 +1,295 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(fabric2_local_doc_fold_tests). + + +-include_lib("couch/include/couch_db.hrl"). +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("eunit/include/eunit.hrl"). +-include("fabric2_test.hrl"). + + +-define(DOC_COUNT, 50). + + +doc_fold_test_() -> + { + "Test local document fold operations", + { + setup, + fun setup/0, + fun cleanup/1, + with([ + ?TDEF(fold_local_docs_basic), + ?TDEF(fold_local_docs_rev), + ?TDEF(fold_local_docs_with_start_key), + ?TDEF(fold_local_docs_with_end_key), + ?TDEF(fold_local_docs_with_both_keys_the_same), + ?TDEF(fold_local_docs_with_different_keys, 15000), + ?TDEF(fold_local_docs_with_limit), + ?TDEF(fold_local_docs_with_skip), + ?TDEF(fold_local_docs_with_skip_and_limit) + ]) + } + }. + + +setup() -> + Ctx = test_util:start_couch([fabric]), + {ok, Db} = fabric2_db:create(?tempdb(), [{user_ctx, ?ADMIN_USER}]), + DocIdRevs = lists:map(fun(Val) -> + UUID = fabric2_util:uuid(), + DocId = <<?LOCAL_DOC_PREFIX, UUID/binary>>, + % Every 10th doc is large to force the doc to be chunkified + BigChunk = << <<"x">> || _ <- lists:seq(1, 200000) >>, + Body = case Val rem 10 == 0 of + true -> {[{<<"value">>, BigChunk}]}; + false -> {[{<<"value">>, Val}]} + end, + Doc = #doc{ + id = DocId, + body = Body + }, + {ok, Rev} = fabric2_db:update_doc(Db, Doc, []), + {DocId, {[{rev, couch_doc:rev_to_str(Rev)}]}} + end, lists:seq(1, ?DOC_COUNT)), + {Db, lists:sort(DocIdRevs), Ctx}. + + +cleanup({Db, _DocIdRevs, Ctx}) -> + ok = fabric2_db:delete(fabric2_db:name(Db), []), + test_util:stop_couch(Ctx). 
+ + +fold_local_docs_basic({Db, DocIdRevs, _}) -> + {ok, {?DOC_COUNT, Rows}} = fabric2_db:fold_local_docs(Db, fun fold_fun/2, [], []), + ?assertEqual(DocIdRevs, lists:reverse(Rows)). + + +fold_local_docs_rev({Db, DocIdRevs, _}) -> + Opts = [{dir, rev}], + {ok, {?DOC_COUNT, Rows}} = + fabric2_db:fold_local_docs(Db, fun fold_fun/2, [], Opts), + ?assertEqual(DocIdRevs, Rows). + + +fold_local_docs_with_start_key({Db, DocIdRevs, _}) -> + {StartKey, _} = hd(DocIdRevs), + Opts = [{start_key, StartKey}], + {ok, {?DOC_COUNT, Rows}} + = fabric2_db:fold_local_docs(Db, fun fold_fun/2, [], Opts), + ?assertEqual(DocIdRevs, lists:reverse(Rows)), + if length(DocIdRevs) == 1 -> ok; true -> + fold_local_docs_with_start_key({Db, tl(DocIdRevs), nil}) + end. + + +fold_local_docs_with_end_key({Db, DocIdRevs, _}) -> + RevDocIdRevs = lists:reverse(DocIdRevs), + {EndKey, _} = hd(RevDocIdRevs), + Opts = [{end_key, EndKey}], + {ok, {?DOC_COUNT, Rows}} = + fabric2_db:fold_local_docs(Db, fun fold_fun/2, [], Opts), + ?assertEqual(RevDocIdRevs, Rows), + if length(DocIdRevs) == 1 -> ok; true -> + fold_local_docs_with_end_key({Db, lists:reverse(tl(RevDocIdRevs)), nil}) + end. + + +fold_local_docs_with_both_keys_the_same({Db, DocIdRevs, _}) -> + lists:foreach(fun({DocId, _} = Row) -> + check_all_combos(Db, DocId, DocId, [Row]) + end, DocIdRevs). + + +fold_local_docs_with_different_keys({Db, DocIdRevs, _}) -> + lists:foreach(fun(_) -> + {StartKey, EndKey, Rows} = pick_range(DocIdRevs), + check_all_combos(Db, StartKey, EndKey, Rows) + end, lists:seq(1, 100)). + + +fold_local_docs_with_limit({Db, DocIdRevs, _}) -> + lists:foreach(fun(Limit) -> + Opts1 = [{limit, Limit}], + {ok, {?DOC_COUNT, Rows1}} = + fabric2_db:fold_local_docs(Db, fun fold_fun/2, [], Opts1), + ?assertEqual(lists:sublist(DocIdRevs, Limit), lists:reverse(Rows1)), + + Opts2 = [{dir, rev} | Opts1], + {ok, {?DOC_COUNT, Rows2}} = + fabric2_db:fold_local_docs(Db, fun fold_fun/2, [], Opts2), + ?assertEqual( + lists:sublist(lists:reverse(DocIdRevs), Limit), + lists:reverse(Rows2) + ) + end, lists:seq(0, 51)). + + +fold_local_docs_with_skip({Db, DocIdRevs, _}) -> + lists:foreach(fun(Skip) -> + Opts1 = [{skip, Skip}], + {ok, {?DOC_COUNT, Rows1}} = + fabric2_db:fold_local_docs(Db, fun fold_fun/2, [], Opts1), + Expect1 = case Skip > length(DocIdRevs) of + true -> []; + false -> lists:nthtail(Skip, DocIdRevs) + end, + ?assertEqual(Expect1, lists:reverse(Rows1)), + + Opts2 = [{dir, rev} | Opts1], + {ok, {?DOC_COUNT, Rows2}} = + fabric2_db:fold_local_docs(Db, fun fold_fun/2, [], Opts2), + Expect2 = case Skip > length(DocIdRevs) of + true -> []; + false -> lists:nthtail(Skip, lists:reverse(DocIdRevs)) + end, + ?assertEqual(Expect2, lists:reverse(Rows2)) + end, lists:seq(0, 51)). + + +fold_local_docs_with_skip_and_limit({Db, DocIdRevs, _}) -> + lists:foreach(fun(_) -> + check_skip_and_limit(Db, [], DocIdRevs), + check_skip_and_limit(Db, [{dir, rev}], lists:reverse(DocIdRevs)) + end, lists:seq(1, 100)). 
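% A minimal sketch of calling fabric2_db:fold_local_docs/4 outside the test
% helpers, using the callback protocol exercised above ({meta, _} and
% {row, _} events followed by complete) and the option names seen in this
% module. Db is a handle from fabric2_db:create/2 or open/2; the key values
% and function name are placeholders.

collect_local_docs(Db, StartKey, EndKey) ->
    Opts = [
        {start_key, StartKey},
        {end_key_gt, EndKey},  % exclusive end key; {end_key, K} is inclusive
        {skip, 0},
        {limit, 100}
    ],
    FoldFun = fun
        ({meta, _Meta}, Acc) ->
            {ok, Acc};
        ({row, Row}, Acc) ->
            Id = fabric2_util:get_value(id, Row),
            Value = fabric2_util:get_value(value, Row),
            {ok, [{Id, Value} | Acc]};
        (complete, Acc) ->
            {ok, Acc}
    end,
    {ok, Rows} = fabric2_db:fold_local_docs(Db, FoldFun, [], Opts),
    lists:reverse(Rows).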
+ + +check_all_combos(Db, StartKey, EndKey, Rows) -> + Opts1 = make_opts(fwd, StartKey, EndKey, true), + {ok, {?DOC_COUNT, Rows1}} = + fabric2_db:fold_local_docs(Db, fun fold_fun/2, [], Opts1), + ?assertEqual(lists:reverse(Rows), Rows1), + check_skip_and_limit(Db, Opts1, Rows), + + Opts2 = make_opts(fwd, StartKey, EndKey, false), + {ok, {?DOC_COUNT, Rows2}} = + fabric2_db:fold_local_docs(Db, fun fold_fun/2, [], Opts2), + Expect2 = if EndKey == undefined -> lists:reverse(Rows); true -> + lists:reverse(all_but_last(Rows)) + end, + ?assertEqual(Expect2, Rows2), + check_skip_and_limit(Db, Opts2, lists:reverse(Expect2)), + + Opts3 = make_opts(rev, StartKey, EndKey, true), + {ok, {?DOC_COUNT, Rows3}} = + fabric2_db:fold_local_docs(Db, fun fold_fun/2, [], Opts3), + ?assertEqual(Rows, Rows3), + check_skip_and_limit(Db, Opts3, lists:reverse(Rows)), + + Opts4 = make_opts(rev, StartKey, EndKey, false), + {ok, {?DOC_COUNT, Rows4}} = + fabric2_db:fold_local_docs(Db, fun fold_fun/2, [], Opts4), + Expect4 = if StartKey == undefined -> Rows; true -> + tl(Rows) + end, + ?assertEqual(Expect4, Rows4), + check_skip_and_limit(Db, Opts4, lists:reverse(Expect4)). + + +check_skip_and_limit(Db, Opts, []) -> + Skip = rand:uniform(?DOC_COUNT + 1) - 1, + Limit = rand:uniform(?DOC_COUNT + 1) - 1, + NewOpts = [{skip, Skip}, {limit, Limit} | Opts], + {ok, {?DOC_COUNT, OutRows}} = + fabric2_db:fold_local_docs(Db, fun fold_fun/2, [], NewOpts), + ?assertEqual([], OutRows); + +check_skip_and_limit(Db, Opts, Rows) -> + Skip = rand:uniform(length(Rows) + 1) - 1, + Limit = rand:uniform(?DOC_COUNT + 1 - Skip) - 1, + + ExpectRows = case Skip >= length(Rows) of + true -> + []; + false -> + lists:sublist(lists:nthtail(Skip, Rows), Limit) + end, + + SkipLimitOpts = [{skip, Skip}, {limit, Limit} | Opts], + {ok, {?DOC_COUNT, RevRows}} = + fabric2_db:fold_local_docs(Db, fun fold_fun/2, [], SkipLimitOpts), + OutRows = lists:reverse(RevRows), + ?assertEqual(ExpectRows, OutRows). + + +make_opts(fwd, StartKey, EndKey, InclusiveEnd) -> + DirOpts = case rand:uniform() =< 0.50 of + true -> [{dir, fwd}]; + false -> [] + end, + StartOpts = case StartKey of + undefined -> []; + <<_/binary>> -> [{start_key, StartKey}] + end, + EndOpts = case EndKey of + undefined -> []; + <<_/binary>> when InclusiveEnd -> [{end_key, EndKey}]; + <<_/binary>> -> [{end_key_gt, EndKey}] + end, + DirOpts ++ StartOpts ++ EndOpts; +make_opts(rev, StartKey, EndKey, InclusiveEnd) -> + BaseOpts = make_opts(fwd, EndKey, StartKey, InclusiveEnd), + [{dir, rev}] ++ BaseOpts -- [{dir, fwd}]. + + +all_but_last([]) -> + []; +all_but_last([_]) -> + []; +all_but_last(Rows) -> + lists:sublist(Rows, length(Rows) - 1). + + +pick_range(DocIdRevs) -> + {StartKey, StartRow, RestRows} = pick_start_key(DocIdRevs), + {EndKey, EndRow, RowsBetween} = pick_end_key(RestRows), + {StartKey, EndKey, StartRow ++ RowsBetween ++ EndRow}. + + +pick_start_key(Rows) -> + case rand:uniform() =< 0.1 of + true -> + {undefined, [], Rows}; + false -> + Idx = rand:uniform(length(Rows)), + {DocId, _} = Row = lists:nth(Idx, Rows), + {DocId, [Row], lists:nthtail(Idx, Rows)} + end. + + +pick_end_key([]) -> + {undefined, [], []}; + +pick_end_key(Rows) -> + case rand:uniform() =< 0.1 of + true -> + {undefined, [], Rows}; + false -> + Idx = rand:uniform(length(Rows)), + {DocId, _} = Row = lists:nth(Idx, Rows), + Tail = lists:nthtail(Idx, Rows), + {DocId, [Row], Rows -- [Row | Tail]} + end. 
+ + +fold_fun({meta, Meta}, _Acc) -> + Total = fabric2_util:get_value(total, Meta), + {ok, {Total, []}}; +fold_fun({row, Row}, {Total, Rows}) -> + RowId = fabric2_util:get_value(id, Row), + RowId = fabric2_util:get_value(key, Row), + RowRev = fabric2_util:get_value(value, Row), + {ok, {Total, [{RowId, RowRev} | Rows]}}; +fold_fun(complete, Acc) -> + {ok, Acc}. diff --git a/src/fabric/test/fabric2_node_types_tests.erl b/src/fabric/test/fabric2_node_types_tests.erl new file mode 100644 index 000000000..074afe86b --- /dev/null +++ b/src/fabric/test/fabric2_node_types_tests.erl @@ -0,0 +1,66 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(fabric2_node_types_tests). + + +-include_lib("eunit/include/eunit.hrl"). +-include("fabric2_test.hrl"). + + +node_types_test_() -> + { + "Test node types", + setup, + fun() -> + os:putenv("COUCHDB_NODE_TYPE_FOO", "false"), + os:putenv("COUCHDB_NODE_TYPE_BAZ", "true"), + os:putenv("COUCHDB_NODE_TYPE_ZIG", ""), + % erlfdb, rexi and mem3 are all dependent apps for fabric. We make + % sure to start them so when fabric is started during the test it + % already has its dependencies + test_util:start_couch([erlfdb, rexi, mem3, ctrace, fabric]) + end, + fun(Ctx) -> + test_util:stop_couch(Ctx), + application:unset_env(fabric, node_types), + os:unsetenv("COUCHDB_NODE_TYPE_FOO"), + os:unsetenv("COUCHDB_NODE_TYPE_BAZ"), + os:unsetenv("COUCHDB_NODE_TYPE_ZIG") + end, + with([ + ?TDEF(basics), + ?TDEF(os_env_priority) + ]) + }. + + +basics(_) -> + % default is true for new types + ?assert(fabric2_node_types:is_type(some_new_node_type)), + + % defined in os env + ?assert(fabric2_node_types:is_type(baz)), + ?assert(not fabric2_node_types:is_type(foo)), + ?assert(fabric2_node_types:is_type(zig)), + + % defined in app env + application:set_env(fabric, node_types, [{zag, true}, {bam, false}]), + ?assert(fabric2_node_types:is_type(zag)), + ?assert(not fabric2_node_types:is_type(bam)). + + +os_env_priority(_) -> + % os env takes precedence + application:set_env(fabric, node_types, [{foo, true}, {baz, false}]), + ?assert(not fabric2_node_types:is_type(foo)), + ?assert(fabric2_node_types:is_type(baz)). diff --git a/src/fabric/test/fabric2_rev_stemming.erl b/src/fabric/test/fabric2_rev_stemming.erl new file mode 100644 index 000000000..62ce6901a --- /dev/null +++ b/src/fabric/test/fabric2_rev_stemming.erl @@ -0,0 +1,205 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(fabric2_rev_stemming). + + +-include_lib("couch/include/couch_db.hrl"). 
+-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("eunit/include/eunit.hrl"). +-include("fabric2_test.hrl"). + + +doc_crud_test_() -> + { + "Test document CRUD operations with stemming", + { + setup, + fun setup/0, + fun cleanup/1, + with([ + ?TDEF(update_doc), + ?TDEF(update_doc_replicated_no_stemming), + ?TDEF(update_doc_replicated_with_stemming), + ?TDEF(update_doc_replicate_existing_rev), + ?TDEF(update_winning_conflict_branch), + ?TDEF(update_non_winning_conflict_branch), + ?TDEF(delete_doc_basic), + ?TDEF(recreate_doc_basic) + ]) + } + }. + + +setup() -> + Ctx = test_util:start_couch([fabric]), + {ok, Db} = fabric2_db:create(?tempdb(), [{user_ctx, ?ADMIN_USER}]), + {Db, Ctx}. + + +cleanup({Db, Ctx}) -> + ok = fabric2_db:delete(fabric2_db:name(Db), []), + test_util:stop_couch(Ctx). + + +update_doc({Db, _}) -> + ok = fabric2_db:set_revs_limit(Db, 2), + Doc1 = #doc{id = fabric2_util:uuid()}, + {ok, {Pos1, Rev1}} = fabric2_db:update_doc(Db, Doc1), + Doc2 = Doc1#doc{revs = {Pos1, [Rev1]}}, + {ok, {Pos2, Rev2}} = fabric2_db:update_doc(Db, Doc2), + Doc3 = Doc2#doc{revs = {Pos2, [Rev2, Rev1]}}, + ?assertEqual({ok, Doc3}, fabric2_db:open_doc(Db, Doc2#doc.id)), + + {ok, {_, Rev3}} = fabric2_db:update_doc(Db, Doc3), + {ok, Doc4} = fabric2_db:open_doc(Db, Doc3#doc.id), + ?assertEqual({3, [Rev3, Rev2]}, Doc4#doc.revs). + + +update_doc_replicated_no_stemming({Db, _}) -> + ok = fabric2_db:set_revs_limit(Db, 2), + Rev1 = fabric2_util:uuid(), + Rev2 = fabric2_util:uuid(), + Doc = #doc{ + id = fabric2_util:uuid(), + revs = {2, [Rev2, Rev1]} + }, + {ok, _} = fabric2_db:update_doc(Db, Doc, [replicated_changes]), + {ok, #doc{revs = Revs}} = fabric2_db:open_doc(Db, Doc#doc.id), + ?assertEqual({2, [Rev2, Rev1]}, Revs). + + +update_doc_replicated_with_stemming({Db, _}) -> + ok = fabric2_db:set_revs_limit(Db, 1), + Rev1 = fabric2_util:uuid(), + Rev2 = fabric2_util:uuid(), + Doc = #doc{ + id = fabric2_util:uuid(), + revs = {2, [Rev2, Rev1]} + }, + {ok, _} = fabric2_db:update_doc(Db, Doc, [replicated_changes]), + {ok, #doc{revs = Revs}} = fabric2_db:open_doc(Db, Doc#doc.id), + ?assertEqual({2, [Rev2]}, Revs). + + +update_doc_replicate_existing_rev({Db, _}) -> + ok = fabric2_db:set_revs_limit(Db, 1), + Rev1 = fabric2_util:uuid(), + Rev2 = fabric2_util:uuid(), + Doc1 = #doc{ + id = fabric2_util:uuid(), + revs = {2, [Rev2, Rev1]} + }, + {ok, {2, _}} = fabric2_db:update_doc(Db, Doc1, [replicated_changes]), + {ok, []} = fabric2_db:update_docs(Db, [Doc1], [replicated_changes]), + {ok, Doc} = fabric2_db:open_doc(Db, Doc1#doc.id), + ?assertEqual({2, [Rev2]}, Doc#doc.revs). + + +update_winning_conflict_branch({Db, _}) -> + ok = fabric2_db:set_revs_limit(Db, 2), + [Rev1, Rev2, Rev3] = lists:sort([ + fabric2_util:uuid(), + fabric2_util:uuid(), + fabric2_util:uuid() + ]), + Doc1 = #doc{ + id = fabric2_util:uuid(), + revs = {2, [Rev3, Rev1]}, + body = {[{<<"foo">>, <<"bar">>}]} + }, + {ok, {2, _}} = fabric2_db:update_doc(Db, Doc1, [replicated_changes]), + Doc2 = Doc1#doc{ + revs = {2, [Rev2, Rev1]}, + body = {[{<<"bar">>, <<"foo">>}]} + }, + {ok, {2, _}} = fabric2_db:update_doc(Db, Doc2, [replicated_changes]), + % Update the winning branch + Doc3 = Doc1#doc{ + revs = {2, [Rev3, Rev1]}, + body = {[{<<"baz">>, 2}]} + }, + {ok, {3, Rev4}} = fabric2_db:update_doc(Db, Doc3), + {ok, Doc4} = fabric2_db:open_doc(Db, Doc3#doc.id), + % Assert we've got the correct winner + ?assertEqual({3, [Rev4, Rev3]}, Doc4#doc.revs), + ?assertEqual(Doc3#doc{revs = undefined}, Doc4#doc{revs = undefined}). 
+ + +update_non_winning_conflict_branch({Db, _}) -> + ok = fabric2_db:set_revs_limit(Db, 2), + [Rev1, Rev2, Rev3] = lists:sort([ + fabric2_util:uuid(), + fabric2_util:uuid(), + fabric2_util:uuid() + ]), + Doc1 = #doc{ + id = fabric2_util:uuid(), + revs = {2, [Rev3, Rev1]}, + body = {[{<<"foo">>, <<"bar">>}]} + }, + {ok, {2, _}} = fabric2_db:update_doc(Db, Doc1, [replicated_changes]), + Doc2 = Doc1#doc{ + revs = {2, [Rev2, Rev1]}, + body = {[{<<"bar">>, <<"foo">>}]} + }, + {ok, {2, _}} = fabric2_db:update_doc(Db, Doc2, [replicated_changes]), + % Update the non winning branch + Doc3 = Doc1#doc{ + revs = {2, [Rev2, Rev1]}, + body = {[{<<"baz">>, 2}]} + }, + {ok, {3, Rev4}} = fabric2_db:update_doc(Db, Doc3), + {ok, Doc4} = fabric2_db:open_doc(Db, Doc3#doc.id), + % Assert we've got the correct winner + ?assertEqual({3, [Rev4, Rev2]}, Doc4#doc.revs). + + +delete_doc_basic({Db, _}) -> + ok = fabric2_db:set_revs_limit(Db, 1), + Doc1 = #doc{ + id = fabric2_util:uuid(), + body = {[{<<"state">>, 1}]} + }, + {ok, {Pos1, Rev1}} = fabric2_db:update_doc(Db, Doc1), + Doc2 = Doc1#doc{ + revs = {Pos1, [Rev1]}, + deleted = true, + body = {[{<<"state">>, 2}]} + }, + {ok, {Pos2, Rev2}} = fabric2_db:update_doc(Db, Doc2), + Doc3 = Doc2#doc{revs = {Pos2, [Rev2]}}, + ?assertEqual({ok, Doc3}, fabric2_db:open_doc(Db, Doc2#doc.id, [deleted])). + + +recreate_doc_basic({Db, _}) -> + ok = fabric2_db:set_revs_limit(Db, 1), + Doc1 = #doc{ + id = fabric2_util:uuid(), + body = {[{<<"state">>, 1}]} + }, + {ok, {1, Rev1}} = fabric2_db:update_doc(Db, Doc1), + Doc2 = Doc1#doc{ + revs = {1, [Rev1]}, + deleted = true, + body = {[{<<"state">>, 2}]} + }, + {ok, {2, _Rev2}} = fabric2_db:update_doc(Db, Doc2), + Doc3 = Doc1#doc{ + revs = {0, []}, + deleted = false, + body = {[{<<"state">>, 3}]} + }, + {ok, {3, Rev3}} = fabric2_db:update_doc(Db, Doc3), + {ok, Doc4} = fabric2_db:open_doc(Db, Doc3#doc.id), + ?assertEqual({3, [Rev3]}, Doc4#doc.revs), + ?assertEqual(Doc3#doc{revs = undefined}, Doc4#doc{revs = undefined}). diff --git a/src/fabric/test/fabric2_test.hrl b/src/fabric/test/fabric2_test.hrl new file mode 100644 index 000000000..9239096fc --- /dev/null +++ b/src/fabric/test/fabric2_test.hrl @@ -0,0 +1,33 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + + +% Some test modules do not use with, so squash the unused fun compiler warning +-compile([{nowarn_unused_function, [{with, 1}]}]). + + +-define(TDEF(Name), {atom_to_list(Name), fun Name/1}). +-define(TDEF(Name, Timeout), {atom_to_list(Name), Timeout, fun Name/1}). + +-define(TDEF_FE(Name), fun(Arg) -> {atom_to_list(Name), ?_test(Name(Arg))} end). +-define(TDEF_FE(Name, Timeout), fun(Arg) -> {atom_to_list(Name), {timeout, Timeout, ?_test(Name(Arg))}} end). + + +with(Tests) -> + fun(ArgsTuple) -> + lists:map(fun + ({Name, Fun}) -> + {Name, ?_test(Fun(ArgsTuple))}; + ({Name, Timeout, Fun}) -> + {Name, {timeout, Timeout, ?_test(Fun(ArgsTuple))}} + end, Tests) + end. 
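% A quick reference for the macros above, since every test module in this
% change uses them: a setup fixture pairs with/1 and ?TDEF (the whole list
% shares one setup/cleanup), while a foreach fixture uses ?TDEF_FE so each
% case gets its own. The sketch below is hedged and self-contained, with
% placeholder test names.

example_setup_test_() ->
    {
        setup,
        fun() -> test_util:start_couch([fabric]) end,
        fun(Ctx) -> test_util:stop_couch(Ctx) end,
        with([
            ?TDEF(check_something),
            ?TDEF(check_something_slow, 15000)  % per-case timeout
        ])
    }.

example_foreach_test_() ->
    {
        foreach,
        fun() -> test_util:start_couch([fabric]) end,
        fun(Ctx) -> test_util:stop_couch(Ctx) end,
        [
            ?TDEF_FE(check_something),
            ?TDEF_FE(check_something_slow, 15000)
        ]
    }.

check_something(_Ctx) ->
    ?assert(true).

check_something_slow(_Ctx) ->
    ?assert(true).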
diff --git a/src/fabric/test/fabric2_test_util.erl b/src/fabric/test/fabric2_test_util.erl new file mode 100644 index 000000000..acbe252b1 --- /dev/null +++ b/src/fabric/test/fabric2_test_util.erl @@ -0,0 +1,76 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(fabric2_test_util). + + +-export([ + tx_too_old_mock_erlfdb/0, + tx_too_old_setup_errors/2, + tx_too_old_reset_errors/0, + tx_too_old_raise_in_user_fun/0 +]). + + +-define(PDICT_ERROR_IN_FOLD_RANGE, '$fabric2_error_in_fold_range'). +-define(PDICT_ERROR_IN_USER_FUN, '$fabric2_error_throw_in_user_fun'). + + +% Set of function to test scenarios where the FDB throws transaction_too_long +% (1007) errors. The general pattern is to call tx_too_old_mock_erlfdb() in +% setup. Then, before tests call tx_too_old_setup_errors(UserErrs, FoldErrs) +% which will set how and when the error will be thrown. + +tx_too_old_mock_erlfdb() -> + meck:expect(erlfdb, fold_range, fun(Tx, Start, End, Callback, Acc, Opts) -> + MockFun = fun(Row, InnerAcc) -> + maybe_tx_too_old(?PDICT_ERROR_IN_FOLD_RANGE), + Callback(Row, InnerAcc) + end, + meck:passthrough([Tx, Start, End, MockFun, Acc, Opts]) + end). + + +tx_too_old_setup_errors(UserCnt, FoldErrs) when is_integer(UserCnt) -> + tx_too_old_setup_errors({0, UserCnt}, FoldErrs); + +tx_too_old_setup_errors(UserErrs, FoldCnt) when is_integer(FoldCnt) -> + tx_too_old_setup_errors(UserErrs, {0, FoldCnt}); + +tx_too_old_setup_errors({UserSkip, UserCnt}, {FoldSkip, FoldCnt}) -> + put(?PDICT_ERROR_IN_USER_FUN, {UserSkip, UserCnt}), + put(?PDICT_ERROR_IN_FOLD_RANGE, {FoldSkip, FoldCnt}). + + +tx_too_old_reset_errors() -> + erase(?PDICT_ERROR_IN_FOLD_RANGE), + erase(?PDICT_ERROR_IN_USER_FUN). + + +tx_too_old_raise_in_user_fun() -> + maybe_tx_too_old(?PDICT_ERROR_IN_USER_FUN). + + +% Private functions + +maybe_tx_too_old(Key) -> + case get(Key) of + {Skip, Count} when is_integer(Skip), Skip > 0 -> + put(Key, {Skip - 1, Count}); + {0, Count} when is_integer(Count), Count > 0 -> + put(Key, {0, Count - 1}), + error({erlfdb_error, 1007}); + {0, 0} -> + ok; + undefined -> + ok + end. diff --git a/src/fabric/test/fabric2_trace_db_create_tests.erl b/src/fabric/test/fabric2_trace_db_create_tests.erl new file mode 100644 index 000000000..926219f6a --- /dev/null +++ b/src/fabric/test/fabric2_trace_db_create_tests.erl @@ -0,0 +1,47 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(fabric2_trace_db_create_tests). + + +-include_lib("couch/include/couch_db.hrl"). +-include_lib("couch/include/couch_eunit.hrl"). 
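% A hedged usage sketch for the fabric2_test_util helpers above. The pattern:
% mock erlfdb once in the fixture, then per test declare when the
% transaction_too_old (1007) error should fire, either inside
% erlfdb:fold_range/6 or inside the user fold fun (which must call
% tx_too_old_raise_in_user_fun/0 itself). The fixture names and fold call
% below are placeholders, and it is assumed the fold under test retries and
% still completes, which is the behavior these helpers exist to exercise.

setup() ->
    Ctx = test_util:start_couch([fabric]),
    meck:new(erlfdb, [passthrough]),  % may already be done by the suite
    fabric2_test_util:tx_too_old_mock_erlfdb(),
    Ctx.

cleanup(Ctx) ->
    fabric2_test_util:tx_too_old_reset_errors(),
    meck:unload(),
    test_util:stop_couch(Ctx).

fold_retries_on_tx_too_old(Db) ->
    % One error from the user fun (no skips), and, inside erlfdb:fold_range/6,
    % skip the first 2 rows then raise {erlfdb_error, 1007} 3 times.
    fabric2_test_util:tx_too_old_setup_errors(1, {2, 3}),
    UserFun = fun
        ({meta, _}, Acc) ->
            {ok, Acc};
        ({row, Row}, Acc) ->
            fabric2_test_util:tx_too_old_raise_in_user_fun(),
            {ok, [fabric2_util:get_value(id, Row) | Acc]};
        (complete, Acc) ->
            {ok, Acc}
    end,
    {ok, _Ids} = fabric2_db:fold_local_docs(Db, UserFun, [], []).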
+-include_lib("eunit/include/eunit.hrl"). +-include("fabric2_test.hrl"). + + +trace_test_() -> + { + "Trace operation", + { + setup, + fun setup/0, + fun cleanup/1, + with([ + ?TDEF(create_db) + ]) + } + }. + + +setup() -> + put(erlfdb_trace, "starting fabric"), + test_util:start_couch([fabric]). + + +cleanup(Ctx) -> + test_util:stop_couch(Ctx). + + +create_db(_) -> + put(erlfdb_trace, <<"create db">>), + {ok, _Db} = fabric2_db:create(?tempdb(), [{user_ctx, ?ADMIN_USER}]). diff --git a/src/fabric/test/fabric2_trace_db_delete_tests.erl b/src/fabric/test/fabric2_trace_db_delete_tests.erl new file mode 100644 index 000000000..ac92c5335 --- /dev/null +++ b/src/fabric/test/fabric2_trace_db_delete_tests.erl @@ -0,0 +1,50 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(fabric2_trace_db_delete_tests). + + +-include_lib("couch/include/couch_db.hrl"). +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("eunit/include/eunit.hrl"). +-include("fabric2_test.hrl"). + + +trace_test_() -> + { + "Trace operation", + { + setup, + fun setup/0, + fun cleanup/1, + with([ + ?TDEF(delete_db) + ]) + } + }. + + +setup() -> + put(erlfdb_trace, "starting fabric"), + Ctx = test_util:start_couch([fabric]), + {ok, Db} = fabric2_db:create(?tempdb(), [{user_ctx, ?ADMIN_USER}]), + {Db, Ctx}. + + +cleanup({_Db, Ctx}) -> + test_util:stop_couch(Ctx). + + +delete_db({Db, _}) -> + put(erlfdb_trace, <<"delete db">>), + fabric2_server:remove(fabric2_db:name(Db)), + ok = fabric2_db:delete(fabric2_db:name(Db), []). diff --git a/src/fabric/test/fabric2_trace_db_open_tests.erl b/src/fabric/test/fabric2_trace_db_open_tests.erl new file mode 100644 index 000000000..3602b50e1 --- /dev/null +++ b/src/fabric/test/fabric2_trace_db_open_tests.erl @@ -0,0 +1,51 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(fabric2_trace_db_open_tests). + + +-include_lib("couch/include/couch_db.hrl"). +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("eunit/include/eunit.hrl"). +-include("fabric2_test.hrl"). + + +trace_test_() -> + { + "Trace operation", + { + setup, + fun setup/0, + fun cleanup/1, + with([ + ?TDEF(open_db) + ]) + } + }. + + +setup() -> + put(erlfdb_trace, "starting fabric"), + Ctx = test_util:start_couch([fabric]), + {ok, Db} = fabric2_db:create(?tempdb(), [{user_ctx, ?ADMIN_USER}]), + {Db, Ctx}. + + +cleanup({Db, Ctx}) -> + ok = fabric2_db:delete(fabric2_db:name(Db), []), + test_util:stop_couch(Ctx). 
+ + +open_db({Db, _}) -> + put(erlfdb_trace, <<"open db">>), + fabric2_server:remove(fabric2_db:name(Db)), + {ok, _Db} = fabric2_db:open(fabric2_db:name(Db), [{user_ctx, ?ADMIN_USER}]). diff --git a/src/fabric/test/fabric2_trace_doc_create_tests.erl b/src/fabric/test/fabric2_trace_doc_create_tests.erl new file mode 100644 index 000000000..888039d05 --- /dev/null +++ b/src/fabric/test/fabric2_trace_doc_create_tests.erl @@ -0,0 +1,87 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(fabric2_trace_doc_create_tests). + + +-include_lib("couch/include/couch_db.hrl"). +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("eunit/include/eunit.hrl"). +-include("fabric2_test.hrl"). + + +trace_doc_create_test_() -> + { + "Test document CRUD operations", + { + setup, + fun setup/0, + fun cleanup/1, + with([ + ?TDEF(create_new_doc), + ?TDEF(create_two_docs), + ?TDEF(create_50_docs) + ]) + } + }. + + +setup() -> + Ctx = test_util:start_couch([fabric]), + {ok, Db} = fabric2_db:create(?tempdb(), [{user_ctx, ?ADMIN_USER}]), + {Db, Ctx}. + + +cleanup({Db, Ctx}) -> + ok = fabric2_db:delete(fabric2_db:name(Db), []), + test_util:stop_couch(Ctx). + + +create_new_doc({Db, _}) -> + put(erlfdb_trace, <<"one doc">>), + Doc = #doc{ + id = fabric2_util:uuid(), + body = {[{<<"foo">>, <<"bar">>}]} + }, + {ok, _} = fabric2_db:update_doc(Db, Doc). + + +create_two_docs({Db, _}) -> + put(erlfdb_trace, <<"two docs">>), + Doc1 = #doc{ + id = fabric2_util:uuid(), + body = {[{<<"bam">>, <<"baz">>}]} + }, + Doc2 = #doc{ + id = fabric2_util:uuid(), + body = {[{<<"bang">>, <<"bargle">>}]} + }, + {ok, _} = fabric2_db:update_docs(Db, [Doc1, Doc2]). + + +create_50_docs({Db, _}) -> + lists:foreach(fun(_) -> + spawn_monitor(fun() -> + Name = io_lib:format("50 docs : ~w", [self()]), + put(erlfdb_trace, iolist_to_binary(Name)), + Docs = lists:map(fun(Val) -> + #doc{ + id = fabric2_util:uuid(), + body = {[{<<"value">>, Val}]} + } + end, lists:seq(1, 50)), + {ok, _} = fabric2_db:update_docs(Db, Docs) + end) + end, lists:seq(1, 5)), + lists:foreach(fun(_) -> + receive {'DOWN', _, _, _, _} -> ok end + end, lists:seq(1, 5)). diff --git a/src/fabric/test/fabric2_tx_options_tests.erl b/src/fabric/test/fabric2_tx_options_tests.erl new file mode 100644 index 000000000..b93cc3d69 --- /dev/null +++ b/src/fabric/test/fabric2_tx_options_tests.erl @@ -0,0 +1,103 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(fabric2_tx_options_tests). + + +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("eunit/include/eunit.hrl"). 
+-include_lib("couch/include/couch_db.hrl"). +-include("fabric2_test.hrl"). +-include("fabric2.hrl"). + + +fdb_tx_options_test_() -> + { + "Test setting default transaction options", + setup, + fun() -> + meck:new(erlfdb, [passthrough]), + % erlfdb, rexi and mem3 are all dependent apps for fabric. We make + % sure to start them so when fabric is started during the test it + % already has its dependencies + test_util:start_couch([erlfdb, rexi, mem3, ctrace, fabric]) + end, + fun(Ctx) -> + meck:unload(), + + config:delete("fdb_tx_options", "size_limit", false), + config:delete("fdb_tx_options", "max_retry_delay", false), + config:delete("fdb_tx_options", "machine_id", false), + config:delete("fdb_tx_options", "datacenter_id", false), + + test_util:stop_couch(Ctx) + end, + with([ + ?TDEF(options_take_effect, 15), + ?TDEF(can_configure_options_at_runtime, 15) + ]) + }. + + +options_take_effect(_) -> + ok = application:stop(fabric), + + % Try one of each type including some invalid values + config:set("fdb_tx_options", "size_limit", "150000", false), + config:set("fdb_tx_options", "max_retry_delay", "badness", false), + config:set("fdb_tx_options", "machine_id", "123abc", false), + TooLong = ["x" || _ <- lists:seq(1, 1000)], + config:set("fdb_tx_options", "datacenter_id", TooLong, false), + ok = application:start(fabric), + + DbName = ?tempdb(), + {ok, Db} = fabric2_db:create(DbName, [?ADMIN_CTX]), + ?assertError({erlfdb_error, ?TRANSACTION_TOO_LARGE}, + add_large_doc(Db, 200000)), + ok = fabric2_db:delete(DbName, [?ADMIN_CTX]). + + +can_configure_options_at_runtime(_) -> + meck:expect(erlfdb, set_option, fun(Fdb, Option, Val) -> + meck:passthrough([Fdb, Option, Val]) + end), + + meck:reset(erlfdb), + + config:set("fdb_tx_options", "size_limit", "150000", false), + meck:wait(erlfdb, set_option, ['_', size_limit, 150000], 4000), + + DbName = ?tempdb(), + + {ok, Db} = fabric2_db:create(DbName, [?ADMIN_CTX]), + ?assertError({erlfdb_error, ?TRANSACTION_TOO_LARGE}, + add_large_doc(Db, 200000)), + + meck:reset(erlfdb), + + config:delete("fdb_tx_options", "size_limit", false), + % Assert that we get a new handle and are setting our default values + meck:wait(erlfdb, set_option, ['_', timeout, '_'], 4000), + erase(?PDICT_DB_KEY), + + {ok, Db1} = fabric2_db:open(DbName, [?ADMIN_CTX]), + ?assertMatch({ok, _}, add_large_doc(Db1, 200000)), + + ok = fabric2_db:delete(DbName, [?ADMIN_CTX]). + + +add_large_doc(Db, Size) -> + Doc = #doc{ + id = fabric2_util:uuid(), + body = {[{<<"x">>, crypto:strong_rand_bytes(Size)}]} + }, + fabric2_db:update_doc(Db, Doc). diff --git a/src/fabric/test/fabric2_update_docs_tests.erl b/src/fabric/test/fabric2_update_docs_tests.erl new file mode 100644 index 000000000..469fa0d1b --- /dev/null +++ b/src/fabric/test/fabric2_update_docs_tests.erl @@ -0,0 +1,208 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(fabric2_update_docs_tests). + + +-include_lib("couch/include/couch_db.hrl"). +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("eunit/include/eunit.hrl"). 
+-include("fabric2_test.hrl"). + + +update_docs_test_() -> + { + "Test update_docs", + { + setup, + fun setup_all/0, + fun teardown_all/1, + { + foreach, + fun setup/0, + fun cleanup/1, + [ + ?TDEF_FE(update_docs), + ?TDEF_FE(update_docs_replicated), + ?TDEF_FE(update_docs_batches), + ?TDEF_FE(update_docs_replicated_batches), + ?TDEF_FE(update_docs_duplicate_ids_conflict), + ?TDEF_FE(update_docs_duplicate_ids_with_batches), + ?TDEF_FE(update_docs_replicate_batches_duplicate_id) + ] + } + } + }. + + +setup_all() -> + test_util:start_couch([fabric]). + + +teardown_all(Ctx) -> + test_util:stop_couch(Ctx). + + +setup() -> + {ok, Db} = fabric2_db:create(?tempdb(), [{user_ctx, ?ADMIN_USER}]), + Db. + + +cleanup(#{} = Db) -> + ok = fabric2_db:delete(fabric2_db:name(Db), []). + + +update_docs(Db) -> + ?assertEqual({ok, []}, fabric2_db:update_docs(Db, [])), + + Doc1 = doc(), + Res1 = fabric2_db:update_docs(Db, [Doc1]), + ?assertMatch({ok, [_]}, Res1), + {ok, [Doc1Res]} = Res1, + ?assertMatch({ok, {1, <<_/binary>>}}, Doc1Res), + {ok, {1, Rev1}} = Doc1Res, + {ok, Doc1Open} = fabric2_db:open_doc(Db, Doc1#doc.id), + ?assertEqual(Doc1#doc{revs = {1, [Rev1]}}, Doc1Open), + + Doc2 = doc(), + Doc3 = doc(), + Res2 = fabric2_db:update_docs(Db, [Doc2, Doc3]), + ?assertMatch({ok, [_, _]}, Res2), + {ok, [Doc2Res, Doc3Res]} = Res2, + ?assertMatch({ok, {1, <<_/binary>>}}, Doc2Res), + ?assertMatch({ok, {1, <<_/binary>>}}, Doc3Res). + + +update_docs_replicated(Db) -> + Opts = [replicated_changes], + + ?assertEqual({ok, []}, fabric2_db:update_docs(Db, [], Opts)), + + Doc1 = doc(10, {1, [rev()]}), + ?assertMatch({ok, []}, fabric2_db:update_docs(Db, [Doc1], Opts)), + {ok, Doc1Open} = fabric2_db:open_doc(Db, Doc1#doc.id), + ?assertEqual(Doc1, Doc1Open), + + Doc2 = doc(10, {1, [rev()]}), + Doc3 = doc(10, {1, [rev()]}), + ?assertMatch({ok, []}, fabric2_db:update_docs(Db, [Doc2, Doc3], Opts)), + {ok, Doc2Open} = fabric2_db:open_doc(Db, Doc2#doc.id), + ?assertEqual(Doc2, Doc2Open), + {ok, Doc3Open} = fabric2_db:open_doc(Db, Doc3#doc.id), + ?assertEqual(Doc3, Doc3Open). + + +update_docs_batches(Db) -> + Opts = [{batch_size, 5000}], + + Docs1 = [doc(9000), doc(9000)], + + ?assertMatch({ok, [_ | _]}, fabric2_db:update_docs(Db, Docs1, Opts)), + + lists:foreach(fun(#doc{} = Doc) -> + ?assertMatch({ok, #doc{}}, fabric2_db:open_doc(Db, Doc#doc.id)) + end, Docs1), + + Docs2 = [doc(10), doc(10), doc(9000), doc(10)], + + ?assertMatch({ok, [_ | _]}, fabric2_db:update_docs(Db, Docs2, Opts)), + + lists:foreach(fun(#doc{} = Doc) -> + ?assertMatch({ok, #doc{}}, fabric2_db:open_doc(Db, Doc#doc.id)) + end, Docs2). + + +update_docs_replicated_batches(Db) -> + Opts = [{batch_size, 5000}, replicated_changes], + + Docs1 = [doc(Size, {1, [rev()]}) || Size <- [9000, 9000]], + + ?assertMatch({ok, []}, fabric2_db:update_docs(Db, Docs1, Opts)), + + lists:foreach(fun(#doc{} = Doc) -> + ?assertEqual({ok, Doc}, fabric2_db:open_doc(Db, Doc#doc.id)) + end, Docs1), + + Docs2 = [doc(Size, {1, [rev()]}) || Size <- [10, 10, 9000, 10]], + + ?assertMatch({ok, []}, fabric2_db:update_docs(Db, Docs2, Opts)), + + lists:foreach(fun(#doc{} = Doc) -> + ?assertEqual({ok, Doc}, fabric2_db:open_doc(Db, Doc#doc.id)) + end, Docs2). + + +update_docs_duplicate_ids_conflict(Db) -> + Doc = doc(), + + Res = fabric2_db:update_docs(Db, [Doc, doc(), Doc]), + ?assertMatch({ok, [_, _, _]}, Res), + + {ok, [Doc1Res, Doc2Res, Doc3Res]} = Res, + ?assertMatch({ok, {1, <<_/binary>>}}, Doc1Res), + ?assertMatch({ok, {1, <<_/binary>>}}, Doc2Res), + ?assertMatch(conflict, Doc3Res). 
+ + +update_docs_duplicate_ids_with_batches(Db) -> + Opts = [{batch_size, 5000}], + + Doc = doc(9000), + + Res = fabric2_db:update_docs(Db, [Doc, doc(9000), Doc], Opts), + ?assertMatch({ok, [_, _, _]}, Res), + + {ok, [Doc1Res, Doc2Res, Doc3Res]} = Res, + ?assertMatch({ok, {1, <<_/binary>>}}, Doc1Res), + ?assertMatch({ok, {1, <<_/binary>>}}, Doc2Res), + ?assertMatch(conflict, Doc3Res). + + +update_docs_replicate_batches_duplicate_id(Db) -> + Opts = [replicated_changes], + + Doc = doc(10, {1, [rev()]}), + Docs = [Doc, Doc], + + ?assertMatch({ok, []}, fabric2_db:update_docs(Db, Docs, Opts)), + + ?assertEqual({ok, Doc}, fabric2_db:open_doc(Db, Doc#doc.id)). + + +% Utility functions + +doc() -> + doc(2). + + +doc(Size) -> + doc(Size, undefined). + + +doc(Size, Revs) -> + Doc = #doc{ + id = fabric2_util:uuid(), + body = doc_body(Size) + }, + case Revs of + undefined -> Doc; + _ -> Doc#doc{revs = Revs} + end. + + +rev() -> + fabric2_util:to_hex(crypto:strong_rand_bytes(16)). + + +doc_body(Size) when is_integer(Size), Size >= 2 -> + Val = fabric2_util:to_hex(crypto:strong_rand_bytes(Size div 2)), + {[{<<"x">>, Val}]}. diff --git a/src/global_changes/src/global_changes_httpd_handlers.erl b/src/global_changes/src/global_changes_httpd_handlers.erl index b21a64b8f..94a50abc8 100644 --- a/src/global_changes/src/global_changes_httpd_handlers.erl +++ b/src/global_changes/src/global_changes_httpd_handlers.erl @@ -12,7 +12,7 @@ -module(global_changes_httpd_handlers). --export([url_handler/1, db_handler/1, design_handler/1]). +-export([url_handler/1, db_handler/1, design_handler/1, handler_info/3]). url_handler(<<"_db_updates">>) -> fun global_changes_httpd:handle_global_changes_req/1; url_handler(_) -> no_match. @@ -20,3 +20,9 @@ url_handler(_) -> no_match. db_handler(_) -> no_match. design_handler(_) -> no_match. + +handler_info('GET', [<<"_db_updates">>], _) -> + {'db_updates.read', #{}}; + +handler_info(_, _, _) -> + no_match.
\ No newline at end of file diff --git a/src/global_changes/src/global_changes_server.erl b/src/global_changes/src/global_changes_server.erl index 7e3062586..a116e0668 100644 --- a/src/global_changes/src/global_changes_server.erl +++ b/src/global_changes/src/global_changes_server.erl @@ -25,7 +25,8 @@ handle_call/3, handle_cast/2, handle_info/2, - code_change/3 + code_change/3, + format_status/2 ]). -export([ @@ -143,7 +144,13 @@ handle_info(_, State) -> code_change(_OldVsn, State, _Extra) -> {ok, State}. - +format_status(_Opt, [_PDict, State]) -> + Scrubbed = State#state{ + pending_updates=nil + }, + [{data, [{"State", + ?record_to_keyval(state, Scrubbed) + }]}]. flush_updates(State) -> DocIds = sets:to_list(State#state.pending_updates), diff --git a/src/ioq/src/ioq.erl b/src/ioq/src/ioq.erl index 81d94a36f..99b3ce385 100644 --- a/src/ioq/src/ioq.erl +++ b/src/ioq/src/ioq.erl @@ -45,7 +45,7 @@ call(Fd, Msg, Metadata) -> Priority = io_class(Msg, Metadata), case bypass(Priority) of true -> - gen_server:call(Fd, Msg); + gen_server:call(Fd, Msg, infinity); false -> queued_call(Fd, Msg, Priority) end. diff --git a/src/jwtf/.gitignore b/src/jwtf/.gitignore new file mode 100644 index 000000000..5eadeac89 --- /dev/null +++ b/src/jwtf/.gitignore @@ -0,0 +1,4 @@ +*~ +_build/ +doc/ +rebar.lock diff --git a/src/jwtf/LICENSE b/src/jwtf/LICENSE new file mode 100644 index 000000000..d9a10c0d8 --- /dev/null +++ b/src/jwtf/LICENSE @@ -0,0 +1,176 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. 
For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS diff --git a/src/jwtf/README.md b/src/jwtf/README.md new file mode 100644 index 000000000..e6038fbc0 --- /dev/null +++ b/src/jwtf/README.md @@ -0,0 +1,18 @@ +# jwtf + +JSON Web Token Functions + +This library provides JWT parsing and validation functions + +Supports; + +* Verify +* RS256 +* RS384 +* RS512 +* HS256 +* HS384 +* HS512 +* ES256 +* ES384 +* ES512 diff --git a/src/jwtf/rebar.config b/src/jwtf/rebar.config new file mode 100644 index 000000000..e0d18443b --- /dev/null +++ b/src/jwtf/rebar.config @@ -0,0 +1,2 @@ +{cover_enabled, true}. +{cover_print_enabled, true}. diff --git a/src/jwtf/src/jwtf.app.src b/src/jwtf/src/jwtf.app.src new file mode 100644 index 000000000..24081bf6f --- /dev/null +++ b/src/jwtf/src/jwtf.app.src @@ -0,0 +1,32 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +{application, jwtf, [ + {description, "JSON Web Token Functions"}, + {vsn, git}, + {registered, []}, + {applications, [ + kernel, + stdlib, + b64url, + config, + crypto, + jiffy, + public_key + ]}, + {mod, {jwtf_app, []}}, + {env,[]}, + {modules, []}, + {maintainers, []}, + {licenses, []}, + {links, []} +]}. diff --git a/src/jwtf/src/jwtf.erl b/src/jwtf/src/jwtf.erl new file mode 100644 index 000000000..247f2b508 --- /dev/null +++ b/src/jwtf/src/jwtf.erl @@ -0,0 +1,353 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. 
You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +% @doc +% This module decodes and validates JWT tokens. Almost all property +% checks are optional. If not checked, the presence or validity of the +% field is not verified. Signature check is mandatory, though. + +-module(jwtf). + +-export([ + encode/3, + decode/3, + valid_algorithms/0, + verification_algorithm/1 +]). + +-define(ALGS, [ + {<<"RS256">>, {public_key, sha256}}, % RSA PKCS#1 signature with SHA-256 + {<<"RS384">>, {public_key, sha384}}, + {<<"RS512">>, {public_key, sha512}}, + {<<"ES256">>, {public_key, sha256}}, + {<<"ES384">>, {public_key, sha384}}, + {<<"ES512">>, {public_key, sha512}}, + {<<"HS256">>, {hmac, sha256}}, + {<<"HS384">>, {hmac, sha384}}, + {<<"HS512">>, {hmac, sha512}}]). + +-define(CHECKS, [ + alg, + exp, + iat, + iss, + kid, + nbf, + sig, + typ]). + + +% @doc encode +% Encode the JSON Header and Claims using Key and Alg obtained from Header +-spec encode(term(), term(), term()) -> + {ok, binary()} | no_return(). +encode(Header = {HeaderProps}, Claims, Key) -> + try + Alg = case prop(<<"alg">>, HeaderProps) of + undefined -> + throw({bad_request, <<"Missing alg header parameter">>}); + Val -> + Val + end, + EncodedHeader = b64url:encode(jiffy:encode(Header)), + EncodedClaims = b64url:encode(jiffy:encode(Claims)), + Message = <<EncodedHeader/binary, $., EncodedClaims/binary>>, + SignatureOrMac = case verification_algorithm(Alg) of + {public_key, Algorithm} -> + public_key:sign(Message, Algorithm, Key); + {hmac, Algorithm} -> + crypto:hmac(Algorithm, Key, Message) + end, + EncodedSignatureOrMac = b64url:encode(SignatureOrMac), + {ok, <<Message/binary, $., EncodedSignatureOrMac/binary>>} + catch + throw:Error -> + {error, Error} + end. + + +% @doc decode +% Decodes the supplied encoded token, checking +% for the attributes defined in Checks and calling +% the key store function to retrieve the key needed +% to verify the signature +decode(EncodedToken, Checks, KS) -> + try + [Header, Payload, Signature] = split(EncodedToken), + validate(Header, Payload, Signature, Checks, KS), + {ok, decode_b64url_json(Payload)} + catch + throw:Error -> + {error, Error} + end. + + +% @doc valid_algorithms +% Return a list of supported algorithms +-spec valid_algorithms() -> [binary()]. +valid_algorithms() -> + proplists:get_keys(?ALGS). + + +% @doc verification_algorithm +% Return {VerificationMethod, Algorithm} tuple for the specified Alg +-spec verification_algorithm(binary()) -> + {atom(), atom()} | no_return(). +verification_algorithm(Alg) -> + case lists:keyfind(Alg, 1, ?ALGS) of + {Alg, Val} -> + Val; + false -> + throw({bad_request, <<"Invalid alg header parameter">>}) + end. + + +validate(Header0, Payload0, Signature, Checks, KS) -> + validate_checks(Checks), + Header1 = props(decode_b64url_json(Header0)), + validate_header(Header1, Checks), + + Payload1 = props(decode_b64url_json(Payload0)), + validate_payload(Payload1, Checks), + + Alg = prop(<<"alg">>, Header1), + Key = key(Header1, Checks, KS), + verify(Alg, Header0, Payload0, Signature, Key). 
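% A hedged round-trip sketch for encode/3 and decode/3 as defined above. The
% secret, issuer and claim values are placeholders. For HS* algorithms the
% key is the raw shared secret; the key-store fun passed to decode/3 is
% called with the alg and (possibly undefined) kid taken from the header.

hs256_round_trip() ->
    Key = <<"super secret">>,
    Header = {[{<<"alg">>, <<"HS256">>}, {<<"typ">>, <<"JWT">>}]},
    Now = os:system_time(second),
    Claims = {[
        {<<"iss">>, <<"example.org">>},
        {<<"iat">>, Now},
        {<<"exp">>, Now + 3600},
        {<<"sub">>, <<"user123">>}
    ]},
    {ok, Token} = jwtf:encode(Header, Claims, Key),

    % Bare atoms require the field to be present and valid; {iss, Value}
    % additionally pins the expected issuer. Fields not listed are not
    % enforced, but the signature is always verified.
    Checks = [alg, typ, iat, exp, {iss, <<"example.org">>}],
    KS = fun(_Alg, _KID) -> Key end,
    {ok, {DecodedClaims}} = jwtf:decode(Token, Checks, KS),
    <<"user123">> = proplists:get_value(<<"sub">>, DecodedClaims).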
+ + +validate_checks(Checks) when is_list(Checks) -> + case {lists:usort(Checks), lists:sort(Checks)} of + {L, L} -> + ok; + {L1, L2} -> + error({duplicate_checks, L2 -- L1}) + end, + {_, UnknownChecks} = lists:partition(fun valid_check/1, Checks), + case UnknownChecks of + [] -> + ok; + UnknownChecks -> + error({unknown_checks, UnknownChecks}) + end. + + +valid_check(Check) when is_atom(Check) -> + lists:member(Check, ?CHECKS); + +valid_check({Check, _}) when is_atom(Check) -> + lists:member(Check, ?CHECKS); + +valid_check(_) -> + false. + + +validate_header(Props, Checks) -> + validate_typ(Props, Checks), + validate_alg(Props, Checks). + + +validate_typ(Props, Checks) -> + Required = prop(typ, Checks), + TYP = prop(<<"typ">>, Props), + case {Required, TYP} of + {undefined, undefined} -> + ok; + {true, undefined} -> + throw({bad_request, <<"Missing typ header parameter">>}); + {_, <<"JWT">>} -> + ok; + {true, _} -> + throw({bad_request, <<"Invalid typ header parameter">>}) + end. + + +validate_alg(Props, Checks) -> + Required = prop(alg, Checks), + Alg = prop(<<"alg">>, Props), + case {Required, Alg} of + {undefined, undefined} -> + ok; + {true, undefined} -> + throw({bad_request, <<"Missing alg header parameter">>}); + {_, Alg} -> + case lists:member(Alg, valid_algorithms()) of + true -> + ok; + false -> + throw({bad_request, <<"Invalid alg header parameter">>}) + end + end. + + +%% Not all these fields have to be present, but if they _are_ present +%% they must be valid. +validate_payload(Props, Checks) -> + validate_iss(Props, Checks), + validate_iat(Props, Checks), + validate_nbf(Props, Checks), + validate_exp(Props, Checks). + + +validate_iss(Props, Checks) -> + ExpectedISS = prop(iss, Checks), + ActualISS = prop(<<"iss">>, Props), + + case {ExpectedISS, ActualISS} of + {undefined, undefined} -> + ok; + {ISS, undefined} when ISS /= undefined -> + throw({bad_request, <<"Missing iss claim">>}); + {ISS, ISS} -> + ok; + {_, _} -> + throw({bad_request, <<"Invalid iss claim">>}) + end. + + +validate_iat(Props, Checks) -> + Required = prop(iat, Checks), + IAT = prop(<<"iat">>, Props), + + case {Required, IAT} of + {undefined, undefined} -> + ok; + {true, undefined} -> + throw({bad_request, <<"Missing iat claim">>}); + {_, IAT} when is_integer(IAT) -> + ok; + {true, _} -> + throw({bad_request, <<"Invalid iat claim">>}) + end. + + +validate_nbf(Props, Checks) -> + Required = prop(nbf, Checks), + NBF = prop(<<"nbf">>, Props), + + case {Required, NBF} of + {undefined, undefined} -> + ok; + {true, undefined} -> + throw({bad_request, <<"Missing nbf claim">>}); + {_, IAT} -> + assert_past(<<"nbf">>, IAT) + end. + + +validate_exp(Props, Checks) -> + Required = prop(exp, Checks), + EXP = prop(<<"exp">>, Props), + + case {Required, EXP} of + {undefined, undefined} -> + ok; + {true, undefined} -> + throw({bad_request, <<"Missing exp claim">>}); + {_, EXP} -> + assert_future(<<"exp">>, EXP) + end. + + +key(Props, Checks, KS) -> + Alg = prop(<<"alg">>, Props), + Required = prop(kid, Checks), + KID = prop(<<"kid">>, Props), + case {Required, KID} of + {true, undefined} -> + throw({bad_request, <<"Missing kid claim">>}); + {_, KID} -> + KS(Alg, KID) + end. 
+ + +verify(Alg, Header, Payload, SignatureOrMac0, Key) -> + Message = <<Header/binary, $., Payload/binary>>, + SignatureOrMac1 = b64url:decode(SignatureOrMac0), + {VerificationMethod, Algorithm} = verification_algorithm(Alg), + case VerificationMethod of + public_key -> + public_key_verify(Algorithm, Message, SignatureOrMac1, Key); + hmac -> + hmac_verify(Algorithm, Message, SignatureOrMac1, Key) + end. + + +public_key_verify(Algorithm, Message, Signature, PublicKey) -> + case public_key:verify(Message, Algorithm, Signature, PublicKey) of + true -> + ok; + false -> + throw({bad_request, <<"Bad signature">>}) + end. + + +hmac_verify(Algorithm, Message, HMAC, SecretKey) -> + case crypto:hmac(Algorithm, SecretKey, Message) of + HMAC -> + ok; + _ -> + throw({bad_request, <<"Bad HMAC">>}) + end. + + +split(EncodedToken) -> + case binary:split(EncodedToken, <<$.>>, [global]) of + [_, _, _] = Split -> Split; + _ -> throw({bad_request, <<"Malformed token">>}) + end. + + +decode_b64url_json(B64UrlEncoded) -> + try + case b64url:decode(B64UrlEncoded) of + {error, Reason} -> + throw({bad_request, Reason}); + JsonEncoded -> + jiffy:decode(JsonEncoded) + end + catch + error:Error -> + throw({bad_request, Error}) + end. + + +props({Props}) -> + Props; + +props(_) -> + throw({bad_request, <<"Not an object">>}). + + +assert_past(Name, Time) -> + case Time < now_seconds() of + true -> + ok; + false -> + throw({unauthorized, <<Name/binary, " not in past">>}) + end. + +assert_future(Name, Time) -> + case Time > now_seconds() of + true -> + ok; + false -> + throw({unauthorized, <<Name/binary, " not in future">>}) + end. + + +now_seconds() -> + {MegaSecs, Secs, _MicroSecs} = os:timestamp(), + MegaSecs * 1000000 + Secs. + + +prop(Prop, Props) -> + proplists:get_value(Prop, Props). diff --git a/src/jwtf/src/jwtf_app.erl b/src/jwtf/src/jwtf_app.erl new file mode 100644 index 000000000..bd708e2a3 --- /dev/null +++ b/src/jwtf/src/jwtf_app.erl @@ -0,0 +1,28 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(jwtf_app). + +-behaviour(application). + +%% Application callbacks +-export([start/2, stop/1]). + +%% =================================================================== +%% Application callbacks +%% =================================================================== + +start(_StartType, _StartArgs) -> + jwtf_sup:start_link(). + +stop(_State) -> + ok. diff --git a/src/jwtf/src/jwtf_keystore.erl b/src/jwtf/src/jwtf_keystore.erl new file mode 100644 index 000000000..5c2b47985 --- /dev/null +++ b/src/jwtf/src/jwtf_keystore.erl @@ -0,0 +1,166 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(jwtf_keystore). +-behaviour(gen_server). +-behaviour(config_listener). + +-include_lib("public_key/include/public_key.hrl"). + +% public api. +-export([ + get/2, + start_link/0 +]). + +% gen_server api. +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, + code_change/3, terminate/2]). + +% config_listener api +-export([handle_config_change/5, handle_config_terminate/3]). + +% public functions + +get(Alg, undefined) when is_binary(Alg) -> + get(Alg, <<"_default">>); + +get(Alg, KID0) when is_binary(Alg), is_binary(KID0) -> + Kty = kty(Alg), + KID = binary_to_list(KID0), + case ets:lookup(?MODULE, {Kty, KID}) of + [] -> + Key = get_from_config(Kty, KID), + ok = gen_server:call(?MODULE, {set, Kty, KID, Key}), + Key; + [{{Kty, KID}, Key}] -> + Key + end. + + +start_link() -> + gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). + +% gen_server functions + +init(_) -> + ok = config:listen_for_changes(?MODULE, nil), + ets:new(?MODULE, [public, named_table]), + {ok, nil}. + + +handle_call({set, Kty, KID, Key}, _From, State) -> + true = ets:insert(?MODULE, {{Kty, KID}, Key}), + {reply, ok, State}. + + +handle_cast({delete, Kty, KID}, State) -> + true = ets:delete(?MODULE, {Kty, KID}), + {noreply, State}; + +handle_cast(_Msg, State) -> + {noreply, State}. + + +handle_info(restart_config_listener, State) -> + ok = config:listen_for_changes(?MODULE, nil), + {noreply, State}; + +handle_info(_Msg, State) -> + {noreply, State}. + + +terminate(_Reason, _State) -> + ok. + + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + + +% config listener callback + +handle_config_change("jwt_keys", ConfigKey, _ConfigValue, _, _) -> + case string:split(ConfigKey, ":") of + [Kty, KID] -> + gen_server:cast(?MODULE, {delete, Kty, KID}); + _ -> + ignored + end, + {ok, nil}; + +handle_config_change(_, _, _, _, _) -> + {ok, nil}. + +handle_config_terminate(_Server, stop, _State) -> + ok; + +handle_config_terminate(_Server, _Reason, _State) -> + erlang:send_after(100, whereis(?MODULE), restart_config_listener). + +% private functions + +get_from_config(Kty, KID) -> + case config:get("jwt_keys", string:join([Kty, KID], ":")) of + undefined -> + throw({bad_request, <<"Unknown kid">>}); + Encoded -> + case Kty of + "hmac" -> + try + base64:decode(Encoded) + catch + error:_ -> + throw({bad_request, <<"Not a valid key">>}) + end; + "rsa" -> + case pem_decode(Encoded) of + #'RSAPublicKey'{} = Key -> + Key; + _ -> + throw({bad_request, <<"not an RSA public key">>}) + end; + "ec" -> + case pem_decode(Encoded) of + {#'ECPoint'{}, _} = Key -> + Key; + _ -> + throw({bad_request, <<"not an EC public key">>}) + end + end + end. + +pem_decode(PEM) -> + BinPEM = re:replace(PEM, "\\\\n", "\n", [global, {return, binary}]), + try + case public_key:pem_decode(BinPEM) of + [PEMEntry] -> + public_key:pem_entry_decode(PEMEntry); + _ -> + throw({bad_request, <<"Not a valid key">>}) + end + catch + error:_ -> + throw({bad_request, <<"Not a valid key">>}) + end. + +kty(<<"HS", _/binary>>) -> + "hmac"; + +kty(<<"RS", _/binary>>) -> + "rsa"; + +kty(<<"ES", _/binary>>) -> + "ec"; + +kty(_) -> + throw({bad_request, <<"Unknown kty">>}). 
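The keystore above reads keys from the "jwt_keys" config section under `"<kty>:<kid>"` entries, with `HS*`/`RS*`/`ES*` algorithms mapped to the `"hmac"`/`"rsa"`/`"ec"` prefixes by `kty/1`. A minimal sketch (hypothetical module name and kid, assuming the `config` and `jwtf` applications are running) mirroring what the keystore tests further down do:

```erlang
%% Hypothetical illustration only: register an HMAC key and fetch it back.
-module(jwtf_keystore_example).
-export([register_and_fetch/0]).

register_and_fetch() ->
    %% "aGVsbG8=" is base64 for <<"hello">>, as in the keystore tests below.
    config:set("jwt_keys", "hmac:example-kid", "aGVsbG8="),
    %% HS256 maps to the "hmac" kty, so this looks up {"hmac", "example-kid"}.
    <<"hello">> = jwtf_keystore:get(<<"HS256">>, <<"example-kid">>).
```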
diff --git a/src/jwtf/src/jwtf_sup.erl b/src/jwtf/src/jwtf_sup.erl new file mode 100644 index 000000000..6f44808de --- /dev/null +++ b/src/jwtf/src/jwtf_sup.erl @@ -0,0 +1,38 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(jwtf_sup). + +-behaviour(supervisor). + +%% API +-export([start_link/0]). + +%% Supervisor callbacks +-export([init/1]). + +%% Helper macro for declaring children of supervisor +-define(CHILD(I, Type), {I, {I, start_link, []}, permanent, 5000, Type, [I]}). + +%% =================================================================== +%% API functions +%% =================================================================== + +start_link() -> + supervisor:start_link({local, ?MODULE}, ?MODULE, []). + +%% =================================================================== +%% Supervisor callbacks +%% =================================================================== + +init([]) -> + {ok, { {one_for_one, 5, 10}, [?CHILD(jwtf_keystore, worker)]} }. diff --git a/src/jwtf/test/jwtf_keystore_tests.erl b/src/jwtf/test/jwtf_keystore_tests.erl new file mode 100644 index 000000000..acbc002b5 --- /dev/null +++ b/src/jwtf/test/jwtf_keystore_tests.erl @@ -0,0 +1,64 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(jwtf_keystore_tests). + +-include_lib("eunit/include/eunit.hrl"). +-include_lib("public_key/include/public_key.hrl"). + +-define(HMAC_SECRET, "aGVsbG8="). +-define(RSA_SECRET, "-----BEGIN PUBLIC KEY-----\\nMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAztanwQtIx0sms+x7m1SF\\nh7EHJHkM2biTJ41jR89FsDE2gd3MChpaqxemS5GpNvfFKRvuHa4PUZ3JtRCBG1KM\\n/7EWIVTy1JQDr2mb8couGlQNqz4uXN2vkNQ0XszgjU4Wn6ZpvYxmqPFbmkRe8QSn\\nAy2Wf8jQgjsbez8eaaX0G9S1hgFZUN3KFu7SVmUDQNvWpQdaJPP+ms5Z0CqF7JLa\\nvJmSdsU49nlYw9VH/XmwlUBMye6HgR4ZGCLQS85frqF0xLWvi7CsMdchcIjHudXH\\nQK1AumD/VVZVdi8Q5Qew7F6VXeXqnhbw9n6Px25cCuNuh6u5+E6GUzXRrMpqo9vO\\nqQIDAQAB\\n-----END PUBLIC KEY-----\\n"). 
+-define(BAD_RSA_SECRET,"-----BEGIN PUBLIC KEY-----\\nMIIDAzCCAeugAwIBAgIJAL5YnwkF5jT6MA0GCSqGSIb3DQEBBQUAMBgxFjAUBgNV\\nBAMMDWZvby5hdXRoMC5jb20wHhcNMTQwMzE4MjAwNzUwWhcNMjcxMTI1MjAwNzUw\\nWjAYMRYwFAYDVQQDDA1mb28uYXV0aDAuY29tMIIBIjANBgkqhkiG9w0BAQEFAAOC\\nAQ8AMIIBCgKCAQEAtP6w43ppU0nkqGNHASojFJl60+k3isNVzYTO06f2vm/5tc3l\\nRhEA6ykyIuO8tHY3Ziqowc4h8XGaeDKqHw/BSS/b54F2rUVb/wACWyJICkM3bGtC\\ntWmM7kU8XZRCqXV04qIgQte+9GFSOax/TFyotS+FGFyFPUY+b57H7/6wNQ8ywGLi\\nWCbrWEx4wOJbGhnVNV+STmZXJgToLgz0R2kwsiGURhHMkNkUjcRl34nSv+lMYSMK\\nyywwzu0k3KBgqkxWibU3pa3jibWVRxc20f8ltfByp/wU/ICQ0MNGJ3/KaCiOtGQa\\noZOa7bMzb4W1x2L3cfgrshLrp978+FEeNzY9KQIDAQABo1AwTjAdBgNVHQ4EFgQU\\nOyDe79RE2SYTcCNPbniw3p4uZykwHwYDVR0jBBgwFoAUOyDe79RE2SYTcCNPbniw\\n3p4uZykwDAYDVR0TBAUwAwEB/zANBgkqhkiG9w0BAQUFAAOCAQEAW0mB5wR1sSHC\\n7iSmQo1uioH80X7txJY6zXH8hVjoCQOGUCi79x43L9wUTtyJg44Z8RhNozWOsCZM\\nf5LDSkeNx48QITrinDqWv5C/NA0klJ1g0Y/jN9X01r5T6vGdge8inIbQcO7ZrJ6v\\nVYDH+9HLvfPKFYd0uhYRFnw2aa3mKIRsanlWSEYHQr5Aoa+nboFLRiDtVWBuiAoV\\nZ1NoYm7uheU42CNGJqkv6SXxKHTea2TjmOxKRmaxYMvkjk/CsiPrSEQHUxDXqSSd\\nrIWU8o+9q9Hpdb3UuNJzMjlTzg2/UeHpzMBJAWxUlzTuXMqrrDFF9V/d4zO77Ts/\\n4mRBKB+GsQ==\\n-----END PUBLIC KEY-----\\n"). + +-define(EC_SECRET, "-----BEGIN PUBLIC KEY-----\\nMHYwEAYHKoZIzj0CAQYFK4EEACIDYgAEDsr0lz/Dg3luarb+Kua0Wcj9WrfR23os\\nwHzakglb8GhWRDn+oZT0Bt/26sX8uB4/ij9PEOLHPo+IHBtX4ELFFVr5GTzlqcJe\\nyctaTDd1OOAPXYuc67EWtGZ3pDAzztRs\\n-----END PUBLIC KEY-----\\n"). + +setup() -> + test_util:start_applications([config, jwtf]), + config:set("jwt_keys", "hmac:hmac", ?HMAC_SECRET), + config:set("jwt_keys", "rsa:hmac", ?HMAC_SECRET), + config:set("jwt_keys", "ec:hmac", ?HMAC_SECRET), + + config:set("jwt_keys", "hmac:rsa", ?RSA_SECRET), + config:set("jwt_keys", "rsa:rsa", ?RSA_SECRET), + config:set("jwt_keys", "ec:rsa", ?RSA_SECRET), + + config:set("jwt_keys", "hmac:ec", ?EC_SECRET), + config:set("jwt_keys", "rsa:ec", ?EC_SECRET), + config:set("jwt_keys", "ec:ec", ?EC_SECRET), + + config:set("jwt_keys", "rsa:badrsa", ?BAD_RSA_SECRET). + + +teardown(_) -> + test_util:stop_applications([config, jwtf]). + +jwtf_keystore_test_() -> + { + setup, + fun setup/0, + fun teardown/1, + [ + ?_assertEqual(<<"hello">>, jwtf_keystore:get(<<"HS256">>, <<"hmac">>)), + ?_assertThrow({bad_request, _}, jwtf_keystore:get(<<"RS256">>, <<"hmac">>)), + ?_assertThrow({bad_request, _}, jwtf_keystore:get(<<"ES256">>, <<"hmac">>)), + + ?_assertThrow({bad_request, _}, jwtf_keystore:get(<<"HS256">>, <<"rsa">>)), + ?_assertMatch(#'RSAPublicKey'{}, jwtf_keystore:get(<<"RS256">>, <<"rsa">>)), + ?_assertThrow({bad_request, _}, jwtf_keystore:get(<<"ES256">>, <<"rsa">>)), + + ?_assertThrow({bad_request, _}, jwtf_keystore:get(<<"HS256">>, <<"ec">>)), + ?_assertThrow({bad_request, _}, jwtf_keystore:get(<<"RS256">>, <<"ec">>)), + ?_assertMatch({#'ECPoint'{}, _}, jwtf_keystore:get(<<"ES256">>, <<"ec">>)), + + ?_assertThrow({bad_request, <<"Not a valid key">>}, jwtf_keystore:get(<<"RS256">>, <<"badrsa">>)) + ] + }. diff --git a/src/jwtf/test/jwtf_tests.erl b/src/jwtf/test/jwtf_tests.erl new file mode 100644 index 000000000..ba944f7c7 --- /dev/null +++ b/src/jwtf/test/jwtf_tests.erl @@ -0,0 +1,317 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. 
You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(jwtf_tests). + +-include_lib("eunit/include/eunit.hrl"). +-include_lib("public_key/include/public_key.hrl"). + +encode(Header0, Payload0) -> + Header1 = b64url:encode(jiffy:encode(Header0)), + Payload1 = b64url:encode(jiffy:encode(Payload0)), + Sig = b64url:encode(<<"bad">>), + <<Header1/binary, $., Payload1/binary, $., Sig/binary>>. + +valid_header() -> + {[{<<"typ">>, <<"JWT">>}, {<<"alg">>, <<"RS256">>}]}. + +jwt_io_pubkey() -> + PublicKeyPEM = <<"-----BEGIN PUBLIC KEY-----\n" + "MIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQDdlatRjRjogo3WojgGH" + "FHYLugdUWAY9iR3fy4arWNA1KoS8kVw33cJibXr8bvwUAUparCwlvdbH6" + "dvEOfou0/gCFQsHUfQrSDv+MuSUMAe8jzKE4qW+jK+xQU9a03GUnKHkkl" + "e+Q0pX/g6jXZ7r1/xAK5Do2kQ+X5xK9cipRgEKwIDAQAB\n" + "-----END PUBLIC KEY-----\n">>, + [PEMEntry] = public_key:pem_decode(PublicKeyPEM), + public_key:pem_entry_decode(PEMEntry). + + +b64_badarg_test() -> + Encoded = <<"0.0.0">>, + ?assertEqual({error, {bad_request,badarg}}, + jwtf:decode(Encoded, [], nil)). + + +b64_bad_block_test() -> + Encoded = <<" aGVsbG8. aGVsbG8. aGVsbG8">>, + ?assertEqual({error, {bad_request,{bad_block,0}}}, + jwtf:decode(Encoded, [], nil)). + + +invalid_json_test() -> + Encoded = <<"fQ.fQ.fQ">>, + ?assertEqual({error, {bad_request,{1,invalid_json}}}, + jwtf:decode(Encoded, [], nil)). + + +truncated_json_test() -> + Encoded = <<"ew.ew.ew">>, + ?assertEqual({error, {bad_request,{2,truncated_json}}}, + jwtf:decode(Encoded, [], nil)). + + +missing_typ_test() -> + Encoded = encode({[]}, []), + ?assertEqual({error, {bad_request,<<"Missing typ header parameter">>}}, + jwtf:decode(Encoded, [typ], nil)). + + +invalid_typ_test() -> + Encoded = encode({[{<<"typ">>, <<"NOPE">>}]}, []), + ?assertEqual({error, {bad_request,<<"Invalid typ header parameter">>}}, + jwtf:decode(Encoded, [typ], nil)). + + +missing_alg_test() -> + Encoded = encode({[]}, []), + ?assertEqual({error, {bad_request,<<"Missing alg header parameter">>}}, + jwtf:decode(Encoded, [alg], nil)). + + +invalid_alg_test() -> + Encoded = encode({[{<<"alg">>, <<"NOPE">>}]}, []), + ?assertEqual({error, {bad_request,<<"Invalid alg header parameter">>}}, + jwtf:decode(Encoded, [alg], nil)). + + +missing_iss_test() -> + Encoded = encode(valid_header(), {[]}), + ?assertEqual({error, {bad_request,<<"Missing iss claim">>}}, + jwtf:decode(Encoded, [{iss, right}], nil)). + + +invalid_iss_test() -> + Encoded = encode(valid_header(), {[{<<"iss">>, <<"wrong">>}]}), + ?assertEqual({error, {bad_request,<<"Invalid iss claim">>}}, + jwtf:decode(Encoded, [{iss, right}], nil)). + + +missing_iat_test() -> + Encoded = encode(valid_header(), {[]}), + ?assertEqual({error, {bad_request,<<"Missing iat claim">>}}, + jwtf:decode(Encoded, [iat], nil)). + + +invalid_iat_test() -> + Encoded = encode(valid_header(), {[{<<"iat">>, <<"hello">>}]}), + ?assertEqual({error, {bad_request,<<"Invalid iat claim">>}}, + jwtf:decode(Encoded, [iat], nil)). + + +missing_nbf_test() -> + Encoded = encode(valid_header(), {[]}), + ?assertEqual({error, {bad_request,<<"Missing nbf claim">>}}, + jwtf:decode(Encoded, [nbf], nil)). 
+ + +invalid_nbf_test() -> + Encoded = encode(valid_header(), {[{<<"nbf">>, 2 * now_seconds()}]}), + ?assertEqual({error, {unauthorized, <<"nbf not in past">>}}, + jwtf:decode(Encoded, [nbf], nil)). + + +missing_exp_test() -> + Encoded = encode(valid_header(), {[]}), + ?assertEqual({error, {bad_request, <<"Missing exp claim">>}}, + jwtf:decode(Encoded, [exp], nil)). + + +invalid_exp_test() -> + Encoded = encode(valid_header(), {[{<<"exp">>, 0}]}), + ?assertEqual({error, {unauthorized, <<"exp not in future">>}}, + jwtf:decode(Encoded, [exp], nil)). + + +missing_kid_test() -> + Encoded = encode({[]}, {[]}), + ?assertEqual({error, {bad_request, <<"Missing kid claim">>}}, + jwtf:decode(Encoded, [kid], nil)). + + +public_key_not_found_test() -> + Encoded = encode( + {[{<<"alg">>, <<"RS256">>}, {<<"kid">>, <<"1">>}]}, + {[]}), + KS = fun(_, _) -> throw(not_found) end, + Expected = {error, not_found}, + ?assertEqual(Expected, jwtf:decode(Encoded, [], KS)). + + +bad_rs256_sig_test() -> + Encoded = encode( + {[{<<"typ">>, <<"JWT">>}, {<<"alg">>, <<"RS256">>}]}, + {[]}), + KS = fun(<<"RS256">>, undefined) -> jwt_io_pubkey() end, + ?assertEqual({error, {bad_request, <<"Bad signature">>}}, + jwtf:decode(Encoded, [], KS)). + + +bad_hs256_sig_test() -> + Encoded = encode( + {[{<<"typ">>, <<"JWT">>}, {<<"alg">>, <<"HS256">>}]}, + {[]}), + KS = fun(<<"HS256">>, undefined) -> <<"bad">> end, + ?assertEqual({error, {bad_request, <<"Bad HMAC">>}}, + jwtf:decode(Encoded, [], KS)). + + +malformed_token_test() -> + ?assertEqual({error, {bad_request, <<"Malformed token">>}}, + jwtf:decode(<<"a.b.c.d">>, [], nil)). + +unknown_atom_check_test() -> + ?assertError({unknown_checks, [foo, bar]}, + jwtf:decode(<<"a.b.c">>, [exp, foo, iss, bar], nil)). + +unknown_binary_check_test() -> + ?assertError({unknown_checks, [<<"bar">>]}, + jwtf:decode(<<"a.b.c">>, [exp, iss, <<"bar">>], nil)). + +duplicate_check_test() -> + ?assertError({duplicate_checks, [exp]}, + jwtf:decode(<<"a.b.c">>, [exp, exp], nil)). + + +%% jwt.io generated +hs256_test() -> + EncodedToken = <<"eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCIsImtpZCI6IjEyMzQ1Ni" + "J9.eyJpc3MiOiJodHRwczovL2Zvby5jb20iLCJpYXQiOjAsImV4cCI" + "6MTAwMDAwMDAwMDAwMDAsImtpZCI6ImJhciJ9.iS8AH11QHHlczkBn" + "Hl9X119BYLOZyZPllOVhSBZ4RZs">>, + KS = fun(<<"HS256">>, <<"123456">>) -> <<"secret">> end, + Checks = [{iss, <<"https://foo.com">>}, iat, exp, typ, alg, kid], + ?assertMatch({ok, _}, catch jwtf:decode(EncodedToken, Checks, KS)). + + +%% pip install PyJWT +%% > import jwt +%% > jwt.encode({'foo':'bar'}, 'secret', algorithm='HS384') +hs384_test() -> + EncodedToken = <<"eyJhbGciOiJIUzM4NCIsInR5cCI6IkpXVCJ9.eyJmb28iOiJiYXIif" + "Q.2quwghs6I56GM3j7ZQbn-ASZ53xdBqzPzTDHm_CtVec32LUy-Ezy" + "L3JjIe7WjL93">>, + KS = fun(<<"HS384">>, _) -> <<"secret">> end, + ?assertMatch({ok, {[{<<"foo">>,<<"bar">>}]}}, + catch jwtf:decode(EncodedToken, [], KS)). + + +%% pip install PyJWT +%% > import jwt +%% > jwt.encode({'foo':'bar'}, 'secret', algorithm='HS512') +hs512_test() -> + EncodedToken = <<"eyJhbGciOiJIUzUxMiIsInR5cCI6IkpXVCJ9.eyJmb28iOiJiYX" + "IifQ.WePl7achkd0oGNB8XRF_LJwxlyiPZqpdNgdKpDboAjSTsW" + "q-aOGNynTp8TOv8KjonFym8vwFwppXOLoLXbkIaQ">>, + KS = fun(<<"HS512">>, _) -> <<"secret">> end, + ?assertMatch({ok, {[{<<"foo">>,<<"bar">>}]}}, + catch jwtf:decode(EncodedToken, [], KS)). 
+ + +%% jwt.io generated +rs256_test() -> + EncodedToken = <<"eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0N" + "TY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiYWRtaW4iOnRydWV9.Ek" + "N-DOsnsuRjRO6BxXemmJDm3HbxrbRzXglbN2S4sOkopdU4IsDxTI8j" + "O19W_A4K8ZPJijNLis4EZsHeY559a4DFOd50_OqgHGuERTqYZyuhtF" + "39yxJPAjUESwxk2J5k_4zM3O-vtd1Ghyo4IbqKKSy6J9mTniYJPenn" + "5-HIirE">>, + + Checks = [sig, alg], + KS = fun(<<"RS256">>, undefined) -> jwt_io_pubkey() end, + + ExpectedPayload = {[ + {<<"sub">>, <<"1234567890">>}, + {<<"name">>, <<"John Doe">>}, + {<<"admin">>, true} + ]}, + + ?assertMatch({ok, ExpectedPayload}, jwtf:decode(EncodedToken, Checks, KS)). + + +encode_missing_alg_test() -> + ?assertEqual({error, {bad_request, <<"Missing alg header parameter">>}}, + jwtf:encode({[]}, {[]}, <<"foo">>)). + + +encode_invalid_alg_test() -> + ?assertEqual({error, {bad_request, <<"Invalid alg header parameter">>}}, + jwtf:encode({[{<<"alg">>, <<"BOGUS">>}]}, {[]}, <<"foo">>)). + + +encode_decode_test_() -> + [{Alg, encode_decode(Alg)} || Alg <- jwtf:valid_algorithms()]. + + +encode_decode(Alg) -> + {EncodeKey, DecodeKey} = case jwtf:verification_algorithm(Alg) of + {public_key, _Algorithm} -> + create_keypair(); + {hmac, _Algorithm} -> + Key = <<"a-super-secret-key">>, + {Key, Key} + end, + Claims = claims(), + {ok, Encoded} = jwtf:encode(header(Alg), Claims, EncodeKey), + KS = fun(_, _) -> DecodeKey end, + {ok, Decoded} = jwtf:decode(Encoded, [], KS), + ?_assertMatch(Claims, Decoded). + + +header(Alg) -> + {[ + {<<"typ">>, <<"JWT">>}, + {<<"alg">>, Alg}, + {<<"kid">>, <<"20170520-00:00:00">>} + ]}. + + +claims() -> + EpochSeconds = os:system_time(second), + {[ + {<<"iat">>, EpochSeconds}, + {<<"exp">>, EpochSeconds + 3600} + ]}. + +create_keypair() -> + %% https://tools.ietf.org/html/rfc7517#appendix-C + N = decode(<<"t6Q8PWSi1dkJj9hTP8hNYFlvadM7DflW9mWepOJhJ66w7nyoK1gPNqFMSQRy" + "O125Gp-TEkodhWr0iujjHVx7BcV0llS4w5ACGgPrcAd6ZcSR0-Iqom-QFcNP" + "8Sjg086MwoqQU_LYywlAGZ21WSdS_PERyGFiNnj3QQlO8Yns5jCtLCRwLHL0" + "Pb1fEv45AuRIuUfVcPySBWYnDyGxvjYGDSM-AqWS9zIQ2ZilgT-GqUmipg0X" + "OC0Cc20rgLe2ymLHjpHciCKVAbY5-L32-lSeZO-Os6U15_aXrk9Gw8cPUaX1" + "_I8sLGuSiVdt3C_Fn2PZ3Z8i744FPFGGcG1qs2Wz-Q">>), + E = decode(<<"AQAB">>), + D = decode(<<"GRtbIQmhOZtyszfgKdg4u_N-R_mZGU_9k7JQ_jn1DnfTuMdSNprTeaSTyWfS" + "NkuaAwnOEbIQVy1IQbWVV25NY3ybc_IhUJtfri7bAXYEReWaCl3hdlPKXy9U" + "vqPYGR0kIXTQRqns-dVJ7jahlI7LyckrpTmrM8dWBo4_PMaenNnPiQgO0xnu" + "ToxutRZJfJvG4Ox4ka3GORQd9CsCZ2vsUDmsXOfUENOyMqADC6p1M3h33tsu" + "rY15k9qMSpG9OX_IJAXmxzAh_tWiZOwk2K4yxH9tS3Lq1yX8C1EWmeRDkK2a" + "hecG85-oLKQt5VEpWHKmjOi_gJSdSgqcN96X52esAQ">>), + RSAPrivateKey = #'RSAPrivateKey'{ + modulus = N, + publicExponent = E, + privateExponent = D + }, + RSAPublicKey = #'RSAPublicKey'{ + modulus = N, + publicExponent = E + }, + {RSAPrivateKey, RSAPublicKey}. + + +decode(Goop) -> + crypto:bytes_to_integer(b64url:decode(Goop)). + + +now_seconds() -> + {MegaSecs, Secs, _MicroSecs} = os:timestamp(), + MegaSecs * 1000000 + Secs. diff --git a/src/ken/src/ken_server.erl b/src/ken/src/ken_server.erl index b33d01f35..74c8e25ac 100644 --- a/src/ken/src/ken_server.erl +++ b/src/ken/src/ken_server.erl @@ -16,7 +16,9 @@ -behaviour(gen_server). -vsn(1). -export([init/1, terminate/2]). --export([handle_call/3, handle_cast/2, handle_info/2, code_change/3]). +-export([ + handle_call/3, handle_cast/2, handle_info/2, code_change/3,format_status/2 +]). % Public interface -export([start_link/0]). 
@@ -228,6 +230,18 @@ handle_info(Msg, State) -> code_change(_OldVsn, State, _Extra) -> {ok, State}. + +format_status(_Opt, [_PDict, State]) -> + #state{ + q = Queue + } = State, + Scrubbed = State#state{ + q = {queue_length, queue:len(Queue)} + }, + [{data, [{"State", + ?record_to_keyval(state, Scrubbed) + }]}]. + %% private functions maybe_start_next_queued_job(#state{dbworker = {_,_}} = State) -> diff --git a/src/mango/README.md b/src/mango/README.md index 4c4bb60a6..7cec1af35 100644 --- a/src/mango/README.md +++ b/src/mango/README.md @@ -7,18 +7,37 @@ A MongoDB inspired query language interface for Apache CouchDB. Motivation ---------- -Mango provides a single HTTP API endpoint that accepts JSON bodies via HTTP POST. These bodies provide a set of instructions that will be handled with the results being returned to the client in the same order as they were specified. The general principle of this API is to be simple to implement on the client side while providing users a more natural conversion to Apache CouchDB than would otherwise exist using the standard RESTful HTTP interface that already exists. +Mango provides a single HTTP API endpoint that accepts JSON bodies via +HTTP POST. These bodies provide a set of instructions that will be +handled with the results being returned to the client in the same +order as they were specified. The general principle of this API is to +be simple to implement on the client side while providing users a more +natural conversion to Apache CouchDB than would otherwise exist using +the standard RESTful HTTP interface that already exists. Actions ------- -The general API exposes a set of actions that are similar to what MongoDB exposes (although not all of MongoDB's API is supported). These are meant to be loosely and obviously inspired by MongoDB but without too much attention to maintaining the exact behavior. - -Each action is specified as a JSON object with a number of keys that affect the behavior. Each action object has at least one field named "action" which must -have a string value indicating the action to be performed. For each action there are zero or more fields that will affect behavior. Some of these fields are required and some are optional. - -For convenience, the HTTP API will accept a JSON body that is either a single JSON object which specifies a single action or a JSON array that specifies a list of actions that will then be invoked serially. While multiple commands can be batched into a single HTTP request, there are no guarantees about atomicity or isolation for a batch of commands. +The general API exposes a set of actions that are similar to what +MongoDB exposes (although not all of MongoDB's API is +supported). These are meant to be loosely and obviously inspired by +MongoDB but without too much attention to maintaining the exact +behavior. + +Each action is specified as a JSON object with a number of keys that +affect the behavior. Each action object has at least one field named +"action" which must have a string value indicating the action to be +performed. For each action there are zero or more fields that will +affect behavior. Some of these fields are required and some are +optional. + +For convenience, the HTTP API will accept a JSON body that is either a +single JSON object which specifies a single action or a JSON array +that specifies a list of actions that will then be invoked +serially. 
While multiple commands can be batched into a single HTTP
+request, there are no guarantees about atomicity or isolation for a
+batch of commands.

Activating Query on a cluster
--------------------------------------------

@@ -32,24 +51,36 @@ rpc:multicall(config, set, ["native_query_servers", "query", "{mango_native_proc

HTTP API
========

-This API adds a single URI endpoint to the existing CouchDB HTTP API. Creating databases, authentication, Map/Reduce views, etc are all still supported exactly as currently document. No existing behavior is changed.
+This API adds a single URI endpoint to the existing CouchDB HTTP
+API. Creating databases, authentication, Map/Reduce views, etc are all
+still supported exactly as currently documented. No existing behavior is
+changed.

-The endpoint added is for the URL pattern `/dbname/_query` and has the following characteristics:
+The endpoint added is for the URL pattern `/dbname/_query` and has the
+following characteristics:

* The only HTTP method supported is `POST`.
* The request `Content-Type` must be `application/json`.
* The response status code will either be `200`, `4XX`, or `5XX`
* The response `Content-Type` will be `application/json`
* The response `Transfer-Encoding` will be `chunked`.
-* The response is a single JSON object or array that matches to the single command or list of commands that exist in the request.
+* The response is a single JSON object or array that matches the
+ single command or list of commands that exist in the request.

-This is intended to be a significantly simpler use of HTTP than the current APIs. This is motivated by the fact that this entire API is aimed at customers who are not as savvy at HTTP or non-relational document stores. Once a customer is comfortable using this API we hope to expose any other "power features" through the existing HTTP API and its adherence to HTTP semantics.
+This is intended to be a significantly simpler use of HTTP than the
+current APIs. This is motivated by the fact that this entire API is
+aimed at customers who are not as savvy at HTTP or non-relational
+document stores. Once a customer is comfortable using this API we hope
+to expose any other "power features" through the existing HTTP API and
+its adherence to HTTP semantics.

Supported Actions
=================

-This is a list of supported actions that Mango understands. For the time being it is limited to the four normal CRUD actions plus one meta action to create indices on the database.
+This is a list of supported actions that Mango understands. For the
+time being it is limited to the four normal CRUD actions plus one meta
+action to create indices on the database.

insert
------

@@ -62,9 +93,15 @@ Keys:

* docs - The JSON document to insert
* w (optional) (default: 2) - An integer > 0 for the write quorum size

-If the provided document or documents do not contain an "\_id" field one will be added using an automatically generated UUID.
+If the provided document or documents do not contain an "\_id" field
+one will be added using an automatically generated UUID.

-It is more performant to specify multiple documents in the "docs" field than it is to specify multiple independent insert actions. Each insert action is submitted as a single bulk update (ie, \_bulk\_docs in CouchDB terminology). This, however, does not make any guarantees on the isolation or atomicity of the bulk operation. It is merely a performance benefit.
+It is more performant to specify multiple documents in the "docs" +field than it is to specify multiple independent insert actions. Each +insert action is submitted as a single bulk update (ie, \_bulk\_docs +in CouchDB terminology). This, however, does not make any guarantees +on the isolation or atomicity of the bulk operation. It is merely a +performance benefit. find @@ -76,18 +113,41 @@ Keys: * action - "find" * selector - JSON object following selector syntax, described below -* limit (optional) (default: 25) - integer >= 0, Limit the number of rows returned -* skip (optional) (default: 0) - integer >= 0, Skip the specified number of rows -* sort (optional) (default: []) - JSON array following sort syntax, described below -* fields (optional) (default: null) - JSON array following the field syntax, described below -* r (optional) (default: 1) - By default a find will return the document that was found when traversing the index. Optionally there can be a quorum read for each document using `r` as the read quorum. This is obviously less performant than using the document local to the index. -* conflicts (optional) (default: false) - boolean, whether or not to include information about any existing conflicts for the document. - -The important thing to note about the find command is that it must execute over a generated index. If a selector is provided that cannot be satisfied using an existing index the list of basic indices that could be used will be returned. - -For the most part, indices are generated in response to the "create\_index" action (described below) although there are two special indices that can be used as well. The "\_id" is automatically indexed and is similar to every other index. There is also a special "\_seq" index to retrieve documents in the order of their update sequence. - -Its also quite possible to generate a query that can't be satisfied by any index. In this case an error will be returned stating that fact. Generally speaking the easiest way to stumble onto this is to attempt to OR two separate fields which would require a complete table scan. In the future I expect to support these more complicated queries using an extended indexing API (which deviates from the current MongoDB model a bit). +* limit (optional) (default: 25) - integer >= 0, Limit the number of + rows returned +* skip (optional) (default: 0) - integer >= 0, Skip the specified + number of rows +* sort (optional) (default: []) - JSON array following sort syntax, + described below +* fields (optional) (default: null) - JSON array following the field + syntax, described below +* r (optional) (default: 1) - By default a find will return the + document that was found when traversing the index. Optionally there + can be a quorum read for each document using `r` as the read + quorum. This is obviously less performant than using the document + local to the index. +* conflicts (optional) (default: false) - boolean, whether or not to + include information about any existing conflicts for the document. + +The important thing to note about the find command is that it must +execute over a generated index. If a selector is provided that cannot +be satisfied using an existing index the list of basic indices that +could be used will be returned. + +For the most part, indices are generated in response to the +"create\_index" action (described below) although there are two +special indices that can be used as well. The "\_id" is automatically +indexed and is similar to every other index. 
There is also a special
+"\_seq" index to retrieve documents in the order of their update
+sequence.
+
+It's also quite possible to generate a query that can't be satisfied by
+any index. In this case an error will be returned stating that
+fact. Generally speaking the easiest way to stumble onto this is to
+attempt to OR two separate fields which would require a complete table
+scan. In the future I expect to support these more complicated queries
+using an extended indexing API (which deviates from the current
+MongoDB model a bit).

update

@@ -100,15 +160,24 @@ Keys:

* action - "update"
* selector - JSON object following selector syntax, described below
* update - JSON object following update syntax, described below
-* upsert - (optional) (default: false) - boolean, Whether or not to create a new document if the selector does not match any documents in the database
-* limit (optional) (default: 1) - integer > 0, How many documents returned from the selector should be modified. Currently has a maximum value of 100
-* sort - (optional) (default: []) - JSON array following sort syntax, described below
+* upsert - (optional) (default: false) - boolean, Whether or not to
+ create a new document if the selector does not match any documents
+ in the database
+* limit (optional) (default: 1) - integer > 0, How many documents
+ returned from the selector should be modified. Currently has a
+ maximum value of 100
+* sort - (optional) (default: []) - JSON array following sort syntax,
+ described below
* r (optional) (default: 1) - integer > 0, read quorum constant
* w (optional) (default: 2) - integer > 0, write quorum constant
-Updates are fairly straightforward other than to mention that the selector (like find) must be satisifiable using an existing index.
+Updates are fairly straightforward other than to mention that the
+selector (like find) must be satisfiable using an existing index.
-On the update field, if the provided JSON object has one or more update operator (described below) then the operation is applied onto the existing document (if one exists) else the entire contents are replaced with exactly the value of the `update` field.
+On the update field, if the provided JSON object has one or more
+update operators (described below) then the operation is applied onto
+the existing document (if one exists) else the entire contents are
+replaced with exactly the value of the `update` field.

delete

@@ -120,15 +189,24 @@ Keys:

* action - "delete"
* selector - JSON object following selector syntax, described below
-* force (optional) (default: false) - Delete all conflicted versions of the document as well
-* limit - (optional) (default: 1) - integer > 0, How many documents to delete from the database. Currently has a maximum value of 100
-* sort - (optional) (default: []) - JSON array following sort syntax, described below
+* force (optional) (default: false) - Delete all conflicted versions
+ of the document as well
+* limit - (optional) (default: 1) - integer > 0, How many documents to
+ delete from the database. Currently has a maximum value of 100
+* sort - (optional) (default: []) - JSON array following sort syntax,
+ described below
* r (optional) (default: 1) - integer > 1, read quorum constant
* w (optional) (default: 2) - integer > 0, write quorum constant
-Deletes behave quite similarly to update except they attempt to remove documents from the database. Its important to note that if a document has conflicts it may "appear" that delete's aren't having an effect.
This is because the delete operation by default only removes a single revision. Specify `"force":true` if you would like to attempt to delete all live revisions.
+Deletes behave quite similarly to update except they attempt to remove
+documents from the database. It's important to note that if a document
+has conflicts it may "appear" that deletes aren't having an
+effect. This is because the delete operation by default only removes a
+single revision. Specify `"force":true` if you would like to attempt
+to delete all live revisions.

-If you wish to delete a specific revision of the document, you can specify it in the selector using the special "\_rev" field.
+If you wish to delete a specific revision of the document, you can
+specify it in the selector using the special "\_rev" field.

create\_index

@@ -140,17 +218,43 @@ Keys:

* action - "create\_index"
* index - JSON array following sort syntax, described below
-* type (optional) (default: "json") - string, specifying the index type to create. Currently only "json" indexes are supported but in the future we will provide full-text indexes as well as Geo spatial indexes
-* name (optional) - string, optionally specify a name for the index. If a name is not provided one will be automatically generated
-* ddoc (optional) - Indexes can be grouped into design documents underneath the hood for efficiency. This is an advanced feature. Don't specify a design document here unless you know the consequences of index invalidation. By default each index is placed in its own separate design document for isolation.
-
-Anytime an operation is required to locate a document in the database it is required that an index must exist that can be used to locate it. By default the only two indices that exist are for the document "\_id" and the special "\_seq" index.
-
-Indices are created in the background. If you attempt to create an index on a large database and then immediately utilize it, the request may block for a considerable amount of time before the request completes.
-
-Indices can specify multiple fields to index simultaneously. This is roughly analogous to a compound index in SQL with the corresponding tradeoffs. For instance, an index may contain the (ordered set of) fields "foo", "bar", and "baz". If a selector specifying "bar" is received, it can not be answered. Although if a selector specifying "foo" and "bar" is received, it can be answered more efficiently than if there were only an index on "foo" and "bar" independently.
-
-NB: while the index allows the ability to specify sort directions these are currently not supported. The sort direction must currently be specified as "asc" in the JSON. [INTERNAL]: This will require that we patch the view engine as well as the cluster coordinators in Fabric to follow the specified sort orders. The concepts are straightforward but the implementation may need some thought to fit into the current shape of things.
+* type (optional) (default: "json") - string, specifying the index
+ type to create. Currently only "json" indexes are supported but in
+ the future we will provide full-text indexes as well as Geo spatial
+ indexes
+* name (optional) - string, optionally specify a name for the
+ index. If a name is not provided one will be automatically generated
+* ddoc (optional) - Indexes can be grouped into design documents
+ underneath the hood for efficiency. This is an advanced
+ feature. Don't specify a design document here unless you know the
+ consequences of index invalidation.
By default each index is placed + in its own separate design document for isolation. + +Anytime an operation is required to locate a document in the database +it is required that an index must exist that can be used to locate +it. By default the only two indices that exist are for the document +"\_id" and the special "\_seq" index. + +Indices are created in the background. If you attempt to create an +index on a large database and then immediately utilize it, the request +may block for a considerable amount of time before the request +completes. + +Indices can specify multiple fields to index simultaneously. This is +roughly analogous to a compound index in SQL with the corresponding +tradeoffs. For instance, an index may contain the (ordered set of) +fields "foo", "bar", and "baz". If a selector specifying "bar" is +received, it can not be answered. Although if a selector specifying +"foo" and "bar" is received, it can be answered more efficiently than +if there were only an index on "foo" and "bar" independently. + +NB: while the index allows the ability to specify sort directions +these are currently not supported. The sort direction must currently +be specified as "asc" in the JSON. [INTERNAL]: This will require that +we patch the view engine as well as the cluster coordinators in Fabric +to follow the specified sort orders. The concepts are straightforward +but the implementation may need some thought to fit into the current +shape of things. list\_indexes @@ -172,9 +276,13 @@ Keys: * action - "delete\_index" * name - string, the index to delete -* design\_doc - string, the design doc id from which to delete the index. For auto-generated index names and design docs, you can retrieve this information from the `list\_indexes` action +* design\_doc - string, the design doc id from which to delete the + index. For auto-generated index names and design docs, you can + retrieve this information from the `list\_indexes` action -Indexes require resources to maintain. If you find that an index is no longer necessary then it can be beneficial to remove it from the database. +Indexes require resources to maintain. If you find that an index is no +longer necessary then it can be beneficial to remove it from the +database. describe\_selector @@ -186,36 +294,55 @@ Keys: * action - "describe\_selector" * selector - JSON object in selector syntax, described below -* extended (optional) (default: false) - Show information on what existing indexes could be used with this selector +* extended (optional) (default: false) - Show information on what + existing indexes could be used with this selector -This is a useful debugging utility that will show how a given selector is normalized before execution as well as information on what indexes could be used to satisfy it. +This is a useful debugging utility that will show how a given selector +is normalized before execution as well as information on what indexes +could be used to satisfy it. -If `"extended": true` is included then the list of existing indices that could be used for this selector are also returned. +If `"extended": true` is included then the list of existing indices +that could be used for this selector are also returned. JSON Syntax Descriptions ======================== -This API uses a few defined JSON structures for various operations. Here we'll describe each in detail. +This API uses a few defined JSON structures for various +operations. Here we'll describe each in detail. 
Selector Syntax --------------- -The Mango query language is expressed as a JSON object describing documents of interest. Within this structure it is also possible to express conditional logic using specially named fields. This is inspired by and intended to maintain a fairly close parity to the existing MongoDB behavior. +The Mango query language is expressed as a JSON object describing +documents of interest. Within this structure it is also possible to +express conditional logic using specially named fields. This is +inspired by and intended to maintain a fairly close parity to the +existing MongoDB behavior. As an example, the simplest selector for Mango might look something like such: +```json {"_id": "Paul"} +``` -Which would match the document named "Paul" (if one exists). Extending this example using other fields might look like such: +Which would match the document named "Paul" (if one exists). Extending +this example using other fields might look like such: +```json {"_id": "Paul", "location": "Boston"} +``` -This would match a document named "Paul" *AND* having a "location" value of "Boston". Seeing as though I'm sitting in my basement in Omaha, this is unlikely. +This would match a document named "Paul" *AND* having a "location" +value of "Boston". Seeing as though I'm sitting in my basement in +Omaha, this is unlikely. -There are two special syntax elements for the object keys in a selector. The first is that the period (full stop, or simply `.`) character denotes subfields in a document. For instance, here are two equivalent examples: +There are two special syntax elements for the object keys in a +selector. The first is that the period (full stop, or simply `.`) +character denotes subfields in a document. For instance, here are two +equivalent examples: {"location": {"city": "Omaha"}} {"location.city": "Omaha"} @@ -224,26 +351,36 @@ If the object's key contains the period it could be escaped with backslash, i.e. {"location\\.city": "Omaha"} -Note that the double backslash here is necessary to encode an actual single backslash. +Note that the double backslash here is necessary to encode an actual +single backslash. -The second important syntax element is the use of a dollar sign (`$`) prefix to denote operators. For example: +The second important syntax element is the use of a dollar sign (`$`) +prefix to denote operators. For example: {"age": {"$gt": 21}} In this example, we have created the boolean expression `age > 21`. -There are two core types of operators in the selector syntax: combination operators and condition operators. In general, combination operators contain groups of condition operators. We'll describe the list of each below. +There are two core types of operators in the selector syntax: +combination operators and condition operators. In general, combination +operators contain groups of condition operators. We'll describe the +list of each below. ### Implicit Operators -For the most part every operator must be of the form `{"$operator": argument}`. Though there are two implicit operators for selectors. +For the most part every operator must be of the form `{"$operator": +argument}`. Though there are two implicit operators for selectors. -First, any JSON object that is not the argument to a condition operator is an implicit `$and` operator on each field. For instance, these two examples are identical: +First, any JSON object that is not the argument to a condition +operator is an implicit `$and` operator on each field. 
For instance, +these two examples are identical: {"foo": "bar", "baz": true} {"$and": [{"foo": {"$eq": "bar"}}, {"baz": {"$eq": true}}]} -And as shown, any field that contains a JSON value that has no operators in it is an equality condition. For instance, these are equivalent: +And as shown, any field that contains a JSON value that has no +operators in it is an equality condition. For instance, these are +equivalent: {"foo": "bar"} {"foo": {"$eq": "bar"}} @@ -260,9 +397,12 @@ Although, the previous example would actually be normalized internally to this: ### Combination Operators -These operators are responsible for combining groups of condition operators. Most familiar are the standard boolean operators plus a few extra for working with JSON arrays. +These operators are responsible for combining groups of condition +operators. Most familiar are the standard boolean operators plus a few +extra for working with JSON arrays. -Each of the combining operators take a single argument that is either a condition operator or an array of condition operators. +Each of the combining operators take a single argument that is either +a condition operator or an array of condition operators. The list of combining characters: @@ -276,7 +416,13 @@ The list of combining characters: ### Condition Operators -Condition operators are specified on a per field basis and apply to the value indexed for that field. For instance, the basic "$eq" operator matches when the indexed field is equal to its argument. There is currently support for the basic equality and inequality operators as well as a number of meta operators. Some of these operators will accept any JSON argument while some require a specific JSON formatted argument. Each is noted below. +Condition operators are specified on a per field basis and apply to +the value indexed for that field. For instance, the basic "$eq" +operator matches when the indexed field is equal to its +argument. There is currently support for the basic equality and +inequality operators as well as a number of meta operators. Some of +these operators will accept any JSON argument while some require a +specific JSON formatted argument. Each is noted below. The list of conditional arguments: @@ -291,19 +437,28 @@ The list of conditional arguments: Object related operators -* "$exists" - boolean, check whether the field exists or not regardless of its value +* "$exists" - boolean, check whether the field exists or not + regardless of its value * "$type" - string, check the document field's type Array related operators -* "$in" - array of JSON values, the document field must exist in the list provided -* "$nin" - array of JSON values, the document field must not exist in the list provided -* "$size" - integer, special condition to match the length of an array field in a document. Non-array fields cannot match this condition. +* "$in" - array of JSON values, the document field must exist in the + list provided +* "$nin" - array of JSON values, the document field must not exist in + the list provided +* "$size" - integer, special condition to match the length of an array + field in a document. Non-array fields cannot match this condition. Misc related operators -* "$mod" - [Divisor, Remainder], where Divisor and Remainder are both positive integers (ie, greater than 0). Matches documents where (field % Divisor == Remainder) is true. This is false for any non-integer field -* "$regex" - string, a regular expression pattern to match against the document field. 
Only matches when the field is a string value and matches the supplied matches
+* "$mod" - [Divisor, Remainder], where Divisor and Remainder are both
+ positive integers (ie, greater than 0). Matches documents where
+ (field % Divisor == Remainder) is true. This is false for any
+ non-integer field
+* "$regex" - string, a regular expression pattern to match against the
+ document field. Only matches when the field is a string value and
+ matches the supplied regular expression

Update Syntax

@@ -315,19 +470,30 @@ Need to describe the syntax for update operators.

Sort Syntax
-----------

-The sort syntax is a basic array of field name and direction pairs. It looks like such:
+The sort syntax is a basic array of field name and direction pairs. It
+looks like such:

[{field1: dir1} | ...]

-Where field1 can be any field (dotted notation is available for sub-document fields) and dir1 can be "asc" or "desc".
+Where field1 can be any field (dotted notation is available for
+sub-document fields) and dir1 can be "asc" or "desc".

-Note that it is highly recommended that you specify a single key per object in your sort ordering so that the order is not dependent on the combination of JSON libraries between your application and the internals of Mango's indexing engine.
+Note that it is highly recommended that you specify a single key per
+object in your sort ordering so that the order is not dependent on the
+combination of JSON libraries between your application and the
+internals of Mango's indexing engine.

Fields Syntax
-------------

-When retrieving documents from the database you can specify that only a subset of the fields are returned. This allows you to limit your results strictly to the parts of the document that are interesting for the local application logic. The fields returned are specified as an array. Unlike MongoDB only the fields specified are included, there is no automatic inclusion of the "\_id" or other metadata fields when a field list is included.
+When retrieving documents from the database you can specify that only
+a subset of the fields are returned. This allows you to limit your
+results strictly to the parts of the document that are interesting for
+the local application logic. The fields returned are specified as an
+array. Unlike MongoDB only the fields specified are included, there is
+no automatic inclusion of the "\_id" or other metadata fields when a
+field list is included.

A trivial example:

@@ -344,16 +510,20 @@ POST /dbname/\_find

Issue a query.

-Request body is a JSON object that has the selector and the various options like limit/skip etc. Or we could post the selector and put the other options into the query string. Though I'd probably prefer to have it all in the body for consistency.
+Request body is a JSON object that has the selector and the various
+options like limit/skip etc. Or we could post the selector and put the
+other options into the query string. Though I'd probably prefer to
+have it all in the body for consistency.

-Response is streamed out like a view.
+Response is streamed out like a view.

POST /dbname/\_index
--------------------------

Request body contains the index definition.

-Response body is empty and the result is returned as the status code (200 OK -> created, 3something for exists).
+Response body is empty and the result is returned as the status code
+(200 OK -> created, 3something for exists).
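For illustration, one possible request body for this endpoint (the field names are hypothetical), built from the `index`, `name` and `type` keys described under the create\_index action above, with the index definition following the sort syntax:

```json
{
    "index": [{"foo": "asc"}, {"bar": "asc"}],
    "name": "foo-bar-index",
    "type": "json"
}
```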
GET /dbname/\_index ------------------------- diff --git a/src/mango/TODO.md b/src/mango/TODO.md index ce2d85f3d..95055dd06 100644 --- a/src/mango/TODO.md +++ b/src/mango/TODO.md @@ -1,9 +1,18 @@ -* Patch the view engine to do alternative sorts. This will include both the lower level couch\_view* modules as well as the fabric coordinators. +* Patch the view engine to do alternative sorts. This will include + both the lower level couch\_view* modules as well as the fabric + coordinators. -* Patch the view engine so we can specify options when returning docs from cursors. We'll want this so that we can delete specific revisions from a document. +* Patch the view engine so we can specify options when returning docs + from cursors. We'll want this so that we can delete specific + revisions from a document. -* Need to figure out how to do raw collation on some indices because at -least the _id index uses it forcefully. +* Need to figure out how to do raw collation on some indices because + at least the _id index uses it forcefully. -* Add lots more to the update API. Mongo appears to be missing some pretty obvious easy functionality here. Things like managing values doing things like multiplying numbers, or common string mutations would be obvious examples. Also it could be interesting to add to the language so that you can do conditional updates based on other document attributes. Definitely not a V1 endeavor.
\ No newline at end of file +* Add lots more to the update API. Mongo appears to be missing some + pretty obvious easy functionality here. Things like managing values + doing things like multiplying numbers, or common string mutations + would be obvious examples. Also it could be interesting to add to + the language so that you can do conditional updates based on other + document attributes. Definitely not a V1 endeavor. diff --git a/src/mango/rebar.config b/src/mango/rebar.config new file mode 100644 index 000000000..e0d18443b --- /dev/null +++ b/src/mango/rebar.config @@ -0,0 +1,2 @@ +{cover_enabled, true}. +{cover_print_enabled, true}. diff --git a/src/mango/src/mango_crud.erl b/src/mango/src/mango_crud.erl index 41a4d143d..66cef65b3 100644 --- a/src/mango/src/mango_crud.erl +++ b/src/mango/src/mango_crud.erl @@ -33,10 +33,8 @@ insert(Db, #doc{}=Doc, Opts) -> insert(Db, [Doc], Opts); insert(Db, {_}=Doc, Opts) -> insert(Db, [Doc], Opts); -insert(Db, Docs, Opts0) when is_list(Docs) -> - Opts1 = maybe_add_user_ctx(Db, Opts0), - Opts2 = maybe_int_to_str(w, Opts1), - case fabric:update_docs(Db, Docs, Opts2) of +insert(Db, Docs, Opts) when is_list(Docs) -> + case fabric2_db:update_docs(Db, Docs, Opts) of {ok, Results0} -> {ok, lists:zipwith(fun result_to_json/2, Docs, Results0)}; {accepted, Results0} -> @@ -46,10 +44,8 @@ insert(Db, Docs, Opts0) when is_list(Docs) -> end. -find(Db, Selector, Callback, UserAcc, Opts0) -> - Opts1 = maybe_add_user_ctx(Db, Opts0), - Opts2 = maybe_int_to_str(r, Opts1), - {ok, Cursor} = mango_cursor:create(Db, Selector, Opts2), +find(Db, Selector, Callback, UserAcc, Opts) -> + {ok, Cursor} = mango_cursor:create(Db, Selector, Opts), mango_cursor:execute(Cursor, Callback, UserAcc). @@ -99,30 +95,11 @@ delete(Db, Selector, Options) -> end. -explain(Db, Selector, Opts0) -> - Opts1 = maybe_add_user_ctx(Db, Opts0), - Opts2 = maybe_int_to_str(r, Opts1), - {ok, Cursor} = mango_cursor:create(Db, Selector, Opts2), +explain(Db, Selector, Opts) -> + {ok, Cursor} = mango_cursor:create(Db, Selector, Opts), mango_cursor:explain(Cursor). -maybe_add_user_ctx(Db, Opts) -> - case lists:keyfind(user_ctx, 1, Opts) of - {user_ctx, _} -> - Opts; - false -> - [{user_ctx, couch_db:get_user_ctx(Db)} | Opts] - end. - - -maybe_int_to_str(_Key, []) -> - []; -maybe_int_to_str(Key, [{Key, Val} | Rest]) when is_integer(Val) -> - [{Key, integer_to_list(Val)} | maybe_int_to_str(Key, Rest)]; -maybe_int_to_str(Key, [KV | Rest]) -> - [KV | maybe_int_to_str(Key, Rest)]. 
- - result_to_json(#doc{id=Id}, Result) -> result_to_json(Id, Result); result_to_json({Props}, Result) -> diff --git a/src/mango/src/mango_cursor.erl b/src/mango/src/mango_cursor.erl index b1cb4148e..63b449cdc 100644 --- a/src/mango/src/mango_cursor.erl +++ b/src/mango/src/mango_cursor.erl @@ -48,7 +48,9 @@ create(Db, Selector0, Opts) -> Selector = mango_selector:normalize(Selector0), - UsableIndexes = mango_idx:get_usable_indexes(Db, Selector, Opts), + UsableIndexes = fabric2_fdb:transactional(Db, fun (TxDb) -> + mango_idx:get_usable_indexes(TxDb, Selector, Opts) + end), case mango_cursor:maybe_filter_indexes_by_ddoc(UsableIndexes, Opts) of [] -> % use_index doesn't match a valid index - fall back to a valid one @@ -72,7 +74,6 @@ explain(#cursor{}=Cursor) -> {[ {dbname, mango_idx:dbname(Idx)}, {index, mango_idx:to_json(Idx)}, - {partitioned, mango_idx:partitioned(Idx)}, {selector, Selector}, {opts, {Opts}}, {limit, Limit}, @@ -206,12 +207,9 @@ invalid_index_warning_int(_, _) -> % returned, implying a lot of in-memory filtering index_scan_warning(#execution_stats { totalDocsExamined = Docs, - totalQuorumDocsExamined = DocsQuorum, resultsReturned = ResultCount }) -> - % Docs and DocsQuorum are mutually exclusive so it's safe to sum them - DocsScanned = Docs + DocsQuorum, - Ratio = calculate_index_scan_ratio(DocsScanned, ResultCount), + Ratio = calculate_index_scan_ratio(Docs, ResultCount), Threshold = config:get_integer("mango", "index_scan_warning_threshold", 10), case Threshold > 0 andalso Ratio > Threshold of true -> diff --git a/src/mango/src/mango_cursor_text.erl b/src/mango/src/mango_cursor_text.erl index 43ef84e4c..65811046e 100644 --- a/src/mango/src/mango_cursor_text.erl +++ b/src/mango/src/mango_cursor_text.erl @@ -43,7 +43,7 @@ }). -create(Db, Indexes, Selector, Opts0) -> +create(Db, Indexes, Selector, Opts) -> Index = case Indexes of [Index0] -> Index0; @@ -51,7 +51,7 @@ create(Db, Indexes, Selector, Opts0) -> ?MANGO_ERROR(multiple_text_indexes) end, - Opts = unpack_bookmark(couch_db:name(Db), Opts0), + Bookmark = unpack_bookmark(couch_db:name(Db), Opts), DreyfusLimit = get_dreyfus_limit(), Limit = erlang:min(DreyfusLimit, couch_util:get_value(limit, Opts, mango_opts:default_limit())), @@ -66,7 +66,8 @@ create(Db, Indexes, Selector, Opts0) -> opts = Opts, limit = Limit, skip = Skip, - fields = Fields + fields = Fields, + bookmark = Bookmark }}. @@ -77,7 +78,6 @@ explain(Cursor) -> } = Cursor, [ {'query', mango_selector_text:convert(Selector)}, - {partition, get_partition(Opts, null)}, {sort, sort_query(Opts, Selector)} ]. @@ -90,12 +90,12 @@ execute(Cursor, UserFun, UserAcc) -> skip = Skip, selector = Selector, opts = Opts, - execution_stats = Stats + execution_stats = Stats, + bookmark = Bookmark } = Cursor, Query = mango_selector_text:convert(Selector), QueryArgs = #index_query_args{ q = Query, - partition = get_partition(Opts, nil), sort = sort_query(Opts, Selector), raw_bookmark = true }, @@ -104,7 +104,7 @@ execute(Cursor, UserFun, UserAcc) -> dbname = couch_db:name(Db), ddocid = ddocid(Idx), idx_name = mango_idx:name(Idx), - bookmark = get_bookmark(Opts), + bookmark = Bookmark, limit = Limit, skip = Skip, query_args = QueryArgs, @@ -250,13 +250,6 @@ sort_query(Opts, Selector) -> end. -get_partition(Opts, Default) -> - case couch_util:get_value(partition, Opts) of - <<>> -> Default; - Else -> Else - end. 
- - get_bookmark(Opts) -> case lists:keyfind(bookmark, 1, Opts) of {_, BM} when is_list(BM), BM /= [] -> @@ -282,7 +275,7 @@ pack_bookmark(Bookmark) -> unpack_bookmark(DbName, Opts) -> - NewBM = case lists:keyfind(bookmark, 1, Opts) of + case lists:keyfind(bookmark, 1, Opts) of {_, nil} -> []; {_, Bin} -> @@ -291,8 +284,7 @@ unpack_bookmark(DbName, Opts) -> catch _:_ -> ?MANGO_ERROR({invalid_bookmark, Bin}) end - end, - lists:keystore(bookmark, 1, Opts, {bookmark, NewBM}). + end. ddocid(Idx) -> diff --git a/src/mango/src/mango_cursor_view.erl b/src/mango/src/mango_cursor_view.erl index 240ef501d..411f4af65 100644 --- a/src/mango/src/mango_cursor_view.erl +++ b/src/mango/src/mango_cursor_view.erl @@ -19,7 +19,6 @@ ]). -export([ - view_cb/2, handle_message/2, handle_all_docs_message/2, composite_indexes/2, @@ -34,7 +33,6 @@ -include("mango_cursor.hrl"). -include("mango_idx_view.hrl"). --define(HEARTBEAT_INTERVAL_IN_USEC, 4000000). create(Db, Indexes, Selector, Opts) -> FieldRanges = mango_idx_view:field_ranges(Selector), @@ -73,7 +71,6 @@ explain(Cursor) -> {include_docs, Args#mrargs.include_docs}, {view_type, Args#mrargs.view_type}, {reduce, Args#mrargs.reduce}, - {partition, couch_mrview_util:get_extra(Args, partition, null)}, {start_key, maybe_replace_max_json(Args#mrargs.start_key)}, {end_key, maybe_replace_max_json(Args#mrargs.end_key)}, {direction, Args#mrargs.direction}, @@ -89,20 +86,20 @@ explain(Cursor) -> maybe_replace_max_json([]) -> []; +maybe_replace_max_json([?MAX_JSON_OBJ | T]) -> + [<<"<MAX>">> | maybe_replace_max_json(T)]; + +maybe_replace_max_json([H | T]) -> + [H | maybe_replace_max_json(T)]; + maybe_replace_max_json(?MAX_STR) -> <<"<MAX>">>; -maybe_replace_max_json([H | T] = EndKey) when is_list(EndKey) -> - H1 = if H == ?MAX_JSON_OBJ -> <<"<MAX>">>; - true -> H - end, - [H1 | maybe_replace_max_json(T)]; - maybe_replace_max_json(EndKey) -> EndKey. -base_args(#cursor{index = Idx, selector = Selector} = Cursor) -> +base_args(#cursor{index = Idx} = Cursor) -> {StartKey, EndKey} = case Cursor#cursor.ranges of [empty] -> {null, null}; @@ -116,7 +113,9 @@ base_args(#cursor{index = Idx, selector = Selector} = Cursor) -> start_key = StartKey, end_key = EndKey, include_docs = true, - extra = [{callback, {?MODULE, view_cb}}, {selector, Selector}] + extra = [ + {ignore_partition_query_limit, true} + ] }. @@ -135,18 +134,19 @@ execute(#cursor{db = Db, index = Idx, execution_stats = Stats} = Cursor0, UserFu #cursor{opts = Opts, bookmark = Bookmark} = Cursor, Args0 = apply_opts(Opts, BaseArgs), Args = mango_json_bookmark:update_args(Bookmark, Args0), - UserCtx = couch_util:get_value(user_ctx, Opts, #user_ctx{}), - DbOpts = [{user_ctx, UserCtx}], Result = case mango_idx:def(Idx) of all_docs -> CB = fun ?MODULE:handle_all_docs_message/2, - fabric:all_docs(Db, DbOpts, CB, Cursor, Args); + AllDocOpts = fabric2_util:all_docs_view_opts(Args) + ++ [{restart_tx, true}], + fabric2_db:fold_docs(Db, CB, Cursor, AllDocOpts); _ -> CB = fun ?MODULE:handle_message/2, % Normal view - DDoc = ddocid(Idx), + DDocId = mango_idx:ddoc(Idx), + {ok, DDoc} = fabric2_db:open_doc(Db, DDocId), Name = mango_idx:name(Idx), - fabric:query_view(Db, DbOpts, DDoc, Name, CB, Cursor, Args) + couch_views:query(Db, DDoc, Name, CB, Cursor, Args) end, case Result of {ok, LastCursor} -> @@ -227,70 +227,10 @@ choose_best_index(_DbName, IndexRanges) -> {SelectedIndex, SelectedIndexRanges}. 
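Note: the recurring pattern in this changeset, here in mango_cursor:create/3 and again in the mango_httpd hunks further down, is to wrap index-metadata reads in fabric2_fdb:transactional/2 so that listing design docs, reading build status, and choosing an index all observe a single consistent FoundationDB snapshot. A minimal, hedged sketch of the wrapper shape; the body is illustrative:

    %% Group related index reads into one FDB transaction; the fun receives a
    %% transaction-bound db handle (TxDb) to use for every call inside it.
    UsableIndexes = fabric2_fdb:transactional(Db, fun(TxDb) ->
        mango_idx:get_usable_indexes(TxDb, Selector, Opts)
    end).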
-view_cb({meta, Meta}, Acc) -> - % Map function starting - put(mango_docs_examined, 0), - set_mango_msg_timestamp(), - ok = rexi:stream2({meta, Meta}), - {ok, Acc}; -view_cb({row, Row}, #mrargs{extra = Options} = Acc) -> - ViewRow = #view_row{ - id = couch_util:get_value(id, Row), - key = couch_util:get_value(key, Row), - doc = couch_util:get_value(doc, Row) - }, - case ViewRow#view_row.doc of - null -> - maybe_send_mango_ping(); - undefined -> - % include_docs=false. Use quorum fetch at coordinator - ok = rexi:stream2(ViewRow), - set_mango_msg_timestamp(); - Doc -> - put(mango_docs_examined, get(mango_docs_examined) + 1), - Selector = couch_util:get_value(selector, Options), - couch_stats:increment_counter([mango, docs_examined]), - case mango_selector:match(Selector, Doc) of - true -> - ok = rexi:stream2(ViewRow), - set_mango_msg_timestamp(); - false -> - maybe_send_mango_ping() - end - end, - {ok, Acc}; -view_cb(complete, Acc) -> - % Send shard-level execution stats - ok = rexi:stream2({execution_stats, {docs_examined, get(mango_docs_examined)}}), - % Finish view output - ok = rexi:stream_last(complete), - {ok, Acc}; -view_cb(ok, ddoc_updated) -> - rexi:reply({ok, ddoc_updated}). - - -maybe_send_mango_ping() -> - Current = os:timestamp(), - LastPing = get(mango_last_msg_timestamp), - % Fabric will timeout if it has not heard a response from a worker node - % after 5 seconds. Send a ping every 4 seconds so the timeout doesn't happen. - case timer:now_diff(Current, LastPing) > ?HEARTBEAT_INTERVAL_IN_USEC of - false -> - ok; - true -> - rexi:ping(), - set_mango_msg_timestamp() - end. - - -set_mango_msg_timestamp() -> - put(mango_last_msg_timestamp, os:timestamp()). - - handle_message({meta, _}, Cursor) -> {ok, Cursor}; handle_message({row, Props}, Cursor) -> - case doc_member(Cursor, Props) of + case match_doc(Cursor, Props) of {ok, Doc, {execution_stats, Stats}} -> Cursor1 = Cursor#cursor { execution_stats = Stats @@ -343,29 +283,8 @@ handle_doc(C, _Doc) -> {stop, C}. -ddocid(Idx) -> - case mango_idx:ddoc(Idx) of - <<"_design/", Rest/binary>> -> - Rest; - Else -> - Else - end. - - apply_opts([], Args) -> Args; -apply_opts([{r, RStr} | Rest], Args) -> - IncludeDocs = case list_to_integer(RStr) of - 1 -> - true; - R when R > 1 -> - % We don't load the doc in the view query because - % we have to do a quorum read in the coordinator - % so there's no point. - false - end, - NewArgs = Args#mrargs{include_docs = IncludeDocs}, - apply_opts(Rest, NewArgs); apply_opts([{conflicts, true} | Rest], Args) -> NewArgs = Args#mrargs{conflicts = true}, apply_opts(Rest, NewArgs); @@ -410,51 +329,23 @@ apply_opts([{update, false} | Rest], Args) -> update = false }, apply_opts(Rest, NewArgs); -apply_opts([{partition, <<>>} | Rest], Args) -> - apply_opts(Rest, Args); -apply_opts([{partition, Partition} | Rest], Args) when is_binary(Partition) -> - NewArgs = couch_mrview_util:set_extra(Args, partition, Partition), - apply_opts(Rest, NewArgs); apply_opts([{_, _} | Rest], Args) -> % Ignore unknown options apply_opts(Rest, Args). 
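Note: the deleted view_cb/2, heartbeat, and quorum re-fetch machinery above reflects the new flow: include_docs is always true, every row reaches the coordinator with its document attached, match_doc/2 applies the selector there, and docs-examined is counted once per row. As a consequence the separate quorum counter disappears from the execution stats (see the mango_execution_stats hunks further down). A hedged sketch of the reported stats shape after this change, with illustrative numbers:

    %% Shape returned by mango_execution_stats:to_json/1 after this diff;
    %% total_quorum_docs_examined is no longer reported.
    {[
        {total_keys_examined, 3},
        {total_docs_examined, 3},
        {results_returned, 1},
        {execution_time_ms, 4.2}
    ]}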
-doc_member(Cursor, RowProps) -> - Db = Cursor#cursor.db, - Opts = Cursor#cursor.opts, - ExecutionStats = Cursor#cursor.execution_stats, - Selector = Cursor#cursor.selector, - case couch_util:get_value(doc, RowProps) of - {DocProps} -> - % only matching documents are returned; the selector - % is evaluated at the shard level in view_cb({row, Row}, - {ok, {DocProps}, {execution_stats, ExecutionStats}}; - undefined -> - % an undefined doc was returned, indicating we should - % perform a quorum fetch - ExecutionStats1 = mango_execution_stats:incr_quorum_docs_examined(ExecutionStats), - couch_stats:increment_counter([mango, quorum_docs_examined]), - Id = couch_util:get_value(id, RowProps), - case mango_util:defer(fabric, open_doc, [Db, Id, Opts]) of - {ok, #doc{}=DocProps} -> - Doc = couch_doc:to_json_obj(DocProps, []), - match_doc(Selector, Doc, ExecutionStats1); - Else -> - Else - end; - _ -> - % no doc, no match - {no_match, null, {execution_stats, ExecutionStats}} - end. - - -match_doc(Selector, Doc, ExecutionStats) -> +match_doc(Cursor, RowProps) -> + #cursor{ + execution_stats = Stats0, + selector = Selector + } = Cursor, + Stats1 = mango_execution_stats:incr_docs_examined(Stats0, 1), + Doc = couch_util:get_value(doc, RowProps), case mango_selector:match(Selector, Doc) of true -> - {ok, Doc, {execution_stats, ExecutionStats}}; + {ok, Doc, {execution_stats, Stats1}}; false -> - {no_match, Doc, {execution_stats, ExecutionStats}} + {no_match, Doc, {execution_stats, Stats1}} end. @@ -474,34 +365,3 @@ update_bookmark_keys(#cursor{limit = Limit} = Cursor, Props) when Limit > 0 -> }; update_bookmark_keys(Cursor, _Props) -> Cursor. - - -%%%%%%%% module tests below %%%%%%%% - --ifdef(TEST). --include_lib("eunit/include/eunit.hrl"). - - -does_not_refetch_doc_with_value_test() -> - Cursor = #cursor { - db = <<"db">>, - opts = [], - execution_stats = #execution_stats{}, - selector = mango_selector:normalize({[{<<"user_id">>, <<"1234">>}]}) - }, - RowProps = [ - {id,<<"b06aadcf-cd0f-4ca6-9f7e-2c993e48d4c4">>}, - {key,<<"b06aadcf-cd0f-4ca6-9f7e-2c993e48d4c4">>}, - {doc,{ - [ - {<<"_id">>,<<"b06aadcf-cd0f-4ca6-9f7e-2c993e48d4c4">>}, - {<<"_rev">>,<<"1-a954fe2308f14307756067b0e18c2968">>}, - {<<"user_id">>,11} - ] - }} - ], - {Match, _, _} = doc_member(Cursor, RowProps), - ?assertEqual(Match, ok). - - --endif. diff --git a/src/mango/src/mango_epi.erl b/src/mango/src/mango_epi.erl index 1fcd05b7f..d593d6371 100644 --- a/src/mango/src/mango_epi.erl +++ b/src/mango/src/mango_epi.erl @@ -33,7 +33,9 @@ providers() -> ]. services() -> - []. + [ + {mango, mango_plugin} + ]. data_subscriptions() -> []. 
diff --git a/src/mango/src/mango_error.erl b/src/mango/src/mango_error.erl index bb545ad67..9ac8f6368 100644 --- a/src/mango/src/mango_error.erl +++ b/src/mango/src/mango_error.erl @@ -28,13 +28,6 @@ info(mango_idx, {no_usable_index, missing_sort_index}) -> <<"No index exists for this sort, " "try indexing by the sort fields.">> }; -info(mango_idx, {no_usable_index, missing_sort_index_partitioned}) -> - { - 400, - <<"no_usable_index">>, - <<"No partitioned index exists for this sort, " - "try indexing by the sort fields.">> - }; info(mango_idx, {no_usable_index, missing_sort_index_global}) -> { 400, @@ -118,13 +111,6 @@ info(mango_idx, {invalid_index_type, BadType}) -> <<"invalid_index">>, fmt("Invalid type for index: ~s", [BadType]) }; -info(mango_idx, {partitioned_option_mismatch, BadDDoc}) -> - { - 400, - <<"invalid_partitioned_option">>, - fmt("Requested partitioned option does not match existing value on" - " design document ~s", [BadDDoc]) - }; info(mango_idx, invalid_query_ddoc_language) -> { 400, diff --git a/src/mango/src/mango_eval.erl b/src/mango/src/mango_eval.erl new file mode 100644 index 000000000..59d784b49 --- /dev/null +++ b/src/mango/src/mango_eval.erl @@ -0,0 +1,115 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + + +-module(mango_eval). +-behavior(couch_eval). + + +-export([ + acquire_map_context/1, + release_map_context/1, + map_docs/2 +]). + + +-export([ + index_doc/2 +]). + + +-include_lib("couch/include/couch_db.hrl"). +-include("mango_idx.hrl"). + + +acquire_map_context(Opts) -> + #{ + db_name := DbName, + ddoc_id := DDocId, + map_funs := MapFuns + } = Opts, + Indexes = lists:map(fun (Def) -> + #idx{ + type = <<"json">>, + dbname = DbName, + ddoc = DDocId, + def = Def + } + end, MapFuns), + {ok, Indexes}. + + +release_map_context(_) -> + ok. + + +map_docs(Indexes, Docs) -> + {ok, lists:map(fun(Doc) -> + Json = couch_doc:to_json_obj(Doc, []), + Results = index_doc(Indexes, Json), + {Doc#doc.id, Results} + end, Docs)}. + + +index_doc(Indexes, Doc) -> + lists:map(fun(Idx) -> + {IdxDef} = mango_idx:def(Idx), + Results = get_index_entries(IdxDef, Doc), + case lists:member(not_found, Results) of + true -> + []; + false -> + [{Results, null}] + end + end, Indexes). + + +get_index_entries(IdxDef, Doc) -> + {Fields} = couch_util:get_value(<<"fields">>, IdxDef), + Selector = get_index_partial_filter_selector(IdxDef), + case should_index(Selector, Doc) of + false -> + [not_found]; + true -> + get_index_values(Fields, Doc) + end. + + +get_index_values(Fields, Doc) -> + lists:map(fun({Field, _Dir}) -> + case mango_doc:get_field(Doc, Field) of + not_found -> not_found; + bad_path -> not_found; + Value -> Value + end + end, Fields). + + +get_index_partial_filter_selector(IdxDef) -> + case couch_util:get_value(<<"partial_filter_selector">>, IdxDef, {[]}) of + {[]} -> + % this is to support legacy text indexes that had the + % partial_filter_selector set as selector + couch_util:get_value(<<"selector">>, IdxDef, {[]}); + Else -> + Else + end. 
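Note: the new mango_eval module above gives couch_views a pure-Erlang mapper for language=query design documents: each "map function" is really an index definition, and index_doc/2 emits one key per index, or nothing when an indexed field is missing or the partial filter selector does not match. A hedged worked example based on the functions shown above; #idx{} fields other than type and def are omitted and the EJSON values are illustrative:

    %% Index on fields a, b (ascending); empty partial_filter_selector.
    Idx = #idx{
        type = <<"json">>,
        def = {[{<<"fields">>, {[{<<"a">>, <<"asc">>}, {<<"b">>, <<"asc">>}]}}]}
    },
    Doc = {[{<<"_id">>, <<"d1">>}, {<<"a">>, 1}, {<<"b">>, 2}]},
    %% One result list per index; each entry is {Key, Value} with a null value.
    [[{[1, 2], null}]] = mango_eval:index_doc([Idx], Doc),
    %% A doc missing an indexed field produces no rows for that index.
    [[]] = mango_eval:index_doc([Idx], {[{<<"_id">>, <<"d2">>}, {<<"a">>, 1}]}).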
+ + +should_index(Selector, Doc) -> + NormSelector = mango_selector:normalize(Selector), + Matches = mango_selector:match(NormSelector, Doc), + IsDesign = case mango_doc:get_field(Doc, <<"_id">>) of + <<"_design/", _/binary>> -> true; + _ -> false + end, + Matches and not IsDesign. diff --git a/src/mango/src/mango_execution_stats.erl b/src/mango/src/mango_execution_stats.erl index 5878a3190..fe9d27b90 100644 --- a/src/mango/src/mango_execution_stats.erl +++ b/src/mango/src/mango_execution_stats.erl @@ -18,7 +18,6 @@ incr_keys_examined/1, incr_docs_examined/1, incr_docs_examined/2, - incr_quorum_docs_examined/1, incr_results_returned/1, log_start/1, log_end/1, @@ -33,7 +32,6 @@ to_json(Stats) -> {[ {total_keys_examined, Stats#execution_stats.totalKeysExamined}, {total_docs_examined, Stats#execution_stats.totalDocsExamined}, - {total_quorum_docs_examined, Stats#execution_stats.totalQuorumDocsExamined}, {results_returned, Stats#execution_stats.resultsReturned}, {execution_time_ms, Stats#execution_stats.executionTimeMs} ]}. @@ -55,12 +53,6 @@ incr_docs_examined(Stats, N) -> }. -incr_quorum_docs_examined(Stats) -> - Stats#execution_stats { - totalQuorumDocsExamined = Stats#execution_stats.totalQuorumDocsExamined + 1 - }. - - incr_results_returned(Stats) -> couch_stats:increment_counter([mango, results_returned]), Stats#execution_stats { diff --git a/src/mango/src/mango_execution_stats.hrl b/src/mango/src/mango_execution_stats.hrl index ea5ed5ee8..783c1e7f9 100644 --- a/src/mango/src/mango_execution_stats.hrl +++ b/src/mango/src/mango_execution_stats.hrl @@ -13,7 +13,6 @@ -record(execution_stats, { totalKeysExamined = 0, totalDocsExamined = 0, - totalQuorumDocsExamined = 0, resultsReturned = 0, executionStartTime, executionTimeMs diff --git a/src/mango/src/mango_httpd.erl b/src/mango/src/mango_httpd.erl index 379d2e127..8d5a2123d 100644 --- a/src/mango/src/mango_httpd.erl +++ b/src/mango/src/mango_httpd.erl @@ -32,9 +32,8 @@ threshold = 1490 }). 
-handle_req(#httpd{} = Req, Db0) -> +handle_req(#httpd{} = Req, Db) -> try - Db = set_user_ctx(Req, Db0), handle_req_int(Req, Db) catch throw:{mango_error, Module, Reason} -> @@ -61,7 +60,9 @@ handle_req_int(_, _) -> handle_index_req(#httpd{method='GET', path_parts=[_, _]}=Req, Db) -> Params = lists:flatmap(fun({K, V}) -> parse_index_param(K, V) end, chttpd:qs(Req)), - Idxs = lists:sort(mango_idx:list(Db)), + Idxs = fabric2_fdb:transactional(Db, fun(TxDb) -> + lists:sort(mango_idx:list(TxDb)) + end), JsonIdxs0 = lists:map(fun mango_idx:to_json/1, Idxs), TotalRows = length(JsonIdxs0), Limit = case couch_util:get_value(limit, Params, TotalRows) of @@ -87,26 +88,27 @@ handle_index_req(#httpd{method='POST', path_parts=[_, _]}=Req, Db) -> {ok, Idx0} = mango_idx:new(Db, Opts), {ok, Idx} = mango_idx:validate_new(Idx0, Db), DbOpts = [{user_ctx, Req#httpd.user_ctx}, deleted, ejson_body], - {ok, DDoc} = mango_util:load_ddoc(Db, mango_idx:ddoc(Idx), DbOpts), - Id = Idx#idx.ddoc, - Name = Idx#idx.name, - Status = case mango_idx:add(DDoc, Idx) of - {ok, DDoc} -> - <<"exists">>; - {ok, NewDDoc} -> - CreateOpts = get_idx_w_opts(Opts), - case mango_crud:insert(Db, NewDDoc, CreateOpts) of - {ok, [{RespProps}]} -> - case lists:keyfind(error, 1, RespProps) of - {error, Reason} -> - ?MANGO_ERROR({error_saving_ddoc, Reason}); - _ -> - <<"created">> - end; - _ -> - ?MANGO_ERROR(error_saving_ddoc) - end - end, + Id = mango_idx:ddoc(Idx), + Name = mango_idx:name(Idx), + Status = fabric2_fdb:transactional(Db, fun(TxDb) -> + {ok, DDoc} = mango_util:load_ddoc(TxDb, Id, DbOpts), + case mango_idx:add(DDoc, Idx) of + {ok, DDoc} -> + <<"exists">>; + {ok, NewDDoc} -> + case mango_crud:insert(TxDb, NewDDoc, Opts) of + {ok, [{RespProps}]} -> + case lists:keyfind(error, 1, RespProps) of + {error, Reason} -> + ?MANGO_ERROR({error_saving_ddoc, Reason}); + _ -> + <<"created">> + end; + _ -> + ?MANGO_ERROR(error_saving_ddoc) + end + end + end), chttpd:send_json(Req, {[{result, Status}, {id, Id}, {name, Name}]}); handle_index_req(#httpd{path_parts=[_, _]}=Req, _Db) -> @@ -119,20 +121,21 @@ handle_index_req(#httpd{method='POST', path_parts=[_, <<"_index">>, <<"_bulk_delete">>]}=Req, Db) -> chttpd:validate_ctype(Req, "application/json"), {ok, Opts} = mango_opts:validate_bulk_delete(chttpd:json_body_obj(Req)), - Idxs = mango_idx:list(Db), - DDocs = get_bulk_delete_ddocs(Opts), - DelOpts = get_idx_w_opts(Opts), - {Success, Fail} = lists:foldl(fun(DDocId0, {Success0, Fail0}) -> - DDocId = convert_to_design_id(DDocId0), - Filt = fun(Idx) -> mango_idx:ddoc(Idx) == DDocId end, - Id = {<<"id">>, DDocId}, - case mango_idx:delete(Filt, Db, Idxs, DelOpts) of - {ok, true} -> - {[{[Id, {<<"ok">>, true}]} | Success0], Fail0}; - {error, Error} -> - {Success0, [{[Id, {<<"error">>, Error}]} | Fail0]} - end - end, {[], []}, DDocs), + {Success, Fail} = fabric2_fdb:transactional(Db, fun (TxDb) -> + Idxs = mango_idx:list(TxDb), + DDocs = get_bulk_delete_ddocs(Opts), + lists:foldl(fun(DDocId0, {Success0, Fail0}) -> + DDocId = convert_to_design_id(DDocId0), + Filt = fun(Idx) -> mango_idx:ddoc(Idx) == DDocId end, + Id = {<<"id">>, DDocId}, + case mango_idx:delete(Filt, TxDb, Idxs, Opts) of + {ok, true} -> + {[{[Id, {<<"ok">>, true}]} | Success0], Fail0}; + {error, Error} -> + {Success0, [{[Id, {<<"error">>, Error}]} | Fail0]} + end + end, {[], []}, DDocs) + end), chttpd:send_json(Req, {[{<<"success">>, Success}, {<<"fail">>, Fail}]}); handle_index_req(#httpd{path_parts=[_, <<"_index">>, @@ -146,16 +149,18 @@ handle_index_req(#httpd{method='DELETE', 
handle_index_req(#httpd{method='DELETE', path_parts=[_, _, DDocId0, Type, Name]}=Req, Db) -> - Idxs = mango_idx:list(Db), - DDocId = convert_to_design_id(DDocId0), - DelOpts = get_idx_del_opts(Req), - Filt = fun(Idx) -> - IsDDoc = mango_idx:ddoc(Idx) == DDocId, - IsType = mango_idx:type(Idx) == Type, - IsName = mango_idx:name(Idx) == Name, - IsDDoc andalso IsType andalso IsName - end, - case mango_idx:delete(Filt, Db, Idxs, DelOpts) of + Result = fabric2_fdb:transactional(Db, fun(TxDb) -> + Idxs = mango_idx:list(TxDb), + DDocId = convert_to_design_id(DDocId0), + Filt = fun(Idx) -> + IsDDoc = mango_idx:ddoc(Idx) == DDocId, + IsType = mango_idx:type(Idx) == Type, + IsName = mango_idx:name(Idx) == Name, + IsDDoc andalso IsType andalso IsName + end, + mango_idx:delete(Filt, TxDb, Idxs, []) + end), + case Result of {ok, true} -> chttpd:send_json(Req, {[{ok, true}]}); {error, not_found} -> @@ -170,27 +175,30 @@ handle_index_req(#httpd{path_parts=[_, _, _DDocId0, _Type, _Name]}=Req, _Db) -> handle_explain_req(#httpd{method='POST'}=Req, Db) -> chttpd:validate_ctype(Req, "application/json"), - Body = maybe_set_partition(Req), + Body = chttpd:json_body_obj(Req), {ok, Opts0} = mango_opts:validate_find(Body), {value, {selector, Sel}, Opts} = lists:keytake(selector, 1, Opts0), - Resp = mango_crud:explain(Db, Sel, Opts), + Resp = fabric2_fdb:transactional(Db, fun(TxDb) -> + mango_crud:explain(TxDb, Sel, Opts) + end), chttpd:send_json(Req, Resp); handle_explain_req(Req, _Db) -> chttpd:send_method_not_allowed(Req, "POST"). -handle_find_req(#httpd{method='POST'}=Req, Db) -> - chttpd:validate_ctype(Req, "application/json"), - Body = maybe_set_partition(Req), +handle_find_req(#httpd{method='POST'}=Req0, Db) -> + {ok, Req1} = mango_plugin:before_find(Req0), + chttpd:validate_ctype(Req1, "application/json"), + Body = chttpd:json_body_obj(Req1), {ok, Opts0} = mango_opts:validate_find(Body), {value, {selector, Sel}, Opts} = lists:keytake(selector, 1, Opts0), - {ok, Resp0} = start_find_resp(Req), + {ok, Resp0} = start_find_resp(Req1), case run_find(Resp0, Db, Sel, Opts) of {ok, AccOut} -> - end_find_resp(AccOut); + end_find_resp(Req1, AccOut); {error, Error} -> - chttpd:send_error(Req, Error) + chttpd:send_error(Req1, Error) end; @@ -198,20 +206,6 @@ handle_find_req(Req, _Db) -> chttpd:send_method_not_allowed(Req, "POST"). -set_user_ctx(#httpd{user_ctx=Ctx}, Db) -> - {ok, NewDb} = couch_db:set_user_ctx(Db, Ctx), - NewDb. - - -get_idx_w_opts(Opts) -> - case lists:keyfind(w, 1, Opts) of - {w, N} when is_integer(N), N > 0 -> - [{w, integer_to_list(N)}]; - _ -> - [{w, "2"}] - end. - - get_bulk_delete_ddocs(Opts) -> case lists:keyfind(docids, 1, Opts) of {docids, DDocs} when is_list(DDocs) -> @@ -221,33 +215,6 @@ get_bulk_delete_ddocs(Opts) -> end. -get_idx_del_opts(Req) -> - try - WStr = chttpd:qs_value(Req, "w", "2"), - _ = list_to_integer(WStr), - [{w, WStr}] - catch _:_ -> - [{w, "2"}] - end. - - -maybe_set_partition(Req) -> - {Props} = chttpd:json_body_obj(Req), - case chttpd:qs_value(Req, "partition", undefined) of - undefined -> - {Props}; - Partition -> - case couch_util:get_value(<<"partition">>, Props) of - undefined -> - {[{<<"partition">>, ?l2b(Partition)} | Props]}; - Partition -> - {Props}; - OtherPartition -> - ?MANGO_ERROR({bad_partition, OtherPartition}) - end - end. - - convert_to_design_id(DDocId) -> case DDocId of <<"_design/", _/binary>> -> DDocId; @@ -259,14 +226,15 @@ start_find_resp(Req) -> chttpd:start_delayed_json_response(Req, 200, [], "{\"docs\":["). 
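Note: one subtlety worth calling out in the index-creation hunk above: inside the transaction, the clause `{ok, DDoc} -> <<"exists">>` matches only because DDoc is already bound to the design doc that was just loaded, so mango_idx:add/2 returning that same ddoc unchanged signals the index already exists; a modified ddoc falls through to the `{ok, NewDDoc}` clause and is saved. A hedged reduction of that shape (save_and_report_created is a hypothetical helper name standing in for the mango_crud:insert branch):

    {ok, DDoc} = mango_util:load_ddoc(TxDb, Id, DbOpts),
    case mango_idx:add(DDoc, Idx) of
        {ok, DDoc}    -> <<"exists">>;  %% add/2 returned the same ddoc: index already present
        {ok, NewDDoc} -> save_and_report_created(TxDb, NewDDoc, Opts)  %% hypothetical helper
    end.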
-end_find_resp(Acc0) -> - #vacc{resp=Resp00, buffer=Buf, kvs=KVs, threshold=Max} = Acc0, +end_find_resp(Req, Acc0) -> + #vacc{resp=Resp00, buffer=Buf, kvs=KVs0, threshold=Max} = Acc0, {ok, Resp0} = chttpd:close_delayed_json_object(Resp00, Buf, "\r\n]", Max), + {ok, KVs1} = mango_plugin:after_find(Req, Resp0, KVs0), FinalAcc = lists:foldl(fun({K, V}, Acc) -> JK = ?JSON_ENCODE(K), JV = ?JSON_ENCODE(V), [JV, ": ", JK, ",\r\n" | Acc] - end, [], KVs), + end, [], KVs1), Chunk = lists:reverse(FinalAcc, ["}\r\n"]), {ok, Resp1} = chttpd:send_delayed_chunk(Resp0, Chunk), chttpd:end_delayed_json_response(Resp1). diff --git a/src/mango/src/mango_httpd_handlers.erl b/src/mango/src/mango_httpd_handlers.erl index 80e5e277e..c1ddd6c4e 100644 --- a/src/mango/src/mango_httpd_handlers.erl +++ b/src/mango/src/mango_httpd_handlers.erl @@ -12,7 +12,7 @@ -module(mango_httpd_handlers). --export([url_handler/1, db_handler/1, design_handler/1]). +-export([url_handler/1, db_handler/1, design_handler/1, handler_info/3]). url_handler(_) -> no_match. @@ -22,3 +22,32 @@ db_handler(<<"_find">>) -> fun mango_httpd:handle_req/2; db_handler(_) -> no_match. design_handler(_) -> no_match. + +handler_info('GET', [Db, <<"_index">>], _) -> + {'db.mango.index.read', #{'db.name' => Db}}; + +handler_info('POST', [Db, <<"_index">>], _) -> + {'db.mango.index.create', #{'db.name' => Db}}; + +handler_info('POST', [Db, <<"_index">>, <<"_bulk_delete">>], _) -> + {'db.mango.index.delete', #{'db.name' => Db, multi => true}}; + +handler_info('DELETE', [Db, <<"_index">>, <<"_design">>, Name, Type, Idx], _) -> + {'db.mango.index.delete', #{ + 'db.name' => Db, + 'design.id' => Name, + 'index.type' => Type, + 'index.name' => Idx + }}; + +handler_info(M, [Db, <<"_index">>, <<"_design/", N/binary>>, T, I], R) -> + handler_info(M, [Db, <<"_index">>, <<"_design">>, N, T, I], R); + +handler_info('POST', [Db, <<"_explain">>], _) -> + {'db.mango.explain.execute', #{'db.name' => Db}}; + +handler_info('POST', [Db, <<"_find">>], _) -> + {'db.mango.find.execute', #{'db.name' => Db}}; + +handler_info(_, _, _) -> + no_match.
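Note: the new handler_info/3 clauses above let the HTTP layer tag Mango requests for metrics and auditing; given a method and split path they return an event name plus a small metadata map. A hedged illustration of the expected return values (the db name is an example; the request argument is ignored by these clauses):

    {'db.mango.find.execute', #{'db.name' := <<"mydb">>}} =
        mango_httpd_handlers:handler_info('POST', [<<"mydb">>, <<"_find">>], req),
    no_match =
        mango_httpd_handlers:handler_info('GET', [<<"mydb">>, <<"_changes">>], req).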
\ No newline at end of file diff --git a/src/mango/src/mango_idx.erl b/src/mango/src/mango_idx.erl index 5d06a8fe3..37b6e03eb 100644 --- a/src/mango/src/mango_idx.erl +++ b/src/mango/src/mango_idx.erl @@ -19,7 +19,6 @@ -export([ list/1, - recover/1, new/2, validate_new/2, @@ -33,7 +32,6 @@ name/1, type/1, def/1, - partitioned/1, opts/1, columns/1, is_usable/3, @@ -51,11 +49,35 @@ -include_lib("couch/include/couch_db.hrl"). -include("mango.hrl"). -include("mango_idx.hrl"). +-include_lib("couch_views/include/couch_views.hrl"). list(Db) -> - {ok, Indexes} = ddoc_cache:open(db_to_name(Db), ?MODULE), - Indexes. + DDocs = couch_views_ddoc:get_mango_list(Db), + DbName = fabric2_db:name(Db), + Indexes = lists:foldl(fun(DDoc, Acc) -> + {Props} = couch_doc:to_json_obj(DDoc, []), + + case proplists:get_value(<<"language">>, Props) == <<"query">> of + true -> + {ok, Mrst} = couch_mrview_util:ddoc_to_mrst(DbName, DDoc), + + IsInteractive = couch_views_ddoc:is_interactive(DDoc), + BuildState = couch_views_fdb:get_build_status(Db, Mrst), + + Idxs = lists:map(fun(Idx) -> + Idx#idx{ + build_status = BuildState, + interactive = IsInteractive + } + end, from_ddoc(Db, DDoc)), + Acc ++ Idxs; + false -> + Acc + end + + end, [], DDocs), + Indexes ++ special(Db). get_usable_indexes(Db, Selector, Opts) -> @@ -63,14 +85,14 @@ get_usable_indexes(Db, Selector, Opts) -> GlobalIndexes = mango_cursor:remove_indexes_with_partial_filter_selector( ExistingIndexes ), + BuiltIndexes = remove_unbuilt_indexes(GlobalIndexes), UserSpecifiedIndex = mango_cursor:maybe_filter_indexes_by_ddoc(ExistingIndexes, Opts), - UsableIndexes0 = lists:usort(GlobalIndexes ++ UserSpecifiedIndex), - UsableIndexes1 = filter_partition_indexes(UsableIndexes0, Opts), + UsableIndexes0 = lists:usort(BuiltIndexes ++ UserSpecifiedIndex), SortFields = get_sort_fields(Opts), UsableFilter = fun(I) -> is_usable(I, Selector, SortFields) end, - case lists:filter(UsableFilter, UsableIndexes1) of + case lists:filter(UsableFilter, UsableIndexes0) of [] -> mango_sort_error(Db, Opts); UsableIndexes -> @@ -78,30 +100,8 @@ get_usable_indexes(Db, Selector, Opts) -> end. -mango_sort_error(Db, Opts) -> - case {fabric_util:is_partitioned(Db), is_opts_partitioned(Opts)} of - {false, _} -> - ?MANGO_ERROR({no_usable_index, missing_sort_index}); - {true, true} -> - ?MANGO_ERROR({no_usable_index, missing_sort_index_partitioned}); - {true, false} -> - ?MANGO_ERROR({no_usable_index, missing_sort_index_global}) - end. - - -recover(Db) -> - {ok, DDocs0} = mango_util:open_ddocs(Db), - Pred = fun({Props}) -> - case proplists:get_value(<<"language">>, Props) of - <<"query">> -> true; - _ -> false - end - end, - DDocs = lists:filter(Pred, DDocs0), - Special = special(Db), - {ok, Special ++ lists:flatmap(fun(Doc) -> - from_ddoc(Db, Doc) - end, DDocs)}. +mango_sort_error(_Db, _Opts) -> + ?MANGO_ERROR({no_usable_index, missing_sort_index}). get_sort_fields(Opts) -> @@ -124,7 +124,6 @@ new(Db, Opts) -> name = IdxName, type = Type, def = Def, - partitioned = get_idx_partitioned(Opts), opts = filter_opts(Opts) }}. @@ -136,11 +135,10 @@ validate_new(Idx, Db) -> add(DDoc, Idx) -> Mod = idx_mod(Idx), - {ok, NewDDoc1} = Mod:add(DDoc, Idx), - NewDDoc2 = set_ddoc_partitioned(NewDDoc1, Idx), + {ok, NewDDoc} = Mod:add(DDoc, Idx), % Round trip through JSON for normalization - Body = ?JSON_DECODE(?JSON_ENCODE(NewDDoc2#doc.body)), - {ok, NewDDoc2#doc{body = Body}}. + Body = ?JSON_DECODE(?JSON_ENCODE(NewDDoc#doc.body)), + {ok, NewDDoc#doc{body = Body}}. 
remove(DDoc, Idx) -> @@ -173,16 +171,17 @@ delete(Filt, Db, Indexes, DelOpts) -> end. -from_ddoc(Db, {Props}) -> +from_ddoc(Db, #doc{id = DDocId} = DDoc) -> + {Props} = couch_doc:to_json_obj(DDoc, []), DbName = db_to_name(Db), - DDoc = proplists:get_value(<<"_id">>, Props), + DDocId = proplists:get_value(<<"_id">>, Props), case proplists:get_value(<<"language">>, Props) of <<"query">> -> ok; _ -> ?MANGO_ERROR(invalid_query_ddoc_language) end, - IdxMods = case clouseau_rpc:connected() of + IdxMods = case is_text_service_available() of true -> [mango_idx_view, mango_idx_text]; false -> @@ -192,8 +191,7 @@ from_ddoc(Db, {Props}) -> lists:map(fun(Idx) -> Idx#idx{ dbname = DbName, - ddoc = DDoc, - partitioned = get_idx_partitioned(Db, Props) + ddoc = DDocId } end, Idxs). @@ -204,7 +202,8 @@ special(Db) -> name = <<"_all_docs">>, type = <<"special">>, def = all_docs, - opts = [] + opts = [], + build_status = ?INDEX_READY }, % Add one for _update_seq [AllDocs]. @@ -230,10 +229,6 @@ def(#idx{def=Def}) -> Def. -partitioned(#idx{partitioned=Partitioned}) -> - Partitioned. - - opts(#idx{opts=Opts}) -> Opts. @@ -294,7 +289,7 @@ db_to_name(Name) when is_binary(Name) -> db_to_name(Name) when is_list(Name) -> iolist_to_binary(Name); db_to_name(Db) -> - couch_db:name(Db). + fabric2_db:name(Db). get_idx_def(Opts) -> @@ -309,7 +304,7 @@ get_idx_def(Opts) -> get_idx_type(Opts) -> case proplists:get_value(type, Opts) of <<"json">> -> <<"json">>; - <<"text">> -> case clouseau_rpc:connected() of + <<"text">> -> case is_text_service_available() of true -> <<"text">>; false -> @@ -322,6 +317,11 @@ get_idx_type(Opts) -> end. +is_text_service_available() -> + erlang:function_exported(clouseau_rpc, connected, 0) andalso + clouseau_rpc:connected(). + + get_idx_ddoc(Idx, Opts) -> case proplists:get_value(ddoc, Opts) of <<"_design/", _Rest/binary>> = Name -> @@ -350,97 +350,6 @@ gen_name(Idx, Opts0) -> mango_util:enc_hex(Sha). -get_idx_partitioned(Opts) -> - case proplists:get_value(partitioned, Opts) of - B when is_boolean(B) -> - B; - db_default -> - % Default to the partitioned setting on - % the database. - undefined - end. - - -set_ddoc_partitioned(DDoc, Idx) -> - % We have to verify that the new index being added - % to this design document either matches the current - % ddoc's design options *or* this is a new design doc - #doc{ - id = DDocId, - revs = Revs, - body = {BodyProps} - } = DDoc, - OldDOpts = couch_util:get_value(<<"options">>, BodyProps), - OldOpt = case OldDOpts of - {OldDOptProps} when is_list(OldDOptProps) -> - couch_util:get_value(<<"partitioned">>, OldDOptProps); - _ -> - undefined - end, - % If new matches old we're done - if Idx#idx.partitioned == OldOpt -> DDoc; true -> - % If we're creating a ddoc then we can set the options - case Revs == {0, []} of - true when Idx#idx.partitioned /= undefined -> - set_ddoc_partitioned_option(DDoc, Idx#idx.partitioned); - true when Idx#idx.partitioned == undefined -> - DDoc; - false -> - ?MANGO_ERROR({partitioned_option_mismatch, DDocId}) - end - end. 
- - -set_ddoc_partitioned_option(DDoc, Partitioned) -> - #doc{ - body = {BodyProps} - } = DDoc, - NewProps = case couch_util:get_value(<<"options">>, BodyProps) of - {Existing} when is_list(Existing) -> - Opt = {<<"partitioned">>, Partitioned}, - New = lists:keystore(<<"partitioned">>, 1, Existing, Opt), - lists:keystore(<<"options">>, 1, BodyProps, {<<"options">>, New}); - undefined -> - New = {<<"options">>, {[{<<"partitioned">>, Partitioned}]}}, - lists:keystore(<<"options">>, 1, BodyProps, New) - end, - DDoc#doc{body = {NewProps}}. - - -get_idx_partitioned(Db, DDocProps) -> - Default = fabric_util:is_partitioned(Db), - case couch_util:get_value(<<"options">>, DDocProps) of - {DesignOpts} -> - case couch_util:get_value(<<"partitioned">>, DesignOpts) of - P when is_boolean(P) -> - P; - undefined -> - Default - end; - undefined -> - Default - end. - -is_opts_partitioned(Opts) -> - case couch_util:get_value(partition, Opts, <<>>) of - <<>> -> - false; - Partition when is_binary(Partition) -> - true - end. - - -filter_partition_indexes(Indexes, Opts) -> - PFilt = case is_opts_partitioned(Opts) of - false -> - fun(#idx{partitioned = P}) -> not P end; - true -> - fun(#idx{partitioned = P}) -> P end - end, - Filt = fun(Idx) -> type(Idx) == <<"special">> orelse PFilt(Idx) end, - lists:filter(Filt, Indexes). - - filter_opts([]) -> []; filter_opts([{user_ctx, _} | Rest]) -> @@ -451,10 +360,6 @@ filter_opts([{name, _} | Rest]) -> filter_opts(Rest); filter_opts([{type, _} | Rest]) -> filter_opts(Rest); -filter_opts([{w, _} | Rest]) -> - filter_opts(Rest); -filter_opts([{partitioned, _} | Rest]) -> - filter_opts(Rest); filter_opts([Opt | Rest]) -> [Opt | filter_opts(Rest)]. @@ -478,6 +383,17 @@ get_legacy_selector(Def) -> Selector -> Selector end. +% remove any interactive indexes that are not built. If an index is not +% interactive than we do not remove it as it will be built when queried +remove_unbuilt_indexes(Indexes) -> + lists:filter(fun(Idx) -> + case Idx#idx.interactive of + true -> Idx#idx.build_status == ?INDEX_READY; + _ -> true + end + end, Indexes). + + -ifdef(TEST). -include_lib("eunit/include/eunit.hrl"). @@ -488,8 +404,9 @@ index(SelectorName, Selector) -> <<"Selected">>,<<"json">>, {[{<<"fields">>,{[{<<"location">>,<<"asc">>}]}}, {SelectorName,{Selector}}]}, - false, - [{<<"def">>,{[{<<"fields">>,[<<"location">>]}]}}] + [{<<"def">>,{[{<<"fields">>,[<<"location">>]}]}}], + <<"ready">>, + false }. get_partial_filter_all_docs_test() -> diff --git a/src/mango/src/mango_idx.hrl b/src/mango/src/mango_idx.hrl index 97259500b..68e5aaaf0 100644 --- a/src/mango/src/mango_idx.hrl +++ b/src/mango/src/mango_idx.hrl @@ -16,6 +16,7 @@ name, type, def, - partitioned, - opts + opts, + build_status, + interactive }). diff --git a/src/mango/src/mango_idx_special.erl b/src/mango/src/mango_idx_special.erl index ac6efc707..3548372b6 100644 --- a/src/mango/src/mango_idx_special.erl +++ b/src/mango/src/mango_idx_special.erl @@ -28,6 +28,7 @@ -include_lib("couch/include/couch_db.hrl"). -include("mango_idx.hrl"). +-include_lib("couch_views/include/couch_views.hrl"). validate(_) -> @@ -55,7 +56,8 @@ to_json(#idx{def=all_docs}) -> {<<"fields">>, [{[ {<<"_id">>, <<"asc">>} ]}]} - ]}} + ]}}, + {build_status, ?INDEX_READY} ]}. 
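Note: index records now carry build_status and interactive (see the mango_idx.hrl change above), and remove_unbuilt_indexes/1 drops interactive indexes that have not finished building while keeping background-built ones, since those are built when queried. A hedged illustration; it assumes a building-state value distinct from ?INDEX_READY (written here as ?INDEX_BUILDING), omits the other record fields, and calls the internal function as it would be called within mango_idx:

    Ready      = #idx{name = <<"a">>, interactive = true,  build_status = ?INDEX_READY},
    Building   = #idx{name = <<"b">>, interactive = true,  build_status = ?INDEX_BUILDING},
    Background = #idx{name = <<"c">>, interactive = false, build_status = ?INDEX_BUILDING},
    %% Only the unbuilt interactive index is removed from consideration.
    [Ready, Background] = remove_unbuilt_indexes([Ready, Building, Background]).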
diff --git a/src/mango/src/mango_idx_text.erl b/src/mango/src/mango_idx_text.erl index 1d4becfb3..71eaf110a 100644 --- a/src/mango/src/mango_idx_text.erl +++ b/src/mango/src/mango_idx_text.erl @@ -100,7 +100,6 @@ to_json(Idx) -> {ddoc, Idx#idx.ddoc}, {name, Idx#idx.name}, {type, Idx#idx.type}, - {partitioned, Idx#idx.partitioned}, {def, {def_to_json(Idx#idx.def)}} ]}. diff --git a/src/mango/src/mango_idx_view.erl b/src/mango/src/mango_idx_view.erl index 37911498c..a73d82ae6 100644 --- a/src/mango/src/mango_idx_view.erl +++ b/src/mango/src/mango_idx_view.erl @@ -54,7 +54,16 @@ add(#doc{body={Props0}}=DDoc, Idx) -> NewView = make_view(Idx), Views2 = lists:keystore(element(1, NewView), 1, Views1, NewView), Props1 = lists:keystore(<<"views">>, 1, Props0, {<<"views">>, {Views2}}), - {ok, DDoc#doc{body={Props1}}}. + + {Opts0} = proplists:get_value(<<"options">>, Props1, {[]}), + Opts1 = case lists:keymember(<<"interactive">>, 1, Opts0) of + true -> Opts0; + false -> Opts0 ++ [{<<"interactive">>, true}] + end, + Props2 = lists:keystore(<<"options">>, 1, Props1, {<<"options">>, {Opts1}}), + + Props3 = [{<<"autoupdate">>, false}], + {ok, DDoc#doc{body={Props2 ++ Props3}}}. remove(#doc{body={Props0}}=DDoc, Idx) -> @@ -68,13 +77,15 @@ remove(#doc{body={Props0}}=DDoc, Idx) -> if Views2 /= Views1 -> ok; true -> ?MANGO_ERROR({index_not_found, Idx#idx.name}) end, - Props1 = case Views2 of + Props3 = case Views2 of [] -> - lists:keydelete(<<"views">>, 1, Props0); + Props1 = lists:keydelete(<<"views">>, 1, Props0), + Props2 = lists:keydelete(<<"options">>, 1, Props1), + lists:keydelete(<<"autoupdate">>, 1, Props2); _ -> lists:keystore(<<"views">>, 1, Props0, {<<"views">>, {Views2}}) end, - {ok, DDoc#doc{body={Props1}}}. + {ok, DDoc#doc{body={Props3}}}. from_ddoc({Props}) -> @@ -104,8 +115,8 @@ to_json(Idx) -> {ddoc, Idx#idx.ddoc}, {name, Idx#idx.name}, {type, Idx#idx.type}, - {partitioned, Idx#idx.partitioned}, - {def, {def_to_json(Idx#idx.def)}} + {def, {def_to_json(Idx#idx.def)}}, + {build_status, Idx#idx.build_status} ]}. @@ -121,7 +132,7 @@ is_usable(Idx, Selector, SortFields) -> % and the selector is not a text search (so requires a text index) RequiredFields = columns(Idx), - % sort fields are required to exist in the results so + % sort fields are required to exist in the results so % we don't need to check the selector for these RequiredFields1 = ordsets:subtract(lists:usort(RequiredFields), lists:usort(SortFields)), diff --git a/src/mango/src/mango_idx_view.hrl b/src/mango/src/mango_idx_view.hrl index 0d213e56e..d0f46748b 100644 --- a/src/mango/src/mango_idx_view.hrl +++ b/src/mango/src/mango_idx_view.hrl @@ -10,4 +10,4 @@ % License for the specific language governing permissions and limitations under % the License. --define(MAX_JSON_OBJ, {<<255, 255, 255, 255>>}).
\ No newline at end of file +-define(MAX_JSON_OBJ, {[{<<255, 255, 255, 255>>, <<>>}]}).
\ No newline at end of file diff --git a/src/mango/src/mango_json_bookmark.erl b/src/mango/src/mango_json_bookmark.erl index 97f81cfb8..83fd00f29 100644 --- a/src/mango/src/mango_json_bookmark.erl +++ b/src/mango/src/mango_json_bookmark.erl @@ -54,7 +54,7 @@ unpack(nil) -> nil; unpack(Packed) -> try - Bookmark = binary_to_term(couch_util:decodeBase64Url(Packed)), + Bookmark = binary_to_term(couch_util:decodeBase64Url(Packed), [safe]), verify(Bookmark) catch _:_ -> ?MANGO_ERROR({invalid_bookmark, Packed}) diff --git a/src/mango/src/mango_native_proc.erl b/src/mango/src/mango_native_proc.erl deleted file mode 100644 index 274ae11de..000000000 --- a/src/mango/src/mango_native_proc.erl +++ /dev/null @@ -1,378 +0,0 @@ -% Licensed under the Apache License, Version 2.0 (the "License"); you may not -% use this file except in compliance with the License. You may obtain a copy of -% the License at -% -% http://www.apache.org/licenses/LICENSE-2.0 -% -% Unless required by applicable law or agreed to in writing, software -% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -% License for the specific language governing permissions and limitations under -% the License. - --module(mango_native_proc). --behavior(gen_server). - - --include("mango_idx.hrl"). - - --export([ - start_link/0, - set_timeout/2, - prompt/2 -]). - --export([ - init/1, - terminate/2, - handle_call/3, - handle_cast/2, - handle_info/2, - code_change/3 -]). - - --record(st, { - indexes = [], - timeout = 5000 -}). - - --record(tacc, { - index_array_lengths = true, - fields = all_fields, - path = [] -}). - - -start_link() -> - gen_server:start_link(?MODULE, [], []). - - -set_timeout(Pid, TimeOut) when is_integer(TimeOut), TimeOut > 0 -> - gen_server:call(Pid, {set_timeout, TimeOut}). - - -prompt(Pid, Data) -> - gen_server:call(Pid, {prompt, Data}). - - -init(_) -> - {ok, #st{}}. - - -terminate(_Reason, _St) -> - ok. - - -handle_call({set_timeout, TimeOut}, _From, St) -> - {reply, ok, St#st{timeout=TimeOut}}; - -handle_call({prompt, [<<"reset">>]}, _From, St) -> - {reply, true, St#st{indexes=[]}}; - -handle_call({prompt, [<<"reset">>, _QueryConfig]}, _From, St) -> - {reply, true, St#st{indexes=[]}}; - -handle_call({prompt, [<<"add_fun">>, IndexInfo]}, _From, St) -> - Indexes = case validate_index_info(IndexInfo) of - true -> - St#st.indexes ++ [IndexInfo]; - false -> - couch_log:error("No Valid Indexes For: ~p", [IndexInfo]), - St#st.indexes - end, - NewSt = St#st{indexes = Indexes}, - {reply, true, NewSt}; - -handle_call({prompt, [<<"map_doc">>, Doc]}, _From, St) -> - {reply, map_doc(St, mango_json:to_binary(Doc)), St}; - -handle_call({prompt, [<<"reduce">>, RedSrcs, _]}, _From, St) -> - {reply, [true, [null || _ <- RedSrcs]], St}; - -handle_call({prompt, [<<"rereduce">>, RedSrcs, _]}, _From, St) -> - {reply, [true, [null || _ <- RedSrcs]], St}; - -handle_call({prompt, [<<"index_doc">>, Doc]}, _From, St) -> - Vals = case index_doc(St, mango_json:to_binary(Doc)) of - [] -> - [[]]; - Else -> - Else - end, - {reply, Vals, St}; - - -handle_call(Msg, _From, St) -> - {stop, {invalid_call, Msg}, {invalid_call, Msg}, St}. - - -handle_cast(garbage_collect, St) -> - erlang:garbage_collect(), - {noreply, St}; - -handle_cast(stop, St) -> - {stop, normal, St}; - -handle_cast(Msg, St) -> - {stop, {invalid_cast, Msg}, St}. - - -handle_info(Msg, St) -> - {stop, {invalid_info, Msg}, St}. - - -code_change(_OldVsn, St, _Extra) -> - {ok, St}. 
- - -map_doc(#st{indexes=Indexes}, Doc) -> - lists:map(fun(Idx) -> get_index_entries(Idx, Doc) end, Indexes). - - -index_doc(#st{indexes=Indexes}, Doc) -> - lists:map(fun(Idx) -> get_text_entries(Idx, Doc) end, Indexes). - - -get_index_entries({IdxProps}, Doc) -> - {Fields} = couch_util:get_value(<<"fields">>, IdxProps), - Selector = get_index_partial_filter_selector(IdxProps), - case should_index(Selector, Doc) of - false -> - []; - true -> - Values = get_index_values(Fields, Doc), - case lists:member(not_found, Values) of - true -> []; - false -> [[Values, null]] - end - end. - - -get_index_values(Fields, Doc) -> - lists:map(fun({Field, _Dir}) -> - case mango_doc:get_field(Doc, Field) of - not_found -> not_found; - bad_path -> not_found; - Value -> Value - end - end, Fields). - - -get_text_entries({IdxProps}, Doc) -> - Selector = get_index_partial_filter_selector(IdxProps), - case should_index(Selector, Doc) of - true -> - get_text_entries0(IdxProps, Doc); - false -> - [] - end. - - -get_index_partial_filter_selector(IdxProps) -> - case couch_util:get_value(<<"partial_filter_selector">>, IdxProps, {[]}) of - {[]} -> - % this is to support legacy text indexes that had the partial_filter_selector - % set as selector - couch_util:get_value(<<"selector">>, IdxProps, {[]}); - Else -> - Else - end. - - -get_text_entries0(IdxProps, Doc) -> - DefaultEnabled = get_default_enabled(IdxProps), - IndexArrayLengths = get_index_array_lengths(IdxProps), - FieldsList = get_text_field_list(IdxProps), - TAcc = #tacc{ - index_array_lengths = IndexArrayLengths, - fields = FieldsList - }, - Fields0 = get_text_field_values(Doc, TAcc), - Fields = if not DefaultEnabled -> Fields0; true -> - add_default_text_field(Fields0) - end, - FieldNames = get_field_names(Fields), - Converted = convert_text_fields(Fields), - FieldNames ++ Converted. - - -get_text_field_values({Props}, TAcc) when is_list(Props) -> - get_text_field_values_obj(Props, TAcc, []); - -get_text_field_values(Values, TAcc) when is_list(Values) -> - IndexArrayLengths = TAcc#tacc.index_array_lengths, - NewPath = ["[]" | TAcc#tacc.path], - NewTAcc = TAcc#tacc{path = NewPath}, - case IndexArrayLengths of - true -> - % We bypass make_text_field and directly call make_text_field_name - % because the length field name is not part of the path. - LengthFieldName = make_text_field_name(NewTAcc#tacc.path, <<"length">>), - LengthField = [{LengthFieldName, <<"length">>, length(Values)}], - get_text_field_values_arr(Values, NewTAcc, LengthField); - _ -> - get_text_field_values_arr(Values, NewTAcc, []) - end; - -get_text_field_values(Bin, TAcc) when is_binary(Bin) -> - make_text_field(TAcc, <<"string">>, Bin); - -get_text_field_values(Num, TAcc) when is_number(Num) -> - make_text_field(TAcc, <<"number">>, Num); - -get_text_field_values(Bool, TAcc) when is_boolean(Bool) -> - make_text_field(TAcc, <<"boolean">>, Bool); - -get_text_field_values(null, TAcc) -> - make_text_field(TAcc, <<"null">>, true). - - -get_text_field_values_obj([], _, FAcc) -> - FAcc; -get_text_field_values_obj([{Key, Val} | Rest], TAcc, FAcc) -> - NewPath = [Key | TAcc#tacc.path], - NewTAcc = TAcc#tacc{path = NewPath}, - Fields = get_text_field_values(Val, NewTAcc), - get_text_field_values_obj(Rest, TAcc, Fields ++ FAcc). - - -get_text_field_values_arr([], _, FAcc) -> - FAcc; -get_text_field_values_arr([Value | Rest], TAcc, FAcc) -> - Fields = get_text_field_values(Value, TAcc), - get_text_field_values_arr(Rest, TAcc, Fields ++ FAcc). 
- - -get_default_enabled(Props) -> - case couch_util:get_value(<<"default_field">>, Props, {[]}) of - Bool when is_boolean(Bool) -> - Bool; - {[]} -> - true; - {Opts}-> - couch_util:get_value(<<"enabled">>, Opts, true) - end. - - -get_index_array_lengths(Props) -> - couch_util:get_value(<<"index_array_lengths">>, Props, true). - - -add_default_text_field(Fields) -> - DefaultFields = add_default_text_field(Fields, []), - DefaultFields ++ Fields. - - -add_default_text_field([], Acc) -> - Acc; -add_default_text_field([{_Name, <<"string">>, Value} | Rest], Acc) -> - NewAcc = [{<<"$default">>, <<"string">>, Value} | Acc], - add_default_text_field(Rest, NewAcc); -add_default_text_field([_ | Rest], Acc) -> - add_default_text_field(Rest, Acc). - - -%% index of all field names -get_field_names(Fields) -> - FieldNameSet = lists:foldl(fun({Name, _, _}, Set) -> - gb_sets:add([<<"$fieldnames">>, Name, []], Set) - end, gb_sets:new(), Fields), - gb_sets:to_list(FieldNameSet). - - -convert_text_fields([]) -> - []; -convert_text_fields([{Name, _Type, Value} | Rest]) -> - [[Name, Value, []] | convert_text_fields(Rest)]. - - -should_index(Selector, Doc) -> - % We should do this - NormSelector = mango_selector:normalize(Selector), - Matches = mango_selector:match(NormSelector, Doc), - IsDesign = case mango_doc:get_field(Doc, <<"_id">>) of - <<"_design/", _/binary>> -> true; - _ -> false - end, - Matches and not IsDesign. - - -get_text_field_list(IdxProps) -> - case couch_util:get_value(<<"fields">>, IdxProps) of - Fields when is_list(Fields) -> - RawList = lists:flatmap(fun get_text_field_info/1, Fields), - [mango_util:lucene_escape_user(Field) || Field <- RawList]; - _ -> - all_fields - end. - - -get_text_field_info({Props}) -> - Name = couch_util:get_value(<<"name">>, Props), - Type0 = couch_util:get_value(<<"type">>, Props), - if not is_binary(Name) -> []; true -> - Type = get_text_field_type(Type0), - [iolist_to_binary([Name, ":", Type])] - end. - - -get_text_field_type(<<"number">>) -> - <<"number">>; -get_text_field_type(<<"boolean">>) -> - <<"boolean">>; -get_text_field_type(_) -> - <<"string">>. - - -make_text_field(TAcc, Type, Value) -> - FieldName = make_text_field_name(TAcc#tacc.path, Type), - Fields = TAcc#tacc.fields, - case Fields == all_fields orelse lists:member(FieldName, Fields) of - true -> - [{FieldName, Type, Value}]; - false -> - [] - end. - - -make_text_field_name([P | Rest], Type) -> - Parts = lists:reverse(Rest, [iolist_to_binary([P, ":", Type])]), - Escaped = [mango_util:lucene_escape_field(N) || N <- Parts], - iolist_to_binary(mango_util:join(".", Escaped)). - - -validate_index_info(IndexInfo) -> - IdxTypes = case clouseau_rpc:connected() of - true -> - [mango_idx_view, mango_idx_text]; - false -> - [mango_idx_view] - end, - Results = lists:foldl(fun(IdxType, Results0) -> - try - IdxType:validate_index_def(IndexInfo), - [valid_index | Results0] - catch _:_ -> - [invalid_index | Results0] - end - end, [], IdxTypes), - lists:member(valid_index, Results). - - --ifdef(TEST). - --include_lib("eunit/include/eunit.hrl"). - -handle_garbage_collect_cast_test() -> - ?assertEqual({noreply, []}, handle_cast(garbage_collect, [])). - -handle_stop_cast_test() -> - ?assertEqual({stop, normal, []}, handle_cast(stop, [])). - -handle_invalid_cast_test() -> - ?assertEqual({stop, {invalid_cast, random}, []}, handle_cast(random, [])). - --endif. 
diff --git a/src/mango/src/mango_opts.erl b/src/mango/src/mango_opts.erl index 92c07f743..7bae9c90d 100644 --- a/src/mango/src/mango_opts.erl +++ b/src/mango/src/mango_opts.erl @@ -34,7 +34,6 @@ validate_sort/1, validate_fields/1, validate_bulk_delete/1, - validate_partitioned/1, default_limit/0 ]). @@ -71,12 +70,6 @@ validate_idx_create({Props}) -> {optional, true}, {default, 2}, {validator, fun is_pos_integer/1} - ]}, - {<<"partitioned">>, [ - {tag, partitioned}, - {optional, true}, - {default, db_default}, - {validator, fun validate_partitioned/1} ]} ], validate(Props, Opts). @@ -124,12 +117,6 @@ validate_find({Props}) -> {default, []}, {validator, fun validate_fields/1} ]}, - {<<"partition">>, [ - {tag, partition}, - {optional, true}, - {default, <<>>}, - {validator, fun validate_partition/1} - ]}, {<<"r">>, [ {tag, r}, {optional, true}, @@ -309,23 +296,6 @@ validate_fields(Value) -> mango_fields:new(Value). -validate_partitioned(true) -> - {ok, true}; -validate_partitioned(false) -> - {ok, false}; -validate_partitioned(db_default) -> - {ok, db_default}; -validate_partitioned(Else) -> - ?MANGO_ERROR({invalid_partitioned_value, Else}). - - -validate_partition(<<>>) -> - {ok, <<>>}; -validate_partition(Partition) -> - couch_partition:validate_partition(Partition), - {ok, Partition}. - - validate_opts([], Props, Acc) -> {Props, lists:reverse(Acc)}; validate_opts([{Name, Desc} | Rest], Props, Acc) -> diff --git a/src/mango/src/mango_plugin.erl b/src/mango/src/mango_plugin.erl new file mode 100644 index 000000000..de23f8e7c --- /dev/null +++ b/src/mango/src/mango_plugin.erl @@ -0,0 +1,46 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(mango_plugin). + +-export([ + before_find/1, + after_find/3 +]). + +-define(SERVICE_ID, mango). + +%% ------------------------------------------------------------------ +%% API Function Definitions +%% ------------------------------------------------------------------ + +before_find(HttpReq0) -> + [HttpReq1] = with_pipe(before_find, [HttpReq0]), + {ok, HttpReq1}. + + +after_find(HttpReq, HttpResp, Arg0) -> + [_HttpReq, _HttpResp, Arg1] = with_pipe(after_find, [HttpReq, HttpResp, Arg0]), + {ok, Arg1}. + + +%% ------------------------------------------------------------------ +%% Internal Function Definitions +%% ------------------------------------------------------------------ + +with_pipe(Func, Args) -> + do_apply(Func, Args, [pipe]). + + +do_apply(Func, Args, Opts) -> + Handle = couch_epi:get_handle(?SERVICE_ID), + couch_epi:apply(Handle, ?SERVICE_ID, Func, Args, Opts). 
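Note: mango_plugin above, together with the {mango, mango_plugin} service registered in mango_epi earlier in this diff, exposes two couch_epi hook points: before_find/1 (may rewrite the #httpd{} request) and after_find/3 (may rewrite the accumulated response key/value list). A hedged sketch of what a provider might look like; the module name is hypothetical, and it assumes couch_epi's pipe dispatch threads the full argument list through each provider, so every callback returns the new argument list. The plugin application would register it by listing {mango, my_app_mango_plugin} in its own couch_epi providers().

    %% Hypothetical provider module hooked into the mango epi service.
    -module(my_app_mango_plugin).
    -export([before_find/1, after_find/3]).

    %% Pipe callback: return the (possibly modified) argument list.
    before_find(HttpReq) ->
        [HttpReq].

    %% Pipe callback: request, delayed response, and response KVs; here we
    %% prepend an illustrative key to the KVs that end_find_resp/2 will encode.
    after_find(HttpReq, HttpResp, KVs) ->
        [HttpReq, HttpResp, [{plugin_processed, true} | KVs]].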
diff --git a/src/mango/src/mango_selector.erl b/src/mango/src/mango_selector.erl index 3ea83c220..fc6a6d1a7 100644 --- a/src/mango/src/mango_selector.erl +++ b/src/mango/src/mango_selector.erl @@ -138,6 +138,11 @@ norm_ops({[{<<"$allMatch">>, {_}=Arg}]}) -> norm_ops({[{<<"$allMatch">>, Arg}]}) -> ?MANGO_ERROR({bad_arg, '$allMatch', Arg}); +norm_ops({[{<<"$keyMapMatch">>, {_}=Arg}]}) -> + {[{<<"$keyMapMatch">>, norm_ops(Arg)}]}; +norm_ops({[{<<"$keyMapMatch">>, Arg}]}) -> + ?MANGO_ERROR({bad_arg, '$keyMapMatch', Arg}); + norm_ops({[{<<"$size">>, Arg}]}) when is_integer(Arg), Arg >= 0 -> {[{<<"$size">>, Arg}]}; norm_ops({[{<<"$size">>, Arg}]}) -> @@ -253,6 +258,10 @@ norm_fields({[{<<"$allMatch">>, Arg}]}, Path) -> Cond = {[{<<"$allMatch">>, norm_fields(Arg)}]}, {[{Path, Cond}]}; +norm_fields({[{<<"$keyMapMatch">>, Arg}]}, Path) -> + Cond = {[{<<"$keyMapMatch">>, norm_fields(Arg)}]}, + {[{Path, Cond}]}; + % The text operator operates against the internal % $default field. This also asserts that the $default @@ -334,6 +343,9 @@ norm_negations({[{<<"$elemMatch">>, Arg}]}) -> norm_negations({[{<<"$allMatch">>, Arg}]}) -> {[{<<"$allMatch">>, norm_negations(Arg)}]}; +norm_negations({[{<<"$keyMapMatch">>, Arg}]}) -> + {[{<<"$keyMapMatch">>, norm_negations(Arg)}]}; + % All other conditions can't introduce negations anywhere % further down the operator tree. norm_negations(Cond) -> @@ -421,7 +433,7 @@ match({[{<<"$not">>, Arg}]}, Value, Cmp) -> not match(Arg, Value, Cmp); match({[{<<"$all">>, []}]}, _, _) -> - true; + false; % All of the values in Args must exist in Values or % Values == hd(Args) if Args is a single element list % that contains a list. @@ -491,6 +503,26 @@ match({[{<<"$allMatch">>, Arg}]}, [_ | _] = Values, Cmp) -> match({[{<<"$allMatch">>, _Arg}]}, _Value, _Cmp) -> false; +% Matches when any key in the map value matches the +% sub-selector Arg. +match({[{<<"$keyMapMatch">>, Arg}]}, Value, Cmp) when is_tuple(Value) -> + try + lists:foreach(fun(V) -> + case match(Arg, V, Cmp) of + true -> throw(matched); + _ -> ok + end + end, [Key || {Key, _} <- element(1, Value)]), + false + catch + throw:matched -> + true; + _:_ -> + false + end; +match({[{<<"$keyMapMatch">>, _Arg}]}, _Value, _Cmp) -> + false; + % Our comparison operators are fairly straight forward match({[{<<"$lt">>, Arg}]}, Value, Cmp) -> Cmp(Value, Arg) < 0; @@ -506,7 +538,7 @@ match({[{<<"$gt">>, Arg}]}, Value, Cmp) -> Cmp(Value, Arg) > 0; match({[{<<"$in">>, []}]}, _, _) -> - true; + false; match({[{<<"$in">>, Args}]}, Values, Cmp) when is_list(Values)-> Pred = fun(Arg) -> lists:foldl(fun(Value,Match) -> diff --git a/src/mango/src/mango_util.erl b/src/mango/src/mango_util.erl index 0d31f15f9..d649f95f1 100644 --- a/src/mango/src/mango_util.erl +++ b/src/mango/src/mango_util.erl @@ -15,13 +15,9 @@ -export([ open_doc/2, - open_ddocs/1, load_ddoc/2, load_ddoc/3, - defer/3, - do_defer/3, - assert_ejson/1, to_lower/1, @@ -85,23 +81,7 @@ open_doc(Db, DocId) -> open_doc(Db, DocId, Options) -> - case mango_util:defer(fabric, open_doc, [Db, DocId, Options]) of - {ok, Doc} -> - {ok, Doc}; - {not_found, _} -> - not_found; - _ -> - ?MANGO_ERROR({error_loading_doc, DocId}) - end. - - -open_ddocs(Db) -> - case mango_util:defer(fabric, design_docs, [Db]) of - {ok, Docs} -> - {ok, Docs}; - _ -> - ?MANGO_ERROR(error_loading_ddocs) - end. + fabric2_db:open_doc(Db, DocId, Options). 
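Note: two behaviour points from the mango_selector hunks above are worth illustrating: the new $keyMapMatch operator matches a document when any key of the referenced object satisfies the sub-selector, and $in / $all with an empty argument list now match nothing instead of everything. A hedged sketch in the module's EJSON form, mirroring the Python test added further down; the documents are illustrative:

    %% $keyMapMatch: does any key of doc.foo equal <<"aa">>?
    Sel = mango_selector:normalize(
        {[{<<"foo">>, {[{<<"$keyMapMatch">>, {[{<<"$eq">>, <<"aa">>}]}}]}}]}),
    true  = mango_selector:match(Sel,
        {[{<<"foo">>, {[{<<"aa">>, <<"bar">>}, {<<"bb">>, <<"bang">>}]}}]}),
    false = mango_selector:match(Sel,
        {[{<<"foo">>, {[{<<"cc">>, <<"bar">>}, {<<"bb">>, <<"bang">>}]}}]}),

    %% Empty $in no longer matches everything.
    SelIn = mango_selector:normalize({[{<<"x">>, {[{<<"$in">>, []}]}}]}),
    false = mango_selector:match(SelIn, {[{<<"x">>, 1}]}).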
load_ddoc(Db, DDocId) -> @@ -111,7 +91,7 @@ load_ddoc(Db, DDocId, DbOpts) -> case open_doc(Db, DDocId, DbOpts) of {ok, Doc} -> {ok, check_lang(Doc)}; - not_found -> + {not_found, missing} -> Body = {[ {<<"language">>, <<"query">>} ]}, @@ -119,40 +99,6 @@ load_ddoc(Db, DDocId, DbOpts) -> end. -defer(Mod, Fun, Args) -> - {Pid, Ref} = erlang:spawn_monitor(?MODULE, do_defer, [Mod, Fun, Args]), - receive - {'DOWN', Ref, process, Pid, {mango_defer_ok, Value}} -> - Value; - {'DOWN', Ref, process, Pid, {mango_defer_throw, Value}} -> - erlang:throw(Value); - {'DOWN', Ref, process, Pid, {mango_defer_error, Value}} -> - erlang:error(Value); - {'DOWN', Ref, process, Pid, {mango_defer_exit, Value}} -> - erlang:exit(Value) - end. - - -do_defer(Mod, Fun, Args) -> - try erlang:apply(Mod, Fun, Args) of - Resp -> - erlang:exit({mango_defer_ok, Resp}) - catch - throw:Error -> - Stack = erlang:get_stacktrace(), - couch_log:error("Defered error: ~w~n ~p", [{throw, Error}, Stack]), - erlang:exit({mango_defer_throw, Error}); - error:Error -> - Stack = erlang:get_stacktrace(), - couch_log:error("Defered error: ~w~n ~p", [{error, Error}, Stack]), - erlang:exit({mango_defer_error, Error}); - exit:Error -> - Stack = erlang:get_stacktrace(), - couch_log:error("Defered error: ~w~n ~p", [{exit, Error}, Stack]), - erlang:exit({mango_defer_exit, Error}) - end. - - assert_ejson({Props}) -> assert_ejson_obj(Props); assert_ejson(Vals) when is_list(Vals) -> diff --git a/src/mango/test/01-index-crud-test.py b/src/mango/test/01-index-crud-test.py index b60239992..13ae300dd 100644 --- a/src/mango/test/01-index-crud-test.py +++ b/src/mango/test/01-index-crud-test.py @@ -113,6 +113,21 @@ class IndexCrudTests(mango.DbPerClass): return raise AssertionError("index not created") + def test_ignore_design_docs(self): + fields = ["baz", "foo"] + ret = self.db.create_index(fields, name="idx_02") + assert ret is True + self.db.save_doc({ + "_id": "_design/ignore", + "views": { + "view1": { + "map": "function (doc) { emit(doc._id, 1)}" + } + } + }) + indexes = self.db.list_indexes() + self.assertEqual(len(indexes), 2) + def test_read_idx_doc(self): self.db.create_index(["foo", "bar"], name="idx_01") self.db.create_index(["hello", "bar"]) diff --git a/src/mango/test/02-basic-find-test.py b/src/mango/test/02-basic-find-test.py index afdba03a2..2a03a3a55 100644 --- a/src/mango/test/02-basic-find-test.py +++ b/src/mango/test/02-basic-find-test.py @@ -100,16 +100,6 @@ class BasicFindTests(mango.UserDocsTests): else: raise AssertionError("bad find") - def test_bad_r(self): - bad_rs = ([None, True, False, 1.2, "no limit!", {"foo": "bar"}, [2]],) - for br in bad_rs: - try: - self.db.find({"int": {"$gt": 2}}, r=br) - except Exception as e: - assert e.response.status_code == 400 - else: - raise AssertionError("bad find") - def test_bad_conflicts(self): bad_conflicts = ([None, 1.2, "no limit!", {"foo": "bar"}, [2]],) for bc in bad_conflicts: @@ -262,11 +252,6 @@ class BasicFindTests(mango.UserDocsTests): assert sorted(d.keys()) == ["location", "user_id"] assert sorted(d["location"].keys()) == ["address"] - def test_r(self): - for r in [1, 2, 3]: - docs = self.db.find({"age": {"$gt": 0}}, r=r) - assert len(docs) == 15 - def test_empty(self): docs = self.db.find({}) # 15 users diff --git a/src/mango/test/03-operator-test.py b/src/mango/test/03-operator-test.py index 935f470bb..a67ef91f3 100644 --- a/src/mango/test/03-operator-test.py +++ b/src/mango/test/03-operator-test.py @@ -66,6 +66,15 @@ class OperatorTests: docs = self.db.find({"emptybang": 
{"$allMatch": {"foo": {"$eq": 2}}}}) self.assertEqual(len(docs), 0) + def test_keymap_match(self): + amdocs = [ + {"foo": {"aa": "bar", "bb": "bang"}}, + {"foo": {"cc": "bar", "bb": "bang"}}, + ] + self.db.save_docs(amdocs, w=3) + docs = self.db.find({"foo": {"$keyMapMatch": {"$eq": "aa"}}}) + self.assertEqual(len(docs), 1) + def test_in_operator_array(self): docs = self.db.find({"manager": True, "favorites": {"$in": ["Ruby", "Python"]}}) self.assertUserIds([2, 6, 7, 9, 11, 12], docs) diff --git a/src/mango/test/05-index-selection-test.py b/src/mango/test/05-index-selection-test.py index cb4d32986..bae3d58f1 100644 --- a/src/mango/test/05-index-selection-test.py +++ b/src/mango/test/05-index-selection-test.py @@ -14,6 +14,8 @@ import mango import user_docs import unittest +import requests + class IndexSelectionTests: def test_basic(self): @@ -201,8 +203,11 @@ class IndexSelectionTests: } }, } - with self.assertRaises(KeyError): + try: self.db.save_doc(design_doc) + assert False, "Should not get here." + except requests.exceptions.HTTPError as e: + self.assertEqual(e.response.json()['error'], 'invalid_design_doc') def test_explain_sort_reverse(self): selector = {"manager": {"$gt": None}} diff --git a/src/mango/test/08-text-limit-test.py b/src/mango/test/08-text-limit-test.py index ae827813d..ef0509ff3 100644 --- a/src/mango/test/08-text-limit-test.py +++ b/src/mango/test/08-text-limit-test.py @@ -133,3 +133,13 @@ class LimitTests(mango.LimitDocsTextTests): assert json["bookmark"] != bm bm = json["bookmark"] assert len(seen_docs) == len(limit_docs.DOCS) + + def run_explain_check(self, size): + q = {"age": {"$gt": 0}} + seen_docs = set() + bm = None + results1 = self.db.find(q, limit=size, bookmark=bm, return_raw=True) + assert results1["bookmark"] != bm + bm = results1["bookmark"] + results2 = self.db.find(q, limit=size, bookmark=bm, explain=True) + assert results2["bookmark"] == bm diff --git a/src/mango/test/12-use-correct-index-test.py b/src/mango/test/12-use-correct-index-test.py index c21ad6c5e..a7f07b5e8 100644 --- a/src/mango/test/12-use-correct-index-test.py +++ b/src/mango/test/12-use-correct-index-test.py @@ -54,36 +54,41 @@ class ChooseCorrectIndexForDocs(mango.DbPerClass): self.db.save_docs(copy.deepcopy(DOCS)) def test_choose_index_with_one_field_in_index(self): - self.db.create_index(["name", "age", "user_id"], ddoc="aaa") - self.db.create_index(["name"], ddoc="zzz") + self.db.create_index(["name", "age", "user_id"], ddoc="aaa", wait_for_built_index=False) + self.db.create_index(["name"], ddoc="zzz", wait_for_built_index=False) + self.db.wait_for_built_indexes() explain = self.db.find({"name": "Eddie"}, explain=True) self.assertEqual(explain["index"]["ddoc"], "_design/zzz") def test_choose_index_with_two(self): - self.db.create_index(["name", "age", "user_id"], ddoc="aaa") - self.db.create_index(["name", "age"], ddoc="bbb") - self.db.create_index(["name"], ddoc="zzz") + self.db.create_index(["name", "age", "user_id"], ddoc="aaa", wait_for_built_index=False) + self.db.create_index(["name", "age"], ddoc="bbb", wait_for_built_index=False) + self.db.create_index(["name"], ddoc="zzz", wait_for_built_index=False) + self.db.wait_for_built_indexes() explain = self.db.find({"name": "Eddie", "age": {"$gte": 12}}, explain=True) self.assertEqual(explain["index"]["ddoc"], "_design/bbb") def test_choose_index_alphabetically(self): - self.db.create_index(["name"], ddoc="aaa") - self.db.create_index(["name"], ddoc="bbb") - self.db.create_index(["name"], ddoc="zzz") + 
self.db.create_index(["name"], ddoc="aaa", wait_for_built_index=False) + self.db.create_index(["name"], ddoc="bbb", wait_for_built_index=False) + self.db.create_index(["name"], ddoc="zzz", wait_for_built_index=False) + self.db.wait_for_built_indexes() explain = self.db.find({"name": "Eddie", "age": {"$gte": 12}}, explain=True) self.assertEqual(explain["index"]["ddoc"], "_design/aaa") def test_choose_index_most_accurate(self): - self.db.create_index(["name", "age", "user_id"], ddoc="aaa") - self.db.create_index(["name", "age"], ddoc="bbb") - self.db.create_index(["name"], ddoc="zzz") + self.db.create_index(["name", "age", "user_id"], ddoc="aaa", wait_for_built_index=False) + self.db.create_index(["name", "age"], ddoc="bbb", wait_for_built_index=False) + self.db.create_index(["name"], ddoc="zzz", wait_for_built_index=False) + self.db.wait_for_built_indexes() explain = self.db.find({"name": "Eddie", "age": {"$gte": 12}}, explain=True) self.assertEqual(explain["index"]["ddoc"], "_design/bbb") def test_choose_index_most_accurate_in_memory_selector(self): - self.db.create_index(["name", "location", "user_id"], ddoc="aaa") - self.db.create_index(["name", "age", "user_id"], ddoc="bbb") - self.db.create_index(["name"], ddoc="zzz") + self.db.create_index(["name", "location", "user_id"], ddoc="aaa", wait_for_built_index=False) + self.db.create_index(["name", "age", "user_id"], ddoc="bbb", wait_for_built_index=False) + self.db.create_index(["name"], ddoc="zzz", wait_for_built_index=False) + self.db.wait_for_built_indexes() explain = self.db.find({"name": "Eddie", "number": {"$lte": 12}}, explain=True) self.assertEqual(explain["index"]["ddoc"], "_design/zzz") @@ -100,8 +105,9 @@ class ChooseCorrectIndexForDocs(mango.DbPerClass): def test_chooses_idxA(self): DOCS2 = [{"a": 1, "b": 1, "c": 1}, {"a": 1000, "d": 1000, "e": 1000}] self.db.save_docs(copy.deepcopy(DOCS2)) - self.db.create_index(["a", "b", "c"]) - self.db.create_index(["a", "d", "e"]) + self.db.create_index(["a", "b", "c"], wait_for_built_index=False) + self.db.create_index(["a", "d", "e"], wait_for_built_index=False) + self.db.wait_for_built_indexes() explain = self.db.find( {"a": {"$gt": 0}, "b": {"$gt": 0}, "c": {"$gt": 0}}, explain=True ) diff --git a/src/mango/test/13-stable-update-test.py b/src/mango/test/13-stable-update-test.py deleted file mode 100644 index 303f3fab1..000000000 --- a/src/mango/test/13-stable-update-test.py +++ /dev/null @@ -1,51 +0,0 @@ -# Licensed under the Apache License, Version 2.0 (the "License"); you may not -# use this file except in compliance with the License. You may obtain a copy of -# the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations under -# the License. 
- -import copy -import mango - -DOCS1 = [ - { - "_id": "54af50626de419f5109c962f", - "user_id": 0, - "age": 10, - "name": "Jimi", - "location": "UK", - "number": 4, - }, - { - "_id": "54af50622071121b25402dc3", - "user_id": 1, - "age": 12, - "name": "Eddie", - "location": "ZAR", - "number": 2, - }, -] - - -class SupportStableAndUpdate(mango.DbPerClass): - def setUp(self): - self.db.recreate() - # Hack to prevent auto-indexer from foiling update=False test - # https://github.com/apache/couchdb/issues/2313 - self.db.save_doc( - {"_id": "_design/foo", "language": "query", "autoupdate": False} - ) - self.db.create_index(["name"], ddoc="foo") - self.db.save_docs(copy.deepcopy(DOCS1)) - - def test_update_updates_view_when_specified(self): - docs = self.db.find({"name": "Eddie"}, update=False) - assert len(docs) == 0 - docs = self.db.find({"name": "Eddie"}, update=True) - assert len(docs) == 1 diff --git a/src/mango/test/13-users-db-find-test.py b/src/mango/test/13-users-db-find-test.py index 73d15ea1a..9f9b53a81 100644 --- a/src/mango/test/13-users-db-find-test.py +++ b/src/mango/test/13-users-db-find-test.py @@ -12,10 +12,15 @@ # the License. -import mango, requests +import mango, requests, unittest +# Re-enable once the _users db is implemented class UsersDbFindTests(mango.UsersDbTests): + @classmethod + def setUpClass(klass): + raise unittest.SkipTest("Re-enable once the _users db is implemented") + def test_simple_find(self): docs = self.db.find({"name": {"$eq": "demo02"}}) assert len(docs) == 1 diff --git a/src/mango/test/15-execution-stats-test.py b/src/mango/test/15-execution-stats-test.py index 537a19add..6ccc04b44 100644 --- a/src/mango/test/15-execution-stats-test.py +++ b/src/mango/test/15-execution-stats-test.py @@ -22,7 +22,6 @@ class ExecutionStatsTests(mango.UserDocsTests): self.assertEqual(len(resp["docs"]), 3) self.assertEqual(resp["execution_stats"]["total_keys_examined"], 0) self.assertEqual(resp["execution_stats"]["total_docs_examined"], 3) - self.assertEqual(resp["execution_stats"]["total_quorum_docs_examined"], 0) self.assertEqual(resp["execution_stats"]["results_returned"], 3) # See https://github.com/apache/couchdb/issues/1732 # Erlang os:timestamp() only has ms accuracy on Windows! @@ -35,12 +34,11 @@ class ExecutionStatsTests(mango.UserDocsTests): def test_quorum_json_index(self): resp = self.db.find( - {"age": {"$lt": 35}}, return_raw=True, r=3, executionStats=True + {"age": {"$lt": 35}}, return_raw=True, executionStats=True ) self.assertEqual(len(resp["docs"]), 3) self.assertEqual(resp["execution_stats"]["total_keys_examined"], 0) - self.assertEqual(resp["execution_stats"]["total_docs_examined"], 0) - self.assertEqual(resp["execution_stats"]["total_quorum_docs_examined"], 3) + self.assertEqual(resp["execution_stats"]["total_docs_examined"], 3) self.assertEqual(resp["execution_stats"]["results_returned"], 3) # See https://github.com/apache/couchdb/issues/1732 # Erlang os:timestamp() only has ms accuracy on Windows! 
@@ -70,7 +68,6 @@ class ExecutionStatsTests_Text(mango.UserDocsTextTests): self.assertEqual(len(resp["docs"]), 1) self.assertEqual(resp["execution_stats"]["total_keys_examined"], 0) self.assertEqual(resp["execution_stats"]["total_docs_examined"], 1) - self.assertEqual(resp["execution_stats"]["total_quorum_docs_examined"], 0) self.assertEqual(resp["execution_stats"]["results_returned"], 1) self.assertGreater(resp["execution_stats"]["execution_time_ms"], 0) diff --git a/src/mango/test/16-index-selectors-test.py b/src/mango/test/16-index-selectors-test.py index 4510065f5..cde8438fc 100644 --- a/src/mango/test/16-index-selectors-test.py +++ b/src/mango/test/16-index-selectors-test.py @@ -246,6 +246,14 @@ class IndexSelectorJson(mango.DbPerClass): docs = self.db.find(selector, use_index="oldschooltext") self.assertEqual(len(docs), 3) + def test_text_old_index_not_used(self): + selector = {"location": {"$gte": "FRA"}} + self.db.save_doc(oldschoolddoctext) + resp = self.db.find(selector, explain=True) + self.assertEqual(resp["index"]["name"], "_all_docs") + docs = self.db.find(selector) + self.assertEqual(len(docs), 3) + @unittest.skipUnless(mango.has_text_service(), "requires text service") def test_text_old_selector_still_supported_via_api(self): selector = {"location": {"$gte": "FRA"}} diff --git a/src/mango/test/17-multi-type-value-test.py b/src/mango/test/17-multi-type-value-test.py index 21e7afda4..5a8fcedef 100644 --- a/src/mango/test/17-multi-type-value-test.py +++ b/src/mango/test/17-multi-type-value-test.py @@ -53,9 +53,9 @@ class MultiValueFieldTests: class MultiValueFieldJSONTests(mango.DbPerClass, MultiValueFieldTests): def setUp(self): self.db.recreate() + self.db.create_index(["name"], wait_for_built_index=False) + self.db.create_index(["age", "name"], wait_for_built_index=True) self.db.save_docs(copy.deepcopy(DOCS)) - self.db.create_index(["name"]) - self.db.create_index(["age", "name"]) # @unittest.skipUnless(mango.has_text_service(), "requires text service") diff --git a/src/mango/test/19-find-conflicts.py b/src/mango/test/19-find-conflicts.py index bf865d6ea..3bf3c0693 100644 --- a/src/mango/test/19-find-conflicts.py +++ b/src/mango/test/19-find-conflicts.py @@ -12,11 +12,12 @@ import mango import copy +import unittest -DOC = [{"_id": "doc", "a": 2}] +DOC = [{"_id": "doc", "a": 2}, {"_id": "doc1", "b": 2}] CONFLICT = [{"_id": "doc", "_rev": "1-23202479633c2b380f79507a776743d5", "a": 1}] - +CONFLICT2 = [{"_id": "doc1", "_rev": "1-23202479633c2b380f79507a776743d5", "b": 1}] class ChooseCorrectIndexForDocs(mango.DbPerClass): def setUp(self): @@ -25,7 +26,7 @@ class ChooseCorrectIndexForDocs(mango.DbPerClass): self.db.save_docs_with_conflicts(copy.deepcopy(CONFLICT)) def test_retrieve_conflicts(self): - self.db.create_index(["_conflicts"]) + self.db.create_index(["_conflicts"], wait_for_built_index=False) result = self.db.find({"_conflicts": {"$exists": True}}, conflicts=True) self.assertEqual( result[0]["_conflicts"][0], "1-23202479633c2b380f79507a776743d5" diff --git a/src/mango/test/20-no-timeout-test.py b/src/mango/test/20-no-timeout-test.py deleted file mode 100644 index cffdfc335..000000000 --- a/src/mango/test/20-no-timeout-test.py +++ /dev/null @@ -1,32 +0,0 @@ -# Licensed under the Apache License, Version 2.0 (the "License"); you may not -# use this file except in compliance with the License. 
You may obtain a copy of -# the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations under -# the License. - -import mango -import copy -import unittest - - -class LongRunningMangoTest(mango.DbPerClass): - def setUp(self): - self.db.recreate() - docs = [] - for i in range(100000): - docs.append({"_id": str(i), "another": "field"}) - if i % 20000 == 0: - self.db.save_docs(docs) - docs = [] - - # This test should run to completion and not timeout - def test_query_does_not_time_out(self): - selector = {"_id": {"$gt": 0}, "another": "wrong"} - docs = self.db.find(selector) - self.assertEqual(len(docs), 0) diff --git a/src/mango/test/21-empty-selector-tests.py b/src/mango/test/21-empty-selector-tests.py index beb222c85..8fd76fcd5 100644 --- a/src/mango/test/21-empty-selector-tests.py +++ b/src/mango/test/21-empty-selector-tests.py @@ -35,14 +35,32 @@ def make_empty_selector_suite(klass): docs = self.db.find({"age": 22, "$or": []}) assert len(docs) == 1 + def test_empty_array_in_with_age(self): + resp = self.db.find({"age": 22, "company": {"$in": []}}, explain=True) + self.assertEqual(resp["index"]["type"], klass.INDEX_TYPE) + docs = self.db.find({"age": 22, "company": {"$in": []}}) + assert len(docs) == 0 + def test_empty_array_and_with_age(self): - resp = self.db.find( - {"age": 22, "$and": [{"b": {"$all": []}}]}, explain=True - ) + resp = self.db.find({"age": 22, "$and": []}, explain=True) self.assertEqual(resp["index"]["type"], klass.INDEX_TYPE) docs = self.db.find({"age": 22, "$and": []}) assert len(docs) == 1 + def test_empty_array_all_age(self): + resp = self.db.find({"age": 22, "company": {"$all": []}}, explain=True) + self.assertEqual(resp["index"]["type"], klass.INDEX_TYPE) + docs = self.db.find({"age": 22, "company": {"$all": []}}) + assert len(docs) == 0 + + def test_empty_array_nested_all_with_age(self): + resp = self.db.find( + {"age": 22, "$and": [{"company": {"$all": []}}]}, explain=True + ) + self.assertEqual(resp["index"]["type"], klass.INDEX_TYPE) + docs = self.db.find({"age": 22, "$and": [{"company": {"$all": []}}]}) + assert len(docs) == 0 + def test_empty_arrays_complex(self): resp = self.db.find({"$or": [], "a": {"$in": []}}, explain=True) self.assertEqual(resp["index"]["type"], klass.INDEX_TYPE) diff --git a/src/mango/test/22-build-wait-selected-index.py b/src/mango/test/22-build-wait-selected-index.py new file mode 100644 index 000000000..fd856f4d6 --- /dev/null +++ b/src/mango/test/22-build-wait-selected-index.py @@ -0,0 +1,50 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); you may not +# use this file except in compliance with the License. You may obtain a copy of +# the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations under +# the License. 
+ +import mango +import copy +import unittest + + +class BuildAndWaitOnSelectedIndex(mango.DbPerClass): + def setUp(self): + self.db.recreate() + docs = [] + for i in range(1000): + docs.append({"_id": str(i), "val": i}) + if len(docs) == 250: + self.db.save_docs(docs) + docs = [] + + def test_wait_for_query(self): + self.db.create_index(["val"], ddoc="my-ddoc", wait_for_built_index=False) + + explain = self.db.find({'val': {"$gt": 990}}, use_index="my-ddoc", explain=True) + self.assertEqual(explain["index"]["ddoc"], "_design/my-ddoc") + + docs = self.db.find({'val': {"$gte": 990}}, limit=10) + + self.assertEqual(len(docs), 10) + + def test_dont_wait(self): + self.db.create_index(["val"], ddoc="my-ddoc", wait_for_built_index=False) + + explain = self.db.find({'val': {"$gt": 990}}, explain=True) + self.assertEqual(explain["index"]["name"], "_all_docs") + + docs = self.db.find({'val': {"$gte": 990}}) + self.assertEqual(len(docs), 10) + + def test_update_false(self): + self.db.create_index(["val"], ddoc="my-ddoc", wait_for_built_index=False) + docs = self.db.find({'val': {"$gte": 990}}, update=False, use_index="my-ddoc") + self.assertEqual(docs, []) diff --git a/src/mango/test/README.md b/src/mango/test/README.md index 509e32e47..9eae278b1 100644 --- a/src/mango/test/README.md +++ b/src/mango/test/README.md @@ -11,7 +11,7 @@ To run these, do this in the Mango top level directory: $ venv/bin/nosetests To run an individual test suite: - nosetests --nocapture test/12-use-correct-index.py + nosetests --nocapture test/12-use-correct-index.py To run the tests with text index support: MANGO_TEXT_INDEXES=1 nosetests --nocapture test @@ -22,8 +22,13 @@ Test configuration The following environment variables can be used to configure the test fixtures: - * `COUCH_HOST` - root url (including port) of the CouchDB instance to run the tests against. Default is `"http://127.0.0.1:15984"`. - * `COUCH_USER` - CouchDB username (with admin premissions). Default is `"testuser"`. - * `COUCH_PASSWORD` - CouchDB password. Default is `"testpass"`. - * `COUCH_AUTH_HEADER` - Optional Authorization header value. If specified, this is used instead of basic authentication with the username/password variables above. - * `MANGO_TEXT_INDEXES` - Set to `"1"` to run the tests only applicable to text indexes. + * `COUCH_HOST` - root url (including port) of the CouchDB instance to + run the tests against. Default is `"http://127.0.0.1:15984"`. + * `COUCH_USER` - CouchDB username (with admin premissions). Default + is `"adm"`. + * `COUCH_PASSWORD` - CouchDB password. Default is `"pass"`. + * `COUCH_AUTH_HEADER` - Optional Authorization header value. If + specified, this is used instead of basic authentication with the + username/password variables above. + * `MANGO_TEXT_INDEXES` - Set to `"1"` to run the tests only + applicable to text indexes. 
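A minimal, self-contained sketch of how a test fixture can resolve the configuration variables documented above, mirroring the defaults in the README (the get_from_environment helper name matches the one used in test/mango.py; the rest of the snippet is illustrative only):

    import os

    import requests


    def get_from_environment(key, default):
        # Fall back to the default when the variable is unset or empty.
        value = os.environ.get(key, default)
        return value if value else default


    # Resolve connection settings the same way the Mango test fixtures do.
    host = get_from_environment("COUCH_HOST", "http://127.0.0.1:15984")
    user = get_from_environment("COUCH_USER", "adm")
    password = get_from_environment("COUCH_PASSWORD", "pass")
    auth_header = get_from_environment("COUCH_AUTH_HEADER", None)

    session = requests.session()
    if auth_header is not None:
        # An explicit Authorization header replaces basic authentication.
        session.headers["Authorization"] = auth_header
    else:
        session.auth = (user, password)

    print(session.get(host).json())
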
diff --git a/src/mango/test/mango.py b/src/mango/test/mango.py index 03cb85f48..05c4e65c4 100644 --- a/src/mango/test/mango.py +++ b/src/mango/test/mango.py @@ -48,8 +48,8 @@ class Database(object): dbname, host="127.0.0.1", port="15984", - user="testuser", - password="testpass", + user="adm", + password="pass", ): root_url = get_from_environment("COUCH_HOST", "http://{}:{}".format(host, port)) auth_header = get_from_environment("COUCH_AUTH_HEADER", None) @@ -139,8 +139,9 @@ class Database(object): ddoc=None, partial_filter_selector=None, selector=None, + wait_for_built_index=True, ): - body = {"index": {"fields": fields}, "type": idx_type, "w": 3} + body = {"index": {"fields": fields}, "type": idx_type} if name is not None: body["name"] = name if ddoc is not None: @@ -156,13 +157,22 @@ class Database(object): assert r.json()["name"] is not None created = r.json()["result"] == "created" - if created: - # wait until the database reports the index as available - while len(self.get_index(r.json()["id"], r.json()["name"])) < 1: - delay(t=0.1) + if created and wait_for_built_index: + # wait until the database reports the index as available and build + while True: + idx = self.get_index(r.json()["id"], r.json()["name"])[0] + if idx["build_status"] == "ready": + break + delay(t=0.2) return created + def wait_for_built_indexes(self): + while True: + if all(idx["build_status"] == "ready" for idx in self.list_indexes()): + break + delay(t=0.2) + def create_text_index( self, analyzer=None, @@ -244,7 +254,6 @@ class Database(object): skip=0, sort=None, fields=None, - r=1, conflicts=False, use_index=None, explain=False, @@ -258,7 +267,6 @@ class Database(object): "use_index": use_index, "limit": limit, "skip": skip, - "r": r, "conflicts": conflicts, } if sort is not None: @@ -299,6 +307,10 @@ class UsersDbTests(unittest.TestCase): klass.db = Database("_users") user_docs.setup_users(klass.db) + @classmethod + def tearDownClass(klass): + user_docs.teardown_users(klass.db) + def setUp(self): self.db = self.__class__.db @@ -309,6 +321,10 @@ class DbPerClass(unittest.TestCase): klass.db = Database(random_db_name()) klass.db.create(q=1, n=1) + @classmethod + def tearDownClass(klass): + klass.db.delete() + def setUp(self): self.db = self.__class__.db diff --git a/src/mango/test/user_docs.py b/src/mango/test/user_docs.py index 8f0ed2e04..c30198347 100644 --- a/src/mango/test/user_docs.py +++ b/src/mango/test/user_docs.py @@ -59,14 +59,17 @@ def setup_users(db, **kwargs): db.save_docs(copy.deepcopy(USERS_DOCS)) +def teardown_users(db): + [db.delete_doc(doc["_id"]) for doc in USERS_DOCS] + + def setup(db, index_type="view", **kwargs): db.recreate() - db.save_docs(copy.deepcopy(DOCS)) if index_type == "view": add_view_indexes(db, kwargs) elif index_type == "text": add_text_indexes(db, kwargs) - + db.save_docs(copy.deepcopy(DOCS)) def add_view_indexes(db, kwargs): indexes = [ @@ -90,7 +93,9 @@ def add_view_indexes(db, kwargs): (["ordered"], "ordered"), ] for (idx, name) in indexes: - assert db.create_index(idx, name=name, ddoc=name) is True + assert db.create_index(idx, name=name, ddoc=name, + wait_for_built_index=False) is True + db.wait_for_built_indexes() def add_text_indexes(db, kwargs): diff --git a/src/mem3/src/mem3_httpd_handlers.erl b/src/mem3/src/mem3_httpd_handlers.erl index 7dd6ab052..eeec1edf3 100644 --- a/src/mem3/src/mem3_httpd_handlers.erl +++ b/src/mem3/src/mem3_httpd_handlers.erl @@ -12,7 +12,7 @@ -module(mem3_httpd_handlers). --export([url_handler/1, db_handler/1, design_handler/1]). 
+-export([url_handler/1, db_handler/1, design_handler/1, handler_info/3]). url_handler(<<"_membership">>) -> fun mem3_httpd:handle_membership_req/1; url_handler(<<"_reshard">>) -> fun mem3_reshard_httpd:handle_reshard_req/1; @@ -23,3 +23,39 @@ db_handler(<<"_sync_shards">>) -> fun mem3_httpd:handle_sync_req/2; db_handler(_) -> no_match. design_handler(_) -> no_match. + +handler_info('GET', [<<"_membership">>], _) -> + {'cluster.membership.read', #{}}; + +handler_info('GET', [<<"_reshard">>], _) -> + {'reshard.summary.read', #{}}; + +handler_info('GET', [<<"_reshard">>, <<"state">>], _) -> + {'reshard.state.read', #{}}; + +handler_info('PUT', [<<"_reshard">>, <<"state">>], _) -> + {'reshard.state.write', #{}}; + +handler_info('GET', [<<"_reshard">>, <<"jobs">>], _) -> + {'reshard.jobs.read', #{}}; + +handler_info('POST', [<<"_reshard">>, <<"jobs">>], _) -> + {'reshard.jobs.create', #{}}; + +handler_info('GET', [<<"_reshard">>, <<"jobs">>, JobId], _) -> + {'reshard.job.read', #{'job.id' => JobId}}; + +handler_info('DELETE', [<<"_reshard">>, <<"jobs">>, JobId], _) -> + {'reshard.job.delete', #{'job.id' => JobId}}; + +handler_info('GET', [DbName, <<"_shards">>], _) -> + {'db.shards.read', #{'db.name' => DbName}}; + +handler_info('GET', [DbName, <<"_shards">>, DocId], _) -> + {'db.shards.read', #{'db.name' => DbName, 'doc.id' => DocId}}; + +handler_info('POST', [DbName, <<"_sync_shards">>], _) -> + {'db.shards.sync', #{'db.name' => DbName}}; + +handler_info(_, _, _) -> + no_match. diff --git a/src/mem3/src/mem3_reshard.erl b/src/mem3/src/mem3_reshard.erl index 620b1bc73..234670c34 100644 --- a/src/mem3/src/mem3_reshard.erl +++ b/src/mem3/src/mem3_reshard.erl @@ -213,11 +213,6 @@ reset_state() -> % Gen server functions init(_) -> - % Advertise resharding API feature only if it is not disabled - case is_disabled() of - true -> ok; - false -> config:enable_feature('reshard') - end, couch_log:notice("~p start init()", [?MODULE]), EtsOpts = [named_table, {keypos, #job.id}, {read_concurrency, true}], ?MODULE = ets:new(?MODULE, EtsOpts), diff --git a/src/mem3/src/mem3_reshard_dbdoc.erl b/src/mem3/src/mem3_reshard_dbdoc.erl index 7eb3e9f13..4a0a35c1f 100644 --- a/src/mem3/src/mem3_reshard_dbdoc.erl +++ b/src/mem3/src/mem3_reshard_dbdoc.erl @@ -146,9 +146,8 @@ replicate_to_all_nodes(TimeoutMSec) -> write_shard_doc(#doc{id = Id} = Doc, Body) -> - DbName = ?l2b(config:get("mem3", "shards_db", "_dbs")), UpdatedDoc = Doc#doc{body = Body}, - couch_util:with_db(DbName, fun(Db) -> + couch_util:with_db(mem3_sync:shards_db(), fun(Db) -> try {ok, _} = couch_db:update_doc(Db, UpdatedDoc, []) catch diff --git a/src/mem3/src/mem3_rpc.erl b/src/mem3/src/mem3_rpc.erl index 0991aa745..5d1c62c06 100644 --- a/src/mem3/src/mem3_rpc.erl +++ b/src/mem3/src/mem3_rpc.erl @@ -401,7 +401,7 @@ rexi_call(Node, MFA, Timeout) -> get_or_create_db(DbName, Options) -> - couch_db:open_int(DbName, [{create_if_missing, true} | Options]). + mem3_util:get_or_create_db(DbName, Options). -ifdef(TEST). diff --git a/src/mem3/src/mem3_shards.erl b/src/mem3/src/mem3_shards.erl index 110e227dd..4f3323740 100644 --- a/src/mem3/src/mem3_shards.erl +++ b/src/mem3/src/mem3_shards.erl @@ -20,6 +20,7 @@ -export([handle_config_change/5, handle_config_terminate/3]). -export([start_link/0]). +-export([opts_for_db/1]). -export([for_db/1, for_db/2, for_docid/2, for_docid/3, get/3, local/1, fold/2]). -export([for_shard_range/1]). -export([set_max_size/1]). @@ -45,6 +46,15 @@ start_link() -> gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). 
+opts_for_db(DbName) -> + {ok, Db} = mem3_util:ensure_exists(mem3_sync:shards_db()), + case couch_db:open_doc(Db, DbName, [ejson_body]) of + {ok, #doc{body = {Props}}} -> + mem3_util:get_shard_opts(Props); + {not_found, _} -> + erlang:error(database_does_not_exist, ?b2l(DbName)) + end. + for_db(DbName) -> for_db(DbName, []). @@ -144,8 +154,7 @@ local(DbName) -> lists:filter(Pred, for_db(DbName)). fold(Fun, Acc) -> - DbName = config:get("mem3", "shards_db", "_dbs"), - {ok, Db} = mem3_util:ensure_exists(DbName), + {ok, Db} = mem3_util:ensure_exists(mem3_sync:shards_db()), FAcc = {Db, Fun, Acc}, try {ok, LastAcc} = couch_db:fold_docs(Db, fun fold_fun/2, FAcc), @@ -309,15 +318,13 @@ fold_fun(#doc_info{}=DI, {Db, UFun, UAcc}) -> end. get_update_seq() -> - DbName = config:get("mem3", "shards_db", "_dbs"), - {ok, Db} = mem3_util:ensure_exists(DbName), + {ok, Db} = mem3_util:ensure_exists(mem3_sync:shards_db()), Seq = couch_db:get_update_seq(Db), couch_db:close(Db), Seq. listen_for_changes(Since) -> - DbName = config:get("mem3", "shards_db", "_dbs"), - {ok, Db} = mem3_util:ensure_exists(DbName), + {ok, Db} = mem3_util:ensure_exists(mem3_sync:shards_db()), Args = #changes_args{ feed = "continuous", since = Since, @@ -362,8 +369,7 @@ changes_callback(timeout, _) -> load_shards_from_disk(DbName) when is_binary(DbName) -> couch_stats:increment_counter([mem3, shard_cache, miss]), - X = ?l2b(config:get("mem3", "shards_db", "_dbs")), - {ok, Db} = mem3_util:ensure_exists(X), + {ok, Db} = mem3_util:ensure_exists(mem3_sync:shards_db()), try load_shards_from_db(Db, DbName) after diff --git a/src/mem3/src/mem3_sync_event_listener.erl b/src/mem3/src/mem3_sync_event_listener.erl index b6fbe3279..cad34225d 100644 --- a/src/mem3/src/mem3_sync_event_listener.erl +++ b/src/mem3/src/mem3_sync_event_listener.erl @@ -236,7 +236,7 @@ teardown_all(_) -> setup() -> {ok, Pid} = ?MODULE:start_link(), erlang:unlink(Pid), - meck:wait(config_notifier, subscribe, '_', 1000), + wait_config_subscribed(Pid), Pid. teardown(Pid) -> @@ -338,4 +338,16 @@ wait_state(Pid, Field, Val) when is_pid(Pid), is_integer(Field) -> end, test_util:wait(WaitFun). + +wait_config_subscribed(Pid) -> + WaitFun = fun() -> + Handlers = gen_event:which_handlers(config_event), + Pids = [Id || {config_notifier, Id} <- Handlers], + case lists:member(Pid, Pids) of + true -> true; + false -> wait + end + end, + test_util:wait(WaitFun). + -endif. diff --git a/src/mem3/src/mem3_util.erl b/src/mem3/src/mem3_util.erl index 3fc9b4f8e..28cb17778 100644 --- a/src/mem3/src/mem3_util.erl +++ b/src/mem3/src/mem3_util.erl @@ -14,8 +14,10 @@ -export([name_shard/2, create_partition_map/5, build_shards/2, n_val/2, q_val/1, to_atom/1, to_integer/1, write_db_doc/1, delete_db_doc/1, - shard_info/1, ensure_exists/1, open_db_doc/1]). + shard_info/1, ensure_exists/1, open_db_doc/1, get_or_create_db/2]). -export([is_deleted/1, rotate_list/2]). +-export([get_shard_opts/1, get_engine_opt/1, get_props_opt/1]). +-export([get_shard_props/1, find_dirty_shards/0]). -export([ iso8601_timestamp/0, live_nodes/0, @@ -87,13 +89,11 @@ attach_nodes([S | Rest], Acc, [Node | Nodes], UsedNodes) -> attach_nodes(Rest, [S#shard{node=Node} | Acc], Nodes, [Node | UsedNodes]). open_db_doc(DocId) -> - DbName = ?l2b(config:get("mem3", "shards_db", "_dbs")), - {ok, Db} = couch_db:open(DbName, [?ADMIN_CTX]), + {ok, Db} = couch_db:open(mem3_sync:shards_db(), [?ADMIN_CTX]), try couch_db:open_doc(Db, DocId, [ejson_body]) after couch_db:close(Db) end. 
write_db_doc(Doc) -> - DbName = ?l2b(config:get("mem3", "shards_db", "_dbs")), - write_db_doc(DbName, Doc, true). + write_db_doc(mem3_sync:shards_db(), Doc, true). write_db_doc(DbName, #doc{id=Id, body=Body} = Doc, ShouldMutate) -> {ok, Db} = couch_db:open(DbName, [?ADMIN_CTX]), @@ -118,8 +118,7 @@ write_db_doc(DbName, #doc{id=Id, body=Body} = Doc, ShouldMutate) -> delete_db_doc(DocId) -> gen_server:cast(mem3_shards, {cache_remove, DocId}), - DbName = ?l2b(config:get("mem3", "shards_db", "_dbs")), - delete_db_doc(DbName, DocId, true). + delete_db_doc(mem3_sync:shards_db(), DocId, true). delete_db_doc(DbName, DocId, ShouldMutate) -> {ok, Db} = couch_db:open(DbName, [?ADMIN_CTX]), @@ -324,7 +323,7 @@ live_nodes() -> % which could be a while. % replicate_dbs_to_all_nodes(Timeout) -> - DbName = ?l2b(config:get("mem3", "shards_db", "_dbs")), + DbName = mem3_sync:shards_db(), Targets= mem3_util:live_nodes() -- [node()], Res = [start_replication(node(), T, DbName, Timeout) || T <- Targets], collect_replication_results(Res, Timeout). @@ -335,7 +334,7 @@ replicate_dbs_to_all_nodes(Timeout) -> % them until they are all done. % replicate_dbs_from_all_nodes(Timeout) -> - DbName = ?l2b(config:get("mem3", "shards_db", "_dbs")), + DbName = mem3_sync:shards_db(), Sources = mem3_util:live_nodes() -- [node()], Res = [start_replication(S, node(), DbName, Timeout) || S <- Sources], collect_replication_results(Res, Timeout). @@ -509,6 +508,75 @@ sort_ranges_fun({B1, _}, {B2, _}) -> B1 =< B2. +get_or_create_db(DbName, Options) -> + case couch_db:open_int(DbName, Options) of + {ok, _} = OkDb -> + OkDb; + {not_found, no_db_file} -> + try + DbOpts = case mem3:dbname(DbName) of + DbName -> []; + MDbName -> mem3_shards:opts_for_db(MDbName) + end, + Options1 = [{create_if_missing, true} | Options], + Options2 = merge_opts(DbOpts, Options1), + couch_db:open_int(DbName, Options2) + catch error:database_does_not_exist -> + throw({error, missing_target}) + end; + Else -> + Else + end. + + +%% merge two proplists, atom options only valid in Old +merge_opts(New, Old) -> + lists:foldl(fun({Key, Val}, Acc) -> + lists:keystore(Key, 1, Acc, {Key, Val}) + end, Old, New). + + +get_shard_props(ShardName) -> + case couch_db:open_int(ShardName, []) of + {ok, Db} -> + Props = case couch_db_engine:get_props(Db) of + undefined -> []; + Else -> Else + end, + %% We don't normally store the default engine name + EngineProps = case couch_db_engine:get_engine(Db) of + couch_bt_engine -> + []; + EngineName -> + [{engine, EngineName}] + end, + [{props, Props} | EngineProps]; + {not_found, _} -> + not_found; + Else -> + Else + end. + + +find_dirty_shards() -> + mem3_shards:fold(fun(#shard{node=Node, name=Name, opts=Opts}=Shard, Acc) -> + case Opts of + [] -> + Acc; + [{props, []}] -> + Acc; + _ -> + Props = rpc:call(Node, ?MODULE, get_shard_props, [Name]), + case Props =:= Opts of + true -> + Acc; + false -> + [{Shard, Props} | Acc] + end + end + end, []). + + -ifdef(TEST). -include_lib("eunit/include/eunit.hrl"). diff --git a/src/setup/src/setup.erl b/src/setup/src/setup.erl index 3d23229b8..5129765da 100644 --- a/src/setup/src/setup.erl +++ b/src/setup/src/setup.erl @@ -165,7 +165,7 @@ enable_cluster_int(Options, false) -> couch_log:debug("Enable Cluster: ~p~n", [Options]). set_admin(Username, Password) -> - config:set("admins", binary_to_list(Username), binary_to_list(Password)). + config:set("admins", binary_to_list(Username), binary_to_list(Password), #{sensitive => true}). 
setup_node(NewCredentials, NewBindAddress, NodeCount, Port) -> case NewCredentials of @@ -198,6 +198,9 @@ setup_node(NewCredentials, NewBindAddress, NodeCount, Port) -> finish_cluster(Options) -> + % ensure that uuid is set + couch_server:get_uuid(), + ok = wait_connected(), ok = sync_admins(), ok = sync_uuid(), @@ -262,7 +265,7 @@ sync_config(Section, Key, Value) -> ok -> ok; error -> - log:error("~p sync_admin results ~p errors ~p", + couch_log:error("~p sync_admin results ~p errors ~p", [?MODULE, Results, Errors]), Reason = "Cluster setup unable to sync admin passwords", throw({setup_error, Reason}) diff --git a/src/setup/src/setup_httpd.erl b/src/setup/src/setup_httpd.erl index f4e05ce09..48b1b2a5a 100644 --- a/src/setup/src/setup_httpd.erl +++ b/src/setup/src/setup_httpd.erl @@ -19,7 +19,7 @@ handle_setup_req(#httpd{method='POST'}=Req) -> ok = chttpd:verify_is_server_admin(Req), couch_httpd:validate_ctype(Req, "application/json"), Setup = get_body(Req), - couch_log:notice("Setup: ~p~n", [Setup]), + couch_log:notice("Setup: ~p~n", [remove_sensitive(Setup)]), Action = binary_to_list(couch_util:get_value(<<"action">>, Setup, <<"missing">>)), case handle_action(Action, Setup) of ok -> @@ -31,24 +31,30 @@ handle_setup_req(#httpd{method='GET'}=Req) -> ok = chttpd:verify_is_server_admin(Req), Dbs = chttpd:qs_json_value(Req, "ensure_dbs_exist", setup:cluster_system_dbs()), couch_log:notice("Dbs: ~p~n", [Dbs]), - case erlang:list_to_integer(config:get("cluster", "n", undefined)) of - 1 -> - case setup:is_single_node_enabled(Dbs) of - false -> - chttpd:send_json(Req, 200, {[{state, single_node_disabled}]}); - true -> - chttpd:send_json(Req, 200, {[{state, single_node_enabled}]}) - end; + SingleNodeConfig = config:get_boolean("couchdb", "single_node", false), + case SingleNodeConfig of + true -> + chttpd:send_json(Req, 200, {[{state, single_node_enabled}]}); _ -> - case setup:is_cluster_enabled() of - false -> - chttpd:send_json(Req, 200, {[{state, cluster_disabled}]}); - true -> - case setup:has_cluster_system_dbs(Dbs) of + case config:get("cluster", "n", undefined) of + "1" -> + case setup:is_single_node_enabled(Dbs) of false -> - chttpd:send_json(Req, 200, {[{state, cluster_enabled}]}); + chttpd:send_json(Req, 200, {[{state, single_node_disabled}]}); true -> - chttpd:send_json(Req, 200, {[{state, cluster_finished}]}) + chttpd:send_json(Req, 200, {[{state, single_node_enabled}]}) + end; + _ -> + case setup:is_cluster_enabled() of + false -> + chttpd:send_json(Req, 200, {[{state, cluster_disabled}]}); + true -> + case setup:has_cluster_system_dbs(Dbs) of + false -> + chttpd:send_json(Req, 200, {[{state, cluster_enabled}]}); + true -> + chttpd:send_json(Req, 200, {[{state, cluster_finished}]}) + end end end end; @@ -85,7 +91,7 @@ handle_action("enable_cluster", Setup) -> handle_action("finish_cluster", Setup) -> - couch_log:notice("finish_cluster: ~p~n", [Setup]), + couch_log:notice("finish_cluster: ~p~n", [remove_sensitive(Setup)]), Options = get_options([ {ensure_dbs_exist, <<"ensure_dbs_exist">>} @@ -99,7 +105,7 @@ handle_action("finish_cluster", Setup) -> end; handle_action("enable_single_node", Setup) -> - couch_log:notice("enable_single_node: ~p~n", [Setup]), + couch_log:notice("enable_single_node: ~p~n", [remove_sensitive(Setup)]), Options = get_options([ {ensure_dbs_exist, <<"ensure_dbs_exist">>}, @@ -119,7 +125,7 @@ handle_action("enable_single_node", Setup) -> handle_action("add_node", Setup) -> - couch_log:notice("add_node: ~p~n", [Setup]), + couch_log:notice("add_node: ~p~n", 
[remove_sensitive(Setup)]), Options = get_options([ {username, <<"username">>}, @@ -141,10 +147,10 @@ handle_action("add_node", Setup) -> end; handle_action("remove_node", Setup) -> - couch_log:notice("remove_node: ~p~n", [Setup]); + couch_log:notice("remove_node: ~p~n", [remove_sensitive(Setup)]); handle_action("receive_cookie", Setup) -> - couch_log:notice("receive_cookie: ~p~n", [Setup]), + couch_log:notice("receive_cookie: ~p~n", [remove_sensitive(Setup)]), Options = get_options([ {cookie, <<"cookie">>} ], Setup), @@ -167,3 +173,8 @@ get_body(Req) -> couch_log:notice("Body Fail: ~p~n", [Else]), couch_httpd:send_error(Req, 400, <<"bad_request">>, <<"Missing JSON body'">>) end. + +remove_sensitive(KVList0) -> + KVList1 = lists:keyreplace(<<"username">>, 1, KVList0, {<<"username">>, <<"****">>}), + KVList2 = lists:keyreplace(<<"password">>, 1, KVList1, {<<"password">>, <<"****">>}), + KVList2.
\ No newline at end of file diff --git a/src/setup/src/setup_httpd_handlers.erl b/src/setup/src/setup_httpd_handlers.erl index 994c217e8..e26fbc3c4 100644 --- a/src/setup/src/setup_httpd_handlers.erl +++ b/src/setup/src/setup_httpd_handlers.erl @@ -12,7 +12,7 @@ -module(setup_httpd_handlers). --export([url_handler/1, db_handler/1, design_handler/1]). +-export([url_handler/1, db_handler/1, design_handler/1, handler_info/3]). url_handler(<<"_cluster_setup">>) -> fun setup_httpd:handle_setup_req/1; url_handler(_) -> no_match. @@ -20,3 +20,13 @@ url_handler(_) -> no_match. db_handler(_) -> no_match. design_handler(_) -> no_match. + + +handler_info('GET', [<<"_cluster_setup">>], _) -> + {'cluster_setup.read', #{}}; + +handler_info('POST', [<<"_cluster_setup">>], _) -> + {'cluster_setup.write', #{}}; + +handler_info(_, _, _) -> + no_match.
\ No newline at end of file diff --git a/src/smoosh/src/smoosh_channel.erl b/src/smoosh/src/smoosh_channel.erl index d8a8d14a9..2bc98be9d 100644 --- a/src/smoosh/src/smoosh_channel.erl +++ b/src/smoosh/src/smoosh_channel.erl @@ -122,10 +122,9 @@ handle_info({'DOWN', Ref, _, Job, Reason}, State0) -> #state{active=Active0, starting=Starting0} = State, case lists:keytake(Job, 2, Active0) of {value, {Key, _Pid}, Active1} -> - couch_log:warning("exit for compaction of ~p: ~p", [ - smoosh_utils:stringify(Key), Reason]), - {ok, _} = timer:apply_after(5000, smoosh_server, enqueue, [Key]), - {noreply, maybe_start_compaction(State#state{active=Active1})}; + State1 = maybe_remonitor_cpid(State#state{active=Active1}, Key, + Reason), + {noreply, maybe_start_compaction(State1)}; false -> case lists:keytake(Ref, 1, Starting0) of {value, {_, Key}, Starting1} -> @@ -281,8 +280,7 @@ start_compact(State, Db) -> Ref = erlang:monitor(process, DbPid), DbPid ! {'$gen_call', {self(), Ref}, start_compact}, State#state{starting=[{Ref, Key}|State#state.starting]}; - % database is still compacting so we can just monitor the existing - % compaction pid + % Compaction is already running, so monitor existing compaction pid. CPid -> couch_log:notice("Db ~s continuing compaction", [smoosh_utils:stringify(Key)]), @@ -293,6 +291,27 @@ start_compact(State, Db) -> false end. +maybe_remonitor_cpid(State, DbName, Reason) when is_binary(DbName) -> + {ok, Db} = couch_db:open_int(DbName, []), + case couch_db:get_compactor_pid_sync(Db) of + nil -> + couch_log:warning("exit for compaction of ~p: ~p", + [smoosh_utils:stringify(DbName), Reason]), + {ok, _} = timer:apply_after(5000, smoosh_server, enqueue, [DbName]), + State; + CPid -> + couch_log:notice("~s compaction already running. Re-monitor Pid ~p", + [smoosh_utils:stringify(DbName), CPid]), + erlang:monitor(process, CPid), + State#state{active=[{DbName, CPid}|State#state.active]} + end; +% not a database compaction, so ignore the pid check +maybe_remonitor_cpid(State, Key, Reason) -> + couch_log:warning("exit for compaction of ~p: ~p", + [smoosh_utils:stringify(Key), Reason]), + {ok, _} = timer:apply_after(5000, smoosh_server, enqueue, [Key]), + State. + schedule_unpause() -> WaitSecs = list_to_integer(config:get("smoosh", "wait_secs", "30")), erlang:send_after(WaitSecs * 1000, self(), unpause). |
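For reference, the $keyMapMatch selector operator introduced in mango_selector.erl above matches a document when any key of an object-valued field satisfies the sub-selector, as exercised by the new test_keymap_match case. The sketch below is a hedged, standalone illustration against a local CouchDB; the URL, credentials, and database name are assumptions, not values taken from the diff:

    import requests

    COUCH = "http://127.0.0.1:15984"   # assumed local instance
    AUTH = ("adm", "pass")             # assumed admin credentials
    DB = "keymap-demo"                 # assumed scratch database

    # Create a scratch database and two documents with map-valued "foo" fields,
    # matching the fixture used in test_keymap_match.
    requests.put(f"{COUCH}/{DB}", auth=AUTH)
    requests.post(f"{COUCH}/{DB}/_bulk_docs", auth=AUTH, json={
        "docs": [
            {"foo": {"aa": "bar", "bb": "bang"}},
            {"foo": {"cc": "bar", "bb": "bang"}},
        ],
    })

    # $keyMapMatch applies the sub-selector to the *keys* of "foo", so only
    # the first document (which has the key "aa") should match.
    resp = requests.post(f"{COUCH}/{DB}/_find", auth=AUTH, json={
        "selector": {"foo": {"$keyMapMatch": {"$eq": "aa"}}},
    })
    print(resp.json()["docs"])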