diff options
author | Paul J. Davis <paul.joseph.davis@gmail.com> | 2019-12-04 11:38:48 -0600 |
---|---|---|
committer | Paul J. Davis <paul.joseph.davis@gmail.com> | 2020-02-15 12:39:02 -0600 |
commit | eb1a09e114dafc55fa9511d477b9ada4350134eb (patch) | |
tree | edb78cf33ac10085a128dce7110af10f418261c5 | |
parent | 8bb8e702364be2e74611234fdc6ba19c8f2f70bb (diff) | |
download | couchdb-eb1a09e114dafc55fa9511d477b9ada4350134eb.tar.gz |
Track the size of data stored in a database
This tracks the number of bytes that would be required to store the
contents of a database as flat files on disk. Currently the following
items are tracked:
* Doc ids
* Revisions
* Doc bodies as JSON
* Attachment names
* Attachment types
* Attachment lengths
* Attachment md5s
* Attachment headers
* Local doc ids
* Local doc revisions
* Local doc bodies
-rw-r--r-- | src/couch/src/couch_att.erl | 15 | ||||
-rw-r--r-- | src/fabric/include/fabric2.hrl | 7 | ||||
-rw-r--r-- | src/fabric/src/fabric2_db.erl | 11 | ||||
-rw-r--r-- | src/fabric/src/fabric2_fdb.erl | 144 | ||||
-rw-r--r-- | src/fabric/src/fabric2_util.erl | 52 | ||||
-rw-r--r-- | src/fabric/test/fabric2_doc_crud_tests.erl | 5 |
6 files changed, 205 insertions, 29 deletions
diff --git a/src/couch/src/couch_att.erl b/src/couch/src/couch_att.erl index 837170c99..d41ab5bf2 100644 --- a/src/couch/src/couch_att.erl +++ b/src/couch/src/couch_att.erl @@ -27,6 +27,7 @@ ]). -export([ + external_size/1, size_info/1, to_disk_term/1, from_disk_term/3 @@ -177,6 +178,20 @@ merge_stubs([], _, Merged) -> {ok, lists:reverse(Merged)}. +external_size(Att) -> + NameSize = size(fetch(name, Att)), + TypeSize = case fetch(type, Att) of + undefined -> 0; + Type -> size(Type) + end, + AttSize = fetch(att_len, Att), + Md5Size = case fetch(md5, Att) of + undefined -> 0; + Md5 -> size(Md5) + end, + NameSize + TypeSize + AttSize + Md5Size. + + size_info([]) -> {ok, []}; size_info(Atts) -> diff --git a/src/fabric/include/fabric2.hrl b/src/fabric/include/fabric2.hrl index d07a73793..b1bd30629 100644 --- a/src/fabric/include/fabric2.hrl +++ b/src/fabric/include/fabric2.hrl @@ -45,8 +45,13 @@ % 0 - Initial implementation % 1 - Added attachment hash +% 2 - Added size information --define(CURR_REV_FORMAT, 1). +-define(CURR_REV_FORMAT, 2). + +% 0 - Adding local doc versions + +-define(CURR_LDOC_FORMAT, 0). 
% Misc constants diff --git a/src/fabric/src/fabric2_db.erl b/src/fabric/src/fabric2_db.erl index 3349722ad..b0f7849e2 100644 --- a/src/fabric/src/fabric2_db.erl +++ b/src/fabric/src/fabric2_db.erl @@ -1422,12 +1422,14 @@ update_doc_interactive(Db, Doc0, Future, _Options) -> NewRevInfo = #{ winner => undefined, + exists => false, deleted => NewDeleted, rev_id => {NewRevPos, NewRev}, rev_path => NewRevPath, sequence => undefined, branch_count => undefined, - att_hash => fabric2_util:hash_atts(Atts) + att_hash => fabric2_util:hash_atts(Atts), + rev_size => fabric2_util:rev_size(Doc4) }, % Gather the list of possible winnig revisions @@ -1478,12 +1480,14 @@ update_doc_replicated(Db, Doc0, _Options) -> DocRevInfo0 = #{ winner => undefined, + exists => false, deleted => Deleted, rev_id => {RevPos, Rev}, rev_path => RevPath, sequence => undefined, branch_count => undefined, - att_hash => <<>> + att_hash => <<>>, + rev_size => null }, AllRevInfos = fabric2_fdb:get_all_revs(Db, DocId), @@ -1523,7 +1527,8 @@ update_doc_replicated(Db, Doc0, _Options) -> Doc2 = prep_and_validate(Db, Doc1, PrevRevInfo), Doc3 = flush_doc_atts(Db, Doc2), DocRevInfo2 = DocRevInfo1#{ - atts_hash => fabric2_util:hash_atts(Doc3#doc.atts) + atts_hash => fabric2_util:hash_atts(Doc3#doc.atts), + rev_size => fabric2_util:rev_size(Doc3) }, % Possible winners are the previous winner and diff --git a/src/fabric/src/fabric2_fdb.erl b/src/fabric/src/fabric2_fdb.erl index 00bb4855a..e51b8de5d 100644 --- a/src/fabric/src/fabric2_fdb.erl +++ b/src/fabric/src/fabric2_fdb.erl @@ -36,6 +36,7 @@ get_stat/2, incr_stat/3, + incr_stat/4, get_all_revs/2, get_winning_revs/3, @@ -471,6 +472,19 @@ incr_stat(#{} = Db, StatKey, Increment) when is_integer(Increment) -> erlfdb:add(Tx, Key, Increment). 
+incr_stat(_Db, _Section, _Key, 0) -> + ok; + +incr_stat(#{} = Db, Section, Key, Increment) when is_integer(Increment) -> + #{ + tx := Tx, + db_prefix := DbPrefix + } = ensure_current(Db), + + BinKey = erlfdb_tuple:pack({?DB_STATS, Section, Key}, DbPrefix), + erlfdb:add(Tx, BinKey, Increment). + + get_all_revs(#{} = Db, DocId) -> #{ tx := Tx, @@ -590,6 +604,15 @@ get_local_doc(#{} = Db0, <<?LOCAL_DOC_PREFIX, _/binary>> = DocId) -> get_local_doc_rev(_Db0, <<?LOCAL_DOC_PREFIX, _/binary>> = DocId, Val) -> case Val of + <<255, RevBin/binary>> -> + % Versioned local docs + try + case erlfdb_tuple:unpack(RevBin) of + {?CURR_LDOC_FORMAT, Rev, _Size} -> Rev + end + catch _:_ -> + erlang:error({invalid_local_doc_rev, DocId, Val}) + end; <<131, _/binary>> -> % Compatibility clause for an older encoding format try binary_to_term(Val, [safe]) of @@ -656,7 +679,9 @@ write_doc(#{} = Db0, Doc, NewWinner0, OldWinner, ToUpdate, ToRemove) -> % Revision tree - NewWinner = NewWinner0#{winner := true}, + NewWinner = NewWinner0#{ + winner := true + }, NewRevId = maps:get(rev_id, NewWinner), {WKey, WVal, WinnerVS} = revinfo_to_fdb(Tx, DbPrefix, DocId, NewWinner), @@ -718,7 +743,7 @@ write_doc(#{} = Db0, Doc, NewWinner0, OldWinner, ToUpdate, ToRemove) -> NewSeqVal = erlfdb_tuple:pack({DocId, Deleted, NewRevId}), erlfdb:set_versionstamped_key(Tx, NewSeqKey, NewSeqVal), - % And all the rest... 
+ % Bump db version on design doc changes IsDDoc = case Doc#doc.id of <<?DESIGN_DOC_PREFIX, _/binary>> -> true; @@ -729,6 +754,8 @@ write_doc(#{} = Db0, Doc, NewWinner0, OldWinner, ToUpdate, ToRemove) -> bump_db_version(Db) end, + % Update our document counts + case UpdateStatus of created -> if not IsDDoc -> ok; true -> @@ -755,6 +782,11 @@ write_doc(#{} = Db0, Doc, NewWinner0, OldWinner, ToUpdate, ToRemove) -> ok end, + % Update database size + AddSize = sum_add_rev_sizes([NewWinner | ToUpdate]), + RemSize = sum_rem_rev_sizes(ToRemove), + incr_stat(Db, <<"sizes">>, <<"external">>, AddSize - RemSize), + ok. @@ -766,11 +798,18 @@ write_local_doc(#{} = Db0, Doc) -> Id = Doc#doc.id, - {LDocKey, LDocVal, Rows} = local_doc_to_fdb(Db, Doc), + {LDocKey, LDocVal, NewSize, Rows} = local_doc_to_fdb(Db, Doc), - WasDeleted = case erlfdb:wait(erlfdb:get(Tx, LDocKey)) of - <<_/binary>> -> false; - not_found -> true + {WasDeleted, PrevSize} = case erlfdb:wait(erlfdb:get(Tx, LDocKey)) of + <<255, RevBin/binary>> -> + case erlfdb_tuple:unpack(RevBin) of + {?CURR_LDOC_FORMAT, _Rev, Size} -> + {false, Size} + end; + <<_/binary>> -> + {false, 0}; + not_found -> + {true, 0} end, BPrefix = erlfdb_tuple:pack({?DB_LOCAL_DOC_BODIES, Id}, DbPrefix), @@ -796,6 +835,8 @@ write_local_doc(#{} = Db0, Doc) -> ok end, + incr_stat(Db, <<"sizes">>, <<"external">>, NewSize - PrevSize), + ok. @@ -1086,9 +1127,10 @@ write_doc_body(#{} = Db0, #doc{} = Doc) -> tx := Tx } = Db = ensure_current(Db0), + Rows = doc_to_fdb(Db, Doc), lists:foreach(fun({Key, Value}) -> ok = erlfdb:set(Tx, Key, Value) - end, doc_to_fdb(Db, Doc)). + end, Rows). 
clear_doc_body(_Db, _DocId, not_found) -> @@ -1164,7 +1206,8 @@ revinfo_to_fdb(Tx, DbPrefix, DocId, #{winner := true} = RevId) -> rev_id := {RevPos, Rev}, rev_path := RevPath, branch_count := BranchCount, - att_hash := AttHash + att_hash := AttHash, + rev_size := RevSize } = RevId, VS = new_versionstamp(Tx), Key = {?DB_REVS, DocId, not Deleted, RevPos, Rev}, @@ -1173,7 +1216,8 @@ revinfo_to_fdb(Tx, DbPrefix, DocId, #{winner := true} = RevId) -> VS, BranchCount, list_to_tuple(RevPath), - AttHash + AttHash, + RevSize }, KBin = erlfdb_tuple:pack(Key, DbPrefix), VBin = erlfdb_tuple:pack_vs(Val), @@ -1184,39 +1228,44 @@ revinfo_to_fdb(_Tx, DbPrefix, DocId, #{} = RevId) -> deleted := Deleted, rev_id := {RevPos, Rev}, rev_path := RevPath, - att_hash := AttHash + att_hash := AttHash, + rev_size := RevSize } = RevId, Key = {?DB_REVS, DocId, not Deleted, RevPos, Rev}, - Val = {?CURR_REV_FORMAT, list_to_tuple(RevPath), AttHash}, + Val = {?CURR_REV_FORMAT, list_to_tuple(RevPath), AttHash, RevSize}, KBin = erlfdb_tuple:pack(Key, DbPrefix), VBin = erlfdb_tuple:pack(Val), {KBin, VBin, undefined}. 
-fdb_to_revinfo(Key, {?CURR_REV_FORMAT, _, _, _, _} = Val) -> +fdb_to_revinfo(Key, {?CURR_REV_FORMAT, _, _, _, _, _} = Val) -> {?DB_REVS, _DocId, NotDeleted, RevPos, Rev} = Key, - {_RevFormat, Sequence, BranchCount, RevPath, AttHash} = Val, + {_RevFormat, Sequence, BranchCount, RevPath, AttHash, RevSize} = Val, #{ winner => true, + exists => true, deleted => not NotDeleted, rev_id => {RevPos, Rev}, rev_path => tuple_to_list(RevPath), sequence => Sequence, branch_count => BranchCount, - att_hash => AttHash + att_hash => AttHash, + rev_size => RevSize }; -fdb_to_revinfo(Key, {?CURR_REV_FORMAT, _, _} = Val) -> +fdb_to_revinfo(Key, {?CURR_REV_FORMAT, _, _, _} = Val) -> {?DB_REVS, _DocId, NotDeleted, RevPos, Rev} = Key, - {_RevFormat, RevPath, AttHash} = Val, + {_RevFormat, RevPath, AttHash, RevSize} = Val, #{ winner => false, + exists => true, deleted => not NotDeleted, rev_id => {RevPos, Rev}, rev_path => tuple_to_list(RevPath), sequence => undefined, branch_count => undefined, - att_hash => AttHash + att_hash => AttHash, + rev_size => RevSize }; fdb_to_revinfo(Key, {0, Seq, BCount, RPath}) -> @@ -1225,6 +1274,14 @@ fdb_to_revinfo(Key, {0, Seq, BCount, RPath}) -> fdb_to_revinfo(Key, {0, RPath}) -> Val = {?CURR_REV_FORMAT, RPath, <<>>}, + fdb_to_revinfo(Key, Val); + +fdb_to_revinfo(Key, {1, Seq, BCount, RPath, AttHash}) -> + Val = {?CURR_REV_FORMAT, Seq, BCount, RPath, AttHash, 0}, + fdb_to_revinfo(Key, Val); + +fdb_to_revinfo(Key, {1, RPath, AttHash}) -> + Val = {?CURR_REV_FORMAT, RPath, AttHash, 0}, fdb_to_revinfo(Key, Val). @@ -1244,11 +1301,13 @@ doc_to_fdb(Db, #doc{} = Doc) -> DiskAtts = lists:map(fun couch_att:to_disk_term/1, Atts), Value = term_to_binary({Body, DiskAtts, Deleted}, [{minor_version, 1}]), + Chunks = chunkify_binary(Value), {Rows, _} = lists:mapfoldl(fun(Chunk, ChunkId) -> Key = erlfdb_tuple:pack({?DB_DOCS, Id, Start, Rev, ChunkId}, DbPrefix), {{Key, Chunk}, ChunkId + 1} - end, 0, chunkify_binary(Value)), + end, 0, Chunks), + Rows. 
@@ -1299,8 +1358,17 @@ local_doc_to_fdb(Db, #doc{} = Doc) -> {{K, Chunk}, ChunkId + 1} end, 0, chunkify_binary(BVal)), - {Key, StoreRev, Rows}. + NewSize = fabric2_util:ldoc_size(Doc), + RawValue = erlfdb_tuple:pack({?CURR_LDOC_FORMAT, StoreRev, NewSize}), + + % Prefix our tuple encoding to make upgrades easier + Value = <<255, RawValue/binary>>, + {Key, Value, NewSize, Rows}. + + +fdb_to_local_doc(_Db, _DocId, not_found, []) -> + {not_found, missing}; fdb_to_local_doc(_Db, DocId, <<131, _/binary>> = Val, []) -> % This is an upgrade clause for the old encoding. We allow reading the old @@ -1313,18 +1381,48 @@ fdb_to_local_doc(_Db, DocId, <<131, _/binary>> = Val, []) -> body = Body }; -fdb_to_local_doc(_Db, _DocId, not_found, []) -> - {not_found, missing}; +fdb_to_local_doc(_Db, DocId, <<255, RevBin/binary>>, Rows) when is_list(Rows) -> + Rev = case erlfdb_tuple:unpack(RevBin) of + {?CURR_LDOC_FORMAT, Rev0, _Size} -> Rev0 + end, -fdb_to_local_doc(_Db, DocId, Rev, Rows) when is_list(Rows), is_binary(Rev) -> BodyBin = iolist_to_binary(Rows), Body = binary_to_term(BodyBin, [safe]), + #doc{ id = DocId, revs = {0, [Rev]}, deleted = false, body = Body - }. + }; + +fdb_to_local_doc(Db, DocId, RawRev, Rows) -> + BaseRev = erlfdb_tuple:pack({?CURR_LDOC_FORMAT, RawRev, 0}), + Rev = <<255, BaseRev/binary>>, + fdb_to_local_doc(Db, DocId, Rev, Rows). + + +sum_add_rev_sizes(RevInfos) -> + lists:foldl(fun(RI, Acc) -> + #{ + exists := Exists, + rev_size := Size + } = RI, + case Exists of + true -> Acc; + false -> Size + Acc + end + end, 0, RevInfos). + + +sum_rem_rev_sizes(RevInfos) -> + lists:foldl(fun(RI, Acc) -> + #{ + exists := true, + rev_size := Size + } = RI, + Size + Acc + end, 0, RevInfos). 
chunkify_binary(Data) -> diff --git a/src/fabric/src/fabric2_util.erl b/src/fabric/src/fabric2_util.erl index 2a940659e..a4faf3987 100644 --- a/src/fabric/src/fabric2_util.erl +++ b/src/fabric/src/fabric2_util.erl @@ -17,6 +17,8 @@ revinfo_to_revs/1, revinfo_to_path/1, sort_revinfos/1, + rev_size/1, + ldoc_size/1, seq_zero_vs/0, seq_max_vs/0, @@ -80,6 +82,56 @@ rev_sort_key(#{} = RevInfo) -> {not Deleted, RevPos, Rev}. +rev_size(#doc{} = Doc) -> + #doc{ + id = Id, + revs = Revs, + body = Body, + atts = Atts + } = Doc, + + {Start, Rev} = case Revs of + {0, []} -> {0, <<>>}; + {N, [RevId | _]} -> {N, RevId} + end, + + lists:sum([ + size(Id), + size(erlfdb_tuple:pack({Start})), + size(Rev), + 1, % FDB tuple encoding of booleans for deleted flag is 1 byte + couch_ejson_size:encoded_size(Body), + lists:foldl(fun(Att, Acc) -> + couch_att:external_size(Att) + Acc + end, 0, Atts) + ]). + + +ldoc_size(#doc{id = <<"_local/", _/binary>>} = Doc) -> + #doc{ + id = Id, + revs = {0, [Rev]}, + deleted = Deleted, + body = Body + } = Doc, + + StoreRev = case Rev of + _ when is_integer(Rev) -> integer_to_binary(Rev); + _ when is_binary(Rev) -> Rev + end, + + case Deleted of + true -> + 0; + false -> + lists:sum([ + size(Id), + size(StoreRev), + couch_ejson_size:encoded_size(Body) + ]) + end. + + seq_zero_vs() -> {versionstamp, 0, 0, 0}. diff --git a/src/fabric/test/fabric2_doc_crud_tests.erl b/src/fabric/test/fabric2_doc_crud_tests.erl index 184eb4a66..46cd4fcfd 100644 --- a/src/fabric/test/fabric2_doc_crud_tests.erl +++ b/src/fabric/test/fabric2_doc_crud_tests.erl @@ -884,11 +884,12 @@ local_doc_with_previous_encoding({Db, _}) -> ?assertEqual(NewBody, Doc3#doc.body), % Old doc now has only the rev number in it - OldDocBin = fabric2_fdb:transactional(Db, fun(TxDb) -> + <<255, OldDocBin/binary>> = fabric2_fdb:transactional(Db, fun(TxDb) -> #{tx := Tx} = TxDb, erlfdb:wait(erlfdb:get(Tx, Key)) end), - ?assertEqual(<<"2">> , OldDocBin). 
+ Unpacked = erlfdb_tuple:unpack(OldDocBin), + ?assertMatch({?CURR_LDOC_FORMAT, <<"2">>, _}, Unpacked). before_doc_update_skips_local_docs({Db0, _}) -> |