summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPaul J. Davis <paul.joseph.davis@gmail.com>2019-12-04 11:38:48 -0600
committerPaul J. Davis <paul.joseph.davis@gmail.com>2020-02-15 12:39:02 -0600
commiteb1a09e114dafc55fa9511d477b9ada4350134eb (patch)
treeedb78cf33ac10085a128dce7110af10f418261c5
parent8bb8e702364be2e74611234fdc6ba19c8f2f70bb (diff)
downloadcouchdb-eb1a09e114dafc55fa9511d477b9ada4350134eb.tar.gz
Track the size of data stored in a database
This tracks the number of bytes that would be required to store the contents of a database as flat files on disk. Currently the following items are tracked: * Doc ids * Revisions * Doc body as JSON * Attachment names * Attachment type * Attachment length * Attachment md5s * Attachment headers * Local doc id * Local doc revision * Local doc bodies
-rw-r--r--src/couch/src/couch_att.erl15
-rw-r--r--src/fabric/include/fabric2.hrl7
-rw-r--r--src/fabric/src/fabric2_db.erl11
-rw-r--r--src/fabric/src/fabric2_fdb.erl144
-rw-r--r--src/fabric/src/fabric2_util.erl52
-rw-r--r--src/fabric/test/fabric2_doc_crud_tests.erl5
6 files changed, 205 insertions, 29 deletions
diff --git a/src/couch/src/couch_att.erl b/src/couch/src/couch_att.erl
index 837170c99..d41ab5bf2 100644
--- a/src/couch/src/couch_att.erl
+++ b/src/couch/src/couch_att.erl
@@ -27,6 +27,7 @@
]).
-export([
+ external_size/1,
size_info/1,
to_disk_term/1,
from_disk_term/3
@@ -177,6 +178,20 @@ merge_stubs([], _, Merged) ->
{ok, lists:reverse(Merged)}.
+external_size(Att) ->
+ NameSize = size(fetch(name, Att)),
+ TypeSize = case fetch(type, Att) of
+ undefined -> 0;
+ Type -> size(Type)
+ end,
+ AttSize = fetch(att_len, Att),
+ Md5Size = case fetch(md5, Att) of
+ undefined -> 0;
+ Md5 -> size(Md5)
+ end,
+ NameSize + TypeSize + AttSize + Md5Size.
+
+
size_info([]) ->
{ok, []};
size_info(Atts) ->
diff --git a/src/fabric/include/fabric2.hrl b/src/fabric/include/fabric2.hrl
index d07a73793..b1bd30629 100644
--- a/src/fabric/include/fabric2.hrl
+++ b/src/fabric/include/fabric2.hrl
@@ -45,8 +45,13 @@
% 0 - Initial implementation
% 1 - Added attachment hash
+% 2 - Added size information
--define(CURR_REV_FORMAT, 1).
+-define(CURR_REV_FORMAT, 2).
+
+% 0 - Adding local doc versions
+
+-define(CURR_LDOC_FORMAT, 0).
% Misc constants
diff --git a/src/fabric/src/fabric2_db.erl b/src/fabric/src/fabric2_db.erl
index 3349722ad..b0f7849e2 100644
--- a/src/fabric/src/fabric2_db.erl
+++ b/src/fabric/src/fabric2_db.erl
@@ -1422,12 +1422,14 @@ update_doc_interactive(Db, Doc0, Future, _Options) ->
NewRevInfo = #{
winner => undefined,
+ exists => false,
deleted => NewDeleted,
rev_id => {NewRevPos, NewRev},
rev_path => NewRevPath,
sequence => undefined,
branch_count => undefined,
- att_hash => fabric2_util:hash_atts(Atts)
+ att_hash => fabric2_util:hash_atts(Atts),
+ rev_size => fabric2_util:rev_size(Doc4)
},
% Gather the list of possible winnig revisions
@@ -1478,12 +1480,14 @@ update_doc_replicated(Db, Doc0, _Options) ->
DocRevInfo0 = #{
winner => undefined,
+ exists => false,
deleted => Deleted,
rev_id => {RevPos, Rev},
rev_path => RevPath,
sequence => undefined,
branch_count => undefined,
- att_hash => <<>>
+ att_hash => <<>>,
+ rev_size => null
},
AllRevInfos = fabric2_fdb:get_all_revs(Db, DocId),
@@ -1523,7 +1527,8 @@ update_doc_replicated(Db, Doc0, _Options) ->
Doc2 = prep_and_validate(Db, Doc1, PrevRevInfo),
Doc3 = flush_doc_atts(Db, Doc2),
DocRevInfo2 = DocRevInfo1#{
- atts_hash => fabric2_util:hash_atts(Doc3#doc.atts)
+ atts_hash => fabric2_util:hash_atts(Doc3#doc.atts),
+ rev_size => fabric2_util:rev_size(Doc3)
},
% Possible winners are the previous winner and
diff --git a/src/fabric/src/fabric2_fdb.erl b/src/fabric/src/fabric2_fdb.erl
index 00bb4855a..e51b8de5d 100644
--- a/src/fabric/src/fabric2_fdb.erl
+++ b/src/fabric/src/fabric2_fdb.erl
@@ -36,6 +36,7 @@
get_stat/2,
incr_stat/3,
+ incr_stat/4,
get_all_revs/2,
get_winning_revs/3,
@@ -471,6 +472,19 @@ incr_stat(#{} = Db, StatKey, Increment) when is_integer(Increment) ->
erlfdb:add(Tx, Key, Increment).
+incr_stat(_Db, _Section, _Key, 0) ->
+ ok;
+
+incr_stat(#{} = Db, Section, Key, Increment) when is_integer(Increment) ->
+ #{
+ tx := Tx,
+ db_prefix := DbPrefix
+ } = ensure_current(Db),
+
+ BinKey = erlfdb_tuple:pack({?DB_STATS, Section, Key}, DbPrefix),
+ erlfdb:add(Tx, BinKey, Increment).
+
+
get_all_revs(#{} = Db, DocId) ->
#{
tx := Tx,
@@ -590,6 +604,15 @@ get_local_doc(#{} = Db0, <<?LOCAL_DOC_PREFIX, _/binary>> = DocId) ->
get_local_doc_rev(_Db0, <<?LOCAL_DOC_PREFIX, _/binary>> = DocId, Val) ->
case Val of
+ <<255, RevBin/binary>> ->
+ % Versioned local docs
+ try
+ case erlfdb_tuple:unpack(RevBin) of
+ {?CURR_LDOC_FORMAT, Rev, _Size} -> Rev
+ end
+ catch _:_ ->
+ erlang:error({invalid_local_doc_rev, DocId, Val})
+ end;
<<131, _/binary>> ->
% Compatibility clause for an older encoding format
try binary_to_term(Val, [safe]) of
@@ -656,7 +679,9 @@ write_doc(#{} = Db0, Doc, NewWinner0, OldWinner, ToUpdate, ToRemove) ->
% Revision tree
- NewWinner = NewWinner0#{winner := true},
+ NewWinner = NewWinner0#{
+ winner := true
+ },
NewRevId = maps:get(rev_id, NewWinner),
{WKey, WVal, WinnerVS} = revinfo_to_fdb(Tx, DbPrefix, DocId, NewWinner),
@@ -718,7 +743,7 @@ write_doc(#{} = Db0, Doc, NewWinner0, OldWinner, ToUpdate, ToRemove) ->
NewSeqVal = erlfdb_tuple:pack({DocId, Deleted, NewRevId}),
erlfdb:set_versionstamped_key(Tx, NewSeqKey, NewSeqVal),
- % And all the rest...
+ % Bump db version on design doc changes
IsDDoc = case Doc#doc.id of
<<?DESIGN_DOC_PREFIX, _/binary>> -> true;
@@ -729,6 +754,8 @@ write_doc(#{} = Db0, Doc, NewWinner0, OldWinner, ToUpdate, ToRemove) ->
bump_db_version(Db)
end,
+ % Update our document counts
+
case UpdateStatus of
created ->
if not IsDDoc -> ok; true ->
@@ -755,6 +782,11 @@ write_doc(#{} = Db0, Doc, NewWinner0, OldWinner, ToUpdate, ToRemove) ->
ok
end,
+ % Update database size
+ AddSize = sum_add_rev_sizes([NewWinner | ToUpdate]),
+ RemSize = sum_rem_rev_sizes(ToRemove),
+ incr_stat(Db, <<"sizes">>, <<"external">>, AddSize - RemSize),
+
ok.
@@ -766,11 +798,18 @@ write_local_doc(#{} = Db0, Doc) ->
Id = Doc#doc.id,
- {LDocKey, LDocVal, Rows} = local_doc_to_fdb(Db, Doc),
+ {LDocKey, LDocVal, NewSize, Rows} = local_doc_to_fdb(Db, Doc),
- WasDeleted = case erlfdb:wait(erlfdb:get(Tx, LDocKey)) of
- <<_/binary>> -> false;
- not_found -> true
+ {WasDeleted, PrevSize} = case erlfdb:wait(erlfdb:get(Tx, LDocKey)) of
+ <<255, RevBin/binary>> ->
+ case erlfdb_tuple:unpack(RevBin) of
+ {?CURR_LDOC_FORMAT, _Rev, Size} ->
+ {false, Size}
+ end;
+ <<_/binary>> ->
+ {false, 0};
+ not_found ->
+ {true, 0}
end,
BPrefix = erlfdb_tuple:pack({?DB_LOCAL_DOC_BODIES, Id}, DbPrefix),
@@ -796,6 +835,8 @@ write_local_doc(#{} = Db0, Doc) ->
ok
end,
+ incr_stat(Db, <<"sizes">>, <<"external">>, NewSize - PrevSize),
+
ok.
@@ -1086,9 +1127,10 @@ write_doc_body(#{} = Db0, #doc{} = Doc) ->
tx := Tx
} = Db = ensure_current(Db0),
+ Rows = doc_to_fdb(Db, Doc),
lists:foreach(fun({Key, Value}) ->
ok = erlfdb:set(Tx, Key, Value)
- end, doc_to_fdb(Db, Doc)).
+ end, Rows).
clear_doc_body(_Db, _DocId, not_found) ->
@@ -1164,7 +1206,8 @@ revinfo_to_fdb(Tx, DbPrefix, DocId, #{winner := true} = RevId) ->
rev_id := {RevPos, Rev},
rev_path := RevPath,
branch_count := BranchCount,
- att_hash := AttHash
+ att_hash := AttHash,
+ rev_size := RevSize
} = RevId,
VS = new_versionstamp(Tx),
Key = {?DB_REVS, DocId, not Deleted, RevPos, Rev},
@@ -1173,7 +1216,8 @@ revinfo_to_fdb(Tx, DbPrefix, DocId, #{winner := true} = RevId) ->
VS,
BranchCount,
list_to_tuple(RevPath),
- AttHash
+ AttHash,
+ RevSize
},
KBin = erlfdb_tuple:pack(Key, DbPrefix),
VBin = erlfdb_tuple:pack_vs(Val),
@@ -1184,39 +1228,44 @@ revinfo_to_fdb(_Tx, DbPrefix, DocId, #{} = RevId) ->
deleted := Deleted,
rev_id := {RevPos, Rev},
rev_path := RevPath,
- att_hash := AttHash
+ att_hash := AttHash,
+ rev_size := RevSize
} = RevId,
Key = {?DB_REVS, DocId, not Deleted, RevPos, Rev},
- Val = {?CURR_REV_FORMAT, list_to_tuple(RevPath), AttHash},
+ Val = {?CURR_REV_FORMAT, list_to_tuple(RevPath), AttHash, RevSize},
KBin = erlfdb_tuple:pack(Key, DbPrefix),
VBin = erlfdb_tuple:pack(Val),
{KBin, VBin, undefined}.
-fdb_to_revinfo(Key, {?CURR_REV_FORMAT, _, _, _, _} = Val) ->
+fdb_to_revinfo(Key, {?CURR_REV_FORMAT, _, _, _, _, _} = Val) ->
{?DB_REVS, _DocId, NotDeleted, RevPos, Rev} = Key,
- {_RevFormat, Sequence, BranchCount, RevPath, AttHash} = Val,
+ {_RevFormat, Sequence, BranchCount, RevPath, AttHash, RevSize} = Val,
#{
winner => true,
+ exists => true,
deleted => not NotDeleted,
rev_id => {RevPos, Rev},
rev_path => tuple_to_list(RevPath),
sequence => Sequence,
branch_count => BranchCount,
- att_hash => AttHash
+ att_hash => AttHash,
+ rev_size => RevSize
};
-fdb_to_revinfo(Key, {?CURR_REV_FORMAT, _, _} = Val) ->
+fdb_to_revinfo(Key, {?CURR_REV_FORMAT, _, _, _} = Val) ->
{?DB_REVS, _DocId, NotDeleted, RevPos, Rev} = Key,
- {_RevFormat, RevPath, AttHash} = Val,
+ {_RevFormat, RevPath, AttHash, RevSize} = Val,
#{
winner => false,
+ exists => true,
deleted => not NotDeleted,
rev_id => {RevPos, Rev},
rev_path => tuple_to_list(RevPath),
sequence => undefined,
branch_count => undefined,
- att_hash => AttHash
+ att_hash => AttHash,
+ rev_size => RevSize
};
fdb_to_revinfo(Key, {0, Seq, BCount, RPath}) ->
@@ -1225,6 +1274,14 @@ fdb_to_revinfo(Key, {0, Seq, BCount, RPath}) ->
fdb_to_revinfo(Key, {0, RPath}) ->
Val = {?CURR_REV_FORMAT, RPath, <<>>},
+ fdb_to_revinfo(Key, Val);
+
+fdb_to_revinfo(Key, {1, Seq, BCount, RPath, AttHash}) ->
+ Val = {?CURR_REV_FORMAT, Seq, BCount, RPath, AttHash, 0},
+ fdb_to_revinfo(Key, Val);
+
+fdb_to_revinfo(Key, {1, RPath, AttHash}) ->
+ Val = {?CURR_REV_FORMAT, RPath, AttHash, 0},
fdb_to_revinfo(Key, Val).
@@ -1244,11 +1301,13 @@ doc_to_fdb(Db, #doc{} = Doc) ->
DiskAtts = lists:map(fun couch_att:to_disk_term/1, Atts),
Value = term_to_binary({Body, DiskAtts, Deleted}, [{minor_version, 1}]),
+ Chunks = chunkify_binary(Value),
{Rows, _} = lists:mapfoldl(fun(Chunk, ChunkId) ->
Key = erlfdb_tuple:pack({?DB_DOCS, Id, Start, Rev, ChunkId}, DbPrefix),
{{Key, Chunk}, ChunkId + 1}
- end, 0, chunkify_binary(Value)),
+ end, 0, Chunks),
+
Rows.
@@ -1299,8 +1358,17 @@ local_doc_to_fdb(Db, #doc{} = Doc) ->
{{K, Chunk}, ChunkId + 1}
end, 0, chunkify_binary(BVal)),
- {Key, StoreRev, Rows}.
+ NewSize = fabric2_util:ldoc_size(Doc),
+ RawValue = erlfdb_tuple:pack({?CURR_LDOC_FORMAT, StoreRev, NewSize}),
+
+ % Prefix our tuple encoding to make upgrades easier
+ Value = <<255, RawValue/binary>>,
+ {Key, Value, NewSize, Rows}.
+
+
+fdb_to_local_doc(_Db, _DocId, not_found, []) ->
+ {not_found, missing};
fdb_to_local_doc(_Db, DocId, <<131, _/binary>> = Val, []) ->
% This is an upgrade clause for the old encoding. We allow reading the old
@@ -1313,18 +1381,48 @@ fdb_to_local_doc(_Db, DocId, <<131, _/binary>> = Val, []) ->
body = Body
};
-fdb_to_local_doc(_Db, _DocId, not_found, []) ->
- {not_found, missing};
+fdb_to_local_doc(_Db, DocId, <<255, RevBin/binary>>, Rows) when is_list(Rows) ->
+ Rev = case erlfdb_tuple:unpack(RevBin) of
+ {?CURR_LDOC_FORMAT, Rev0, _Size} -> Rev0
+ end,
-fdb_to_local_doc(_Db, DocId, Rev, Rows) when is_list(Rows), is_binary(Rev) ->
BodyBin = iolist_to_binary(Rows),
Body = binary_to_term(BodyBin, [safe]),
+
#doc{
id = DocId,
revs = {0, [Rev]},
deleted = false,
body = Body
- }.
+ };
+
+fdb_to_local_doc(Db, DocId, RawRev, Rows) ->
+ BaseRev = erlfdb_tuple:pack({?CURR_LDOC_FORMAT, RawRev, 0}),
+ Rev = <<255, BaseRev/binary>>,
+ fdb_to_local_doc(Db, DocId, Rev, Rows).
+
+
+sum_add_rev_sizes(RevInfos) ->
+ lists:foldl(fun(RI, Acc) ->
+ #{
+ exists := Exists,
+ rev_size := Size
+ } = RI,
+ case Exists of
+ true -> Acc;
+ false -> Size + Acc
+ end
+ end, 0, RevInfos).
+
+
+sum_rem_rev_sizes(RevInfos) ->
+ lists:foldl(fun(RI, Acc) ->
+ #{
+ exists := true,
+ rev_size := Size
+ } = RI,
+ Size + Acc
+ end, 0, RevInfos).
chunkify_binary(Data) ->
diff --git a/src/fabric/src/fabric2_util.erl b/src/fabric/src/fabric2_util.erl
index 2a940659e..a4faf3987 100644
--- a/src/fabric/src/fabric2_util.erl
+++ b/src/fabric/src/fabric2_util.erl
@@ -17,6 +17,8 @@
revinfo_to_revs/1,
revinfo_to_path/1,
sort_revinfos/1,
+ rev_size/1,
+ ldoc_size/1,
seq_zero_vs/0,
seq_max_vs/0,
@@ -80,6 +82,56 @@ rev_sort_key(#{} = RevInfo) ->
{not Deleted, RevPos, Rev}.
+rev_size(#doc{} = Doc) ->
+ #doc{
+ id = Id,
+ revs = Revs,
+ body = Body,
+ atts = Atts
+ } = Doc,
+
+ {Start, Rev} = case Revs of
+ {0, []} -> {0, <<>>};
+ {N, [RevId | _]} -> {N, RevId}
+ end,
+
+ lists:sum([
+ size(Id),
+ size(erlfdb_tuple:pack({Start})),
+ size(Rev),
+ 1, % FDB tuple encoding of booleans for deleted flag is 1 byte
+ couch_ejson_size:encoded_size(Body),
+ lists:foldl(fun(Att, Acc) ->
+ couch_att:external_size(Att) + Acc
+ end, 0, Atts)
+ ]).
+
+
+ldoc_size(#doc{id = <<"_local/", _/binary>>} = Doc) ->
+ #doc{
+ id = Id,
+ revs = {0, [Rev]},
+ deleted = Deleted,
+ body = Body
+ } = Doc,
+
+ StoreRev = case Rev of
+ _ when is_integer(Rev) -> integer_to_binary(Rev);
+ _ when is_binary(Rev) -> Rev
+ end,
+
+ case Deleted of
+ true ->
+ 0;
+ false ->
+ lists:sum([
+ size(Id),
+ size(StoreRev),
+ couch_ejson_size:encoded_size(Body)
+ ])
+ end.
+
+
seq_zero_vs() ->
{versionstamp, 0, 0, 0}.
diff --git a/src/fabric/test/fabric2_doc_crud_tests.erl b/src/fabric/test/fabric2_doc_crud_tests.erl
index 184eb4a66..46cd4fcfd 100644
--- a/src/fabric/test/fabric2_doc_crud_tests.erl
+++ b/src/fabric/test/fabric2_doc_crud_tests.erl
@@ -884,11 +884,12 @@ local_doc_with_previous_encoding({Db, _}) ->
?assertEqual(NewBody, Doc3#doc.body),
% Old doc now has only the rev number in it
- OldDocBin = fabric2_fdb:transactional(Db, fun(TxDb) ->
+ <<255, OldDocBin/binary>> = fabric2_fdb:transactional(Db, fun(TxDb) ->
#{tx := Tx} = TxDb,
erlfdb:wait(erlfdb:get(Tx, Key))
end),
- ?assertEqual(<<"2">> , OldDocBin).
+ Unpacked = erlfdb_tuple:unpack(OldDocBin),
+ ?assertMatch({?CURR_LDOC_FORMAT, <<"2">>, _}, Unpacked).
before_doc_update_skips_local_docs({Db0, _}) ->