summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Paul J. Davis <paul.joseph.davis@gmail.com> 2019-12-04 11:38:48 -0600
committer: Paul J. Davis <paul.joseph.davis@gmail.com> 2020-02-12 17:19:39 -0600
commit: 9cecf8a664627b953b65311cb48618af632429ae (patch)
tree: d7275957a2eb352036943cd7ea41249f1d210772
parent: 3d4970fec724d49c7565317a3f1d6ca95d344369 (diff)
download: couchdb-9cecf8a664627b953b65311cb48618af632429ae.tar.gz
Track the size of data stored in a database
This tracks the number of bytes that would be required to store the contents of a database as flat files on disk. Currently the following items are tracked:
* Doc ids
* Revisions
* Doc body as JSON
* Attachment names
* Attachment type
* Attachment length
* Attachment md5s
* Attachment headers
* Local doc id
* Local doc revision
* Local doc bodies
-rw-r--r-- src/couch/src/couch_att.erl 19
-rw-r--r-- src/fabric/include/fabric2.hrl 7
-rw-r--r-- src/fabric/src/fabric2_db.erl 6
-rw-r--r-- src/fabric/src/fabric2_fdb.erl 152
-rw-r--r-- src/fabric/test/fabric2_doc_crud_tests.erl 5
5 files changed, 159 insertions, 30 deletions
diff --git a/src/couch/src/couch_att.erl b/src/couch/src/couch_att.erl
index 2c3336291..90d498cb9 100644
--- a/src/couch/src/couch_att.erl
+++ b/src/couch/src/couch_att.erl
@@ -27,6 +27,7 @@
]).
-export([
+ external_size/1,
size_info/1,
to_disk_term/1,
from_disk_term/3
@@ -179,6 +180,24 @@ merge_stubs([], _, Merged) ->
{ok, lists:reverse(Merged)}.
+external_size(Att) ->
+ NameSize = size(fetch(name, Att)),
+ TypeSize = case fetch(type, Att) of
+ undefined -> 0;
+ Type -> size(Type)
+ end,
+ AttSize = fetch(att_len, Att),
+ Md5Size = case fetch(md5, Att) of
+ undefined -> 0;
+ Md5 -> size(Md5)
+ end,
+ HeadersSize = case fetch(headers, Att) of
+ undefined -> 0;
+ Headers -> couch_ejson_size:encoded_size(Headers)
+ end,
+ NameSize + TypeSize + AttSize + Md5Size + HeadersSize.
+
+
size_info([]) ->
{ok, []};
size_info(Atts) ->
diff --git a/src/fabric/include/fabric2.hrl b/src/fabric/include/fabric2.hrl
index 828a51b8f..5f2571ecd 100644
--- a/src/fabric/include/fabric2.hrl
+++ b/src/fabric/include/fabric2.hrl
@@ -45,8 +45,13 @@
% 0 - Initial implementation
% 1 - Added attachment hash
+% 2 - Added size information
--define(CURR_REV_FORMAT, 1).
+-define(CURR_REV_FORMAT, 2).
+
+% 0 - Adding local doc versions
+
+-define(CURR_LDOC_FORMAT, 0).
% Misc constants
diff --git a/src/fabric/src/fabric2_db.erl b/src/fabric/src/fabric2_db.erl
index 17c899d27..26aad75c0 100644
--- a/src/fabric/src/fabric2_db.erl
+++ b/src/fabric/src/fabric2_db.erl
@@ -1422,7 +1422,8 @@ update_doc_interactive(Db, Doc0, Future, _Options) ->
rev_path => NewRevPath,
sequence => undefined,
branch_count => undefined,
- att_hash => fabric2_util:hash_atts(Atts)
+ att_hash => fabric2_util:hash_atts(Atts),
+ rev_size => null
},
% Gather the list of possible winnig revisions
@@ -1478,7 +1479,8 @@ update_doc_replicated(Db, Doc0, _Options) ->
rev_path => RevPath,
sequence => undefined,
branch_count => undefined,
- att_hash => <<>>
+ att_hash => <<>>,
+ rev_size => null
},
AllRevInfos = fabric2_fdb:get_all_revs(Db, DocId),
diff --git a/src/fabric/src/fabric2_fdb.erl b/src/fabric/src/fabric2_fdb.erl
index 99611b0a1..f447e93fc 100644
--- a/src/fabric/src/fabric2_fdb.erl
+++ b/src/fabric/src/fabric2_fdb.erl
@@ -36,6 +36,7 @@
get_stat/2,
incr_stat/3,
+ incr_stat/4,
get_all_revs/2,
get_winning_revs/3,
@@ -454,6 +455,19 @@ incr_stat(#{} = Db, StatKey, Increment) when is_integer(Increment) ->
erlfdb:add(Tx, Key, Increment).
+incr_stat(_Db, _Section, _Key, 0) ->
+ ok;
+
+incr_stat(#{} = Db, Section, Key, Increment) when is_integer(Increment) ->
+ #{
+ tx := Tx,
+ db_prefix := DbPrefix
+ } = ensure_current(Db),
+
+ BinKey = erlfdb_tuple:pack({?DB_STATS, Section, Key}, DbPrefix),
+ erlfdb:add(Tx, BinKey, Increment).
+
+
get_all_revs(#{} = Db, DocId) ->
#{
tx := Tx,
@@ -573,6 +587,15 @@ get_local_doc(#{} = Db0, <<?LOCAL_DOC_PREFIX, _/binary>> = DocId) ->
get_local_doc_rev(_Db0, <<?LOCAL_DOC_PREFIX, _/binary>> = DocId, Val) ->
case Val of
+ <<255, RevBin/binary>> ->
+ % Versioned local docs
+ try
+ case erlfdb_tuple:unpack(RevBin) of
+ {?CURR_LDOC_FORMAT, Rev, _Size} -> Rev
+ end
+ catch _:_ ->
+ erlang:error({invalid_local_doc_rev, DocId, Val})
+ end;
<<131, _/binary>> ->
% Compatibility clause for an older encoding format
try binary_to_term(Val, [safe]) of
@@ -609,7 +632,7 @@ write_doc(#{} = Db0, Doc, NewWinner0, OldWinner, ToUpdate, ToRemove) ->
% Doc body
- ok = write_doc_body(Db, Doc),
+ {ok, RevSize} = write_doc_body(Db, Doc),
% Attachment bookkeeping
@@ -639,7 +662,10 @@ write_doc(#{} = Db0, Doc, NewWinner0, OldWinner, ToUpdate, ToRemove) ->
% Revision tree
- NewWinner = NewWinner0#{winner := true},
+ NewWinner = NewWinner0#{
+ winner := true,
+ rev_size := RevSize
+ },
NewRevId = maps:get(rev_id, NewWinner),
{WKey, WVal, WinnerVS} = revinfo_to_fdb(Tx, DbPrefix, DocId, NewWinner),
@@ -701,7 +727,7 @@ write_doc(#{} = Db0, Doc, NewWinner0, OldWinner, ToUpdate, ToRemove) ->
NewSeqVal = erlfdb_tuple:pack({DocId, Deleted, NewRevId}),
erlfdb:set_versionstamped_key(Tx, NewSeqKey, NewSeqVal),
- % And all the rest...
+ % Bump db version on design doc changes
IsDDoc = case Doc#doc.id of
<<?DESIGN_DOC_PREFIX, _/binary>> -> true;
@@ -712,6 +738,8 @@ write_doc(#{} = Db0, Doc, NewWinner0, OldWinner, ToUpdate, ToRemove) ->
bump_db_version(Db)
end,
+ % Update our document counts
+
case UpdateStatus of
created ->
if not IsDDoc -> ok; true ->
@@ -738,6 +766,15 @@ write_doc(#{} = Db0, Doc, NewWinner0, OldWinner, ToUpdate, ToRemove) ->
ok
end,
+ % Update database size
+ SizeIncr = RevSize - lists:foldl(fun(RI, Acc) ->
+ Acc + case maps:get(rev_size, RI, null) of
+ null -> 0;
+ Size -> Size
+ end
+ end, 0, ToRemove),
+ incr_stat(Db, <<"sizes">>, <<"external">>, SizeIncr),
+
ok.
@@ -749,11 +786,18 @@ write_local_doc(#{} = Db0, Doc) ->
Id = Doc#doc.id,
- {LDocKey, LDocVal, Rows} = local_doc_to_fdb(Db, Doc),
+ {LDocKey, LDocVal, NewSize, Rows} = local_doc_to_fdb(Db, Doc),
- WasDeleted = case erlfdb:wait(erlfdb:get(Tx, LDocKey)) of
- <<_/binary>> -> false;
- not_found -> true
+ {WasDeleted, PrevSize} = case erlfdb:wait(erlfdb:get(Tx, LDocKey)) of
+ <<255, RevBin/binary>> ->
+ case erlfdb_tuple:unpack(RevBin) of
+ {?CURR_LDOC_FORMAT, _Rev, Size} ->
+ {false, Size}
+ end;
+ <<_/binary>> ->
+ {false, 0};
+ not_found ->
+ {true, 0}
end,
BPrefix = erlfdb_tuple:pack({?DB_LOCAL_DOC_BODIES, Id}, DbPrefix),
@@ -779,6 +823,8 @@ write_local_doc(#{} = Db0, Doc) ->
ok
end,
+ incr_stat(Db, <<"sizes">>, <<"external">>, NewSize - PrevSize),
+
ok.
@@ -1045,9 +1091,11 @@ write_doc_body(#{} = Db0, #doc{} = Doc) ->
tx := Tx
} = Db = ensure_current(Db0),
+ {Rows, RevSize} = doc_to_fdb(Db, Doc),
lists:foreach(fun({Key, Value}) ->
ok = erlfdb:set(Tx, Key, Value)
- end, doc_to_fdb(Db, Doc)).
+ end, Rows),
+ {ok, RevSize}.
clear_doc_body(_Db, _DocId, not_found) ->
@@ -1123,7 +1171,8 @@ revinfo_to_fdb(Tx, DbPrefix, DocId, #{winner := true} = RevId) ->
rev_id := {RevPos, Rev},
rev_path := RevPath,
branch_count := BranchCount,
- att_hash := AttHash
+ att_hash := AttHash,
+ rev_size := RevSize
} = RevId,
VS = new_versionstamp(Tx),
Key = {?DB_REVS, DocId, not Deleted, RevPos, Rev},
@@ -1132,7 +1181,8 @@ revinfo_to_fdb(Tx, DbPrefix, DocId, #{winner := true} = RevId) ->
VS,
BranchCount,
list_to_tuple(RevPath),
- AttHash
+ AttHash,
+ RevSize
},
KBin = erlfdb_tuple:pack(Key, DbPrefix),
VBin = erlfdb_tuple:pack_vs(Val),
@@ -1143,18 +1193,19 @@ revinfo_to_fdb(_Tx, DbPrefix, DocId, #{} = RevId) ->
deleted := Deleted,
rev_id := {RevPos, Rev},
rev_path := RevPath,
- att_hash := AttHash
+ att_hash := AttHash,
+ rev_size := RevSize
} = RevId,
Key = {?DB_REVS, DocId, not Deleted, RevPos, Rev},
- Val = {?CURR_REV_FORMAT, list_to_tuple(RevPath), AttHash},
+ Val = {?CURR_REV_FORMAT, list_to_tuple(RevPath), AttHash, RevSize},
KBin = erlfdb_tuple:pack(Key, DbPrefix),
VBin = erlfdb_tuple:pack(Val),
{KBin, VBin, undefined}.
-fdb_to_revinfo(Key, {?CURR_REV_FORMAT, _, _, _, _} = Val) ->
+fdb_to_revinfo(Key, {?CURR_REV_FORMAT, _, _, _, _, _} = Val) ->
{?DB_REVS, _DocId, NotDeleted, RevPos, Rev} = Key,
- {_RevFormat, Sequence, BranchCount, RevPath, AttHash} = Val,
+ {_RevFormat, Sequence, BranchCount, RevPath, AttHash, RevSize} = Val,
#{
winner => true,
deleted => not NotDeleted,
@@ -1162,12 +1213,13 @@ fdb_to_revinfo(Key, {?CURR_REV_FORMAT, _, _, _, _} = Val) ->
rev_path => tuple_to_list(RevPath),
sequence => Sequence,
branch_count => BranchCount,
- att_hash => AttHash
+ att_hash => AttHash,
+ rev_size => RevSize
};
-fdb_to_revinfo(Key, {?CURR_REV_FORMAT, _, _} = Val) ->
+fdb_to_revinfo(Key, {?CURR_REV_FORMAT, _, _, _} = Val) ->
{?DB_REVS, _DocId, NotDeleted, RevPos, Rev} = Key,
- {_RevFormat, RevPath, AttHash} = Val,
+ {_RevFormat, RevPath, AttHash, RevSize} = Val,
#{
winner => false,
deleted => not NotDeleted,
@@ -1175,7 +1227,8 @@ fdb_to_revinfo(Key, {?CURR_REV_FORMAT, _, _} = Val) ->
rev_path => tuple_to_list(RevPath),
sequence => undefined,
branch_count => undefined,
- att_hash => AttHash
+ att_hash => AttHash,
+ rev_size => RevSize
};
fdb_to_revinfo(Key, {0, Seq, BCount, RPath}) ->
@@ -1184,6 +1237,14 @@ fdb_to_revinfo(Key, {0, Seq, BCount, RPath}) ->
fdb_to_revinfo(Key, {0, RPath}) ->
Val = {?CURR_REV_FORMAT, RPath, <<>>},
+ fdb_to_revinfo(Key, Val);
+
+fdb_to_revinfo(Key, {1, Seq, BCount, RPath, AttHash}) ->
+ Val = {?CURR_REV_FORMAT, Seq, BCount, RPath, AttHash, null},
+ fdb_to_revinfo(Key, Val);
+
+fdb_to_revinfo(Key, {1, RPath, AttHash}) ->
+ Val = {?CURR_REV_FORMAT, RPath, AttHash, null},
fdb_to_revinfo(Key, Val).
@@ -1203,12 +1264,26 @@ doc_to_fdb(Db, #doc{} = Doc) ->
DiskAtts = lists:map(fun couch_att:to_disk_term/1, Atts),
Value = term_to_binary({Body, DiskAtts, Deleted}, [{minor_version, 1}]),
+ Chunks = chunkify_binary(Value),
{Rows, _} = lists:mapfoldl(fun(Chunk, ChunkId) ->
Key = erlfdb_tuple:pack({?DB_DOCS, Id, Start, Rev, ChunkId}, DbPrefix),
{{Key, Chunk}, ChunkId + 1}
- end, 0, chunkify_binary(Value)),
- Rows.
+ end, 0, Chunks),
+
+ % Calculate the size of this revision
+ TotalSize = lists:sum([
+ size(Id),
+ size(erlfdb_tuple:pack({Start})),
+ size(Rev),
+ 1, % FDB tuple encoding of booleans for deleted flag is 1 byte
+ couch_ejson_size:encoded_size(Body),
+ lists:foldl(fun(Att, Acc) ->
+ couch_att:external_size(Att) + Acc
+ end, 0, Atts)
+ ]),
+
+ {Rows, TotalSize}.
fdb_to_doc(_Db, _DocId, _Pos, _Path, []) ->
@@ -1258,9 +1333,29 @@ local_doc_to_fdb(Db, #doc{} = Doc) ->
{{K, Chunk}, ChunkId + 1}
end, 0, chunkify_binary(BVal)),
- {Key, StoreRev, Rows}.
+ % Calculate size
+ TotalSize = case Doc#doc.deleted of
+ true ->
+ 0;
+ false ->
+ lists:sum([
+ size(Id),
+ size(StoreRev),
+ couch_ejson_size:encoded_size(Body)
+ ])
+ end,
+
+ RawValue = erlfdb_tuple:pack({?CURR_LDOC_FORMAT, StoreRev, TotalSize}),
+
+ % Prefix our tuple encoding to make upgrades easier
+ Value = <<255, RawValue/binary>>,
+
+ {Key, Value, TotalSize, Rows}.
+fdb_to_local_doc(_Db, _DocId, not_found, []) ->
+ {not_found, missing};
+
fdb_to_local_doc(_Db, DocId, <<131, _/binary>> = Val, []) ->
% This is an upgrade clause for the old encoding. We allow reading the old
% value and will perform an upgrade of the storage format on an update.
@@ -1272,18 +1367,25 @@ fdb_to_local_doc(_Db, DocId, <<131, _/binary>> = Val, []) ->
body = Body
};
-fdb_to_local_doc(_Db, _DocId, not_found, []) ->
- {not_found, missing};
+fdb_to_local_doc(_Db, DocId, <<255, RevBin/binary>>, Rows) when is_list(Rows) ->
+ Rev = case erlfdb_tuple:unpack(RevBin) of
+ {?CURR_LDOC_FORMAT, Rev0, _Size} -> Rev0
+ end,
-fdb_to_local_doc(_Db, DocId, Rev, Rows) when is_list(Rows), is_binary(Rev) ->
BodyBin = iolist_to_binary(Rows),
Body = binary_to_term(BodyBin, [safe]),
+
#doc{
id = DocId,
revs = {0, [Rev]},
deleted = false,
body = Body
- }.
+ };
+
+fdb_to_local_doc(Db, DocId, RawRev, Rows) ->
+ BaseRev = erlfdb_tuple:pack({?CURR_LDOC_FORMAT, RawRev, 0}),
+ Rev = <<255, BaseRev/binary>>,
+ fdb_to_local_doc(Db, DocId, Rev, Rows).
chunkify_binary(Data) ->
diff --git a/src/fabric/test/fabric2_doc_crud_tests.erl b/src/fabric/test/fabric2_doc_crud_tests.erl
index 184eb4a66..46cd4fcfd 100644
--- a/src/fabric/test/fabric2_doc_crud_tests.erl
+++ b/src/fabric/test/fabric2_doc_crud_tests.erl
@@ -884,11 +884,12 @@ local_doc_with_previous_encoding({Db, _}) ->
?assertEqual(NewBody, Doc3#doc.body),
% Old doc now has only the rev number in it
- OldDocBin = fabric2_fdb:transactional(Db, fun(TxDb) ->
+ <<255, OldDocBin/binary>> = fabric2_fdb:transactional(Db, fun(TxDb) ->
#{tx := Tx} = TxDb,
erlfdb:wait(erlfdb:get(Tx, Key))
end),
- ?assertEqual(<<"2">> , OldDocBin).
+ Unpacked = erlfdb_tuple:unpack(OldDocBin),
+ ?assertMatch({?CURR_LDOC_FORMAT, <<"2">>, _}, Unpacked).
before_doc_update_skips_local_docs({Db0, _}) ->