summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNick Vatamaniuc <vatamane@apache.org>2018-08-15 15:23:52 -0400
committerNick Vatamaniuc <nickva@users.noreply.github.com>2018-08-15 17:05:53 -0400
commit28ba48da5877a5d471253c1b587af7e3b3121fd9 (patch)
tree03ab0a55ad9c4e25587d5bb06a13b53545c3083d
parentd3453d22b5daad07e056eb51a994302f7a96e877 (diff)
downloadcouchdb-28ba48da5877a5d471253c1b587af7e3b3121fd9.tar.gz
Reduce size of #leaf.atts keys
The `#leaf.atts` data structure is a `[{Position, AttachmentLength}, ...]` proplist which keeps track of attachment lengths; it is used when calculating the external data size of documents. `Position` is supposed to uniquely identify an attachment in a file stream. Initially it was just an integer file offset. Then, after some refactoring work, it became a list of `{Position, Size}` tuples. During the pluggable storage engine (PSE) work, streams were abstracted such that each engine can supply its own stream implementation. The position in the stream then became a tuple that looks like `{couch_bt_engine_stream,{<0.1922.0>,[{4267,21}]}}`. This tuple was written to the file as part of the `#leaf.atts` data structure. While still correct, it is unnecessarily verbose, wasting around 100 bytes per attachment, per leaf. To fix it, use the disk-serialized version of the stream position as returned from `couch_stream:to_disk_term`. In the case of the default CouchDB engine implementation, this should avoid writing the module name and the pid value for each attachment entry.
-rw-r--r--src/couch/src/couch_att.erl10
1 files changed, 8 insertions, 2 deletions
diff --git a/src/couch/src/couch_att.erl b/src/couch/src/couch_att.erl
index 16edd66ce..a24de21d6 100644
--- a/src/couch/src/couch_att.erl
+++ b/src/couch/src/couch_att.erl
@@ -308,8 +308,14 @@ size_info([]) ->
{ok, []};
size_info(Atts) ->
Info = lists:map(fun(Att) ->
- [{_, Pos}, AttLen] = fetch([data, att_len], Att),
- {Pos, AttLen}
+ AttLen = fetch(att_len, Att),
+ case fetch(data, Att) of
+ {stream, StreamEngine} ->
+ {ok, SPos} = couch_stream:to_disk_term(StreamEngine),
+ {SPos, AttLen};
+ {_, SPos} ->
+ {SPos, AttLen}
+ end
end, Atts),
{ok, lists:usort(Info)}.