From 107e11fb63ac0e9beb0bac4f071738dcfba9f12e Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Wed, 8 Feb 2017 07:25:37 -0600 Subject: Ensure deterministic revisions for attachments This re-fixes a corner case when recreating a document with an attachment in a single multipart request. Since we don't detect that we need a new revision until after the document has been serialized, we need to be able to deserialize the body so that we can generate the same revisions regardless of the contents of the database. If we don't do this then we end up including information from the position of the attachment on disk in the revision calculation, which can introduce branches in the revision tree. I've left this as a separate commit from the pluggable storage engine work so that it's called out clearly for us to revisit. COUCHDB-3255 --- src/couch/src/couch_bt_engine.erl | 10 +++++++++- src/couch/src/couch_db.erl | 12 +----------- src/couch/src/couch_db_updater.erl | 12 +++++++++++- 3 files changed, 21 insertions(+), 13 deletions(-) diff --git a/src/couch/src/couch_bt_engine.erl b/src/couch/src/couch_bt_engine.erl index 0b6f177bf..07ace204f 100644 --- a/src/couch/src/couch_bt_engine.erl +++ b/src/couch/src/couch_bt_engine.erl @@ -331,7 +331,15 @@ serialize_doc(#st{} = St, #doc{} = Doc) -> SummaryBin = ?term_to_bin({Body, Atts}), Md5 = crypto:hash(md5, SummaryBin), Data = couch_file:assemble_file_chunk(SummaryBin, Md5), - Doc#doc{body = Data}. + % TODO: This is a terrible hack to get around the issues + % in COUCHDB-3255. We'll need to come back and figure + % out a better approach to handling the case when we + % need to generate a new revision id after the doc + % has been serialized. + Doc#doc{ + body = Data, + meta = [{comp_body, Body} | Doc#doc.meta] + }. 
write_doc_body(St, #doc{} = Doc) -> diff --git a/src/couch/src/couch_db.erl b/src/couch/src/couch_db.erl index 52d1eb12b..f93c1e18e 100644 --- a/src/couch/src/couch_db.erl +++ b/src/couch/src/couch_db.erl @@ -941,7 +941,7 @@ prep_and_validate_replicated_updates(Db, [Bucket|RestBuckets], [OldInfo|RestOldI -new_revid(#doc{body=Body0, revs={OldStart,OldRevs}, atts=Atts, deleted=Deleted}) -> +new_revid(#doc{body=Body, revs={OldStart,OldRevs}, atts=Atts, deleted=Deleted}) -> DigestedAtts = lists:foldl(fun(Att, Acc) -> [N, T, M] = couch_att:fetch([name, type, md5], Att), case M == <<>> of @@ -949,16 +949,6 @@ new_revid(#doc{body=Body0, revs={OldStart,OldRevs}, atts=Atts, deleted=Deleted}) false -> [{N, T, M} | Acc] end end, [], Atts), - Body = case Body0 of - {summary, [_Len, _Md5, BodyAtts], _SizeInfo, _AttsFd} -> - {CompBody, _CompAtts} = binary_to_term(BodyAtts), - couch_compress:decompress(CompBody); - {summary, [_Len, BodyAtts], _SizeInfo, _AttsFd} -> - {CompBody, _CompAtts} = binary_to_term(BodyAtts), - couch_compress:decompress(CompBody); - Else -> - Else - end, case DigestedAtts of Atts2 when length(Atts) =/= length(Atts2) -> % We must have old style non-md5 attachments diff --git a/src/couch/src/couch_db_updater.erl b/src/couch/src/couch_db_updater.erl index 665c70577..6f9232a2e 100644 --- a/src/couch/src/couch_db_updater.erl +++ b/src/couch/src/couch_db_updater.erl @@ -557,7 +557,17 @@ merge_rev_tree(OldInfo, NewDoc, Client, Limit, false) % Update the new doc based on revisions in OldInfo #doc_info{revs=[WinningRev | _]} = couch_doc:to_doc_info(OldInfo), #rev_info{rev={OldPos, OldRev}} = WinningRev, - NewRevId = couch_db:new_revid(NewDoc#doc{revs={OldPos, [OldRev]}}), + Body = case couch_util:get_value(comp_body, NewDoc#doc.meta) of + CompBody when is_binary(CompBody) -> + couch_compress:decompress(CompBody); + _ -> + NewDoc#doc.body + end, + RevIdDoc = NewDoc#doc{ + revs = {OldPos, [OldRev]}, + body = Body + }, + NewRevId = couch_db:new_revid(RevIdDoc), 
NewDoc2 = NewDoc#doc{revs={OldPos + 1, [NewRevId, OldRev]}}, % Merge our modified new doc into the tree -- cgit v1.2.1